diff --git a/.bzr-mysql/default.conf b/.bzr-mysql/default.conf
index 39ebdda8d7a..782473d27aa 100644
--- a/.bzr-mysql/default.conf
+++ b/.bzr-mysql/default.conf
@@ -1,4 +1,4 @@
 [MYSQL]
-post_commit_to = "commits@lists.mysql.com"
-post_push_to = "commits@lists.mysql.com"
-tree_name = "mysql-5.4"
+post_commit_to = "guilhem@sun.com, alik@sun.com"
+#post_push_to = "commits@lists.mysql.com"
+tree_name = "mysql-trunk"
diff --git a/BUILD/check-cpu b/BUILD/check-cpu
index 27e0acf69a0..f73a872fecd 100755
--- a/BUILD/check-cpu
+++ b/BUILD/check-cpu
@@ -50,8 +50,13 @@ check_cpu () {
         model_name=`sysctl -n hw.model`
         ;;
       Darwin)
-        cpu_family=`uname -p`
-        model_name=`machine`
+        cpu_family=`sysctl -n machdep.cpu.vendor`
+        model_name=`sysctl -n machdep.cpu.brand_string`
+        if [ -z "$cpu_family" -o -z "$model_name" ]
+        then  
+          cpu_family=`uname -p`
+          model_name=`machine`
+        fi  
         ;;
       *)
         cpu_family=`uname -p`;
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6a41496f120..8fc99e5c736 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,7 +13,7 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
-CMAKE_MINIMUM_REQUIRED(VERSION 2.4.7 FATAL_ERROR)
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR)
 
 PROJECT(MySql)
 
@@ -32,52 +32,6 @@ ADD_DEFINITIONS(-DHAVE_YASSL)
 # Set debug options
 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DFORCE_INIT_OF_VARS")
 
-# Note that some engines are always compiled in, MyISAM, MyISAMMRG and HEAP,
-# these three plugin defintions are dummys for symmetry
-
-SET(WITH_HEAP_STORAGE_ENGINE TRUE)
-ADD_DEFINITIONS(-DWITH_HEAP_STORAGE_ENGINE)
-SET (mysql_plugin_defs "${mysql_plugin_defs},builtin_heap_plugin")
-
-SET(WITH_MYISAM_STORAGE_ENGINE TRUE)
-ADD_DEFINITIONS(-DWITH_MYISAM_STORAGE_ENGINE)
-SET (mysql_plugin_defs "${mysql_plugin_defs},builtin_myisam_plugin")
-
-SET(WITH_MYISAMMRG_STORAGE_ENGINE TRUE)
-ADD_DEFINITIONS(-DWITH_MYISAMMRG_STORAGE_ENGINE)
-SET (mysql_plugin_defs "${mysql_plugin_defs},builtin_myisammrg_plugin")
-
-IF(WITH_ARCHIVE_STORAGE_ENGINE)
-  ADD_DEFINITIONS(-DWITH_ARCHIVE_STORAGE_ENGINE)
-  SET (mysql_plugin_defs "${mysql_plugin_defs},builtin_archive_plugin")
-ENDIF(WITH_ARCHIVE_STORAGE_ENGINE)
-IF(WITH_BLACKHOLE_STORAGE_ENGINE)
-  ADD_DEFINITIONS(-DWITH_BLACKHOLE_STORAGE_ENGINE)
-  SET (mysql_plugin_defs "${mysql_plugin_defs},builtin_blackhole_plugin")
-ENDIF(WITH_BLACKHOLE_STORAGE_ENGINE)
-IF(WITH_CSV_STORAGE_ENGINE)
-  ADD_DEFINITIONS(-DWITH_CSV_STORAGE_ENGINE)
-  SET (mysql_plugin_defs "${mysql_plugin_defs},builtin_csv_plugin")
-ENDIF(WITH_CSV_STORAGE_ENGINE)
-IF(WITH_EXAMPLE_STORAGE_ENGINE)
-  ADD_DEFINITIONS(-DWITH_EXAMPLE_STORAGE_ENGINE)
-  SET (mysql_plugin_defs "${mysql_plugin_defs},builtin_example_plugin")
-ENDIF(WITH_EXAMPLE_STORAGE_ENGINE)
-IF(WITH_INNOBASE_STORAGE_ENGINE)
-  ADD_DEFINITIONS(-DWITH_INNOBASE_STORAGE_ENGINE)
-  SET (mysql_plugin_defs "${mysql_plugin_defs},builtin_innobase_plugin")
-ENDIF(WITH_INNOBASE_STORAGE_ENGINE)
-IF(WITH_PARTITION_STORAGE_ENGINE)
-  ADD_DEFINITIONS(-DWITH_PARTITION_STORAGE_ENGINE)
-  SET (mysql_plugin_defs "${mysql_plugin_defs},builtin_partition_plugin")
-ENDIF(WITH_PARTITION_STORAGE_ENGINE)
-IF(WITH_FEDERATED_STORAGE_ENGINE)
-  ADD_DEFINITIONS(-DWITH_FEDERATED_STORAGE_ENGINE)
-  SET (mysql_plugin_defs "${mysql_plugin_defs},builtin_federated_plugin")
-ENDIF(WITH_FEDERATED_STORAGE_ENGINE)
-
-CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/sql/sql_builtin.cc.in 
-               ${CMAKE_SOURCE_DIR}/sql/sql_builtin.cc @ONLY)
 
 SET(localstatedir "C:\\mysql\\data")
 CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/support-files/my-huge.cnf.sh
@@ -122,6 +76,16 @@ IF(MSVC AND NOT CMAKE_GENERATOR MATCHES "Visual Studio 7")
     SET(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} /wd4996")
 ENDIF(MSVC AND NOT CMAKE_GENERATOR MATCHES "Visual Studio 7")
 
+IF(CMAKE_GENERATOR MATCHES "Visual Studio 7")
+    # VS2003 has a bug that prevents linking mysqld with module definition file 
+    # (/DEF option for linker). Linker would incorrectly complain about multiply 
+    # defined symbols. Workaround is to disable dynamic plugins, so /DEF is not
+    # used.
+    MESSAGE("Warning: Building MySQL with Visual Studio 2003.NET is no more supported.")
+    MESSAGE("Please use a newer version of Visual Studio.")
+    SET(WITHOUT_DYNAMIC_PLUGINS TRUE)
+ENDIF(CMAKE_GENERATOR MATCHES "Visual Studio 7")
+
 # Settings for Visual Studio 7 and above.  
 IF(MSVC)
     # replace /MDd with /MTd
@@ -165,12 +129,16 @@ IF(WIN32)
   ADD_DEFINITIONS("-D_WINDOWS -D__WIN__ -D_CRT_SECURE_NO_DEPRECATE")
 ENDIF(WIN32)
 
+# default to x86 platform.  We'll check for X64 in a bit
+SET (PLATFORM X86)
+
 # This definition is necessary to work around a bug with Intellisense described
 # here: http://tinyurl.com/2cb428.  Syntax highlighting is important for proper
 # debugger functionality.
 IF(CMAKE_SIZEOF_VOID_P MATCHES 8)
     MESSAGE(STATUS "Detected 64-bit platform.")
     ADD_DEFINITIONS("-D_WIN64")
+    SET (PLATFORM X64)
 ENDIF(CMAKE_SIZEOF_VOID_P MATCHES 8)
 
 IF(EMBED_MANIFESTS)
@@ -223,6 +191,81 @@ IF(EMBED_MANIFESTS)
     ENDIF(CMAKE_GENERATOR MATCHES "Visual Studio 8 2005 Win64")
 ENDIF(EMBED_MANIFESTS)
 
+# Figure out what engines to build and how (statically or dynamically),
+# add preprocessor defines for storage engines.
+IF(WITHOUT_DYNAMIC_PLUGINS)
+  MESSAGE("Dynamic plugins are disabled.")
+ENDIF(WITHOUT_DYNAMIC_PLUGINS)
+
+FILE(GLOB STORAGE_SUBDIRS storage/*)
+FOREACH(SUBDIR ${STORAGE_SUBDIRS})
+  FILE(RELATIVE_PATH DIRNAME ${PROJECT_SOURCE_DIR}/storage ${SUBDIR})
+  STRING(TOUPPER ${DIRNAME} ENGINE)
+  STRING(TOLOWER ${DIRNAME} ENGINE_LOWER) 
+  IF (EXISTS ${SUBDIR}/CMakeLists.txt)
+    # Check  MYSQL_STORAGE_ENGINE macro is present
+    FILE(STRINGS ${SUBDIR}/CMakeLists.txt HAVE_STORAGE_ENGINE REGEX MYSQL_STORAGE_ENGINE)
+    IF(HAVE_STORAGE_ENGINE)
+      SET(ENGINE_BUILD_TYPE "DYNAMIC")
+      # Read plug.in to find out if a plugin is mandatory and whether it supports
+      # build as shared library (dynamic).
+      IF(EXISTS ${SUBDIR}/plug.in)
+        FILE(READ ${SUBDIR}/plug.in PLUGIN_FILE_CONTENT)
+        STRING (REGEX MATCH  "MYSQL_PLUGIN_DYNAMIC"  MYSQL_PLUGIN_DYNAMIC  ${PLUGIN_FILE_CONTENT})
+        STRING (REGEX MATCH  "MYSQL_PLUGIN_MANDATORY"  MYSQL_PLUGIN_MANDATORY  ${PLUGIN_FILE_CONTENT})
+        STRING (REGEX MATCH  "MYSQL_PLUGIN_STATIC"  MYSQL_PLUGIN_STATIC  ${PLUGIN_FILE_CONTENT})
+
+        IF(MYSQL_PLUGIN_MANDATORY)
+          SET(WITH_${ENGINE}_STORAGE_ENGINE TRUE)
+        ENDIF(MYSQL_PLUGIN_MANDATORY)
+
+        IF (WITH_${ENGINE}_STORAGE_ENGINE AND MYSQL_PLUGIN_STATIC)
+          SET(ENGINE_BUILD_TYPE "STATIC")
+        ELSEIF(NOT WITHOUT_${ENGINE}_STORAGE_ENGINE AND MYSQL_PLUGIN_DYNAMIC AND NOT WITHOUT_DYNAMIC_PLUGINS)
+          SET(ENGINE_BUILD_TYPE "DYNAMIC")
+        ELSE(WITH_${ENGINE}_STORAGE_ENGINE AND MYSQL_PLUGIN_STATIC)
+          SET(ENGINE_BUILD_TYPE "NONE")
+        ENDIF(WITH_${ENGINE}_STORAGE_ENGINE AND MYSQL_PLUGIN_STATIC)
+        IF (ENGINE_BUILD_TYPE STREQUAL "STATIC") 
+          SET (mysql_plugin_defs  "${mysql_plugin_defs},builtin_${ENGINE_LOWER}_plugin")
+          SET (MYSQLD_STATIC_ENGINE_LIBS ${MYSQLD_STATIC_ENGINE_LIBS} ${ENGINE_LOWER})
+          SET (STORAGE_ENGINE_DEFS "${STORAGE_ENGINE_DEFS} -DWITH_${ENGINE}_STORAGE_ENGINE")
+          SET (WITH_${ENGINE}_STORAGE_ENGINE TRUE)
+        ENDIF (ENGINE_BUILD_TYPE STREQUAL "STATIC")
+      ENDIF(EXISTS ${SUBDIR}/plug.in)
+
+      IF(NOT ENGINE_BUILD_TYPE STREQUAL "NONE")
+        LIST(APPEND  ${ENGINE_BUILD_TYPE}_ENGINE_DIRECTORIES  ${SUBDIR})
+      ENDIF(NOT ENGINE_BUILD_TYPE STREQUAL "NONE")
+      
+    ENDIF(HAVE_STORAGE_ENGINE)
+  ENDIF(EXISTS ${SUBDIR}/CMakeLists.txt)
+ENDFOREACH(SUBDIR ${STORAGE_SUBDIRS})
+
+# Special handling for partition(not really pluggable)
+IF(NOT WITHOUT_PARTITION_STORAGE_ENGINE)
+  SET (STORAGE_ENGINE_DEFS "${STORAGE_ENGINE_DEFS} -DWITH_PARTITION_STORAGE_ENGINE")
+  SET (mysql_plugin_defs  "${mysql_plugin_defs},builtin_partition_plugin")
+ENDIF(NOT WITHOUT_PARTITION_STORAGE_ENGINE)
+
+ADD_DEFINITIONS(${STORAGE_ENGINE_DEFS}) 
+
+# Now write out our mysql_plugin_defs struct
+CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/sql/sql_builtin.cc.in 
+               ${CMAKE_SOURCE_DIR}/sql/sql_builtin.cc @ONLY)
+
+# Add subdirectories for storage engines
+SET (ENGINE_BUILD_TYPE "STATIC")
+FOREACH(DIR ${STATIC_ENGINE_DIRECTORIES})
+  ADD_SUBDIRECTORY(${DIR})
+ENDFOREACH(DIR ${STATIC_ENGINE_DIRECTORIES})
+
+SET (ENGINE_BUILD_TYPE "DYNAMIC")
+FOREACH(DIR ${DYNAMIC_ENGINE_DIRECTORIES})
+  ADD_SUBDIRECTORY(${DIR})
+ENDFOREACH(DIR ${DYNAMIC_ENGINE_DIRECTORIES})
+
+
 # FIXME "debug" only needed if build type is "Debug", but
 # CMAKE_BUILD_TYPE is not set during configure time.
 ADD_SUBDIRECTORY(vio)
@@ -235,28 +278,7 @@ ADD_SUBDIRECTORY(zlib)
 ADD_SUBDIRECTORY(extra/yassl)
 ADD_SUBDIRECTORY(extra/yassl/taocrypt)
 ADD_SUBDIRECTORY(extra)
-ADD_SUBDIRECTORY(storage/heap)
-ADD_SUBDIRECTORY(storage/myisam)
-ADD_SUBDIRECTORY(storage/myisammrg)
 ADD_SUBDIRECTORY(client)
-IF(WITH_ARCHIVE_STORAGE_ENGINE)
-  ADD_SUBDIRECTORY(storage/archive)
-ENDIF(WITH_ARCHIVE_STORAGE_ENGINE)
-IF(WITH_BLACKHOLE_STORAGE_ENGINE)
-  ADD_SUBDIRECTORY(storage/blackhole)
-ENDIF(WITH_BLACKHOLE_STORAGE_ENGINE)
-IF(WITH_CSV_STORAGE_ENGINE)
-  ADD_SUBDIRECTORY(storage/csv)
-ENDIF(WITH_CSV_STORAGE_ENGINE)
-IF(WITH_EXAMPLE_STORAGE_ENGINE)
-  ADD_SUBDIRECTORY(storage/example)
-ENDIF(WITH_EXAMPLE_STORAGE_ENGINE)
-IF(WITH_FEDERATED_STORAGE_ENGINE)
-  ADD_SUBDIRECTORY(storage/federated)
-ENDIF(WITH_FEDERATED_STORAGE_ENGINE)
-IF(WITH_INNOBASE_STORAGE_ENGINE)
-  ADD_SUBDIRECTORY(storage/innobase)
-ENDIF(WITH_INNOBASE_STORAGE_ENGINE)
 ADD_SUBDIRECTORY(sql)
 ADD_SUBDIRECTORY(libmysql)
 ADD_SUBDIRECTORY(tests)
diff --git a/client/Makefile.am b/client/Makefile.am
index 94db565ba37..ecdd010575f 100644
--- a/client/Makefile.am
+++ b/client/Makefile.am
@@ -71,7 +71,7 @@ mysqldump_SOURCES=              mysqldump.c \
 	                        $(top_srcdir)/mysys/mf_getdate.c
 
 mysqlimport_SOURCES=		mysqlimport.c
-
+mysqlimport_CFLAGS=		-DTHREAD -UUNDEF_THREADS_HACK
 mysqlimport_LDADD =		$(CXXLDFLAGS) $(CLIENT_THREAD_LIBS) \
 				@CLIENT_EXTRA_LDFLAGS@ \
 				$(LIBMYSQLCLIENT_LA) \
@@ -80,14 +80,14 @@ mysqlimport_LDADD =		$(CXXLDFLAGS) $(CLIENT_THREAD_LIBS) \
 mysqlshow_SOURCES=		mysqlshow.c
 
 mysqlslap_SOURCES=		mysqlslap.c
-mysqlslap_CFLAGS=		-DTHREAD -UUNDEF_THREADS_HACK
+mysqlslap_CFLAGS=		-DTHREAD -UMYSQL_CLIENT_NO_THREADS
 mysqlslap_LDADD =		$(CXXLDFLAGS) $(CLIENT_THREAD_LIBS) \
 				@CLIENT_EXTRA_LDFLAGS@ \
 				$(LIBMYSQLCLIENT_LA) \
 				$(top_builddir)/mysys/libmysys.a
 
 mysqltest_SOURCES=		mysqltest.cc
-mysqltest_CXXFLAGS=		-DTHREAD -UUNDEF_THREADS_HACK
+mysqltest_CXXFLAGS=		-DTHREAD -UMYSQL_CLIENT_NO_THREADS
 mysqltest_LDADD =		$(CXXLDFLAGS) $(CLIENT_THREAD_LIBS) \
 				@CLIENT_EXTRA_LDFLAGS@ \
 				$(LIBMYSQLCLIENT_LA) \
@@ -99,7 +99,7 @@ mysql_upgrade_SOURCES=          mysql_upgrade.c \
                                 $(top_srcdir)/mysys/my_getpagesize.c
 
 # Fix for mit-threads
-DEFS =			-DUNDEF_THREADS_HACK \
+DEFS =			-DMYSQL_CLIENT_NO_THREADS \
 			-DDEFAULT_MYSQL_HOME="\"$(prefix)\"" \
 			-DDATADIR="\"$(localstatedir)\""
 
diff --git a/client/mysql.cc b/client/mysql.cc
index 46141cd975f..5afbc2e960b 100644
--- a/client/mysql.cc
+++ b/client/mysql.cc
@@ -115,7 +115,7 @@ extern "C" {
 #define PROMPT_CHAR '\\'
 #define DEFAULT_DELIMITER ";"
 
-#define MAX_BATCH_BUFFER_SIZE (1024L * 1024L)
+#define MAX_BATCH_BUFFER_SIZE (1024L * 1024L * 1024L)
 
 typedef struct st_status
 {
@@ -143,7 +143,8 @@ static my_bool ignore_errors=0,wait_flag=0,quick=0,
 	       tty_password= 0, opt_nobeep=0, opt_reconnect=1,
 	       default_charset_used= 0, opt_secure_auth= 0,
                default_pager_set= 0, opt_sigint_ignore= 0,
-               show_warnings= 0, executing_query= 0, interrupted_query= 0;
+               show_warnings= 0, executing_query= 0, interrupted_query= 0,
+               ignore_spaces= 0;
 static my_bool debug_info_flag, debug_check_flag;
 static my_bool column_types_flag;
 static my_bool preserve_comments= 0;
@@ -1183,7 +1184,12 @@ int main(int argc,char *argv[])
         histfile= 0;
       }
     }
-    if (histfile)
+
+    /* We used to suggest setting MYSQL_HISTFILE=/dev/null. */
+    if (histfile && strncmp(histfile, "/dev/null", 10) == 0)
+      histfile= NULL;
+
+    if (histfile && histfile[0])
     {
       if (verbose)
 	tee_fprintf(stdout, "Reading history-file %s\n",histfile);
@@ -1218,7 +1224,8 @@ sig_handler mysql_end(int sig)
 {
   mysql_close(&mysql);
 #ifdef HAVE_READLINE
-  if (!status.batch && !quick && !opt_html && !opt_xml && histfile)
+  if (!status.batch && !quick && !opt_html && !opt_xml &&
+      histfile && histfile[0])
   {
     /* write-history */
     if (verbose)
@@ -1345,7 +1352,7 @@ static struct my_option my_long_options[] =
   {"debug", '#', "Output debug log", (uchar**) &default_dbug_option,
    (uchar**) &default_dbug_option, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
 #endif
-  {"debug-check", OPT_DEBUG_CHECK, "Check memory and open file usage at exit .",
+  {"debug-check", OPT_DEBUG_CHECK, "Check memory and open file usage at exit.",
    (uchar**) &debug_check_flag, (uchar**) &debug_check_flag, 0,
    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
   {"debug-info", 'T', "Print some debug info at exit.", (uchar**) &debug_info_flag,
@@ -1372,8 +1379,9 @@ static struct my_option my_long_options[] =
   {"no-named-commands", 'g',
    "Named commands are disabled. Use \\* form only, or use named commands only in the beginning of a line ending with a semicolon (;) Since version 10.9 the client now starts with this option ENABLED by default! Disable with '-G'. Long format commands still work from the first line. WARNING: option deprecated; use --disable-named-commands instead.",
    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
-  {"ignore-spaces", 'i', "Ignore space after function names.", 0, 0, 0,
-   GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+  {"ignore-spaces", 'i', "Ignore space after function names.",
+   (uchar**) &ignore_spaces, (uchar**) &ignore_spaces, 0, GET_BOOL, NO_ARG, 0, 0,
+   0, 0, 0, 0},
   {"local-infile", OPT_LOCAL_INFILE, "Enable/disable LOAD DATA LOCAL INFILE.",
    (uchar**) &opt_local_infile,
    (uchar**) &opt_local_infile, 0, GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0},
@@ -1794,6 +1802,10 @@ static int get_options(int argc, char **argv)
     my_end_arg= MY_CHECK_ERROR | MY_GIVE_INFO;
   if (debug_check_flag)
     my_end_arg= MY_CHECK_ERROR;
+
+  if (ignore_spaces)
+    connect_flag|= CLIENT_IGNORE_SPACE;
+
   return(0);
 }
 
@@ -1967,7 +1979,7 @@ static COMMANDS *find_command(char *name,char cmd_char)
     */
     if (strstr(name, "\\g") || (strstr(name, delimiter) &&
                                 !(strlen(name) >= 9 &&
-                                  !my_strnncoll(charset_info,
+                                  !my_strnncoll(&my_charset_latin1,
                                                 (uchar*) name, 9,
                                                 (const uchar*) "delimiter",
                                                 9))))
@@ -1988,11 +2000,11 @@ static COMMANDS *find_command(char *name,char cmd_char)
   {
     if (commands[i].func &&
 	((name &&
-	  !my_strnncoll(charset_info,(uchar*)name,len,
+	  !my_strnncoll(&my_charset_latin1, (uchar*)name, len,
 				     (uchar*)commands[i].name,len) &&
 	  !commands[i].name[len] &&
 	  (!end || (end && commands[i].takes_params))) ||
-	 !name && commands[i].cmd_char == cmd_char))
+	 (!name && commands[i].cmd_char == cmd_char)))
     {
       DBUG_PRINT("exit",("found command: %s", commands[i].name));
       DBUG_RETURN(&commands[i]);
@@ -2151,7 +2163,7 @@ static bool add_line(String &buffer,char *line,char *in_string,
       buffer.length(0);
     }
     else if (!*ml_comment && (!*in_string && (inchar == '#' ||
-			      inchar == '-' && pos[1] == '-' &&
+                                              (inchar == '-' && pos[1] == '-' &&
                               /*
                                 The third byte is either whitespace or is the
                                 end of the line -- which would occur only
@@ -2159,7 +2171,7 @@ static bool add_line(String &buffer,char *line,char *in_string,
                                 itself whitespace and should also match.
                               */
 			      (my_isspace(charset_info,pos[2]) ||
-                               !pos[2]))))
+                               !pos[2])))))
     {
       // Flush previously accepted characters
       if (out != line)
@@ -2720,7 +2732,7 @@ static int com_server_help(String *buffer __attribute__((unused)),
 {
   MYSQL_ROW cur;
   const char *server_cmd= buffer->ptr();
-  char cmd_buf[100];
+  char cmd_buf[100 + 1];
   MYSQL_RES *result;
   int error;
   
@@ -3381,9 +3393,12 @@ print_table_data_html(MYSQL_RES *result)
   {
     while((field = mysql_fetch_field(result)))
     {
-      tee_fprintf(PAGER, "<TH>%s</TH>", (field->name ? 
-					 (field->name[0] ? field->name : 
-					  " &nbsp; ") : "NULL"));
+      tee_fputs("<TH>", PAGER);
+      if (field->name && field->name[0])
+        xmlencode_print(field->name, field->name_length);
+      else
+        tee_fputs(field->name ? " &nbsp; " : "NULL", PAGER);
+      tee_fputs("</TH>", PAGER);
     }
     (void) tee_fputs("</TR>", PAGER);
   }
@@ -3396,7 +3411,7 @@ print_table_data_html(MYSQL_RES *result)
     for (uint i=0; i < mysql_num_fields(result); i++)
     {
       (void) tee_fputs("<TD>", PAGER);
-      safe_put_field(cur[i],lengths[i]);
+      xmlencode_print(cur[i], lengths[i]);
       (void) tee_fputs("</TD>", PAGER);
     }
     (void) tee_fputs("</TR>", PAGER);
@@ -4249,41 +4264,36 @@ com_status(String *buffer __attribute__((unused)),
   MYSQL_RES *result;
   LINT_INIT(result);
 
+  if (mysql_real_query_for_lazy(
+        C_STRING_WITH_LEN("select DATABASE(), USER() limit 1")))
+    return 0;
+
   tee_puts("--------------", stdout);
   usage(1);					/* Print version */
-  if (connected)
+  tee_fprintf(stdout, "\nConnection id:\t\t%lu\n",mysql_thread_id(&mysql));
+  /*
+    Don't remove "limit 1",
+    it is protection againts SQL_SELECT_LIMIT=0
+  */
+  if (mysql_store_result_for_lazy(&result))
   {
-    tee_fprintf(stdout, "\nConnection id:\t\t%lu\n",mysql_thread_id(&mysql));
-    /* 
-      Don't remove "limit 1", 
-      it is protection againts SQL_SELECT_LIMIT=0
-    */
-    if (!mysql_query(&mysql,"select DATABASE(), USER() limit 1") &&
-	(result=mysql_use_result(&mysql)))
+    MYSQL_ROW cur=mysql_fetch_row(result);
+    if (cur)
     {
-      MYSQL_ROW cur=mysql_fetch_row(result);
-      if (cur)
-      {
-        tee_fprintf(stdout, "Current database:\t%s\n", cur[0] ? cur[0] : "");
-        tee_fprintf(stdout, "Current user:\t\t%s\n", cur[1]);
-      }
-      mysql_free_result(result);
-    } 
+      tee_fprintf(stdout, "Current database:\t%s\n", cur[0] ? cur[0] : "");
+      tee_fprintf(stdout, "Current user:\t\t%s\n", cur[1]);
+    }
+    mysql_free_result(result);
+  }
+
 #ifdef HAVE_OPENSSL
-    if ((status_str= mysql_get_ssl_cipher(&mysql)))
-      tee_fprintf(stdout, "SSL:\t\t\tCipher in use is %s\n",
-		  status_str);
-    else
-#endif /* HAVE_OPENSSL */
-      tee_puts("SSL:\t\t\tNot in use", stdout);
-  }
+  if ((status_str= mysql_get_ssl_cipher(&mysql)))
+    tee_fprintf(stdout, "SSL:\t\t\tCipher in use is %s\n",
+                status_str);
   else
-  {
-    vidattr(A_BOLD);
-    tee_fprintf(stdout, "\nNo connection\n");
-    vidattr(A_NORMAL);
-    return 0;
-  }
+#endif /* HAVE_OPENSSL */
+    tee_puts("SSL:\t\t\tNot in use", stdout);
+
   if (skip_updates)
   {
     vidattr(A_BOLD);
@@ -4302,8 +4312,14 @@ com_status(String *buffer __attribute__((unused)),
     tee_fprintf(stdout, "Insert id:\t\t%s\n", llstr(id, buff));
 
   /* "limit 1" is protection against SQL_SELECT_LIMIT=0 */
-  if (!mysql_query(&mysql,"select @@character_set_client, @@character_set_connection, @@character_set_server, @@character_set_database limit 1") &&
-      (result=mysql_use_result(&mysql)))
+  if (mysql_real_query_for_lazy(C_STRING_WITH_LEN(
+        "select @@character_set_client, @@character_set_connection, "
+        "@@character_set_server, @@character_set_database limit 1")))
+  {
+    if (mysql_errno(&mysql) == CR_SERVER_GONE_ERROR)
+      return 0;
+  }
+  if (mysql_store_result_for_lazy(&result))
   {
     MYSQL_ROW cur=mysql_fetch_row(result);
     if (cur)
diff --git a/client/mysql_upgrade.c b/client/mysql_upgrade.c
index 190bb2383e9..641d4a38d16 100644
--- a/client/mysql_upgrade.c
+++ b/client/mysql_upgrade.c
@@ -39,6 +39,7 @@ static uint my_end_arg= 0;
 static char *opt_user= (char*)"root";
 
 static DYNAMIC_STRING ds_args;
+static DYNAMIC_STRING conn_args;
 
 static char *opt_password= 0;
 static my_bool tty_password= 0;
@@ -115,11 +116,11 @@ static struct my_option my_long_options[]=
 #endif
   {"socket", 'S', "Socket file to use for connection.",
    0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+#include <sslopt-longopts.h>
   {"tmpdir", 't', "Directory for temporary files",
    0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
   {"user", 'u', "User for login if not current user.", (uchar**) &opt_user,
    (uchar**) &opt_user, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
-#include <sslopt-longopts.h>
   {"verbose", 'v', "Display more output about the process",
    (uchar**) &opt_verbose, (uchar**) &opt_verbose, 0,
    GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0},
@@ -135,6 +136,7 @@ static void free_used_memory(void)
   free_defaults(defaults_argv);
 
   dynstr_free(&ds_args);
+  dynstr_free(&conn_args);
 }
 
 
@@ -204,7 +206,7 @@ static void add_one_option(DYNAMIC_STRING* ds,
     }
   }
   dynstr_append_os_quoted(ds, "--", opt->name, eq, arg, NullS);
-  dynstr_append(&ds_args, " ");
+  dynstr_append(ds, " ");
 }
 
 
@@ -231,6 +233,8 @@ get_one_option(int optid, const struct my_option *opt,
     break;
 
   case 'p':
+    if (argument == disabled_my_option)
+      argument= (char*) "";			/* Don't require password */
     tty_password= 1;
     add_option= FALSE;
     if (argument)
@@ -254,6 +258,15 @@ get_one_option(int optid, const struct my_option *opt,
   case 'f': /* --force     */
     add_option= FALSE;
     break;
+
+  case 'h': /* --host */
+  case 'W': /* --pipe */
+  case 'P': /* --port */
+  case 'S': /* --socket */
+  case OPT_MYSQL_PROTOCOL: /* --protocol */
+  case OPT_SHARED_MEMORY_BASE_NAME: /* --shared-memory-base-name */
+    add_one_option(&conn_args, opt, argument);
+    break;
   }
 
   if (add_option)
@@ -601,6 +614,20 @@ static void create_mysql_upgrade_info_file(void)
 }
 
 
+/*
+  Print connection-related arguments.
+*/
+
+static void print_conn_args(const char *tool_name)
+{
+  if (conn_args.str[0])
+    verbose("Running '%s' with connection arguments: %s", tool_name,
+          conn_args.str);
+  else
+    verbose("Running '%s with default connection arguments", tool_name);
+}  
+
+
 /*
   Check and upgrade(if neccessary) all tables
   in the server using "mysqlcheck --check-upgrade .."
@@ -608,7 +635,7 @@ static void create_mysql_upgrade_info_file(void)
 
 static int run_mysqlcheck_upgrade(void)
 {
-  verbose("Running 'mysqlcheck'...");
+  print_conn_args("mysqlcheck");
   return run_tool(mysqlcheck_path,
                   NULL, /* Send output from mysqlcheck directly to screen */
                   "--no-defaults",
@@ -622,7 +649,7 @@ static int run_mysqlcheck_upgrade(void)
 
 static int run_mysqlcheck_fixnames(void)
 {
-  verbose("Running 'mysqlcheck'...");
+  print_conn_args("mysqlcheck");
   return run_tool(mysqlcheck_path,
                   NULL, /* Send output from mysqlcheck directly to screen */
                   "--no-defaults",
@@ -751,7 +778,8 @@ int main(int argc, char **argv)
     strncpy(self_name, argv[0], FN_REFLEN);
   }
 
-  if (init_dynamic_string(&ds_args, "", 512, 256))
+  if (init_dynamic_string(&ds_args, "", 512, 256) ||
+      init_dynamic_string(&conn_args, "", 512, 256))
     die("Out of memory");
 
   load_defaults("my", load_default_groups, &argc, &argv);
diff --git a/client/mysqladmin.cc b/client/mysqladmin.cc
index df0dc1e7049..9865b67bb3b 100644
--- a/client/mysqladmin.cc
+++ b/client/mysqladmin.cc
@@ -232,6 +232,8 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
     opt_count_iterations= 1;
     break;
   case 'p':
+    if (argument == disabled_my_option)
+      argument= (char*) "";			// Don't require password
     if (argument)
     {
       char *start=argument;
@@ -677,10 +679,16 @@ static int execute_commands(MYSQL *mysql,int argc, char **argv)
 	pos=argv[1];
 	for (;;)
 	{
-	  if (mysql_kill(mysql,(ulong) atol(pos)))
+          /* We don't use mysql_kill(), since it only handles 32-bit IDs. */
+          char buff[26], *out; /* "KILL " + max 20 digs + NUL */
+          out= strxmov(buff, "KILL ", NullS);
+          ullstr(strtoull(pos, NULL, 0), out);
+
+          if (mysql_query(mysql, buff))
 	  {
-	    my_printf_error(0, "kill failed on %ld; error: '%s'", error_flags,
-			    atol(pos), mysql_error(mysql));
+            /* out still points to just the number */
+	    my_printf_error(0, "kill failed on %s; error: '%s'", error_flags,
+			    out, mysql_error(mysql));
 	    error=1;
 	  }
 	  if (!(pos=strchr(pos,',')))
diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc
index 1621db5ded8..82af7ca65f6 100644
--- a/client/mysqlbinlog.cc
+++ b/client/mysqlbinlog.cc
@@ -17,10 +17,8 @@
 
    TODO: print the catalog (some USE catalog.db ????).
 
-   Standalone program to read a MySQL binary log (or relay log);
-   can read files produced by 3.23, 4.x, 5.0 servers. 
+   Standalone program to read a MySQL binary log (or relay log).
 
-   Can read binlogs from 3.23/4.x/5.0 and relay logs from 4.x/5.0.
    Should be able to read any file of these categories, even with
    --start-position.
    An important fact: the Format_desc event of the log is at most the 3rd event
@@ -681,6 +679,7 @@ Exit_status process_event(PRINT_EVENT_INFO *print_event_info, Log_event *ev,
 {
   char ll_buff[21];
   Log_event_type ev_type= ev->get_type_code();
+  my_bool destroy_evt= TRUE;
   DBUG_ENTER("process_event");
   print_event_info->short_form= short_form;
   Exit_status retval= OK_CONTINUE;
@@ -689,8 +688,8 @@ Exit_status process_event(PRINT_EVENT_INFO *print_event_info, Log_event *ev,
     Format events are not concerned by --offset and such, we always need to
     read them to be able to process the wanted events.
   */
-  if ((rec_count >= offset) &&
-      ((my_time_t)(ev->when) >= start_datetime) ||
+  if (((rec_count >= offset) &&
+       ((my_time_t)(ev->when) >= start_datetime)) ||
       (ev_type == FORMAT_DESCRIPTION_EVENT))
   {
     if (ev_type != FORMAT_DESCRIPTION_EVENT)
@@ -871,12 +870,63 @@ Exit_status process_event(PRINT_EVENT_INFO *print_event_info, Log_event *ev,
       break;
     }
     case TABLE_MAP_EVENT:
+    {
+      Table_map_log_event *map= ((Table_map_log_event *)ev);
+      if (shall_skip_database(map->get_db_name()))
+      {
+        print_event_info->m_table_map_ignored.set_table(map->get_table_id(), map);
+        destroy_evt= FALSE;
+        goto end;
+      }
+    }
     case WRITE_ROWS_EVENT:
     case DELETE_ROWS_EVENT:
     case UPDATE_ROWS_EVENT:
     case PRE_GA_WRITE_ROWS_EVENT:
     case PRE_GA_DELETE_ROWS_EVENT:
     case PRE_GA_UPDATE_ROWS_EVENT:
+    {
+      if (ev_type != TABLE_MAP_EVENT)
+      {
+        Rows_log_event *e= (Rows_log_event*) ev;
+        Table_map_log_event *ignored_map= 
+          print_event_info->m_table_map_ignored.get_table(e->get_table_id());
+        bool skip_event= (ignored_map != NULL);
+
+        /* 
+           end of statement check:
+             i) destroy/free ignored maps
+            ii) if skip event, flush cache now
+         */
+        if (e->get_flags(Rows_log_event::STMT_END_F))
+        {
+          /* 
+            Now is safe to clear ignored map (clear_tables will also
+            delete original table map events stored in the map).
+          */
+          if (print_event_info->m_table_map_ignored.count() > 0)
+            print_event_info->m_table_map_ignored.clear_tables();
+
+          /* 
+             One needs to take into account an event that gets
+             filtered but was last event in the statement. If this is
+             the case, previous rows events that were written into
+             IO_CACHEs still need to be copied from cache to
+             result_file (as it would happen in ev->print(...) if
+             event was not skipped).
+          */
+          if (skip_event)
+          {
+            if ((copy_event_cache_to_file_and_reinit(&print_event_info->head_cache, result_file) ||
+                copy_event_cache_to_file_and_reinit(&print_event_info->body_cache, result_file)))
+              goto err;
+          }
+        }
+
+        /* skip the event check */
+        if (skip_event)
+          goto end;
+      }
       /*
         These events must be printed in base64 format, if printed.
         base64 format requires a FD event to be safe, so if no FD
@@ -900,6 +950,7 @@ Exit_status process_event(PRINT_EVENT_INFO *print_event_info, Log_event *ev,
         goto err;
       }
       /* FALL THROUGH */
+    }
     default:
       ev->print(result_file, print_event_info);
     }
@@ -919,7 +970,8 @@ end:
   {
     if (remote_opt)
       ev->temp_buf= 0;
-    delete ev;
+    if (destroy_evt) /* destroy it later if not set (ignored table map) */
+      delete ev;
   }
   DBUG_RETURN(retval);
 }
@@ -934,10 +986,13 @@ static struct my_option my_long_options[] =
    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
 #endif
   {"base64-output", OPT_BASE64_OUTPUT_MODE,
+    /* 'unspec' is not mentioned because it is just a placeholder. */
    "Determine when the output statements should be base64-encoded BINLOG "
    "statements: 'never' disables it and works only for binlogs without "
    "row-based events; 'auto' is the default and prints base64 only when "
    "necessary (i.e., for row-based events and format description events); "
+   "'decode-rows' suppresses BINLOG statements for row events, but does "
+   "not exit as an error if a row event is found, unlike 'never'; "
    "'always' prints base64 whenever possible. 'always' is for debugging "
    "only and should not be used in a production system. The default is "
    "'auto'. --base64-output is a short form for --base64-output=always."
@@ -1226,6 +1281,8 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
     one_database = 1;
     break;
   case 'p':
+    if (argument == disabled_my_option)
+      argument= (char*) "";                     // Don't require password
     if (argument)
     {
       my_free(pass,MYF(MY_ALLOW_ZERO_PTR));
@@ -1529,8 +1586,7 @@ static Exit_status dump_remote_log_entries(PRINT_EVENT_INFO *print_event_info,
       If reading from a remote host, ensure the temp_buf for the
       Log_event class is pointing to the incoming stream.
     */
-    if (remote_opt)
-      ev->register_temp_buf((char*) net->read_pos + 1); 
+    ev->register_temp_buf((char *) net->read_pos + 1);
 
     Log_event_type type= ev->get_type_code();
     if (glob_description_event->binlog_version >= 3 ||
diff --git a/client/mysqlcheck.c b/client/mysqlcheck.c
index d2edd084c57..c59049d8b72 100644
--- a/client/mysqlcheck.c
+++ b/client/mysqlcheck.c
@@ -286,6 +286,8 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
     what_to_do= DO_UPGRADE;
     break;
   case 'p':
+    if (argument == disabled_my_option)
+      argument= (char*) "";			/* Don't require password */
     if (argument)
     {
       char *start = argument;
diff --git a/client/mysqldump.c b/client/mysqldump.c
index 5a1fa3cc090..6d45d901b33 100644
--- a/client/mysqldump.c
+++ b/client/mysqldump.c
@@ -221,7 +221,7 @@ static struct my_option my_long_options[] =
    (uchar**) &opt_compatible_mode_str, (uchar**) &opt_compatible_mode_str, 0,
    GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
   {"compact", OPT_COMPACT,
-   "Give less verbose output (useful for debugging). Disables structure comments and header/footer constructs.  Enables options --skip-add-drop-table --no-set-names --skip-disable-keys --skip-add-locks",
+   "Give less verbose output (useful for debugging). Disables structure comments and header/footer constructs.  Enables options --skip-add-drop-table --skip-add-locks --skip-comments --skip-disable-keys --skip-set-charset",
    (uchar**) &opt_compact, (uchar**) &opt_compact, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0,
    0, 0},
   {"complete-insert", 'c', "Use complete insert statements.",
@@ -702,6 +702,8 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
     break;
 #endif
   case 'p':
+    if (argument == disabled_my_option)
+      argument= (char*) "";                     /* Don't require password */
     if (argument)
     {
       char *start=argument;
@@ -1395,18 +1397,19 @@ static char *cover_definer_clause_in_sp(const char *def_str,
   SYNOPSIS
     open_sql_file_for_table
     name      name of the table or view
+    flags     flags (as per "man 2 open")
 
   RETURN VALUES
     0        Failed to open file
     > 0      Handle of the open file
 */
-static FILE* open_sql_file_for_table(const char* table)
+static FILE* open_sql_file_for_table(const char* table, int flags)
 {
   FILE* res;
   char filename[FN_REFLEN], tmp_path[FN_REFLEN];
   convert_dirname(tmp_path,path,NullS);
   res= my_fopen(fn_format(filename, table, tmp_path, ".sql", 4),
-                O_WRONLY, MYF(MY_WME));
+                flags, MYF(MY_WME));
   return res;
 }
 
@@ -2288,7 +2291,7 @@ static uint get_table_structure(char *table, char *db, char *table_type,
 
       if (path)
       {
-        if (!(sql_file= open_sql_file_for_table(table)))
+        if (!(sql_file= open_sql_file_for_table(table, O_WRONLY)))
           DBUG_RETURN(0);
 
         write_header(sql_file, db);
@@ -2499,7 +2502,7 @@ static uint get_table_structure(char *table, char *db, char *table_type,
     {
       if (path)
       {
-        if (!(sql_file= open_sql_file_for_table(table)))
+        if (!(sql_file= open_sql_file_for_table(table, O_WRONLY)))
           DBUG_RETURN(0);
         write_header(sql_file, db);
       }
@@ -2723,12 +2726,10 @@ continue_xml:
   DBUG_RETURN((uint) num_fields);
 } /* get_table_structure */
 
-static void dump_trigger_old(MYSQL_RES *show_triggers_rs,
+static void dump_trigger_old(FILE *sql_file, MYSQL_RES *show_triggers_rs,
                              MYSQL_ROW *show_trigger_row,
                              const char *table_name)
 {
-  FILE *sql_file= md_result_file;
-
   char quoted_table_name_buf[NAME_LEN * 2 + 3];
   char *quoted_table_name= quote_name(table_name, quoted_table_name_buf, 1);
 
@@ -2794,11 +2795,10 @@ static void dump_trigger_old(MYSQL_RES *show_triggers_rs,
   DBUG_VOID_RETURN;
 }
 
-static int dump_trigger(MYSQL_RES *show_create_trigger_rs,
+static int dump_trigger(FILE *sql_file, MYSQL_RES *show_create_trigger_rs,
                         const char *db_name,
                         const char *db_cl_name)
 {
-  FILE *sql_file= md_result_file;
   MYSQL_ROW row;
   int db_cl_altered= FALSE;
 
@@ -2862,22 +2862,28 @@ static int dump_triggers_for_table(char *table_name, char *db_name)
   uint       old_opt_compatible_mode= opt_compatible_mode;
   MYSQL_RES  *show_triggers_rs;
   MYSQL_ROW  row;
+  FILE      *sql_file= md_result_file;
 
   char       db_cl_name[MY_CS_NAME_SIZE];
+  int        ret= TRUE;
 
   DBUG_ENTER("dump_triggers_for_table");
   DBUG_PRINT("enter", ("db: %s, table_name: %s", db_name, table_name));
 
+  if (path && !(sql_file= open_sql_file_for_table(table_name,
+                                                  O_WRONLY | O_APPEND)))
+    DBUG_RETURN(1);
+
   /* Do not use ANSI_QUOTES on triggers in dump */
   opt_compatible_mode&= ~MASK_ANSI_QUOTES;
 
   /* Get database collation. */
 
   if (switch_character_set_results(mysql, "binary"))
-    DBUG_RETURN(TRUE);
+    goto done;
 
   if (fetch_db_collation(db_name, db_cl_name, sizeof (db_cl_name)))
-    DBUG_RETURN(TRUE);
+    goto done;
 
   /* Get list of triggers. */
 
@@ -2886,7 +2892,7 @@ static int dump_triggers_for_table(char *table_name, char *db_name)
               quote_for_like(table_name, name_buff));
 
   if (mysql_query_with_error_report(mysql, &show_triggers_rs, query_buff))
-    DBUG_RETURN(TRUE);
+    goto done;
 
   /* Dump triggers. */
 
@@ -2907,17 +2913,15 @@ static int dump_triggers_for_table(char *table_name, char *db_name)
         provide all the necessary information to restore trigger properly.
       */
 
-      dump_trigger_old(show_triggers_rs, &row, table_name);
+      dump_trigger_old(sql_file, show_triggers_rs, &row, table_name);
     }
     else
     {
       MYSQL_RES *show_create_trigger_rs= mysql_store_result(mysql);
 
       if (!show_create_trigger_rs ||
-          dump_trigger(show_create_trigger_rs, db_name, db_cl_name))
-      {
-        DBUG_RETURN(TRUE);
-      }
+          dump_trigger(sql_file, show_create_trigger_rs, db_name, db_cl_name))
+        goto done;
 
       mysql_free_result(show_create_trigger_rs);
     }
@@ -2927,7 +2931,7 @@ static int dump_triggers_for_table(char *table_name, char *db_name)
   mysql_free_result(show_triggers_rs);
 
   if (switch_character_set_results(mysql, default_charset))
-    DBUG_RETURN(TRUE);
+    goto done;
 
   /*
     make sure to set back opt_compatible mode to
@@ -2935,7 +2939,13 @@ static int dump_triggers_for_table(char *table_name, char *db_name)
   */
   opt_compatible_mode=old_opt_compatible_mode;
 
-  DBUG_RETURN(FALSE);
+  ret= FALSE;
+
+done:
+  if (path)
+    my_fclose(sql_file, MYF(0));
+
+  DBUG_RETURN(ret);
 }
 
 static void add_load_option(DYNAMIC_STRING *str, const char *option,
@@ -3811,6 +3821,10 @@ static int dump_all_databases()
     return 1;
   while ((row= mysql_fetch_row(tableres)))
   {
+    if (mysql_get_server_version(mysql) >= 50003 &&
+        !my_strcasecmp(&my_charset_latin1, row[0], "information_schema"))
+      continue;
+
     if (dump_all_tables_in_db(row[0]))
       result=1;
   }
@@ -3825,6 +3839,10 @@ static int dump_all_databases()
     }
     while ((row= mysql_fetch_row(tableres)))
     {
+      if (mysql_get_server_version(mysql) >= 50003 &&
+          !my_strcasecmp(&my_charset_latin1, row[0], "information_schema"))
+        continue;
+
       if (dump_all_views_in_db(row[0]))
         result=1;
     }
@@ -3931,10 +3949,6 @@ int init_dumping_tables(char *qdatabase)
 
 static int init_dumping(char *database, int init_func(char*))
 {
-  if (mysql_get_server_version(mysql) >= 50003 &&
-      !my_strcasecmp(&my_charset_latin1, database, "information_schema"))
-    return 1;
-
   if (mysql_select_db(mysql, database))
   {
     DB_error(mysql, "when selecting the database");
@@ -3993,6 +4007,7 @@ static int dump_all_tables_in_db(char *database)
     DBUG_RETURN(1);
   if (opt_xml)
     print_xml_tag(md_result_file, "", "\n", "database", "name=", database, NullS);
+
   if (lock_tables)
   {
     DYNAMIC_STRING query;
@@ -4226,7 +4241,10 @@ static int dump_selected_tables(char *db, char **table_names, int tables)
   }
   end= pos;
 
-  if (lock_tables)
+  /* Can't LOCK TABLES in INFORMATION_SCHEMA, so don't try. */
+  if (lock_tables &&
+      !(mysql_get_server_version(mysql) >= 50003 &&
+        !my_strcasecmp(&my_charset_latin1, db, "information_schema")))
   {
     if (mysql_real_query(mysql, lock_tables_query.str,
                          lock_tables_query.length-1))
@@ -4780,7 +4798,7 @@ static my_bool get_view_structure(char *table, char* db)
   /* If requested, open separate .sql file for this view */
   if (path)
   {
-    if (!(sql_file= open_sql_file_for_table(table)))
+    if (!(sql_file= open_sql_file_for_table(table, O_WRONLY)))
       DBUG_RETURN(1);
 
     write_header(sql_file, db);
diff --git a/client/mysqlimport.c b/client/mysqlimport.c
index 09ba27b287a..57aee7379f2 100644
--- a/client/mysqlimport.c
+++ b/client/mysqlimport.c
@@ -221,6 +221,8 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
     break;
 #endif
   case 'p':
+    if (argument == disabled_my_option)
+      argument= (char*) "";			/* Don't require password */
     if (argument)
     {
       char *start=argument;
diff --git a/client/mysqlshow.c b/client/mysqlshow.c
index 0e696aed211..15f791ca8fb 100644
--- a/client/mysqlshow.c
+++ b/client/mysqlshow.c
@@ -281,6 +281,8 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
     opt_verbose++;
     break;
   case 'p':
+    if (argument == disabled_my_option)
+      argument= (char*) "";			/* Don't require password */
     if (argument)
     {
       char *start=argument;
diff --git a/client/mysqlslap.c b/client/mysqlslap.c
index b8515289df5..316fb6a9da3 100644
--- a/client/mysqlslap.c
+++ b/client/mysqlslap.c
@@ -712,6 +712,8 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
     verbose++;
     break;
   case 'p':
+    if (argument == disabled_my_option)
+      argument= (char*) "";			/* Don't require password */
     if (argument)
     {
       char *start= argument;
diff --git a/client/mysqltest.cc b/client/mysqltest.cc
index fdd4ff141bc..265d3b0a8e7 100644
--- a/client/mysqltest.cc
+++ b/client/mysqltest.cc
@@ -280,6 +280,7 @@ enum enum_commands {
   Q_SEND_QUIT, Q_CHANGE_USER, Q_MKDIR, Q_RMDIR,
   Q_LIST_FILES, Q_LIST_FILES_WRITE_FILE, Q_LIST_FILES_APPEND_FILE,
   Q_SEND_SHUTDOWN, Q_SHUTDOWN_SERVER,
+  Q_MOVE_FILE,
 
   Q_UNKNOWN,			       /* Unknown command.   */
   Q_COMMENT,			       /* Comments, ignored. */
@@ -376,6 +377,7 @@ const char *command_names[]=
   "list_files_append_file",
   "send_shutdown",
   "shutdown_server",
+  "move_file",
 
   0
 };
@@ -966,6 +968,7 @@ void check_command_args(struct st_command *command,
   for (i= 0; i < num_args; i++)
   {
     const struct command_arg *arg= &args[i];
+    char delimiter;
 
     switch (arg->type) {
       /* A string */
@@ -974,8 +977,15 @@ void check_command_args(struct st_command *command,
       while (*ptr && *ptr == ' ')
         ptr++;
       start= ptr;
-      /* Find end of arg, terminated by "delimiter_arg" */
-      while (*ptr && *ptr != delimiter_arg)
+      delimiter = delimiter_arg;
+      /* If start of arg is ' ` or " search to matching quote end instead */
+      if (*ptr && strchr ("'`\"", *ptr))
+      {
+	delimiter= *ptr;
+	start= ++ptr;
+      }
+      /* Find end of arg, terminated by "delimiter" */
+      while (*ptr && *ptr != delimiter)
         ptr++;
       if (ptr > start)
       {
@@ -987,6 +997,11 @@ void check_command_args(struct st_command *command,
         /* Empty string */
         init_dynamic_string(arg->ds, "", 0, 0);
       }
+      /* Find real end of arg, terminated by "delimiter_arg" */
+      /* This will do nothing if arg was not closed by quotes */
+      while (*ptr && *ptr != delimiter_arg)
+        ptr++;      
+
       command->last_argument= (char*)ptr;
 
       /* Step past the delimiter */
@@ -1445,34 +1460,38 @@ static int run_tool(const char *tool_path, DYNAMIC_STRING *ds_res, ...)
   Test if diff is present.  This is needed on Windows systems
   as the OS returns 1 whether diff is successful or if it is
   not present.
-  Takes name of diff program as argument
-  
+
   We run diff -v and look for output in stdout.
   We don't redirect stderr to stdout to make for a simplified check
   Windows will output '"diff"' is not recognized... to stderr if it is
   not present.
 */
 
-int diff_check (const char *diff_name)
+#ifdef __WIN__
+
+static int diff_check(const char *diff_name)
 {
-    char buf[512]= {0};
-    FILE *res_file;
-    char cmd[128];
-    my_snprintf (cmd, sizeof(cmd), "%s -v", diff_name);
-    int have_diff = 0;
+  FILE *res_file;
+  char buf[128];
+  int have_diff= 0;
 
-    if (!(res_file= popen(cmd, "r")))
-        die("popen(\"%s\", \"r\") failed", cmd);
+  my_snprintf(buf, sizeof(buf), "%s -v", diff_name);
 
-    /* if diff is not present, nothing will be in stdout to increment have_diff */
-    if (fgets(buf, sizeof(buf), res_file))
-        {
-            have_diff += 1;
-        } 
-    pclose(res_file);
-    return have_diff;
+  if (!(res_file= popen(buf, "r")))
+    die("popen(\"%s\", \"r\") failed", buf);
+
+  /* if diff is not present, nothing will be in stdout to increment have_diff */
+  if (fgets(buf, sizeof(buf), res_file))
+    have_diff= 1;
+
+  pclose(res_file);
+
+  return have_diff;
 }
 
+#endif
+
+
 /*
   Show the diff of two files using the systems builtin diff
   command. If no such diff command exist, just dump the content
@@ -1794,7 +1813,7 @@ void check_result()
           log_file.file_name(), reject_file, errno);
 
     show_diff(NULL, result_file_name, reject_file);
-    die(mess);
+    die("%s", mess);
     break;
   }
   default: /* impossible */
@@ -2889,6 +2908,42 @@ void do_copy_file(struct st_command *command)
 }
 
 
+/*
+  SYNOPSIS
+  do_move_file
+  command	command handle
+
+  DESCRIPTION
+  move_file <from_file> <to_file>
+  Move <from_file> to <to_file>
+*/
+
+void do_move_file(struct st_command *command)
+{
+  int error;
+  static DYNAMIC_STRING ds_from_file;
+  static DYNAMIC_STRING ds_to_file;
+  const struct command_arg move_file_args[] = {
+    { "from_file", ARG_STRING, TRUE, &ds_from_file, "Filename to move from" },
+    { "to_file", ARG_STRING, TRUE, &ds_to_file, "Filename to move to" }
+  };
+  DBUG_ENTER("do_move_file");
+
+  check_command_args(command, command->first_argument,
+                     move_file_args,
+                     sizeof(move_file_args)/sizeof(struct command_arg),
+                     ' ');
+
+  DBUG_PRINT("info", ("Move %s to %s", ds_from_file.str, ds_to_file.str));
+  error= (my_rename(ds_from_file.str, ds_to_file.str,
+                    MYF(0)) != 0);
+  handle_command_error(command, error);
+  dynstr_free(&ds_from_file);
+  dynstr_free(&ds_to_file);
+  DBUG_VOID_RETURN;
+}
+
+
 /*
   SYNOPSIS
   do_chmod_file
@@ -4546,7 +4601,7 @@ void select_connection(struct st_command *command)
   };
   check_command_args(command, command->first_argument, connection_args,
                      sizeof(connection_args)/sizeof(struct command_arg),
-                     ',');
+                     ' ');
 
   DBUG_PRINT("info", ("changing connection: %s", ds_connection.str));
   select_connection_name(ds_connection.str);
@@ -5665,11 +5720,11 @@ static struct my_option my_long_options[] =
   {"sp-protocol", OPT_SP_PROTOCOL, "Use stored procedures for select",
    (uchar**) &sp_protocol, (uchar**) &sp_protocol, 0,
    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+#include "sslopt-longopts.h"
   {"tail-lines", OPT_TAIL_LINES,
    "Number of lines of the resul to include in a failure report",
    (uchar**) &opt_tail_lines, (uchar**) &opt_tail_lines, 0,
    GET_INT, REQUIRED_ARG, 0, 0, 10000, 0, 0, 0},
-#include "sslopt-longopts.h"
   {"test-file", 'x', "Read test from/in this file (default stdin).",
    0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
   {"timer-file", 'm', "File where the timing in micro seconds is stored.",
@@ -5803,6 +5858,8 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
     break;
   }
   case 'p':
+    if (argument == disabled_my_option)
+      argument= (char*) "";			// Don't require password
     if (argument)
     {
       my_free(opt_pass, MYF(MY_ALLOW_ZERO_PTR));
@@ -7678,6 +7735,7 @@ int main(int argc, char **argv)
       case Q_CHANGE_USER: do_change_user(command); break;
       case Q_CAT_FILE: do_cat_file(command); break;
       case Q_COPY_FILE: do_copy_file(command); break;
+      case Q_MOVE_FILE: do_move_file(command); break;
       case Q_CHMOD_FILE: do_chmod_file(command); break;
       case Q_PERL: do_perl(command); break;
       case Q_DELIMITER:
diff --git a/cmd-line-utils/readline/Makefile.am b/cmd-line-utils/readline/Makefile.am
index 5fcbcde0516..e5f5717858d 100644
--- a/cmd-line-utils/readline/Makefile.am
+++ b/cmd-line-utils/readline/Makefile.am
@@ -31,7 +31,7 @@ noinst_HEADERS =	readline.h chardefs.h keymaps.h \
 
 EXTRA_DIST=		emacs_keymap.c vi_keymap.c
 
-DEFS =			-DUNDEF_THREADS_HACK -DHAVE_CONFIG_H -DNO_KILL_INTR
+DEFS =			-DMYSQL_CLIENT_NO_THREADS -DHAVE_CONFIG_H -DNO_KILL_INTR
 
 # Don't update the files from bitkeeper
 %::SCCS/s.%
diff --git a/cmd-line-utils/readline/bind.c b/cmd-line-utils/readline/bind.c
index baed1dfad49..490691943a8 100644
--- a/cmd-line-utils/readline/bind.c
+++ b/cmd-line-utils/readline/bind.c
@@ -79,7 +79,7 @@ static int _rl_read_init_file PARAMS((const char *, int));
 static int glean_key_from_name PARAMS((char *));
 static int find_boolean_var PARAMS((const char *));
 
-static char *_rl_get_string_variable_value PARAMS((const char *));
+static const char *_rl_get_string_variable_value PARAMS((const char *));
 static int substring_member_of_array PARAMS((char *, const char **));
 
 static int currently_reading_init_file;
@@ -442,7 +442,7 @@ rl_translate_keyseq (seq, array, len)
 {
   register int i, c, l, temp;
 
-  for (i = l = 0; c = seq[i]; i++)
+  for (i = l = 0; (c = seq[i]); i++)
     {
       if (c == '\\')
 	{
@@ -701,7 +701,7 @@ rl_function_of_keyseq (keyseq, map, type)
     {
       unsigned char ic = keyseq[i];
 
-      if (META_CHAR (ic) && _rl_convert_meta_chars_to_ascii)
+      if (META_CHAR_FOR_UCHAR (ic) && _rl_convert_meta_chars_to_ascii)
 	{
 	  if (map[ESC].type == ISKMAP)
 	    {
@@ -776,7 +776,8 @@ _rl_read_file (filename, sizep)
   file_size = (size_t)finfo.st_size;
 
   /* check for overflow on very large files */
-  if (file_size != finfo.st_size || file_size + 1 < file_size)
+if ((sizeof(off_t) > sizeof(size_t) && finfo.st_size > (off_t)(size_t)~0) ||  
+    file_size + 1 < file_size)
     {
       if (file >= 0)
 	close (file);
@@ -807,7 +808,7 @@ _rl_read_file (filename, sizep)
 /* Re-read the current keybindings file. */
 int
 rl_re_read_init_file (count, ignore)
-     int count, ignore;
+     int count __attribute__((unused)), ignore __attribute__((unused));
 {
   int r;
   r = rl_read_init_file ((const char *)NULL);
@@ -1031,7 +1032,7 @@ parser_if (args)
 /* Invert the current parser state if there is anything on the stack. */
 static int
 parser_else (args)
-     char *args;
+     char *args __attribute__((unused));
 {
   register int i;
 
@@ -1062,7 +1063,7 @@ parser_else (args)
    _rl_parsing_conditionalized_out from the stack. */
 static int
 parser_endif (args)
-     char *args;
+     char *args __attribute__((unused));
 {
   if (if_stack_depth)
     _rl_parsing_conditionalized_out = if_stack[--if_stack_depth];
@@ -1185,7 +1186,7 @@ rl_parse_and_bind (string)
     {
       int passc = 0;
 
-      for (i = 1; c = string[i]; i++)
+      for (i = 1; (c = string[i]); i++)
 	{
 	  if (passc)
 	    {
@@ -1276,7 +1277,7 @@ rl_parse_and_bind (string)
       int delimiter, passc;
 
       delimiter = string[i++];
-      for (passc = 0; c = string[i]; i++)
+      for (passc = 0; (c = string[i]); i++)
 	{
 	  if (passc)
 	    {
@@ -1436,7 +1437,7 @@ static struct {
 #if defined (VISIBLE_STATS)
   { "visible-stats",		&rl_visible_stats,		0 },
 #endif /* VISIBLE_STATS */
-  { (char *)NULL, (int *)NULL }
+  { (char *)NULL, (int *)NULL, 0 }
 };
 
 static int
@@ -1505,7 +1506,7 @@ static struct {
   { "editing-mode",	V_STRING,	sv_editmode },
   { "isearch-terminators", V_STRING,	sv_isrchterm },
   { "keymap",		V_STRING,	sv_keymap },
-  { (char *)NULL,	0 }
+  { (char *)NULL,	0,              (_rl_sv_func_t*)NULL }
 };
 
 static int
@@ -1532,7 +1533,7 @@ bool_to_int (value)
 		(value[0] == '1' && value[1] == '\0'));
 }
 
-char *
+const char *
 rl_variable_value (name)
      const char *name;
 {
@@ -1799,7 +1800,7 @@ rl_set_keymap_from_edit_mode ()
 #endif /* VI_MODE */
 }
 
-char *
+const char *
 rl_get_keymap_name_from_edit_mode ()
 {
   if (rl_editing_mode == emacs_mode)
@@ -2048,7 +2049,7 @@ rl_function_dumper (print_readably)
 
   fprintf (rl_outstream, "\n");
 
-  for (i = 0; name = names[i]; i++)
+  for (i = 0; (name = names[i]); i++)
     {
       rl_command_func_t *function;
       char **invokers;
@@ -2108,7 +2109,7 @@ rl_function_dumper (print_readably)
    the output in such a way that it can be read back in. */
 int
 rl_dump_functions (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   if (rl_dispatching)
     fprintf (rl_outstream, "\r\n");
@@ -2188,7 +2189,7 @@ rl_macro_dumper (print_readably)
 
 int
 rl_dump_macros (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   if (rl_dispatching)
     fprintf (rl_outstream, "\r\n");
@@ -2197,12 +2198,13 @@ rl_dump_macros (count, key)
   return (0);
 }
 
-static char *
+static const char *
 _rl_get_string_variable_value (name)
      const char *name;
 {
   static char numbuf[32];
-  char *ret;
+  const char *ret;
+  char *tmp;
 
   if (_rl_stricmp (name, "bell-style") == 0)
     {
@@ -2230,11 +2232,11 @@ _rl_get_string_variable_value (name)
     {
       if (_rl_isearch_terminators == 0)
 	return 0;
-      ret = _rl_untranslate_macro_value (_rl_isearch_terminators);
-      if (ret)
+      tmp = _rl_untranslate_macro_value (_rl_isearch_terminators);
+      if (tmp)
 	{
-	  strncpy (numbuf, ret, sizeof (numbuf) - 1);
-	  free (ret);
+	  strncpy (numbuf, tmp, sizeof (numbuf) - 1);
+	  free (tmp);
 	  numbuf[sizeof(numbuf) - 1] = '\0';
 	}
       else
@@ -2257,7 +2259,7 @@ rl_variable_dumper (print_readably)
      int print_readably;
 {
   int i;
-  char *v;
+  const char *v;
 
   for (i = 0; boolean_varlist[i].name; i++)
     {
@@ -2286,7 +2288,7 @@ rl_variable_dumper (print_readably)
    the output in such a way that it can be read back in. */
 int
 rl_dump_variables (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   if (rl_dispatching)
     fprintf (rl_outstream, "\r\n");
diff --git a/cmd-line-utils/readline/chardefs.h b/cmd-line-utils/readline/chardefs.h
index def3a111bd3..0787d9943bb 100644
--- a/cmd-line-utils/readline/chardefs.h
+++ b/cmd-line-utils/readline/chardefs.h
@@ -59,7 +59,8 @@
 #define largest_char 255		    /* Largest character value. */
 
 #define CTRL_CHAR(c) ((c) < control_character_threshold && (((c) & 0x80) == 0))
-#define META_CHAR(c) ((c) > meta_character_threshold && (c) <= largest_char)
+#define META_CHAR_FOR_UCHAR(c) ((c) > meta_character_threshold)
+#define META_CHAR(c) (META_CHAR_FOR_UCHAR(c) && (c) <= largest_char)
 
 #define CTRL(c) ((c) & control_character_mask)
 #define META(c) ((c) | meta_character_bit)
diff --git a/cmd-line-utils/readline/complete.c b/cmd-line-utils/readline/complete.c
index 916aa5dd9b9..2745e4e4801 100644
--- a/cmd-line-utils/readline/complete.c
+++ b/cmd-line-utils/readline/complete.c
@@ -359,14 +359,14 @@ rl_complete (ignore, invoking_key)
 /* List the possible completions.  See description of rl_complete (). */
 int
 rl_possible_completions (ignore, invoking_key)
-     int ignore, invoking_key;
+     int ignore __attribute__((unused)), invoking_key __attribute__((unused));
 {
   return (rl_complete_internal ('?'));
 }
 
 int
 rl_insert_completions (ignore, invoking_key)
-     int ignore, invoking_key;
+     int ignore __attribute__((unused)), invoking_key __attribute__((unused));
 {
   return (rl_complete_internal ('*'));
 }
@@ -696,7 +696,8 @@ print_filename (to_print, full_pathname)
      char *to_print, *full_pathname;
 {
   int printed_len, extension_char, slen, tlen;
-  char *s, c, *new_full_pathname, *dn;
+  char *s, c, *new_full_pathname;
+  const char *dn;
 
   extension_char = 0;
   printed_len = fnprint (to_print);
@@ -783,7 +784,7 @@ print_filename (to_print, full_pathname)
 static char *
 rl_quote_filename (s, rtype, qcp)
      char *s;
-     int rtype;
+     int rtype __attribute__((unused));
      char *qcp;
 {
   char *r;
@@ -884,7 +885,7 @@ _rl_find_completion_word (fp, dp)
       /* We didn't find an unclosed quoted substring upon which to do
          completion, so use the word break characters to find the
          substring on which to complete. */
-      while (rl_point = MB_PREVCHAR (rl_line_buffer, rl_point, MB_FIND_ANY))
+      while ((rl_point = MB_PREVCHAR (rl_line_buffer, rl_point, MB_FIND_ANY)))
 	{
 	  scan = rl_line_buffer[rl_point];
 
@@ -1803,7 +1804,7 @@ rl_completion_matches (text, entry_function)
   match_list = (char **)xmalloc ((match_list_size + 1) * sizeof (char *));
   match_list[1] = (char *)NULL;
 
-  while (string = (*entry_function) (text, matches))
+  while ((string = (*entry_function) (text, matches)))
     {
       if (matches + 1 == match_list_size)
 	match_list = (char **)xrealloc
@@ -2111,7 +2112,7 @@ rl_filename_completion_function (text, state)
    ring the bell, and reset the counter to zero. */
 int
 rl_menu_complete (count, ignore)
-     int count, ignore;
+     int count, ignore __attribute__((unused));
 {
   rl_compentry_func_t *our_func;
   int matching_filenames, found_quote;
diff --git a/cmd-line-utils/readline/display.c b/cmd-line-utils/readline/display.c
index 6f63faa9738..f9b88f0006c 100644
--- a/cmd-line-utils/readline/display.c
+++ b/cmd-line-utils/readline/display.c
@@ -127,7 +127,7 @@ int _rl_want_redisplay = 0;
 
 /* The stuff that gets printed out before the actual text of the line.
    This is usually pointing to rl_prompt. */
-char *rl_display_prompt = (char *)NULL;
+const char *rl_display_prompt = (const char *)NULL;
 
 /* Pseudo-global variables declared here. */
 
@@ -229,7 +229,10 @@ expand_prompt (pmt, lp, lip, niflp, vlp)
      int *lp, *lip, *niflp, *vlp;
 {
   char *r, *ret, *p, *igstart;
-  int l, rl, last, ignoring, ninvis, invfl, invflset, ind, pind, physchars;
+  int l, rl, last, ignoring, ninvis, invfl, invflset, physchars;
+#if defined (HANDLE_MULTIBYTE)
+  int ind, pind;
+#endif
 
   /* Short-circuit if we can. */
   if ((MB_CUR_MAX <= 1 || rl_byte_oriented) && strchr (pmt, RL_PROMPT_START_IGNORE) == 0)
@@ -242,7 +245,7 @@ expand_prompt (pmt, lp, lip, niflp, vlp)
       if (niflp)
 	*niflp = 0;
       if (vlp)
-	*vlp = lp ? *lp : strlen (r);
+	*vlp = lp ? *lp : (int)strlen (r);
       return r;
     }
 
@@ -459,9 +462,10 @@ rl_redisplay ()
   register int in, out, c, linenum, cursor_linenum;
   register char *line;
   int inv_botlin, lb_botlin, lb_linenum, o_cpos;
-  int newlines, lpos, temp, modmark, n0, num;
-  char *prompt_this_line;
+  int newlines, lpos, temp, modmark;
+  const char *prompt_this_line;
 #if defined (HANDLE_MULTIBYTE)
+  int num, n0;
   wchar_t wc;
   size_t wc_bytes;
   int wc_width;
@@ -626,7 +630,6 @@ rl_redisplay ()
      contents of the command line? */
   while (lpos >= _rl_screenwidth)
     {
-      int z;
       /* fix from Darin Johnson <darin@acuson.com> for prompt string with
          invisible characters that is longer than the screen width.  The
          prompt_invis_chars_first_line variable could be made into an array
@@ -635,6 +638,7 @@ rl_redisplay ()
          prompts that exceed two physical lines?
          Additional logic fix from Edward Catmur <ed@catmur.co.uk> */
 #if defined (HANDLE_MULTIBYTE)
+      int z;
       if (MB_CUR_MAX > 1 && rl_byte_oriented == 0)
 	{
 	  n0 = num;
@@ -878,6 +882,7 @@ rl_redisplay ()
   if (_rl_horizontal_scroll_mode == 0 && _rl_term_up && *_rl_term_up)
     {
       int nleft, pos, changed_screen_line, tx;
+      char empty_str[1] = { 0 };
 
       if (!rl_display_fixed || forced_display)
 	{
@@ -902,7 +907,7 @@ rl_redisplay ()
 #define VIS_LLEN(l)	((l) > _rl_vis_botlin ? 0 : (vis_lbreaks[l+1] - vis_lbreaks[l]))
 #define INV_LLEN(l)	(inv_lbreaks[l+1] - inv_lbreaks[l])
 #define VIS_CHARS(line) (visible_line + vis_lbreaks[line])
-#define VIS_LINE(line) ((line) > _rl_vis_botlin) ? "" : VIS_CHARS(line)
+#define VIS_LINE(line) ((line) > _rl_vis_botlin) ? empty_str : VIS_CHARS(line)
 #define INV_LINE(line) (invisible_line + inv_lbreaks[line])
 
 	  /* For each line in the buffer, do the updating display. */
@@ -969,7 +974,7 @@ rl_redisplay ()
 		  _rl_move_vert (linenum);
 		  _rl_move_cursor_relative (0, tt);
 		  _rl_clear_to_eol
-		    ((linenum == _rl_vis_botlin) ? strlen (tt) : _rl_screenwidth);
+		    ((linenum == _rl_vis_botlin) ? (int)strlen (tt) : _rl_screenwidth);
 		}
 	    }
 	  _rl_vis_botlin = inv_botlin;
@@ -1888,7 +1893,7 @@ rl_character_len (c, pos)
 
   uc = (unsigned char)c;
 
-  if (META_CHAR (uc))
+  if (META_CHAR_FOR_UCHAR (uc))
     return ((_rl_output_meta_chars == 0) ? 4 : 1);
 
   if (uc == '\t')
@@ -2261,7 +2266,7 @@ static void
 redraw_prompt (t)
      char *t;
 {
-  char *oldp;
+  const char *oldp;
 
   oldp = rl_display_prompt;
   rl_save_prompt ();
diff --git a/cmd-line-utils/readline/histexpand.c b/cmd-line-utils/readline/histexpand.c
index 45377fc3b5e..ab8d8ecc866 100644
--- a/cmd-line-utils/readline/histexpand.c
+++ b/cmd-line-utils/readline/histexpand.c
@@ -87,14 +87,14 @@ char history_comment_char = '\0';
 
 /* The list of characters which inhibit the expansion of text if found
    immediately following history_expansion_char. */
-char *history_no_expand_chars = " \t\n\r=";
+const char *history_no_expand_chars = " \t\n\r=";
 
 /* If set to a non-zero value, single quotes inhibit history expansion.
    The default is 0. */
 int history_quotes_inhibit_expansion = 0;
 
 /* Used to split words by history_tokenize_internal. */
-char *history_word_delimiters = HISTORY_WORD_DELIMITERS;
+const char *history_word_delimiters = HISTORY_WORD_DELIMITERS;
 
 /* If set, this points to a function that is called to verify that a
    particular history expansion should be performed. */
@@ -203,7 +203,7 @@ get_history_event (string, caller_index, delimiting_quote)
     }
 
   /* Only a closing `?' or a newline delimit a substring search string. */
-  for (local_index = i; c = string[i]; i++)
+  for (local_index = i; (c = string[i]); i++)
     {
 #if defined (HANDLE_MULTIBYTE)
       if (MB_CUR_MAX > 1 && rl_byte_oriented == 0)
diff --git a/cmd-line-utils/readline/histfile.c b/cmd-line-utils/readline/histfile.c
index d98293d933c..118c5ebd328 100644
--- a/cmd-line-utils/readline/histfile.c
+++ b/cmd-line-utils/readline/histfile.c
@@ -186,7 +186,8 @@ read_history_range (filename, from, to)
   file_size = (size_t)finfo.st_size;
 
   /* check for overflow on very large files */
-  if (file_size != finfo.st_size || file_size + 1 < file_size)
+if ((sizeof(off_t) > sizeof(size_t) && finfo.st_size > (off_t)(size_t)~0) ||  
+    file_size + 1 < file_size)
     {
       errno = overflow_errno;
       goto error_and_exit;
@@ -339,7 +340,8 @@ history_truncate_file (fname, lines)
   file_size = (size_t)finfo.st_size;
 
   /* check for overflow on very large files */
-  if (file_size != finfo.st_size || file_size + 1 < file_size)
+if ((sizeof(off_t) > sizeof(size_t) && finfo.st_size > (off_t)(size_t)~0) ||  
+    file_size + 1 < file_size)
     {
       close (file);
 #if defined (EFBIG)
diff --git a/cmd-line-utils/readline/history.h b/cmd-line-utils/readline/history.h
index 14ca2a996c7..5790ed1c71d 100644
--- a/cmd-line-utils/readline/history.h
+++ b/cmd-line-utils/readline/history.h
@@ -243,9 +243,9 @@ extern int history_length;
 extern int history_max_entries;
 extern char history_expansion_char;
 extern char history_subst_char;
-extern char *history_word_delimiters;
+extern const char *history_word_delimiters;
 extern char history_comment_char;
-extern char *history_no_expand_chars;
+extern const char *history_no_expand_chars;
 extern char *history_search_delimiter_chars;
 extern int history_quotes_inhibit_expansion;
 
diff --git a/cmd-line-utils/readline/input.c b/cmd-line-utils/readline/input.c
index 62c0443d890..84c0422059a 100644
--- a/cmd-line-utils/readline/input.c
+++ b/cmd-line-utils/readline/input.c
@@ -420,7 +420,7 @@ rl_read_key ()
   else
     {
       /* If input is coming from a macro, then use that. */
-      if (c = _rl_next_macro_key ())
+      if ((c = _rl_next_macro_key ()))
 	return (c);
 
       /* If the user has an event function, then call it periodically. */
diff --git a/cmd-line-utils/readline/isearch.c b/cmd-line-utils/readline/isearch.c
index 8060adb97cd..305c847d8da 100644
--- a/cmd-line-utils/readline/isearch.c
+++ b/cmd-line-utils/readline/isearch.c
@@ -75,7 +75,7 @@ static int _rl_isearch_cleanup PARAMS((_rl_search_cxt *, int));
 static char *last_isearch_string;
 static int last_isearch_string_len;
 
-static char *default_isearch_terminators = "\033\012";
+static const char *default_isearch_terminators = "\033\012";
 
 _rl_search_cxt *
 _rl_scxt_alloc (type, flags)
@@ -119,7 +119,7 @@ _rl_scxt_alloc (type, flags)
 void
 _rl_scxt_dispose (cxt, flags)
      _rl_search_cxt *cxt;
-     int flags;
+     int flags __attribute__((unused));
 {
   FREE (cxt->search_string);
   FREE (cxt->allocated_line);
@@ -154,7 +154,7 @@ rl_forward_search_history (sign, key)
 static void
 rl_display_search (search_string, reverse_p, where)
      char *search_string;
-     int reverse_p, where;
+     int reverse_p, where __attribute__((unused));
 {
   char *message;
   int msglen, searchlen;
@@ -614,7 +614,7 @@ _rl_isearch_cleanup (cxt, r)
    backwards. */
 static int
 rl_search_history (direction, invoking_key)
-     int direction, invoking_key;
+     int direction, invoking_key __attribute__((unused));
 {
   _rl_search_cxt *cxt;		/* local for now, but saved globally */
   int c, r;
diff --git a/cmd-line-utils/readline/kill.c b/cmd-line-utils/readline/kill.c
index 42c53948689..adae2e1cd07 100644
--- a/cmd-line-utils/readline/kill.c
+++ b/cmd-line-utils/readline/kill.c
@@ -79,7 +79,7 @@ static int rl_yank_nth_arg_internal PARAMS((int, int, int));
    of kill material. */
 int
 rl_set_retained_kills (num)
-     int num;
+     int num __attribute__((unused));
 {
   return 0;
 }
@@ -296,7 +296,7 @@ rl_backward_kill_line (direction, ignore)
 /* Kill the whole line, no matter where point is. */
 int
 rl_kill_full_line (count, ignore)
-     int count, ignore;
+     int count __attribute__((unused)), ignore __attribute__((unused));
 {
   rl_begin_undo_group ();
   rl_point = 0;
@@ -314,7 +314,7 @@ rl_kill_full_line (count, ignore)
    using behaviour that they expect. */
 int
 rl_unix_word_rubout (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
   int orig_point;
 
@@ -347,7 +347,7 @@ rl_unix_word_rubout (count, key)
    deletes backward to directory separator (`/') or whitespace.  */
 int
 rl_unix_filename_rubout (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
   int orig_point, c;
 
@@ -391,7 +391,7 @@ rl_unix_filename_rubout (count, key)
    doing. */
 int
 rl_unix_line_discard (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   if (rl_point == 0)
     rl_ding ();
@@ -428,7 +428,7 @@ region_kill_internal (delete)
 /* Copy the text in the region to the kill ring. */
 int
 rl_copy_region_to_kill (count, ignore)
-     int count, ignore;
+     int count __attribute__((unused)), ignore __attribute__((unused));
 {
   return (region_kill_internal (0));
 }
@@ -436,7 +436,7 @@ rl_copy_region_to_kill (count, ignore)
 /* Kill the text between the point and mark. */
 int
 rl_kill_region (count, ignore)
-     int count, ignore;
+     int count __attribute__((unused)), ignore __attribute__((unused));
 {
   int r, npoint;
 
@@ -501,7 +501,7 @@ rl_copy_backward_word (count, key)
 /* Yank back the last killed text.  This ignores arguments. */
 int
 rl_yank (count, ignore)
-     int count, ignore;
+     int count __attribute__((unused)), ignore __attribute__((unused));
 {
   if (rl_kill_ring == 0)
     {
@@ -520,7 +520,7 @@ rl_yank (count, ignore)
    yank back some other text. */
 int
 rl_yank_pop (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   int l, n;
 
diff --git a/cmd-line-utils/readline/macro.c b/cmd-line-utils/readline/macro.c
index 3473f705335..0ee7b3077c3 100644
--- a/cmd-line-utils/readline/macro.c
+++ b/cmd-line-utils/readline/macro.c
@@ -201,7 +201,7 @@ _rl_kill_kbd_macro ()
    re-executing the existing macro. */
 int
 rl_start_kbd_macro (ignore1, ignore2)
-     int ignore1, ignore2;
+     int ignore1 __attribute__((unused)), ignore2 __attribute__((unused));
 {
   if (RL_ISSTATE (RL_STATE_MACRODEF))
     {
@@ -226,7 +226,7 @@ rl_start_kbd_macro (ignore1, ignore2)
    that many times, counting the definition as the first time. */
 int
 rl_end_kbd_macro (count, ignore)
-     int count, ignore;
+     int count, ignore __attribute__((unused));
 {
   if (RL_ISSTATE (RL_STATE_MACRODEF) == 0)
     {
@@ -246,7 +246,7 @@ rl_end_kbd_macro (count, ignore)
    COUNT says how many times to execute it. */
 int
 rl_call_last_kbd_macro (count, ignore)
-     int count, ignore;
+     int count, ignore __attribute__((unused));
 {
   if (current_macro == 0)
     _rl_abort_internal ();
diff --git a/cmd-line-utils/readline/mbutil.c b/cmd-line-utils/readline/mbutil.c
index e21708fb748..b571afa18bb 100644
--- a/cmd-line-utils/readline/mbutil.c
+++ b/cmd-line-utils/readline/mbutil.c
@@ -346,8 +346,8 @@ _rl_char_value (buf, ind)
 #undef _rl_find_next_mbchar
 int
 _rl_find_next_mbchar (string, seed, count, flags)
-     char *string;
-     int seed, count, flags;
+     char *string __attribute__((unused));
+     int seed, count, flags __attribute__((unused));
 {
 #if defined (HANDLE_MULTIBYTE)
   return _rl_find_next_mbchar_internal (string, seed, count, flags);
@@ -362,8 +362,8 @@ _rl_find_next_mbchar (string, seed, count, flags)
 #undef _rl_find_prev_mbchar
 int
 _rl_find_prev_mbchar (string, seed, flags)
-     char *string;
-     int seed, flags;
+     char *string __attribute__((unused));
+     int seed, flags __attribute__((unused));
 {
 #if defined (HANDLE_MULTIBYTE)
   return _rl_find_prev_mbchar_internal (string, seed, flags);
diff --git a/cmd-line-utils/readline/misc.c b/cmd-line-utils/readline/misc.c
index e0e6893c60e..f5f0370fb6a 100644
--- a/cmd-line-utils/readline/misc.c
+++ b/cmd-line-utils/readline/misc.c
@@ -228,7 +228,7 @@ _rl_reset_argument ()
 /* Start a numeric argument with initial value KEY */
 int
 rl_digit_argument (ignore, key)
-     int ignore, key;
+     int ignore __attribute__((unused)), key;
 {
   _rl_arg_init ();
   if (RL_ISSTATE (RL_STATE_CALLBACK))
@@ -249,7 +249,7 @@ rl_digit_argument (ignore, key)
    dispatch on it.  If the key is the abort character then abort. */
 int
 rl_universal_argument (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   _rl_arg_init ();
   rl_numeric_arg *= 4;
@@ -413,7 +413,7 @@ _rl_history_set_point ()
 void
 rl_replace_from_history (entry, flags)
      HIST_ENTRY *entry;
-     int flags;			/* currently unused */
+     int flags __attribute__((unused));	/* currently unused */
 {
   /* Can't call with `1' because rl_undo_list might point to an undo list
      from a history entry, just like we're setting up here. */
@@ -440,7 +440,7 @@ rl_replace_from_history (entry, flags)
 /* Meta-< goes to the start of the history. */
 int
 rl_beginning_of_history (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
   return (rl_get_previous_history (1 + where_history (), key));
 }
@@ -448,7 +448,7 @@ rl_beginning_of_history (count, key)
 /* Meta-> goes to the end of the history.  (The current line). */
 int
 rl_end_of_history (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   rl_maybe_replace_line ();
   using_history ();
@@ -553,7 +553,7 @@ rl_get_previous_history (count, key)
 /* How to toggle back and forth between editing modes. */
 int
 rl_vi_editing_mode (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
 #if defined (VI_MODE)
   _rl_set_insert_mode (RL_IM_INSERT, 1);	/* vi mode ignores insert mode */
@@ -566,7 +566,7 @@ rl_vi_editing_mode (count, key)
 
 int
 rl_emacs_editing_mode (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   rl_editing_mode = emacs_mode;
   _rl_set_insert_mode (RL_IM_INSERT, 1); /* emacs mode default is insert mode */
@@ -577,7 +577,7 @@ rl_emacs_editing_mode (count, key)
 /* Function for the rest of the library to use to set insert/overwrite mode. */
 void
 _rl_set_insert_mode (im, force)
-     int im, force;
+     int im, force __attribute__((unused));
 {
 #ifdef CURSOR_MODE
   _rl_set_cursor (im, force);
@@ -590,7 +590,7 @@ _rl_set_insert_mode (im, force)
    mode.  A negative or zero explicit argument selects insert mode. */
 int
 rl_overwrite_mode (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
   if (rl_explicit_arg == 0)
     _rl_set_insert_mode (rl_insert_mode ^ 1, 0);
diff --git a/cmd-line-utils/readline/nls.c b/cmd-line-utils/readline/nls.c
index 6ec685ed9ea..ff40b14228c 100644
--- a/cmd-line-utils/readline/nls.c
+++ b/cmd-line-utils/readline/nls.c
@@ -101,7 +101,8 @@ _rl_init_eightbit ()
 /* If we have setlocale(3), just check the current LC_CTYPE category
    value, and go into eight-bit mode if it's not C or POSIX. */
 #if defined (HAVE_SETLOCALE)
-  char *lspec, *t;
+  const char *lspec;
+  char *t;
 
   /* Set the LC_CTYPE locale category from environment variables. */
   lspec = _rl_get_locale_var ("LC_CTYPE");
@@ -127,7 +128,8 @@ _rl_init_eightbit ()
     return (0);
 
 #else /* !HAVE_SETLOCALE */
-  char *lspec, *t;
+  const char *lspec;
+  char *t;
   int i;
 
   /* We don't have setlocale.  Finesse it.  Check the environment for the
diff --git a/cmd-line-utils/readline/readline.c b/cmd-line-utils/readline/readline.c
index 8c3cad52d36..fb92becdbf9 100644
--- a/cmd-line-utils/readline/readline.c
+++ b/cmd-line-utils/readline/readline.c
@@ -90,7 +90,7 @@ static void bind_arrow_keys_internal PARAMS((Keymap));
 static void bind_arrow_keys PARAMS((void));
 
 static void readline_default_bindings PARAMS((void));
-static void reset_default_bindings PARAMS((void));
+static void reset_default_bindings PARAMS((void)) __attribute__((unused));
 
 static int _rl_subseq_result PARAMS((int, Keymap, int, int));
 static int _rl_subseq_getchar PARAMS((int));
diff --git a/cmd-line-utils/readline/readline.h b/cmd-line-utils/readline/readline.h
index b71bf98d204..668a452c765 100644
--- a/cmd-line-utils/readline/readline.h
+++ b/cmd-line-utils/readline/readline.h
@@ -304,7 +304,7 @@ extern int rl_bind_keyseq_if_unbound PARAMS((const char *, rl_command_func_t *))
 extern int rl_bind_keyseq_if_unbound_in_map PARAMS((const char *, rl_command_func_t *, Keymap));
 extern int rl_generic_bind PARAMS((int, const char *, char *, Keymap));
 
-extern char *rl_variable_value PARAMS((const char *));
+extern const char *rl_variable_value PARAMS((const char *));
 extern int rl_variable_bind PARAMS((const char *, const char *));
 
 /* Backwards compatibility, use rl_bind_keyseq_in_map instead. */
@@ -343,7 +343,7 @@ extern void rl_set_keymap PARAMS((Keymap));
 extern Keymap rl_get_keymap PARAMS((void));
 /* Undocumented; used internally only. */
 extern void rl_set_keymap_from_edit_mode PARAMS((void));
-extern char *rl_get_keymap_name_from_edit_mode PARAMS((void));
+extern const char *rl_get_keymap_name_from_edit_mode PARAMS((void));
 
 /* Functions for manipulating the funmap, which maps command names to functions. */
 extern int rl_add_funmap_entry PARAMS((const char *, rl_command_func_t *));
@@ -406,7 +406,7 @@ extern void rl_set_screen_size PARAMS((int, int));
 extern void rl_get_screen_size PARAMS((int *, int *));
 extern void rl_reset_screen_size PARAMS((void));
 
-extern char *rl_get_termcap PARAMS((const char *));
+extern const char *rl_get_termcap PARAMS((const char *));
 
 /* Functions for character input. */
 extern int rl_stuff_char PARAMS((int));
diff --git a/cmd-line-utils/readline/rlprivate.h b/cmd-line-utils/readline/rlprivate.h
index 64aa7bdd3fa..1ab696766b0 100644
--- a/cmd-line-utils/readline/rlprivate.h
+++ b/cmd-line-utils/readline/rlprivate.h
@@ -77,7 +77,7 @@ typedef struct  __rl_search_context
   int sline_len;
   int sline_index;
 
-  char  *search_terminators;
+  const char  *search_terminators;
 } _rl_search_cxt;
 
 /* Callback data for reading numeric arguments */
@@ -164,7 +164,7 @@ extern int rl_set_retained_kills PARAMS((int));
 extern void _rl_set_screen_size PARAMS((int, int));
 
 /* undo.c */
-extern int _rl_fix_last_undo_of_type PARAMS((int, int, int));
+extern int _rl_fix_last_undo_of_type PARAMS((enum undo_code, int, int));
 
 /* util.c */
 extern char *_rl_savestring PARAMS((const char *));
@@ -359,7 +359,7 @@ extern int _rl_vis_botlin;
 extern int _rl_last_c_pos;
 extern int _rl_suppress_redisplay;
 extern int _rl_want_redisplay;
-extern char *rl_display_prompt;
+extern const char *rl_display_prompt;
 
 /* isearch.c */
 extern char *_rl_isearch_terminators;
@@ -398,17 +398,17 @@ extern _rl_search_cxt *_rl_nscxt;
 /* terminal.c */
 extern int _rl_enable_keypad;
 extern int _rl_enable_meta;
-extern char *_rl_term_clreol;
-extern char *_rl_term_clrpag;
-extern char *_rl_term_im;
-extern char *_rl_term_ic;
-extern char *_rl_term_ei;
-extern char *_rl_term_DC;
-extern char *_rl_term_up;
-extern char *_rl_term_dc;
-extern char *_rl_term_cr;
-extern char *_rl_term_IC;
-extern char *_rl_term_forward_char;
+extern const char *_rl_term_clreol;
+extern const char *_rl_term_clrpag;
+extern const char *_rl_term_im;
+extern const char *_rl_term_ic;
+extern const char *_rl_term_ei;
+extern const char *_rl_term_DC;
+extern const char *_rl_term_up;
+extern const char *_rl_term_dc;
+extern const char *_rl_term_cr;
+extern const char *_rl_term_IC;
+extern const char *_rl_term_forward_char;
 extern int _rl_screenheight;
 extern int _rl_screenwidth;
 extern int _rl_screenchars;
diff --git a/cmd-line-utils/readline/rltty.c b/cmd-line-utils/readline/rltty.c
index 8c896bd3b26..8849206fd6d 100644
--- a/cmd-line-utils/readline/rltty.c
+++ b/cmd-line-utils/readline/rltty.c
@@ -764,7 +764,7 @@ rl_deprep_terminal ()
 
 int
 rl_restart_output (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
 #if defined (__MINGW32__)
   return 0;
@@ -802,7 +802,7 @@ rl_restart_output (count, key)
 
 int
 rl_stop_output (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
 #if defined (__MINGW32__)
   return 0;
diff --git a/cmd-line-utils/readline/search.c b/cmd-line-utils/readline/search.c
index cfa5db1dc17..1da450a692a 100644
--- a/cmd-line-utils/readline/search.c
+++ b/cmd-line-utils/readline/search.c
@@ -211,7 +211,7 @@ _rl_nsearch_init (dir, pchar)
   rl_end = rl_point = 0;
 
   p = _rl_make_prompt_for_search (pchar ? pchar : ':');
-  rl_message ("%s", p, 0);
+  rl_message ("%s", p);
   free (p);
 
   RL_SETSTATE(RL_STATE_NSEARCH);
@@ -383,7 +383,7 @@ noninc_search (dir, pchar)
    code calls this, KEY will be `?'. */
 int
 rl_noninc_forward_search (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
   return noninc_search (1, (key == '?') ? '?' : 0);
 }
@@ -392,7 +392,7 @@ rl_noninc_forward_search (count, key)
    calls this, KEY will be `/'. */
 int
 rl_noninc_reverse_search (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
   return noninc_search (-1, (key == '/') ? '/' : 0);
 }
@@ -401,7 +401,7 @@ rl_noninc_reverse_search (count, key)
    for.  If there is no saved search string, abort. */
 int
 rl_noninc_forward_search_again (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   int r;
 
@@ -418,7 +418,7 @@ rl_noninc_forward_search_again (count, key)
    for.  If there is no saved search string, abort. */
 int
 rl_noninc_reverse_search_again (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   int r;
 
diff --git a/cmd-line-utils/readline/terminal.c b/cmd-line-utils/readline/terminal.c
index a630bc02e05..3f92821f9dd 100644
--- a/cmd-line-utils/readline/terminal.c
+++ b/cmd-line-utils/readline/terminal.c
@@ -104,34 +104,36 @@ char PC, *BC, *UP;
 #endif /* __linux__ */
 
 /* Some strings to control terminal actions.  These are output by tputs (). */
-char *_rl_term_clreol;
-char *_rl_term_clrpag;
-char *_rl_term_cr;
-char *_rl_term_backspace;
-char *_rl_term_goto;
-char *_rl_term_pc;
+const char *_rl_term_clreol;
+const char *_rl_term_clrpag;
+const char *_rl_term_cr;
+const char *_rl_term_backspace;
+char _rl_term_backspace_default[2] = { '\b', 0 };
+const char *_rl_term_goto;
+const char *_rl_term_pc;
 
 /* Non-zero if we determine that the terminal can do character insertion. */
 int _rl_terminal_can_insert = 0;
 
 /* How to insert characters. */
-char *_rl_term_im;
-char *_rl_term_ei;
-char *_rl_term_ic;
-char *_rl_term_ip;
-char *_rl_term_IC;
+const char *_rl_term_im;
+const char *_rl_term_ei;
+const char *_rl_term_ic;
+const char *_rl_term_ip;
+const char *_rl_term_IC;
 
 /* How to delete characters. */
-char *_rl_term_dc;
-char *_rl_term_DC;
+const char *_rl_term_dc;
+const char *_rl_term_DC;
 
-char *_rl_term_forward_char;
+const char *_rl_term_forward_char;
 
 /* How to go up a line. */
-char *_rl_term_up;
+const char *_rl_term_up;
+char _rl_term_up_default[2] = { 0, 0 };
 
 /* A visible bell; char if the terminal can be made to flash the screen. */
-static char *_rl_visible_bell;
+static const char *_rl_visible_bell;
 
 /* Non-zero means the terminal can auto-wrap lines. */
 int _rl_term_autowrap = -1;
@@ -141,33 +143,33 @@ static int term_has_meta;
 
 /* The sequences to write to turn on and off the meta key, if this
    terminal has one. */
-static char *_rl_term_mm;
-static char *_rl_term_mo;
+static const char *_rl_term_mm;
+static const char *_rl_term_mo;
 
 /* The key sequences output by the arrow keys, if this terminal has any. */
-static char *_rl_term_ku;
-static char *_rl_term_kd;
-static char *_rl_term_kr;
-static char *_rl_term_kl;
+static const char *_rl_term_ku;
+static const char *_rl_term_kd;
+static const char *_rl_term_kr;
+static const char *_rl_term_kl;
 
 /* How to initialize and reset the arrow keys, if this terminal has any. */
-static char *_rl_term_ks;
-static char *_rl_term_ke;
+static const char *_rl_term_ks;
+static const char *_rl_term_ke;
 
 /* The key sequences sent by the Home and End keys, if any. */
-static char *_rl_term_kh;
-static char *_rl_term_kH;
-static char *_rl_term_at7;	/* @7 */
+static const char *_rl_term_kh;
+static const char *_rl_term_kH;
+static const char *_rl_term_at7;	/* @7 */
 
 /* Delete key */
-static char *_rl_term_kD;
+static const char *_rl_term_kD;
 
 /* Insert key */
-static char *_rl_term_kI;
+static const char *_rl_term_kI;
 
 /* Cursor control */
-static char *_rl_term_vs;	/* very visible */
-static char *_rl_term_ve;	/* normal */
+static const char *_rl_term_vs;	/* very visible */
+static const char *_rl_term_ve;	/* normal */
 
 static void bind_termcap_arrow_keys PARAMS((Keymap));
 
@@ -362,7 +364,7 @@ rl_resize_terminal ()
 
 struct _tc_string {
      const char *tc_var;
-     char **tc_value;
+     const char **tc_value;
 };
 
 /* This should be kept sorted, just in case we decide to change the
@@ -409,7 +411,7 @@ get_term_capabilities (bp)
      char **bp;
 {
 #if !defined (__DJGPP__)	/* XXX - doesn't DJGPP have a termcap library? */
-  register int i;
+  register unsigned int i;
 
   for (i = 0; i < NUM_TC_STRINGS; i++)
     *(tc_strings[i].tc_value) = tgetstr ((char *)tc_strings[i].tc_var, bp);
@@ -496,8 +498,9 @@ _rl_init_terminal_io (terminal_name)
          tgoto if _rl_term_IC or _rl_term_DC is defined, but just in case we
          change that later... */
       PC = '\0';
-      BC = _rl_term_backspace = "\b";
-      UP = _rl_term_up;
+      _rl_term_backspace = _rl_term_backspace_default;
+      BC = (char*)_rl_term_backspace;
+      UP = (char*)_rl_term_up;
 
       return 0;
     }
@@ -507,8 +510,8 @@ _rl_init_terminal_io (terminal_name)
   /* Set up the variables that the termcap library expects the application
      to provide. */
   PC = _rl_term_pc ? *_rl_term_pc : 0;
-  BC = _rl_term_backspace;
-  UP = _rl_term_up;
+  BC = (char*)_rl_term_backspace;
+  UP = (char*)_rl_term_up;
 
   if (!_rl_term_cr)
     _rl_term_cr = "\r";
@@ -568,11 +571,11 @@ bind_termcap_arrow_keys (map)
   _rl_keymap = xkeymap;
 }
 
-char *
+const char *
 rl_get_termcap (cap)
      const char *cap;
 {
-  register int i;
+  register unsigned int i;
 
   if (tcap_initialized == 0)
     return ((char *)NULL);
diff --git a/cmd-line-utils/readline/text.c b/cmd-line-utils/readline/text.c
index b26afeda525..272848c798c 100644
--- a/cmd-line-utils/readline/text.c
+++ b/cmd-line-utils/readline/text.c
@@ -410,7 +410,7 @@ rl_backward (count, key)
 /* Move to the beginning of the line. */
 int
 rl_beg_of_line (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   rl_point = 0;
   return 0;
@@ -419,7 +419,7 @@ rl_beg_of_line (count, key)
 /* Move to the end of the line. */
 int
 rl_end_of_line (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   rl_point = rl_end;
   return 0;
@@ -527,7 +527,7 @@ rl_backward_word (count, key)
 /* Clear the current line.  Numeric argument to C-l does this. */
 int
 rl_refresh_line (ignore1, ignore2)
-     int ignore1, ignore2;
+     int ignore1 __attribute__((unused)), ignore2 __attribute__((unused));
 {
   int curr_line;
 
@@ -566,7 +566,7 @@ rl_clear_screen (count, key)
 
 int
 rl_arrow_keys (count, c)
-     int count, c;
+     int count, c __attribute__((unused));
 {
   int ch;
 
@@ -884,7 +884,7 @@ _rl_insert_next_callback (data)
   
 int
 rl_quoted_insert (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
   /* Let's see...should the callback interface futz with signal handling? */
 #if defined (HANDLE_SIGNALS)
@@ -907,7 +907,7 @@ rl_quoted_insert (count, key)
 /* Insert a tab character. */
 int
 rl_tab_insert (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
   return (_rl_insert_char (count, '\t'));
 }
@@ -917,7 +917,7 @@ rl_tab_insert (count, key)
    meaning in the future. */
 int
 rl_newline (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   rl_done = 1;
 
@@ -951,7 +951,7 @@ rl_newline (count, key)
    is special cased. */
 int
 rl_do_lowercase_version (ignore1, ignore2)
-     int ignore1, ignore2;
+     int ignore1 __attribute__((unused)), ignore2 __attribute__((unused));
 {
   return 0;
 }
@@ -1118,7 +1118,7 @@ rl_rubout_or_delete (count, key)
 /* Delete all spaces and tabs around point. */
 int
 rl_delete_horizontal_space (count, ignore)
-     int count, ignore;
+     int count __attribute__((unused)), ignore __attribute__((unused));
 {
   int start = rl_point;
 
@@ -1163,9 +1163,9 @@ rl_delete_or_show_completions (count, key)
    A K*rn shell style function. */
 int
 rl_insert_comment (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
-  char *rl_comment_text;
+  const char *rl_comment_text;
   int rl_comment_len;
 
   rl_beg_of_line (1, key);
@@ -1202,7 +1202,7 @@ rl_insert_comment (count, key)
 /* Uppercase the word at point. */
 int
 rl_upcase_word (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
   return (rl_change_case (count, UpCase));
 }
@@ -1210,7 +1210,7 @@ rl_upcase_word (count, key)
 /* Lowercase the word at point. */
 int
 rl_downcase_word (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
   return (rl_change_case (count, DownCase));
 }
@@ -1218,7 +1218,7 @@ rl_downcase_word (count, key)
 /* Upcase the first letter, downcase the rest. */
 int
 rl_capitalize_word (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
  return (rl_change_case (count, CapCase));
 }
@@ -1381,7 +1381,7 @@ rl_transpose_words (count, key)
    then transpose the characters before point. */
 int
 rl_transpose_chars (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
 #if defined (HANDLE_MULTIBYTE)
   char *dummy;
@@ -1557,7 +1557,7 @@ _rl_char_search_callback (data)
 
 int
 rl_char_search (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
 #if defined (READLINE_CALLBACKS)
   if (RL_ISSTATE (RL_STATE_CALLBACK))
@@ -1575,7 +1575,7 @@ rl_char_search (count, key)
 
 int
 rl_backward_char_search (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
 #if defined (READLINE_CALLBACKS)
   if (RL_ISSTATE (RL_STATE_CALLBACK))
@@ -1612,7 +1612,7 @@ _rl_set_mark_at_pos (position)
 /* A bindable command to set the mark. */
 int
 rl_set_mark (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
   return (_rl_set_mark_at_pos (rl_explicit_arg ? count : rl_point));
 }
@@ -1620,7 +1620,7 @@ rl_set_mark (count, key)
 /* Exchange the position of mark and point. */
 int
 rl_exchange_point_and_mark (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   if (rl_mark > rl_end)
     rl_mark = -1;
diff --git a/cmd-line-utils/readline/tilde.c b/cmd-line-utils/readline/tilde.c
index d50f7a0ffa4..128cc26d9a7 100644
--- a/cmd-line-utils/readline/tilde.c
+++ b/cmd-line-utils/readline/tilde.c
@@ -196,7 +196,7 @@ tilde_expand (string)
   int result_size, result_index;
 
   result_index = result_size = 0;
-  if (result = strchr (string, '~'))
+  if ((result = strchr (string, '~')))
     result = (char *)xmalloc (result_size = (strlen (string) + 16));
   else
     result = (char *)xmalloc (result_size = (strlen (string) + 1));
diff --git a/cmd-line-utils/readline/undo.c b/cmd-line-utils/readline/undo.c
index 5699193b14c..79846c26024 100644
--- a/cmd-line-utils/readline/undo.c
+++ b/cmd-line-utils/readline/undo.c
@@ -231,7 +231,8 @@ rl_do_undo ()
 
 int
 _rl_fix_last_undo_of_type (type, start, end)
-     int type, start, end;
+     enum undo_code type;
+     int start, end;
 {
   UNDO_LIST *rl;
 
@@ -289,7 +290,7 @@ rl_modifying (start, end)
 /* Revert the current line to its previous state. */
 int
 rl_revert_line (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   if (!rl_undo_list)
     rl_ding ();
@@ -309,7 +310,7 @@ rl_revert_line (count, key)
 /* Do some undoing of things that were done. */
 int
 rl_undo_command (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
   if (count < 0)
     return 0;	/* Nothing to do. */
diff --git a/cmd-line-utils/readline/util.c b/cmd-line-utils/readline/util.c
index 935c9c927c2..50cfea75cb9 100644
--- a/cmd-line-utils/readline/util.c
+++ b/cmd-line-utils/readline/util.c
@@ -115,14 +115,14 @@ _rl_abort_internal ()
 
 int
 rl_abort (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   return (_rl_abort_internal ());
 }
 
 int
 rl_tty_status (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
 #if defined (TIOCSTAT)
   ioctl (1, TIOCSTAT, (char *)0);
@@ -172,7 +172,7 @@ rl_extend_line_buffer (len)
 /* A function for simple tilde expansion. */
 int
 rl_tilde_expand (ignore, key)
-     int ignore, key;
+     int ignore __attribute__((unused)), key __attribute__((unused));
 {
   register int start, end;
   char *homedir, *temp;
diff --git a/cmd-line-utils/readline/vi_mode.c b/cmd-line-utils/readline/vi_mode.c
index 25213cb762f..620bb863c7b 100644
--- a/cmd-line-utils/readline/vi_mode.c
+++ b/cmd-line-utils/readline/vi_mode.c
@@ -131,7 +131,7 @@ static int _rl_vi_callback_char_search PARAMS((_rl_callback_generic_arg *));
 void
 _rl_vi_initialize_line ()
 {
-  register int i;
+  register size_t i;
 
   for (i = 0; i < sizeof (vi_mark_chars) / sizeof (int); i++)
     vi_mark_chars[i] = -1;
@@ -190,7 +190,7 @@ _rl_vi_stuff_insert (count)
    puts you back into insert mode. */
 int
 rl_vi_redo (count, c)
-     int count, c;
+     int count, c __attribute__((unused));
 {
   int r;
 
@@ -238,7 +238,7 @@ rl_vi_undo (count, key)
 /* Yank the nth arg from the previous line into this line at point. */
 int
 rl_vi_yank_arg (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
   /* Readline thinks that the first word on a line is the 0th, while vi
      thinks the first word on a line is the 1st.  Compensate. */
@@ -321,7 +321,7 @@ rl_vi_search (count, key)
 /* Completion, from vi's point of view. */
 int
 rl_vi_complete (ignore, key)
-     int ignore, key;
+     int ignore __attribute__((unused)), key;
 {
   if ((rl_point < rl_end) && (!whitespace (rl_line_buffer[rl_point])))
     {
@@ -348,7 +348,7 @@ rl_vi_complete (ignore, key)
 /* Tilde expansion for vi mode. */
 int
 rl_vi_tilde_expand (ignore, key)
-     int ignore, key;
+     int ignore __attribute__((unused)), key;
 {
   rl_tilde_expand (0, key);
   rl_vi_start_inserting (key, 1, rl_arg_sign);
@@ -419,7 +419,7 @@ rl_vi_end_word (count, key)
 /* Move forward a word the way that 'W' does. */
 int
 rl_vi_fWord (count, ignore)
-     int count, ignore;
+     int count, ignore __attribute__((unused));
 {
   while (count-- && rl_point < (rl_end - 1))
     {
@@ -436,7 +436,7 @@ rl_vi_fWord (count, ignore)
 
 int
 rl_vi_bWord (count, ignore)
-     int count, ignore;
+     int count, ignore __attribute__((unused));
 {
   while (count-- && rl_point > 0)
     {
@@ -460,7 +460,7 @@ rl_vi_bWord (count, ignore)
 
 int
 rl_vi_eWord (count, ignore)
-     int count, ignore;
+     int count, ignore __attribute__((unused));
 {
   while (count-- && rl_point < (rl_end - 1))
     {
@@ -491,7 +491,7 @@ rl_vi_eWord (count, ignore)
 
 int
 rl_vi_fword (count, ignore)
-     int count, ignore;
+     int count, ignore __attribute__((unused));
 {
   while (count-- && rl_point < (rl_end - 1))
     {
@@ -517,7 +517,7 @@ rl_vi_fword (count, ignore)
 
 int
 rl_vi_bword (count, ignore)
-     int count, ignore;
+     int count, ignore __attribute__((unused));
 {
   while (count-- && rl_point > 0)
     {
@@ -556,7 +556,7 @@ rl_vi_bword (count, ignore)
 
 int
 rl_vi_eword (count, ignore)
-     int count, ignore;
+     int count, ignore __attribute__((unused));
 {
   while (count-- && rl_point < rl_end - 1)
     {
@@ -581,7 +581,7 @@ rl_vi_eword (count, ignore)
 
 int
 rl_vi_insert_beg (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
   rl_beg_of_line (1, key);
   rl_vi_insertion_mode (1, key);
@@ -610,7 +610,7 @@ _rl_vi_append_forward (key)
 
 int
 rl_vi_append_mode (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
   _rl_vi_append_forward (key);
   rl_vi_start_inserting (key, 1, rl_arg_sign);
@@ -619,7 +619,7 @@ rl_vi_append_mode (count, key)
 
 int
 rl_vi_append_eol (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
   rl_end_of_line (1, key);
   rl_vi_append_mode (1, key);
@@ -629,7 +629,7 @@ rl_vi_append_eol (count, key)
 /* What to do in the case of C-d. */
 int
 rl_vi_eof_maybe (count, c)
-     int count, c;
+     int count __attribute__((unused)), c __attribute__((unused));
 {
   return (rl_newline (1, '\n'));
 }
@@ -640,7 +640,7 @@ rl_vi_eof_maybe (count, c)
    switching keymaps. */
 int
 rl_vi_insertion_mode (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
   _rl_keymap = vi_insertion_keymap;
   _rl_vi_last_key_before_insert = key;
@@ -703,7 +703,7 @@ _rl_vi_done_inserting ()
 
 int
 rl_vi_movement_mode (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
   if (rl_point > 0)
     rl_backward_char (1, key);
@@ -783,7 +783,7 @@ _rl_vi_change_mbchar_case (count)
 
 int
 rl_vi_change_case (count, ignore)
-     int count, ignore;
+     int count, ignore __attribute__((unused));
 {
   int c, p;
 
@@ -1031,7 +1031,7 @@ rl_digit_loop1 ()
 
 int
 rl_vi_delete_to (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
   int c;
 
@@ -1057,7 +1057,7 @@ rl_vi_delete_to (count, key)
 
 int
 rl_vi_change_to (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
   int c, start_pos;
 
@@ -1110,7 +1110,7 @@ rl_vi_change_to (count, key)
 
 int
 rl_vi_yank_to (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
   int c, save;
 
@@ -1202,7 +1202,7 @@ rl_vi_delete (count, key)
 
 int
 rl_vi_back_to_indent (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
   rl_beg_of_line (1, key);
   while (rl_point < rl_end && whitespace (rl_line_buffer[rl_point]))
@@ -1212,7 +1212,7 @@ rl_vi_back_to_indent (count, key)
 
 int
 rl_vi_first_print (count, key)
-     int count, key;
+     int count __attribute__((unused)), key;
 {
   return (rl_vi_back_to_indent (1, key));
 }
@@ -1319,7 +1319,7 @@ rl_vi_char_search (count, key)
 /* Match brackets */
 int
 rl_vi_match (ignore, key)
-     int ignore, key;
+     int ignore __attribute__((unused)), key;
 {
   int count = 1, brack, pos, tmp, pre;
 
@@ -1426,7 +1426,7 @@ rl_vi_bracktype (c)
 static int
 _rl_vi_change_char (count, c, mb)
      int count, c;
-     char *mb;
+     char *mb __attribute__((unused));
 {
   int p;
 
@@ -1458,8 +1458,8 @@ _rl_vi_change_char (count, c, mb)
 
 static int
 _rl_vi_callback_getchar (mb, mlen)
-     char *mb;
-     int mlen;
+     char *mb __attribute__((unused));
+     int mlen __attribute__((unused));
 {
   int c;
 
@@ -1494,7 +1494,7 @@ _rl_vi_callback_change_char (data)
 
 int
 rl_vi_change_char (count, key)
-     int count, key;
+     int count, key __attribute__((unused));
 {
   int c;
   char mb[MB_LEN_MAX];
@@ -1582,7 +1582,7 @@ rl_vi_overstrike_delete (count, key)
 
 int
 rl_vi_replace (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
   int i;
 
@@ -1663,7 +1663,7 @@ _rl_vi_set_mark ()
 #if defined (READLINE_CALLBACKS)
 static int
 _rl_vi_callback_set_mark (data)
-     _rl_callback_generic_arg *data;
+     _rl_callback_generic_arg *data __attribute__((unused));
 {
   _rl_callback_func = 0;
   _rl_want_redisplay = 1;
@@ -1674,7 +1674,7 @@ _rl_vi_callback_set_mark (data)
 
 int
 rl_vi_set_mark (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
 #if defined (READLINE_CALLBACKS)
   if (RL_ISSTATE (RL_STATE_CALLBACK))
@@ -1721,7 +1721,7 @@ _rl_vi_goto_mark ()
 #if defined (READLINE_CALLBACKS)
 static int
 _rl_vi_callback_goto_mark (data)
-     _rl_callback_generic_arg *data;
+     _rl_callback_generic_arg *data __attribute__((unused));
 {
   _rl_callback_func = 0;
   _rl_want_redisplay = 1;
@@ -1732,7 +1732,7 @@ _rl_vi_callback_goto_mark (data)
 
 int
 rl_vi_goto_mark (count, key)
-     int count, key;
+     int count __attribute__((unused)), key __attribute__((unused));
 {
 #if defined (READLINE_CALLBACKS)
   if (RL_ISSTATE (RL_STATE_CALLBACK))
diff --git a/configure.in b/configure.in
index ec9774d05c6..a291ce73210 100644
--- a/configure.in
+++ b/configure.in
@@ -604,10 +604,11 @@ AC_SUBST(NOINST_LDFLAGS)
 # Check if we are using Linux and a glibc compiled with static nss
 # (this is true on the MySQL build machines to avoid NSS problems)
 #
+AC_CHECK_TOOL([NM], [nm]) 
 
 if test "$TARGET_LINUX" = "true" -a "$static_nss" = ""
 then
-  tmp=`nm /usr/lib*/libc.a  | grep _nss_files_getaliasent_r`
+  tmp=`$NM ${other_libc_lib:-/usr/lib*}/libc.a | grep _nss_files_getaliasent_r1`
   if test -n "$tmp"
   then
      STATIC_NSS_FLAGS="-lc -lnss_files -lnss_dns -lresolv"
@@ -1641,7 +1642,7 @@ esac
 
 # Build optimized or debug version ?
 # First check for gcc and g++
-if test "$ac_cv_prog_gcc" = "yes"
+if test "$GCC" = "yes"
 then
   DEBUG_CFLAGS="-g"
   DEBUG_OPTIMIZE_CC="-O"
@@ -1649,9 +1650,16 @@ then
 else
   DEBUG_CFLAGS="-g"
   DEBUG_OPTIMIZE_CC=""
-  OPTIMIZE_CFLAGS="-O"
+  case $SYSTEM_TYPE in                               
+    *solaris*)
+      OPTIMIZE_CFLAGS="-O1"
+      ;;
+    *)
+      OPTIMIZE_CFLAGS="-O"
+      ;;
+  esac
 fi
-if test "$ac_cv_prog_cxx_g" = "yes"
+if test "$GXX" = "yes"
 then
   DEBUG_CXXFLAGS="-g"
   DEBUG_OPTIMIZE_CXX="-O"
@@ -1659,7 +1667,14 @@ then
 else
   DEBUG_CXXFLAGS="-g"
   DEBUG_OPTIMIZE_CXX=""
-  OPTIMIZE_CXXFLAGS="-O"
+  case $SYSTEM_TYPE in
+    *solaris*)
+      OPTIMIZE_CXXFLAGS="-O1"
+      ;;
+    *)
+      OPTIMIZE_CXXFLAGS="-O"
+      ;;
+  esac
 fi
 
 case $SYSTEM_TYPE in
@@ -2119,6 +2134,25 @@ case "$mysql_cv_sys_os" in
         # unsupported priority values are passed to pthread_setschedprio.
         # Since the only supported value is 1, treat it as inexistent.
     ;;
+ SunOS) # Bug#42599 error: `pthread_setschedprio' was not declared in this scope
+        # In some installations, the pthread.h header used by GCC does not
+        # declare the pthread_setscheprio prototype, but the function is
+        # implemented. Since the function is used in C++ code, ensure that
+        # the function prototype is present.
+     AC_MSG_CHECKING([whether pthread_setschedprio is declared])
+     AC_LANG_PUSH([C++])
+     AC_COMPILE_IFELSE([
+       AC_LANG_PROGRAM([#include <pthread.h>],
+                      [(void)(pthread_setschedprio);])],
+       [ac_cv_func_pthread_setschedprio=yes],
+       [ac_cv_func_pthread_setschedprio=no])
+     AC_LANG_POP([C++])
+     AC_MSG_RESULT([$ac_cv_func_pthread_setschedprio])
+     if test "$ac_cv_func_pthread_setschedprio" = yes; then
+       AC_DEFINE(HAVE_PTHREAD_SETSCHEDPRIO, 1,
+                 [Define to 1 if you have the `pthread_setschedprio' function.])
+     fi
+    ;;
  *) AC_CHECK_FUNCS(pthread_setschedprio)
     ;;
 esac
diff --git a/dbug/user.r b/dbug/user.r
index 19de840d0ad..ef67ef7a7cf 100644
--- a/dbug/user.r
+++ b/dbug/user.r
@@ -32,6 +32,7 @@
 .\"           === Set line length
 .\".ll 6.5i
 .TL
+.warn 0
 D B U G
 .P 0
 C Program Debugging Package
diff --git a/extra/innochecksum.c b/extra/innochecksum.c
index 524637a1729..9bd4bfcc0cd 100644
--- a/extra/innochecksum.c
+++ b/extra/innochecksum.c
@@ -224,7 +224,7 @@ int main(int argc, char **argv)
   }
   else if (verbose)
   {
-    printf("file %s= %llu bytes (%lu pages)...\n", argv[1], size, pages);
+    printf("file %s = %llu bytes (%lu pages)...\n", argv[optind], size, pages);
     printf("checking pages in range %lu to %lu\n", start_page, use_end_page ? end_page : (pages - 1));
   }
 
diff --git a/extra/perror.c b/extra/perror.c
index 80eb2af2dae..a98a4fc3d1b 100644
--- a/extra/perror.c
+++ b/extra/perror.c
@@ -115,7 +115,7 @@ static void usage(void)
 {
   print_version();
   puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
-  printf("Print a description for a system error code or an error code from\na MyISAM/ISAM/BDB table handler.\n");
+  printf("Print a description for a system error code or a MySQL error code.\n");
   printf("If you want to get the error for a negative error code, you should use\n-- before the first error code to tell perror that there was no more options.\n\n");
   printf("Usage: %s [OPTIONS] [ERRORCODE [ERRORCODE...]]\n",my_progname);
   my_print_help(my_long_options);
diff --git a/extra/yassl/src/handshake.cpp b/extra/yassl/src/handshake.cpp
index 262b5cb3b8b..b4d9005af15 100644
--- a/extra/yassl/src/handshake.cpp
+++ b/extra/yassl/src/handshake.cpp
@@ -790,15 +790,17 @@ void processReply(SSL& ssl)
     if (ssl.GetError()) return;
 
     if (DoProcessReply(ssl))
+    {
         // didn't complete process
         if (!ssl.getSocket().IsNonBlocking()) {
             // keep trying now, blocking ok
             while (!ssl.GetError())
                 if (DoProcessReply(ssl) == 0) break;
-    }
+        }
         else
             // user will have try again later, non blocking
             ssl.SetError(YasslError(SSL_ERROR_WANT_READ));
+    }
 }
 
 
@@ -873,10 +875,12 @@ void sendServerKeyExchange(SSL& ssl, BufferOutput buffer)
 void sendChangeCipher(SSL& ssl, BufferOutput buffer)
 {
     if (ssl.getSecurity().get_parms().entity_ == server_end)
+    {
         if (ssl.getSecurity().get_resuming())
             ssl.verifyState(clientKeyExchangeComplete);
         else
             ssl.verifyState(clientFinishedComplete);
+    }
     if (ssl.GetError()) return;
 
     ChangeCipherSpec ccs;
diff --git a/extra/yassl/src/yassl_imp.cpp b/extra/yassl/src/yassl_imp.cpp
index 20dfe50f132..f079df8c7ce 100644
--- a/extra/yassl/src/yassl_imp.cpp
+++ b/extra/yassl/src/yassl_imp.cpp
@@ -1305,6 +1305,7 @@ void ServerHello::Process(input_buffer&, SSL& ssl)
         ssl.useSecurity().use_connection().sessionID_Set_ = false;
 
     if (ssl.getSecurity().get_resuming())
+    {
         if (memcmp(session_id_, ssl.getSecurity().get_resume().GetID(),
                    ID_LEN) == 0) {
             ssl.set_masterSecret(ssl.getSecurity().get_resume().GetSecret());
@@ -1319,6 +1320,7 @@ void ServerHello::Process(input_buffer&, SSL& ssl)
             ssl.useSecurity().set_resuming(false);
             ssl.useLog().Trace("server denied resumption");
         }
+    }
 
     if (ssl.CompressionOn() && !compression_method_)
         ssl.UnSetCompression(); // server isn't supporting yaSSL zlib request
diff --git a/extra/yassl/taocrypt/include/modes.hpp b/extra/yassl/taocrypt/include/modes.hpp
index d1ebce7568b..4575fe1414b 100644
--- a/extra/yassl/taocrypt/include/modes.hpp
+++ b/extra/yassl/taocrypt/include/modes.hpp
@@ -96,10 +96,12 @@ inline void Mode_BASE::Process(byte* out, const byte* in, word32 sz)
     if (mode_ == ECB)
         ECB_Process(out, in, sz);
     else if (mode_ == CBC)
+    {
         if (dir_ == ENCRYPTION)
             CBC_Encrypt(out, in, sz);
         else
             CBC_Decrypt(out, in, sz);
+    }
 }
 
 
diff --git a/extra/yassl/taocrypt/src/asn.cpp b/extra/yassl/taocrypt/src/asn.cpp
index 3b1c1c2136a..78200841bda 100644
--- a/extra/yassl/taocrypt/src/asn.cpp
+++ b/extra/yassl/taocrypt/src/asn.cpp
@@ -781,10 +781,12 @@ void CertDecoder::GetDate(DateType dt)
     source_.advance(length);
 
     if (!ValidateDate(date, b, dt) && verify_)
+    {
         if (dt == BEFORE)
             source_.SetError(BEFORE_DATE_E);
         else
             source_.SetError(AFTER_DATE_E);
+    }
 
     // save for later use
     if (dt == BEFORE) {
@@ -1062,6 +1064,7 @@ word32 DecodeDSA_Signature(byte* decoded, const byte* encoded, word32 sz)
     }
     word32 rLen = GetLength(source);
     if (rLen != 20)
+    {
         if (rLen == 21) {       // zero at front, eat
             source.next();
             --rLen;
@@ -1074,6 +1077,7 @@ word32 DecodeDSA_Signature(byte* decoded, const byte* encoded, word32 sz)
             source.SetError(DSA_SZ_E);
             return 0;
         }
+    }
     memcpy(decoded, source.get_buffer() + source.get_index(), rLen);
     source.advance(rLen);
 
@@ -1084,6 +1088,7 @@ word32 DecodeDSA_Signature(byte* decoded, const byte* encoded, word32 sz)
     }
     word32 sLen = GetLength(source);
     if (sLen != 20)
+    {
         if (sLen == 21) {
             source.next();          // zero at front, eat
             --sLen;
@@ -1096,6 +1101,7 @@ word32 DecodeDSA_Signature(byte* decoded, const byte* encoded, word32 sz)
             source.SetError(DSA_SZ_E);
             return 0;
         }
+    }
     memcpy(decoded + rLen, source.get_buffer() + source.get_index(), sLen);
     source.advance(sLen);
 
diff --git a/include/hash.h b/include/hash.h
index f4b82454b81..629b404e8a7 100644
--- a/include/hash.h
+++ b/include/hash.h
@@ -106,7 +106,7 @@ void my_hash_replace(HASH *hash, HASH_SEARCH_STATE *state, uchar *new_row);
 my_bool my_hash_check(HASH *hash); /* Only in debug library */
 
 #define my_hash_clear(H) bzero((char*) (H), sizeof(*(H)))
-#define my_hash_inited(H) ((H)->array.buffer != 0)
+#define my_hash_inited(H) ((H)->blength != 0)
 #define my_hash_init_opt(A,B,C,D,E,F,G,H) \
           (!my_hash_inited(A) && _my_hash_init(A,0,B,C,D,E,F,G, H CALLER_INFO))
 
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 04cf921dfee..451c8db549b 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -285,7 +285,7 @@ typedef struct charset_info_st
 #define ILLEGAL_CHARSET_INFO_NUMBER (~0U)
 
 
-extern CHARSET_INFO my_charset_bin;
+extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_bin;
 extern CHARSET_INFO my_charset_big5_chinese_ci;
 extern CHARSET_INFO my_charset_big5_bin;
 extern CHARSET_INFO my_charset_cp932_japanese_ci;
@@ -298,7 +298,7 @@ extern CHARSET_INFO my_charset_gb2312_chinese_ci;
 extern CHARSET_INFO my_charset_gb2312_bin;
 extern CHARSET_INFO my_charset_gbk_chinese_ci;
 extern CHARSET_INFO my_charset_gbk_bin;
-extern CHARSET_INFO my_charset_latin1;
+extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1;
 extern CHARSET_INFO my_charset_latin1_german2_ci;
 extern CHARSET_INFO my_charset_latin1_bin;
 extern CHARSET_INFO my_charset_latin2_czech_ci;
@@ -315,7 +315,7 @@ extern CHARSET_INFO my_charset_utf8_general_ci;
 extern CHARSET_INFO my_charset_utf8_unicode_ci;
 extern CHARSET_INFO my_charset_utf8_bin;
 extern CHARSET_INFO my_charset_cp1250_czech_ci;
-extern CHARSET_INFO my_charset_filename;
+extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
 
 /* declarations for simple charsets */
 extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t,
diff --git a/include/my_global.h b/include/my_global.h
index 9e8c9bdfc70..4ad851e9e5d 100644
--- a/include/my_global.h
+++ b/include/my_global.h
@@ -276,7 +276,7 @@
 #endif
 
 /* The client defines this to avoid all thread code */
-#if defined(UNDEF_THREADS_HACK)
+#if defined(MYSQL_CLIENT_NO_THREADS) || defined(UNDEF_THREADS_HACK)
 #undef THREAD
 #undef HAVE_LINUXTHREADS
 #undef HAVE_NPTL
@@ -1573,4 +1573,17 @@ static inline double rint(double x)
 }
 #endif /* HAVE_RINT */
 
+/* 
+  MYSQL_PLUGIN_IMPORT macro is used to export mysqld data
+  (i.e variables) for usage in storage engine loadable plugins.
+  Outside of Windows, it is dummy.
+*/
+#ifndef MYSQL_PLUGIN_IMPORT
+#if (defined(_WIN32) && defined(MYSQL_DYNAMIC_PLUGIN))
+#define MYSQL_PLUGIN_IMPORT __declspec(dllimport)
+#else
+#define MYSQL_PLUGIN_IMPORT
+#endif
+#endif
+
 #endif /* my_global_h */
diff --git a/include/my_sys.h b/include/my_sys.h
index 01804cd089f..222564e0b44 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -221,8 +221,8 @@ extern uint    my_large_page_size;
 #endif
 
 /* charsets */
-extern CHARSET_INFO *default_charset_info;
-extern CHARSET_INFO *all_charsets[256];
+extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *default_charset_info;
+extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *all_charsets[256];
 extern CHARSET_INFO compiled_charsets[];
 
 /* statistics */
@@ -237,8 +237,8 @@ extern void (*my_sigtstp_cleanup)(void),
 	    (*my_sigtstp_restart)(void),
 	    (*my_abort_hook)(int);
 					/* Executed when comming from shell */
-extern int NEAR my_umask,		/* Default creation mask  */
-	   NEAR my_umask_dir,
+extern MYSQL_PLUGIN_IMPORT int NEAR my_umask;		/* Default creation mask  */
+extern int NEAR my_umask_dir,
 	   NEAR my_recived_signals,	/* Signals we have got */
 	   NEAR my_safe_to_handle_signal, /* Set when allowed to SIGTSTP */
 	   NEAR my_dont_interrupt;	/* call remember_intr when set */
@@ -511,7 +511,7 @@ typedef int (*qsort2_cmp)(const void *, const void *, const void *);
  ((info)->write_pos + (Count) <=(info)->write_end ?\
   (memcpy((info)->write_pos, (Buffer), (size_t)(Count)),\
    ((info)->write_pos+=(Count)),0) : \
-   (*(info)->write_function)((info),(Buffer),(Count)))
+   (*(info)->write_function)((info),(uchar *)(Buffer),(Count)))
 
 #define my_b_get(info) \
   ((info)->read_pos != (info)->read_end ?\
diff --git a/include/myisam.h b/include/myisam.h
index d7bfdf7191e..02251eeacb4 100644
--- a/include/myisam.h
+++ b/include/myisam.h
@@ -404,7 +404,8 @@ typedef struct st_mi_check_param
   my_off_t keydata,totaldata,key_blocks,start_check_pos;
   ha_rows total_records,total_deleted;
   ha_checksum record_checksum,glob_crc;
-  ulong	use_buffers,read_buffer_length,write_buffer_length,
+  ulonglong use_buffers;
+  ulong read_buffer_length,write_buffer_length,
 	sort_buffer_length,sort_key_blocks;
   uint out_flag,warning_printed,error_printed,verbose;
   uint opt_sort_key,total_files,max_level;
diff --git a/include/myisammrg.h b/include/myisammrg.h
index dafae157ee0..446ecb7d719 100644
--- a/include/myisammrg.h
+++ b/include/myisammrg.h
@@ -88,7 +88,8 @@ extern MYRG_INFO *myrg_parent_open(const char *parent_name,
                                    void *callback_param);
 extern int myrg_attach_children(MYRG_INFO *m_info, int handle_locking,
                                 MI_INFO *(*callback)(void*),
-                                void *callback_param);
+                                void *callback_param,
+                                my_bool *need_compat_check);
 extern int myrg_detach_children(MYRG_INFO *m_info);
 extern int myrg_panic(enum ha_panic_function function);
 extern int myrg_rfirst(MYRG_INFO *file,uchar *buf,int inx);
diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h
index a978d44b918..2e59262d061 100644
--- a/include/mysql/plugin.h
+++ b/include/mysql/plugin.h
@@ -16,6 +16,16 @@
 #ifndef _my_plugin_h
 #define _my_plugin_h
 
+
+/*
+  On Windows, exports from DLL need to be declared
+*/
+#if (defined(_WIN32) && defined(MYSQL_DYNAMIC_PLUGIN))
+#define MYSQL_PLUGIN_EXPORT extern "C" __declspec(dllexport)
+#else
+#define MYSQL_PLUGIN_EXPORT
+#endif
+
 #ifdef __cplusplus
 class THD;
 class Item;
@@ -90,9 +100,9 @@ int PSIZE= sizeof(struct st_mysql_plugin);                                    \
 struct st_mysql_plugin DECLS[]= {
 #else
 #define __MYSQL_DECLARE_PLUGIN(NAME, VERSION, PSIZE, DECLS)                   \
-int _mysql_plugin_interface_version_= MYSQL_PLUGIN_INTERFACE_VERSION;         \
-int _mysql_sizeof_struct_st_plugin_= sizeof(struct st_mysql_plugin);          \
-struct st_mysql_plugin _mysql_plugin_declarations_[]= {
+MYSQL_PLUGIN_EXPORT int _mysql_plugin_interface_version_= MYSQL_PLUGIN_INTERFACE_VERSION;         \
+MYSQL_PLUGIN_EXPORT int _mysql_sizeof_struct_st_plugin_= sizeof(struct st_mysql_plugin);          \
+MYSQL_PLUGIN_EXPORT struct st_mysql_plugin _mysql_plugin_declarations_[]= {
 #endif
 
 #define mysql_declare_plugin(NAME) \
diff --git a/libmysql/Makefile.am b/libmysql/Makefile.am
index 21f8f372d0f..f67abfd8ac6 100644
--- a/libmysql/Makefile.am
+++ b/libmysql/Makefile.am
@@ -21,7 +21,7 @@
 # This file is public domain and comes with NO WARRANTY of any kind
 
 target =	libmysqlclient.la
-target_defs =	-DUNDEF_THREADS_HACK -DDONT_USE_RAID @LIB_EXTRA_CCFLAGS@
+target_defs =	-DMYSQL_CLIENT_NO_THREADS -DDONT_USE_RAID @LIB_EXTRA_CCFLAGS@
 LIBS =		@CLIENT_LIBS@ 
 INCLUDES =	-I$(top_builddir)/include -I$(top_srcdir)/include \
 		$(openssl_includes) @ZLIB_INCLUDES@
@@ -104,7 +104,7 @@ do-lib-dist:
 	echo "# A very minimal Makefile to compile" > $$dir/Makefile; \
 	echo "# the minimized libmysql library" >> $$dir/Makefile; \
 	echo "# This file is autogenerated from Makefile.am" >> $$dir/Makefile; \
-	echo 'CFLAGS=	-I. -DUNDEF_THREADS_HACK' >>$$dir/Makefile; \
+	echo 'CFLAGS=	-I. -DMYSQL_CLIENT_NO_THREADS' >>$$dir/Makefile; \
 	echo "obj=$$objs"  >>$$dir/Makefile; \
 	echo 'all: libmysql.a'  >>$$dir/Makefile; \
 	echo 'libmysql.a: $$(obj)'  >>$$dir/Makefile; \
diff --git a/libmysqld/CMakeLists.txt b/libmysqld/CMakeLists.txt
index 1c8f80768d4..8500d73863a 100644
--- a/libmysqld/CMakeLists.txt
+++ b/libmysqld/CMakeLists.txt
@@ -87,63 +87,16 @@ FOREACH(rpath ${VIO_SOURCES})
   SET(LIB_SOURCES ${LIB_SOURCES} ../vio/${rpath})
 ENDFOREACH(rpath)
 
-# Engines
-INCLUDE(${CMAKE_SOURCE_DIR}/storage/heap/CMakeLists.txt)
-FOREACH(rpath ${HEAP_SOURCES})
-  SET(LIB_SOURCES ${LIB_SOURCES} ../storage/heap/${rpath})
-ENDFOREACH(rpath)
 
-INCLUDE(${CMAKE_SOURCE_DIR}/storage/myisam/CMakeLists.txt)
-FOREACH(rpath ${MYISAM_SOURCES})
-  SET(LIB_SOURCES ${LIB_SOURCES} ../storage/myisam/${rpath})
-ENDFOREACH(rpath)
 
-INCLUDE(${CMAKE_SOURCE_DIR}/storage/myisammrg/CMakeLists.txt)
-FOREACH(rpath ${MYISAMMRG_SOURCES})
-  SET(LIB_SOURCES ${LIB_SOURCES} ../storage/myisammrg/${rpath})
-ENDFOREACH(rpath)
-
-IF(WITH_ARCHIVE_STORAGE_ENGINE)
-  INCLUDE(${CMAKE_SOURCE_DIR}/storage/archive/CMakeLists.txt)
-  FOREACH(rpath ${ARCHIVE_SOURCES})
-    SET(LIB_SOURCES ${LIB_SOURCES} ../storage/archive/${rpath})
+FOREACH (ENGINE_LIB ${MYSQLD_STATIC_ENGINE_LIBS})
+  INCLUDE(${CMAKE_SOURCE_DIR}/storage/${ENGINE_LIB}/CMakeLists.txt)
+  STRING(TOUPPER ${ENGINE_LIB} ENGINE_LIB_UPPER)
+  FOREACH(rpath ${${ENGINE_LIB_UPPER}_SOURCES})
+    SET(LIB_SOURCES ${LIB_SOURCES} ${CMAKE_SOURCE_DIR}/storage/${ENGINE_LIB}/${rpath})
   ENDFOREACH(rpath)
-ENDIF(WITH_ARCHIVE_STORAGE_ENGINE)
+ENDFOREACH(ENGINE_LIB)
 
-IF(WITH_BLACKHOLE_STORAGE_ENGINE)
-  INCLUDE(${CMAKE_SOURCE_DIR}/storage/blackhole/CMakeLists.txt)
-  FOREACH(rpath ${BLACKHOLE_SOURCES})
-    SET(LIB_SOURCES ${LIB_SOURCES} ../storage/blackhole/${rpath})
-  ENDFOREACH(rpath)
-ENDIF(WITH_BLACKHOLE_STORAGE_ENGINE)
-
-IF(WITH_EXAMPLE_STORAGE_ENGINE)
-  INCLUDE(${CMAKE_SOURCE_DIR}/storage/example/CMakeLists.txt)
-  FOREACH(rpath ${EXAMPLE_SOURCES})
-    SET(LIB_SOURCES ${LIB_SOURCES} ../storage/example/${rpath})
-  ENDFOREACH(rpath)
-ENDIF(WITH_EXAMPLE_STORAGE_ENGINE)
-
-IF(WITH_FEDERATED_STORAGE_ENGINE)
-  INCLUDE(${CMAKE_SOURCE_DIR}/storage/federated/CMakeLists.txt)
-  FOREACH(rpath ${FEDERATED_SOURCES})
-    SET(LIB_SOURCES ${LIB_SOURCES} ../storage/federated/${rpath})
-  ENDFOREACH(rpath)
-ENDIF(WITH_FEDERATED_STORAGE_ENGINE)
-
-IF(WITH_INNOBASE_STORAGE_ENGINE)
-  INCLUDE(${CMAKE_SOURCE_DIR}/storage/innobase/CMakeLists.txt)
-  FOREACH(rpath ${INNOBASE_SOURCES})
-    SET(LIB_SOURCES ${LIB_SOURCES} ../storage/innobase/${rpath})
-  ENDFOREACH(rpath)
-ENDIF(WITH_INNOBASE_STORAGE_ENGINE)
-
-IF(WITH_CSV_STORAGE_ENGINE)
-  INCLUDE(${CMAKE_SOURCE_DIR}/storage/csv/CMakeLists.txt)
-  FOREACH(rpath ${CSV_SOURCES})
-    SET(LIB_SOURCES ${LIB_SOURCES} ../storage/csv/${rpath})
-  ENDFOREACH(rpath)
-ENDIF(WITH_CSV_STORAGE_ENGINE)
 
 SET(SOURCE_SUBLIBS FALSE)
                             
diff --git a/mysql-test/Makefile.am b/mysql-test/Makefile.am
index 5e3582d5e16..810bead1adc 100644
--- a/mysql-test/Makefile.am
+++ b/mysql-test/Makefile.am
@@ -25,6 +25,7 @@ test_SCRIPTS =	mtr \
 		mysql-stress-test.pl
 
 nobase_test_DATA = \
+	valgrind.supp \
 	lib/v1/mysql-test-run.pl \
 	lib/v1/mtr_cases.pl \
 	lib/v1/mtr_io.pl \
@@ -41,7 +42,6 @@ nobase_test_DATA = \
 	lib/v1/mtr_im.pl \
 	lib/v1/mtr_process.pl \
 	lib/v1/mtr_unique.pl \
-\
 	lib/mtr_cases.pm \
 	lib/mtr_gcov.pl \
 	lib/mtr_gprof.pl \
@@ -69,9 +69,8 @@ nobase_test_DATA = \
 SUBDIRS =	lib/My/SafeProcess
 
 EXTRA_DIST =	README \
-		valgrind.supp \
 		$(test_SCRIPTS) \
-		$(nobase_test_DATA) 
+		$(nobase_test_DATA)
 
 # List of directories containing test + result files and the
 # related test data files that should be copied
@@ -93,12 +92,13 @@ TEST_DIRS = t r include std_data std_data/parts collections \
 	suite/jp suite/jp/t suite/jp/r suite/jp/std_data suite/jp/include \
 	suite/manual/t suite/manual/r \
 	suite/ndb_team suite/ndb_team/t suite/ndb_team/r \
-	suite/rpl suite/rpl/data suite/rpl/include suite/rpl/r \
+	suite/rpl suite/rpl/include suite/rpl/r \
 	suite/rpl/t \
 	suite/stress/include suite/stress/t suite/stress/r \
 	suite/ndb suite/ndb/t suite/ndb/r \
 	suite/rpl_ndb suite/rpl_ndb/t suite/rpl_ndb/r \
-	suite/parts suite/parts/t suite/parts/r suite/parts/inc
+	suite/parts suite/parts/t suite/parts/r suite/parts/inc \
+	suite/innodb suite/innodb/t suite/innodb/r suite/innodb/include
 
 # Used by dist-hook and install-data-local to copy all
 # test files into either dist or install directory
diff --git a/mysql-test/collections/default.experimental b/mysql-test/collections/default.experimental
index 103069f79cf..faa8ba110d3 100644
--- a/mysql-test/collections/default.experimental
+++ b/mysql-test/collections/default.experimental
@@ -1 +1,13 @@
 funcs_1.charset_collation_1              # depends on compile-time decisions
+binlog.binlog_tmp_table                  # Bug#45578: Test binlog_tmp_table fails ramdonly on PB2: Unknown table 't2'
+main.ctype_gbk_binlog                    # Bug#46010: main.ctype_gbk_binlog fails sporadically : Table 't2' already exists
+rpl.rpl_row_create_table                 # Bug#45576: rpl_row_create_table fails on PB2
+rpl.rpl_extraColmaster_myisam            # Bug#46013: rpl_extraColmaster_myisam fails on pb2
+rpl.rpl_stm_reset_slave                  # Bug#46014: rpl_stm_reset_slave crashes the server sporadically in pb2
+rpl.rpl_extraCol_myisam                  # Bug#40796
+rpl.rpl_extraColmaster_innodb            # Bug#40796
+rpl.rpl_extraCol_innodb                  # Bug#40796
+rpl_ndb.rpl_ndb_log                      # Bug#38998
+rpl.rpl_innodb_bug28430                  # Bug#46029
+rpl.rpl_row_basic_3innodb                # Bug#45243
+rpl.rpl_truncate_3innodb                 # Bug#46030
diff --git a/mysql-test/collections/default.push b/mysql-test/collections/default.push
index 0879b6fde2c..0f7c8d9949e 100644
--- a/mysql-test/collections/default.push
+++ b/mysql-test/collections/default.push
@@ -1,5 +1,5 @@
-perl mysql-test-run.pl --timer --force --comment=n_mix --mysqld=--binlog-format=mixed --experimental=collections/default.experimental
-perl mysql-test-run.pl --timer --force --comment=ps_row --ps-protocol --mysqld=--binlog-format=row --experimental=collections/default.experimental
-perl mysql-test-run.pl --timer --force --comment=embedded --embedded --experimental=collections/default.experimental
-perl mysql-test-run.pl --timer --force --comment=rpl_binlog_row --suite=rpl,binlog --mysqld=--binlog-format=row --experimental=collections/default.experimental
-perl mysql-test-run.pl --timer --force --comment=funcs_1 --suite=funcs_1 --experimental=collections/default.experimental
+perl mysql-test-run.pl --timer --force --parallel=auto --comment=n_mix --mysqld=--binlog-format=mixed --experimental=collections/default.experimental
+perl mysql-test-run.pl --timer --force --parallel=auto --comment=ps_row --ps-protocol --mysqld=--binlog-format=row --experimental=collections/default.experimental
+perl mysql-test-run.pl --timer --force --parallel=auto --comment=embedded --embedded --experimental=collections/default.experimental
+perl mysql-test-run.pl --timer --force --parallel=auto --comment=rpl_binlog_row --suite=rpl,binlog --mysqld=--binlog-format=row --experimental=collections/default.experimental
+perl mysql-test-run.pl --timer --force --parallel=auto --comment=funcs_1 --suite=funcs_1 --experimental=collections/default.experimental
diff --git a/mysql-test/extra/rpl_tests/rpl_reset_slave.test b/mysql-test/extra/rpl_tests/rpl_reset_slave.test
index 2cc041a35e1..1f88c792fce 100644
--- a/mysql-test/extra/rpl_tests/rpl_reset_slave.test
+++ b/mysql-test/extra/rpl_tests/rpl_reset_slave.test
@@ -41,3 +41,57 @@ reset slave;
 start slave;
 sync_with_master;
 show status like 'slave_open_temp_tables';
+
+#
+#Bug#34654  	RESET SLAVE does not clear LAST_IO_Err* 
+#
+
+# clearing the status
+stop slave;
+reset slave;
+let $last_io_errno= query_get_value(SHOW SLAVE STATUS, Last_IO_Errno, 1);
+echo *** errno must be zero: $last_io_errno ***;
+
+#
+# verifying start slave resets Last_IO_Error and Last_IO_Errno.
+#
+
+change master to master_user='impossible_user_name';
+start slave;
+source include/wait_for_slave_io_error.inc;
+let $last_io_errno= query_get_value(SHOW SLAVE STATUS, Last_IO_Errno, 1);
+--disable_query_log
+eval SELECT $last_io_errno > 0 as ONE;
+--enable_query_log
+
+source include/stop_slave.inc;
+change master to master_user='root';
+source include/start_slave.inc;
+let $last_io_errno= query_get_value(SHOW SLAVE STATUS, Last_IO_Errno, 1);
+let $last_io_error= query_get_value(SHOW SLAVE STATUS, Last_IO_Error, 1);
+--echo *** last errno must be  zero: $last_io_errno ***
+--echo *** last error must be blank: $last_io_error ***
+
+#
+# verifying reset slave resets Last_{IO,SQL}_Err{or,no}
+#
+
+source include/stop_slave.inc;
+change master to master_user='impossible_user_name';
+start slave;
+source include/wait_for_slave_io_error.inc;
+let $last_io_errno= query_get_value(SHOW SLAVE STATUS, Last_IO_Errno, 1);
+--disable_query_log
+eval SELECT $last_io_errno > 0 as ONE;
+--enable_query_log
+
+source include/stop_slave.inc;
+reset slave;
+let $last_io_errno= query_get_value(SHOW SLAVE STATUS, Last_IO_Errno, 1);
+let $last_io_error= query_get_value(SHOW SLAVE STATUS, Last_IO_Error, 1);
+let $last_sql_errno= query_get_value(SHOW SLAVE STATUS, Last_SQL_Errno, 1);
+let $last_sql_error= query_get_value(SHOW SLAVE STATUS, Last_SQL_Error, 1);
+--echo *** io  last errno must be  zero: $last_io_errno  ***
+--echo *** io  last error must be blank: $last_io_error  ***
+--echo *** sql last errno must be  zero: $last_sql_errno ***
+--echo *** sql last error must be blank: $last_sql_error ***
diff --git a/mysql-test/include/commit.inc b/mysql-test/include/commit.inc
index a4e7d9ae601..d412eae8364 100644
--- a/mysql-test/include/commit.inc
+++ b/mysql-test/include/commit.inc
@@ -669,13 +669,9 @@ call p_verify_status_increment(1, 0, 1, 0);
 insert t1 set a=3;
 call p_verify_status_increment(2, 2, 2, 2);
 savepoint a;
-call p_verify_status_increment(0, 0, 0, 0);
+call p_verify_status_increment(1, 0, 1, 0);
 insert t1 set a=4;
---echo # Binlog does not register itself this time for other than the 1st
---echo # statement of the transaction with MIXED/STATEMENT binlog_format.
---echo # It needs registering with the ROW format. Therefore 1,0,2,2 are 
---echo # the correct arguments to this test after bug#40221 fixed.
-call p_verify_status_increment(1, 0, 2, 2);
+call p_verify_status_increment(2, 2, 2, 2);
 release savepoint a;
 rollback;
 call p_verify_status_increment(0, 0, 0, 0);
diff --git a/mysql-test/include/concurrent.inc b/mysql-test/include/concurrent.inc
index 2180ec4cc9c..66f8a65a102 100644
--- a/mysql-test/include/concurrent.inc
+++ b/mysql-test/include/concurrent.inc
@@ -659,11 +659,16 @@ drop table t1;
   connection thread1;
   select * from t1;
 
+--echo ** Cleanup
+connection thread1;
+disconnect thread1;
+--source include/wait_until_disconnected.inc
+--echo ** connection thread2
+connection thread2;
+disconnect thread2;
+--source include/wait_until_disconnected.inc
 --echo ** connection default
 connection default;
 drop table t1;
 drop user mysqltest@localhost;
 
-disconnect thread1;
-disconnect thread2;
-
diff --git a/mysql-test/include/grant_cache.inc b/mysql-test/include/grant_cache.inc
index 501e115f0ee..47eef1cdb67 100644
--- a/mysql-test/include/grant_cache.inc
+++ b/mysql-test/include/grant_cache.inc
@@ -171,15 +171,30 @@ show status like "Qcache_not_cached";
 
 # Cleanup
 
---echo ----- switch to connection default and close connections -----
-connection default;
+--echo ----- close connections -----
+connection root;
 disconnect root;
+--source include/wait_until_disconnected.inc
+connection root2;
 disconnect root2;
+--source include/wait_until_disconnected.inc
+connection user1;
 disconnect user1;
+--source include/wait_until_disconnected.inc
+connection user2;
 disconnect user2;
+--source include/wait_until_disconnected.inc
+connection user3;
 disconnect user3;
+--source include/wait_until_disconnected.inc
+connection user4;
 disconnect user4;
+--source include/wait_until_disconnected.inc
+connection unkuser;
 disconnect unkuser;
+--source include/wait_until_disconnected.inc
+--echo ----- switch to connection default -----
+connection default;
 
 #
 # A temporary 4.1 workaround to make this test pass if
diff --git a/mysql-test/include/handler.inc b/mysql-test/include/handler.inc
index 96f90aba8e0..6e7f53ba9b2 100644
--- a/mysql-test/include/handler.inc
+++ b/mysql-test/include/handler.inc
@@ -719,6 +719,7 @@ connection con1;
 --reap
 drop table t1;
 disconnect con1;
+--source include/wait_until_disconnected.inc
 connection default;
 
 #
diff --git a/mysql-test/include/index_merge1.inc b/mysql-test/include/index_merge1.inc
index 5837df67a75..d137b0957c0 100644
--- a/mysql-test/include/index_merge1.inc
+++ b/mysql-test/include/index_merge1.inc
@@ -527,4 +527,30 @@ where exists (select 1 from t2, t3
 
 drop table t0, t1, t2, t3;
 
+--echo #
+--echo # BUG#44810: index merge and order by with low sort_buffer_size 
+--echo # crashes server!
+--echo #
+CREATE TABLE t1(a VARCHAR(128),b VARCHAR(128),KEY(A),KEY(B));
+INSERT INTO t1 VALUES (REPEAT('a',128),REPEAT('b',128));
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+SET SESSION sort_buffer_size=1;
+EXPLAIN 
+SELECT * FROM t1 FORCE INDEX(a,b) WHERE a LIKE 'a%' OR b LIKE 'b%' 
+  ORDER BY a,b;
+# we don't actually care about the result : we're checking if it crashes
+--disable_result_log
+SELECT * FROM t1 FORCE INDEX(a,b) WHERE a LIKE 'a%' OR b LIKE 'b%' 
+  ORDER BY a,b;
+--enable_result_log
+
+SET SESSION sort_buffer_size=DEFAULT;
+DROP TABLE t1;
+
+
 --echo End of 5.0 tests
diff --git a/mysql-test/include/mix1.inc b/mysql-test/include/mix1.inc
index 7c87949830f..6dabe4864a9 100644
--- a/mysql-test/include/mix1.inc
+++ b/mysql-test/include/mix1.inc
@@ -1162,6 +1162,25 @@ ROLLBACK;
 --error 1305
 ROLLBACK TO SAVEPOINT s4;
 
+#
+# Bug#39793 Foreign keys not constructed when column has a '#' in a comment or default value
+#
+
+#This statement should be written on a single line for proper testing
+CREATE TABLE t1 (f1 INTEGER PRIMARY KEY COMMENT 'My ID#', f2 INTEGER DEFAULT NULL, f3 CHAR(10) DEFAULT 'My ID#', CONSTRAINT f2_ref FOREIGN KEY (f2) REFERENCES t1 (f1)) ENGINE=INNODB;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+--echo #
+--echo # Bug #36995: valgrind error in remove_const during subquery executions
+--echo #
+
+create table t1 (a bit(1) not null,b int) engine=myisam;
+create table t2 (c int) engine=innodb;
+explain
+select b from t1 where a not in (select b from t1,t2 group by a) group by a;
+DROP TABLE t1,t2;
+
 --echo End of 5.0 tests
 
 # Fix for BUG#19243 "wrong LAST_INSERT_ID() after ON DUPLICATE KEY
@@ -1479,43 +1498,12 @@ INSERT INTO t1 VALUES
   (4,1,3,'pk',NULL),(5,1,3,'c2',NULL),
   (2,1,4,'c_extra',NULL),(3,1,4,'c_extra',NULL);
 
-EXPLAIN SELECT * FROM t1 WHERE tid = 1 AND vid = 3 ORDER BY idx DESC;
+EXPLAIN SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE tid = 1 AND vid = 3 ORDER BY idx DESC;
 
-SELECT * FROM t1 WHERE tid = 1 AND vid = 3 ORDER BY idx DESC;
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE tid = 1 AND vid = 3 ORDER BY idx DESC;
 
 DROP TABLE t1;
 
-#
-# Bug#21704: Renaming column does not update FK definition.
-#
-
-#
-# --disable_warnings
-# DROP TABLE IF EXISTS t1;
-# DROP TABLE IF EXISTS t2;
-# --enable_warnings
-#
-# CREATE TABLE t1(id INT PRIMARY KEY)
-#   ENGINE=innodb;
-#
-# CREATE TABLE t2(
-#   t1_id INT PRIMARY KEY,
-#   CONSTRAINT fk1 FOREIGN KEY (t1_id) REFERENCES t1(id))
-#   ENGINE=innodb;
-#
-# --echo
-#
-# --disable_result_log
-# --error ER_ERROR_ON_RENAME
-# ALTER TABLE t1 CHANGE id id2 INT;
-# --enable_result_log
-#
-# --echo
-#
-# DROP TABLE t2;
-# DROP TABLE t1;
-#
-
 --echo #
 --echo # Bug #44290: explain crashes for subquery with distinct in
 --echo #             SQL_SELECT::test_quick_select
@@ -1535,4 +1523,31 @@ SELECT 1 FROM (SELECT COUNT(DISTINCT c1)
 
 DROP TABLE t1;
 
+eval
+CREATE TABLE t1 (c1 REAL, c2 REAL, c3 REAL, KEY (c3), KEY (c2, c3))
+  ENGINE=$engine_type;
+INSERT INTO t1 VALUES (1,1,1), (1,1,1), (1,1,2), (1,1,1), (1,1,2);
+
+SELECT 1 FROM (SELECT COUNT(DISTINCT c1) 
+                 FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
+EXPLAIN 
+SELECT 1 FROM (SELECT COUNT(DISTINCT c1) 
+                 FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
+
+DROP TABLE t1;
+
+eval
+CREATE TABLE t1 (c1 DECIMAL(12,2), c2 DECIMAL(12,2), c3 DECIMAL(12,2), 
+  KEY (c3), KEY (c2, c3))
+  ENGINE=$engine_type;
+INSERT INTO t1 VALUES (1,1,1), (1,1,1), (1,1,2), (1,1,1), (1,1,2);
+
+SELECT 1 FROM (SELECT COUNT(DISTINCT c1) 
+                 FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
+EXPLAIN 
+SELECT 1 FROM (SELECT COUNT(DISTINCT c1) 
+                 FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
+
+DROP TABLE t1;
+
 --echo End of 5.1 tests
diff --git a/mysql-test/include/mtr_check.sql b/mysql-test/include/mtr_check.sql
index 12cb2c870a2..9db631a2615 100644
--- a/mysql-test/include/mtr_check.sql
+++ b/mysql-test/include/mtr_check.sql
@@ -57,3 +57,13 @@ BEGIN
     mysql.user;
 
 END||
+
+--
+-- Procedure used by test case used to force all
+-- servers to restart after testcase and thus skipping
+-- check test case after test
+--
+CREATE DEFINER=root@localhost PROCEDURE force_restart()
+BEGIN
+  SELECT 1 INTO OUTFILE 'force_restart';
+END||
diff --git a/mysql-test/include/mtr_warnings.sql b/mysql-test/include/mtr_warnings.sql
index 73287900f3c..b99402c8031 100644
--- a/mysql-test/include/mtr_warnings.sql
+++ b/mysql-test/include/mtr_warnings.sql
@@ -139,9 +139,9 @@ INSERT INTO global_suppressions VALUES
  ("Cannot find or open table test\/bug29807 from"),
 
  /* innodb foreign key tests that fail in ALTER or RENAME produce this */
- ("InnoDB: Error: in ALTER TABLE `test`.`t[12]`"),
+ ("InnoDB: Error: in ALTER TABLE `test`.`t[123]`"),
  ("InnoDB: Error: in RENAME TABLE table `test`.`t1`"),
- ("InnoDB: Error: table `test`.`t[12]` does not exist in the InnoDB internal"),
+ ("InnoDB: Error: table `test`.`t[123]` does not exist in the InnoDB internal"),
 
  /* Test case for Bug#14233 produces the following warnings: */
  ("Stored routine 'test'.'bug14233_1': invalid value in column mysql.proc"),
diff --git a/mysql-test/include/mysqldump.inc b/mysql-test/include/mysqldump.inc
new file mode 100644
index 00000000000..6227138b012
--- /dev/null
+++ b/mysql-test/include/mysqldump.inc
@@ -0,0 +1,50 @@
+################################################################################
+# mysqldump.inc
+#
+# SUMMARY: include file to facilitate testing the quality of mysqldump output
+#
+# INPUTS:  Two variables:
+#          $table_name - the name of the table that was dumped
+#          $mysqldumpfile - the name of the file that captured mysqldump output
+#
+# OUTPUTS: minor echo data:
+#          We 'echo' some stage information to the .result file:
+#          'altering original table', 'restoring from dumpfile', 'comparing'
+#
+# OTHER FILES:  We use include/diff_tables.inc to compare the original, renamed
+#               table with the 'restored' one. 
+#
+# DESCRIPTION: This file works by being fed the name of the original table
+#              and a mysqldump output file.  The original table is then renamed
+#              to <table_name>_orig, the mysqldump file is used to recreate the
+#              table, then diff_tables.inc is called to compare them.
+#
+# LIMITATIONS:  Does *NOT* work with xml output!
+#
+# AUTHOR: pcrews 2009-05-21
+#                Bug#40465 mysqldump.test does no checking of dump or restore
+#
+# LAST CHANGE: 2009-05-21 
+#
+################################################################################
+
+--echo # Begin testing mysqldump output + restore
+--echo # Create 'original table name - <table>_orig
+# NOTE: We use SET then let as query_get_value has issues with the extra commas
+#       used in the CONCAT statement.
+eval SET @orig_table_name = CONCAT('$table_name', '_orig');
+let $orig_table_name = query_get_value(SELECT @orig_table_name,@orig_table_name,1);
+--echo # Rename original table
+eval ALTER TABLE $table_name RENAME to $orig_table_name;
+--echo # Recreate table from mysqldump output
+--exec $MYSQL test < $mysqldumpfile
+--echo # Compare original and recreated tables
+--echo # Recreated table: $table_name
+--echo # Original table: $orig_table_name
+let $diff_table_1 = $table_name;
+let $diff_table_2 = $orig_table_name;
+--source include/diff_tables.inc
+--echo # Cleanup
+--remove_file $mysqldumpfile
+eval DROP TABLE $table_name, $orig_table_name;
+
diff --git a/mysql-test/include/no_valgrind_without_big.inc b/mysql-test/include/no_valgrind_without_big.inc
new file mode 100644
index 00000000000..743e974daec
--- /dev/null
+++ b/mysql-test/include/no_valgrind_without_big.inc
@@ -0,0 +1,12 @@
+# include/no_valgrind_without_big.inc
+#
+# If we are running with Valgrind ($VALGRIND_TEST <> 0) than the resource
+# consumption (storage space needed, runtime ...) will be extreme.
+# Therefore we require that the option "--big-test" is also set.
+#
+
+if (`SELECT $VALGRIND_TEST <> 0 AND '$BIG_TEST' = ''`)
+{
+   --skip Need "--big-test" when running with Valgrind
+}
+
diff --git a/mysql-test/include/query_cache.inc b/mysql-test/include/query_cache.inc
index 77ea0021a5d..7ce97b42158 100644
--- a/mysql-test/include/query_cache.inc
+++ b/mysql-test/include/query_cache.inc
@@ -177,6 +177,7 @@ show status like "Qcache_hits";
 
 # Final cleanup
 eval set GLOBAL query_cache_size=$save_query_cache_size;
+disconnect connection1;
+--source include/wait_until_disconnected.inc
 connection default;
 drop table t2;
-disconnect connection1;
diff --git a/mysql-test/include/wait_for_slave_io_error.inc b/mysql-test/include/wait_for_slave_io_error.inc
new file mode 100644
index 00000000000..094406e02b2
--- /dev/null
+++ b/mysql-test/include/wait_for_slave_io_error.inc
@@ -0,0 +1,23 @@
+# ==== Purpose ====
+#
+# Waits until the IO thread of the current connection has got an
+# error, or until a timeout is reached.
+#
+# ==== Usage ====
+#
+# source include/wait_for_slave_io_error.inc;
+#
+# Parameters to this macro are $slave_timeout and
+# $slave_keep_connection. See wait_for_slave_param.inc for
+# descriptions.
+
+let $old_slave_param_comparison= $slave_param_comparison;
+
+let $slave_param= Last_IO_Errno;
+let $slave_param_comparison= !=;
+let $slave_param_value= 0;
+let $slave_error_message= Failed while waiting for slave to produce an error in its sql thread;
+source include/wait_for_slave_param.inc;
+let $slave_error_message= ;
+
+let $slave_param_comparison= $old_slave_param_comparison;
diff --git a/mysql-test/lib/My/CoreDump.pm b/mysql-test/lib/My/CoreDump.pm
index f3e9f521384..3ac9e385070 100644
--- a/mysql-test/lib/My/CoreDump.pm
+++ b/mysql-test/lib/My/CoreDump.pm
@@ -22,6 +22,33 @@ use My::Platform;
 
 use File::Temp qw/ tempfile tempdir /;
 
+my $hint_mysqld;		# Last resort guess for executable path
+
+# If path in core file is 79 chars we assume it's been truncated
+# Looks like we can still find the full path using 'strings'
+# If that doesn't work, use the hint (mysqld path) as last resort.
+
+sub _verify_binpath {
+  my ($binary, $core_name)= @_;
+  my $binpath;
+
+  if (length $binary != 79) {
+    $binpath= $binary;
+    print "Core generated by '$binpath'\n";
+  } else {
+    # Last occurrence of path ending in /mysql*, cut from first /
+    if (`strings '$core_name' | grep "/mysql[^/. ]*\$" | tail -1` =~ /(\/.*)/) {
+      $binpath= $1;
+      print "Guessing that core was generated by '$binpath'\n";
+    } else {
+      return unless $hint_mysqld;
+      $binpath= $hint_mysqld;
+      print "Wild guess that core was generated by '$binpath'\n";
+    }
+  }
+  return $binpath;
+}
+
 sub _gdb {
   my ($core_name)= @_;
 
@@ -33,7 +60,8 @@ sub _gdb {
   `gdb -c '$core_name' --batch 2>&1` =~
     /Core was generated by `([^\s\'\`]+)/;
   my $binary= $1 or return;
-  print "Core generated by '$binary'\n";
+
+  $binary= _verify_binpath ($binary, $core_name) or return;
 
   # Create tempfile containing gdb commands
   my ($tmp, $tmp_name) = tempfile();
@@ -73,7 +101,8 @@ sub _dbx {
   `echo | dbx - '$core_name' 2>&1` =~
     /Corefile specified executable: "([^"]+)"/;
   my $binary= $1 or return;
-  print "Core generated by '$binary'\n";
+
+  $binary= _verify_binpath ($binary, $core_name) or return;
 
   # Find all threads
   my @thr_ids = `echo threads | dbx '$binary' '$core_name' 2>&1` =~ /t@\d+/g;
@@ -203,7 +232,7 @@ sub _cdb {
 
   my $cdb_cmd = "!sym prompts off; !analyze -v; .ecxr; !for_each_frame dv /t;!uniqstack -p;q";
   my $cdb_output=
-    `cdb -z $core_name -i "$image_path" -y "$symbol_path" -t 0 -lines -c "$cdb_cmd" 2>&1`;
+    `cdb -c "$cdb_cmd" -z $core_name -i "$image_path" -y "$symbol_path" -t 0 -lines 2>&1`;
   return if $? >> 8;
   return unless $cdb_output;
   
@@ -225,7 +254,8 @@ EOF
 
 
 sub show {
-  my ($class, $core_name)= @_;
+  my ($class, $core_name, $exe_mysqld)= @_;
+  $hint_mysqld= $exe_mysqld;
 
   # On Windows, rely on cdb to be there...
   if (IS_WINDOWS)
diff --git a/mysql-test/lib/My/File/Path.pm b/mysql-test/lib/My/File/Path.pm
index 99edeecdaf7..25a26568eee 100644
--- a/mysql-test/lib/My/File/Path.pm
+++ b/mysql-test/lib/My/File/Path.pm
@@ -164,6 +164,9 @@ sub copytree {
       copytree("$from_dir/$_", "$to_dir/$_");
       next;
     }
+
+    # Only copy plain files
+    next unless -f "$from_dir/$_";
     copy("$from_dir/$_", "$to_dir/$_");
   }
   closedir(DIR);
diff --git a/mysql-test/lib/My/SafeProcess.pm b/mysql-test/lib/My/SafeProcess.pm
index 5ef3286ad8e..7e102b628ca 100644
--- a/mysql-test/lib/My/SafeProcess.pm
+++ b/mysql-test/lib/My/SafeProcess.pm
@@ -536,7 +536,37 @@ sub wait_any {
   return $proc;
 }
 
+
 #
+# Wait for all processes to exit
+#
+sub wait_all {
+  while(keys %running)
+  {
+    wait_any();
+  }
+}
+
+
+#
+# Check if any process has exited, but don't wait.
+#
+# Returns a reference to the SafeProcess that
+# exited or undefined
+#
+sub check_any {
+  for my $proc (values %running){
+    if ( $proc->is_child($$) ) {
+      if (not $proc->wait_one(0)) {
+	_verbose ("Found exited $proc");
+	return $proc;
+      }
+    }
+  }
+  return undef;
+}
+
+
 # Overload string operator
 # and fallback to default functions if no
 # overloaded function is found
diff --git a/mysql-test/lib/My/SafeProcess/Base.pm b/mysql-test/lib/My/SafeProcess/Base.pm
index 3fc1b1be017..9a6871264b8 100644
--- a/mysql-test/lib/My/SafeProcess/Base.pm
+++ b/mysql-test/lib/My/SafeProcess/Base.pm
@@ -83,6 +83,13 @@ sub exit_status {
   };
 }
 
+# threads.pm may not exist everywhere, so use only on Windows.
+
+use if $^O eq "MSWin32", "threads";
+use if $^O eq "MSWin32", "threads::shared";
+
+my $win32_spawn_lock :shared;
+
 
 #
 # Create a new process
@@ -104,6 +111,8 @@ sub create_process {
 
   if ($^O eq "MSWin32"){
 
+    lock($win32_spawn_lock);
+
     #printf STDERR "stdin %d, stdout %d, stderr %d\n",
     #    fileno STDIN, fileno STDOUT, fileno STDERR;
 
diff --git a/mysql-test/lib/My/SafeProcess/safe_process.cc b/mysql-test/lib/My/SafeProcess/safe_process.cc
index dc7b7da28c7..50c433b9b39 100644
--- a/mysql-test/lib/My/SafeProcess/safe_process.cc
+++ b/mysql-test/lib/My/SafeProcess/safe_process.cc
@@ -89,7 +89,7 @@ static void die(const char* fmt, ...)
 }
 
 
-static void kill_child (void)
+static void kill_child(void)
 {
   int status= 0;
 
@@ -119,7 +119,7 @@ static void kill_child (void)
 }
 
 
-static void handle_abort (int sig)
+extern "C" void handle_abort(int sig)
 {
     message("Got signal %d, child_pid: %d, sending ABRT", sig, child_pid);
 
@@ -128,8 +128,8 @@ static void handle_abort (int sig)
     }
 }
 
-    
-static void handle_signal (int sig)
+
+extern "C" void handle_signal(int sig)
 {
   message("Got signal %d, child_pid: %d", sig, child_pid);
   terminated= 1;
@@ -152,7 +152,7 @@ int main(int argc, char* const argv[] )
   pid_t own_pid= getpid();
   pid_t parent_pid= getppid();
   bool nocore = false;
-  
+
   /* Install signal handlers */
   signal(SIGTERM, handle_signal);
   signal(SIGINT,  handle_signal);
@@ -232,10 +232,11 @@ int main(int argc, char* const argv[] )
         message("setrlimit failed, errno=%d", errno);
       }
     }
-    
+
     // Signal that child is ready
     buf= 37;
-    write(pfd[1], &buf, 1);
+    if ((write(pfd[1], &buf, 1)) < 1)
+      die("Failed to signal that child is ready");
     // Close write end
     close(pfd[1]);
 
@@ -246,8 +247,10 @@ int main(int argc, char* const argv[] )
   close(pfd[1]); // Close unused write end
 
   // Wait for child to signal it's ready
-  read(pfd[0], &buf, 1);
-  if(buf != 37)
+  if ((read(pfd[0], &buf, 1)) < 1)
+    die("Failed to read signal from child");
+
+  if (buf != 37)
     die("Didn't get 37 from pipe");
   close(pfd[0]); // Close read end
 
@@ -272,7 +275,7 @@ int main(int argc, char* const argv[] )
       if (WIFEXITED(status))
       {
         // Process has exited, collect return status
-        int ret_code= WEXITSTATUS(status);
+        ret_code= WEXITSTATUS(status);
         message("Child exit: %d", ret_code);
         // Exit with exit status of the child
         exit(ret_code);
@@ -287,6 +290,6 @@ int main(int argc, char* const argv[] )
   }
   kill_child();
 
-  exit(1);
+  return 1;
 }
 
diff --git a/mysql-test/lib/My/SafeProcess/safe_process_win.cc b/mysql-test/lib/My/SafeProcess/safe_process_win.cc
index 4fb89f098ed..80c1b7a97f2 100755
--- a/mysql-test/lib/My/SafeProcess/safe_process_win.cc
+++ b/mysql-test/lib/My/SafeProcess/safe_process_win.cc
@@ -259,22 +259,37 @@ int main(int argc, const char** argv )
     the JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE flag, making sure it will be
     terminated when the last handle to it is closed(which is owned by
     this process).
+
+    If breakaway from job fails on some reason, fallback is to create a
+    new process group. Process groups also allow to kill process and its 
+    descedants, subject to some restrictions (processes have to run within
+    the same console,and must not ignore CTRL_BREAK)
   */
-  if (CreateProcess(NULL, (LPSTR)child_args,
+  DWORD create_flags[]= {CREATE_BREAKAWAY_FROM_JOB, CREATE_NEW_PROCESS_GROUP, 0};
+  BOOL process_created= FALSE;
+  BOOL jobobject_assigned= FALSE;
+
+  for (int i=0; i < sizeof(create_flags)/sizeof(create_flags[0]); i++)
+  { 
+    process_created= CreateProcess(NULL, (LPSTR)child_args,
                     NULL,
                     NULL,
                     TRUE, /* inherit handles */
-                    CREATE_SUSPENDED | CREATE_BREAKAWAY_FROM_JOB,
+                    CREATE_SUSPENDED | create_flags[i],
                     NULL,
                     NULL,
                     &si,
-                    &process_info) == 0)
-    die("CreateProcess failed");
+                    &process_info);
+    if (process_created)
+    {
+     jobobject_assigned= AssignProcessToJobObject(job_handle, process_info.hProcess);
+     break;
+    }
+  }
 
-  if (AssignProcessToJobObject(job_handle, process_info.hProcess) == 0)
+  if (!process_created)
   {
-    TerminateProcess(process_info.hProcess, 200);
-    die("AssignProcessToJobObject failed");
+    die("CreateProcess failed");
   }
   ResumeThread(process_info.hThread);
   CloseHandle(process_info.hThread);
@@ -312,6 +327,13 @@ int main(int argc, const char** argv )
     message("TerminateJobObject failed");
   CloseHandle(job_handle);
   message("Job terminated and closed");
+
+  if (!jobobject_assigned)
+  {
+    GenerateConsoleCtrlEvent(CTRL_BREAK_EVENT, process_info.dwProcessId);
+    TerminateProcess(process_info.hProcess, 202);
+  }
+
   if (wait_res != WAIT_OBJECT_0 + CHILD)
   {
     /* The child has not yet returned, wait for it */
diff --git a/mysql-test/lib/mtr_cases.pm b/mysql-test/lib/mtr_cases.pm
index 23a85ef7ecc..c9c0f1796e5 100644
--- a/mysql-test/lib/mtr_cases.pm
+++ b/mysql-test/lib/mtr_cases.pm
@@ -33,7 +33,7 @@ our $print_testcases;
 our $skip_rpl;
 our $do_test;
 our $skip_test;
-our $opt_skip_combination;
+our $skip_combinations;
 our $binlog_format;
 our $enable_disabled;
 our $default_storage_engine;
@@ -119,11 +119,22 @@ sub collect_test_cases ($$) {
 	if ( $test->{name} =~ /.*\.$tname/ )
 	{
 	  $found= 1;
+	  last;
 	}
       }
       if ( not $found )
       {
-	mtr_error("Could not find '$tname' in '$suites' suite(s)");
+	mtr_error("Could not find '$tname' in '$suites' suite(s)") unless $sname;
+	# If suite was part of name, find it there
+	my ($this_case) = collect_one_suite($sname, [ $tname ]);
+	if ($this_case)
+        {
+	  push (@$cases, $this_case);
+	}
+	else
+	{
+	  mtr_error("Could not find '$tname' in '$sname' suite");
+        }
       }
     }
   }
@@ -375,7 +386,7 @@ sub collect_one_suite($)
   # Read combinations for this suite and build testcases x combinations
   # if any combinations exists
   # ----------------------------------------------------------------------
-  if ( ! $opt_skip_combination )
+  if ( ! $skip_combinations )
   {
     my @combinations;
     my $combination_file= "$suitedir/combinations";
@@ -464,6 +475,66 @@ sub collect_one_suite($)
       #print_testcases(@cases);
     }
   }
+
+  # ----------------------------------------------------------------------
+  # Testing InnoDB plugin.
+  # ----------------------------------------------------------------------
+  my $lib_innodb_plugin=
+    mtr_file_exists(::vs_config_dirs('storage/innodb_plugin', 'ha_innodb_plugin.dll'),
+                    "$::basedir/storage/innodb_plugin/.libs/ha_innodb_plugin.so",
+                    "$::basedir/lib/mysql/plugin/ha_innodb_plugin.so",
+                    "$::basedir/lib/mysql/plugin/ha_innodb_plugin.dll");
+  if ($::mysql_version_id >= 50100 && !(IS_WINDOWS && $::opt_embedded_server) &&
+      $lib_innodb_plugin)
+  {
+    my @new_cases;
+
+    foreach my $test (@cases)
+    {
+      next if ($test->{'skip'} || !$test->{'innodb_test'});
+      # Exceptions
+      next if ($test->{'name'} eq 'main.innodb'); # Failed with wrong errno (fk)
+      # innodb_file_per_table is rw with innodb_plugin
+      next if ($test->{'name'} eq 'sys_vars.innodb_file_per_table_basic');
+      # innodb_lock_wait_timeout is rw with innodb_plugin
+      next if ($test->{'name'} eq 'sys_vars.innodb_lock_wait_timeout_basic');
+      # Diff around innodb_thread_concurrency variable
+      next if ($test->{'name'} eq 'sys_vars.innodb_thread_concurrency_basic');
+      # Copy test options
+      my $new_test= My::Test->new();
+      while (my ($key, $value) = each(%$test))
+      {
+        if (ref $value eq "ARRAY")
+        {
+          push(@{$new_test->{$key}}, @$value);
+        }
+        else
+        {
+          $new_test->{$key}= $value;
+        }
+      }
+      my $plugin_filename= basename($lib_innodb_plugin);
+      push(@{$new_test->{master_opt}}, '--ignore-builtin-innodb');
+      push(@{$new_test->{master_opt}}, '--plugin-dir=' . dirname($lib_innodb_plugin));
+      push(@{$new_test->{master_opt}}, "--plugin_load=innodb=$plugin_filename;innodb_locks=$plugin_filename");
+      push(@{$new_test->{slave_opt}}, '--ignore-builtin-innodb');
+      push(@{$new_test->{slave_opt}}, '--plugin-dir=' . dirname($lib_innodb_plugin));
+      push(@{$new_test->{slave_opt}}, "--plugin_load=innodb=$plugin_filename;innodb_locks=$plugin_filename");
+      if ($new_test->{combination})
+      {
+        $new_test->{combination}.= ' + InnoDB plugin';
+      }
+      else
+      {
+        $new_test->{combination}= 'InnoDB plugin';
+      }
+      push(@new_cases, $new_test);
+    }
+    push(@cases, @new_cases);
+  }
+  # ----------------------------------------------------------------------
+  # End of testing InnoDB plugin.
+  # ----------------------------------------------------------------------
   optimize_cases(\@cases);
   #print_testcases(@cases);
 
@@ -887,7 +958,8 @@ sub collect_one_test_case {
   if ( $tinfo->{'innodb_test'} )
   {
     # This is a test that need innodb
-    if ( $::mysqld_variables{'innodb'} ne "TRUE" )
+    if ( $::mysqld_variables{'innodb'} eq "OFF" ||
+         ! exists $::mysqld_variables{'innodb'} )
     {
       # innodb is not supported, skip it
       $tinfo->{'skip'}= 1;
diff --git a/mysql-test/lib/mtr_process.pl b/mysql-test/lib/mtr_process.pl
index a99119a199d..a42627c93cd 100644
--- a/mysql-test/lib/mtr_process.pl
+++ b/mysql-test/lib/mtr_process.pl
@@ -21,7 +21,25 @@
 use strict;
 use Socket;
 use Errno;
+use My::Platform;
+use if IS_WINDOWS, "Net::Ping";
 
+# Ancient perl might not have port_number method for Net::Ping.
+# Check it and use fallback to connect() if it is not present.
+BEGIN 
+{
+  my $use_netping= 0;
+  if (IS_WINDOWS)
+  {
+    my $ping = Net::Ping->new();
+    if ($ping->can("port_number"))
+    {
+      $use_netping= 1;
+    }
+  }
+  eval 'sub USE_NETPING { $use_netping }';
+}
+  
 sub sleep_until_file_created ($$$);
 sub mtr_ping_port ($);
 
@@ -30,6 +48,24 @@ sub mtr_ping_port ($) {
 
   mtr_verbose("mtr_ping_port: $port");
 
+  if (IS_WINDOWS && USE_NETPING)
+  {
+    # Under Windows, connect to a port that is not open is slow
+    # It takes ~1sec. Net::Ping with small timeout is much faster.
+    my $ping = Net::Ping->new();
+    $ping->port_number($port);
+    if ($ping->ping("localhost",0.1))
+    {
+      mtr_verbose("USED");
+      return 1;
+    }
+    else
+    {
+      mtr_verbose("FREE");
+      return 0;
+    }
+  }
+  
   my $remote= "localhost";
   my $iaddr=  inet_aton($remote);
   if ( ! $iaddr )
diff --git a/mysql-test/lib/mtr_report.pm b/mysql-test/lib/mtr_report.pm
index 9c6ab35ee5e..a246c5bbef6 100644
--- a/mysql-test/lib/mtr_report.pm
+++ b/mysql-test/lib/mtr_report.pm
@@ -30,6 +30,8 @@ our @EXPORT= qw(report_option mtr_print_line mtr_print_thick_line
 		mtr_report_test);
 
 use mtr_match;
+use My::Platform;
+use POSIX qw[ _exit ];
 require "mtr_io.pl";
 
 my $tot_real_time= 0;
@@ -69,6 +71,8 @@ sub _mtr_report_test_name ($) {
 
   print _name(), _timestamp();
   printf "%-40s ", $tname;
+  my $worker = $tinfo->{worker};
+  printf "w$worker " if $worker;
 
   return $tname;
 }
@@ -217,8 +221,8 @@ sub mtr_report_test ($) {
 }
 
 
-sub mtr_report_stats ($) {
-  my $tests= shift;
+sub mtr_report_stats ($;$) {
+  my ($tests, $dont_error)= @_;
 
   # ----------------------------------------------------------------------
   # Find out how we where doing
@@ -257,6 +261,17 @@ sub mtr_report_stats ($) {
       $tot_restarts++;
     }
 
+    # Add counts for repeated runs, if any.
+    # Note that the last run has already been counted above.
+    my $num_repeat = $tinfo->{'repeat'} - 1;
+    if ( $num_repeat > 0 )
+    {
+      $tot_tests += $num_repeat;
+      my $rep_failed = $tinfo->{'rep_failures'} || 0;
+      $tot_failed += $rep_failed;
+      $tot_passed += $num_repeat - $rep_failed;
+    }
+
     # Look for warnings produced by mysqltest
     my $base_file= mtr_match_extension($tinfo->{'result_file'},
 				       "result"); # Trim extension
@@ -336,7 +351,7 @@ sub mtr_report_stats ($) {
     foreach my $tinfo (@$tests)
     {
       my $tname= $tinfo->{'name'};
-      if ( $tinfo->{failures} and ! $seen{$tname})
+      if ( ($tinfo->{failures} || $tinfo->{rep_failures}) and ! $seen{$tname})
       {
         print " $tname";
 	$seen{$tname}= 1;
@@ -359,7 +374,7 @@ sub mtr_report_stats ($) {
 
   if ( $tot_failed != 0 || $found_problems)
   {
-    mtr_error("there were failing test cases");
+    mtr_error("there were failing test cases") unless $dont_error;
   }
 }
 
@@ -459,7 +474,14 @@ sub mtr_warning (@) {
 sub mtr_error (@) {
   print STDERR _name(), _timestamp(),
     "mysql-test-run: *** ERROR: ", join(" ", @_), "\n";
-  exit(1);
+  if (IS_WINDOWS)
+  {
+    POSIX::_exit(1);
+  }
+  else
+  {
+    exit(1);
+  }
 }
 
 
diff --git a/mysql-test/lib/mtr_unique.pm b/mysql-test/lib/mtr_unique.pm
index 294a5d7b4d6..6b60157422d 100644
--- a/mysql-test/lib/mtr_unique.pm
+++ b/mysql-test/lib/mtr_unique.pm
@@ -28,32 +28,36 @@ sub msg {
  # print "### unique($$) - ", join(" ", @_), "\n";
 }
 
-my $file;
+my $dir;
 
 if(!IS_WINDOWS)
 {
-  $file= "/tmp/mysql-test-ports";
+  $dir= "/tmp/mysql-unique-ids";
 }
 else
 {
-  $file= $ENV{'TEMP'}."/mysql-test-ports";
-}
-  
-
-my %mtr_unique_ids;
-
-END {
-  my $allocated_id= $mtr_unique_ids{$$};
-  if (defined $allocated_id)
+  # Try to use machine-wide directory location for unique IDs,
+  # $ALLUSERSPROFILE . IF it is not available, fallback to $TEMP
+  # which is typically a per-user temporary directory
+  if (exists $ENV{'ALLUSERSPROFILE'} && -w $ENV{'ALLUSERSPROFILE'})
   {
-    mtr_release_unique_id($allocated_id);
+    $dir= $ENV{'ALLUSERSPROFILE'}."/mysql-unique-ids";
   }
-  delete $mtr_unique_ids{$$};
+  else
+  {
+    $dir= $ENV{'TEMP'}."/mysql-unique-ids";
+  }
+}
+
+my $mtr_unique_fh = undef;
+
+END
+{
+  mtr_release_unique_id();
 }
 
 #
-# Get a unique, numerical ID, given a file name (where all
-# requested IDs are stored), a minimum and a maximum value.
+# Get a unique, numerical ID in a specified range.
 #
 # If no unique ID within the specified parameters can be
 # obtained, return undef.
@@ -61,135 +65,63 @@ END {
 sub mtr_get_unique_id($$) {
   my ($min, $max)= @_;;
 
-  msg("get, '$file', $min-$max");
+  msg("get $min-$max, $$");
 
-  die "Can only get one unique id per process!" if $mtr_unique_ids{$$};
+  die "Can only get one unique id per process!" if defined $mtr_unique_fh;
 
-  my $ret = undef;
-  my $changed = 0;
 
-  if(eval("readlink '$file'") || eval("readlink '$file.sem'")) {
-    die 'lock file is a symbolic link';
-  }
+  # Make sure our ID directory exists
+  if (! -d $dir)
+  {
+    # If there is a file with the reserved
+    # directory name, just delete the file.
+    if (-e $dir)
+    {
+      unlink($dir);
+    }
 
-  chmod 0777, "$file.sem";
-  open SEM, ">", "$file.sem" or die "can't write to $file.sem";
-  flock SEM, LOCK_EX or die "can't lock $file.sem";
-  if(! -e $file) {
-    open FILE, ">", $file or die "can't create $file";
-    close FILE;
-  }
+    mkdir $dir;
+    chmod 0777, $dir;
 
-  msg("HAVE THE LOCK");
-
-  if(eval("readlink '$file'") || eval("readlink '$file.sem'")) {
-    die 'lock file is a symbolic link';
-  }
-
-  chmod 0777, $file;
-  open FILE, "+<", $file or die "can't open $file";
-  #select undef,undef,undef,0.2;
-  seek FILE, 0, 0;
-  my %taken = ();
-  while(<FILE>) {
-    chomp;
-    my ($id, $pid) = split / /;
-    $taken{$id} = $pid;
-    msg("taken: $id, $pid");
-    # Check if process with given pid is alive
-    if(!process_alive($pid)) {
-      print "Removing slot $id used by missing process $pid\n";
-      msg("Removing slot $id used by missing process $pid");
-      delete $taken{$id};
-      $changed++;
+    if(! -d $dir)
+    {
+      die "can't make directory $dir";
     }
   }
-  for(my $i=$min; $i<=$max; ++$i) {
-    if(! exists $taken{$i}) {
-      $ret = $i;
-      $taken{$i} = $$;
-      $changed++;
-      # Remember the id this process got
-      $mtr_unique_ids{$$}= $i;
-      msg(" got $i"); 
-      last;
+
+
+  my $fh;
+  for(my $id = $min; $id <= $max; $id++)
+  {
+    open( $fh, ">$dir/$id");
+    chmod 0666, "$dir/$id";
+    # Try to lock the file exclusively. If lock succeeds, we're done.
+    if (flock($fh, LOCK_EX|LOCK_NB))
+    {
+      # Store file handle - we would need it to release the ID (==unlock the file)
+      $mtr_unique_fh = $fh;
+      return $id;
+    }
+    else
+    {
+      close $fh;
     }
   }
-  if($changed) {
-    seek FILE, 0, 0;
-    truncate FILE, 0 or die "can't truncate $file";
-    for my $k (keys %taken) {
-      print FILE $k . ' ' . $taken{$k} . "\n";
-    }
-  }
-  close FILE;
-
-  msg("RELEASING THE LOCK");
-  flock SEM, LOCK_UN or warn "can't unlock $file.sem";
-  close SEM;
-
-  return $ret;
+  return undef;
 }
 
 
 #
 # Release a unique ID.
 #
-sub mtr_release_unique_id($) {
-  my ($myid)= @_;
-
-  msg("release, $myid");
-
-
-  if(eval("readlink '$file'") || eval("readlink '$file.sem'")) {
-    die 'lock file is a symbolic link';
-  }
-
-  open SEM, ">", "$file.sem" or die "can't write to $file.sem";
-  flock SEM, LOCK_EX or die "can't lock $file.sem";
-
-  msg("HAVE THE LOCK");
-
-  if(eval("readlink '$file'") || eval("readlink '$file.sem'")) {
-    die 'lock file is a symbolic link';
-  }
-
-  if(! -e $file) {
-    open FILE, ">", $file or die "can't create $file";
-    close FILE;
-  }
-  open FILE, "+<", $file or die "can't open $file";
-  #select undef,undef,undef,0.2;
-  seek FILE, 0, 0;
-  my %taken = ();
-  while(<FILE>) {
-    chomp;
-    my ($id, $pid) = split / /;
-    msg(" taken, $id $pid");
-    $taken{$id} = $pid;
-  }
-
-  if ($taken{$myid} != $$)
+sub mtr_release_unique_id()
+{
+  msg("release $$");
+  if (defined $mtr_unique_fh)
   {
-    msg(" The unique id for this process does not match pid");
+    close $mtr_unique_fh;
+    $mtr_unique_fh = undef;
   }
-
-
-  msg(" removing $myid");
-  delete $taken{$myid};
-  seek FILE, 0, 0;
-  truncate FILE, 0 or die "can't truncate $file";
-  for my $k (keys %taken) {
-    print FILE $k . ' ' . $taken{$k} . "\n";
-  }
-  close FILE;
-
-  msg("RELEASE THE LOCK");
-
-  flock SEM, LOCK_UN or warn "can't unlock $file.sem";
-  close SEM;
-
-  delete $mtr_unique_ids{$$};
 }
 
 
diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl
index 3949148ec49..9e168463e99 100755
--- a/mysql-test/mysql-test-run.pl
+++ b/mysql-test/mysql-test-run.pl
@@ -126,7 +126,7 @@ my $path_config_file;           # The generated config file, var/my.cnf
 # executables will be used by the test suite.
 our $opt_vs_config = $ENV{'MTR_VS_CONFIG'};
 
-my $DEFAULT_SUITES= "main,binlog,federated,rpl,rpl_ndb,ndb";
+my $DEFAULT_SUITES= "main,binlog,federated,rpl,rpl_ndb,ndb,innodb";
 my $opt_suites;
 
 our $opt_verbose= 0;  # Verbose output, enable with --verbose
@@ -209,6 +209,7 @@ sub check_timeout { return $opt_testcase_timeout * 6; };
 
 my $opt_start;
 my $opt_start_dirty;
+my $opt_wait_all;
 my $opt_repeat= 1;
 my $opt_retry= 3;
 my $opt_retry_failure= 2;
@@ -312,7 +313,7 @@ sub main {
 
   #######################################################################
   my $num_tests= @$tests;
-  if ( not defined $opt_parallel ) {
+  if ( $opt_parallel eq "auto" ) {
     # Try to find a suitable value for number of workers
     my $sys_info= My::SysInfo->new();
 
@@ -429,6 +430,7 @@ sub run_test_server ($$$) {
   my $completed= [];
   my %running;
   my $result;
+  my $exe_mysqld= find_mysqld($basedir) || ""; # Used as hint to CoreDump
 
   my $suite_timeout_proc= My::SafeProcess->timer(suite_timeout());
 
@@ -500,7 +502,7 @@ sub run_test_server ($$$) {
 			   mtr_report(" - found '$core_name'",
 				      "($num_saved_cores/$opt_max_save_core)");
 
-			   My::CoreDump->show($core_file);
+			   My::CoreDump->show($core_file, $exe_mysqld);
 
 			   if ($num_saved_cores >= $opt_max_save_core) {
 			     mtr_report(" - deleting it, already saved",
@@ -515,6 +517,7 @@ sub run_test_server ($$$) {
 	      }
 	    }
 	    $num_saved_datadir++;
+	    $num_failed_test++ unless $result->{retries};
 
 	    if ( !$opt_force ) {
 	      # Test has failed, force is off
@@ -525,11 +528,12 @@ sub run_test_server ($$$) {
 	    elsif ($opt_max_test_fail > 0 and
 		   $num_failed_test >= $opt_max_test_fail) {
 	      $suite_timeout_proc->kill();
+	      push(@$completed, $result);
+	      mtr_report_stats($completed, 1);
 	      mtr_report("Too many tests($num_failed_test) failed!",
 			 "Terminating...");
 	      return undef;
 	    }
-	    $num_failed_test++;
 	  }
 
 	  # Retry test run after test failure
@@ -554,9 +558,11 @@ sub run_test_server ($$$) {
 
 	  # Repeat test $opt_repeat number of times
 	  my $repeat= $result->{repeat} || 1;
-	  if ($repeat < $opt_repeat)
+	  # Don't repeat if test was skipped
+	  if ($repeat < $opt_repeat && $result->{'result'} ne 'MTR_RES_SKIPPED')
 	  {
 	    $result->{retries}= 0;
+	    $result->{rep_failures}++ if $result->{failures};
 	    $result->{failures}= 0;
 	    delete($result->{result});
 	    $result->{repeat}= $repeat+1;
@@ -655,6 +661,7 @@ sub run_test_server ($$$) {
     # ----------------------------------------------------
     if ( ! $suite_timeout_proc->wait_one(0) )
     {
+      mtr_report_stats($completed, 1);
       mtr_report("Test suite timeout! Terminating...");
       return undef;
     }
@@ -719,6 +726,8 @@ sub run_worker ($) {
       delete($test->{'comment'});
       delete($test->{'logfile'});
 
+      $test->{worker} = $thread_num if $opt_parallel > 1;
+
       run_testcase($test);
       #$test->{result}= 'MTR_RES_PASSED';
       # Send it back, now with results set
@@ -786,7 +795,7 @@ sub command_line_setup {
              'vs-config'                => \$opt_vs_config,
 
 	     # Max number of parallel threads to use
-	     'parallel=i'               => \$opt_parallel,
+	     'parallel=s'               => \$opt_parallel,
 
              # Config file to use as template for all tests
 	     'defaults-file=s'          => \&collect_option,
@@ -876,6 +885,7 @@ sub command_line_setup {
              'sleep=i'                  => \$opt_sleep,
              'start-dirty'              => \$opt_start_dirty,
              'start'                    => \$opt_start,
+             'wait-all'                 => \$opt_wait_all,
 	     'print-testcases'          => \&collect_option,
 	     'repeat=i'                 => \$opt_repeat,
 	     'retry=i'                  => \$opt_retry,
@@ -1125,9 +1135,9 @@ sub command_line_setup {
   # --------------------------------------------------------------------------
   # Check parallel value
   # --------------------------------------------------------------------------
-  if ($opt_parallel < 1)
+  if ($opt_parallel ne "auto" && $opt_parallel < 1)
   {
-    mtr_error("0 or negative parallel value makes no sense, use positive number");
+    mtr_error("0 or negative parallel value makes no sense, use 'auto' or positive number");
   }
 
   # --------------------------------------------------------------------------
@@ -1233,6 +1243,15 @@ sub command_line_setup {
     }
   }
 
+  # --------------------------------------------------------------------------
+  # Check use of wait-all
+  # --------------------------------------------------------------------------
+
+  if ($opt_wait_all && ! ($opt_start_dirty || $opt_start))
+  {
+    mtr_error("--wait-all can only be used with --start or --start-dirty");
+  }
+
   # --------------------------------------------------------------------------
   # Check timeout arguments
   # --------------------------------------------------------------------------
@@ -1323,29 +1342,31 @@ sub set_build_thread_ports($) {
 
   if ( lc($opt_build_thread) eq 'auto' ) {
     my $found_free = 0;
-    $build_thread = 250;	# Start attempts from here
+    $build_thread = 300;	# Start attempts from here
     while (! $found_free)
     {
-      $build_thread= mtr_get_unique_id($build_thread, 299);
+      $build_thread= mtr_get_unique_id($build_thread, 349);
       if ( !defined $build_thread ) {
-	mtr_error("Could not get a unique build thread id");
+        mtr_error("Could not get a unique build thread id");
       }
       $found_free= check_ports_free($build_thread);
       # If not free, release and try from next number
-      mtr_release_unique_id($build_thread++) unless $found_free;
+      if (! $found_free) {
+        mtr_release_unique_id();
+        $build_thread++;
+      }
     }
   }
   else
   {
     $build_thread = $opt_build_thread + $thread - 1;
+    if (! check_ports_free($build_thread)) {
+      # Some port was not free(which one has already been printed)
+      mtr_error("Some port(s) was not free")
+    }
   }
   $ENV{MTR_BUILD_THREAD}= $build_thread;
 
-  if (! check_ports_free($build_thread)) {
-    # Some port was not free(which one has already been printed)
-    mtr_error("Some port(s) was not free")
-  }
-
   # Calculate baseport
   $baseport= $build_thread * 10 + 10000;
   if ( $baseport < 5001 or $baseport + 9 >= 32767 )
@@ -1739,15 +1760,26 @@ sub environment_setup {
   # --------------------------------------------------------------------------
   # Add the path where mysqld will find ha_example.so
   # --------------------------------------------------------------------------
-  if ($mysql_version_id >= 50100) {
+  if ($mysql_version_id >= 50100 && !(IS_WINDOWS && $opt_embedded_server)) {
+    my $plugin_filename;
+    if (IS_WINDOWS)
+    {
+       $plugin_filename = "ha_example.dll"; 
+    }
+    else 
+    {
+       $plugin_filename = "ha_example.so";
+    }
     my $lib_example_plugin=
-      mtr_file_exists(vs_config_dirs('storage/example', 'ha_example.dll'),
-		      "$basedir/storage/example/.libs/ha_example.so",);
+      mtr_file_exists(vs_config_dirs('storage/example',$plugin_filename),
+		      "$basedir/storage/example/.libs/".$plugin_filename);
     $ENV{'EXAMPLE_PLUGIN'}=
       ($lib_example_plugin ? basename($lib_example_plugin) : "");
     $ENV{'EXAMPLE_PLUGIN_OPT'}= "--plugin-dir=".
       ($lib_example_plugin ? dirname($lib_example_plugin) : "");
 
+    $ENV{'HA_EXAMPLE_SO'}="'".$plugin_filename."'";
+    $ENV{'EXAMPLE_PLUGIN_LOAD'}="--plugin_load=;EXAMPLE=".$plugin_filename.";";
   }
 
   # ----------------------------------------------------
@@ -3134,6 +3166,26 @@ sub find_analyze_request
 }
 
 
+# The test can leave a file in var/tmp/ to signal
+# that all servers should be restarted
+sub restart_forced_by_test
+{
+  my $restart = 0;
+  foreach my $mysqld ( mysqlds() )
+  {
+    my $datadir = $mysqld->value('datadir');
+    my $force_restart_file = "$datadir/mtr/force_restart";
+    if ( -f $force_restart_file )
+    {
+      mtr_verbose("Restart of servers forced by test");
+      $restart = 1;
+      last;
+    }
+  }
+  return $restart;
+}
+
+
 # Return timezone value of tinfo or default value
 sub timezone {
   my ($tinfo)= @_;
@@ -3175,7 +3227,7 @@ sub run_testcase ($) {
     {
 
       # Remove old datadirs
-      clean_datadir();
+      clean_datadir() unless $opt_start_dirty;
 
       # Restore old ENV
       while (my ($option, $value)= each( %old_env )) {
@@ -3242,19 +3294,29 @@ sub run_testcase ($) {
   # --------------------------------------------------------------------
   # If --start or --start-dirty given, stop here to let user manually
   # run tests
+  # If --wait-all is also given, do the same, but don't die if one
+  # server exits
   # ----------------------------------------------------------------------
+
   if ( $opt_start or $opt_start_dirty )
   {
     mtr_print("\nStarted", started(all_servers()));
     mtr_print("Waiting for server(s) to exit...");
-    my $proc= My::SafeProcess->wait_any();
-    if ( grep($proc eq $_, started(all_servers())) )
-    {
-      mtr_print("Server $proc died");
+    if ( $opt_wait_all ) {
+      My::SafeProcess->wait_all();
+      mtr_print( "All servers exited" );
+      exit(1);
+    }
+    else {
+      my $proc= My::SafeProcess->wait_any();
+      if ( grep($proc eq $_, started(all_servers())) )
+      {
+        mtr_print("Server $proc died");
+        exit(1);
+      }
+      mtr_print("Unknown process $proc died");
       exit(1);
     }
-    mtr_print("Unknown process $proc died");
-    exit(1);
   }
 
   my $test_timeout_proc= My::SafeProcess->timer(testcase_timeout());
@@ -3272,10 +3334,38 @@ sub run_testcase ($) {
   }
 
   my $test= start_mysqltest($tinfo);
+  # Set only when we have to keep waiting after expectedly died server
+  my $keep_waiting_proc = 0;
 
   while (1)
   {
-    my $proc= My::SafeProcess->wait_any();
+    my $proc;
+    if ($keep_waiting_proc)
+    {
+      # Any other process exited?
+      $proc = My::SafeProcess->check_any();
+      if ($proc)
+      {
+	mtr_verbose ("Found exited process $proc");
+	# If that was the timeout, cancel waiting
+	if ( $proc eq $test_timeout_proc )
+	{
+	  $keep_waiting_proc = 0;
+	}
+      }
+      else
+      {
+	$proc = $keep_waiting_proc;
+      }
+    }
+    else
+    {
+      $proc= My::SafeProcess->wait_any();
+    }
+
+    # Will be restored if we need to keep waiting
+    $keep_waiting_proc = 0;
+
     unless ( defined $proc )
     {
       mtr_error("wait_any failed");
@@ -3302,7 +3392,11 @@ sub run_testcase ($) {
       if ( $res == 0 )
       {
 	my $check_res;
-	if ( $opt_check_testcases and
+	if ( restart_forced_by_test() )
+	{
+	  stop_all_servers();
+	}
+	elsif ( $opt_check_testcases and
 	     $check_res= check_testcase($tinfo, "after"))
 	{
 	  if ($check_res == 1) {
@@ -3367,8 +3461,12 @@ sub run_testcase ($) {
     # ----------------------------------------------------
     # Check if it was an expected crash
     # ----------------------------------------------------
-    if ( check_expected_crash_and_restart($proc) )
+    my $check_crash = check_expected_crash_and_restart($proc);
+    if ($check_crash)
     {
+      # Keep waiting if it returned 2, if 1 don't wait or stop waiting.
+      $keep_waiting_proc = 0 if $check_crash == 1;
+      $keep_waiting_proc = $proc if $check_crash == 2;
       next;
     }
 
@@ -3709,16 +3807,16 @@ sub check_expected_crash_and_restart {
     {
       mtr_verbose("Crash was expected, file '$expect_file' exists");
 
-      while (1){
-
+      for (my $waits = 0;  $waits < 50;  $waits++)
+      {
 	# If last line in expect file starts with "wait"
 	# sleep a little and try again, thus allowing the
 	# test script to control when the server should start
-	# up again
+	# up again. Keep trying for up to 5s at a time.
 	my $last_line= mtr_lastlinesfromfile($expect_file, 1);
 	if ($last_line =~ /^wait/ )
 	{
-	  mtr_verbose("Test says wait before restart");
+	  mtr_verbose("Test says wait before restart") if $waits == 0;
 	  mtr_milli_sleep(100);
 	  next;
 	}
@@ -3728,11 +3826,11 @@ sub check_expected_crash_and_restart {
 	# Start server with same settings as last time
 	mysqld_start($mysqld, $mysqld->{'started_opts'});
 
-	last;
+	return 1;
       }
+      # Loop ran through: we should keep waiting after a re-check
+      return 2;
     }
-
-    return 1;
   }
 
   # Not an expected crash
@@ -4431,14 +4529,17 @@ sub start_servers($) {
     my $mysqld_basedir= $mysqld->value('basedir');
     if ( $basedir eq $mysqld_basedir )
     {
-      # Copy datadir from installed system db
-      for my $path ( "$opt_vardir", "$opt_vardir/..") {
-	my $install_db= "$path/install.db";
-	copytree($install_db, $datadir)
-	  if -d $install_db;
+      if (! $opt_start_dirty)	# If dirty, keep possibly grown system db
+      {
+	# Copy datadir from installed system db
+	for my $path ( "$opt_vardir", "$opt_vardir/..") {
+	  my $install_db= "$path/install.db";
+	  copytree($install_db, $datadir)
+	    if -d $install_db;
+	}
+	mtr_error("Failed to copy system db to '$datadir'")
+	  unless -d $datadir;
       }
-      mtr_error("Failed to copy system db to '$datadir'")
-	unless -d $datadir;
     }
     else
     {
@@ -4978,10 +5079,13 @@ Options to control what engine/variation to run
   vs-config             Visual Studio configuration used to create executables
                         (default: MTR_VS_CONFIG environment variable)
 
-  config|defaults-file=<config template> Use fixed config template for all
+  defaults-file=<config template> Use fixed config template for all
                         tests
   defaults_extra_file=<config template> Extra config template to add to
                         all generated configs
+  combination=<opt>     Use at least twice to run tests with specified 
+                        options to mysqld
+  skip-combinations     Ignore combination file (or options)
 
 Options to control directories to use
   tmpdir=DIR            The directory where temporary files are stored
@@ -5004,7 +5108,6 @@ Options to control what test suites or cases to run
   force                 Continue to run the suite after failure
   with-ndbcluster-only  Run only tests that include "ndb" in the filename
   skip-ndb[cluster]     Skip all tests that need cluster
-  skip-ndb[cluster]-slave Skip all tests that need a slave cluster
   do-test=PREFIX or REGEX
                         Run test cases which name are prefixed with PREFIX
                         or fulfills REGEX
@@ -5019,6 +5122,9 @@ Options to control what test suites or cases to run
                         The default is: "$DEFAULT_SUITES"
   skip-rpl              Skip the replication test cases.
   big-test              Also run tests marked as "big"
+  enable-disabled       Run also tests marked as disabled
+  print_testcases       Don't run the tests but print details about all the
+                        selected tests, in the order they would be run.
 
 Options that specify ports
 
@@ -5087,7 +5193,7 @@ Options for valgrind
   valgrind-options=ARGS Deprecated, use --valgrind-option
   valgrind-option=ARGS  Option to give valgrind, replaces default option(s),
                         can be specified more then once
-  valgrind-path=[EXE]   Path to the valgrind executable
+  valgrind-path=<EXE>   Path to the valgrind executable
   callgrind             Instruct valgrind to use callgrind
 
 Misc options
@@ -5095,14 +5201,19 @@ Misc options
   comment=STR           Write STR to the output
   notimer               Don't show test case execution time
   verbose               More verbose output(use multiple times for even more)
+  verbose-restart       Write when and why servers are restarted
   start                 Only initialize and start the servers, using the
                         startup settings for the first specified test case
                         Example:
                          $0 --start alias &
   start-dirty           Only start the servers (without initialization) for
                         the first specified test case
+  wait-all              If --start or --start-dirty option is used, wait for all
+                        servers to exit before finishing the process
   fast                  Run as fast as possible, dont't wait for servers
                         to shutdown etc.
+  parallel=N            Run tests in N parallel threads (default=1)
+                        Use parallel=auto for auto-setting of N
   repeat=N              Run each test N number of times
   retry=N               Retry tests that fail N times, limit number of failures
                         to $opt_retry_failure
@@ -5120,6 +5231,12 @@ Misc options
   sleep=SECONDS         Passed to mysqltest, will be used as fixed sleep time
   gcov                  Collect coverage information after the test.
                         The result is a gcov file per source and header file.
+  experimental=<file>   Refer to list of tests considered experimental;
+                        failures will be marked exp-fail instead of fail.
+  report-features       First run a "test" that reports mysql features
+  timestamp             Print timestamp before each test report line
+  timediff              With --timestamp, also print time passed since
+                        *previous* test started
 
 HERE
   exit(1);
diff --git a/mysql-test/r/bug46080.result b/mysql-test/r/bug46080.result
new file mode 100644
index 00000000000..18c7c22829a
--- /dev/null
+++ b/mysql-test/r/bug46080.result
@@ -0,0 +1,14 @@
+#
+# Bug #46080: group_concat(... order by) crashes server when
+#  sort_buffer_size cannot allocate
+#
+CREATE TABLE t1(a CHAR(255));
+INSERT INTO t1 VALUES ('a');
+SET @@SESSION.sort_buffer_size=5*16*1000000;
+SET @@SESSION.max_heap_table_size=5*1000000;
+# Must not crash.
+SELECT GROUP_CONCAT(a ORDER BY a) FROM t1 GROUP BY a;
+DROP TABLE t1;
+SET @@SESSION.sort_buffer_size=default;
+SET @@SESSION.max_heap_table_size=default;
+End of 5.0 tests
diff --git a/mysql-test/r/cast.result b/mysql-test/r/cast.result
index c1af92c5f8d..dd61396e485 100644
--- a/mysql-test/r/cast.result
+++ b/mysql-test/r/cast.result
@@ -439,3 +439,16 @@ HOUR(NULL)	MINUTE(NULL)	SECOND(NULL)
 NULL	NULL	NULL
 DROP TABLE t1;
 End of 5.0 tests
+#
+#  Bug #44766: valgrind error when using convert() in a subquery
+#
+CREATE TABLE t1(a tinyint);
+INSERT INTO t1 VALUES (127);
+SELECT 1 FROM
+(
+SELECT CONVERT(t2.a USING UTF8) FROM t1, t1 t2 LIMIT 1
+) AS s LIMIT 1;
+1
+1
+DROP TABLE t1;
+End of 5.1 tests
diff --git a/mysql-test/r/commit_1innodb.result b/mysql-test/r/commit_1innodb.result
index de80dba47c1..cabd4c29c1d 100644
--- a/mysql-test/r/commit_1innodb.result
+++ b/mysql-test/r/commit_1innodb.result
@@ -766,15 +766,11 @@ call p_verify_status_increment(2, 2, 2, 2);
 SUCCESS
 
 savepoint a;
-call p_verify_status_increment(0, 0, 0, 0);
+call p_verify_status_increment(1, 0, 1, 0);
 SUCCESS
 
 insert t1 set a=4;
-# Binlog does not register itself this time for other than the 1st
-# statement of the transaction with MIXED/STATEMENT binlog_format.
-# It needs registering with the ROW format. Therefore 1,0,2,2 are 
-# the correct arguments to this test after bug#40221 fixed.
-call p_verify_status_increment(1, 0, 2, 2);
+call p_verify_status_increment(2, 2, 2, 2);
 SUCCESS
 
 release savepoint a;
diff --git a/mysql-test/r/concurrent_innodb_safelog.result b/mysql-test/r/concurrent_innodb_safelog.result
index e6adaac1068..24a84afb9ce 100644
--- a/mysql-test/r/concurrent_innodb_safelog.result
+++ b/mysql-test/r/concurrent_innodb_safelog.result
@@ -785,6 +785,8 @@ eta	tipo	c
 70	1	iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
 80	22	jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj
 90	11	kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk
+** Cleanup
+** connection thread2
 ** connection default
 drop table t1;
 drop user mysqltest@localhost;
diff --git a/mysql-test/r/concurrent_innodb_unsafelog.result b/mysql-test/r/concurrent_innodb_unsafelog.result
index e9c53d4cfa0..35fc2d89cfe 100644
--- a/mysql-test/r/concurrent_innodb_unsafelog.result
+++ b/mysql-test/r/concurrent_innodb_unsafelog.result
@@ -781,6 +781,8 @@ eta	tipo	c
 70	1	iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
 80	1	jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj
 90	11	kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk
+** Cleanup
+** connection thread2
 ** connection default
 drop table t1;
 drop user mysqltest@localhost;
diff --git a/mysql-test/r/consistent_snapshot.result b/mysql-test/r/consistent_snapshot.result
index 694c996a58e..3a0227b1a1a 100644
--- a/mysql-test/r/consistent_snapshot.result
+++ b/mysql-test/r/consistent_snapshot.result
@@ -1,6 +1,9 @@
 DROP TABLE IF EXISTS t1;
 # Establish connection con1 (user=root)
 # Establish connection con2 (user=root)
+### Test 1:
+### - While a consistent snapshot transaction is executed,
+###   no external inserts should be visible to the transaction.
 # Switch to connection con1
 CREATE TABLE t1 (a INT) ENGINE=innodb;
 START TRANSACTION WITH CONSISTENT SNAPSHOT;
@@ -10,6 +13,9 @@ INSERT INTO t1 VALUES(1);
 SELECT * FROM t1;
 a
 COMMIT;
+### Test 2:
+### - For any non-consistent snapshot transaction, external
+###   committed inserts should be visible to the transaction.
 DELETE FROM t1;
 START TRANSACTION;
 # Switch to connection con2
@@ -19,5 +25,18 @@ SELECT * FROM t1;
 a
 1
 COMMIT;
+### Test 3:
+### - Bug#44664: valgrind warning for COMMIT_AND_CHAIN and ROLLBACK_AND_CHAIN
+###   Chaining a transaction does not retain consistency level.
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+DELETE FROM t1;
+COMMIT WORK AND CHAIN;
+# Switch to connection con2
+INSERT INTO t1 VALUES(1);
+# Switch to connection con1
+SELECT * FROM t1;
+a
+1
+COMMIT;
 # Switch to connection default + close connections con1 and con2
 DROP TABLE t1;
diff --git a/mysql-test/r/ctype_cp932_binlog_row.result b/mysql-test/r/ctype_cp932_binlog_row.result
index 0370b7a1cf6..cbac6b14669 100644
--- a/mysql-test/r/ctype_cp932_binlog_row.result
+++ b/mysql-test/r/ctype_cp932_binlog_row.result
@@ -9,10 +9,10 @@ EXECUTE stmt1 USING @var1;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; CREATE TABLE t1(f1 blob)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 SELECT HEX(f1) FROM t1;
 HEX(f1)
 8300
diff --git a/mysql-test/r/ctype_cp932_binlog_stm.result b/mysql-test/r/ctype_cp932_binlog_stm.result
index 0cd2d395ebc..044885d1ea7 100644
--- a/mysql-test/r/ctype_cp932_binlog_stm.result
+++ b/mysql-test/r/ctype_cp932_binlog_stm.result
@@ -9,7 +9,7 @@ EXECUTE stmt1 USING @var1;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; CREATE TABLE t1(f1 blob)
-master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t1 VALUES('�\0')
+master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t1 VALUES(0x8300)
 SELECT HEX(f1) FROM t1;
 HEX(f1)
 8300
@@ -29,21 +29,29 @@ HEX(s1)	HEX(s2)	d
 466F6F2773206120426172	ED40ED41ED42	47.93
 DROP PROCEDURE bug18293|
 DROP TABLE t4|
-SHOW BINLOG EVENTS FROM 369|
+SHOW BINLOG EVENTS FROM 370|
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	369	Query	1	535	use `test`; CREATE TABLE t4 (s1 CHAR(50) CHARACTER SET latin1,
+master-bin.000001	370	Query	1	536	use `test`; CREATE TABLE t4 (s1 CHAR(50) CHARACTER SET latin1,
 s2 CHAR(50) CHARACTER SET cp932,
 d DECIMAL(10,2))
-master-bin.000001	535	Query	1	784	use `test`; CREATE DEFINER=`root`@`localhost` PROCEDURE `bug18293`(IN ins1 CHAR(50),
+master-bin.000001	536	Query	1	785	use `test`; CREATE DEFINER=`root`@`localhost` PROCEDURE `bug18293`(IN ins1 CHAR(50),
 IN ins2 CHAR(50) CHARACTER SET cp932,
 IN ind DECIMAL(10,2))
 BEGIN
 INSERT INTO t4 VALUES (ins1, ins2, ind);
 END
-master-bin.000001	784	Query	1	1048	use `test`; INSERT INTO t4 VALUES ( NAME_CONST('ins1',_latin1 0x466F6F2773206120426172 COLLATE 'latin1_swedish_ci'),  NAME_CONST('ins2',_cp932 0xED40ED41ED42 COLLATE 'cp932_japanese_ci'),  NAME_CONST('ind',47.93))
-master-bin.000001	1048	Query	1	1137	use `test`; DROP PROCEDURE bug18293
-master-bin.000001	1137	Query	1	1216	use `test`; DROP TABLE t4
+master-bin.000001	785	Query	1	1049	use `test`; INSERT INTO t4 VALUES ( NAME_CONST('ins1',_latin1 0x466F6F2773206120426172 COLLATE 'latin1_swedish_ci'),  NAME_CONST('ins2',_cp932 0xED40ED41ED42 COLLATE 'cp932_japanese_ci'),  NAME_CONST('ind',47.93))
+master-bin.000001	1049	Query	1	1138	use `test`; DROP PROCEDURE bug18293
+master-bin.000001	1138	Query	1	1217	use `test`; DROP TABLE t4
 End of 5.0 tests
-SHOW BINLOG EVENTS FROM 364;
+SHOW BINLOG EVENTS FROM 365;
 ERROR HY000: Error when executing command SHOW BINLOG EVENTS: Wrong offset or I/O error
+Bug#44352 UPPER/LOWER function doesn't work correctly on cp932 and sjis environment.
+CREATE TABLE t1 (a varchar(16)) character set cp932;
+INSERT INTO t1 VALUES (0x8372835E),(0x8352835E);
+SELECT hex(a), hex(lower(a)), hex(upper(a)) FROM t1 ORDER BY binary(a);
+hex(a)	hex(lower(a))	hex(upper(a))
+8352835E	8352835E	8352835E
+8372835E	8372835E	8372835E
+DROP TABLE t1;
 End of 5.1 tests
diff --git a/mysql-test/r/ctype_gbk_binlog.result b/mysql-test/r/ctype_gbk_binlog.result
new file mode 100644
index 00000000000..a49e170ff19
--- /dev/null
+++ b/mysql-test/r/ctype_gbk_binlog.result
@@ -0,0 +1,26 @@
+SET NAMES gbk;
+CREATE TABLE t1 (
+f1 BLOB
+) ENGINE=MyISAM DEFAULT CHARSET=gbk;
+CREATE PROCEDURE p1(IN val BLOB)
+BEGIN
+SET @tval = val;
+SET @sql_cmd = CONCAT_WS(' ', 'insert into t1(f1) values(?)');
+PREPARE stmt FROM @sql_cmd;
+EXECUTE stmt USING @tval;
+DEALLOCATE PREPARE stmt;
+END|
+SET @`tcontent`:=_binary 0x50434B000900000000000000E9000000 COLLATE `binary`/*!*/;
+CALL p1(@`tcontent`);
+FLUSH LOGS;
+DROP PROCEDURE p1;
+RENAME TABLE t1 to t2;
+SELECT hex(f1) FROM t2;
+hex(f1)
+50434B000900000000000000E9000000
+SELECT hex(f1) FROM t1;
+hex(f1)
+50434B000900000000000000E9000000
+DROP PROCEDURE p1;
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/mysql-test/r/ctype_ldml.result b/mysql-test/r/ctype_ldml.result
index 5dc9ea35cc3..711921eb526 100644
--- a/mysql-test/r/ctype_ldml.result
+++ b/mysql-test/r/ctype_ldml.result
@@ -40,6 +40,14 @@ abcd	abcd
 efgh	efgh
 ijkl	ijkl
 DROP TABLE t1;
+#
+# Bug#43827 Server closes connections and restarts
+#
+CREATE TABLE t1 (c1 VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_test_ci);
+INSERT INTO t1 SELECT REPEAT('a',11);
+Warnings:
+Warning	1265	Data truncated for column 'c1' at row 1
+DROP TABLE t1;
 show collation like 'ucs2_vn_ci';
 Collation	Charset	Id	Default	Compiled	Sortlen
 ucs2_vn_ci	ucs2	242			8
diff --git a/mysql-test/r/ctype_sjis.result b/mysql-test/r/ctype_sjis.result
index 91d6ebd9795..1469e335f23 100644
--- a/mysql-test/r/ctype_sjis.result
+++ b/mysql-test/r/ctype_sjis.result
@@ -209,3 +209,13 @@ SET NAMES sjis;
 SELECT HEX('�����@�\') FROM DUAL;
 HEX('�����@�\')
 8DB2939181408C5C
+# Start of 5.1 tests
+Bug#44352 UPPER/LOWER function doesn't work correctly on cp932 and sjis environment.
+CREATE TABLE t1 (a varchar(16)) character set sjis;
+INSERT INTO t1 VALUES (0x8372835E),(0x8352835E);
+SELECT hex(a), hex(lower(a)), hex(upper(a)) FROM t1 ORDER BY binary(a);
+hex(a)	hex(lower(a))	hex(upper(a))
+8352835E	8352835E	8352835E
+8372835E	8372835E	8372835E
+DROP TABLE t1;
+# End of 5.1 tests
diff --git a/mysql-test/r/ddl_i18n_koi8r.result b/mysql-test/r/ddl_i18n_koi8r.result
index af3a0899181..fe24c17a1c5 100644
--- a/mysql-test/r/ddl_i18n_koi8r.result
+++ b/mysql-test/r/ddl_i18n_koi8r.result
@@ -2829,7 +2829,11 @@ t2	CREATE TABLE `t2` (
   `col1` varchar(10) COLLATE cp1251_general_cs DEFAULT NULL
 ) ENGINE=MyISAM DEFAULT CHARSET=cp1251 COLLATE=cp1251_general_cs
 
+---> connection: con2
+
+---> connection: con3
+
 ---> connection: default
-use test|
-DROP DATABASE mysqltest1|
-DROP DATABASE mysqltest2|
+USE test;
+DROP DATABASE mysqltest1;
+DROP DATABASE mysqltest2;
diff --git a/mysql-test/r/ddl_i18n_utf8.result b/mysql-test/r/ddl_i18n_utf8.result
index 10c2afcadc1..cf4272bf90c 100644
--- a/mysql-test/r/ddl_i18n_utf8.result
+++ b/mysql-test/r/ddl_i18n_utf8.result
@@ -2829,7 +2829,11 @@ t2	CREATE TABLE `t2` (
   `col1` varchar(10) COLLATE cp1251_general_cs DEFAULT NULL
 ) ENGINE=MyISAM DEFAULT CHARSET=cp1251 COLLATE=cp1251_general_cs
 
+---> connection: con2
+
+---> connection: con3
+
 ---> connection: default
-use test|
-DROP DATABASE mysqltest1|
-DROP DATABASE mysqltest2|
+USE test;
+DROP DATABASE mysqltest1;
+DROP DATABASE mysqltest2;
diff --git a/mysql-test/r/derived.result b/mysql-test/r/derived.result
index 306c51fb8cf..80f04ffd455 100644
--- a/mysql-test/r/derived.result
+++ b/mysql-test/r/derived.result
@@ -383,3 +383,21 @@ select t2.* from (select * from t1) as A inner join t2 on A.ID = t2.FID;
 ID	DATA	FID
 drop table t1, t2;
 drop user mysqltest_1;
+# End of 4.1 tests
+SELECT 0 FROM
+(SELECT 0) t01, (SELECT 0) t02, (SELECT 0) t03, (SELECT 0) t04, (SELECT 0) t05,
+(SELECT 0) t06, (SELECT 0) t07, (SELECT 0) t08, (SELECT 0) t09, (SELECT 0) t10,
+(SELECT 0) t11, (SELECT 0) t12, (SELECT 0) t13, (SELECT 0) t14, (SELECT 0) t15,
+(SELECT 0) t16, (SELECT 0) t17, (SELECT 0) t18, (SELECT 0) t19, (SELECT 0) t20,
+(SELECT 0) t21, (SELECT 0) t22, (SELECT 0) t23, (SELECT 0) t24, (SELECT 0) t25,
+(SELECT 0) t26, (SELECT 0) t27, (SELECT 0) t28, (SELECT 0) t29, (SELECT 0) t30,
+(SELECT 0) t31, (SELECT 0) t32, (SELECT 0) t33, (SELECT 0) t34, (SELECT 0) t35,
+(SELECT 0) t36, (SELECT 0) t37, (SELECT 0) t38, (SELECT 0) t39, (SELECT 0) t40,
+(SELECT 0) t41, (SELECT 0) t42, (SELECT 0) t43, (SELECT 0) t44, (SELECT 0) t45,
+(SELECT 0) t46, (SELECT 0) t47, (SELECT 0) t48, (SELECT 0) t49, (SELECT 0) t50,
+(SELECT 0) t51, (SELECT 0) t52, (SELECT 0) t53, (SELECT 0) t54, (SELECT 0) t55,
+(SELECT 0) t56, (SELECT 0) t57, (SELECT 0) t58, (SELECT 0) t59, (SELECT 0) t60,
+(SELECT 0) t61;
+0
+0
+# End of 5.0 tests
diff --git a/mysql-test/r/distinct.result b/mysql-test/r/distinct.result
index f71bbd175e3..e0324af8cfd 100644
--- a/mysql-test/r/distinct.result
+++ b/mysql-test/r/distinct.result
@@ -629,21 +629,21 @@ SELECT DISTINCT @v5:= fruit_id, @v6:= fruit_name INTO @v7, @v8 FROM t1 WHERE
 fruit_name = 'APPLE';
 SELECT @v5, @v6, @v7, @v8;
 @v5	@v6	@v7	@v8
-3	PEAR	3	PEAR
+2	APPLE	2	APPLE
 SELECT DISTINCT @v5 + fruit_id, CONCAT(@v6, fruit_name) INTO @v9, @v10 FROM t1 
 WHERE fruit_name = 'APPLE';
 SELECT @v5, @v6, @v7, @v8, @v9, @v10;
 @v5	@v6	@v7	@v8	@v9	@v10
-3	PEAR	3	PEAR	5	PEARAPPLE
+2	APPLE	2	APPLE	4	APPLEAPPLE
 SELECT DISTINCT @v11:= @v5 + fruit_id, @v12:= CONCAT(@v6, fruit_name) INTO 
 @v13, @v14 FROM t1 WHERE fruit_name = 'APPLE';
 SELECT @v11, @v12, @v13, @v14;
 @v11	@v12	@v13	@v14
-6	PEARPEAR	6	PEARPEAR
+4	APPLEAPPLE	4	APPLEAPPLE
 SELECT DISTINCT @v13, @v14 INTO @v15, @v16 FROM t1 WHERE fruit_name = 'APPLE';
 SELECT @v15, @v16;
 @v15	@v16
-6	PEARPEAR
+4	APPLEAPPLE
 SELECT DISTINCT 2 + 2, 'Bob' INTO @v17, @v18 FROM t1 WHERE fruit_name = 
 'APPLE';
 SELECT @v17, @v18;
diff --git a/mysql-test/r/events_stress.result b/mysql-test/r/events_stress.result
index 17eb32b36b7..9b9f3caaff6 100644
--- a/mysql-test/r/events_stress.result
+++ b/mysql-test/r/events_stress.result
@@ -63,3 +63,4 @@ DROP TABLE fill_it1;
 DROP TABLE fill_it2;
 DROP TABLE fill_it3;
 DROP DATABASE events_test;
+SET GLOBAL event_scheduler=off;
diff --git a/mysql-test/r/func_compress.result b/mysql-test/r/func_compress.result
index 8e14b7695ee..b4e61d0e4fc 100644
--- a/mysql-test/r/func_compress.result
+++ b/mysql-test/r/func_compress.result
@@ -117,4 +117,13 @@ Warnings:
 Error	1259	ZLIB: Input data corrupted
 Error	1259	ZLIB: Input data corrupted
 drop table t1;
+CREATE TABLE t1 (c1 INT);
+INSERT INTO t1 VALUES (1), (1111), (11111);
+SELECT UNCOMPRESS(c1), UNCOMPRESSED_LENGTH(c1) FROM t1;
+UNCOMPRESS(c1)	UNCOMPRESSED_LENGTH(c1)
+NULL	NULL
+NULL	NULL
+NULL	825307441
+EXPLAIN EXTENDED SELECT * FROM (SELECT UNCOMPRESSED_LENGTH(c1) FROM t1) AS s;
+DROP TABLE t1;
 End of 5.0 tests
diff --git a/mysql-test/r/func_concat.result b/mysql-test/r/func_concat.result
index 7e7c163716e..75b4888fbb2 100644
--- a/mysql-test/r/func_concat.result
+++ b/mysql-test/r/func_concat.result
@@ -89,3 +89,34 @@ c1	c2
 	First
 DROP TABLE t1;
 # End of 5.0 tests
+#
+# Bug #44743: Join in combination with concat does not always work
+#
+CREATE TABLE t1 (
+a VARCHAR(100) NOT NULL DEFAULT '0',
+b VARCHAR(2) NOT NULL DEFAULT '',
+c VARCHAR(2) NOT NULL DEFAULT '',
+d TEXT NOT NULL,
+PRIMARY KEY (a, b, c),
+KEY (a)
+) DEFAULT CHARSET=utf8;
+INSERT INTO t1 VALUES ('gui_A', 'a', 'b', 'str1'),
+('gui_AB', 'a', 'b', 'str2'), ('gui_ABC', 'a', 'b', 'str3');
+CREATE TABLE t2 (
+a VARCHAR(100) NOT NULL DEFAULT '',
+PRIMARY KEY (a)
+) DEFAULT CHARSET=latin1;
+INSERT INTO t2 VALUES ('A'), ('AB'), ('ABC');
+SELECT CONCAT('gui_', t2.a), t1.d FROM t2 
+LEFT JOIN t1 ON t1.a = CONCAT('gui_', t2.a) AND t1.b = 'a' AND t1.c = 'b';
+CONCAT('gui_', t2.a)	d
+gui_A	str1
+gui_AB	str2
+gui_ABC	str3
+EXPLAIN SELECT CONCAT('gui_', t2.a), t1.d FROM t2 
+LEFT JOIN t1 ON t1.a = CONCAT('gui_', t2.a) AND t1.b = 'a' AND t1.c = 'b';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	index	NULL	PRIMARY	102	NULL	3	Using index
+1	SIMPLE	t1	eq_ref	PRIMARY,a	PRIMARY	318	func,const,const	1	
+DROP TABLE t1, t2;
+# End of 5.1 tests
diff --git a/mysql-test/r/func_crypt.result b/mysql-test/r/func_crypt.result
index 25b921681c5..c2f369b3941 100644
--- a/mysql-test/r/func_crypt.result
+++ b/mysql-test/r/func_crypt.result
@@ -95,3 +95,14 @@ Note	1003	select password('idkfa ') AS `password('idkfa ')`,old_password('idkfa'
 select encrypt('1234','_.');
 encrypt('1234','_.')
 #
+#
+# Bug #44767: invalid memory reads in password() and old_password() 
+#             functions
+#
+CREATE TABLE t1(c1 MEDIUMBLOB);
+INSERT INTO t1 VALUES (REPEAT('a', 1024));
+SELECT OLD_PASSWORD(c1), PASSWORD(c1) FROM t1;
+OLD_PASSWORD(c1)	PASSWORD(c1)
+77023ffe214c04ff	*82E58A2C08AAFE72C8EB523069CD8ADB33F78F58
+DROP TABLE t1;
+End of 5.0 tests
diff --git a/mysql-test/r/func_in.result b/mysql-test/r/func_in.result
index 1e967b668c5..88a822a2fa6 100644
--- a/mysql-test/r/func_in.result
+++ b/mysql-test/r/func_in.result
@@ -587,4 +587,25 @@ SELECT CASE c1 WHEN c1 + 1 THEN 1 END, ABS(AVG(c0)) FROM t1;
 CASE c1 WHEN c1 + 1 THEN 1 END	ABS(AVG(c0))
 NULL	1.0000
 DROP TABLE t1;
+CREATE TABLE t1(a TEXT, b INT, c INT UNSIGNED, d DECIMAL(12,2), e REAL);
+INSERT INTO t1 VALUES('iynfj', 1, 1, 1, 1);
+INSERT INTO t1 VALUES('innfj', 2, 2, 2, 2);
+SELECT SUM( DISTINCT a ) FROM t1 GROUP BY a HAVING a IN ( AVG( 1 ), 1 + a);
+SUM( DISTINCT a )
+SELECT SUM( DISTINCT b ) FROM t1 GROUP BY b HAVING b IN ( AVG( 1 ), 1 + b);
+SUM( DISTINCT b )
+1
+SELECT SUM( DISTINCT c ) FROM t1 GROUP BY c HAVING c IN ( AVG( 1 ), 1 + c);
+SUM( DISTINCT c )
+1
+SELECT SUM( DISTINCT d ) FROM t1 GROUP BY d HAVING d IN ( AVG( 1 ), 1 + d);
+SUM( DISTINCT d )
+1.00
+SELECT SUM( DISTINCT e ) FROM t1 GROUP BY e HAVING e IN ( AVG( 1 ), 1 + e);
+SUM( DISTINCT e )
+1
+SELECT SUM( DISTINCT e ) FROM t1 GROUP BY b,c,d HAVING (b,c,d) IN 
+((AVG( 1 ), 1 + c, 1 + d), (AVG( 1 ), 2 + c, 2 + d));
+SUM( DISTINCT e )
+DROP TABLE t1;
 End of 5.1 tests
diff --git a/mysql-test/r/func_math.result b/mysql-test/r/func_math.result
index c3d2db2d553..fd7ef72409e 100644
--- a/mysql-test/r/func_math.result
+++ b/mysql-test/r/func_math.result
@@ -437,6 +437,13 @@ a	ROUND(a)
 -1e+16	-10000000000000002
 1e+16	10000000000000002
 DROP TABLE t1;
+CREATE TABLE t1(f1 LONGTEXT) engine=myisam;
+INSERT INTO t1 VALUES ('a');
+SELECT 1 FROM (SELECT ROUND(f1) AS a FROM t1) AS s WHERE a LIKE 'a';
+1
+SELECT 1 FROM (SELECT ROUND(f1, f1) AS a FROM t1) AS s WHERE a LIKE 'a';
+1
+DROP TABLE t1;
 End of 5.0 tests
 SELECT 1e308 + 1e308;
 1e308 + 1e308
@@ -456,4 +463,23 @@ NULL
 SELECT POW(10, 309);
 POW(10, 309)
 NULL
+#
+# Bug #44768: SIGFPE crash when selecting rand from a view
+#             containing null
+#
+CREATE OR REPLACE VIEW v1 AS SELECT NULL AS a;
+SELECT RAND(a) FROM v1;
+RAND(a)
+0.155220427694936
+DROP VIEW v1;
+SELECT RAND(a) FROM (SELECT NULL AS a) b;
+RAND(a)
+0.155220427694936
+CREATE TABLE t1 (i INT);
+INSERT INTO t1 VALUES (NULL);
+SELECT RAND(i) FROM t1;
+RAND(i)
+0.155220427694936
+DROP TABLE t1;
+#
 End of 5.1 tests
diff --git a/mysql-test/r/func_set.result b/mysql-test/r/func_set.result
index ecdc35ac4cd..14ebc8203ec 100644
--- a/mysql-test/r/func_set.result
+++ b/mysql-test/r/func_set.result
@@ -146,3 +146,16 @@ NULL
 0
 0
 drop table t1;
+CREATE TABLE t1( a SET('a', 'b', 'c') );
+CREATE TABLE t2( a SET('a', 'b', 'c') );
+INSERT INTO t1 VALUES ('d');
+Warnings:
+Warning	1265	Data truncated for column 'a' at row 1
+INSERT INTO t2 VALUES ('');
+SELECT CONVERT( a USING latin1 ) FROM t1;
+CONVERT( a USING latin1 )
+
+SELECT CONVERT( a USING latin1 ) FROM t2;
+CONVERT( a USING latin1 )
+
+DROP TABLE t1, t2;
diff --git a/mysql-test/r/func_str.result b/mysql-test/r/func_str.result
index 25cbf2470ed..a0c3935fde0 100644
--- a/mysql-test/r/func_str.result
+++ b/mysql-test/r/func_str.result
@@ -2525,6 +2525,15 @@ SELECT DATE_FORMAT(c, GET_FORMAT(DATE, 'eur')) h, CONCAT(UPPER(aa),', ', aa) i F
 h	i
 31.12.2008	AAAAAA, aaaaaa
 DROP TABLE t1;
+#
+# BUG#44774: load_file function produces valgrind warnings
+#
+CREATE TABLE t1 (a TINYBLOB);
+INSERT INTO t1 VALUES ('aaaaaaaa');
+SELECT LOAD_FILE(a) FROM t1;
+LOAD_FILE(a)
+NULL
+DROP TABLE t1;
 End of 5.0 tests
 drop table if exists t1;
 create table t1(f1 tinyint default null)engine=myisam;
diff --git a/mysql-test/r/gis.result b/mysql-test/r/gis.result
index 494b7a36532..a3708d06a1c 100644
--- a/mysql-test/r/gis.result
+++ b/mysql-test/r/gis.result
@@ -984,4 +984,52 @@ f4	geometry	YES		NULL
 f5	datetime	YES		NULL	
 drop view v1;
 drop table t1;
+SELECT MultiPoint(12345,'');
+MultiPoint(12345,'')
+NULL
+SELECT MultiPoint(123451,'');
+MultiPoint(123451,'')
+NULL
+SELECT MultiPoint(1234512,'');
+MultiPoint(1234512,'')
+NULL
+SELECT MultiPoint(12345123,'');
+MultiPoint(12345123,'')
+NULL
+SELECT MultiLineString(12345,'');
+MultiLineString(12345,'')
+NULL
+SELECT MultiLineString(123451,'');
+MultiLineString(123451,'')
+NULL
+SELECT MultiLineString(1234512,'');
+MultiLineString(1234512,'')
+NULL
+SELECT MultiLineString(12345123,'');
+MultiLineString(12345123,'')
+NULL
+SELECT LineString(12345,'');
+LineString(12345,'')
+NULL
+SELECT LineString(123451,'');
+LineString(123451,'')
+NULL
+SELECT LineString(1234512,'');
+LineString(1234512,'')
+NULL
+SELECT LineString(12345123,'');
+LineString(12345123,'')
+NULL
+SELECT Polygon(12345,'');
+Polygon(12345,'')
+NULL
+SELECT Polygon(123451,'');
+Polygon(123451,'')
+NULL
+SELECT Polygon(1234512,'');
+Polygon(1234512,'')
+NULL
+SELECT Polygon(12345123,'');
+Polygon(12345123,'')
+NULL
 End of 5.1 tests
diff --git a/mysql-test/r/grant.result b/mysql-test/r/grant.result
index de80a83d538..a677d71b266 100644
--- a/mysql-test/r/grant.result
+++ b/mysql-test/r/grant.result
@@ -1358,3 +1358,58 @@ DROP USER 'userbug33464'@'localhost';
 USE test;
 DROP DATABASE dbbug33464;
 SET @@global.log_bin_trust_function_creators= @old_log_bin_trust_function_creators;
+CREATE USER user1;
+CREATE USER user2;
+GRANT CREATE ON db1.* TO 'user1'@'localhost';
+GRANT CREATE ROUTINE ON db1.* TO 'user1'@'localhost';
+GRANT CREATE ON db1.* TO 'user2'@'%';
+GRANT CREATE ROUTINE ON db1.* TO 'user2'@'%';
+FLUSH PRIVILEGES;
+SHOW GRANTS FOR 'user1'@'localhost';
+Grants for user1@localhost
+GRANT USAGE ON *.* TO 'user1'@'localhost'
+GRANT CREATE, CREATE ROUTINE ON `db1`.* TO 'user1'@'localhost'
+** Connect as user1 and create a procedure.
+** The creation will imply implicitly assigned
+** EXECUTE and ALTER ROUTINE privileges to
+** the current user user1@localhost. 
+SELECT @@GLOBAL.sql_mode;
+@@GLOBAL.sql_mode
+
+SELECT @@SESSION.sql_mode;
+@@SESSION.sql_mode
+
+CREATE DATABASE db1;
+CREATE PROCEDURE db1.proc1(p1 INT)
+BEGIN
+SET @x = 0;
+REPEAT SET @x = @x + 1; UNTIL @x > p1 END REPEAT;
+END ;||
+** Connect as user2 and create a procedure.
+** Implicitly assignment of privileges will
+** fail because the user2@localhost is an
+** unknown user.
+CREATE PROCEDURE db1.proc2(p1 INT)
+BEGIN
+SET @x = 0;
+REPEAT SET @x = @x + 1; UNTIL @x > p1 END REPEAT;
+END ;||
+Warnings:
+Warning	1404	Failed to grant EXECUTE and ALTER ROUTINE privileges
+SHOW GRANTS FOR 'user1'@'localhost';
+Grants for user1@localhost
+GRANT USAGE ON *.* TO 'user1'@'localhost'
+GRANT CREATE, CREATE ROUTINE ON `db1`.* TO 'user1'@'localhost'
+GRANT EXECUTE, ALTER ROUTINE ON PROCEDURE `db1`.`proc1` TO 'user1'@'localhost'
+SHOW GRANTS FOR 'user2';
+Grants for user2@%
+GRANT USAGE ON *.* TO 'user2'@'%'
+GRANT CREATE, CREATE ROUTINE ON `db1`.* TO 'user2'@'%'
+DROP PROCEDURE db1.proc1;
+DROP PROCEDURE db1.proc2;
+REVOKE ALL ON db1.* FROM 'user1'@'localhost';
+REVOKE ALL ON db1.* FROM 'user2'@'%';
+DROP USER 'user1';
+DROP USER 'user1'@'localhost';
+DROP USER 'user2';
+DROP DATABASE db1;
diff --git a/mysql-test/r/grant_cache_no_prot.result b/mysql-test/r/grant_cache_no_prot.result
index cb9acaf540d..32bb9cce90e 100644
--- a/mysql-test/r/grant_cache_no_prot.result
+++ b/mysql-test/r/grant_cache_no_prot.result
@@ -206,7 +206,8 @@ Qcache_hits	8
 show status like "Qcache_not_cached";
 Variable_name	Value
 Qcache_not_cached	8
------ switch to connection default and close connections -----
+----- close connections -----
+----- switch to connection default -----
 set names binary;
 delete from mysql.user where user in ("mysqltest_1","mysqltest_2","mysqltest_3");
 delete from mysql.db where user in ("mysqltest_1","mysqltest_2","mysqltest_3");
diff --git a/mysql-test/r/grant_cache_ps_prot.result b/mysql-test/r/grant_cache_ps_prot.result
index cf1450f3b75..281468ee2e1 100644
--- a/mysql-test/r/grant_cache_ps_prot.result
+++ b/mysql-test/r/grant_cache_ps_prot.result
@@ -206,7 +206,8 @@ Qcache_hits	8
 show status like "Qcache_not_cached";
 Variable_name	Value
 Qcache_not_cached	5
------ switch to connection default and close connections -----
+----- close connections -----
+----- switch to connection default -----
 set names binary;
 delete from mysql.user where user in ("mysqltest_1","mysqltest_2","mysqltest_3");
 delete from mysql.db where user in ("mysqltest_1","mysqltest_2","mysqltest_3");
diff --git a/mysql-test/r/group_min_max.result b/mysql-test/r/group_min_max.result
index b17884c4f7a..27448d3e949 100644
--- a/mysql-test/r/group_min_max.result
+++ b/mysql-test/r/group_min_max.result
@@ -2462,4 +2462,43 @@ c
 1
 2
 DROP TABLE t1;
+#
+# Bug #45386: Wrong query result with MIN function in field list, 
+#  WHERE and GROUP BY clause
+#
+CREATE TABLE t (a INT, b INT, INDEX (a,b));
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+# test MIN
+#should use range with index for group by
+EXPLAIN
+SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t	range	NULL	a	10	NULL	9	Using where; Using index for group-by
+#should return 1 row
+SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a;
+a	MIN(b)
+2	1
+# test MAX
+#should use range with index for group by
+EXPLAIN
+SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t	range	NULL	a	10	NULL	9	Using where; Using index for group-by
+#should return 1 row
+SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a;
+a	MAX(b)
+2	0
+# test 3 ranges and use the middle one
+INSERT INTO t SELECT a, 2 FROM t;
+#should use range with index for group by
+EXPLAIN
+SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t	range	NULL	a	10	NULL	9	Using where; Using index for group-by
+#should return 1 row
+SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a;
+a	MAX(b)
+2	1
+DROP TABLE t;
 End of 5.0 tests
diff --git a/mysql-test/r/heap_btree.result b/mysql-test/r/heap_btree.result
index 9db03855c01..7ad0f212d99 100644
--- a/mysql-test/r/heap_btree.result
+++ b/mysql-test/r/heap_btree.result
@@ -336,4 +336,11 @@ a	b
 NULL	NULL
 NULL	1
 drop table t1;
+#
+# bug#39918 - memory (heap) engine crashing while executing self join with delete
+#
+CREATE TABLE t1(a INT, KEY USING BTREE (a)) ENGINE=MEMORY;
+INSERT INTO t1 VALUES(1),(1);
+DELETE a1 FROM t1 AS a1, t1 AS a2 WHERE a1.a=a2.a;
+DROP TABLE t1;
 End of 5.0 tests
diff --git a/mysql-test/r/index_merge_myisam.result b/mysql-test/r/index_merge_myisam.result
index 8a935d87457..c639b20de91 100644
--- a/mysql-test/r/index_merge_myisam.result
+++ b/mysql-test/r/index_merge_myisam.result
@@ -557,6 +557,30 @@ a
 1
 2
 drop table t0, t1, t2, t3;
+#
+# BUG#44810: index merge and order by with low sort_buffer_size 
+# crashes server!
+#
+CREATE TABLE t1(a VARCHAR(128),b VARCHAR(128),KEY(A),KEY(B));
+INSERT INTO t1 VALUES (REPEAT('a',128),REPEAT('b',128));
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+SET SESSION sort_buffer_size=1;
+Warnings:
+Warning	1292	Truncated incorrect sort_buffer_size value: '1'
+EXPLAIN 
+SELECT * FROM t1 FORCE INDEX(a,b) WHERE a LIKE 'a%' OR b LIKE 'b%' 
+ORDER BY a,b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index_merge	a,b	a,b	131,131	NULL	64	Using sort_union(a,b); Using where; Using filesort
+SELECT * FROM t1 FORCE INDEX(a,b) WHERE a LIKE 'a%' OR b LIKE 'b%' 
+ORDER BY a,b;
+SET SESSION sort_buffer_size=DEFAULT;
+DROP TABLE t1;
 End of 5.0 tests
 #---------------- ROR-index_merge tests -----------------------
 SET SESSION STORAGE_ENGINE = MyISAM;
diff --git a/mysql-test/r/information_schema_db.result b/mysql-test/r/information_schema_db.result
index 475839569c7..6305f8cd47a 100644
--- a/mysql-test/r/information_schema_db.result
+++ b/mysql-test/r/information_schema_db.result
@@ -61,7 +61,7 @@ begin
 select table_name from information_schema.key_column_usage
 order by table_name;
 end|
-create table t1 
+create table t1
 (f1 int(10) unsigned not null,
 f2 varchar(100) not null,
 primary key (f1), unique key (f2));
@@ -203,15 +203,15 @@ View	Create View	character_set_client	collation_connection
 v2	CREATE ALGORITHM=UNDEFINED DEFINER=`testdb_2`@`localhost` SQL SECURITY DEFINER VIEW `v2` AS select `v1`.`f1` AS `f1` from `testdb_1`.`v1`	latin1	latin1_swedish_ci
 show create view testdb_1.v1;
 ERROR 42000: SHOW VIEW command denied to user 'testdb_2'@'localhost' for table 'v1'
-select table_name from information_schema.columns a 
+select table_name from information_schema.columns a
 where a.table_name = 'v2';
 table_name
 v2
-select view_definition from information_schema.views a 
+select view_definition from information_schema.views a
 where a.table_name = 'v2';
 view_definition
 select `v1`.`f1` AS `f1` from `testdb_1`.`v1`
-select view_definition from information_schema.views a 
+select view_definition from information_schema.views a
 where a.table_name = 'testdb_1.v1';
 view_definition
 select * from v2;
diff --git a/mysql-test/r/init_file.result b/mysql-test/r/init_file.result
index 8e014815a9c..43ed908ad01 100644
--- a/mysql-test/r/init_file.result
+++ b/mysql-test/r/init_file.result
@@ -4,6 +4,7 @@ SELECT * INTO @Y FROM init_file.startup limit 1,1;
 SELECT YEAR(@X)-YEAR(@Y);
 YEAR(@X)-YEAR(@Y)
 0
+DROP DATABASE init_file;
 ok
 end of 4.1 tests
 select * from t1;
@@ -19,3 +20,5 @@ y
 3
 11
 13
+drop table t1, t2;
+call mtr.force_restart();
diff --git a/mysql-test/r/innodb_bug21704.result b/mysql-test/r/innodb_bug21704.result
new file mode 100644
index 00000000000..b8e0b15d50d
--- /dev/null
+++ b/mysql-test/r/innodb_bug21704.result
@@ -0,0 +1,55 @@
+#
+# Bug#21704: Renaming column does not update FK definition.
+#
+
+# Test that it's not possible to rename columns participating in a
+# foreign key (either in the referencing or referenced table).
+
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ROW_FORMAT=COMPACT ENGINE=INNODB;
+CREATE TABLE t2 (a INT PRIMARY KEY, b INT,
+CONSTRAINT fk1 FOREIGN KEY (a) REFERENCES t1(a))
+ROW_FORMAT=COMPACT ENGINE=INNODB;
+CREATE TABLE t3 (a INT PRIMARY KEY, b INT, KEY(b), C INT,
+CONSTRAINT fk2 FOREIGN KEY (b) REFERENCES t3 (a))
+ROW_FORMAT=COMPACT ENGINE=INNODB;
+INSERT INTO t1 VALUES (1,1),(2,2),(3,3);
+INSERT INTO t2 VALUES (1,1),(2,2),(3,3);
+INSERT INTO t3 VALUES (1,1,1),(2,2,2),(3,3,3);
+
+# Test renaming the column in the referenced table.
+
+ALTER TABLE t1 CHANGE a c INT;
+ERROR HY000: Error on rename of '#sql-temporary' to './test/t1' (errno: 150)
+# Ensure that online column rename works.
+ALTER TABLE t1 CHANGE b c INT;
+affected rows: 0
+info: Records: 0  Duplicates: 0  Warnings: 0
+
+# Test renaming the column in the referencing table
+
+ALTER TABLE t2 CHANGE a c INT;
+ERROR HY000: Error on rename of '#sql-temporary' to './test/t2' (errno: 150)
+# Ensure that online column rename works.
+ALTER TABLE t2 CHANGE b c INT;
+affected rows: 0
+info: Records: 0  Duplicates: 0  Warnings: 0
+
+# Test with self-referential constraints
+
+ALTER TABLE t3 CHANGE a d INT;
+ERROR HY000: Error on rename of '#sql-temporary' to './test/t3' (errno: 150)
+ALTER TABLE t3 CHANGE b d INT;
+ERROR HY000: Error on rename of '#sql-temporary' to './test/t3' (errno: 150)
+# Ensure that online column rename works.
+ALTER TABLE t3 CHANGE c d INT;
+affected rows: 0
+info: Records: 0  Duplicates: 0  Warnings: 0
+
+# Cleanup.
+
+DROP TABLE t3;
+DROP TABLE t2;
+DROP TABLE t1;
diff --git a/mysql-test/r/innodb_bug40565.result b/mysql-test/r/innodb_bug40565.result
new file mode 100644
index 00000000000..21e923d9336
--- /dev/null
+++ b/mysql-test/r/innodb_bug40565.result
@@ -0,0 +1,9 @@
+create table bug40565(value decimal(4,2)) engine=innodb;
+insert into bug40565 values (1), (null);
+update bug40565 set value=NULL;
+affected rows: 1
+info: Rows matched: 2  Changed: 1  Warnings: 0
+update bug40565 set value=NULL;
+affected rows: 0
+info: Rows matched: 2  Changed: 0  Warnings: 0
+drop table bug40565;
diff --git a/mysql-test/r/innodb_bug42101-nonzero.result b/mysql-test/r/innodb_bug42101-nonzero.result
new file mode 100644
index 00000000000..277dfffdd35
--- /dev/null
+++ b/mysql-test/r/innodb_bug42101-nonzero.result
@@ -0,0 +1,26 @@
+set global innodb_commit_concurrency=0;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+1
+set global innodb_commit_concurrency=1;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+1
+set global innodb_commit_concurrency=42;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+42
+set global innodb_commit_concurrency=DEFAULT;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+1
+set global innodb_commit_concurrency=0;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+1
+set global innodb_commit_concurrency=1;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+1
diff --git a/mysql-test/r/innodb_bug42101.result b/mysql-test/r/innodb_bug42101.result
new file mode 100644
index 00000000000..805097ffe9d
--- /dev/null
+++ b/mysql-test/r/innodb_bug42101.result
@@ -0,0 +1,22 @@
+set global innodb_commit_concurrency=0;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+0
+set global innodb_commit_concurrency=1;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+0
+set global innodb_commit_concurrency=42;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+0
+set global innodb_commit_concurrency=0;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+0
+set global innodb_commit_concurrency=DEFAULT;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+0
diff --git a/mysql-test/r/innodb_bug45357.result b/mysql-test/r/innodb_bug45357.result
new file mode 100644
index 00000000000..7adeff2062f
--- /dev/null
+++ b/mysql-test/r/innodb_bug45357.result
@@ -0,0 +1,7 @@
+set session transaction isolation level read committed;
+create table bug45357(a int, b int,key(b))engine=innodb;
+insert into bug45357 values (25170,6122);
+update bug45357 set a=1 where b=30131;
+delete from bug45357 where b < 20996;
+delete from bug45357 where b < 7001;
+drop table bug45357;
diff --git a/mysql-test/r/innodb_mysql.result b/mysql-test/r/innodb_mysql.result
index 191a8578d4c..3ff5f04b6c6 100644
--- a/mysql-test/r/innodb_mysql.result
+++ b/mysql-test/r/innodb_mysql.result
@@ -1408,6 +1408,30 @@ SAVEPOINT s4;
 ROLLBACK;
 ROLLBACK TO SAVEPOINT s4;
 ERROR 42000: SAVEPOINT s4 does not exist
+CREATE TABLE t1 (f1 INTEGER PRIMARY KEY COMMENT 'My ID#', f2 INTEGER DEFAULT NULL, f3 CHAR(10) DEFAULT 'My ID#', CONSTRAINT f2_ref FOREIGN KEY (f2) REFERENCES t1 (f1)) ENGINE=INNODB;
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `f1` int(11) NOT NULL COMMENT 'My ID#',
+  `f2` int(11) DEFAULT NULL,
+  `f3` char(10) DEFAULT 'My ID#',
+  PRIMARY KEY (`f1`),
+  KEY `f2_ref` (`f2`),
+  CONSTRAINT `f2_ref` FOREIGN KEY (`f2`) REFERENCES `t1` (`f1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+DROP TABLE t1;
+#
+# Bug #36995: valgrind error in remove_const during subquery executions
+#
+create table t1 (a bit(1) not null,b int) engine=myisam;
+create table t2 (c int) engine=innodb;
+explain
+select b from t1 where a not in (select b from t1,t2 group by a) group by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Impossible WHERE noticed after reading const tables
+2	DEPENDENT SUBQUERY	t1	system	NULL	NULL	NULL	NULL	0	const row not found
+2	DEPENDENT SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	1	
+DROP TABLE t1,t2;
 End of 5.0 tests
 CREATE TABLE `t2` (
 `k` int(11) NOT NULL auto_increment,
@@ -1677,10 +1701,10 @@ INSERT INTO t1 VALUES
 (4,1,2,'c2',NULL),(5,1,2,'c1',NULL),(2,1,3,'c2',NULL),(3,1,3,'c2',NULL),
 (4,1,3,'pk',NULL),(5,1,3,'c2',NULL),
 (2,1,4,'c_extra',NULL),(3,1,4,'c_extra',NULL);
-EXPLAIN SELECT * FROM t1 WHERE tid = 1 AND vid = 3 ORDER BY idx DESC;
+EXPLAIN SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE tid = 1 AND vid = 3 ORDER BY idx DESC;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	index	vid	PRIMARY	12	NULL	16	Using where
-SELECT * FROM t1 WHERE tid = 1 AND vid = 3 ORDER BY idx DESC;
+1	SIMPLE	t1	index	NULL	PRIMARY	12	NULL	16	Using where
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE tid = 1 AND vid = 3 ORDER BY idx DESC;
 vid	tid	idx	name	type
 3	1	4	c_extra	NULL
 3	1	3	c2	NULL
@@ -1706,6 +1730,35 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	<derived2>	system	NULL	NULL	NULL	NULL	1	
 2	DERIVED	t1	index	c3,c2	c2	10	NULL	5	
 DROP TABLE t1;
+CREATE TABLE t1 (c1 REAL, c2 REAL, c3 REAL, KEY (c3), KEY (c2, c3))
+ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1,1,1), (1,1,1), (1,1,2), (1,1,1), (1,1,2);
+SELECT 1 FROM (SELECT COUNT(DISTINCT c1) 
+FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
+1
+1
+EXPLAIN 
+SELECT 1 FROM (SELECT COUNT(DISTINCT c1) 
+FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	<derived2>	system	NULL	NULL	NULL	NULL	1	
+2	DERIVED	t1	index	c3,c2	c2	18	NULL	5	
+DROP TABLE t1;
+CREATE TABLE t1 (c1 DECIMAL(12,2), c2 DECIMAL(12,2), c3 DECIMAL(12,2), 
+KEY (c3), KEY (c2, c3))
+ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1,1,1), (1,1,1), (1,1,2), (1,1,1), (1,1,2);
+SELECT 1 FROM (SELECT COUNT(DISTINCT c1) 
+FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
+1
+1
+EXPLAIN 
+SELECT 1 FROM (SELECT COUNT(DISTINCT c1) 
+FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	<derived2>	system	NULL	NULL	NULL	NULL	1	
+2	DERIVED	t1	index	c3,c2	c2	14	NULL	5	
+DROP TABLE t1;
 End of 5.1 tests
 drop table if exists t1, t2, t3;
 create table t1(a int);
@@ -2040,4 +2093,119 @@ DROP TABLE t4;
 DROP TABLE t1;
 DROP TABLE t2;
 DROP TABLE t3;
+CREATE TABLE t1 (a INT, b INT, KEY (a)) ENGINE = INNODB;
+CREATE TABLE t2 (a INT KEY, b INT, KEY (b)) ENGINE = INNODB;
+CREATE TABLE t3 (a INT, b INT KEY, KEY (a)) ENGINE = INNODB;
+CREATE TABLE t4 (a INT KEY, b INT, KEY (b)) ENGINE = INNODB;
+INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6);
+INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+INSERT INTO t3 VALUES (1, 101), (2, 102), (3, 103), (4, 104), (5, 105), (6, 106);
+INSERT INTO t4 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+UPDATE t1, t2 SET t1.a = t1.a + 100, t2.b = t1.a + 10 
+WHERE t1.a BETWEEN 2 AND 4 AND t2.a = t1.b;
+SELECT * FROM t2;
+a	b
+1	1
+2	12
+3	13
+4	14
+5	5
+UPDATE t3, t4 SET t3.a = t3.a + 100, t4.b = t3.a + 10 
+WHERE t3.a BETWEEN 2 AND 4 AND t4.a = t3.b - 100;
+SELECT * FROM t4;
+a	b
+1	1
+2	12
+3	13
+4	14
+5	5
+DROP TABLE t1, t2, t3, t4;
+#
+# Bug#44886: SIGSEGV in test_if_skip_sort_order() -
+#            uninitialized variable used as subscript 
+#
+CREATE TABLE t1 (a INT, b INT, c INT, d INT, PRIMARY KEY (b), KEY (a,c))
+ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1,1,1,0);
+CREATE TABLE t2 (a INT, b INT, e INT, KEY (e)) ENGINE=InnoDB;
+INSERT INTO t2 VALUES (1,1,2);
+CREATE TABLE t3 (a INT, b INT) ENGINE=MyISAM;
+INSERT INTO t3 VALUES (1, 1);
+SELECT * FROM t1, t2, t3
+WHERE t1.a = t3.a AND (t1.b = t3.b OR t1.d) AND t2.b = t1.b AND t2.e = 2
+GROUP BY t1.b;
+a	b	c	d	a	b	e	a	b
+1	1	1	0	1	1	2	1	1
+DROP TABLE t1, t2, t3;
+#
+# Bug #45828: Optimizer won't use partial primary key if another 
+# index can prevent filesort
+#
+CREATE TABLE `t1` (
+c1 int NOT NULL,
+c2 int NOT NULL,
+c3 int NOT NULL,
+PRIMARY KEY (c1,c2),
+KEY  (c3)
+) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (5,2,1246276747);
+INSERT INTO t1 VALUES (2,1,1246281721);
+INSERT INTO t1 VALUES (7,3,1246281756);
+INSERT INTO t1 VALUES (4,2,1246282139);
+INSERT INTO t1 VALUES (3,1,1246282230);
+INSERT INTO t1 VALUES (1,0,1246282712);
+INSERT INTO t1 VALUES (8,3,1246282765);
+INSERT INTO t1 SELECT c1+10,c2+10,c3+10 FROM t1;
+INSERT INTO t1 SELECT c1+100,c2+100,c3+100 from t1;
+INSERT INTO t1 SELECT c1+1000,c2+1000,c3+1000 from t1;
+INSERT INTO t1 SELECT c1+10000,c2+10000,c3+10000 from t1;
+INSERT INTO t1 SELECT c1+100000,c2+100000,c3+100000 from t1;
+INSERT INTO t1 SELECT c1+1000000,c2+1000000,c3+1000000 from t1;
+SELECT * FROM t1 WHERE c1 = 99999999 AND c3 > 1 ORDER BY c3;
+c1	c2	c3
+EXPLAIN SELECT * FROM t1 WHERE c1 = 99999999 AND c3 > 1 ORDER BY c3;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	PRIMARY,c3	PRIMARY	4	const	1	Using where; Using filesort
+EXPLAIN SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE c1 = 99999999 AND c3 > 1 ORDER BY c3;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	PRIMARY	PRIMARY	4	const	1	Using where; Using filesort
+CREATE TABLE t2 (
+c1 int NOT NULL,
+c2 int NOT NULL,
+c3 int NOT NULL,
+KEY (c1,c2),
+KEY (c3)
+) ENGINE=InnoDB;
+explain SELECT * FROM t2 WHERE c1 = 99999999 AND c3 > 1 ORDER BY c3;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	ref	c1,c3	c1	4	const	1	Using where; Using filesort
+DROP TABLE t1,t2;
+#
+# 36259: Optimizing with ORDER BY
+#
+CREATE TABLE t1 (
+a INT NOT NULL AUTO_INCREMENT,
+b INT NOT NULL,
+c INT NOT NULL,
+d VARCHAR(5),
+e INT NOT NULL,
+PRIMARY KEY (a), KEY i2 (b,c,d)
+) ENGINE=InnoDB;
+INSERT INTO t1 (b,c,d,e) VALUES (1,1,'a',1), (2,2,'b',2);
+INSERT INTO t1 (b,c,d,e) SELECT RAND()*10000, RAND()*10000, d, e FROM t1;
+INSERT INTO t1 (b,c,d,e) SELECT RAND()*10000, RAND()*10000, d, e FROM t1;
+INSERT INTO t1 (b,c,d,e) SELECT RAND()*10000, RAND()*10000, d, e FROM t1;
+INSERT INTO t1 (b,c,d,e) SELECT RAND()*10000, RAND()*10000, d, e FROM t1;
+INSERT INTO t1 (b,c,d,e) SELECT RAND()*10000, RAND()*10000, d, e FROM t1;
+INSERT INTO t1 (b,c,d,e) SELECT RAND()*10000, RAND()*10000, d, e FROM t1;
+EXPLAIN SELECT * FROM t1 WHERE b=1 AND c=1 ORDER BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	i2	i2	8	const,const	1	Using where; Using filesort
+EXPLAIN SELECT * FROM t1 FORCE INDEX(i2) WHERE b=1 and c=1 ORDER BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	i2	i2	8	const,const	1	Using where; Using filesort
+EXPLAIN SELECT * FROM t1 FORCE INDEX(PRIMARY) WHERE b=1 AND c=1 ORDER BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	128	Using where
+DROP TABLE t1;
 End of 5.1 tests
diff --git a/mysql-test/r/insert_select.result b/mysql-test/r/insert_select.result
index 780e91ea73f..2f2cc6334a9 100644
--- a/mysql-test/r/insert_select.result
+++ b/mysql-test/r/insert_select.result
@@ -765,6 +765,11 @@ f1	f2
 2	2
 10	10
 DROP TABLE t1, t2;
+CREATE TABLE t1 ( a INT KEY, b INT );
+INSERT INTO t1 VALUES ( 0, 1 );
+INSERT INTO t1 ( b ) SELECT MAX( b ) FROM t1 WHERE b = 2;
+ERROR 23000: Duplicate entry '0' for key 'PRIMARY'
+DROP TABLE t1;
 SET SQL_MODE='STRICT_TRANS_TABLES,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION';
 CREATE TABLE t1 (c VARCHAR(30), INDEX ix_c (c(10)));
 CREATE TABLE t2 (d VARCHAR(10));
diff --git a/mysql-test/r/log_tables_debug.result b/mysql-test/r/log_tables_debug.result
new file mode 100644
index 00000000000..daedb8c103a
--- /dev/null
+++ b/mysql-test/r/log_tables_debug.result
@@ -0,0 +1,24 @@
+SET @old_general_log= @@global.general_log;
+SET @old_general_log_file= @@global.general_log_file;
+SET @old_slow_query_log= @@global.slow_query_log;
+SET @old_slow_query_log_file= @@global.slow_query_log_file;
+#
+# Bug#45387 Information about statement id for prepared
+#           statements missed from general log
+#
+SET @@global.general_log = ON;
+SET @@global.general_log_file = 'bug45387_general.log';
+SET SESSION debug='+d,reset_log_last_time';
+FLUSH LOGS;
+SET @@global.general_log = @old_general_log;
+SET @@global.general_log_file = @old_general_log_file;
+SET SESSION debug='-d';
+Bug#45387: ID match.
+End of 5.1 tests
+#
+# Cleanup
+#
+SET global general_log = @old_general_log;
+SET global general_log_file = @old_general_log_file;
+SET global slow_query_log = @old_slow_query_log;
+SET global slow_query_log_file = @old_slow_query_log_file;
diff --git a/mysql-test/r/merge.result b/mysql-test/r/merge.result
index f53b328d14e..bf9108459d7 100644
--- a/mysql-test/r/merge.result
+++ b/mysql-test/r/merge.result
@@ -2127,4 +2127,18 @@ SELECT * FROM m1;
 ERROR HY000: Unable to open underlying table which is differently defined or of non-MyISAM type or doesn't exist
 DROP VIEW v1;
 DROP TABLE m1, t1;
+#
+# Bug #45796: invalid memory reads and writes when altering merge and 
+#             base tables
+#
+CREATE TABLE t1(c1 INT) ENGINE=MyISAM;
+CREATE TABLE m1(c1 INT) ENGINE=MERGE UNION=(t1);
+ALTER TABLE m1 ADD INDEX idx_c1(c1);
+SELECT * FROM m1;
+ERROR HY000: Unable to open underlying table which is differently defined or of non-MyISAM type or doesn't exist
+ALTER TABLE t1 ADD INDEX idx_c1(c1);
+SELECT * FROM m1;
+c1
+DROP TABLE m1;
+DROP TABLE t1;
 End of 5.1 tests
diff --git a/mysql-test/r/mysql-bug45236.result b/mysql-test/r/mysql-bug45236.result
new file mode 100644
index 00000000000..cefcb1d314c
--- /dev/null
+++ b/mysql-test/r/mysql-bug45236.result
@@ -0,0 +1,8 @@
+DROP TABLE IF EXISTS t1;
+SET @old_max_allowed_packet= @@global.max_allowed_packet;
+SET @@global.max_allowed_packet = 1024 * 1024 + 1024;
+CREATE TABLE t1(data LONGBLOB);
+INSERT INTO t1 SELECT CONCAT(REPEAT('1', 1024*1024 - 27), 
+"\'\r dummydb dummyhost");
+DROP TABLE t1;
+SET @@global.max_allowed_packet = @old_max_allowed_packet;
diff --git a/mysql-test/r/mysql.result b/mysql-test/r/mysql.result
index 5054c3aa76f..e704f81e187 100644
--- a/mysql-test/r/mysql.result
+++ b/mysql-test/r/mysql.result
@@ -192,6 +192,13 @@ delimiter
 1
 1
 1
+COUNT (*)
+1
+COUNT (*)
+1
+COUNT (*)
+1
+ERROR 2005 (HY000) at line 1: Unknown MySQL server host 'invalid_hostname' (errno)
 End of 5.0 tests
 WARNING: --server-arg option not supported in this configuration.
 Warning (Code 1286): Unknown table engine 'nonexistent'
@@ -200,4 +207,5 @@ Warning (Code 1286): Unknown table engine 'nonexistent2'
 Warning (Code 1266): Using storage engine MyISAM for table 't2'
 Error (Code 1050): Table 't2' already exists
 drop tables t1, t2;
+<TABLE BORDER=1><TR><TH>&lt;</TH></TR><TR><TD>&lt; &amp; &gt;</TD></TR></TABLE>
 End of tests
diff --git a/mysql-test/r/mysqlbinlog.result b/mysql-test/r/mysqlbinlog.result
index b55a96b6f30..295a2f41d40 100644
--- a/mysql-test/r/mysqlbinlog.result
+++ b/mysql-test/r/mysqlbinlog.result
@@ -471,4 +471,7 @@ IS NOT NULL
 1
 *** Unsigned server_id 4294967295 is found: 1 ***
 SET @@global.server_id= 1;
+RESET MASTER;
+FLUSH LOGS;
+End of 5.0 tests
 End of 5.1 tests
diff --git a/mysql-test/r/mysqldump.result b/mysql-test/r/mysqldump.result
index c97131563cb..2f8e0ce5ec1 100644
--- a/mysql-test/r/mysqldump.result
+++ b/mysql-test/r/mysqldump.result
@@ -3563,9 +3563,6 @@ grant REPLICATION CLIENT on *.* to mysqltest_1@localhost;
 drop table t1;
 drop user mysqltest_1@localhost;
 #
-# Bug#21527 mysqldump incorrectly tries to LOCK TABLES on the
-#           information_schema database.
-#
 # Bug#21424 mysqldump failing to export/import views
 #
 create database mysqldump_myDB;
@@ -3605,6 +3602,39 @@ drop user myDB_User@localhost;
 drop database mysqldump_myDB;
 use test;
 #
+# Bug #21527 mysqldump incorrectly tries to LOCK TABLES on the 
+# information_schema database.
+#
+# Bug #33762: mysqldump can not dump INFORMATION_SCHEMA
+#
+DROP TABLE IF EXISTS `TABLES`;
+/*!40101 SET @saved_cs_client     = @@character_set_client */;
+/*!40101 SET character_set_client = utf8 */;
+CREATE TEMPORARY TABLE `TABLES` (
+  `TABLE_CATALOG` varchar(512) DEFAULT NULL,
+  `TABLE_SCHEMA` varchar(64) NOT NULL DEFAULT '',
+  `TABLE_NAME` varchar(64) NOT NULL DEFAULT '',
+  `TABLE_TYPE` varchar(64) NOT NULL DEFAULT '',
+  `ENGINE` varchar(64) DEFAULT NULL,
+  `VERSION` bigint(21) unsigned DEFAULT NULL,
+  `ROW_FORMAT` varchar(10) DEFAULT NULL,
+  `TABLE_ROWS` bigint(21) unsigned DEFAULT NULL,
+  `AVG_ROW_LENGTH` bigint(21) unsigned DEFAULT NULL,
+  `DATA_LENGTH` bigint(21) unsigned DEFAULT NULL,
+  `MAX_DATA_LENGTH` bigint(21) unsigned DEFAULT NULL,
+  `INDEX_LENGTH` bigint(21) unsigned DEFAULT NULL,
+  `DATA_FREE` bigint(21) unsigned DEFAULT NULL,
+  `AUTO_INCREMENT` bigint(21) unsigned DEFAULT NULL,
+  `CREATE_TIME` datetime DEFAULT NULL,
+  `UPDATE_TIME` datetime DEFAULT NULL,
+  `CHECK_TIME` datetime DEFAULT NULL,
+  `TABLE_COLLATION` varchar(32) DEFAULT NULL,
+  `CHECKSUM` bigint(21) unsigned DEFAULT NULL,
+  `CREATE_OPTIONS` varchar(255) DEFAULT NULL,
+  `TABLE_COMMENT` varchar(80) NOT NULL DEFAULT ''
+) ENGINE=MEMORY DEFAULT CHARSET=utf8;
+/*!40101 SET character_set_client = @saved_cs_client */;
+#
 # Bug#19745 mysqldump --xml produces invalid xml
 #
 DROP TABLE IF EXISTS t1;
@@ -4006,6 +4036,181 @@ UNLOCK TABLES;
 /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
 
 DROP TABLE t1;
+create table t1 (a text , b text);
+create table t2 (a text , b text);
+insert t1 values ("Duck, Duck", "goose");
+insert t1 values ("Duck, Duck", "pidgeon");
+insert t2 values ("We the people", "in order to perform");
+insert t2 values ("a more perfect", "union");
+select * from t1;
+a	b
+Duck, Duck	goose
+Duck, Duck	pidgeon
+select * from t2;
+a	b
+We the people	in order to perform
+a more perfect	union
+test.t1: Records: 2  Deleted: 0  Skipped: 0  Warnings: 0
+test.t2: Records: 2  Deleted: 0  Skipped: 0  Warnings: 0
+select * from t1;
+a	b
+Duck, Duck	goose
+Duck, Duck	pidgeon
+Duck, Duck	goose
+Duck, Duck	pidgeon
+select * from t2;
+a	b
+We the people	in order to perform
+a more perfect	union
+We the people	in order to perform
+a more perfect	union
+create table words(a varchar(255));
+create table words2(b varchar(255));
+select * from t1;
+a	b
+Duck, Duck	goose
+Duck, Duck	pidgeon
+Duck, Duck	goose
+Duck, Duck	pidgeon
+Duck, Duck	goose
+Duck, Duck	pidgeon
+select * from t2;
+a	b
+We the people	in order to perform
+a more perfect	union
+We the people	in order to perform
+a more perfect	union
+We the people	in order to perform
+a more perfect	union
+select * from words;
+a
+Aarhus
+Aaron
+Ababa
+aback
+abaft
+abandon
+abandoned
+abandoning
+abandonment
+abandons
+Aarhus
+Aaron
+Ababa
+aback
+abaft
+abandon
+abandoned
+abandoning
+abandonment
+abandons
+abase
+abased
+abasement
+abasements
+abases
+abash
+abashed
+abashes
+abashing
+abasing
+abate
+abated
+abatement
+abatements
+abater
+abates
+abating
+Abba
+abbe
+abbey
+abbeys
+abbot
+abbots
+Abbott
+abbreviate
+abbreviated
+abbreviates
+abbreviating
+abbreviation
+abbreviations
+Abby
+abdomen
+abdomens
+abdominal
+abduct
+abducted
+abduction
+abductions
+abductor
+abductors
+abducts
+Abe
+abed
+Abel
+Abelian
+Abelson
+Aberdeen
+Abernathy
+aberrant
+aberration
+select * from words2;
+b
+abase
+abased
+abasement
+abasements
+abases
+abash
+abashed
+abashes
+abashing
+abasing
+abate
+abated
+abatement
+abatements
+abater
+abates
+abating
+Abba
+abbe
+abbey
+abbeys
+abbot
+abbots
+Abbott
+abbreviate
+abbreviated
+abbreviates
+abbreviating
+abbreviation
+abbreviations
+Abby
+abdomen
+abdomens
+abdominal
+abduct
+abducted
+abduction
+abductions
+abductor
+abductors
+abducts
+Abe
+abed
+Abel
+Abelian
+Abelson
+Aberdeen
+Abernathy
+aberrant
+aberration
+drop table words;
+mysql-import: Error: 1146, Table 'test.words' doesn't exist, when using table: words
+drop table t1;
+drop table t2;
+drop table words2;
 #
 # Bug#16853 mysqldump doesn't show events
 #
@@ -4226,6 +4431,57 @@ DROP DATABASE mysqldump_test_db;
 # -- End of test case for Bug#32538.
 
 SET @@GLOBAL.CONCURRENT_INSERT = @OLD_CONCURRENT_INSERT;
+
+Bug #34861 - mysqldump with --tab gives weird output for triggers.
+
+CREATE TABLE t1 (f1 INT);
+CREATE TRIGGER tr1 BEFORE UPDATE ON t1 FOR EACH ROW SET @f1 = 1;
+CREATE PROCEDURE pr1 () SELECT "Meow";
+CREATE EVENT ev1 ON SCHEDULE AT '2030-01-01 00:00:00' DO SELECT "Meow";
+
+SHOW TRIGGERS;
+Trigger	Event	Table	Statement	Timing	Created	sql_mode	Definer	character_set_client	collation_connection	Database Collation
+tr1	UPDATE	t1	SET @f1 = 1	BEFORE	NULL		root@localhost	latin1	latin1_swedish_ci	latin1_swedish_ci
+SHOW EVENTS;
+Db	Name	Definer	Time zone	Type	Execute at	Interval value	Interval field	Starts	Ends	Status	Originator	character_set_client	collation_connection	Database Collation
+test	ev1	root@localhost	SYSTEM	ONE TIME	2030-01-01 00:00:00	NULL	NULL	NULL	NULL	ENABLED	1	latin1	latin1_swedish_ci	latin1_swedish_ci
+SELECT name,body FROM mysql.proc WHERE NAME = 'pr1';
+name	body
+pr1	SELECT "Meow"
+
+dump table; if anything goes to stdout, it ends up here: ---------------
+
+drop everything
+DROP EVENT ev1;
+DROP TRIGGER tr1;
+DROP TABLE t1;
+DROP PROCEDURE pr1;
+
+reload table; this should restore table and trigger
+SHOW TRIGGERS;
+Trigger	Event	Table	Statement	Timing	Created	sql_mode	Definer	character_set_client	collation_connection	Database Collation
+tr1	UPDATE	t1	SET @f1 = 1	BEFORE	NULL		root@localhost	latin1	latin1_swedish_ci	latin1_swedish_ci
+SHOW EVENTS;
+Db	Name	Definer	Time zone	Type	Execute at	Interval value	Interval field	Starts	Ends	Status	Originator	character_set_client	collation_connection	Database Collation
+SELECT name,body FROM mysql.proc WHERE NAME = 'pr1';
+name	body
+
+reload db; this should restore routines and events
+SHOW TRIGGERS;
+Trigger	Event	Table	Statement	Timing	Created	sql_mode	Definer	character_set_client	collation_connection	Database Collation
+tr1	UPDATE	t1	SET @f1 = 1	BEFORE	NULL		root@localhost	latin1	latin1_swedish_ci	latin1_swedish_ci
+SHOW EVENTS;
+Db	Name	Definer	Time zone	Type	Execute at	Interval value	Interval field	Starts	Ends	Status	Originator	character_set_client	collation_connection	Database Collation
+test	ev1	root@localhost	SYSTEM	ONE TIME	2030-01-01 00:00:00	NULL	NULL	NULL	NULL	ENABLED	1	latin1	latin1_swedish_ci	latin1_swedish_ci
+SELECT name,body FROM mysql.proc WHERE NAME = 'pr1';
+name	body
+pr1	SELECT "Meow"
+
+cleanup
+DROP EVENT IF EXISTS ev1;
+DROP PROCEDURE IF EXISTS pr1;
+DROP TRIGGER IF EXISTS tr1;
+DROP TABLE IF EXISTS t1;
 #
 # End of 5.1 tests
 #
diff --git a/mysql-test/r/mysqldump_restore.result b/mysql-test/r/mysqldump_restore.result
new file mode 100644
index 00000000000..16698251913
--- /dev/null
+++ b/mysql-test/r/mysqldump_restore.result
@@ -0,0 +1,110 @@
+# Set concurrent_insert = 0 to prevent random errors
+# will reset to original value at the end of the test
+SET @old_concurrent_insert = @@global.concurrent_insert;
+SET @@global.concurrent_insert = 0;
+# Pre-test cleanup
+DROP TABLE IF EXISTS t1;
+# Begin tests
+#
+# Bug#2005 Long decimal comparison bug.
+#
+CREATE TABLE t1 (a DECIMAL(64, 20));
+INSERT INTO t1 VALUES ("1234567890123456789012345678901234567890"),
+("0987654321098765432109876543210987654321");
+# Begin testing mysqldump output + restore
+# Create 'original table name - <table>_orig
+SET @orig_table_name = CONCAT('test.t1', '_orig');
+# Rename original table
+ALTER TABLE test.t1 RENAME to test.t1_orig;
+# Recreate table from mysqldump output
+# Compare original and recreated tables
+# Recreated table: test.t1
+# Original table: test.t1_orig
+Comparing tables test.t1 and test.t1_orig
+# Cleanup
+DROP TABLE test.t1, test.t1_orig;
+#
+# Bug#3361 mysqldump quotes DECIMAL values inconsistently
+#
+CREATE TABLE t1 (a DECIMAL(10,5), b FLOAT);
+INSERT INTO t1 VALUES (1.2345, 2.3456);
+INSERT INTO t1 VALUES ('1.2345', 2.3456);
+INSERT INTO t1 VALUES ("1.2345", 2.3456);
+SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='ANSI_QUOTES';
+INSERT INTO t1 VALUES (1.2345, 2.3456);
+INSERT INTO t1 VALUES ('1.2345', 2.3456);
+INSERT INTO t1 VALUES ("1.2345", 2.3456);
+ERROR 42S22: Unknown column '1.2345' in 'field list'
+SET SQL_MODE=@OLD_SQL_MODE;
+# Begin testing mysqldump output + restore
+# Create 'original table name - <table>_orig
+SET @orig_table_name = CONCAT('test.t1', '_orig');
+# Rename original table
+ALTER TABLE test.t1 RENAME to test.t1_orig;
+# Recreate table from mysqldump output
+# Compare original and recreated tables
+# Recreated table: test.t1
+# Original table: test.t1_orig
+Comparing tables test.t1 and test.t1_orig
+# Cleanup
+DROP TABLE test.t1, test.t1_orig;
+#
+# Bug#1994 mysqldump does not correctly dump UCS2 data
+# Bug#4261 mysqldump 10.7 (mysql 4.1.2) --skip-extended-insert drops NULL from inserts
+#
+CREATE TABLE t1 (a  VARCHAR(255)) DEFAULT CHARSET koi8r;
+INSERT INTO t1  VALUES (_koi8r x'C1C2C3C4C5'), (NULL);
+# Begin testing mysqldump output + restore
+# Create 'original table name - <table>_orig
+SET @orig_table_name = CONCAT('test.t1', '_orig');
+# Rename original table
+ALTER TABLE test.t1 RENAME to test.t1_orig;
+# Recreate table from mysqldump output
+# Compare original and recreated tables
+# Recreated table: test.t1
+# Original table: test.t1_orig
+Comparing tables test.t1 and test.t1_orig
+# Cleanup
+DROP TABLE test.t1, test.t1_orig;
+#
+# WL#2319 Exclude Tables from dump
+#
+CREATE TABLE t1 (a INT);
+CREATE TABLE t2 (a INT);
+INSERT INTO t1 VALUES (1),(2),(3);
+INSERT INTO t2 VALUES (4),(5),(6);
+# Begin testing mysqldump output + restore
+# Create 'original table name - <table>_orig
+SET @orig_table_name = CONCAT('test.t2', '_orig');
+# Rename original table
+ALTER TABLE test.t2 RENAME to test.t2_orig;
+# Recreate table from mysqldump output
+# Compare original and recreated tables
+# Recreated table: test.t2
+# Original table: test.t2_orig
+Comparing tables test.t2 and test.t2_orig
+# Cleanup
+DROP TABLE test.t2, test.t2_orig;
+DROP TABLE t1;
+#
+# Bug#8830 mysqldump --skip-extended-insert causes --hex-blob to dump wrong values
+#
+CREATE TABLE t1 (`b` blob);
+INSERT INTO `t1` VALUES (0x602010000280100005E71A);
+# Begin testing mysqldump output + restore
+# Create 'original table name - <table>_orig
+SET @orig_table_name = CONCAT('test.t1', '_orig');
+# Rename original table
+ALTER TABLE test.t1 RENAME to test.t1_orig;
+# Recreate table from mysqldump output
+# Compare original and recreated tables
+# Recreated table: test.t1
+# Original table: test.t1_orig
+Comparing tables test.t1 and test.t1_orig
+# Cleanup
+DROP TABLE test.t1, test.t1_orig;
+# End tests
+# Cleanup
+# Reset concurrent_insert to its original value
+SET @@global.concurrent_insert = @old_concurrent_insert;
+# remove mysqldumpfile
diff --git a/mysql-test/r/mysqltest.result b/mysql-test/r/mysqltest.result
index a9c20e34517..52a1734ea54 100644
--- a/mysql-test/r/mysqltest.result
+++ b/mysql-test/r/mysqltest.result
@@ -545,6 +545,8 @@ mysqltest: At line 1: Failed to open file 'non_existing_file'
 mysqltest: At line 1: Missing required argument 'filename' to command 'file_exists'
 mysqltest: At line 1: Missing required argument 'from_file' to command 'copy_file'
 mysqltest: At line 1: Missing required argument 'to_file' to command 'copy_file'
+mysqltest: At line 1: Missing required argument 'from_file' to command 'move_file'
+mysqltest: At line 1: Missing required argument 'to_file' to command 'move_file'
 mysqltest: At line 1: Missing required argument 'mode' to command 'chmod'
 mysqltest: At line 1: You must write a 4 digit octal number for mode
 mysqltest: At line 1: You must write a 4 digit octal number for mode
@@ -697,6 +699,7 @@ statement="SHOW COLUMNS FROM t1" row_number=1, column_name="Type", Value=int(11)
 statement=SHOW COLUMNS FROM t1 row_number=1, column_name=Default, Value=NULL
 value= ->A B<-
 value= 1
+value= 2
 mysqltest: At line 1: query_get_value - argument list started with '(' must be ended with ')'
 mysqltest: At line 1: Missing required argument 'query' to command 'query_get_value'
 mysqltest: At line 1: Missing required argument 'column name' to command 'query_get_value'
diff --git a/mysql-test/r/openssl_1.result b/mysql-test/r/openssl_1.result
index c408c14b716..b0dd3acd662 100644
--- a/mysql-test/r/openssl_1.result
+++ b/mysql-test/r/openssl_1.result
@@ -202,4 +202,10 @@ Ssl_cipher	RC4-SHA
 select 'is still running; no cipher request crashed the server' as result from dual;
 result
 is still running; no cipher request crashed the server
+GRANT SELECT ON test.* TO bug42158@localhost REQUIRE X509;
+FLUSH PRIVILEGES;
+SHOW STATUS LIKE 'Ssl_cipher';
+Variable_name	Value
+Ssl_cipher	DHE-RSA-AES256-SHA
+DROP USER bug42158@localhost;
 End of 5.1 tests
diff --git a/mysql-test/r/partition.result b/mysql-test/r/partition.result
index f69ba522a9c..8e3fbde1ea8 100644
--- a/mysql-test/r/partition.result
+++ b/mysql-test/r/partition.result
@@ -1924,5 +1924,72 @@ EXPLAIN PARTITIONS SELECT * FROM t1 WHERE a>=200;
 id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	p2,p3	ALL	NULL	NULL	NULL	NULL	3	Using where
 DROP TABLE t1;
+CREATE TABLE t1 ( a INT, b INT, c INT, KEY bc(b, c) )
+PARTITION BY KEY (a, b) PARTITIONS 3
+;
+INSERT INTO t1 VALUES
+(17, 1, -8),
+(3,  1, -7),
+(23, 1, -6),
+(22, 1, -5),
+(11, 1, -4),
+(21, 1, -3),
+(19, 1, -2),
+(30, 1, -1),
+(20, 1, 1),
+(16, 1, 2),
+(18, 1, 3),
+(9,  1, 4),
+(15, 1, 5),
+(28, 1, 6),
+(29, 1, 7),
+(25, 1, 8),
+(10, 1, 9),
+(13, 1, 10),
+(27, 1, 11),
+(24, 1, 12),
+(12, 1, 13),
+(26, 1, 14),
+(14, 1, 15)
+;
+SELECT b, c FROM t1 WHERE b = 1 GROUP BY b, c;
+b	c
+1	-8
+1	-7
+1	-6
+1	-5
+1	-4
+1	-3
+1	-2
+1	-1
+1	1
+1	2
+1	3
+1	4
+1	5
+1	6
+1	7
+1	8
+1	9
+1	10
+1	11
+1	12
+1	13
+1	14
+1	15
+EXPLAIN
+SELECT b, c FROM t1 WHERE b = 1 GROUP BY b, c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	bc	bc	10	NULL	7	Using where; Using index for group-by
+DROP TABLE t1;
+#
+# Bug #45807: crash accessing partitioned table and sql_mode 
+#   contains ONLY_FULL_GROUP_BY
+#
+SET SESSION SQL_MODE='ONLY_FULL_GROUP_BY';
+CREATE TABLE t1(id INT,KEY(id)) ENGINE=MYISAM 
+PARTITION BY HASH(id) PARTITIONS 2;
+DROP TABLE t1;
+SET SESSION SQL_MODE=DEFAULT;
 End of 5.1 tests
 SET @@global.general_log= @old_general_log;
diff --git a/mysql-test/r/query_cache_debug.result b/mysql-test/r/query_cache_debug.result
index b03a71d3fec..eb59e62c8ba 100644
--- a/mysql-test/r/query_cache_debug.result
+++ b/mysql-test/r/query_cache_debug.result
@@ -71,3 +71,111 @@ DROP TABLE t1,t2;
 SET GLOBAL concurrent_insert= DEFAULT;
 SET GLOBAL query_cache_size= DEFAULT;
 SET GLOBAL query_cache_type= DEFAULT;
+#
+# Bug43758 Query cache can lock up threads in 'freeing items' state
+#
+FLUSH STATUS;
+SET GLOBAL query_cache_type=DEMAND;
+SET GLOBAL query_cache_size= 1024*768;
+DROP TABLE IF EXISTS t1,t2,t3,t4,t5;
+CREATE TABLE t1 (a VARCHAR(100));
+CREATE TABLE t2 (a VARCHAR(100));
+CREATE TABLE t3 (a VARCHAR(100));
+CREATE TABLE t4 (a VARCHAR(100));
+CREATE TABLE t5 (a VARCHAR(100));
+INSERT INTO t1 VALUES ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb');
+INSERT INTO t2 VALUES ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb');
+INSERT INTO t3 VALUES ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb');
+INSERT INTO t4 VALUES ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb');
+INSERT INTO t5 VALUES ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb');
+=================================== Connection thd1
+**
+** Load Query Cache with a result set and one table.
+**
+SELECT SQL_CACHE * FROM t1;
+a
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+*************************************************************************
+** We want to accomplish the following state:
+**  - Query cache status: TABLE_FLUSH_IN_PROGRESS
+**  - THD1: invalidate_table_internal (iterating query blocks)
+**  - THD2: query_cache_insert (cond_wait)
+**  - THD3: query_cache_insert (cond_wait)
+**  - No thread should be holding the structure_guard_mutex.
+**
+** First step is to place a DELETE-statement on the debug hook just
+** before the mutex lock in invalidate_table_internal.
+** This will allow new result sets to be written into the QC.
+** 
+SET SESSION debug='+d,wait_in_query_cache_invalidate1';
+SET SESSION debug='+d,wait_in_query_cache_invalidate2';
+DELETE FROM t1 WHERE a like '%a%';;
+=================================== Connection default
+** Assert that the expect process status is obtained.
+**
+=================================== Connection thd2
+** On THD2: Insert a result into the cache. This attempt will be blocked
+** because of a debug hook placed just before the mutex lock after which
+** the first part of the result set is written.
+SET SESSION debug='+d,wait_in_query_cache_insert';
+SELECT SQL_CACHE * FROM t2 UNION SELECT * FROM t3;
+=================================== Connection thd3
+** On THD3: Insert another result into the cache and block on the same
+** debug hook.
+SET SESSION debug='+d,wait_in_query_cache_insert';
+SELECT SQL_CACHE * FROM t4 UNION SELECT * FROM t5;;
+=================================== Connection default
+** Assert that the two SELECT-stmt threads to reach the hook.
+**
+**
+** Signal the DELETE thread, THD1, to continue. It will enter the mutex
+** lock and set query cache status to TABLE_FLUSH_IN_PROGRESS and then
+** unlock the mutex before stopping on the next debug hook.
+SELECT SQL_NO_CACHE id FROM information_schema.processlist WHERE state='wait_in_query_cache_invalidate1' LIMIT 1 INTO @flush_thread_id;
+KILL QUERY @flush_thread_id;
+** Assert that we reach the next debug hook.
+**
+** Signal the remaining debug hooks blocking THD2 and THD3.
+** The threads will grab the guard mutex enter the wait condition and
+** and finally release the mutex. The threads will continue to wait
+** until a broadcast signal reaches them causing both threads to 
+** come alive and check the condition.
+SELECT SQL_NO_CACHE id FROM information_schema.processlist WHERE state='wait_in_query_cache_insert' ORDER BY id ASC LIMIT 1 INTO @thread_id;
+KILL QUERY @thread_id;
+SELECT SQL_NO_CACHE id FROM information_schema.processlist WHERE state='wait_in_query_cache_insert' ORDER BY id DESC LIMIT 1 INTO @thread_id;
+KILL QUERY @thread_id;
+**
+** Finally signal the DELETE statement on THD1 one last time.
+** The stmt will complete the query cache invalidation and return 
+** cache status to NO_FLUSH_IN_PROGRESS. On the status change
+** One signal will be sent to the thread group waiting for executing
+** invalidations and a broadcast signal will be sent to the thread 
+** group holding result set writers.
+SELECT SQL_NO_CACHE id FROM information_schema.processlist WHERE state='wait_in_query_cache_invalidate2' LIMIT 1 INTO @flush_thread_id;
+KILL QUERY @flush_thread_id;
+**
+*************************************************************************
+** No tables should be locked
+=================================== Connection thd2
+a
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+DELETE FROM t1;
+DELETE FROM t2;
+DELETE FROM t3;
+=================================== Connection thd3
+a
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+DELETE FROM t4;
+DELETE FROM t5;
+=================================== Connection thd1
+** Done.
+SET GLOBAL query_cache_size= 0;
+# Restore defaults
+RESET QUERY CACHE;
+FLUSH STATUS;
+DROP TABLE t1,t2,t3,t4,t5;
+SET GLOBAL query_cache_size= DEFAULT;
+SET GLOBAL query_cache_type= DEFAULT;
diff --git a/mysql-test/r/select.result b/mysql-test/r/select.result
index 09c7d1b329d..50b5c3c13fb 100644
--- a/mysql-test/r/select.result
+++ b/mysql-test/r/select.result
@@ -4457,4 +4457,83 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 Warnings:
 Note	1003	select '0' AS `a`,'0' AS `b`,'0' AS `c` from `test`.`t1` where 1
 DROP TABLE t1;
+#
+# Bug#45266: Uninitialized variable lead to an empty result.
+#
+drop table if exists A,AA,B,BB;
+CREATE TABLE `A` (
+`pk` int(11) NOT NULL AUTO_INCREMENT,
+`date_key` date NOT NULL,
+`date_nokey` date NOT NULL,
+`datetime_key` datetime NOT NULL,
+`int_nokey` int(11) NOT NULL,
+`time_key` time NOT NULL,
+`time_nokey` time NOT NULL,
+PRIMARY KEY (`pk`),
+KEY `date_key` (`date_key`),
+KEY `time_key` (`time_key`),
+KEY `datetime_key` (`datetime_key`)
+);
+CREATE TABLE `AA` (
+`pk` int(11) NOT NULL AUTO_INCREMENT,
+`int_nokey` int(11) NOT NULL,
+`time_key` time NOT NULL,
+KEY `time_key` (`time_key`),
+PRIMARY KEY (`pk`)
+);
+CREATE TABLE `B` (
+`date_nokey` date NOT NULL,
+`date_key` date NOT NULL,
+`time_key` time NOT NULL,
+`datetime_nokey` datetime NOT NULL,
+`varchar_key` varchar(1) NOT NULL,
+KEY `date_key` (`date_key`),
+KEY `time_key` (`time_key`),
+KEY `varchar_key` (`varchar_key`)
+);
+INSERT INTO `B` VALUES ('2003-07-28','2003-07-28','15:13:38','0000-00-00 00:00:00','f'),('0000-00-00','0000-00-00','00:05:48','2004-07-02 14:34:13','x');
+CREATE TABLE `BB` (
+`pk` int(11) NOT NULL AUTO_INCREMENT,
+`int_nokey` int(11) NOT NULL,
+`date_key` date NOT NULL,
+`varchar_nokey` varchar(1) NOT NULL,
+`date_nokey` date NOT NULL,
+PRIMARY KEY (`pk`),
+KEY `date_key` (`date_key`)
+);
+INSERT INTO `BB` VALUES (10,8,'0000-00-00','i','0000-00-00'),(11,0,'2005-08-18','','2005-08-18');
+SELECT table1 . `pk` AS field1 
+FROM 
+(BB AS table1 INNER JOIN 
+(AA AS table2 STRAIGHT_JOIN A AS table3 
+ON ( table3 . `date_key` = table2 . `pk` ))
+ON ( table3 . `datetime_key` = table2 . `int_nokey` ))
+WHERE  ( table3 . `date_key` <= 4 AND table2 . `pk` = table1 . `varchar_nokey`)
+GROUP BY field1 ;
+field1
+SELECT table3 .`date_key` field1
+FROM
+B table1 LEFT JOIN B table3 JOIN
+(BB table6 JOIN A table7 ON table6 .`varchar_nokey`)
+ON table6 .`int_nokey` ON table6 .`date_key`
+  WHERE  NOT ( table1 .`varchar_key`  AND table7 .`pk`) GROUP  BY field1;
+field1
+NULL
+SELECT table4 . `time_nokey` AS field1 FROM 
+(AA AS table1 CROSS JOIN 
+(AA AS table2 STRAIGHT_JOIN 
+(B AS table3 STRAIGHT_JOIN A AS table4 
+ON ( table4 . `date_key` = table3 . `time_key` ))
+ON ( table4 . `pk` = table3 . `date_nokey` ))
+ON ( table4 . `time_key` = table3 . `datetime_nokey` ))
+WHERE  ( table4 . `time_key` < table1 . `time_key` AND
+table1 . `int_nokey` != 'f')
+GROUP BY field1  ORDER BY field1 , field1;
+field1
+SELECT table1 .`time_key` field2  FROM B table1  LEFT JOIN  BB JOIN A table5 ON table5 .`date_nokey`  ON table5 .`int_nokey` GROUP  BY field2;
+field2
+00:05:48
+15:13:38
+drop table A,AA,B,BB;
+#end of test for bug#45266
 End of 5.1 tests
diff --git a/mysql-test/r/sp-error.result b/mysql-test/r/sp-error.result
index 35d61ce757d..17ab2b79043 100644
--- a/mysql-test/r/sp-error.result
+++ b/mysql-test/r/sp-error.result
@@ -1660,3 +1660,13 @@ declare continue handler for sqlstate '00000' set @x=0;
 end$$
 ERROR 42000: Bad SQLSTATE: '00000'
 LOAD DATA INFILE '../../tmp/proc.txt' INTO TABLE mysql.proc;
+CREATE TABLE t1 (a INT, b INT);
+INSERT INTO t1 VALUES (1,1), (2,2);
+SELECT MAX (a) FROM t1 WHERE b = 999999;
+ERROR 42000: FUNCTION test.MAX does not exist. Check the 'Function Name Parsing and Resolution' section in the Reference Manual
+SELECT AVG (a) FROM t1 WHERE b = 999999;
+AVG (a)
+NULL
+SELECT non_existent (a) FROM t1 WHERE b = 999999;
+ERROR 42000: FUNCTION test.non_existent does not exist
+DROP TABLE t1;
diff --git a/mysql-test/r/sp-fib.result b/mysql-test/r/sp-fib.result
new file mode 100644
index 00000000000..a26e104c1e8
--- /dev/null
+++ b/mysql-test/r/sp-fib.result
@@ -0,0 +1,33 @@
+drop table if exists t3;
+create table t3 ( f bigint unsigned not null );
+drop procedure if exists fib;
+create procedure fib(n int unsigned)
+begin
+if n > 1 then
+begin
+declare x, y bigint unsigned;
+declare c cursor for select f from t3 order by f desc limit 2;
+open c;
+fetch c into y;
+fetch c into x;
+insert into t3 values (x+y);
+call fib(n-1);
+## Close the cursor AFTER the recursion to ensure that the stack
+## frame is somewhat intact.
+close c;
+end;
+end if;
+end|
+set @@max_sp_recursion_depth= 20|
+insert into t3 values (0), (1)|
+call fib(4)|
+select * from t3 order by f asc|
+f
+0
+1
+1
+2
+3
+drop table t3|
+drop procedure fib|
+set @@max_sp_recursion_depth= 0|
diff --git a/mysql-test/r/sp.result b/mysql-test/r/sp.result
index 9574841bc35..3ad556b8c30 100644
--- a/mysql-test/r/sp.result
+++ b/mysql-test/r/sp.result
@@ -1337,52 +1337,6 @@ drop procedure opp|
 drop procedure ip|
 show procedure status where name like '%p%' and db='test'|
 Db	Name	Type	Definer	Modified	Created	Security_type	Comment	character_set_client	collation_connection	Database Collation
-drop table if exists t3|
-create table t3 ( f bigint unsigned not null )|
-drop procedure if exists fib|
-create procedure fib(n int unsigned)
-begin
-if n > 1 then
-begin
-declare x, y bigint unsigned;
-declare c cursor for select f from t3 order by f desc limit 2;
-open c;
-fetch c into y;
-fetch c into x;
-close c;
-insert into t3 values (x+y);
-call fib(n-1);
-end;
-end if;
-end|
-set @@max_sp_recursion_depth= 20|
-insert into t3 values (0), (1)|
-call fib(3)|
-select * from t3 order by f asc|
-f
-0
-1
-1
-2
-truncate table t3|
-insert into t3 values (0), (1)|
-call fib(10)|
-select * from t3 order by f asc|
-f
-0
-1
-1
-2
-3
-5
-8
-13
-21
-34
-55
-drop table t3|
-drop procedure fib|
-set @@max_sp_recursion_depth= 0|
 drop procedure if exists bar|
 create procedure bar(x char(16), y int)
 comment "111111111111" sql security invoker
diff --git a/mysql-test/r/sp_notembedded.result b/mysql-test/r/sp_notembedded.result
index d15efc6d7d7..c6641e673ee 100644
--- a/mysql-test/r/sp_notembedded.result
+++ b/mysql-test/r/sp_notembedded.result
@@ -233,4 +233,19 @@ rl_acquirer	old
 drop procedure p1;
 drop table t1;
 set session low_priority_updates=default;
+INSERT INTO mysql.user (Host, User, Password, Select_priv, Insert_priv, Update_priv,
+Delete_priv, Create_priv, Drop_priv, Reload_priv, Shutdown_priv, Process_priv, File_priv,
+Grant_priv, References_priv, Index_priv, Alter_priv, Show_db_priv, Super_priv,
+Create_tmp_table_priv, Lock_tables_priv, Execute_priv, Repl_slave_priv, Repl_client_priv,
+Create_view_priv, Show_view_priv, Create_routine_priv, Alter_routine_priv,
+Create_user_priv, ssl_type, ssl_cipher, x509_issuer, x509_subject, max_questions,
+max_updates, max_connections, max_user_connections) 
+VALUES('%', 'mysqltest_1', password(''), 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'N', 'N', 'N',
+'N', 'N', 'N', 'Y', 'Y', 'N', 'N', 'Y', 'Y', 'N', 'N', 'N', 'N', 'N', 'Y', 'Y', 'N', '',
+'', '', '', '0', '0', '0', '0');
+FLUSH PRIVILEGES;
+CREATE PROCEDURE p1(i INT) BEGIN END;
+DROP PROCEDURE p1;
+DELETE FROM mysql.user WHERE User='mysqltest_1';
+FLUSH PRIVILEGES;
 set @@global.concurrent_insert= @old_concurrent_insert;
diff --git a/mysql-test/r/sql_mode.result b/mysql-test/r/sql_mode.result
index 401340f204c..0b0d5a38d0b 100644
--- a/mysql-test/r/sql_mode.result
+++ b/mysql-test/r/sql_mode.result
@@ -506,3 +506,24 @@ mysqltest_32753@localhost
 set session sql_mode=@OLD_SQL_MODE;
 flush privileges;
 drop user mysqltest_32753@localhost;
+DROP TABLE IF EXISTS t1,t2;
+CREATE USER 'user_PCTFL'@'localhost' identified by 'PWD';
+CREATE USER 'user_no_PCTFL'@'localhost' identified by 'PWD';
+CREATE TABLE t1 (f1 BIGINT);
+CREATE TABLE t2 (f1 CHAR(3) NOT NULL, f2 CHAR(20));
+GRANT ALL ON t1 TO 'user_PCTFL'@'localhost','user_no_PCTFL'@'localhost';
+GRANT SELECT(f1) ON t2 TO 'user_PCTFL'@'localhost','user_no_PCTFL'@'localhost';
+SET @OLD_SQL_MODE = @@SESSION.SQL_MODE;
+SET SESSION SQL_MODE = 'PAD_CHAR_TO_FULL_LENGTH';
+DROP USER 'user_PCTFL'@'localhost';
+SET SESSION SQL_MODE = @OLD_SQL_MODE;
+DROP USER 'user_no_PCTFL'@'localhost';
+FLUSH PRIVILEGES;
+SELECT * FROM mysql.db WHERE Host = 'localhost' AND User LIKE 'user_%PCTFL';
+Host	Db	User	Select_priv	Insert_priv	Update_priv	Delete_priv	Create_priv	Drop_priv	Grant_priv	References_priv	Index_priv	Alter_priv	Create_tmp_table_priv	Lock_tables_priv	Create_view_priv	Show_view_priv	Create_routine_priv	Alter_routine_priv	Execute_priv	Event_priv	Trigger_priv
+SELECT * FROM mysql.tables_priv WHERE Host = 'localhost' AND User LIKE 'user_%PCTFL';
+Host	Db	User	Table_name	Grantor	Timestamp	Table_priv	Column_priv
+SELECT * FROM mysql.columns_priv WHERE Host = 'localhost' AND User LIKE 'user_%PCTFL';
+Host	Db	User	Table_name	Column_name	Timestamp	Column_priv
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/mysql-test/r/status.result b/mysql-test/r/status.result
index ca815540c29..ce3acba9b8a 100644
--- a/mysql-test/r/status.result
+++ b/mysql-test/r/status.result
@@ -1,13 +1,15 @@
 set @old_concurrent_insert= @@global.concurrent_insert;
 set @@global.concurrent_insert= 0;
+SET @old_log_output = @@global.log_output;
+SET GLOBAL LOG_OUTPUT = 'FILE';
 flush status;
 show status like 'Table_lock%';
 Variable_name	Value
-Table_locks_immediate	1
+Table_locks_immediate	0
 Table_locks_waited	0
 select * from information_schema.session_status where variable_name like 'Table_lock%';
 VARIABLE_NAME	VARIABLE_VALUE
-TABLE_LOCKS_IMMEDIATE	2
+TABLE_LOCKS_IMMEDIATE	0
 TABLE_LOCKS_WAITED	0
 # Switched to connection: con1
 set sql_log_bin=0;
@@ -154,7 +156,7 @@ Variable_name	Value
 Com_show_status	3
 show status like 'hand%write%';
 Variable_name	Value
-Handler_write	5
+Handler_write	0
 show status like '%tmp%';
 Variable_name	Value
 Created_tmp_disk_tables	0
@@ -162,7 +164,7 @@ Created_tmp_files	0
 Created_tmp_tables	0
 show status like 'hand%write%';
 Variable_name	Value
-Handler_write	7
+Handler_write	0
 show status like '%tmp%';
 Variable_name	Value
 Created_tmp_disk_tables	0
@@ -237,3 +239,4 @@ SELECT 9;
 DROP PROCEDURE p1;
 DROP FUNCTION f1;
 set @@global.concurrent_insert= @old_concurrent_insert;
+SET GLOBAL log_output = @old_log_output;
diff --git a/mysql-test/r/subselect3.result b/mysql-test/r/subselect3.result
index e0f361a0f4f..f055b40116a 100644
--- a/mysql-test/r/subselect3.result
+++ b/mysql-test/r/subselect3.result
@@ -849,6 +849,25 @@ ROW(1,2) = (SELECT    1,    1)	ROW(1,2) IN (SELECT    1,    1)
 SELECT ROW(1,2) = (SELECT    1,    2), ROW(1,2) IN (SELECT    1,    2);
 ROW(1,2) = (SELECT    1,    2)	ROW(1,2) IN (SELECT    1,    2)
 1	1
+CREATE TABLE t1 (a INT, b INT, c INT);
+INSERT INTO t1 VALUES (1,1,1), (1,1,1);
+EXPLAIN EXTENDED 
+SELECT c FROM 
+( SELECT 
+(SELECT COUNT(a) FROM 
+(SELECT COUNT(b) FROM t1) AS x GROUP BY c
+) FROM t1 GROUP BY b
+) AS y;
+ERROR 42S22: Unknown column 'c' in 'field list'
+SHOW WARNINGS;
+Level	Code	Message
+Note	1276	Field or reference 'test.t1.a' of SELECT #3 was resolved in SELECT #2
+Note	1276	Field or reference 'test.t1.c' of SELECT #3 was resolved in SELECT #2
+Error	1054	Unknown column 'c' in 'field list'
+Note	1003	select `c` AS `c` from (select (select count(`test`.`t1`.`a`) AS `COUNT(a)` from (select count(`test`.`t1`.`b`) AS `COUNT(b)` from `test`.`t1`) `x` group by `c`) AS `(SELECT COUNT(a) FROM 
+(SELECT COUNT(b) FROM t1) AS x GROUP BY c
+)` from `test`.`t1` group by `test`.`t1`.`b`) `y`
+DROP TABLE t1;
 End of 5.0 tests
 create table t0 (a int);
 insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
diff --git a/mysql-test/r/trigger.result b/mysql-test/r/trigger.result
index 23f15f618f2..4476735735c 100644
--- a/mysql-test/r/trigger.result
+++ b/mysql-test/r/trigger.result
@@ -2073,4 +2073,18 @@ select @a, @b;
 drop trigger trg1;
 drop trigger trg2;
 drop table t1, t2;
+CREATE TABLE t1 ( a INT, b INT );
+CREATE TABLE t2 ( a INT AUTO_INCREMENT KEY, b INT );
+INSERT INTO t1 (a) VALUES (1);
+CREATE TRIGGER tr1
+BEFORE INSERT ON t2
+FOR EACH ROW 
+BEGIN 
+UPDATE a_nonextisting_table SET a = 1;
+END//
+CREATE TABLE IF NOT EXISTS t2 ( a INT, b INT ) SELECT a, b FROM t1;
+ERROR 42S02: Table 'test.a_nonextisting_table' doesn't exist
+SELECT * FROM t2;
+a	b
+DROP TABLE t1, t2;
 End of 5.1 tests.
diff --git a/mysql-test/r/trigger_notembedded.result b/mysql-test/r/trigger_notembedded.result
index 1e13bff03b1..335e6910a3a 100644
--- a/mysql-test/r/trigger_notembedded.result
+++ b/mysql-test/r/trigger_notembedded.result
@@ -462,4 +462,18 @@ unlock tables;
 select * from t1;
 i
 drop table t1;
+CREATE DATABASE db1;
+CREATE TABLE db1.t1 (a char(30)) ENGINE=MEMORY;
+CREATE TRIGGER db1.trg AFTER INSERT ON db1.t1 FOR EACH ROW
+INSERT INTO db1.t1 VALUES('Some very sensitive data goes here');
+CREATE USER 'no_rights'@'localhost';
+REVOKE ALL ON *.* FROM 'no_rights'@'localhost';
+FLUSH PRIVILEGES;
+SELECT trigger_name FROM INFORMATION_SCHEMA.TRIGGERS
+WHERE trigger_schema = 'db1';
+trigger_name
+SHOW CREATE TRIGGER db1.trg;
+ERROR 42000: Access denied; you need the TRIGGER privilege for this operation
+DROP USER 'no_rights'@'localhost';
+DROP DATABASE db1;
 End of 5.1 tests.
diff --git a/mysql-test/r/type_newdecimal.result b/mysql-test/r/type_newdecimal.result
index f2b08d1c6b7..748aadee4fb 100644
--- a/mysql-test/r/type_newdecimal.result
+++ b/mysql-test/r/type_newdecimal.result
@@ -1524,10 +1524,10 @@ Warnings:
 Warning	1264	Out of range value for column 'f1' at row 1
 DESC t1;
 Field	Type	Null	Key	Default	Extra
-f1	decimal(59,30)	NO		0.000000000000000000000000000000	
+f1	decimal(65,30)	NO		0.000000000000000000000000000000	
 SELECT f1 FROM t1;
 f1
-99999999999999999999999999999.999999999999999999999999999999
+99999999999999999999999999999999999.999999999999999999999999999999
 DROP TABLE t1;
 select (1.20396873 * 0.89550000 * 0.68000000 * 1.08721696 * 0.99500000 *
 1.01500000 * 1.01500000 * 0.99500000);
@@ -1577,3 +1577,56 @@ Error	1264	Out of range value for column 'cast(-13.4 as decimal(2,1))' at row 1
 select cast(98.6 as decimal(2,0));
 cast(98.6 as decimal(2,0))
 99
+#
+# Bug #45262: Bad effects with CREATE TABLE and DECIMAL
+#
+CREATE TABLE t1 SELECT .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS my_col;
+Warnings:
+Note	1265	Data truncated for column 'my_col' at row 1
+DESCRIBE t1;
+Field	Type	Null	Key	Default	Extra
+my_col	decimal(30,30)	NO		0.000000000000000000000000000000	
+SELECT my_col FROM t1;
+my_col
+0.123456789123456789123456789123
+DROP TABLE t1;
+CREATE TABLE t1 SELECT 1 + .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS my_col;
+Warnings:
+Note	1265	Data truncated for column 'my_col' at row 1
+DESCRIBE t1;
+Field	Type	Null	Key	Default	Extra
+my_col	decimal(65,30)	NO		0.000000000000000000000000000000	
+SELECT my_col FROM t1;
+my_col
+1.123456789123456789123456789123
+DROP TABLE t1;
+CREATE TABLE t1 SELECT 1 * .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS my_col;
+Warnings:
+Note	1265	Data truncated for column 'my_col' at row 1
+DESCRIBE t1;
+Field	Type	Null	Key	Default	Extra
+my_col	decimal(65,30)	NO		0.000000000000000000000000000000	
+SELECT my_col FROM t1;
+my_col
+0.123456789123456789123456789123
+DROP TABLE t1;
+CREATE TABLE t1 SELECT 1 / .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS my_col;
+Warnings:
+Note	1265	Data truncated for column 'my_col' at row 1
+DESCRIBE t1;
+Field	Type	Null	Key	Default	Extra
+my_col	decimal(65,4)	YES		NULL	
+SELECT my_col FROM t1;
+my_col
+8.1000
+DROP TABLE t1;
+CREATE TABLE t1 SELECT 1 % .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS my_col;
+Warnings:
+Note	1265	Data truncated for column 'my_col' at row 1
+DESCRIBE t1;
+Field	Type	Null	Key	Default	Extra
+my_col	decimal(65,30)	YES		NULL	
+SELECT my_col FROM t1;
+my_col
+0.012345687012345687012345687012
+DROP TABLE t1;
diff --git a/mysql-test/r/type_time.result b/mysql-test/r/type_time.result
index d80a3973555..e4b90196c2d 100644
--- a/mysql-test/r/type_time.result
+++ b/mysql-test/r/type_time.result
@@ -128,3 +128,13 @@ SELECT sum(f3) FROM t1 where f2='2007-07-01 00:00:00' group by f2;
 sum(f3)
 3
 drop table t1;
+#
+# Bug #44792: valgrind warning when casting from time to time
+#
+CREATE TABLE t1 (c TIME);
+INSERT INTO t1 VALUES ('0:00:00');
+SELECT CAST(c AS TIME) FROM t1;
+CAST(c AS TIME)
+00:00:00
+DROP TABLE t1;
+End of 5.0 tests
diff --git a/mysql-test/r/union.result b/mysql-test/r/union.result
index 23a7724984c..44a3812725a 100644
--- a/mysql-test/r/union.result
+++ b/mysql-test/r/union.result
@@ -1574,4 +1574,17 @@ SHOW FIELDS FROM t2;
 Field	Type	Null	Key	Default	Extra
 d	double(9,6)	YES		NULL	
 DROP TABLE t1, t2;
+CREATE TABLE t1(a INT);
+EXPLAIN EXTENDED
+SELECT a FROM t1
+UNION
+SELECT a FROM t1
+ORDER BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	PRIMARY	t1	system	NULL	NULL	NULL	NULL	0	0.00	const row not found
+2	UNION	t1	system	NULL	NULL	NULL	NULL	0	0.00	const row not found
+NULL	UNION RESULT	<union1,2>	ALL	NULL	NULL	NULL	NULL	NULL	NULL	Using filesort
+Warnings:
+Note	1003	select '0' AS `a` from `test`.`t1` union select '0' AS `a` from `test`.`t1` order by `a`
+DROP TABLE t1;
 End of 5.0 tests
diff --git a/mysql-test/r/user_var.result b/mysql-test/r/user_var.result
index 8961a935006..28da1dae931 100644
--- a/mysql-test/r/user_var.result
+++ b/mysql-test/r/user_var.result
@@ -399,6 +399,17 @@ select @lastid != id, @lastid, @lastid := id from t1;
 0	3	3
 1	3	4
 drop table t1;
+CREATE TABLE t1(a INT, b INT);
+INSERT INTO t1 VALUES (0, 0), (2, 1), (2, 3), (1, 1), (30, 20);
+SELECT a, b INTO @a, @b FROM t1 WHERE a=2 AND b=3 GROUP BY a, b;
+SELECT @a, @b;
+@a	@b
+2	3
+SELECT a, b FROM t1 WHERE a=2 AND b=3 GROUP BY a, b;
+a	b
+2	3
+DROP TABLE t1;
+End of 5.0 tests
 CREATE TABLE t1 (i INT);
 CREATE TRIGGER t_after_insert AFTER INSERT ON t1 FOR EACH ROW SET @bug42188 = 10;
 INSERT INTO t1 VALUES (1);
diff --git a/mysql-test/r/variables.result b/mysql-test/r/variables.result
index f27d0b9fdd5..c1cd1840df8 100644
--- a/mysql-test/r/variables.result
+++ b/mysql-test/r/variables.result
@@ -1436,7 +1436,7 @@ Warnings:
 Warning	1292	Truncated incorrect auto_increment_offset value: '0'
 select @@storage_engine;
 Catalog	Database	Table	Table_alias	Column	Column_alias	Type	Length	Max length	Is_null	Flags	Decimals	Charsetnr
-def					@@storage_engine	253	6	6	N	1	31	8
+def					@@storage_engine	253	6	6	Y	0	31	8
 @@storage_engine
 MyISAM
 SET @old_server_id = @@GLOBAL.server_id;
@@ -1467,4 +1467,24 @@ SELECT @@GLOBAL.server_id;
 @@GLOBAL.server_id
 0
 SET GLOBAL server_id = @old_server_id;
+SELECT @@GLOBAL.INIT_FILE, @@GLOBAL.INIT_FILE IS NULL;
+@@GLOBAL.INIT_FILE	@@GLOBAL.INIT_FILE IS NULL
+NULL	1
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES ();
+SET @bug42778= @@sql_safe_updates;
+SET @@sql_safe_updates= 0;
+DELETE FROM t1 ORDER BY (@@GLOBAL.INIT_FILE) ASC LIMIT 10;
+SET @@sql_safe_updates= @bug42778;
+DROP TABLE t1;
+#
+# BUG#10206 - InnoDB: Transaction requiring Max_BinLog_Cache_size > 4GB always rollsback
+#
+SET @old_max_binlog_cache_size = @@GLOBAL.max_binlog_cache_size;
+# Set the max_binlog_cache_size to size more than 4GB. 
+SET GLOBAL max_binlog_cache_size = 5 * 1024 * 1024 * 1024;
+SELECT @@GLOBAL.max_binlog_cache_size;
+@@GLOBAL.max_binlog_cache_size
+5368709120
+SET GLOBAL max_binlog_cache_size = @old_max_binlog_cache_size;
 End of 5.1 tests
diff --git a/mysql-test/r/view.result b/mysql-test/r/view.result
index 0905fc0109b..2dc448a29d8 100644
--- a/mysql-test/r/view.result
+++ b/mysql-test/r/view.result
@@ -3700,6 +3700,136 @@ ERROR 42000: Key 'c2' doesn't exist in table 'v1'
 DROP VIEW v1;
 DROP TABLE t1;
 # -----------------------------------------------------------------
+# -- Bug#40825: Error 1356 while selecting from a view 
+# --            with a "HAVING" clause though query works
+# -----------------------------------------------------------------
+
+CREATE TABLE t1 (c INT);
+
+CREATE VIEW v1 (view_column) AS SELECT c AS alias FROM t1 HAVING alias;
+SHOW CREATE VIEW v1;
+View	Create View	character_set_client	collation_connection
+v1	CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select `t1`.`c` AS `view_column` from `t1` having `view_column`	latin1	latin1_swedish_ci
+SELECT * FROM v1;
+view_column
+
+DROP VIEW v1;
+DROP TABLE t1;
+
+# -- End of test case for Bug#40825
+
+# 
+# Bug #45806 crash when replacing into a view with a join!
+# 
+CREATE TABLE t1(a INT UNIQUE);
+CREATE VIEW v1 AS SELECT t1.a FROM t1, t1 AS a;
+INSERT INTO t1 VALUES (1), (2);
+REPLACE INTO v1(a) SELECT 1 FROM t1,t1 AS c;
+SELECT * FROM v1;
+a
+1
+2
+1
+2
+REPLACE INTO v1(a) SELECT 3 FROM t1,t1 AS c;
+SELECT * FROM v1;
+a
+1
+2
+3
+1
+2
+3
+1
+2
+3
+DELETE FROM t1 WHERE a=3;
+INSERT INTO v1(a) SELECT 1 FROM t1,t1 AS c
+ON DUPLICATE KEY UPDATE `v1`.`a`= 1;
+SELECT * FROM v1;
+a
+1
+2
+1
+2
+CREATE VIEW v2 AS SELECT t1.a FROM t1, v1 AS a;
+REPLACE INTO v2(a) SELECT 1 FROM t1,t1 AS c;
+SELECT * FROM v2;
+a
+1
+2
+1
+2
+1
+2
+1
+2
+REPLACE INTO v2(a) SELECT 3 FROM t1,t1 AS c;
+SELECT * FROM v2;
+a
+1
+2
+3
+1
+2
+3
+1
+2
+3
+1
+2
+3
+1
+2
+3
+1
+2
+3
+1
+2
+3
+1
+2
+3
+1
+2
+3
+INSERT INTO v2(a) SELECT 1 FROM t1,t1 AS c
+ON DUPLICATE KEY UPDATE `v2`.`a`= 1;
+SELECT * FROM v2;
+a
+1
+2
+3
+1
+2
+3
+1
+2
+3
+1
+2
+3
+1
+2
+3
+1
+2
+3
+1
+2
+3
+1
+2
+3
+1
+2
+3
+DROP VIEW v1;
+DROP VIEW v2;
+DROP TABLE t1;
+# -- End of test case for Bug#45806
+# -----------------------------------------------------------------
 # -- End of 5.0 tests.
 # -----------------------------------------------------------------
 DROP DATABASE IF EXISTS `d-1`;
@@ -3817,6 +3947,14 @@ call p();
 call p();
 drop view a;
 drop procedure p;
+#
+# Bug #44860: ALTER TABLE on view crashes server
+#
+CREATE TABLE t1 (a INT);
+CREATE VIEW v1 AS SELECT a FROM t1;
+ALTER TABLE v1;
+DROP VIEW v1;
+DROP TABLE t1;
 # -----------------------------------------------------------------
 # -- End of 5.1 tests.
 # -----------------------------------------------------------------
diff --git a/mysql-test/r/xa.result b/mysql-test/r/xa.result
index 592cf07522b..a597806d897 100644
--- a/mysql-test/r/xa.result
+++ b/mysql-test/r/xa.result
@@ -75,3 +75,17 @@ xa rollback 'a','c';
 xa start 'a','c';
 drop table t1;
 End of 5.0 tests
+xa start 'a';
+xa end 'a';
+xa rollback 'a';
+xa start 'a';
+xa end 'a';
+xa rollback 'a';
+xa start 'a';
+xa end 'a';
+xa prepare 'a';
+xa commit 'a';
+xa start 'a';
+xa end 'a';
+xa prepare 'a';
+xa commit 'a';
diff --git a/mysql-test/suite/binlog/r/binlog_database.result b/mysql-test/suite/binlog/r/binlog_database.result
index 8dbe0f21852..1cc9281f3fc 100644
--- a/mysql-test/suite/binlog/r/binlog_database.result
+++ b/mysql-test/suite/binlog/r/binlog_database.result
@@ -100,15 +100,15 @@ drop table tt1, t1;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (a int)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	drop database if exists mysqltest1
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; DROP TABLE `t1` /* generated by server */
 FLUSH STATUS;
 show databases;
diff --git a/mysql-test/suite/binlog/r/binlog_incident.result b/mysql-test/suite/binlog/r/binlog_incident.result
new file mode 100644
index 00000000000..d8b0357b8c4
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_incident.result
@@ -0,0 +1,12 @@
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1),(2),(3);
+SELECT * FROM t1;
+a
+1
+2
+3
+REPLACE INTO t1 VALUES (4);
+DROP TABLE t1;
+FLUSH LOGS;
+Contain RELOAD DATABASE
+1
diff --git a/mysql-test/suite/binlog/r/binlog_innodb.result b/mysql-test/suite/binlog/r/binlog_innodb.result
index 919ac33ef35..1922897f631 100644
--- a/mysql-test/suite/binlog/r/binlog_innodb.result
+++ b/mysql-test/suite/binlog/r/binlog_innodb.result
@@ -66,49 +66,49 @@ COMMIT;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=INNODB
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t1 VALUES (1,1),(2,2),(3,3),(4,4),(5,5),(6,6)
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; UPDATE t1 SET b = 2*a WHERE a > 1
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Update_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; UPDATE t1 SET b = 3*a WHERE a > 3
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; UPDATE t1 SET b = 4*a WHERE a > 4
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Update_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Update_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; UPDATE t1 SET b = 3*a WHERE a > 3
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; UPDATE t1 SET b = 4*a WHERE a > 4
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Update_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Update_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Update_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Update_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
diff --git a/mysql-test/suite/binlog/r/binlog_innodb_row.result b/mysql-test/suite/binlog/r/binlog_innodb_row.result
index fab79c4bc2f..f7415610dc5 100644
--- a/mysql-test/suite/binlog/r/binlog_innodb_row.result
+++ b/mysql-test/suite/binlog/r/binlog_innodb_row.result
@@ -9,7 +9,7 @@ commit;
 *** Results of the test: the binlog must have only Write_rows events not any Update_rows ***
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
@@ -24,7 +24,7 @@ commit;
 *** Results of the test: the binlog must have only one Write_rows event not two ***
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
diff --git a/mysql-test/suite/binlog/r/binlog_multi_engine.result b/mysql-test/suite/binlog/r/binlog_multi_engine.result
index caae5f55d13..9252229903b 100644
--- a/mysql-test/suite/binlog/r/binlog_multi_engine.result
+++ b/mysql-test/suite/binlog/r/binlog_multi_engine.result
@@ -17,16 +17,16 @@ TRUNCATE t1b;
 TRUNCATE t1n;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-mysqld-bin.000001	#	Query	#	#	use `test`; BEGIN
+mysqld-bin.000001	#	Query	#	#	BEGIN
 mysqld-bin.000001	#	Query	#	#	use `test`; INSERT INTO t1b VALUES (1,1), (1,2), (2,1), (2,2)
-mysqld-bin.000001	#	Query	#	#	use `test`; COMMIT
+mysqld-bin.000001	#	Query	#	#	COMMIT
 mysqld-bin.000001	#	Query	#	#	use `test`; INSERT INTO t1m VALUES (1,1), (1,2), (2,1), (2,2)
 mysqld-bin.000001	#	Query	#	#	use `test`; UPDATE t1m, t1b SET m = 2, b = 3 WHERE n = c
-mysqld-bin.000001	#	Query	#	#	use `test`; BEGIN
+mysqld-bin.000001	#	Query	#	#	BEGIN
 mysqld-bin.000001	#	Query	#	#	use `test`; INSERT INTO t1n VALUES (1,1), (1,2), (2,1), (2,2)
 mysqld-bin.000001	#	Query	#	#	use `test`; UPDATE t1m, t1n SET m = 2, e = 3 WHERE n = f
 mysqld-bin.000001	#	Query	#	#	use `test`; UPDATE t1n, t1b SET e = 2, b = 3 WHERE f = c
-mysqld-bin.000001	#	Query	#	#	use `test`; COMMIT
+mysqld-bin.000001	#	Query	#	#	COMMIT
 mysqld-bin.000001	#	Query	#	#	BEGIN
 mysqld-bin.000001	#	Table_map	#	#	table_id: # (test.t1n)
 mysqld-bin.000001	#	Table_map	#	#	table_id: # (mysql.ndb_apply_status)
@@ -48,9 +48,9 @@ TRUNCATE t1b;
 TRUNCATE t1n;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-mysqld-bin.000001	#	Query	#	#	use `test`; BEGIN
+mysqld-bin.000001	#	Query	#	#	BEGIN
 mysqld-bin.000001	#	Query	#	#	use `test`; INSERT INTO t1b VALUES (1,1), (1,2), (2,1), (2,2)
-mysqld-bin.000001	#	Query	#	#	use `test`; COMMIT
+mysqld-bin.000001	#	Query	#	#	COMMIT
 mysqld-bin.000001	#	Query	#	#	use `test`; INSERT INTO t1m VALUES (1,1), (1,2), (2,1), (2,2)
 mysqld-bin.000001	#	Query	#	#	BEGIN
 mysqld-bin.000001	#	Table_map	#	#	table_id: # (test.t1n)
@@ -73,14 +73,14 @@ UPDATE t1n, t1b SET e = 2, b = 3 WHERE f = c;
 ERROR HY000: Binary logging not possible. Message: Statement cannot be written atomically since more than one engine involved and at least one engine is self-logging
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-mysqld-bin.000001	#	Query	#	#	use `test`; BEGIN
+mysqld-bin.000001	#	Query	#	#	BEGIN
 mysqld-bin.000001	#	Table_map	#	#	table_id: # (test.t1m)
 mysqld-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-mysqld-bin.000001	#	Query	#	#	use `test`; COMMIT
-mysqld-bin.000001	#	Query	#	#	use `test`; BEGIN
+mysqld-bin.000001	#	Query	#	#	COMMIT
+mysqld-bin.000001	#	Query	#	#	BEGIN
 mysqld-bin.000001	#	Table_map	#	#	table_id: # (test.t1b)
 mysqld-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-mysqld-bin.000001	#	Query	#	#	use `test`; COMMIT
+mysqld-bin.000001	#	Query	#	#	COMMIT
 mysqld-bin.000001	#	Query	#	#	BEGIN
 mysqld-bin.000001	#	Table_map	#	#	table_id: # (test.t1n)
 mysqld-bin.000001	#	Table_map	#	#	table_id: # (mysql.ndb_apply_status)
diff --git a/mysql-test/suite/binlog/r/binlog_row_binlog.result b/mysql-test/suite/binlog/r/binlog_row_binlog.result
index 25cb7a4726f..4baa47db129 100644
--- a/mysql-test/suite/binlog/r/binlog_row_binlog.result
+++ b/mysql-test/suite/binlog/r/binlog_row_binlog.result
@@ -12,11 +12,11 @@ show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (a int) engine=innodb
 master-bin.000001	#	Query	#	#	use `test`; create table t2 (a int) engine=innodb
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
@@ -29,7 +29,7 @@ drop table t1;
 show binlog events in 'master-bin.000001' from 106;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	1	#	use `test`; create table t1 (n int) engine=innodb
-master-bin.000001	#	Query	1	#	use `test`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Table_map	1	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	1	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Table_map	1	#	table_id: # (test.t1)
@@ -249,7 +249,7 @@ show binlog events from 0;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	4	Format_desc	1	106	Server version, Binlog ver: 4
 master-bin.000001	106	Query	1	205	use `test`; create table t1(n int) engine=innodb
-master-bin.000001	205	Query	1	273	use `test`; BEGIN
+master-bin.000001	205	Query	1	273	BEGIN
 master-bin.000001	273	Table_map	1	314	table_id: # (test.t1)
 master-bin.000001	314	Write_rows	1	348	table_id: # flags: STMT_END_F
 master-bin.000001	348	Table_map	1	389	table_id: # (test.t1)
@@ -266,7 +266,7 @@ show binlog events from 0;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	4	Format_desc	1	106	Server version, Binlog ver: 4
 master-bin.000001	106	Query	1	206	use `test`; create table t1 (a int) engine=innodb
-master-bin.000001	206	Query	1	274	use `test`; BEGIN
+master-bin.000001	206	Query	1	274	BEGIN
 master-bin.000001	274	Table_map	1	315	table_id: # (test.t1)
 master-bin.000001	315	Write_rows	1	349	table_id: # flags: STMT_END_F
 master-bin.000001	349	Table_map	1	390	table_id: # (test.t1)
@@ -1085,10 +1085,10 @@ show binlog events from 0;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	4	Format_desc	1	106	Server version, Binlog ver: 4
 master-bin.000001	106	Query	1	227	use `test`; create table t1 (a bigint unsigned, b bigint(20) unsigned)
-master-bin.000001	227	Query	1	295	use `test`; BEGIN
+master-bin.000001	227	Query	1	295	BEGIN
 master-bin.000001	295	Table_map	1	337	table_id: # (test.t1)
 master-bin.000001	337	Write_rows	1	383	table_id: # flags: STMT_END_F
-master-bin.000001	383	Query	1	452	use `test`; COMMIT
+master-bin.000001	383	Query	1	452	COMMIT
 master-bin.000001	452	Query	1	528	use `test`; drop table t1
 reset master;
 CREATE DATABASE bug39182 DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
@@ -1192,32 +1192,32 @@ use test;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (id tinyint auto_increment primary key)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; drop table t1
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (a int)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; CREATE TABLE IF NOT EXISTS `t2` (
   `a` int(11) DEFAULT NULL
 )
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; CREATE TABLE IF NOT EXISTS `t3` (
   `a` int(11) DEFAULT NULL
 )
-master-bin.000001	#	Query	#	#	use `mysql`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (mysql.user)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `mysql`; COMMIT
-master-bin.000001	#	Query	#	#	use `mysql`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (mysql.user)
 master-bin.000001	#	Update_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `mysql`; COMMIT
-master-bin.000001	#	Query	#	#	use `mysql`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (mysql.user)
 master-bin.000001	#	Delete_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `mysql`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 drop table t1,t2,t3,tt1;
 create table t1 (a int not null auto_increment, primary key (a)) engine=myisam;
 insert delayed into t1 values (207);
@@ -1227,46 +1227,46 @@ FLUSH TABLES;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (id tinyint auto_increment primary key)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; drop table t1
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (a int)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; CREATE TABLE IF NOT EXISTS `t2` (
   `a` int(11) DEFAULT NULL
 )
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; CREATE TABLE IF NOT EXISTS `t3` (
   `a` int(11) DEFAULT NULL
 )
-master-bin.000001	#	Query	#	#	use `mysql`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (mysql.user)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `mysql`; COMMIT
-master-bin.000001	#	Query	#	#	use `mysql`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (mysql.user)
 master-bin.000001	#	Update_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `mysql`; COMMIT
-master-bin.000001	#	Query	#	#	use `mysql`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (mysql.user)
 master-bin.000001	#	Delete_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `mysql`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; DROP TABLE `t1`,`t2`,`t3` /* generated by server */
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (a int not null auto_increment, primary key (a)) engine=myisam
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; FLUSH TABLES
 insert delayed into t1 values (null),(null),(null),(null);
 insert delayed into t1 values (null),(null),(400),(null);
diff --git a/mysql-test/suite/binlog/r/binlog_row_ctype_ucs.result b/mysql-test/suite/binlog/r/binlog_row_ctype_ucs.result
index 49aa64adfb5..8daed8d5c25 100644
--- a/mysql-test/suite/binlog/r/binlog_row_ctype_ucs.result
+++ b/mysql-test/suite/binlog/r/binlog_row_ctype_ucs.result
@@ -5,16 +5,15 @@ reset master;
 insert into t2 values (@v);
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 flush logs;
 /*!40019 SET @@session.max_insert_delayed_threads=0*/;
 /*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
 DELIMITER /*!*/;
 ROLLBACK/*!*/;
-use test/*!*/;
 SET TIMESTAMP=10000/*!*/;
 SET @@session.pseudo_thread_id=999999999/*!*/;
 SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1, @@session.autocommit=1/*!*/;
diff --git a/mysql-test/suite/binlog/r/binlog_row_insert_select.result b/mysql-test/suite/binlog/r/binlog_row_insert_select.result
index d4370c4de12..c7386b092e4 100644
--- a/mysql-test/suite/binlog/r/binlog_row_insert_select.result
+++ b/mysql-test/suite/binlog/r/binlog_row_insert_select.result
@@ -8,10 +8,10 @@ insert into t1 select * from t2;
 ERROR 23000: Duplicate entry '2' for key 'a'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select * from t1;
 a
 1
diff --git a/mysql-test/suite/binlog/r/binlog_row_mix_innodb_myisam.result b/mysql-test/suite/binlog/r/binlog_row_mix_innodb_myisam.result
index 4f3bc57e576..4d639c3da68 100644
--- a/mysql-test/suite/binlog/r/binlog_row_mix_innodb_myisam.result
+++ b/mysql-test/suite/binlog/r/binlog_row_mix_innodb_myisam.result
@@ -8,7 +8,7 @@ insert into t2 select * from t1;
 commit;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
@@ -25,12 +25,12 @@ Warnings:
 Warning	1196	Some non-transactional changed tables couldn't be rolled back
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 delete from t1;
 delete from t2;
 reset master;
@@ -45,7 +45,7 @@ Warning	1196	Some non-transactional changed tables couldn't be rolled back
 commit;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Query	#	#	use `test`; savepoint my_savepoint
@@ -74,7 +74,7 @@ a
 7
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Query	#	#	use `test`; savepoint my_savepoint
@@ -100,12 +100,12 @@ get_lock("a",10)
 1
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 delete from t1;
 delete from t2;
 reset master;
@@ -113,14 +113,14 @@ insert into t1 values(9);
 insert into t2 select * from t1;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 delete from t1;
 delete from t2;
 reset master;
@@ -129,7 +129,7 @@ begin;
 insert into t2 select * from t1;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
@@ -137,11 +137,11 @@ insert into t1 values(11);
 commit;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
@@ -157,7 +157,7 @@ insert into t2 select * from t1;
 commit;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
@@ -184,7 +184,7 @@ rollback to savepoint my_savepoint;
 commit;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
@@ -205,7 +205,7 @@ a
 18
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
@@ -257,31 +257,31 @@ get_lock("lock1",60)
 1
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Delete_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
 master-bin.000001	#	Query	#	#	use `test`; alter table t2 engine=MyISAM
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
 master-bin.000001	#	Query	#	#	use `test`; drop table t1,t2
 master-bin.000001	#	Query	#	#	use `test`; create table t0 (n int)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t0)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t0)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; create table t2 (n int) engine=innodb
 do release_lock("lock1");
 drop table t0,t2;
@@ -364,46 +364,46 @@ a	b
 DROP TABLE t1,t2;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; DROP TABLE if exists t2
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 master-bin.000001	#	Query	#	#	use `test`; DROP TABLE IF EXISTS t2
 master-bin.000001	#	Query	#	#	use `test`; CREATE TABLE t2 (a int, b int, primary key (a)) engine=innodb
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; TRUNCATE table t2
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; DROP TABLE t2
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	ROLLBACK
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 master-bin.000001	#	Query	#	#	use `test`; TRUNCATE table t2
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; DROP TABLE `t1` /* generated by server */
 reset master;
 create table t1 (a int) engine=innodb;
@@ -447,12 +447,12 @@ count(*)
 2
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.ti)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Table_map	#	#	table_id: # (test.ti)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from ti /* zero */;
 count(*)
 0
@@ -499,11 +499,11 @@ insert into t2 values (bug27417(2));
 ERROR 23000: Duplicate entry '2' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 /* only (!) with fixes for #23333 will show there is the query */;
 select count(*) from t1 /* must be 3 */;
 count(*)
@@ -518,11 +518,11 @@ count(*)
 2
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 /* the query must be in regardless of #23333 */;
 select count(*) from t1 /* must be 5 */;
 count(*)
@@ -544,11 +544,11 @@ insert into t2 values (bug27417(1));
 ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 /* the output must denote there is the query */;
 select count(*) from t1 /* must be 1 */;
 count(*)
@@ -561,12 +561,12 @@ insert into t2 select bug27417(1) union select bug27417(2);
 ERROR 23000: Duplicate entry '2' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: #
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 /* the output must denote there is the query */;
 select count(*) from t1 /* must be 2 */;
 count(*)
@@ -578,13 +578,13 @@ update t3 set b=b+bug27417(1);
 ERROR 23000: Duplicate entry '4' for key 'b'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t3)
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: #
 master-bin.000001	#	Update_rows	#	#	table_id: #
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 /* the output must denote there is the query */;
 select count(*) from t1 /* must be 2 */;
 count(*)
@@ -598,11 +598,11 @@ UPDATE t4,t3 SET t4.a=t3.a + bug27417(1) /* top level non-ta table */;
 ERROR 23000: Duplicate entry '2' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t4)
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 /* the output must denote there is the query */;
 select count(*) from t1 /* must be 4 */;
 count(*)
@@ -631,12 +631,12 @@ delete from t2;
 ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t3)
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 /* the output must denote there is the query */;
 select count(*) from t1 /* must be 1 */;
 count(*)
@@ -654,13 +654,13 @@ delete t2.* from t2,t5 where t2.a=t5.a + 1;
 ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Delete_rows	#	#	table_id: #
 master-bin.000001	#	Write_rows	#	#	table_id: #
 master-bin.000001	#	Delete_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 /* the output must denote there is the query */;
 select count(*) from t1 /* must be 1 */;
 count(*)
@@ -679,13 +679,13 @@ count(*)
 2
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t4)
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: #
 master-bin.000001	#	Write_rows	#	#	table_id: #
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 /* the output must denote there is the query */;
 drop trigger trg_del_t2;
 drop table t1,t2,t3,t4,t5;
@@ -706,12 +706,12 @@ count(*)
 2
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.ti)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Table_map	#	#	table_id: # (test.ti)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from ti /* zero */;
 count(*)
 0
@@ -795,10 +795,10 @@ insert into t2 values (bug27417(1));
 ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Intvar	#	#	INSERT_ID=1
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 values (bug27417(1))
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from t1 /* must be 1 */;
 count(*)
 1
@@ -810,10 +810,10 @@ insert into t2 select bug27417(1) union select bug27417(2);
 ERROR 23000: Duplicate entry '2' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Intvar	#	#	INSERT_ID=2
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 select bug27417(1) union select bug27417(2)
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from t1 /* must be 2 */;
 count(*)
 2
@@ -867,10 +867,10 @@ delete from t2;
 ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Intvar	#	#	INSERT_ID=9
 master-bin.000001	#	Query	#	#	use `test`; delete from t2
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from t1 /* must be 1 */;
 count(*)
 1
@@ -887,9 +887,9 @@ delete t2.* from t2,t5 where t2.a=t5.a + 1;
 ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; delete t2.* from t2,t5 where t2.a=t5.a + 1
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from t1 /* must be 1 */;
 count(*)
 1
@@ -907,14 +907,14 @@ count(*)
 2
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Intvar	#	#	INSERT_ID=10
 master-bin.000001	#	User var	#	#	@`b`=_latin1 0x3135 COLLATE latin1_swedish_ci
 master-bin.000001	#	Begin_load_query	#	#	;file_id=#;block_len=#
 master-bin.000001	#	Intvar	#	#	INSERT_ID=10
 master-bin.000001	#	User var	#	#	@`b`=_latin1 0x3135 COLLATE latin1_swedish_ci
 master-bin.000001	#	Execute_load_query	#	#	use `test`; load data infile '../../std_data/rpl_loaddata.dat' into table t4 (a, @b) set b= @b + bug27417(2) ;file_id=#
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 drop trigger trg_del_t2;
 drop table t1,t2,t3,t4,t5;
 drop function bug27417;
diff --git a/mysql-test/suite/binlog/r/binlog_row_mysqlbinlog_db_filter.result b/mysql-test/suite/binlog/r/binlog_row_mysqlbinlog_db_filter.result
new file mode 100644
index 00000000000..354fd832fb3
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_row_mysqlbinlog_db_filter.result
@@ -0,0 +1,43 @@
+RESET MASTER;
+CREATE TABLE t1 (id int);
+CREATE TABLE t2 (id int);
+CREATE TABLE t3 (txt TEXT);
+CREATE TABLE t4 (a int) ENGINE= InnoDB;
+INSERT INTO t1 VALUES (1);
+INSERT INTO t1 VALUES (2);
+INSERT INTO t2 VALUES (1);
+INSERT INTO t2 VALUES (2);
+INSERT INTO t1 VALUES (3);
+LOAD DATA INFILE 'MYSQLTEST_VARDIR/std_data/words.dat' INTO TABLE t3;
+INSERT INTO t1 VALUES (4);
+CREATE DATABASE b42941;
+use b42941;
+CREATE TABLE t1 (id int);
+CREATE TABLE t2 (id int);
+CREATE TABLE t3 (txt TEXT);
+CREATE TABLE t4 (a int) ENGINE= InnoDB;
+INSERT INTO t1 VALUES (1);
+INSERT INTO t1 VALUES (2);
+INSERT INTO t2 VALUES (1);
+INSERT INTO t2 VALUES (2);
+INSERT INTO t1 VALUES (3);
+LOAD DATA INFILE 'MYSQLTEST_VARDIR/std_data/words.dat' INTO TABLE t3;
+INSERT INTO t1 VALUES (4);
+INSERT INTO test.t1 VALUES (5);
+FLUSH LOGS;
+UPDATE test.t1 t11, b42941.t1 t12 SET t11.id=10, t12.id=100;
+BEGIN;
+INSERT INTO test.t4 VALUES (1);
+INSERT INTO b42941.t4 VALUES (1);
+UPDATE test.t4 tn4, b42941.t4 tt4 SET tn4.a= 10, tt4.a= 100;
+COMMIT;
+FLUSH LOGS;
+SET @b42941_output.1= LOAD_FILE('MYSQLTEST_VARDIR/tmp/b42941-mysqlbinlog.1');
+SET @b42941_output.2= LOAD_FILE('MYSQLTEST_VARDIR/tmp/b42941-mysqlbinlog.2');
+SET @b42941_output.1= LOAD_FILE('MYSQLTEST_VARDIR/tmp/b42941-mysqlbinlog.1');
+SET @b42941_output.2= LOAD_FILE('MYSQLTEST_VARDIR/tmp/b42941-mysqlbinlog.2');
+SET @b42941_output.1= LOAD_FILE('MYSQLTEST_VARDIR/tmp/b42941-mysqlbinlog.1');
+SET @b42941_output.2= LOAD_FILE('MYSQLTEST_VARDIR/tmp/b42941-mysqlbinlog.2');
+DROP DATABASE b42941;
+use test;
+DROP TABLE t1, t2, t3, t4;
diff --git a/mysql-test/suite/binlog/r/binlog_stm_binlog.result b/mysql-test/suite/binlog/r/binlog_stm_binlog.result
index efdeb30a2af..d151e31269f 100644
--- a/mysql-test/suite/binlog/r/binlog_stm_binlog.result
+++ b/mysql-test/suite/binlog/r/binlog_stm_binlog.result
@@ -6,7 +6,7 @@ show binlog events;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	4	Format_desc	1	106	Server ver: #, Binlog ver: #
 master-bin.000001	106	Query	1	213	use `test`; create table t1 (a int, b int) engine=innodb
-master-bin.000001	213	Query	1	281	use `test`; BEGIN
+master-bin.000001	213	Query	1	281	BEGIN
 master-bin.000001	281	Query	1	371	use `test`; insert into t1 values (1,2)
 master-bin.000001	371	Xid	1	398	COMMIT /* XID */
 drop table t1;
@@ -24,10 +24,10 @@ show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (a int) engine=innodb
 master-bin.000001	#	Query	#	#	use `test`; create table t2 (a int) engine=innodb
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert t1 values (5)
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert t2 values (5)
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
 drop table t1,t2;
@@ -39,7 +39,7 @@ drop table t1;
 show binlog events in 'master-bin.000001' from 106;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	1	#	use `test`; create table t1 (n int) engine=innodb
-master-bin.000001	#	Query	1	#	use `test`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test`; insert into t1 values(100 + 4)
 master-bin.000001	#	Query	1	#	use `test`; insert into t1 values(99 + 4)
 master-bin.000001	#	Query	1	#	use `test`; insert into t1 values(98 + 4)
@@ -159,7 +159,7 @@ show binlog events from 0;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	4	Format_desc	1	106	Server version, Binlog ver: 4
 master-bin.000001	106	Query	1	205	use `test`; create table t1(n int) engine=innodb
-master-bin.000001	205	Query	1	273	use `test`; BEGIN
+master-bin.000001	205	Query	1	273	BEGIN
 master-bin.000001	273	Query	1	361	use `test`; insert into t1 values (1)
 master-bin.000001	361	Query	1	449	use `test`; insert into t1 values (2)
 master-bin.000001	449	Query	1	537	use `test`; insert into t1 values (3)
@@ -173,7 +173,7 @@ show binlog events from 0;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	4	Format_desc	1	106	Server version, Binlog ver: 4
 master-bin.000001	106	Query	1	206	use `test`; create table t1 (a int) engine=innodb
-master-bin.000001	206	Query	1	274	use `test`; BEGIN
+master-bin.000001	206	Query	1	274	BEGIN
 master-bin.000001	274	Query	1	365	use `test`; insert into t1 values( 400 )
 master-bin.000001	365	Query	1	456	use `test`; insert into t1 values( 399 )
 master-bin.000001	456	Query	1	547	use `test`; insert into t1 values( 398 )
@@ -730,18 +730,18 @@ master-bin.000001	#	Query	#	#	use `mysql`; UPDATE user SET password=password('An
 master-bin.000001	#	Query	#	#	use `mysql`; DELETE FROM user WHERE host='localhost' AND user='@#@'
 master-bin.000001	#	Query	#	#	use `test`; drop table t1,t2,t3,tt1
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (a int not null auto_increment, primary key (a)) engine=myisam
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; FLUSH TABLES
 insert delayed into t1 values (null),(null),(null),(null);
 insert delayed into t1 values (null),(null),(400),(null);
diff --git a/mysql-test/suite/binlog/r/binlog_stm_blackhole.result b/mysql-test/suite/binlog/r/binlog_stm_blackhole.result
index 1cd77cfbed4..f3a01f66fc2 100644
--- a/mysql-test/suite/binlog/r/binlog_stm_blackhole.result
+++ b/mysql-test/suite/binlog/r/binlog_stm_blackhole.result
@@ -109,35 +109,35 @@ Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Format_desc	#	#	Server ver: VERSION, Binlog ver: 4
 master-bin.000001	#	Query	#	#	use `test`; drop table t1,t2
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (a int) engine=blackhole
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; delete from t1 where a=10
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; update t1 set a=11 where a=15
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(1)
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert ignore into t1 values(1)
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; replace into t1 values(100)
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; create table t2 (a varchar(200)) engine=blackhole
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Begin_load_query	#	#	;file_id=#;block_len=581
 master-bin.000001	#	Execute_load_query	#	#	use `test`; load data infile '../../std_data/words.dat' into table t2 ;file_id=#
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; alter table t1 add b int
 master-bin.000001	#	Query	#	#	use `test`; alter table t1 drop b
 master-bin.000001	#	Query	#	#	use `test`; create table t3 like t1
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 select * from t3
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; replace into t1 select * from t3
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 drop table t1,t2,t3;
 CREATE TABLE t1(a INT) ENGINE=BLACKHOLE;
 INSERT DELAYED INTO t1 VALUES(1);
@@ -167,9 +167,9 @@ show binlog events;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Format_desc	#	#	Server ver: VERSION, Binlog ver: 4
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (a int) engine=blackhole
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(1)
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 drop table if exists t1;
 reset master;
 create table t1 (a int auto_increment, primary key (a)) engine=blackhole;
@@ -181,16 +181,16 @@ insert into t1 values (55), (NULL);
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	1	#	use `test`; create table t1 (a int auto_increment, primary key (a)) engine=blackhole
-master-bin.000001	#	Query	1	#	use `test`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Intvar	1	#	INSERT_ID=1
 master-bin.000001	#	Query	1	#	use `test`; insert into t1 values (11), (NULL), (NULL), (NULL)
-master-bin.000001	#	Query	1	#	use `test`; COMMIT
-master-bin.000001	#	Query	1	#	use `test`; BEGIN
+master-bin.000001	#	Query	1	#	COMMIT
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Intvar	1	#	INSERT_ID=3
 master-bin.000001	#	Query	1	#	use `test`; insert into t1 values (NULL), (33), (NULL)
-master-bin.000001	#	Query	1	#	use `test`; COMMIT
-master-bin.000001	#	Query	1	#	use `test`; BEGIN
+master-bin.000001	#	Query	1	#	COMMIT
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Intvar	1	#	INSERT_ID=5
 master-bin.000001	#	Query	1	#	use `test`; insert into t1 values (55), (NULL)
-master-bin.000001	#	Query	1	#	use `test`; COMMIT
+master-bin.000001	#	Query	1	#	COMMIT
 drop table t1;
diff --git a/mysql-test/suite/binlog/r/binlog_stm_mix_innodb_myisam.result b/mysql-test/suite/binlog/r/binlog_stm_mix_innodb_myisam.result
index 38488c9331d..95773a247b9 100644
--- a/mysql-test/suite/binlog/r/binlog_stm_mix_innodb_myisam.result
+++ b/mysql-test/suite/binlog/r/binlog_stm_mix_innodb_myisam.result
@@ -8,7 +8,7 @@ insert into t2 select * from t1;
 commit;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(1)
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 select * from t1
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
@@ -23,10 +23,10 @@ Warnings:
 Warning	1196	Some non-transactional changed tables couldn't be rolled back
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(2)
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 select * from t1
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 delete from t1;
 delete from t2;
 reset master;
@@ -41,7 +41,7 @@ Warning	1196	Some non-transactional changed tables couldn't be rolled back
 commit;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(3)
 master-bin.000001	#	Query	#	#	use `test`; savepoint my_savepoint
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(4)
@@ -67,7 +67,7 @@ a
 7
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(5)
 master-bin.000001	#	Query	#	#	use `test`; savepoint my_savepoint
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(6)
@@ -89,10 +89,10 @@ get_lock("a",10)
 1
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(8)
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 select * from t1
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 delete from t1;
 delete from t2;
 reset master;
@@ -100,7 +100,7 @@ insert into t1 values(9);
 insert into t2 select * from t1;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(9)
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 select * from t1
@@ -112,7 +112,7 @@ begin;
 insert into t2 select * from t1;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(10)
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 select * from t1
@@ -120,11 +120,11 @@ insert into t1 values(11);
 commit;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(10)
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 select * from t1
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(11)
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
 alter table t2 engine=INNODB;
@@ -137,7 +137,7 @@ insert into t2 select * from t1;
 commit;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(12)
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 select * from t1
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
@@ -162,7 +162,7 @@ rollback to savepoint my_savepoint;
 commit;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(14)
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
 delete from t1;
@@ -182,7 +182,7 @@ a
 18
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(16)
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(18)
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
@@ -232,26 +232,26 @@ get_lock("lock1",60)
 1
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(16)
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values(18)
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; delete from t1
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; delete from t2
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
 master-bin.000001	#	Query	#	#	use `test`; alter table t2 engine=MyISAM
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into t1 values (1)
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 values (20)
 master-bin.000001	#	Query	#	#	use `test`; drop table t1,t2
 master-bin.000001	#	Query	#	#	use `test`; create temporary table ti (a int) engine=innodb
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into ti values(1)
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; create temporary table t1 (a int) engine=myisam
 master-bin.000001	#	Query	#	#	use `test`; insert t1 values (1)
 master-bin.000001	#	Query	#	#	use `test`; create table t0 (n int)
@@ -356,9 +356,9 @@ master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t1 values (8,8)
 master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t1 values (9,9)
 master-bin.000001	#	Query	#	#	use `test`; TRUNCATE table t2
 master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t1 values (10,10)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t2 values (100,100)
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; DROP TABLE t1,t2
 reset master;
 create table t1 (a int) engine=innodb;
@@ -402,11 +402,11 @@ count(*)
 2
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into ti values (1)
 master-bin.000001	#	Query	#	#	use `test`; insert into ti values (2)
 master-bin.000001	#	Query	#	#	use `test`; insert into tt select * from ti
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from ti /* zero */;
 count(*)
 0
@@ -428,11 +428,11 @@ Warnings:
 Warning	1196	Some non-transactional changed tables couldn't be rolled back
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into ti values (1)
 master-bin.000001	#	Query	#	#	use `test`; insert into ti values (2) /* to make the dup error in the following */
 master-bin.000001	#	Query	#	#	use `test`; insert into tt select * from ti /* one affected and error */
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from ti /* zero */;
 count(*)
 0
@@ -499,10 +499,10 @@ insert into t2 values (bug27417(1));
 ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Intvar	#	#	INSERT_ID=1
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 values (bug27417(1))
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 /* the output must denote there is the query */;
 select count(*) from t1 /* must be 1 */;
 count(*)
@@ -515,10 +515,10 @@ insert into t2 select bug27417(1) union select bug27417(2);
 ERROR 23000: Duplicate entry '2' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Intvar	#	#	INSERT_ID=2
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 select bug27417(1) union select bug27417(2)
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 /* the output must denote there is the query */;
 select count(*) from t1 /* must be 2 */;
 count(*)
@@ -575,10 +575,10 @@ delete from t2;
 ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Intvar	#	#	INSERT_ID=9
 master-bin.000001	#	Query	#	#	use `test`; delete from t2
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 /* the output must denote there is the query */;
 select count(*) from t1 /* must be 1 */;
 count(*)
@@ -596,9 +596,9 @@ delete t2.* from t2,t5 where t2.a=t5.a + 1;
 ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; delete t2.* from t2,t5 where t2.a=t5.a + 1
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 /* the output must denote there is the query */;
 select count(*) from t1 /* must be 1 */;
 count(*)
@@ -617,12 +617,12 @@ count(*)
 2
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Intvar	#	#	INSERT_ID=10
 master-bin.000001	#	Begin_load_query	#	#	;file_id=#;block_len=#
 master-bin.000001	#	Intvar	#	#	INSERT_ID=10
 master-bin.000001	#	Execute_load_query	#	#	use `test`; load data infile '../../std_data/rpl_loaddata.dat' into table t4 (a, @b) set b= @b + bug27417(2) ;file_id=#
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 /* the output must denote there is the query */;
 drop trigger trg_del_t2;
 drop table t1,t2,t3,t4,t5;
@@ -644,11 +644,11 @@ count(*)
 2
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into ti values (1)
 master-bin.000001	#	Query	#	#	use `test`; insert into ti values (2)
 master-bin.000001	#	Query	#	#	use `test`; insert into tt select * from ti
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from ti /* zero */;
 count(*)
 0
@@ -670,11 +670,11 @@ Warnings:
 Warning	1196	Some non-transactional changed tables couldn't be rolled back
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; insert into ti values (1)
 master-bin.000001	#	Query	#	#	use `test`; insert into ti values (2) /* to make the dup error in the following */
 master-bin.000001	#	Query	#	#	use `test`; insert into tt select * from ti /* one affected and error */
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from ti /* zero */;
 count(*)
 0
@@ -739,10 +739,10 @@ insert into t2 values (bug27417(1));
 ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Intvar	#	#	INSERT_ID=1
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 values (bug27417(1))
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from t1 /* must be 1 */;
 count(*)
 1
@@ -754,10 +754,10 @@ insert into t2 select bug27417(1) union select bug27417(2);
 ERROR 23000: Duplicate entry '2' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Intvar	#	#	INSERT_ID=2
 master-bin.000001	#	Query	#	#	use `test`; insert into t2 select bug27417(1) union select bug27417(2)
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from t1 /* must be 2 */;
 count(*)
 2
@@ -811,10 +811,10 @@ delete from t2;
 ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Intvar	#	#	INSERT_ID=9
 master-bin.000001	#	Query	#	#	use `test`; delete from t2
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from t1 /* must be 1 */;
 count(*)
 1
@@ -831,9 +831,9 @@ delete t2.* from t2,t5 where t2.a=t5.a + 1;
 ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; delete t2.* from t2,t5 where t2.a=t5.a + 1
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 select count(*) from t1 /* must be 1 */;
 count(*)
 1
@@ -851,14 +851,14 @@ count(*)
 2
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Intvar	#	#	INSERT_ID=10
 master-bin.000001	#	User var	#	#	@`b`=_latin1 0x3135 COLLATE latin1_swedish_ci
 master-bin.000001	#	Begin_load_query	#	#	;file_id=#;block_len=#
 master-bin.000001	#	Intvar	#	#	INSERT_ID=10
 master-bin.000001	#	User var	#	#	@`b`=_latin1 0x3135 COLLATE latin1_swedish_ci
 master-bin.000001	#	Execute_load_query	#	#	use `test`; load data infile '../../std_data/rpl_loaddata.dat' into table t4 (a, @b) set b= @b + bug27417(2) ;file_id=#
-master-bin.000001	#	Query	#	#	use `test`; ROLLBACK
+master-bin.000001	#	Query	#	#	ROLLBACK
 drop trigger trg_del_t2;
 drop table t1,t2,t3,t4,t5;
 drop function bug27417;
diff --git a/mysql-test/suite/binlog/r/binlog_stm_ps.result b/mysql-test/suite/binlog/r/binlog_stm_ps.result
index ea7cc6f16df..3af525e297c 100644
--- a/mysql-test/suite/binlog/r/binlog_stm_ps.result
+++ b/mysql-test/suite/binlog/r/binlog_stm_ps.result
@@ -11,7 +11,7 @@ prepare s from "insert into t1 select 100 limit ?";
 set @a=100;
 execute s using @a;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (a int)
diff --git a/mysql-test/suite/binlog/r/binlog_stm_row.result b/mysql-test/suite/binlog/r/binlog_stm_row.result
index d1cc55f03b3..f96073a2b92 100644
--- a/mysql-test/suite/binlog/r/binlog_stm_row.result
+++ b/mysql-test/suite/binlog/r/binlog_stm_row.result
@@ -65,10 +65,10 @@ master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t2 VALUES(2)
 master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t1 SELECT * FROM t2 WHERE GET_LOCK('Bug#34306', 120)
 master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t2 VALUES (3)
 master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t2 VALUES (4)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 DROP TABLE t1;
 DROP TABLE t2;
 SET GLOBAL BINLOG_FORMAT = @saved_global_binlog_format;
diff --git a/mysql-test/suite/binlog/r/binlog_stm_unsafe_warning.result b/mysql-test/suite/binlog/r/binlog_stm_unsafe_warning.result
new file mode 100644
index 00000000000..439bff0cfe1
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_stm_unsafe_warning.result
@@ -0,0 +1,30 @@
+### NOT filtered database => assertion: warnings ARE shown
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a int, b int, primary key (a));
+INSERT INTO t1 VALUES (1,2), (2,3);
+UPDATE t1 SET b='4' WHERE a=1 LIMIT 1;
+Warnings:
+Note	1592	Statement may not be safe to log in statement format.
+UPDATE t1 SET b='5' WHERE a=2 ORDER BY a LIMIT 1;
+Warnings:
+Note	1592	Statement may not be safe to log in statement format.
+DROP TABLE t1;
+### NOT filtered database => assertion: binlog disabled and warnings ARE NOT shown
+SET SQL_LOG_BIN= 0;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a int, b int, primary key (a));
+INSERT INTO t1 VALUES (1,2), (2,3);
+UPDATE t1 SET b='4' WHERE a=1 LIMIT 1;
+UPDATE t1 SET b='5' WHERE a=2 ORDER BY a LIMIT 1;
+DROP TABLE t1;
+SET SQL_LOG_BIN= 1;
+### FILTERED database => assertion: warnings ARE NOT shown
+CREATE DATABASE b42851;
+USE b42851;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a int, b int, primary key (a));
+INSERT INTO t1 VALUES (1,2), (2,3);
+UPDATE t1 SET b='4' WHERE a=1 LIMIT 1;
+UPDATE t1 SET b='5' WHERE a=2 ORDER BY a LIMIT 1;
+DROP TABLE t1;
+DROP DATABASE b42851;
diff --git a/mysql-test/suite/binlog/r/binlog_tbl_metadata.result b/mysql-test/suite/binlog/r/binlog_tbl_metadata.result
new file mode 100644
index 00000000000..a2f185edc85
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_tbl_metadata.result
@@ -0,0 +1,156 @@
+RESET MASTER;
+DROP TABLE IF EXISTS `t1`;
+CREATE TABLE `t1` (
+`c1` int(11) NOT NULL AUTO_INCREMENT,
+`c2` varchar(30) NOT NULL,
+`c3` varchar(30) DEFAULT NULL,
+`c4` varchar(30) DEFAULT NULL,
+`c5` varchar(30) DEFAULT NULL,
+`c6` varchar(30) DEFAULT NULL,
+`c7` varchar(30) DEFAULT NULL,
+`c8` varchar(30) DEFAULT NULL,
+`c9` varchar(30) DEFAULT NULL,
+`c10` varchar(30) DEFAULT NULL,
+`c11` varchar(30) DEFAULT NULL,
+`c12` varchar(30) DEFAULT NULL,
+`c13` varchar(30) DEFAULT NULL,
+`c14` varchar(30) DEFAULT NULL,
+`c15` varchar(30) DEFAULT NULL,
+`c16` varchar(30) DEFAULT NULL,
+`c17` varchar(30) DEFAULT NULL,
+`c18` varchar(30) DEFAULT NULL,
+`c19` varchar(30) DEFAULT NULL,
+`c20` varchar(30) DEFAULT NULL,
+`c21` varchar(30) DEFAULT NULL,
+`c22` varchar(30) DEFAULT NULL,
+`c23` varchar(30) DEFAULT NULL,
+`c24` varchar(30) DEFAULT NULL,
+`c25` varchar(30) DEFAULT NULL,
+`c26` varchar(30) DEFAULT NULL,
+`c27` varchar(30) DEFAULT NULL,
+`c28` varchar(30) DEFAULT NULL,
+`c29` varchar(30) DEFAULT NULL,
+`c30` varchar(30) DEFAULT NULL,
+`c31` varchar(30) DEFAULT NULL,
+`c32` varchar(30) DEFAULT NULL,
+`c33` varchar(30) DEFAULT NULL,
+`c34` varchar(30) DEFAULT NULL,
+`c35` varchar(30) DEFAULT NULL,
+`c36` varchar(30) DEFAULT NULL,
+`c37` varchar(30) DEFAULT NULL,
+`c38` varchar(30) DEFAULT NULL,
+`c39` varchar(30) DEFAULT NULL,
+`c40` varchar(30) DEFAULT NULL,
+`c41` varchar(30) DEFAULT NULL,
+`c42` varchar(30) DEFAULT NULL,
+`c43` varchar(30) DEFAULT NULL,
+`c44` varchar(30) DEFAULT NULL,
+`c45` varchar(30) DEFAULT NULL,
+`c46` varchar(30) DEFAULT NULL,
+`c47` varchar(30) DEFAULT NULL,
+`c48` varchar(30) DEFAULT NULL,
+`c49` varchar(30) DEFAULT NULL,
+`c50` varchar(30) DEFAULT NULL,
+`c51` varchar(30) DEFAULT NULL,
+`c52` varchar(30) DEFAULT NULL,
+`c53` varchar(30) DEFAULT NULL,
+`c54` varchar(30) DEFAULT NULL,
+`c55` varchar(30) DEFAULT NULL,
+`c56` varchar(30) DEFAULT NULL,
+`c57` varchar(30) DEFAULT NULL,
+`c58` varchar(30) DEFAULT NULL,
+`c59` varchar(30) DEFAULT NULL,
+`c60` varchar(30) DEFAULT NULL,
+`c61` varchar(30) DEFAULT NULL,
+`c62` varchar(30) DEFAULT NULL,
+`c63` varchar(30) DEFAULT NULL,
+`c64` varchar(30) DEFAULT NULL,
+`c65` varchar(30) DEFAULT NULL,
+`c66` varchar(30) DEFAULT NULL,
+`c67` varchar(30) DEFAULT NULL,
+`c68` varchar(30) DEFAULT NULL,
+`c69` varchar(30) DEFAULT NULL,
+`c70` varchar(30) DEFAULT NULL,
+`c71` varchar(30) DEFAULT NULL,
+`c72` varchar(30) DEFAULT NULL,
+`c73` varchar(30) DEFAULT NULL,
+`c74` varchar(30) DEFAULT NULL,
+`c75` varchar(30) DEFAULT NULL,
+`c76` varchar(30) DEFAULT NULL,
+`c77` varchar(30) DEFAULT NULL,
+`c78` varchar(30) DEFAULT NULL,
+`c79` varchar(30) DEFAULT NULL,
+`c80` varchar(30) DEFAULT NULL,
+`c81` varchar(30) DEFAULT NULL,
+`c82` varchar(30) DEFAULT NULL,
+`c83` varchar(30) DEFAULT NULL,
+`c84` varchar(30) DEFAULT NULL,
+`c85` varchar(30) DEFAULT NULL,
+`c86` varchar(30) DEFAULT NULL,
+`c87` varchar(30) DEFAULT NULL,
+`c88` varchar(30) DEFAULT NULL,
+`c89` varchar(30) DEFAULT NULL,
+`c90` varchar(30) DEFAULT NULL,
+`c91` varchar(30) DEFAULT NULL,
+`c92` varchar(30) DEFAULT NULL,
+`c93` varchar(30) DEFAULT NULL,
+`c94` varchar(30) DEFAULT NULL,
+`c95` varchar(30) DEFAULT NULL,
+`c96` varchar(30) DEFAULT NULL,
+`c97` varchar(30) DEFAULT NULL,
+`c98` varchar(30) DEFAULT NULL,
+`c99` varchar(30) DEFAULT NULL,
+`c100` varchar(30) DEFAULT NULL,
+`c101` varchar(30) DEFAULT NULL,
+`c102` varchar(30) DEFAULT NULL,
+`c103` varchar(30) DEFAULT NULL,
+`c104` varchar(30) DEFAULT NULL,
+`c105` varchar(30) DEFAULT NULL,
+`c106` varchar(30) DEFAULT NULL,
+`c107` varchar(30) DEFAULT NULL,
+`c108` varchar(30) DEFAULT NULL,
+`c109` varchar(30) DEFAULT NULL,
+`c110` varchar(30) DEFAULT NULL,
+`c111` varchar(30) DEFAULT NULL,
+`c112` varchar(30) DEFAULT NULL,
+`c113` varchar(30) DEFAULT NULL,
+`c114` varchar(30) DEFAULT NULL,
+`c115` varchar(30) DEFAULT NULL,
+`c116` varchar(30) DEFAULT NULL,
+`c117` varchar(30) DEFAULT NULL,
+`c118` varchar(30) DEFAULT NULL,
+`c119` varchar(30) DEFAULT NULL,
+`c120` varchar(30) DEFAULT NULL,
+`c121` varchar(30) DEFAULT NULL,
+`c122` varchar(30) DEFAULT NULL,
+`c123` varchar(30) DEFAULT NULL,
+`c124` varchar(30) DEFAULT NULL,
+`c125` varchar(30) DEFAULT NULL,
+`c126` varchar(30) DEFAULT NULL,
+`c127` varchar(30) DEFAULT NULL,
+`c128` varchar(30) DEFAULT NULL,
+`c129` varchar(30) DEFAULT NULL,
+`c130` varchar(30) DEFAULT NULL,
+`c131` varchar(30) DEFAULT NULL,
+`c132` varchar(30) DEFAULT NULL,
+`c133` varchar(30) DEFAULT NULL,
+`c134` varchar(30) DEFAULT NULL,
+`c135` varchar(30) DEFAULT NULL,
+`c136` varchar(30) DEFAULT NULL,
+`c137` varchar(30) DEFAULT NULL,
+`c138` varchar(30) DEFAULT NULL,
+`c139` varchar(30) DEFAULT NULL,
+`c140` varchar(30) DEFAULT NULL,
+`c141` varchar(30) DEFAULT NULL,
+`c142` varchar(30) DEFAULT NULL,
+`c143` varchar(30) DEFAULT NULL,
+`c144` varchar(30) DEFAULT NULL,
+`c145` varchar(30) DEFAULT NULL,
+`c146` varchar(30) DEFAULT NULL,
+PRIMARY KEY (`c1`)
+) ENGINE=InnoDB;
+LOCK TABLES `t1` WRITE;
+INSERT INTO `t1` VALUES ('1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1');
+DROP TABLE `t1`;
+FLUSH LOGS;
+=== Using mysqlbinlog to detect failure. Before the patch mysqlbinlog would find a corrupted event, thence would fail.
diff --git a/mysql-test/suite/binlog/r/binlog_unsafe.result b/mysql-test/suite/binlog/r/binlog_unsafe.result
index 675c327e9e7..4c2c32ad8f1 100644
--- a/mysql-test/suite/binlog/r/binlog_unsafe.result
+++ b/mysql-test/suite/binlog/r/binlog_unsafe.result
@@ -10,25 +10,25 @@ INSERT DELAYED INTO t1 VALUES (5);
 ---- Insert directly ----
 INSERT INTO t1 VALUES (@@global.sync_binlog);
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 INSERT INTO t1 VALUES (@@session.insert_id);
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 INSERT INTO t1 VALUES (@@global.auto_increment_increment);
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 INSERT INTO t2 SELECT UUID();
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 INSERT INTO t2 VALUES (@@session.sql_mode);
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 INSERT INTO t2 VALUES (@@global.init_slave);
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 INSERT INTO t2 VALUES (@@hostname);
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 ---- Insert from stored procedure ----
 CREATE PROCEDURE proc()
 BEGIN
@@ -42,13 +42,13 @@ INSERT INTO t2 VALUES (@@hostname);
 END|
 CALL proc();
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 ---- Insert from stored function ----
 CREATE FUNCTION func()
 RETURNS INT
@@ -66,13 +66,13 @@ SELECT func();
 func()
 0
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 ---- Insert from trigger ----
 CREATE TRIGGER trig
 BEFORE INSERT ON trigger_table
@@ -88,14 +88,14 @@ INSERT INTO t2 VALUES (@@hostname);
 END|
 INSERT INTO trigger_table VALUES ('bye.');
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 ---- Insert from prepared statement ----
 PREPARE p1 FROM 'INSERT INTO t1 VALUES (@@global.sync_binlog)';
 PREPARE p2 FROM 'INSERT INTO t1 VALUES (@@session.insert_id)';
@@ -106,25 +106,25 @@ PREPARE p6 FROM 'INSERT INTO t2 VALUES (@@global.init_slave)';
 PREPARE p7 FROM 'INSERT INTO t2 VALUES (@@hostname)';
 EXECUTE p1;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 EXECUTE p2;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 EXECUTE p3;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 EXECUTE p4;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 EXECUTE p5;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 EXECUTE p6;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 EXECUTE p7;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 ---- Insert from nested call of triggers / functions / procedures ----
 CREATE PROCEDURE proc1()
 INSERT INTO trigger_table VALUES ('ha!')|
@@ -154,13 +154,13 @@ EXECUTE prep6;
 func5()
 0
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 ==== Variables that should *not* be unsafe ====
 INSERT INTO t1 VALUES (@@session.pseudo_thread_id);
 INSERT INTO t1 VALUES (@@session.pseudo_thread_id);
@@ -195,16 +195,16 @@ DROP TABLE t1, t2, t3, trigger_table, trigger_table2;
 CREATE TABLE t1(a INT, b INT, KEY(a), PRIMARY KEY(b));
 INSERT INTO t1 SELECT * FROM t1 LIMIT 1;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 REPLACE INTO t1 SELECT * FROM t1 LIMIT 1;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 UPDATE t1 SET a=1 LIMIT 1;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 DELETE FROM t1 LIMIT 1;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 CREATE PROCEDURE p1()
 BEGIN
 INSERT INTO t1 SELECT * FROM t1 LIMIT 1;
@@ -214,10 +214,10 @@ DELETE FROM t1 LIMIT 1;
 END|
 CALL p1();
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 DROP PROCEDURE p1;
 DROP TABLE t1;
 DROP TABLE IF EXISTS t1;
@@ -225,16 +225,16 @@ CREATE TABLE t1 (a VARCHAR(100), b VARCHAR(100));
 INSERT INTO t1 VALUES ('a','b');
 UPDATE t1 SET b = '%s%s%s%s%s%s%s%s%s%s%s%s%s%s' WHERE a = 'a' LIMIT 1;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 DROP TABLE t1;
 DROP TABLE IF EXISTS t1, t2;
 CREATE TABLE t1(i INT PRIMARY KEY);
 CREATE TABLE t2(i INT PRIMARY KEY);
 CREATE TABLE t3(i INT, ch CHAR(50));
-"Should issue message Statement is not safe to log in statement format."
+"Should issue message Statement may not be safe to log in statement format."
 INSERT INTO t1 SELECT * FROM t2 LIMIT 1;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 CREATE FUNCTION func6()
 RETURNS INT
 BEGIN
@@ -243,10 +243,10 @@ INSERT INTO t1 VALUES (11);
 INSERT INTO t1 VALUES (12);
 RETURN 0;
 END|
-"Should issue message Statement is not safe to log in statement format only once"
+"Should issue message Statement may not be safe to log in statement format only once"
 INSERT INTO t3 VALUES(func6(), UUID());
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 "Check whether SET @@SQL_LOG_BIN = 0/1 doesn't work in substatements"
 CREATE FUNCTION fun_check_log_bin() RETURNS INT
 BEGIN
@@ -259,7 +259,7 @@ SELECT fun_check_log_bin();
 fun_check_log_bin()
 100
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 "SQL_LOG_BIN should be ON still"
 SHOW VARIABLES LIKE "SQL_LOG_BIN";
 Variable_name	Value
@@ -315,16 +315,16 @@ CREATE TABLE t1(i INT PRIMARY KEY);
 CREATE TABLE t2(i INT PRIMARY KEY);
 INSERT INTO t1 SELECT * FROM t2 LIMIT 1;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 INSERT INTO t1 VALUES(@@global.sync_binlog);
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 UPDATE t1 SET i = 999 LIMIT 1;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 DELETE FROM t1 LIMIT 1;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 DROP TABLE t1, t2;
 SET @@SESSION.SQL_MODE = @save_sql_mode;
 "End of tests"
diff --git a/mysql-test/suite/binlog/t/binlog_incident-master.opt b/mysql-test/suite/binlog/t/binlog_incident-master.opt
new file mode 100644
index 00000000000..57ce0081ae5
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_incident-master.opt
@@ -0,0 +1 @@
+--loose-debug=+d,incident_database_resync_on_replace
\ No newline at end of file
diff --git a/mysql-test/suite/binlog/t/binlog_incident.test b/mysql-test/suite/binlog/t/binlog_incident.test
new file mode 100644
index 00000000000..208c7f24df2
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_incident.test
@@ -0,0 +1,27 @@
+# The purpose of this test is to provide a reference for how the
+# incident log event is represented in the output from the mysqlbinlog
+# program.
+
+source include/have_log_bin.inc;
+source include/have_debug.inc;
+
+let $MYSQLD_DATADIR= `select @@datadir`;
+
+CREATE TABLE t1 (a INT);
+
+INSERT INTO t1 VALUES (1),(2),(3);
+SELECT * FROM t1;
+
+# This will generate an incident log event and store it in the binary
+# log before the replace statement.
+REPLACE INTO t1 VALUES (4);
+
+DROP TABLE t1;
+FLUSH LOGS;
+
+exec $MYSQL_BINLOG --start-position=106 $MYSQLD_DATADIR/master-bin.000001 >$MYSQLTEST_VARDIR/tmp/binlog_incident-bug44442.sql;
+--disable_query_log
+eval SELECT cont LIKE '%RELOAD DATABASE; # Shall generate syntax error%' AS `Contain RELOAD DATABASE` FROM (SELECT load_file('$MYSQLTEST_VARDIR/tmp/binlog_incident-bug44442.sql') AS cont) AS tbl;
+--enable_query_log
+
+remove_file $MYSQLTEST_VARDIR/tmp/binlog_incident-bug44442.sql;
\ No newline at end of file
diff --git a/mysql-test/suite/binlog/t/binlog_row_mysqlbinlog_db_filter.test b/mysql-test/suite/binlog/t/binlog_row_mysqlbinlog_db_filter.test
new file mode 100644
index 00000000000..0422c204270
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_row_mysqlbinlog_db_filter.test
@@ -0,0 +1,143 @@
+# BUG#42941: --database parameter to mysqlbinlog fails with RBR
+#
+# WHAT
+# ====
+#
+#  This test aims at checking whether a rows log event is printed or
+#  not when --database parameter is used to filter events from one
+#  given database.
+#
+# HOW
+# ===
+#
+#  The test is implemented as follows: 
+#
+#    i) Some operations are done in two different databases:
+#       'test' and 'b42941';
+#   ii) mysqlbinlog is used to dump the contents of the binlog file
+#       filtering only events from 'b42941'. The result of the dump is
+#       stored in a temporary file. (This is done with and without
+#       --verbose/hexdump flag);
+#  iii) The contents of the dump are loaded into a session variable;
+#   iv) The variable contents are searched for 'test' and 'b42941';
+#    v) Should 'test' be found, an ERROR is reported. Should 'b42941' be
+#       absent, an ERROR is reported.
+
+-- source include/have_log_bin.inc
+-- source include/have_binlog_format_row.inc
+-- source include/have_innodb.inc
+
+RESET MASTER;
+-- let $MYSQLD_DATADIR= `select @@datadir`
+
+CREATE TABLE t1 (id int);
+CREATE TABLE t2 (id int);
+CREATE TABLE t3 (txt TEXT);
+CREATE TABLE t4 (a int) ENGINE= InnoDB;
+INSERT INTO t1 VALUES (1);
+INSERT INTO t1 VALUES (2);
+INSERT INTO t2 VALUES (1);
+INSERT INTO t2 VALUES (2);
+INSERT INTO t1 VALUES (3);
+-- replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+-- eval LOAD DATA INFILE '$MYSQLTEST_VARDIR/std_data/words.dat' INTO TABLE t3
+INSERT INTO t1 VALUES (4);
+
+CREATE DATABASE b42941;
+use b42941;
+CREATE TABLE t1 (id int);
+CREATE TABLE t2 (id int);
+CREATE TABLE t3 (txt TEXT);
+CREATE TABLE t4 (a int) ENGINE= InnoDB;
+INSERT INTO t1 VALUES (1);
+INSERT INTO t1 VALUES (2);
+INSERT INTO t2 VALUES (1);
+INSERT INTO t2 VALUES (2);
+INSERT INTO t1 VALUES (3);
+-- replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+-- eval LOAD DATA INFILE '$MYSQLTEST_VARDIR/std_data/words.dat' INTO TABLE t3
+INSERT INTO t1 VALUES (4);
+
+INSERT INTO test.t1 VALUES (5);
+
+FLUSH LOGS;
+
+UPDATE test.t1 t11, b42941.t1 t12 SET t11.id=10, t12.id=100;
+
+BEGIN;
+INSERT INTO test.t4 VALUES (1);
+INSERT INTO b42941.t4 VALUES (1);
+UPDATE test.t4 tn4, b42941.t4 tt4 SET tn4.a= 10, tt4.a= 100;
+COMMIT;
+
+FLUSH LOGS;
+
+-- let $log_file1= $MYSQLD_DATADIR/master-bin.000001
+-- let $log_file2= $MYSQLD_DATADIR/master-bin.000002
+-- let $outfile= $MYSQLTEST_VARDIR/tmp/b42941-mysqlbinlog
+-- let $cmd= $MYSQL_BINLOG
+
+let $i= 3;
+while($i)
+{
+  -- let $flags=--database=b42941
+
+  # construct CLI for mysqlbinlog
+  if(`SELECT $i=3`)
+  {
+    -- let $flags= $flags --verbose --hexdump
+  }
+
+  if(`SELECT $i=2`)
+  {
+    -- let $flags= $flags --verbose
+  }
+
+#  if(`SELECT $i=1`)
+#  {
+    # do nothing $flags is already set as it should be
+#  }
+
+  # execute mysqlbinlog on the two available master binlog files
+  -- exec $cmd $flags $log_file1 > $outfile.1
+  -- exec $cmd $flags $log_file2 > $outfile.2
+
+  # load outputs into a variable
+  -- replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+  -- eval SET @b42941_output.1= LOAD_FILE('$outfile.1')
+
+  -- replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+  -- eval SET @b42941_output.2= LOAD_FILE('$outfile.2')
+
+  # remove unecessary files
+  -- remove_file $outfile.1
+  -- remove_file $outfile.2
+
+  # assertion: events for database test are filtered
+  if (`SELECT INSTR(@b42941_output.1, 'test')`)
+  {
+    -- echo **** ERROR **** Database name 'test' FOUND in mysqlbinlog output ($flags $outfile.1).
+  }
+
+  if (`SELECT INSTR(@b42941_output.2, 'test')`)
+  {
+    -- echo **** ERROR **** Database name 'test' FOUND in mysqlbinlog output ($flags $outfile.2).
+  }
+
+  # assertion: events for database b42941 are not filtered
+  if (!`SELECT INSTR(@b42941_output.1, 'b42941')`)
+  {
+    -- echo **** ERROR **** Database name 'b42941' NOT FOUND in mysqlbinlog output ($flags $outfile.1).
+  }
+
+  if (!`SELECT INSTR(@b42941_output.2, 'b42941')`)
+  {
+    -- echo **** ERROR **** Database name 'b42941' NOT FOUND in mysqlbinlog output ($flags $outfile.2).
+  }
+
+  dec $i;
+}
+
+DROP DATABASE b42941;
+use test;
+DROP TABLE t1, t2, t3, t4;
diff --git a/mysql-test/suite/binlog/t/binlog_stm_unsafe_warning-master.opt b/mysql-test/suite/binlog/t/binlog_stm_unsafe_warning-master.opt
new file mode 100644
index 00000000000..24c2027e399
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_stm_unsafe_warning-master.opt
@@ -0,0 +1 @@
+--binlog-ignore-db=b42851
diff --git a/mysql-test/suite/binlog/t/binlog_stm_unsafe_warning.test b/mysql-test/suite/binlog/t/binlog_stm_unsafe_warning.test
new file mode 100644
index 00000000000..0bf685ea921
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_stm_unsafe_warning.test
@@ -0,0 +1,73 @@
+# BUG#42851: Spurious "Statement is not safe to log in statement
+#            format." warnings
+#
+# WHY
+# ===
+#   
+#   This test aims at checking that the fix that removes spurious
+#   entries in the error log when the statement is filtered out from
+#   binlog, is working.
+#
+# HOW
+# ===
+#
+#   The test case is split into three assertions when issuing statements
+#   containing LIMIT and ORDER BY:
+#
+#     i) issue statements in database that is not filtered => check
+#        that warnings ARE shown;
+#
+#    ii) issue statements in database that is not filtered, but with
+#        binlog disabled => check that warnings ARE NOT shown;
+#
+#   iii) issue statements in database that is filtered => check that
+#        warnings ARE NOT shown.
+
+-- source include/have_log_bin.inc
+-- source include/have_binlog_format_statement.inc
+
+-- echo ### NOT filtered database => assertion: warnings ARE shown
+
+-- disable_warnings
+DROP TABLE IF EXISTS t1;
+-- enable_warnings
+
+CREATE TABLE t1 (a int, b int, primary key (a));
+INSERT INTO t1 VALUES (1,2), (2,3);
+UPDATE t1 SET b='4' WHERE a=1 LIMIT 1;
+UPDATE t1 SET b='5' WHERE a=2 ORDER BY a LIMIT 1;
+DROP TABLE t1;
+
+-- echo ### NOT filtered database => assertion: binlog disabled and warnings ARE NOT shown
+
+SET SQL_LOG_BIN= 0;
+
+-- disable_warnings
+DROP TABLE IF EXISTS t1;
+-- enable_warnings
+
+CREATE TABLE t1 (a int, b int, primary key (a));
+INSERT INTO t1 VALUES (1,2), (2,3);
+UPDATE t1 SET b='4' WHERE a=1 LIMIT 1;
+UPDATE t1 SET b='5' WHERE a=2 ORDER BY a LIMIT 1;
+DROP TABLE t1;
+
+SET SQL_LOG_BIN= 1;
+
+-- echo ### FILTERED database => assertion: warnings ARE NOT shown
+
+CREATE DATABASE b42851;
+USE b42851;
+
+-- disable_warnings
+DROP TABLE IF EXISTS t1;
+-- enable_warnings
+
+CREATE TABLE t1 (a int, b int, primary key (a));
+INSERT INTO t1 VALUES (1,2), (2,3);
+UPDATE t1 SET b='4' WHERE a=1 LIMIT 1;
+UPDATE t1 SET b='5' WHERE a=2 ORDER BY a LIMIT 1;
+DROP TABLE t1;
+
+# clean up
+DROP DATABASE b42851;
diff --git a/mysql-test/suite/binlog/t/binlog_tbl_metadata.test b/mysql-test/suite/binlog/t/binlog_tbl_metadata.test
new file mode 100644
index 00000000000..5e847ab5fbd
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_tbl_metadata.test
@@ -0,0 +1,199 @@
+#
+# BUG#42749: infinite loop writing to row based binlog - processlist shows
+# "freeing items"
+#
+# WHY
+# ===
+#   
+#   This bug would make table map event to report data_written one byte less
+#   than what would actually be written in its body. This would cause one byte shorter
+#   event end_log_pos. The ultimate impact was that it would make fixing the
+#   position in MYSQL_BIN_LOG::write_cache bogus or end up in an infinite loop.
+#
+# HOW
+# ===
+#
+#   Checking that the patch fixes the problem is done as follows:
+#     i) a table with several fields is created;
+#    ii) an insert is performed;
+#   iii) the logs are flushed;
+#    iv) mysqlbinlog is used to check if it succeeds.
+# 
+#   In step iv), before the bug was fixed, the test case would fail with
+#   mysqlbinlog reporting that it was unable to succeed in reading the event.
+#
+
+-- source include/have_log_bin.inc
+-- source include/have_innodb.inc
+-- source include/have_binlog_format_row.inc
+-- connection default
+
+RESET MASTER;
+
+-- disable_warnings
+DROP TABLE IF EXISTS `t1`;
+-- enable_warnings
+
+CREATE TABLE `t1` (
+  `c1` int(11) NOT NULL AUTO_INCREMENT,
+  `c2` varchar(30) NOT NULL,
+  `c3` varchar(30) DEFAULT NULL,
+  `c4` varchar(30) DEFAULT NULL,
+  `c5` varchar(30) DEFAULT NULL,
+  `c6` varchar(30) DEFAULT NULL,
+  `c7` varchar(30) DEFAULT NULL,
+  `c8` varchar(30) DEFAULT NULL,
+  `c9` varchar(30) DEFAULT NULL,
+  `c10` varchar(30) DEFAULT NULL,
+  `c11` varchar(30) DEFAULT NULL,
+  `c12` varchar(30) DEFAULT NULL,
+  `c13` varchar(30) DEFAULT NULL,
+  `c14` varchar(30) DEFAULT NULL,
+  `c15` varchar(30) DEFAULT NULL,
+  `c16` varchar(30) DEFAULT NULL,
+  `c17` varchar(30) DEFAULT NULL,
+  `c18` varchar(30) DEFAULT NULL,
+  `c19` varchar(30) DEFAULT NULL,
+  `c20` varchar(30) DEFAULT NULL,
+  `c21` varchar(30) DEFAULT NULL,
+  `c22` varchar(30) DEFAULT NULL,
+  `c23` varchar(30) DEFAULT NULL,
+  `c24` varchar(30) DEFAULT NULL,
+  `c25` varchar(30) DEFAULT NULL,
+  `c26` varchar(30) DEFAULT NULL,
+  `c27` varchar(30) DEFAULT NULL,
+  `c28` varchar(30) DEFAULT NULL,
+  `c29` varchar(30) DEFAULT NULL,
+  `c30` varchar(30) DEFAULT NULL,
+  `c31` varchar(30) DEFAULT NULL,
+  `c32` varchar(30) DEFAULT NULL,
+  `c33` varchar(30) DEFAULT NULL,
+  `c34` varchar(30) DEFAULT NULL,
+  `c35` varchar(30) DEFAULT NULL,
+  `c36` varchar(30) DEFAULT NULL,
+  `c37` varchar(30) DEFAULT NULL,
+  `c38` varchar(30) DEFAULT NULL,
+  `c39` varchar(30) DEFAULT NULL,
+  `c40` varchar(30) DEFAULT NULL,
+  `c41` varchar(30) DEFAULT NULL,
+  `c42` varchar(30) DEFAULT NULL,
+  `c43` varchar(30) DEFAULT NULL,
+  `c44` varchar(30) DEFAULT NULL,
+  `c45` varchar(30) DEFAULT NULL,
+  `c46` varchar(30) DEFAULT NULL,
+  `c47` varchar(30) DEFAULT NULL,
+  `c48` varchar(30) DEFAULT NULL,
+  `c49` varchar(30) DEFAULT NULL,
+  `c50` varchar(30) DEFAULT NULL,
+  `c51` varchar(30) DEFAULT NULL,
+  `c52` varchar(30) DEFAULT NULL,
+  `c53` varchar(30) DEFAULT NULL,
+  `c54` varchar(30) DEFAULT NULL,
+  `c55` varchar(30) DEFAULT NULL,
+  `c56` varchar(30) DEFAULT NULL,
+  `c57` varchar(30) DEFAULT NULL,
+  `c58` varchar(30) DEFAULT NULL,
+  `c59` varchar(30) DEFAULT NULL,
+  `c60` varchar(30) DEFAULT NULL,
+  `c61` varchar(30) DEFAULT NULL,
+  `c62` varchar(30) DEFAULT NULL,
+  `c63` varchar(30) DEFAULT NULL,
+  `c64` varchar(30) DEFAULT NULL,
+  `c65` varchar(30) DEFAULT NULL,
+  `c66` varchar(30) DEFAULT NULL,
+  `c67` varchar(30) DEFAULT NULL,
+  `c68` varchar(30) DEFAULT NULL,
+  `c69` varchar(30) DEFAULT NULL,
+  `c70` varchar(30) DEFAULT NULL,
+  `c71` varchar(30) DEFAULT NULL,
+  `c72` varchar(30) DEFAULT NULL,
+  `c73` varchar(30) DEFAULT NULL,
+  `c74` varchar(30) DEFAULT NULL,
+  `c75` varchar(30) DEFAULT NULL,
+  `c76` varchar(30) DEFAULT NULL,
+  `c77` varchar(30) DEFAULT NULL,
+  `c78` varchar(30) DEFAULT NULL,
+  `c79` varchar(30) DEFAULT NULL,
+  `c80` varchar(30) DEFAULT NULL,
+  `c81` varchar(30) DEFAULT NULL,
+  `c82` varchar(30) DEFAULT NULL,
+  `c83` varchar(30) DEFAULT NULL,
+  `c84` varchar(30) DEFAULT NULL,
+  `c85` varchar(30) DEFAULT NULL,
+  `c86` varchar(30) DEFAULT NULL,
+  `c87` varchar(30) DEFAULT NULL,
+  `c88` varchar(30) DEFAULT NULL,
+  `c89` varchar(30) DEFAULT NULL,
+  `c90` varchar(30) DEFAULT NULL,
+  `c91` varchar(30) DEFAULT NULL,
+  `c92` varchar(30) DEFAULT NULL,
+  `c93` varchar(30) DEFAULT NULL,
+  `c94` varchar(30) DEFAULT NULL,
+  `c95` varchar(30) DEFAULT NULL,
+  `c96` varchar(30) DEFAULT NULL,
+  `c97` varchar(30) DEFAULT NULL,
+  `c98` varchar(30) DEFAULT NULL,
+  `c99` varchar(30) DEFAULT NULL,
+  `c100` varchar(30) DEFAULT NULL,
+  `c101` varchar(30) DEFAULT NULL,
+  `c102` varchar(30) DEFAULT NULL,
+  `c103` varchar(30) DEFAULT NULL,
+  `c104` varchar(30) DEFAULT NULL,
+  `c105` varchar(30) DEFAULT NULL,
+  `c106` varchar(30) DEFAULT NULL,
+  `c107` varchar(30) DEFAULT NULL,
+  `c108` varchar(30) DEFAULT NULL,
+  `c109` varchar(30) DEFAULT NULL,
+  `c110` varchar(30) DEFAULT NULL,
+  `c111` varchar(30) DEFAULT NULL,
+  `c112` varchar(30) DEFAULT NULL,
+  `c113` varchar(30) DEFAULT NULL,
+  `c114` varchar(30) DEFAULT NULL,
+  `c115` varchar(30) DEFAULT NULL,
+  `c116` varchar(30) DEFAULT NULL,
+  `c117` varchar(30) DEFAULT NULL,
+  `c118` varchar(30) DEFAULT NULL,
+  `c119` varchar(30) DEFAULT NULL,
+  `c120` varchar(30) DEFAULT NULL,
+  `c121` varchar(30) DEFAULT NULL,
+  `c122` varchar(30) DEFAULT NULL,
+  `c123` varchar(30) DEFAULT NULL,
+  `c124` varchar(30) DEFAULT NULL,
+  `c125` varchar(30) DEFAULT NULL,
+  `c126` varchar(30) DEFAULT NULL,
+  `c127` varchar(30) DEFAULT NULL,
+  `c128` varchar(30) DEFAULT NULL,
+  `c129` varchar(30) DEFAULT NULL,
+  `c130` varchar(30) DEFAULT NULL,
+  `c131` varchar(30) DEFAULT NULL,
+  `c132` varchar(30) DEFAULT NULL,
+  `c133` varchar(30) DEFAULT NULL,
+  `c134` varchar(30) DEFAULT NULL,
+  `c135` varchar(30) DEFAULT NULL,
+  `c136` varchar(30) DEFAULT NULL,
+  `c137` varchar(30) DEFAULT NULL,
+  `c138` varchar(30) DEFAULT NULL,
+  `c139` varchar(30) DEFAULT NULL,
+  `c140` varchar(30) DEFAULT NULL,
+  `c141` varchar(30) DEFAULT NULL,
+  `c142` varchar(30) DEFAULT NULL,
+  `c143` varchar(30) DEFAULT NULL,
+  `c144` varchar(30) DEFAULT NULL,
+  `c145` varchar(30) DEFAULT NULL,
+  `c146` varchar(30) DEFAULT NULL,
+  PRIMARY KEY (`c1`)
+) ENGINE=InnoDB;
+
+LOCK TABLES `t1` WRITE;
+
+INSERT INTO `t1` VALUES ('1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1','1');
+
+DROP TABLE `t1`;
+
+FLUSH LOGS;
+
+-- echo === Using mysqlbinlog to detect failure. Before the patch mysqlbinlog would find a corrupted event, thence would fail.
+
+-- let $MYSQLD_DATADIR= `SELECT @@datadir`;
+-- exec $MYSQL_BINLOG $MYSQLD_DATADIR/master-bin.000001 > $MYSQLTEST_VARDIR/tmp/mysqlbinlog_bug42749.binlog
+-- remove_file $MYSQLTEST_VARDIR/tmp/mysqlbinlog_bug42749.binlog
diff --git a/mysql-test/suite/binlog/t/binlog_unsafe.test b/mysql-test/suite/binlog/t/binlog_unsafe.test
index 1b0f0a6c30a..c4e1f31cbce 100644
--- a/mysql-test/suite/binlog/t/binlog_unsafe.test
+++ b/mysql-test/suite/binlog/t/binlog_unsafe.test
@@ -289,7 +289,7 @@ CREATE TABLE t1(i INT PRIMARY KEY);
 CREATE TABLE t2(i INT PRIMARY KEY);
 CREATE TABLE t3(i INT, ch CHAR(50));
 
---echo "Should issue message Statement is not safe to log in statement format."
+--echo "Should issue message Statement may not be safe to log in statement format."
 INSERT INTO t1 SELECT * FROM t2 LIMIT 1;
 
 DELIMITER |;
@@ -302,7 +302,7 @@ BEGIN
   RETURN 0;
 END|
 DELIMITER ;|
---echo "Should issue message Statement is not safe to log in statement format only once"
+--echo "Should issue message Statement may not be safe to log in statement format only once"
 INSERT INTO t3 VALUES(func6(), UUID());
 
 --echo "Check whether SET @@SQL_LOG_BIN = 0/1 doesn't work in substatements"
diff --git a/mysql-test/suite/funcs_1/datadict/is_key_column_usage.inc b/mysql-test/suite/funcs_1/datadict/is_key_column_usage.inc
index c8e8a186673..098b8c6eca2 100644
--- a/mysql-test/suite/funcs_1/datadict/is_key_column_usage.inc
+++ b/mysql-test/suite/funcs_1/datadict/is_key_column_usage.inc
@@ -126,7 +126,6 @@ ORDER BY constraint_catalog, constraint_schema, constraint_name,
 eval $select;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , db_datadict);
 eval $select;
diff --git a/mysql-test/suite/funcs_1/datadict/is_routines.inc b/mysql-test/suite/funcs_1/datadict/is_routines.inc
index 573967cbc1b..c2b547e600f 100644
--- a/mysql-test/suite/funcs_1/datadict/is_routines.inc
+++ b/mysql-test/suite/funcs_1/datadict/is_routines.inc
@@ -96,10 +96,11 @@ CREATE FUNCTION function_for_routines() RETURNS INT RETURN 0;
 SELECT specific_name,routine_catalog,routine_schema,routine_name,routine_type,
        routine_body,external_name,external_language,parameter_style,sql_path
 FROM information_schema.routines
-WHERE routine_catalog   IS NOT NULL OR external_name   IS NOT NULL
+WHERE routine_schema = 'test' AND
+   (routine_catalog   IS NOT NULL OR external_name   IS NOT NULL
    OR external_language IS NOT NULL OR sql_path        IS NOT NULL
    OR routine_body      <> 'SQL'    OR parameter_style <> 'SQL'
-   OR specific_name     <> routine_name;
+   OR specific_name     <> routine_name);
 
 DROP PROCEDURE sp_for_routines;
 DROP FUNCTION  function_for_routines;
@@ -178,7 +179,6 @@ GRANT EXECUTE ON db_datadict_2.* TO 'testuser2'@'localhost';
 FLUSH PRIVILEGES;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , db_datadict);
 --replace_column 16 "YYYY-MM-DD hh:mm:ss" 17 "YYYY-MM-DD hh:mm:ss"
diff --git a/mysql-test/suite/funcs_1/datadict/is_schemata.inc b/mysql-test/suite/funcs_1/datadict/is_schemata.inc
index 96061d541b7..29e1f6af4ef 100644
--- a/mysql-test/suite/funcs_1/datadict/is_schemata.inc
+++ b/mysql-test/suite/funcs_1/datadict/is_schemata.inc
@@ -104,7 +104,6 @@ eval $my_select;
 eval $my_show;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , db_datadict_1);
 # Shows db_datadict_1
diff --git a/mysql-test/suite/funcs_1/datadict/is_tables.inc b/mysql-test/suite/funcs_1/datadict/is_tables.inc
index 4f608eb02ea..d1e4608a572 100644
--- a/mysql-test/suite/funcs_1/datadict/is_tables.inc
+++ b/mysql-test/suite/funcs_1/datadict/is_tables.inc
@@ -130,7 +130,6 @@ WHERE table_schema = 'db_datadict' ORDER BY table_name;
 let $my_show = SHOW TABLES FROM db_datadict;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , db_datadict);
 # tb2 is not granted to anyone
diff --git a/mysql-test/suite/funcs_1/datadict/is_triggers.inc b/mysql-test/suite/funcs_1/datadict/is_triggers.inc
index 70d5540e163..3b83a75295b 100644
--- a/mysql-test/suite/funcs_1/datadict/is_triggers.inc
+++ b/mysql-test/suite/funcs_1/datadict/is_triggers.inc
@@ -122,7 +122,6 @@ let $my_select = SELECT * FROM information_schema.triggers
 WHERE trigger_name = 'trg1';
 let $my_show =   SHOW TRIGGERS FROM db_datadict;
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , db_datadict);
 --replace_result $engine_type <engine_type>
diff --git a/mysql-test/suite/funcs_1/datadict/is_views.inc b/mysql-test/suite/funcs_1/datadict/is_views.inc
index 542dab05a8e..32e66e4f684 100644
--- a/mysql-test/suite/funcs_1/datadict/is_views.inc
+++ b/mysql-test/suite/funcs_1/datadict/is_views.inc
@@ -108,7 +108,6 @@ WHERE table_schema = 'db_datadict' ORDER BY table_name;
 eval $select;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , test);
 eval $select;
diff --git a/mysql-test/suite/funcs_1/datadict/processlist_priv.inc b/mysql-test/suite/funcs_1/datadict/processlist_priv.inc
index d8e1b398bfc..b5c40c63566 100644
--- a/mysql-test/suite/funcs_1/datadict/processlist_priv.inc
+++ b/mysql-test/suite/funcs_1/datadict/processlist_priv.inc
@@ -212,7 +212,7 @@ GRANT PROCESS ON *.* TO ''@'localhost';
 --echo     anonymous user with PROCESS privilege
 --echo     SHOW/SELECT shows all processes/threads.
 --echo ####################################################################################
-connect (anonymous1,localhost,'',,information_schema);
+connect (anonymous1,localhost,"''",,information_schema);
 SHOW GRANTS;
 --replace_column 1 ID 3 HOST_NAME 6 TIME
 SHOW processlist;
@@ -253,7 +253,7 @@ REVOKE PROCESS ON *.* FROM ''@'localhost';
 
 --echo ####################################################################################
 --echo 7.1 New connection (anonymous2,localhost,'',,information_schema)
-connect (anonymous2,localhost,'',,information_schema);
+connect (anonymous2,localhost,"''",,information_schema);
 --echo     The anonymous user has no more the PROCESS privilege
 --echo     Again only the processes of the anonymous user are visible.
 --echo ####################################################################################
diff --git a/mysql-test/suite/funcs_1/datadict/statistics.inc b/mysql-test/suite/funcs_1/datadict/statistics.inc
index 6f24f422b5e..00fd7a1b06b 100644
--- a/mysql-test/suite/funcs_1/datadict/statistics.inc
+++ b/mysql-test/suite/funcs_1/datadict/statistics.inc
@@ -42,7 +42,6 @@ ORDER BY table_schema, table_name, index_name, seq_in_index, column_name;
 eval $my_select;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1,localhost,testuser1,,db_datadict);
 --replace_column 10 #CARD#
diff --git a/mysql-test/suite/funcs_1/datadict/table_constraints.inc b/mysql-test/suite/funcs_1/datadict/table_constraints.inc
index 513057c84a0..9e57976862b 100644
--- a/mysql-test/suite/funcs_1/datadict/table_constraints.inc
+++ b/mysql-test/suite/funcs_1/datadict/table_constraints.inc
@@ -33,7 +33,6 @@ ORDER BY table_schema,table_name,constraint_name;
 eval $my_select;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1,localhost,testuser1,,db_datadict);
 eval $my_select;
diff --git a/mysql-test/suite/funcs_1/datadict/tables.inc b/mysql-test/suite/funcs_1/datadict/tables.inc
index 8dae7ba0ebc..5aa072d184c 100644
--- a/mysql-test/suite/funcs_1/datadict/tables.inc
+++ b/mysql-test/suite/funcs_1/datadict/tables.inc
@@ -37,7 +37,6 @@ CREATE USER testuser1@localhost;
 GRANT SELECT ON test1.* TO testuser1@localhost;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1,localhost,testuser1,,test1);
 --source suite/funcs_1/datadict/tables2.inc
diff --git a/mysql-test/suite/funcs_1/datadict/tables1.inc b/mysql-test/suite/funcs_1/datadict/tables1.inc
index 2dff32d81a9..2e054a9dcfb 100644
--- a/mysql-test/suite/funcs_1/datadict/tables1.inc
+++ b/mysql-test/suite/funcs_1/datadict/tables1.inc
@@ -27,7 +27,6 @@ CREATE USER testuser1@localhost;
 GRANT SELECT ON test1.* TO testuser1@localhost;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1,localhost,testuser1,,test1);
 --source suite/funcs_1/datadict/tables2.inc
diff --git a/mysql-test/suite/funcs_1/r/charset_collation.result b/mysql-test/suite/funcs_1/r/charset_collation.result
new file mode 100644
index 00000000000..b3183ba48e1
--- /dev/null
+++ b/mysql-test/suite/funcs_1/r/charset_collation.result
@@ -0,0 +1,40 @@
+DROP USER dbdict_test@localhost;
+CREATE USER dbdict_test@localhost;
+# Establish connection con (user=dbdict_test)
+
+SELECT *
+FROM information_schema.character_sets
+WHERE character_set_name IN ('utf8','latin1','binary')
+ORDER BY character_set_name;
+CHARACTER_SET_NAME	DEFAULT_COLLATE_NAME	DESCRIPTION	MAXLEN
+binary	binary	Binary pseudo charset	1
+latin1	latin1_swedish_ci	cp1252 West European	1
+utf8	utf8_general_ci	UTF-8 Unicode	3
+
+SELECT *
+FROM information_schema.collations
+WHERE character_set_name IN ('utf8','latin1','binary')
+AND (collation_name LIKE CONCAT(character_set_name,'_general_ci')
+OR
+collation_name LIKE CONCAT(character_set_name,'_bin'))
+ORDER BY collation_name;
+COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
+latin1_bin	latin1	47		Yes	1
+latin1_general_ci	latin1	48		Yes	1
+utf8_bin	utf8	83		Yes	1
+utf8_general_ci	utf8	33	Yes	Yes	1
+
+SELECT *
+FROM information_schema.collation_character_set_applicability
+WHERE character_set_name IN ('utf8','latin1','binary')
+AND (collation_name LIKE CONCAT(character_set_name,'_general_ci')
+OR
+collation_name LIKE CONCAT(character_set_name,'_bin'))
+ORDER BY collation_name, character_set_name;
+COLLATION_NAME	CHARACTER_SET_NAME
+latin1_bin	latin1
+latin1_general_ci	latin1
+utf8_bin	utf8
+utf8_general_ci	utf8
+# Switch to connection default + disconnect con
+DROP USER dbdict_test@localhost;
diff --git a/mysql-test/suite/funcs_1/r/charset_collation_1.result b/mysql-test/suite/funcs_1/r/charset_collation_1.result
deleted file mode 100644
index 55ed4b4704c..00000000000
--- a/mysql-test/suite/funcs_1/r/charset_collation_1.result
+++ /dev/null
@@ -1,312 +0,0 @@
-DROP USER dbdict_test@localhost;
-CREATE USER dbdict_test@localhost;
-# Establish connection con (user=dbdict_test)
-
-SELECT *
-FROM information_schema.character_sets
-ORDER BY character_set_name;
-CHARACTER_SET_NAME	DEFAULT_COLLATE_NAME	DESCRIPTION	MAXLEN
-armscii8	armscii8_general_ci	ARMSCII-8 Armenian	1
-ascii	ascii_general_ci	US ASCII	1
-big5	big5_chinese_ci	Big5 Traditional Chinese	2
-binary	binary	Binary pseudo charset	1
-cp1250	cp1250_general_ci	Windows Central European	1
-cp1251	cp1251_general_ci	Windows Cyrillic	1
-cp1256	cp1256_general_ci	Windows Arabic	1
-cp1257	cp1257_general_ci	Windows Baltic	1
-cp850	cp850_general_ci	DOS West European	1
-cp852	cp852_general_ci	DOS Central European	1
-cp866	cp866_general_ci	DOS Russian	1
-cp932	cp932_japanese_ci	SJIS for Windows Japanese	2
-dec8	dec8_swedish_ci	DEC West European	1
-eucjpms	eucjpms_japanese_ci	UJIS for Windows Japanese	3
-euckr	euckr_korean_ci	EUC-KR Korean	2
-gb2312	gb2312_chinese_ci	GB2312 Simplified Chinese	2
-gbk	gbk_chinese_ci	GBK Simplified Chinese	2
-geostd8	geostd8_general_ci	GEOSTD8 Georgian	1
-greek	greek_general_ci	ISO 8859-7 Greek	1
-hebrew	hebrew_general_ci	ISO 8859-8 Hebrew	1
-hp8	hp8_english_ci	HP West European	1
-keybcs2	keybcs2_general_ci	DOS Kamenicky Czech-Slovak	1
-koi8r	koi8r_general_ci	KOI8-R Relcom Russian	1
-koi8u	koi8u_general_ci	KOI8-U Ukrainian	1
-latin1	latin1_swedish_ci	cp1252 West European	1
-latin2	latin2_general_ci	ISO 8859-2 Central European	1
-latin5	latin5_turkish_ci	ISO 8859-9 Turkish	1
-latin7	latin7_general_ci	ISO 8859-13 Baltic	1
-macce	macce_general_ci	Mac Central European	1
-macroman	macroman_general_ci	Mac West European	1
-sjis	sjis_japanese_ci	Shift-JIS Japanese	2
-swe7	swe7_swedish_ci	7bit Swedish	1
-tis620	tis620_thai_ci	TIS620 Thai	1
-ucs2	ucs2_general_ci	UCS-2 Unicode	2
-ujis	ujis_japanese_ci	EUC-JP Japanese	3
-utf8	utf8_general_ci	UTF-8 Unicode	3
-
-SELECT *
-FROM information_schema.collations
-ORDER BY collation_name;
-COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
-armscii8_bin	armscii8	64			0
-armscii8_general_ci	armscii8	32	Yes		0
-ascii_bin	ascii	65			0
-ascii_general_ci	ascii	11	Yes		0
-big5_bin	big5	84		Yes	1
-big5_chinese_ci	big5	1	Yes	Yes	1
-binary	binary	63	Yes	Yes	1
-cp1250_bin	cp1250	66		Yes	1
-cp1250_croatian_ci	cp1250	44		Yes	1
-cp1250_czech_cs	cp1250	34		Yes	2
-cp1250_general_ci	cp1250	26	Yes	Yes	1
-cp1250_polish_ci	cp1250	99		Yes	1
-cp1251_bin	cp1251	50			0
-cp1251_bulgarian_ci	cp1251	14			0
-cp1251_general_ci	cp1251	51	Yes		0
-cp1251_general_cs	cp1251	52			0
-cp1251_ukrainian_ci	cp1251	23			0
-cp1256_bin	cp1256	67			0
-cp1256_general_ci	cp1256	57	Yes		0
-cp1257_bin	cp1257	58			0
-cp1257_general_ci	cp1257	59	Yes		0
-cp1257_lithuanian_ci	cp1257	29			0
-cp850_bin	cp850	80			0
-cp850_general_ci	cp850	4	Yes		0
-cp852_bin	cp852	81			0
-cp852_general_ci	cp852	40	Yes		0
-cp866_bin	cp866	68			0
-cp866_general_ci	cp866	36	Yes		0
-cp932_bin	cp932	96		Yes	1
-cp932_japanese_ci	cp932	95	Yes	Yes	1
-dec8_bin	dec8	69			0
-dec8_swedish_ci	dec8	3	Yes		0
-eucjpms_bin	eucjpms	98		Yes	1
-eucjpms_japanese_ci	eucjpms	97	Yes	Yes	1
-euckr_bin	euckr	85		Yes	1
-euckr_korean_ci	euckr	19	Yes	Yes	1
-gb2312_bin	gb2312	86		Yes	1
-gb2312_chinese_ci	gb2312	24	Yes	Yes	1
-gbk_bin	gbk	87		Yes	1
-gbk_chinese_ci	gbk	28	Yes	Yes	1
-geostd8_bin	geostd8	93			0
-geostd8_general_ci	geostd8	92	Yes		0
-greek_bin	greek	70			0
-greek_general_ci	greek	25	Yes		0
-hebrew_bin	hebrew	71			0
-hebrew_general_ci	hebrew	16	Yes		0
-hp8_bin	hp8	72			0
-hp8_english_ci	hp8	6	Yes		0
-keybcs2_bin	keybcs2	73			0
-keybcs2_general_ci	keybcs2	37	Yes		0
-koi8r_bin	koi8r	74			0
-koi8r_general_ci	koi8r	7	Yes		0
-koi8u_bin	koi8u	75			0
-koi8u_general_ci	koi8u	22	Yes		0
-latin1_bin	latin1	47		Yes	1
-latin1_danish_ci	latin1	15		Yes	1
-latin1_general_ci	latin1	48		Yes	1
-latin1_general_cs	latin1	49		Yes	1
-latin1_german1_ci	latin1	5		Yes	1
-latin1_german2_ci	latin1	31		Yes	2
-latin1_spanish_ci	latin1	94		Yes	1
-latin1_swedish_ci	latin1	8	Yes	Yes	1
-latin2_bin	latin2	77		Yes	1
-latin2_croatian_ci	latin2	27		Yes	1
-latin2_czech_cs	latin2	2		Yes	4
-latin2_general_ci	latin2	9	Yes	Yes	1
-latin2_hungarian_ci	latin2	21		Yes	1
-latin5_bin	latin5	78			0
-latin5_turkish_ci	latin5	30	Yes		0
-latin7_bin	latin7	79			0
-latin7_estonian_cs	latin7	20			0
-latin7_general_ci	latin7	41	Yes		0
-latin7_general_cs	latin7	42			0
-macce_bin	macce	43			0
-macce_general_ci	macce	38	Yes		0
-macroman_bin	macroman	53			0
-macroman_general_ci	macroman	39	Yes		0
-sjis_bin	sjis	88		Yes	1
-sjis_japanese_ci	sjis	13	Yes	Yes	1
-swe7_bin	swe7	82			0
-swe7_swedish_ci	swe7	10	Yes		0
-tis620_bin	tis620	89		Yes	1
-tis620_thai_ci	tis620	18	Yes	Yes	4
-ucs2_bin	ucs2	90		Yes	1
-ucs2_czech_ci	ucs2	138		Yes	8
-ucs2_danish_ci	ucs2	139		Yes	8
-ucs2_esperanto_ci	ucs2	145		Yes	8
-ucs2_estonian_ci	ucs2	134		Yes	8
-ucs2_general_ci	ucs2	35	Yes	Yes	1
-ucs2_hungarian_ci	ucs2	146		Yes	8
-ucs2_icelandic_ci	ucs2	129		Yes	8
-ucs2_latvian_ci	ucs2	130		Yes	8
-ucs2_lithuanian_ci	ucs2	140		Yes	8
-ucs2_persian_ci	ucs2	144		Yes	8
-ucs2_polish_ci	ucs2	133		Yes	8
-ucs2_romanian_ci	ucs2	131		Yes	8
-ucs2_roman_ci	ucs2	143		Yes	8
-ucs2_slovak_ci	ucs2	141		Yes	8
-ucs2_slovenian_ci	ucs2	132		Yes	8
-ucs2_spanish2_ci	ucs2	142		Yes	8
-ucs2_spanish_ci	ucs2	135		Yes	8
-ucs2_swedish_ci	ucs2	136		Yes	8
-ucs2_turkish_ci	ucs2	137		Yes	8
-ucs2_unicode_ci	ucs2	128		Yes	8
-ujis_bin	ujis	91		Yes	1
-ujis_japanese_ci	ujis	12	Yes	Yes	1
-utf8_bin	utf8	83		Yes	1
-utf8_czech_ci	utf8	202		Yes	8
-utf8_danish_ci	utf8	203		Yes	8
-utf8_esperanto_ci	utf8	209		Yes	8
-utf8_estonian_ci	utf8	198		Yes	8
-utf8_general_ci	utf8	33	Yes	Yes	1
-utf8_hungarian_ci	utf8	210		Yes	8
-utf8_icelandic_ci	utf8	193		Yes	8
-utf8_latvian_ci	utf8	194		Yes	8
-utf8_lithuanian_ci	utf8	204		Yes	8
-utf8_persian_ci	utf8	208		Yes	8
-utf8_polish_ci	utf8	197		Yes	8
-utf8_romanian_ci	utf8	195		Yes	8
-utf8_roman_ci	utf8	207		Yes	8
-utf8_slovak_ci	utf8	205		Yes	8
-utf8_slovenian_ci	utf8	196		Yes	8
-utf8_spanish2_ci	utf8	206		Yes	8
-utf8_spanish_ci	utf8	199		Yes	8
-utf8_swedish_ci	utf8	200		Yes	8
-utf8_turkish_ci	utf8	201		Yes	8
-utf8_unicode_ci	utf8	192		Yes	8
-
-
-SELECT *
-FROM information_schema.collation_character_set_applicability
-ORDER BY collation_name, character_set_name;
-COLLATION_NAME	CHARACTER_SET_NAME
-armscii8_bin	armscii8
-armscii8_general_ci	armscii8
-ascii_bin	ascii
-ascii_general_ci	ascii
-big5_bin	big5
-big5_chinese_ci	big5
-binary	binary
-cp1250_bin	cp1250
-cp1250_croatian_ci	cp1250
-cp1250_czech_cs	cp1250
-cp1250_general_ci	cp1250
-cp1250_polish_ci	cp1250
-cp1251_bin	cp1251
-cp1251_bulgarian_ci	cp1251
-cp1251_general_ci	cp1251
-cp1251_general_cs	cp1251
-cp1251_ukrainian_ci	cp1251
-cp1256_bin	cp1256
-cp1256_general_ci	cp1256
-cp1257_bin	cp1257
-cp1257_general_ci	cp1257
-cp1257_lithuanian_ci	cp1257
-cp850_bin	cp850
-cp850_general_ci	cp850
-cp852_bin	cp852
-cp852_general_ci	cp852
-cp866_bin	cp866
-cp866_general_ci	cp866
-cp932_bin	cp932
-cp932_japanese_ci	cp932
-dec8_bin	dec8
-dec8_swedish_ci	dec8
-eucjpms_bin	eucjpms
-eucjpms_japanese_ci	eucjpms
-euckr_bin	euckr
-euckr_korean_ci	euckr
-filename	filename
-gb2312_bin	gb2312
-gb2312_chinese_ci	gb2312
-gbk_bin	gbk
-gbk_chinese_ci	gbk
-geostd8_bin	geostd8
-geostd8_general_ci	geostd8
-greek_bin	greek
-greek_general_ci	greek
-hebrew_bin	hebrew
-hebrew_general_ci	hebrew
-hp8_bin	hp8
-hp8_english_ci	hp8
-keybcs2_bin	keybcs2
-keybcs2_general_ci	keybcs2
-koi8r_bin	koi8r
-koi8r_general_ci	koi8r
-koi8u_bin	koi8u
-koi8u_general_ci	koi8u
-latin1_bin	latin1
-latin1_danish_ci	latin1
-latin1_general_ci	latin1
-latin1_general_cs	latin1
-latin1_german1_ci	latin1
-latin1_german2_ci	latin1
-latin1_spanish_ci	latin1
-latin1_swedish_ci	latin1
-latin2_bin	latin2
-latin2_croatian_ci	latin2
-latin2_czech_cs	latin2
-latin2_general_ci	latin2
-latin2_hungarian_ci	latin2
-latin5_bin	latin5
-latin5_turkish_ci	latin5
-latin7_bin	latin7
-latin7_estonian_cs	latin7
-latin7_general_ci	latin7
-latin7_general_cs	latin7
-macce_bin	macce
-macce_general_ci	macce
-macroman_bin	macroman
-macroman_general_ci	macroman
-sjis_bin	sjis
-sjis_japanese_ci	sjis
-swe7_bin	swe7
-swe7_swedish_ci	swe7
-tis620_bin	tis620
-tis620_thai_ci	tis620
-ucs2_bin	ucs2
-ucs2_czech_ci	ucs2
-ucs2_danish_ci	ucs2
-ucs2_esperanto_ci	ucs2
-ucs2_estonian_ci	ucs2
-ucs2_general_ci	ucs2
-ucs2_hungarian_ci	ucs2
-ucs2_icelandic_ci	ucs2
-ucs2_latvian_ci	ucs2
-ucs2_lithuanian_ci	ucs2
-ucs2_persian_ci	ucs2
-ucs2_polish_ci	ucs2
-ucs2_romanian_ci	ucs2
-ucs2_roman_ci	ucs2
-ucs2_slovak_ci	ucs2
-ucs2_slovenian_ci	ucs2
-ucs2_spanish2_ci	ucs2
-ucs2_spanish_ci	ucs2
-ucs2_swedish_ci	ucs2
-ucs2_turkish_ci	ucs2
-ucs2_unicode_ci	ucs2
-ujis_bin	ujis
-ujis_japanese_ci	ujis
-utf8_bin	utf8
-utf8_czech_ci	utf8
-utf8_danish_ci	utf8
-utf8_esperanto_ci	utf8
-utf8_estonian_ci	utf8
-utf8_general_ci	utf8
-utf8_hungarian_ci	utf8
-utf8_icelandic_ci	utf8
-utf8_latvian_ci	utf8
-utf8_lithuanian_ci	utf8
-utf8_persian_ci	utf8
-utf8_polish_ci	utf8
-utf8_romanian_ci	utf8
-utf8_roman_ci	utf8
-utf8_slovak_ci	utf8
-utf8_slovenian_ci	utf8
-utf8_spanish2_ci	utf8
-utf8_spanish_ci	utf8
-utf8_swedish_ci	utf8
-utf8_turkish_ci	utf8
-utf8_unicode_ci	utf8
-# Switch to connection default + disconnect con
-DROP USER dbdict_test@localhost;
diff --git a/mysql-test/suite/funcs_1/r/charset_collation_2.result b/mysql-test/suite/funcs_1/r/charset_collation_2.result
deleted file mode 100644
index a9fb15a588c..00000000000
--- a/mysql-test/suite/funcs_1/r/charset_collation_2.result
+++ /dev/null
@@ -1,314 +0,0 @@
-DROP USER dbdict_test@localhost;
-CREATE USER dbdict_test@localhost;
-# Establish connection con (user=dbdict_test)
-
-SELECT *
-FROM information_schema.character_sets
-ORDER BY character_set_name;
-CHARACTER_SET_NAME	DEFAULT_COLLATE_NAME	DESCRIPTION	MAXLEN
-armscii8	armscii8_general_ci	ARMSCII-8 Armenian	1
-ascii	ascii_general_ci	US ASCII	1
-big5	big5_chinese_ci	Big5 Traditional Chinese	2
-binary	binary	Binary pseudo charset	1
-cp1250	cp1250_general_ci	Windows Central European	1
-cp1251	cp1251_general_ci	Windows Cyrillic	1
-cp1256	cp1256_general_ci	Windows Arabic	1
-cp1257	cp1257_general_ci	Windows Baltic	1
-cp850	cp850_general_ci	DOS West European	1
-cp852	cp852_general_ci	DOS Central European	1
-cp866	cp866_general_ci	DOS Russian	1
-cp932	cp932_japanese_ci	SJIS for Windows Japanese	2
-dec8	dec8_swedish_ci	DEC West European	1
-eucjpms	eucjpms_japanese_ci	UJIS for Windows Japanese	3
-euckr	euckr_korean_ci	EUC-KR Korean	2
-gb2312	gb2312_chinese_ci	GB2312 Simplified Chinese	2
-gbk	gbk_chinese_ci	GBK Simplified Chinese	2
-geostd8	geostd8_general_ci	GEOSTD8 Georgian	1
-greek	greek_general_ci	ISO 8859-7 Greek	1
-hebrew	hebrew_general_ci	ISO 8859-8 Hebrew	1
-hp8	hp8_english_ci	HP West European	1
-keybcs2	keybcs2_general_ci	DOS Kamenicky Czech-Slovak	1
-koi8r	koi8r_general_ci	KOI8-R Relcom Russian	1
-koi8u	koi8u_general_ci	KOI8-U Ukrainian	1
-latin1	latin1_swedish_ci	cp1252 West European	1
-latin2	latin2_general_ci	ISO 8859-2 Central European	1
-latin5	latin5_turkish_ci	ISO 8859-9 Turkish	1
-latin7	latin7_general_ci	ISO 8859-13 Baltic	1
-macce	macce_general_ci	Mac Central European	1
-macroman	macroman_general_ci	Mac West European	1
-sjis	sjis_japanese_ci	Shift-JIS Japanese	2
-swe7	swe7_swedish_ci	7bit Swedish	1
-tis620	tis620_thai_ci	TIS620 Thai	1
-ucs2	ucs2_general_ci	UCS-2 Unicode	2
-ujis	ujis_japanese_ci	EUC-JP Japanese	3
-utf8	utf8_general_ci	UTF-8 Unicode	3
-
-SELECT *
-FROM information_schema.collations
-ORDER BY collation_name;
-COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
-armscii8_bin	armscii8	64			0
-armscii8_general_ci	armscii8	32	Yes		0
-ascii_bin	ascii	65			0
-ascii_general_ci	ascii	11	Yes		0
-big5_bin	big5	84		Yes	1
-big5_chinese_ci	big5	1	Yes	Yes	1
-binary	binary	63	Yes	Yes	1
-cp1250_bin	cp1250	66		Yes	1
-cp1250_croatian_ci	cp1250	44		Yes	1
-cp1250_czech_cs	cp1250	34		Yes	2
-cp1250_general_ci	cp1250	26	Yes	Yes	1
-cp1250_polish_ci	cp1250	99		Yes	1
-cp1251_bin	cp1251	50			0
-cp1251_bulgarian_ci	cp1251	14			0
-cp1251_general_ci	cp1251	51	Yes		0
-cp1251_general_cs	cp1251	52			0
-cp1251_ukrainian_ci	cp1251	23			0
-cp1256_bin	cp1256	67			0
-cp1256_general_ci	cp1256	57	Yes		0
-cp1257_bin	cp1257	58			0
-cp1257_general_ci	cp1257	59	Yes		0
-cp1257_lithuanian_ci	cp1257	29			0
-cp850_bin	cp850	80			0
-cp850_general_ci	cp850	4	Yes		0
-cp852_bin	cp852	81			0
-cp852_general_ci	cp852	40	Yes		0
-cp866_bin	cp866	68			0
-cp866_general_ci	cp866	36	Yes		0
-cp932_bin	cp932	96		Yes	1
-cp932_japanese_ci	cp932	95	Yes	Yes	1
-dec8_bin	dec8	69			0
-dec8_swedish_ci	dec8	3	Yes		0
-eucjpms_bin	eucjpms	98		Yes	1
-eucjpms_japanese_ci	eucjpms	97	Yes	Yes	1
-euckr_bin	euckr	85		Yes	1
-euckr_korean_ci	euckr	19	Yes	Yes	1
-gb2312_bin	gb2312	86		Yes	1
-gb2312_chinese_ci	gb2312	24	Yes	Yes	1
-gbk_bin	gbk	87		Yes	1
-gbk_chinese_ci	gbk	28	Yes	Yes	1
-geostd8_bin	geostd8	93			0
-geostd8_general_ci	geostd8	92	Yes		0
-greek_bin	greek	70			0
-greek_general_ci	greek	25	Yes		0
-hebrew_bin	hebrew	71			0
-hebrew_general_ci	hebrew	16	Yes		0
-hp8_bin	hp8	72			0
-hp8_english_ci	hp8	6	Yes		0
-keybcs2_bin	keybcs2	73			0
-keybcs2_general_ci	keybcs2	37	Yes		0
-koi8r_bin	koi8r	74			0
-koi8r_general_ci	koi8r	7	Yes		0
-koi8u_bin	koi8u	75			0
-koi8u_general_ci	koi8u	22	Yes		0
-latin1_bin	latin1	47		Yes	1
-latin1_danish_ci	latin1	15		Yes	1
-latin1_general_ci	latin1	48		Yes	1
-latin1_general_cs	latin1	49		Yes	1
-latin1_german1_ci	latin1	5		Yes	1
-latin1_german2_ci	latin1	31		Yes	2
-latin1_spanish_ci	latin1	94		Yes	1
-latin1_swedish_ci	latin1	8	Yes	Yes	1
-latin2_bin	latin2	77		Yes	1
-latin2_croatian_ci	latin2	27		Yes	1
-latin2_czech_cs	latin2	2		Yes	4
-latin2_general_ci	latin2	9	Yes	Yes	1
-latin2_hungarian_ci	latin2	21		Yes	1
-latin5_bin	latin5	78			0
-latin5_turkish_ci	latin5	30	Yes		0
-latin7_bin	latin7	79			0
-latin7_estonian_cs	latin7	20			0
-latin7_general_ci	latin7	41	Yes		0
-latin7_general_cs	latin7	42			0
-macce_bin	macce	43			0
-macce_general_ci	macce	38	Yes		0
-macroman_bin	macroman	53			0
-macroman_general_ci	macroman	39	Yes		0
-sjis_bin	sjis	88		Yes	1
-sjis_japanese_ci	sjis	13	Yes	Yes	1
-swe7_bin	swe7	82			0
-swe7_swedish_ci	swe7	10	Yes		0
-tis620_bin	tis620	89		Yes	1
-tis620_thai_ci	tis620	18	Yes	Yes	4
-ucs2_bin	ucs2	90		Yes	1
-ucs2_czech_ci	ucs2	138		Yes	8
-ucs2_danish_ci	ucs2	139		Yes	8
-ucs2_esperanto_ci	ucs2	145		Yes	8
-ucs2_estonian_ci	ucs2	134		Yes	8
-ucs2_general_ci	ucs2	35	Yes	Yes	1
-ucs2_hungarian_ci	ucs2	146		Yes	8
-ucs2_icelandic_ci	ucs2	129		Yes	8
-ucs2_latvian_ci	ucs2	130		Yes	8
-ucs2_lithuanian_ci	ucs2	140		Yes	8
-ucs2_persian_ci	ucs2	144		Yes	8
-ucs2_polish_ci	ucs2	133		Yes	8
-ucs2_romanian_ci	ucs2	131		Yes	8
-ucs2_roman_ci	ucs2	143		Yes	8
-ucs2_slovak_ci	ucs2	141		Yes	8
-ucs2_slovenian_ci	ucs2	132		Yes	8
-ucs2_spanish2_ci	ucs2	142		Yes	8
-ucs2_spanish_ci	ucs2	135		Yes	8
-ucs2_swedish_ci	ucs2	136		Yes	8
-ucs2_turkish_ci	ucs2	137		Yes	8
-ucs2_unicode_ci	ucs2	128		Yes	8
-ujis_bin	ujis	91		Yes	1
-ujis_japanese_ci	ujis	12	Yes	Yes	1
-utf8_bin	utf8	83		Yes	1
-utf8_czech_ci	utf8	202		Yes	8
-utf8_danish_ci	utf8	203		Yes	8
-utf8_esperanto_ci	utf8	209		Yes	8
-utf8_estonian_ci	utf8	198		Yes	8
-utf8_general_ci	utf8	33	Yes	Yes	1
-utf8_general_cs	utf8	254		Yes	1
-utf8_hungarian_ci	utf8	210		Yes	8
-utf8_icelandic_ci	utf8	193		Yes	8
-utf8_latvian_ci	utf8	194		Yes	8
-utf8_lithuanian_ci	utf8	204		Yes	8
-utf8_persian_ci	utf8	208		Yes	8
-utf8_polish_ci	utf8	197		Yes	8
-utf8_romanian_ci	utf8	195		Yes	8
-utf8_roman_ci	utf8	207		Yes	8
-utf8_slovak_ci	utf8	205		Yes	8
-utf8_slovenian_ci	utf8	196		Yes	8
-utf8_spanish2_ci	utf8	206		Yes	8
-utf8_spanish_ci	utf8	199		Yes	8
-utf8_swedish_ci	utf8	200		Yes	8
-utf8_turkish_ci	utf8	201		Yes	8
-utf8_unicode_ci	utf8	192		Yes	8
-
-
-SELECT *
-FROM information_schema.collation_character_set_applicability
-ORDER BY collation_name, character_set_name;
-COLLATION_NAME	CHARACTER_SET_NAME
-armscii8_bin	armscii8
-armscii8_general_ci	armscii8
-ascii_bin	ascii
-ascii_general_ci	ascii
-big5_bin	big5
-big5_chinese_ci	big5
-binary	binary
-cp1250_bin	cp1250
-cp1250_croatian_ci	cp1250
-cp1250_czech_cs	cp1250
-cp1250_general_ci	cp1250
-cp1250_polish_ci	cp1250
-cp1251_bin	cp1251
-cp1251_bulgarian_ci	cp1251
-cp1251_general_ci	cp1251
-cp1251_general_cs	cp1251
-cp1251_ukrainian_ci	cp1251
-cp1256_bin	cp1256
-cp1256_general_ci	cp1256
-cp1257_bin	cp1257
-cp1257_general_ci	cp1257
-cp1257_lithuanian_ci	cp1257
-cp850_bin	cp850
-cp850_general_ci	cp850
-cp852_bin	cp852
-cp852_general_ci	cp852
-cp866_bin	cp866
-cp866_general_ci	cp866
-cp932_bin	cp932
-cp932_japanese_ci	cp932
-dec8_bin	dec8
-dec8_swedish_ci	dec8
-eucjpms_bin	eucjpms
-eucjpms_japanese_ci	eucjpms
-euckr_bin	euckr
-euckr_korean_ci	euckr
-filename	filename
-gb2312_bin	gb2312
-gb2312_chinese_ci	gb2312
-gbk_bin	gbk
-gbk_chinese_ci	gbk
-geostd8_bin	geostd8
-geostd8_general_ci	geostd8
-greek_bin	greek
-greek_general_ci	greek
-hebrew_bin	hebrew
-hebrew_general_ci	hebrew
-hp8_bin	hp8
-hp8_english_ci	hp8
-keybcs2_bin	keybcs2
-keybcs2_general_ci	keybcs2
-koi8r_bin	koi8r
-koi8r_general_ci	koi8r
-koi8u_bin	koi8u
-koi8u_general_ci	koi8u
-latin1_bin	latin1
-latin1_danish_ci	latin1
-latin1_general_ci	latin1
-latin1_general_cs	latin1
-latin1_german1_ci	latin1
-latin1_german2_ci	latin1
-latin1_spanish_ci	latin1
-latin1_swedish_ci	latin1
-latin2_bin	latin2
-latin2_croatian_ci	latin2
-latin2_czech_cs	latin2
-latin2_general_ci	latin2
-latin2_hungarian_ci	latin2
-latin5_bin	latin5
-latin5_turkish_ci	latin5
-latin7_bin	latin7
-latin7_estonian_cs	latin7
-latin7_general_ci	latin7
-latin7_general_cs	latin7
-macce_bin	macce
-macce_general_ci	macce
-macroman_bin	macroman
-macroman_general_ci	macroman
-sjis_bin	sjis
-sjis_japanese_ci	sjis
-swe7_bin	swe7
-swe7_swedish_ci	swe7
-tis620_bin	tis620
-tis620_thai_ci	tis620
-ucs2_bin	ucs2
-ucs2_czech_ci	ucs2
-ucs2_danish_ci	ucs2
-ucs2_esperanto_ci	ucs2
-ucs2_estonian_ci	ucs2
-ucs2_general_ci	ucs2
-ucs2_hungarian_ci	ucs2
-ucs2_icelandic_ci	ucs2
-ucs2_latvian_ci	ucs2
-ucs2_lithuanian_ci	ucs2
-ucs2_persian_ci	ucs2
-ucs2_polish_ci	ucs2
-ucs2_romanian_ci	ucs2
-ucs2_roman_ci	ucs2
-ucs2_slovak_ci	ucs2
-ucs2_slovenian_ci	ucs2
-ucs2_spanish2_ci	ucs2
-ucs2_spanish_ci	ucs2
-ucs2_swedish_ci	ucs2
-ucs2_turkish_ci	ucs2
-ucs2_unicode_ci	ucs2
-ujis_bin	ujis
-ujis_japanese_ci	ujis
-utf8_bin	utf8
-utf8_czech_ci	utf8
-utf8_danish_ci	utf8
-utf8_esperanto_ci	utf8
-utf8_estonian_ci	utf8
-utf8_general_ci	utf8
-utf8_general_cs	utf8
-utf8_hungarian_ci	utf8
-utf8_icelandic_ci	utf8
-utf8_latvian_ci	utf8
-utf8_lithuanian_ci	utf8
-utf8_persian_ci	utf8
-utf8_polish_ci	utf8
-utf8_romanian_ci	utf8
-utf8_roman_ci	utf8
-utf8_slovak_ci	utf8
-utf8_slovenian_ci	utf8
-utf8_spanish2_ci	utf8
-utf8_spanish_ci	utf8
-utf8_swedish_ci	utf8
-utf8_turkish_ci	utf8
-utf8_unicode_ci	utf8
-# Switch to connection default + disconnect con
-DROP USER dbdict_test@localhost;
diff --git a/mysql-test/suite/funcs_1/r/charset_collation_3.result b/mysql-test/suite/funcs_1/r/charset_collation_3.result
deleted file mode 100644
index 55ed4b4704c..00000000000
--- a/mysql-test/suite/funcs_1/r/charset_collation_3.result
+++ /dev/null
@@ -1,312 +0,0 @@
-DROP USER dbdict_test@localhost;
-CREATE USER dbdict_test@localhost;
-# Establish connection con (user=dbdict_test)
-
-SELECT *
-FROM information_schema.character_sets
-ORDER BY character_set_name;
-CHARACTER_SET_NAME	DEFAULT_COLLATE_NAME	DESCRIPTION	MAXLEN
-armscii8	armscii8_general_ci	ARMSCII-8 Armenian	1
-ascii	ascii_general_ci	US ASCII	1
-big5	big5_chinese_ci	Big5 Traditional Chinese	2
-binary	binary	Binary pseudo charset	1
-cp1250	cp1250_general_ci	Windows Central European	1
-cp1251	cp1251_general_ci	Windows Cyrillic	1
-cp1256	cp1256_general_ci	Windows Arabic	1
-cp1257	cp1257_general_ci	Windows Baltic	1
-cp850	cp850_general_ci	DOS West European	1
-cp852	cp852_general_ci	DOS Central European	1
-cp866	cp866_general_ci	DOS Russian	1
-cp932	cp932_japanese_ci	SJIS for Windows Japanese	2
-dec8	dec8_swedish_ci	DEC West European	1
-eucjpms	eucjpms_japanese_ci	UJIS for Windows Japanese	3
-euckr	euckr_korean_ci	EUC-KR Korean	2
-gb2312	gb2312_chinese_ci	GB2312 Simplified Chinese	2
-gbk	gbk_chinese_ci	GBK Simplified Chinese	2
-geostd8	geostd8_general_ci	GEOSTD8 Georgian	1
-greek	greek_general_ci	ISO 8859-7 Greek	1
-hebrew	hebrew_general_ci	ISO 8859-8 Hebrew	1
-hp8	hp8_english_ci	HP West European	1
-keybcs2	keybcs2_general_ci	DOS Kamenicky Czech-Slovak	1
-koi8r	koi8r_general_ci	KOI8-R Relcom Russian	1
-koi8u	koi8u_general_ci	KOI8-U Ukrainian	1
-latin1	latin1_swedish_ci	cp1252 West European	1
-latin2	latin2_general_ci	ISO 8859-2 Central European	1
-latin5	latin5_turkish_ci	ISO 8859-9 Turkish	1
-latin7	latin7_general_ci	ISO 8859-13 Baltic	1
-macce	macce_general_ci	Mac Central European	1
-macroman	macroman_general_ci	Mac West European	1
-sjis	sjis_japanese_ci	Shift-JIS Japanese	2
-swe7	swe7_swedish_ci	7bit Swedish	1
-tis620	tis620_thai_ci	TIS620 Thai	1
-ucs2	ucs2_general_ci	UCS-2 Unicode	2
-ujis	ujis_japanese_ci	EUC-JP Japanese	3
-utf8	utf8_general_ci	UTF-8 Unicode	3
-
-SELECT *
-FROM information_schema.collations
-ORDER BY collation_name;
-COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
-armscii8_bin	armscii8	64			0
-armscii8_general_ci	armscii8	32	Yes		0
-ascii_bin	ascii	65			0
-ascii_general_ci	ascii	11	Yes		0
-big5_bin	big5	84		Yes	1
-big5_chinese_ci	big5	1	Yes	Yes	1
-binary	binary	63	Yes	Yes	1
-cp1250_bin	cp1250	66		Yes	1
-cp1250_croatian_ci	cp1250	44		Yes	1
-cp1250_czech_cs	cp1250	34		Yes	2
-cp1250_general_ci	cp1250	26	Yes	Yes	1
-cp1250_polish_ci	cp1250	99		Yes	1
-cp1251_bin	cp1251	50			0
-cp1251_bulgarian_ci	cp1251	14			0
-cp1251_general_ci	cp1251	51	Yes		0
-cp1251_general_cs	cp1251	52			0
-cp1251_ukrainian_ci	cp1251	23			0
-cp1256_bin	cp1256	67			0
-cp1256_general_ci	cp1256	57	Yes		0
-cp1257_bin	cp1257	58			0
-cp1257_general_ci	cp1257	59	Yes		0
-cp1257_lithuanian_ci	cp1257	29			0
-cp850_bin	cp850	80			0
-cp850_general_ci	cp850	4	Yes		0
-cp852_bin	cp852	81			0
-cp852_general_ci	cp852	40	Yes		0
-cp866_bin	cp866	68			0
-cp866_general_ci	cp866	36	Yes		0
-cp932_bin	cp932	96		Yes	1
-cp932_japanese_ci	cp932	95	Yes	Yes	1
-dec8_bin	dec8	69			0
-dec8_swedish_ci	dec8	3	Yes		0
-eucjpms_bin	eucjpms	98		Yes	1
-eucjpms_japanese_ci	eucjpms	97	Yes	Yes	1
-euckr_bin	euckr	85		Yes	1
-euckr_korean_ci	euckr	19	Yes	Yes	1
-gb2312_bin	gb2312	86		Yes	1
-gb2312_chinese_ci	gb2312	24	Yes	Yes	1
-gbk_bin	gbk	87		Yes	1
-gbk_chinese_ci	gbk	28	Yes	Yes	1
-geostd8_bin	geostd8	93			0
-geostd8_general_ci	geostd8	92	Yes		0
-greek_bin	greek	70			0
-greek_general_ci	greek	25	Yes		0
-hebrew_bin	hebrew	71			0
-hebrew_general_ci	hebrew	16	Yes		0
-hp8_bin	hp8	72			0
-hp8_english_ci	hp8	6	Yes		0
-keybcs2_bin	keybcs2	73			0
-keybcs2_general_ci	keybcs2	37	Yes		0
-koi8r_bin	koi8r	74			0
-koi8r_general_ci	koi8r	7	Yes		0
-koi8u_bin	koi8u	75			0
-koi8u_general_ci	koi8u	22	Yes		0
-latin1_bin	latin1	47		Yes	1
-latin1_danish_ci	latin1	15		Yes	1
-latin1_general_ci	latin1	48		Yes	1
-latin1_general_cs	latin1	49		Yes	1
-latin1_german1_ci	latin1	5		Yes	1
-latin1_german2_ci	latin1	31		Yes	2
-latin1_spanish_ci	latin1	94		Yes	1
-latin1_swedish_ci	latin1	8	Yes	Yes	1
-latin2_bin	latin2	77		Yes	1
-latin2_croatian_ci	latin2	27		Yes	1
-latin2_czech_cs	latin2	2		Yes	4
-latin2_general_ci	latin2	9	Yes	Yes	1
-latin2_hungarian_ci	latin2	21		Yes	1
-latin5_bin	latin5	78			0
-latin5_turkish_ci	latin5	30	Yes		0
-latin7_bin	latin7	79			0
-latin7_estonian_cs	latin7	20			0
-latin7_general_ci	latin7	41	Yes		0
-latin7_general_cs	latin7	42			0
-macce_bin	macce	43			0
-macce_general_ci	macce	38	Yes		0
-macroman_bin	macroman	53			0
-macroman_general_ci	macroman	39	Yes		0
-sjis_bin	sjis	88		Yes	1
-sjis_japanese_ci	sjis	13	Yes	Yes	1
-swe7_bin	swe7	82			0
-swe7_swedish_ci	swe7	10	Yes		0
-tis620_bin	tis620	89		Yes	1
-tis620_thai_ci	tis620	18	Yes	Yes	4
-ucs2_bin	ucs2	90		Yes	1
-ucs2_czech_ci	ucs2	138		Yes	8
-ucs2_danish_ci	ucs2	139		Yes	8
-ucs2_esperanto_ci	ucs2	145		Yes	8
-ucs2_estonian_ci	ucs2	134		Yes	8
-ucs2_general_ci	ucs2	35	Yes	Yes	1
-ucs2_hungarian_ci	ucs2	146		Yes	8
-ucs2_icelandic_ci	ucs2	129		Yes	8
-ucs2_latvian_ci	ucs2	130		Yes	8
-ucs2_lithuanian_ci	ucs2	140		Yes	8
-ucs2_persian_ci	ucs2	144		Yes	8
-ucs2_polish_ci	ucs2	133		Yes	8
-ucs2_romanian_ci	ucs2	131		Yes	8
-ucs2_roman_ci	ucs2	143		Yes	8
-ucs2_slovak_ci	ucs2	141		Yes	8
-ucs2_slovenian_ci	ucs2	132		Yes	8
-ucs2_spanish2_ci	ucs2	142		Yes	8
-ucs2_spanish_ci	ucs2	135		Yes	8
-ucs2_swedish_ci	ucs2	136		Yes	8
-ucs2_turkish_ci	ucs2	137		Yes	8
-ucs2_unicode_ci	ucs2	128		Yes	8
-ujis_bin	ujis	91		Yes	1
-ujis_japanese_ci	ujis	12	Yes	Yes	1
-utf8_bin	utf8	83		Yes	1
-utf8_czech_ci	utf8	202		Yes	8
-utf8_danish_ci	utf8	203		Yes	8
-utf8_esperanto_ci	utf8	209		Yes	8
-utf8_estonian_ci	utf8	198		Yes	8
-utf8_general_ci	utf8	33	Yes	Yes	1
-utf8_hungarian_ci	utf8	210		Yes	8
-utf8_icelandic_ci	utf8	193		Yes	8
-utf8_latvian_ci	utf8	194		Yes	8
-utf8_lithuanian_ci	utf8	204		Yes	8
-utf8_persian_ci	utf8	208		Yes	8
-utf8_polish_ci	utf8	197		Yes	8
-utf8_romanian_ci	utf8	195		Yes	8
-utf8_roman_ci	utf8	207		Yes	8
-utf8_slovak_ci	utf8	205		Yes	8
-utf8_slovenian_ci	utf8	196		Yes	8
-utf8_spanish2_ci	utf8	206		Yes	8
-utf8_spanish_ci	utf8	199		Yes	8
-utf8_swedish_ci	utf8	200		Yes	8
-utf8_turkish_ci	utf8	201		Yes	8
-utf8_unicode_ci	utf8	192		Yes	8
-
-
-SELECT *
-FROM information_schema.collation_character_set_applicability
-ORDER BY collation_name, character_set_name;
-COLLATION_NAME	CHARACTER_SET_NAME
-armscii8_bin	armscii8
-armscii8_general_ci	armscii8
-ascii_bin	ascii
-ascii_general_ci	ascii
-big5_bin	big5
-big5_chinese_ci	big5
-binary	binary
-cp1250_bin	cp1250
-cp1250_croatian_ci	cp1250
-cp1250_czech_cs	cp1250
-cp1250_general_ci	cp1250
-cp1250_polish_ci	cp1250
-cp1251_bin	cp1251
-cp1251_bulgarian_ci	cp1251
-cp1251_general_ci	cp1251
-cp1251_general_cs	cp1251
-cp1251_ukrainian_ci	cp1251
-cp1256_bin	cp1256
-cp1256_general_ci	cp1256
-cp1257_bin	cp1257
-cp1257_general_ci	cp1257
-cp1257_lithuanian_ci	cp1257
-cp850_bin	cp850
-cp850_general_ci	cp850
-cp852_bin	cp852
-cp852_general_ci	cp852
-cp866_bin	cp866
-cp866_general_ci	cp866
-cp932_bin	cp932
-cp932_japanese_ci	cp932
-dec8_bin	dec8
-dec8_swedish_ci	dec8
-eucjpms_bin	eucjpms
-eucjpms_japanese_ci	eucjpms
-euckr_bin	euckr
-euckr_korean_ci	euckr
-filename	filename
-gb2312_bin	gb2312
-gb2312_chinese_ci	gb2312
-gbk_bin	gbk
-gbk_chinese_ci	gbk
-geostd8_bin	geostd8
-geostd8_general_ci	geostd8
-greek_bin	greek
-greek_general_ci	greek
-hebrew_bin	hebrew
-hebrew_general_ci	hebrew
-hp8_bin	hp8
-hp8_english_ci	hp8
-keybcs2_bin	keybcs2
-keybcs2_general_ci	keybcs2
-koi8r_bin	koi8r
-koi8r_general_ci	koi8r
-koi8u_bin	koi8u
-koi8u_general_ci	koi8u
-latin1_bin	latin1
-latin1_danish_ci	latin1
-latin1_general_ci	latin1
-latin1_general_cs	latin1
-latin1_german1_ci	latin1
-latin1_german2_ci	latin1
-latin1_spanish_ci	latin1
-latin1_swedish_ci	latin1
-latin2_bin	latin2
-latin2_croatian_ci	latin2
-latin2_czech_cs	latin2
-latin2_general_ci	latin2
-latin2_hungarian_ci	latin2
-latin5_bin	latin5
-latin5_turkish_ci	latin5
-latin7_bin	latin7
-latin7_estonian_cs	latin7
-latin7_general_ci	latin7
-latin7_general_cs	latin7
-macce_bin	macce
-macce_general_ci	macce
-macroman_bin	macroman
-macroman_general_ci	macroman
-sjis_bin	sjis
-sjis_japanese_ci	sjis
-swe7_bin	swe7
-swe7_swedish_ci	swe7
-tis620_bin	tis620
-tis620_thai_ci	tis620
-ucs2_bin	ucs2
-ucs2_czech_ci	ucs2
-ucs2_danish_ci	ucs2
-ucs2_esperanto_ci	ucs2
-ucs2_estonian_ci	ucs2
-ucs2_general_ci	ucs2
-ucs2_hungarian_ci	ucs2
-ucs2_icelandic_ci	ucs2
-ucs2_latvian_ci	ucs2
-ucs2_lithuanian_ci	ucs2
-ucs2_persian_ci	ucs2
-ucs2_polish_ci	ucs2
-ucs2_romanian_ci	ucs2
-ucs2_roman_ci	ucs2
-ucs2_slovak_ci	ucs2
-ucs2_slovenian_ci	ucs2
-ucs2_spanish2_ci	ucs2
-ucs2_spanish_ci	ucs2
-ucs2_swedish_ci	ucs2
-ucs2_turkish_ci	ucs2
-ucs2_unicode_ci	ucs2
-ujis_bin	ujis
-ujis_japanese_ci	ujis
-utf8_bin	utf8
-utf8_czech_ci	utf8
-utf8_danish_ci	utf8
-utf8_esperanto_ci	utf8
-utf8_estonian_ci	utf8
-utf8_general_ci	utf8
-utf8_hungarian_ci	utf8
-utf8_icelandic_ci	utf8
-utf8_latvian_ci	utf8
-utf8_lithuanian_ci	utf8
-utf8_persian_ci	utf8
-utf8_polish_ci	utf8
-utf8_romanian_ci	utf8
-utf8_roman_ci	utf8
-utf8_slovak_ci	utf8
-utf8_slovenian_ci	utf8
-utf8_spanish2_ci	utf8
-utf8_spanish_ci	utf8
-utf8_swedish_ci	utf8
-utf8_turkish_ci	utf8
-utf8_unicode_ci	utf8
-# Switch to connection default + disconnect con
-DROP USER dbdict_test@localhost;
diff --git a/mysql-test/suite/funcs_1/r/is_columns_is_embedded.result b/mysql-test/suite/funcs_1/r/is_columns_is_embedded.result
index d5e1309e39f..59ad695c413 100644
--- a/mysql-test/suite/funcs_1/r/is_columns_is_embedded.result
+++ b/mysql-test/suite/funcs_1/r/is_columns_is_embedded.result
@@ -166,7 +166,7 @@ NULL	information_schema	PROCESSLIST	HOST	3		NO	varchar	64	192	NULL	NULL	utf8	utf
 NULL	information_schema	PROCESSLIST	ID	1	0	NO	bigint	NULL	NULL	19	0	NULL	NULL	bigint(4)				
 NULL	information_schema	PROCESSLIST	INFO	8	NULL	YES	longtext	4294967295	4294967295	NULL	NULL	utf8	utf8_general_ci	longtext				
 NULL	information_schema	PROCESSLIST	STATE	7	NULL	YES	varchar	64	192	NULL	NULL	utf8	utf8_general_ci	varchar(64)				
-NULL	information_schema	PROCESSLIST	TIME	6	0	NO	bigint	NULL	NULL	19	0	NULL	NULL	bigint(7)				
+NULL	information_schema	PROCESSLIST	TIME	6	0	NO	int	NULL	NULL	10	0	NULL	NULL	int(7)				
 NULL	information_schema	PROCESSLIST	USER	2		NO	varchar	16	48	NULL	NULL	utf8	utf8_general_ci	varchar(16)				
 NULL	information_schema	REFERENTIAL_CONSTRAINTS	CONSTRAINT_CATALOG	1	NULL	YES	varchar	512	1536	NULL	NULL	utf8	utf8_general_ci	varchar(512)				
 NULL	information_schema	REFERENTIAL_CONSTRAINTS	CONSTRAINT_NAME	3		NO	varchar	64	192	NULL	NULL	utf8	utf8_general_ci	varchar(64)				
@@ -340,6 +340,7 @@ ORDER BY CHARACTER_SET_NAME, COLLATION_NAME, COL_CML;
 COL_CML	DATA_TYPE	CHARACTER_SET_NAME	COLLATION_NAME
 NULL	bigint	NULL	NULL
 NULL	datetime	NULL	NULL
+NULL	int	NULL	NULL
 --> CHAR(0) is allowed (see manual), and here both CHARACHTER_* values
 --> are 0, which is intended behavior, and the result of 0 / 0 IS NULL
 SELECT CHARACTER_OCTET_LENGTH / CHARACTER_MAXIMUM_LENGTH AS COL_CML,
@@ -519,7 +520,7 @@ NULL	information_schema	PROCESSLIST	ID	bigint	NULL	NULL	NULL	NULL	bigint(4)
 3.0000	information_schema	PROCESSLIST	HOST	varchar	64	192	utf8	utf8_general_ci	varchar(64)
 3.0000	information_schema	PROCESSLIST	DB	varchar	64	192	utf8	utf8_general_ci	varchar(64)
 3.0000	information_schema	PROCESSLIST	COMMAND	varchar	16	48	utf8	utf8_general_ci	varchar(16)
-NULL	information_schema	PROCESSLIST	TIME	bigint	NULL	NULL	NULL	NULL	bigint(7)
+NULL	information_schema	PROCESSLIST	TIME	int	NULL	NULL	NULL	NULL	int(7)
 3.0000	information_schema	PROCESSLIST	STATE	varchar	64	192	utf8	utf8_general_ci	varchar(64)
 1.0000	information_schema	PROCESSLIST	INFO	longtext	4294967295	4294967295	utf8	utf8_general_ci	longtext
 3.0000	information_schema	REFERENTIAL_CONSTRAINTS	CONSTRAINT_CATALOG	varchar	512	1536	utf8	utf8_general_ci	varchar(512)
diff --git a/mysql-test/suite/funcs_1/r/is_columns_myisam_embedded.result b/mysql-test/suite/funcs_1/r/is_columns_myisam_embedded.result
index d22c5cc06f7..2721dcf3c6e 100644
--- a/mysql-test/suite/funcs_1/r/is_columns_myisam_embedded.result
+++ b/mysql-test/suite/funcs_1/r/is_columns_myisam_embedded.result
@@ -514,7 +514,7 @@ NULL	test	tb1	f6	6	NULL	YES	mediumtext	16777215	16777215	NULL	NULL	latin1	latin1
 NULL	test	tb1	f7	7	NULL	YES	longtext	4294967295	4294967295	NULL	NULL	latin1	latin1_swedish_ci	longtext				
 NULL	test	tb1	f8	8	NULL	YES	tinyblob	255	255	NULL	NULL	NULL	NULL	tinyblob				
 NULL	test	tb1	f9	9	NULL	YES	blob	65535	65535	NULL	NULL	NULL	NULL	blob				
-NULL	test	tb2	f100	42	00000000000000000008.8	NO	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test	tb2	f100	42	00000000000000000008.8	NO	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
 NULL	test	tb2	f101	43	2000-01-01	NO	date	NULL	NULL	NULL	NULL	NULL	NULL	date				
 NULL	test	tb2	f102	44	00:00:20	NO	time	NULL	NULL	NULL	NULL	NULL	NULL	time				
 NULL	test	tb2	f103	45	0002-02-02 00:00:00	NO	datetime	NULL	NULL	NULL	NULL	NULL	NULL	datetime				
@@ -547,32 +547,32 @@ NULL	test	tb2	f70	12	NULL	YES	decimal	NULL	NULL	63	30	NULL	NULL	decimal(63,30) u
 NULL	test	tb2	f71	13	NULL	YES	decimal	NULL	NULL	10	0	NULL	NULL	decimal(10,0) unsigned zerofill				
 NULL	test	tb2	f72	14	NULL	YES	decimal	NULL	NULL	63	30	NULL	NULL	decimal(63,30) unsigned zerofill				
 NULL	test	tb2	f73	15	NULL	YES	double	NULL	NULL	22	NULL	NULL	NULL	double				
-NULL	test	tb2	f74	16	NULL	YES	double unsigned	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
-NULL	test	tb2	f75	17	NULL	YES	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
-NULL	test	tb2	f76	18	NULL	YES	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test	tb2	f74	16	NULL	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
+NULL	test	tb2	f75	17	NULL	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test	tb2	f76	18	NULL	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
 NULL	test	tb2	f77	19	7.7	YES	double	NULL	NULL	22	NULL	NULL	NULL	double				
-NULL	test	tb2	f78	20	7.7	YES	double unsigned	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
-NULL	test	tb2	f79	21	00000000000000000007.7	YES	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
-NULL	test	tb2	f80	22	00000000000000000008.8	YES	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test	tb2	f78	20	7.7	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
+NULL	test	tb2	f79	21	00000000000000000007.7	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test	tb2	f80	22	00000000000000000008.8	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
 NULL	test	tb2	f81	23	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float				
-NULL	test	tb2	f82	24	8.8	NO	float unsigned	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
-NULL	test	tb2	f83	25	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test	tb2	f84	26	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb2	f82	24	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
+NULL	test	tb2	f83	25	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb2	f84	26	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
 NULL	test	tb2	f85	27	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float				
 NULL	test	tb2	f86	28	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float				
-NULL	test	tb2	f87	29	8.8	NO	float unsigned	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
-NULL	test	tb2	f88	30	8.8	NO	float unsigned	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
-NULL	test	tb2	f89	31	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test	tb2	f90	32	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test	tb2	f91	33	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test	tb2	f92	34	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb2	f87	29	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
+NULL	test	tb2	f88	30	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
+NULL	test	tb2	f89	31	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb2	f90	32	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb2	f91	33	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb2	f92	34	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
 NULL	test	tb2	f93	35	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float				
 NULL	test	tb2	f94	36	8.8	NO	double	NULL	NULL	22	NULL	NULL	NULL	double				
-NULL	test	tb2	f95	37	8.8	NO	float unsigned	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
-NULL	test	tb2	f96	38	8.8	NO	double unsigned	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
-NULL	test	tb2	f97	39	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test	tb2	f98	40	00000000000000000008.8	NO	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
-NULL	test	tb2	f99	41	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb2	f95	37	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
+NULL	test	tb2	f96	38	8.8	NO	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
+NULL	test	tb2	f97	39	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb2	f98	40	00000000000000000008.8	NO	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test	tb2	f99	41	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
 NULL	test	tb3	f118	1	a	NO	char	1	1	NULL	NULL	latin1	latin1_swedish_ci	char(1)				
 NULL	test	tb3	f119	2		NO	char	1	1	NULL	NULL	latin1	latin1_bin	char(1)				
 NULL	test	tb3	f120	3		NO	char	1	1	NULL	NULL	latin1	latin1_swedish_ci	char(1)				
@@ -646,33 +646,33 @@ NULL	test	tb4	f187	12	000000000000000000000000000000009.000000000000000000000000
 NULL	test	tb4	f188	13	0000000009	NO	decimal	NULL	NULL	10	0	NULL	NULL	decimal(10,0) unsigned zerofill				
 NULL	test	tb4	f189	14	000000000000000000000000000000009.000000000000000000000000000000	NO	decimal	NULL	NULL	63	30	NULL	NULL	decimal(63,30) unsigned zerofill				
 NULL	test	tb4	f190	15	88.8	NO	double	NULL	NULL	22	NULL	NULL	NULL	double				
-NULL	test	tb4	f191	16	88.8	NO	double unsigned	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
-NULL	test	tb4	f192	17	00000000000000000088.8	NO	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
-NULL	test	tb4	f193	18	00000000000000000088.8	NO	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test	tb4	f191	16	88.8	NO	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
+NULL	test	tb4	f192	17	00000000000000000088.8	NO	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test	tb4	f193	18	00000000000000000088.8	NO	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
 NULL	test	tb4	f194	19	55.5	NO	double	NULL	NULL	22	NULL	NULL	NULL	double				
-NULL	test	tb4	f195	20	55.5	NO	double unsigned	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
-NULL	test	tb4	f196	21	00000000000000000055.5	NO	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
-NULL	test	tb4	f197	22	00000000000000000055.5	NO	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test	tb4	f195	20	55.5	NO	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
+NULL	test	tb4	f196	21	00000000000000000055.5	NO	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test	tb4	f197	22	00000000000000000055.5	NO	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
 NULL	test	tb4	f198	23	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float				
-NULL	test	tb4	f199	24	NULL	YES	float unsigned	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
-NULL	test	tb4	f200	25	NULL	YES	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test	tb4	f201	26	NULL	YES	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb4	f199	24	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
+NULL	test	tb4	f200	25	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb4	f201	26	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
 NULL	test	tb4	f202	27	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float				
 NULL	test	tb4	f203	28	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float				
-NULL	test	tb4	f204	29	NULL	YES	float unsigned	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
-NULL	test	tb4	f205	30	NULL	YES	float unsigned	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
-NULL	test	tb4	f206	31	NULL	YES	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test	tb4	f207	32	NULL	YES	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test	tb4	f208	33	NULL	YES	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test	tb4	f209	34	NULL	YES	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb4	f204	29	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
+NULL	test	tb4	f205	30	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
+NULL	test	tb4	f206	31	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb4	f207	32	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb4	f208	33	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb4	f209	34	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
 NULL	test	tb4	f210	35	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float				
 NULL	test	tb4	f211	36	NULL	YES	double	NULL	NULL	22	NULL	NULL	NULL	double				
-NULL	test	tb4	f212	37	NULL	YES	float unsigned	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
-NULL	test	tb4	f213	38	NULL	YES	double unsigned	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
-NULL	test	tb4	f214	39	NULL	YES	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test	tb4	f215	40	NULL	YES	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
-NULL	test	tb4	f216	41	NULL	YES	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test	tb4	f217	42	NULL	YES	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test	tb4	f212	37	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
+NULL	test	tb4	f213	38	NULL	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
+NULL	test	tb4	f214	39	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb4	f215	40	NULL	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test	tb4	f216	41	NULL	YES	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test	tb4	f217	42	NULL	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
 NULL	test	tb4	f218	43	NULL	YES	date	NULL	NULL	NULL	NULL	NULL	NULL	date				
 NULL	test	tb4	f219	44	NULL	YES	time	NULL	NULL	NULL	NULL	NULL	NULL	time				
 NULL	test	tb4	f220	45	NULL	YES	datetime	NULL	NULL	NULL	NULL	NULL	NULL	datetime				
@@ -698,7 +698,7 @@ NULL	test	tb4	f239	64	NULL	YES	varbinary	1000	1000	NULL	NULL	NULL	NULL	varbinary
 NULL	test	tb4	f240	65	NULL	YES	varchar	120	120	NULL	NULL	latin1	latin1_swedish_ci	varchar(120)				
 NULL	test	tb4	f241	66	NULL	YES	char	100	100	NULL	NULL	latin1	latin1_swedish_ci	char(100)				
 NULL	test	tb4	f242	67	NULL	YES	bit	NULL	NULL	30	NULL	NULL	NULL	bit(30)				
-NULL	test1	tb2	f100	42	00000000000000000008.8	NO	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test1	tb2	f100	42	00000000000000000008.8	NO	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
 NULL	test1	tb2	f101	43	2000-01-01	NO	date	NULL	NULL	NULL	NULL	NULL	NULL	date				
 NULL	test1	tb2	f102	44	00:00:20	NO	time	NULL	NULL	NULL	NULL	NULL	NULL	time				
 NULL	test1	tb2	f103	45	0002-02-02 00:00:00	NO	datetime	NULL	NULL	NULL	NULL	NULL	NULL	datetime				
@@ -731,32 +731,32 @@ NULL	test1	tb2	f70	12	NULL	YES	decimal	NULL	NULL	63	30	NULL	NULL	decimal(63,30)
 NULL	test1	tb2	f71	13	NULL	YES	decimal	NULL	NULL	10	0	NULL	NULL	decimal(10,0) unsigned zerofill				
 NULL	test1	tb2	f72	14	NULL	YES	decimal	NULL	NULL	63	30	NULL	NULL	decimal(63,30) unsigned zerofill				
 NULL	test1	tb2	f73	15	NULL	YES	double	NULL	NULL	22	NULL	NULL	NULL	double				
-NULL	test1	tb2	f74	16	NULL	YES	double unsigned	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
-NULL	test1	tb2	f75	17	NULL	YES	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
-NULL	test1	tb2	f76	18	NULL	YES	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test1	tb2	f74	16	NULL	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
+NULL	test1	tb2	f75	17	NULL	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test1	tb2	f76	18	NULL	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
 NULL	test1	tb2	f77	19	7.7	YES	double	NULL	NULL	22	NULL	NULL	NULL	double				
-NULL	test1	tb2	f78	20	7.7	YES	double unsigned	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
-NULL	test1	tb2	f79	21	00000000000000000007.7	YES	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
-NULL	test1	tb2	f80	22	00000000000000000008.8	YES	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test1	tb2	f78	20	7.7	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
+NULL	test1	tb2	f79	21	00000000000000000007.7	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test1	tb2	f80	22	00000000000000000008.8	YES	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
 NULL	test1	tb2	f81	23	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float				
-NULL	test1	tb2	f82	24	8.8	NO	float unsigned	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
-NULL	test1	tb2	f83	25	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test1	tb2	f84	26	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test1	tb2	f82	24	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
+NULL	test1	tb2	f83	25	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test1	tb2	f84	26	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
 NULL	test1	tb2	f85	27	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float				
 NULL	test1	tb2	f86	28	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float				
-NULL	test1	tb2	f87	29	8.8	NO	float unsigned	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
-NULL	test1	tb2	f88	30	8.8	NO	float unsigned	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
-NULL	test1	tb2	f89	31	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test1	tb2	f90	32	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test1	tb2	f91	33	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test1	tb2	f92	34	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test1	tb2	f87	29	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
+NULL	test1	tb2	f88	30	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
+NULL	test1	tb2	f89	31	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test1	tb2	f90	32	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test1	tb2	f91	33	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test1	tb2	f92	34	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
 NULL	test1	tb2	f93	35	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float				
 NULL	test1	tb2	f94	36	8.8	NO	double	NULL	NULL	22	NULL	NULL	NULL	double				
-NULL	test1	tb2	f95	37	8.8	NO	float unsigned	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
-NULL	test1	tb2	f96	38	8.8	NO	double unsigned	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
-NULL	test1	tb2	f97	39	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
-NULL	test1	tb2	f98	40	00000000000000000008.8	NO	double unsigned zerofill	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
-NULL	test1	tb2	f99	41	0000000008.8	NO	float unsigned zerofill	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test1	tb2	f95	37	8.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned				
+NULL	test1	tb2	f96	38	8.8	NO	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned				
+NULL	test1	tb2	f97	39	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
+NULL	test1	tb2	f98	40	00000000000000000008.8	NO	double	NULL	NULL	22	NULL	NULL	NULL	double unsigned zerofill				
+NULL	test1	tb2	f99	41	0000000008.8	NO	float	NULL	NULL	12	NULL	NULL	NULL	float unsigned zerofill				
 NULL	test4	t6	f1	1	NULL	YES	char	20	20	NULL	NULL	latin1	latin1_swedish_ci	char(20)				
 NULL	test4	t6	f2	2	NULL	YES	char	25	25	NULL	NULL	latin1	latin1_swedish_ci	char(25)				
 NULL	test4	t6	f3	3	NULL	YES	date	NULL	NULL	NULL	NULL	NULL	NULL	date				
@@ -817,11 +817,7 @@ NULL	date	NULL	NULL
 NULL	datetime	NULL	NULL
 NULL	decimal	NULL	NULL
 NULL	double	NULL	NULL
-NULL	double unsigned	NULL	NULL
-NULL	double unsigned zerofill	NULL	NULL
 NULL	float	NULL	NULL
-NULL	float unsigned	NULL	NULL
-NULL	float unsigned zerofill	NULL	NULL
 NULL	int	NULL	NULL
 NULL	mediumint	NULL	NULL
 NULL	smallint	NULL	NULL
@@ -963,33 +959,33 @@ NULL	test	tb2	f70	decimal	NULL	NULL	NULL	NULL	decimal(63,30) unsigned zerofill
 NULL	test	tb2	f71	decimal	NULL	NULL	NULL	NULL	decimal(10,0) unsigned zerofill
 NULL	test	tb2	f72	decimal	NULL	NULL	NULL	NULL	decimal(63,30) unsigned zerofill
 NULL	test	tb2	f73	double	NULL	NULL	NULL	NULL	double
-NULL	test	tb2	f74	double unsigned	NULL	NULL	NULL	NULL	double unsigned
-NULL	test	tb2	f75	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
-NULL	test	tb2	f76	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test	tb2	f74	double	NULL	NULL	NULL	NULL	double unsigned
+NULL	test	tb2	f75	double	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test	tb2	f76	double	NULL	NULL	NULL	NULL	double unsigned zerofill
 NULL	test	tb2	f77	double	NULL	NULL	NULL	NULL	double
-NULL	test	tb2	f78	double unsigned	NULL	NULL	NULL	NULL	double unsigned
-NULL	test	tb2	f79	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
-NULL	test	tb2	f80	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test	tb2	f78	double	NULL	NULL	NULL	NULL	double unsigned
+NULL	test	tb2	f79	double	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test	tb2	f80	double	NULL	NULL	NULL	NULL	double unsigned zerofill
 NULL	test	tb2	f81	float	NULL	NULL	NULL	NULL	float
-NULL	test	tb2	f82	float unsigned	NULL	NULL	NULL	NULL	float unsigned
-NULL	test	tb2	f83	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test	tb2	f84	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb2	f82	float	NULL	NULL	NULL	NULL	float unsigned
+NULL	test	tb2	f83	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb2	f84	float	NULL	NULL	NULL	NULL	float unsigned zerofill
 NULL	test	tb2	f85	float	NULL	NULL	NULL	NULL	float
 NULL	test	tb2	f86	float	NULL	NULL	NULL	NULL	float
-NULL	test	tb2	f87	float unsigned	NULL	NULL	NULL	NULL	float unsigned
-NULL	test	tb2	f88	float unsigned	NULL	NULL	NULL	NULL	float unsigned
-NULL	test	tb2	f89	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test	tb2	f90	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test	tb2	f91	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test	tb2	f92	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb2	f87	float	NULL	NULL	NULL	NULL	float unsigned
+NULL	test	tb2	f88	float	NULL	NULL	NULL	NULL	float unsigned
+NULL	test	tb2	f89	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb2	f90	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb2	f91	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb2	f92	float	NULL	NULL	NULL	NULL	float unsigned zerofill
 NULL	test	tb2	f93	float	NULL	NULL	NULL	NULL	float
 NULL	test	tb2	f94	double	NULL	NULL	NULL	NULL	double
-NULL	test	tb2	f95	float unsigned	NULL	NULL	NULL	NULL	float unsigned
-NULL	test	tb2	f96	double unsigned	NULL	NULL	NULL	NULL	double unsigned
-NULL	test	tb2	f97	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test	tb2	f98	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
-NULL	test	tb2	f99	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test	tb2	f100	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test	tb2	f95	float	NULL	NULL	NULL	NULL	float unsigned
+NULL	test	tb2	f96	double	NULL	NULL	NULL	NULL	double unsigned
+NULL	test	tb2	f97	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb2	f98	double	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test	tb2	f99	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb2	f100	double	NULL	NULL	NULL	NULL	double unsigned zerofill
 NULL	test	tb2	f101	date	NULL	NULL	NULL	NULL	date
 NULL	test	tb2	f102	time	NULL	NULL	NULL	NULL	time
 NULL	test	tb2	f103	datetime	NULL	NULL	NULL	NULL	datetime
@@ -1080,33 +1076,33 @@ NULL	test	tb4	f187	decimal	NULL	NULL	NULL	NULL	decimal(63,30) unsigned zerofill
 NULL	test	tb4	f188	decimal	NULL	NULL	NULL	NULL	decimal(10,0) unsigned zerofill
 NULL	test	tb4	f189	decimal	NULL	NULL	NULL	NULL	decimal(63,30) unsigned zerofill
 NULL	test	tb4	f190	double	NULL	NULL	NULL	NULL	double
-NULL	test	tb4	f191	double unsigned	NULL	NULL	NULL	NULL	double unsigned
-NULL	test	tb4	f192	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
-NULL	test	tb4	f193	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test	tb4	f191	double	NULL	NULL	NULL	NULL	double unsigned
+NULL	test	tb4	f192	double	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test	tb4	f193	double	NULL	NULL	NULL	NULL	double unsigned zerofill
 NULL	test	tb4	f194	double	NULL	NULL	NULL	NULL	double
-NULL	test	tb4	f195	double unsigned	NULL	NULL	NULL	NULL	double unsigned
-NULL	test	tb4	f196	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
-NULL	test	tb4	f197	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test	tb4	f195	double	NULL	NULL	NULL	NULL	double unsigned
+NULL	test	tb4	f196	double	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test	tb4	f197	double	NULL	NULL	NULL	NULL	double unsigned zerofill
 NULL	test	tb4	f198	float	NULL	NULL	NULL	NULL	float
-NULL	test	tb4	f199	float unsigned	NULL	NULL	NULL	NULL	float unsigned
-NULL	test	tb4	f200	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test	tb4	f201	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb4	f199	float	NULL	NULL	NULL	NULL	float unsigned
+NULL	test	tb4	f200	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb4	f201	float	NULL	NULL	NULL	NULL	float unsigned zerofill
 NULL	test	tb4	f202	float	NULL	NULL	NULL	NULL	float
 NULL	test	tb4	f203	float	NULL	NULL	NULL	NULL	float
-NULL	test	tb4	f204	float unsigned	NULL	NULL	NULL	NULL	float unsigned
-NULL	test	tb4	f205	float unsigned	NULL	NULL	NULL	NULL	float unsigned
-NULL	test	tb4	f206	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test	tb4	f207	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test	tb4	f208	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test	tb4	f209	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb4	f204	float	NULL	NULL	NULL	NULL	float unsigned
+NULL	test	tb4	f205	float	NULL	NULL	NULL	NULL	float unsigned
+NULL	test	tb4	f206	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb4	f207	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb4	f208	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb4	f209	float	NULL	NULL	NULL	NULL	float unsigned zerofill
 NULL	test	tb4	f210	float	NULL	NULL	NULL	NULL	float
 NULL	test	tb4	f211	double	NULL	NULL	NULL	NULL	double
-NULL	test	tb4	f212	float unsigned	NULL	NULL	NULL	NULL	float unsigned
-NULL	test	tb4	f213	double unsigned	NULL	NULL	NULL	NULL	double unsigned
-NULL	test	tb4	f214	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test	tb4	f215	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
-NULL	test	tb4	f216	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test	tb4	f217	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test	tb4	f212	float	NULL	NULL	NULL	NULL	float unsigned
+NULL	test	tb4	f213	double	NULL	NULL	NULL	NULL	double unsigned
+NULL	test	tb4	f214	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb4	f215	double	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test	tb4	f216	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test	tb4	f217	double	NULL	NULL	NULL	NULL	double unsigned zerofill
 NULL	test	tb4	f218	date	NULL	NULL	NULL	NULL	date
 NULL	test	tb4	f219	time	NULL	NULL	NULL	NULL	time
 NULL	test	tb4	f220	datetime	NULL	NULL	NULL	NULL	datetime
@@ -1147,33 +1143,33 @@ NULL	test1	tb2	f70	decimal	NULL	NULL	NULL	NULL	decimal(63,30) unsigned zerofill
 NULL	test1	tb2	f71	decimal	NULL	NULL	NULL	NULL	decimal(10,0) unsigned zerofill
 NULL	test1	tb2	f72	decimal	NULL	NULL	NULL	NULL	decimal(63,30) unsigned zerofill
 NULL	test1	tb2	f73	double	NULL	NULL	NULL	NULL	double
-NULL	test1	tb2	f74	double unsigned	NULL	NULL	NULL	NULL	double unsigned
-NULL	test1	tb2	f75	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
-NULL	test1	tb2	f76	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test1	tb2	f74	double	NULL	NULL	NULL	NULL	double unsigned
+NULL	test1	tb2	f75	double	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test1	tb2	f76	double	NULL	NULL	NULL	NULL	double unsigned zerofill
 NULL	test1	tb2	f77	double	NULL	NULL	NULL	NULL	double
-NULL	test1	tb2	f78	double unsigned	NULL	NULL	NULL	NULL	double unsigned
-NULL	test1	tb2	f79	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
-NULL	test1	tb2	f80	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test1	tb2	f78	double	NULL	NULL	NULL	NULL	double unsigned
+NULL	test1	tb2	f79	double	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test1	tb2	f80	double	NULL	NULL	NULL	NULL	double unsigned zerofill
 NULL	test1	tb2	f81	float	NULL	NULL	NULL	NULL	float
-NULL	test1	tb2	f82	float unsigned	NULL	NULL	NULL	NULL	float unsigned
-NULL	test1	tb2	f83	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test1	tb2	f84	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test1	tb2	f82	float	NULL	NULL	NULL	NULL	float unsigned
+NULL	test1	tb2	f83	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test1	tb2	f84	float	NULL	NULL	NULL	NULL	float unsigned zerofill
 NULL	test1	tb2	f85	float	NULL	NULL	NULL	NULL	float
 NULL	test1	tb2	f86	float	NULL	NULL	NULL	NULL	float
-NULL	test1	tb2	f87	float unsigned	NULL	NULL	NULL	NULL	float unsigned
-NULL	test1	tb2	f88	float unsigned	NULL	NULL	NULL	NULL	float unsigned
-NULL	test1	tb2	f89	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test1	tb2	f90	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test1	tb2	f91	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test1	tb2	f92	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test1	tb2	f87	float	NULL	NULL	NULL	NULL	float unsigned
+NULL	test1	tb2	f88	float	NULL	NULL	NULL	NULL	float unsigned
+NULL	test1	tb2	f89	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test1	tb2	f90	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test1	tb2	f91	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test1	tb2	f92	float	NULL	NULL	NULL	NULL	float unsigned zerofill
 NULL	test1	tb2	f93	float	NULL	NULL	NULL	NULL	float
 NULL	test1	tb2	f94	double	NULL	NULL	NULL	NULL	double
-NULL	test1	tb2	f95	float unsigned	NULL	NULL	NULL	NULL	float unsigned
-NULL	test1	tb2	f96	double unsigned	NULL	NULL	NULL	NULL	double unsigned
-NULL	test1	tb2	f97	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test1	tb2	f98	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
-NULL	test1	tb2	f99	float unsigned zerofill	NULL	NULL	NULL	NULL	float unsigned zerofill
-NULL	test1	tb2	f100	double unsigned zerofill	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test1	tb2	f95	float	NULL	NULL	NULL	NULL	float unsigned
+NULL	test1	tb2	f96	double	NULL	NULL	NULL	NULL	double unsigned
+NULL	test1	tb2	f97	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test1	tb2	f98	double	NULL	NULL	NULL	NULL	double unsigned zerofill
+NULL	test1	tb2	f99	float	NULL	NULL	NULL	NULL	float unsigned zerofill
+NULL	test1	tb2	f100	double	NULL	NULL	NULL	NULL	double unsigned zerofill
 NULL	test1	tb2	f101	date	NULL	NULL	NULL	NULL	date
 NULL	test1	tb2	f102	time	NULL	NULL	NULL	NULL	time
 NULL	test1	tb2	f103	datetime	NULL	NULL	NULL	NULL	datetime
diff --git a/mysql-test/suite/funcs_1/r/is_columns_mysql_embedded.result b/mysql-test/suite/funcs_1/r/is_columns_mysql_embedded.result
index 1d37a87f868..9c9d3cd26de 100644
--- a/mysql-test/suite/funcs_1/r/is_columns_mysql_embedded.result
+++ b/mysql-test/suite/funcs_1/r/is_columns_mysql_embedded.result
@@ -48,7 +48,7 @@ NULL	mysql	event	last_executed	10	NULL	YES	datetime	NULL	NULL	NULL	NULL	NULL	NUL
 NULL	mysql	event	modified	9	0000-00-00 00:00:00	NO	timestamp	NULL	NULL	NULL	NULL	NULL	NULL	timestamp				
 NULL	mysql	event	name	2		NO	char	64	192	NULL	NULL	utf8	utf8_general_ci	char(64)	PRI			
 NULL	mysql	event	on_completion	14	DROP	NO	enum	8	24	NULL	NULL	utf8	utf8_general_ci	enum('DROP','PRESERVE')				
-NULL	mysql	event	originator	17	NULL	NO	int	NULL	NULL	10	0	NULL	NULL	int(10)				
+NULL	mysql	event	originator	17	NULL	NO	int	NULL	NULL	10	0	NULL	NULL	int(10) unsigned				
 NULL	mysql	event	sql_mode	15		NO	set	478	1434	NULL	NULL	utf8	utf8_general_ci	set('REAL_AS_FLOAT','PIPES_AS_CONCAT','ANSI_QUOTES','IGNORE_SPACE','NOT_USED','ONLY_FULL_GROUP_BY','NO_UNSIGNED_SUBTRACTION','NO_DIR_IN_CREATE','POSTGRESQL','ORACLE','MSSQL','DB2','MAXDB','NO_KEY_OPTIONS','NO_TABLE_OPTIONS','NO_FIELD_OPTIONS','MYSQL323','MYSQL40','ANSI','NO_AUTO_VALUE_ON_ZERO','NO_BACKSLASH_ESCAPES','STRICT_TRANS_TABLES','STRICT_ALL_TABLES','NO_ZERO_IN_DATE','NO_ZERO_DATE','INVALID_DATES','ERROR_FOR_DIVISION_BY_ZERO','TRADITIONAL','NO_AUTO_CREATE_USER','HIGH_NOT_PRECEDENCE','NO_ENGINE_SUBSTITUTION','PAD_CHAR_TO_FULL_LENGTH')				
 NULL	mysql	event	starts	11	NULL	YES	datetime	NULL	NULL	NULL	NULL	NULL	NULL	datetime				
 NULL	mysql	event	status	13	ENABLED	NO	enum	18	54	NULL	NULL	utf8	utf8_general_ci	enum('ENABLED','DISABLED','SLAVESIDE_DISABLED')				
@@ -60,7 +60,7 @@ NULL	mysql	func	type	4	NULL	NO	enum	9	27	NULL	NULL	utf8	utf8_general_ci	enum('fu
 NULL	mysql	general_log	argument	6	NULL	NO	mediumtext	16777215	16777215	NULL	NULL	utf8	utf8_general_ci	mediumtext				
 NULL	mysql	general_log	command_type	5	NULL	NO	varchar	64	192	NULL	NULL	utf8	utf8_general_ci	varchar(64)				
 NULL	mysql	general_log	event_time	1	CURRENT_TIMESTAMP	NO	timestamp	NULL	NULL	NULL	NULL	NULL	NULL	timestamp		on update CURRENT_TIMESTAMP		
-NULL	mysql	general_log	server_id	4	NULL	NO	int	NULL	NULL	10	0	NULL	NULL	int(11)				
+NULL	mysql	general_log	server_id	4	NULL	NO	int	NULL	NULL	10	0	NULL	NULL	int(10) unsigned				
 NULL	mysql	general_log	thread_id	3	NULL	NO	int	NULL	NULL	10	0	NULL	NULL	int(11)				
 NULL	mysql	general_log	user_host	2	NULL	NO	mediumtext	16777215	16777215	NULL	NULL	utf8	utf8_general_ci	mediumtext				
 NULL	mysql	help_category	help_category_id	1	NULL	NO	smallint	NULL	NULL	5	0	NULL	NULL	smallint(5) unsigned	PRI			
@@ -150,7 +150,7 @@ NULL	mysql	slow_log	lock_time	4	NULL	NO	time	NULL	NULL	NULL	NULL	NULL	NULL	time
 NULL	mysql	slow_log	query_time	3	NULL	NO	time	NULL	NULL	NULL	NULL	NULL	NULL	time				
 NULL	mysql	slow_log	rows_examined	6	NULL	NO	int	NULL	NULL	10	0	NULL	NULL	int(11)				
 NULL	mysql	slow_log	rows_sent	5	NULL	NO	int	NULL	NULL	10	0	NULL	NULL	int(11)				
-NULL	mysql	slow_log	server_id	10	NULL	NO	int	NULL	NULL	10	0	NULL	NULL	int(11)				
+NULL	mysql	slow_log	server_id	10	NULL	NO	int	NULL	NULL	10	0	NULL	NULL	int(10) unsigned				
 NULL	mysql	slow_log	sql_text	11	NULL	NO	mediumtext	16777215	16777215	NULL	NULL	utf8	utf8_general_ci	mediumtext				
 NULL	mysql	slow_log	start_time	1	CURRENT_TIMESTAMP	NO	timestamp	NULL	NULL	NULL	NULL	NULL	NULL	timestamp		on update CURRENT_TIMESTAMP		
 NULL	mysql	slow_log	user_host	2	NULL	NO	mediumtext	16777215	16777215	NULL	NULL	utf8	utf8_general_ci	mediumtext				
@@ -329,7 +329,7 @@ NULL	mysql	event	ends	datetime	NULL	NULL	NULL	NULL	datetime
 3.0000	mysql	event	on_completion	enum	8	24	utf8	utf8_general_ci	enum('DROP','PRESERVE')
 3.0000	mysql	event	sql_mode	set	478	1434	utf8	utf8_general_ci	set('REAL_AS_FLOAT','PIPES_AS_CONCAT','ANSI_QUOTES','IGNORE_SPACE','NOT_USED','ONLY_FULL_GROUP_BY','NO_UNSIGNED_SUBTRACTION','NO_DIR_IN_CREATE','POSTGRESQL','ORACLE','MSSQL','DB2','MAXDB','NO_KEY_OPTIONS','NO_TABLE_OPTIONS','NO_FIELD_OPTIONS','MYSQL323','MYSQL40','ANSI','NO_AUTO_VALUE_ON_ZERO','NO_BACKSLASH_ESCAPES','STRICT_TRANS_TABLES','STRICT_ALL_TABLES','NO_ZERO_IN_DATE','NO_ZERO_DATE','INVALID_DATES','ERROR_FOR_DIVISION_BY_ZERO','TRADITIONAL','NO_AUTO_CREATE_USER','HIGH_NOT_PRECEDENCE','NO_ENGINE_SUBSTITUTION','PAD_CHAR_TO_FULL_LENGTH')
 3.0000	mysql	event	comment	char	64	192	utf8	utf8_bin	char(64)
-NULL	mysql	event	originator	int	NULL	NULL	NULL	NULL	int(10)
+NULL	mysql	event	originator	int	NULL	NULL	NULL	NULL	int(10) unsigned
 1.0000	mysql	event	time_zone	char	64	64	latin1	latin1_swedish_ci	char(64)
 3.0000	mysql	event	character_set_client	char	32	96	utf8	utf8_bin	char(32)
 3.0000	mysql	event	collation_connection	char	32	96	utf8	utf8_bin	char(32)
@@ -342,7 +342,7 @@ NULL	mysql	func	ret	tinyint	NULL	NULL	NULL	NULL	tinyint(1)
 NULL	mysql	general_log	event_time	timestamp	NULL	NULL	NULL	NULL	timestamp
 1.0000	mysql	general_log	user_host	mediumtext	16777215	16777215	utf8	utf8_general_ci	mediumtext
 NULL	mysql	general_log	thread_id	int	NULL	NULL	NULL	NULL	int(11)
-NULL	mysql	general_log	server_id	int	NULL	NULL	NULL	NULL	int(11)
+NULL	mysql	general_log	server_id	int	NULL	NULL	NULL	NULL	int(10) unsigned
 3.0000	mysql	general_log	command_type	varchar	64	192	utf8	utf8_general_ci	varchar(64)
 1.0000	mysql	general_log	argument	mediumtext	16777215	16777215	utf8	utf8_general_ci	mediumtext
 NULL	mysql	help_category	help_category_id	smallint	NULL	NULL	NULL	NULL	smallint(5) unsigned
@@ -434,7 +434,7 @@ NULL	mysql	slow_log	rows_examined	int	NULL	NULL	NULL	NULL	int(11)
 3.0000	mysql	slow_log	db	varchar	512	1536	utf8	utf8_general_ci	varchar(512)
 NULL	mysql	slow_log	last_insert_id	int	NULL	NULL	NULL	NULL	int(11)
 NULL	mysql	slow_log	insert_id	int	NULL	NULL	NULL	NULL	int(11)
-NULL	mysql	slow_log	server_id	int	NULL	NULL	NULL	NULL	int(11)
+NULL	mysql	slow_log	server_id	int	NULL	NULL	NULL	NULL	int(10) unsigned
 1.0000	mysql	slow_log	sql_text	mediumtext	16777215	16777215	utf8	utf8_general_ci	mediumtext
 3.0000	mysql	tables_priv	Host	char	60	180	utf8	utf8_bin	char(60)
 3.0000	mysql	tables_priv	Db	char	64	192	utf8	utf8_bin	char(64)
diff --git a/mysql-test/suite/funcs_1/r/is_routines.result b/mysql-test/suite/funcs_1/r/is_routines.result
index e7a900f5c0b..14a7107778c 100644
--- a/mysql-test/suite/funcs_1/r/is_routines.result
+++ b/mysql-test/suite/funcs_1/r/is_routines.result
@@ -111,10 +111,11 @@ CREATE FUNCTION function_for_routines() RETURNS INT RETURN 0;
 SELECT specific_name,routine_catalog,routine_schema,routine_name,routine_type,
 routine_body,external_name,external_language,parameter_style,sql_path
 FROM information_schema.routines
-WHERE routine_catalog   IS NOT NULL OR external_name   IS NOT NULL
+WHERE routine_schema = 'test' AND
+(routine_catalog   IS NOT NULL OR external_name   IS NOT NULL
 OR external_language IS NOT NULL OR sql_path        IS NOT NULL
 OR routine_body      <> 'SQL'    OR parameter_style <> 'SQL'
-   OR specific_name     <> routine_name;
+   OR specific_name     <> routine_name);
 specific_name	routine_catalog	routine_schema	routine_name	routine_type	routine_body	external_name	external_language	parameter_style	sql_path
 DROP PROCEDURE sp_for_routines;
 DROP FUNCTION  function_for_routines;
diff --git a/mysql-test/suite/funcs_1/r/storedproc.result b/mysql-test/suite/funcs_1/r/storedproc.result
index 7e21ddf1544..3efb361dc82 100644
--- a/mysql-test/suite/funcs_1/r/storedproc.result
+++ b/mysql-test/suite/funcs_1/r/storedproc.result
@@ -18241,8 +18241,6 @@ END//
 CALL sp70_n(-1e+40);
 f1
 -10000000000000000000000000000000000000000
-Warnings:
-Note	1265	Data truncated for column 'f1' at row 1
 CALL sp70_n( -10000000000000000000000000000000000000000 );
 f1
 -10000000000000000000000000000000000000000
@@ -18255,8 +18253,6 @@ END//
 CALL sp71_nu(1.00e+40);
 f1
 10000000000000000000000000000000000000000
-Warnings:
-Note	1265	Data truncated for column 'f1' at row 1
 CALL sp71_nu( 10000000000000000000000000000000000000000 );
 f1
 10000000000000000000000000000000000000000
@@ -18269,8 +18265,6 @@ END//
 CALL sp72_nuz(1.00e+40);
 f1
 0000000000000000000000010000000000000000000000000000000000000000
-Warnings:
-Note	1265	Data truncated for column 'f1' at row 1
 CALL sp72_nuz( 10000000000000000000000000000000000000000 );
 f1
 0000000000000000000000010000000000000000000000000000000000000000
@@ -18283,8 +18277,6 @@ END//
 CALL sp73_n_z(1.00e+40);
 f1
 0000000000000000000000010000000000000000000000000000000000000000
-Warnings:
-Note	1265	Data truncated for column 'f1' at row 1
 CALL sp73_n_z( 10000000000000000000000000000000000000000 );
 f1
 0000000000000000000000010000000000000000000000000000000000000000
diff --git a/mysql-test/suite/funcs_1/storedproc/storedproc_06.inc b/mysql-test/suite/funcs_1/storedproc/storedproc_06.inc
index d0fc6092959..f2df99fb5a3 100644
--- a/mysql-test/suite/funcs_1/storedproc/storedproc_06.inc
+++ b/mysql-test/suite/funcs_1/storedproc/storedproc_06.inc
@@ -53,7 +53,6 @@ flush privileges;
 DROP PROCEDURE IF EXISTS sp1;
 --enable_warnings
 
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (user1a, localhost, user_1, , db_storedproc_1);
 --source suite/funcs_1/include/show_connection.inc
diff --git a/mysql-test/suite/funcs_1/storedproc/storedproc_10.inc b/mysql-test/suite/funcs_1/storedproc/storedproc_10.inc
index 69378541b51..83f5f2105c5 100644
--- a/mysql-test/suite/funcs_1/storedproc/storedproc_10.inc
+++ b/mysql-test/suite/funcs_1/storedproc/storedproc_10.inc
@@ -58,7 +58,6 @@ GRANT CREATE ROUTINE ON db_storedproc.* TO 'user_1'@'localhost';
 GRANT SELECT         ON db_storedproc.* TO 'user_2'@'localhost';
 FLUSH PRIVILEGES;
 
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (user2_1, localhost, user_1, , db_storedproc);
 --source suite/funcs_1/include/show_connection.inc
diff --git a/mysql-test/suite/funcs_1/datadict/charset_collation.inc b/mysql-test/suite/funcs_1/t/charset_collation.test
similarity index 53%
rename from mysql-test/suite/funcs_1/datadict/charset_collation.inc
rename to mysql-test/suite/funcs_1/t/charset_collation.test
index a1991346cfc..186eb1f5b85 100644
--- a/mysql-test/suite/funcs_1/datadict/charset_collation.inc
+++ b/mysql-test/suite/funcs_1/t/charset_collation.test
@@ -1,58 +1,16 @@
-# suite/funcs_1/datadict/charset_collation.inc
+# suite/funcs_1/t/charset_collation.test
 #
 # Tests checking the content of the information_schema tables
 #      character_sets
 #      collations
 #      collation_character_set_applicability
 #
-#
-# The amount and properties of character_sets/collations depend on the
-# build type
-# 2007-12 MySQL 5.0, 2008-06 MySQL 5.1
-# ---------------------------------------------------------------------
-#
-# Variant 1 fits to
-#    version_comment       MySQL Enterprise Server (Commercial)
-#    version_comment       MySQL Enterprise Server (GPL)
-#    version_comment       MySQL Classic Server (Commercial)
-#    version_comment       MySQL Pushbuild Edition, build <number>
-#   (version_comment       Source distribution
-#    and
-#    compile was without "max" - > no collation 'utf8_general_ci')
-#
-# Variant 2 fits to
-#    version_comment       MySQL Enterprise Server (GPL)
-#    version_comment       MySQL Classic Server (Commercial)
-#    version_comment       MySQL Pushbuild Edition, build <number>
-#   (version_comment       Source distribution
-#    and
-#    compile was without "max" - > collation 'utf8_general_ci' exists)
-#
-# Difference between variant 1 and 2 is the collation 'utf8_general_ci'.
-#
-# Variant 3 fits to
-#    version_comment       MySQL Community Server (GPL)
-#    version_comment       MySQL Cluster Server (Commercial)
-#    version_comment       MySQL Advanced Server (GPL)         5.1
-#    version_comment       MySQL Advanced Server (Commercial)  5.1
-#
-# Difference between variant 3 and 2 is within the collation properties
-# IS_COMPILED and SORTLEN.
-#
-# 2008-06 All time excluded variant is "vanilla".
-# How to build "vanilla":
-#    ./BUILD/autorun.sh
-#    ./configure
-#    ./make
-# Some properties of "vanilla"
-#    version_comment       Source distribution
-#    Compared to the variants 1 to 3 a lot of character sets are missing.
-#    Example: "ucs2_bin" is in variant 1 to 3 but not in "vanilla".
-#
 # Created:
-# 2007-12-18 mleich - remove the unstable character_set/collation subtests
-#                     from include/datadict-master.inc
-#                   - create this new test
+# 2009-04-28 mleich Replace the charset_collation_* test which failed too often
+#                   because of changes
+#                   - in general available character sets and collations
+#                   - in build types
+#                   (Bug#40545, Bug#40209, Bug#40618, Bug#38346)
 #
 
 # Create a low privileged user.
@@ -61,8 +19,6 @@ DROP USER dbdict_test@localhost;
 CREATE USER dbdict_test@localhost;
 
 --echo # Establish connection con (user=dbdict_test)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
---replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (con,localhost,dbdict_test,,);
 ################################################################################
 #
@@ -98,32 +54,48 @@ connect (con,localhost,dbdict_test,,);
 #          combinations for which the current user and PUBLIC have no
 #          USAGE privilege.
 #
-# Notes (2007-12-19 mleich):
+# Notes (2009-04-28 mleich):
 # - The requirements are outdated because grant/revoke privilege for using a
 #   characterset/collation were never implemented.
-#   Therefore the tests should simply check the content of these tables.
-#
+#   Therefore the tests focus on the completeness and correctness of the
+#   content (rows and columns) of these tables.
 # - The amount of collations/character sets grows with new MySQL releases.
-#
-# - Even within the same release the amount of records within these tables
+#   Even within the same release the amount of records within these tables
 #   can differ between different build types (community, enterprise, source,...)
-#
+#   Therefore we limit the queries to character sets and collations which
+#   - exist in all build types
+#   - have in all build types the same "state".
+#   The character set
+#   - utf8 is used for Metadata
+#   - ascii is a quite usual
+#   The collations <character set>_general_ci and <character set>_bin seem
+#   to be available all time.
 #
 ################################################################################
+
+let $char_set_condition= character_set_name IN ('utf8','latin1','binary');
+let $collation_condition=
+   (collation_name LIKE CONCAT(character_set_name,'_general_ci')
+    OR
+    collation_name LIKE CONCAT(character_set_name,'_bin'));
 --echo
-SELECT *
+eval SELECT *
 FROM information_schema.character_sets
+WHERE $char_set_condition
 ORDER BY character_set_name;
 
 --echo
-SELECT *
+eval SELECT *
 FROM information_schema.collations
+WHERE $char_set_condition
+  AND $collation_condition
 ORDER BY collation_name;
 
-echo;
 --echo
-SELECT *
+eval SELECT *
 FROM information_schema.collation_character_set_applicability
+WHERE $char_set_condition
+  AND $collation_condition
 ORDER BY collation_name, character_set_name;
 
 
diff --git a/mysql-test/suite/funcs_1/t/charset_collation_1.test b/mysql-test/suite/funcs_1/t/charset_collation_1.test
deleted file mode 100644
index 15777062a72..00000000000
--- a/mysql-test/suite/funcs_1/t/charset_collation_1.test
+++ /dev/null
@@ -1,32 +0,0 @@
-# Tests checking the content of the information_schema tables
-#      character_sets
-#      collations
-#      collation_character_set_applicability
-#
-# Content variant 1 which should fit to
-#    Enterprise or Classic builds (binaries provided by MySQL)
-#    Pushbuilds
-#    Source builds without "max"
-#
-# Please read suite/funcs_1/datadict/charset_collation.inc for
-# additional information.
-#
-# Created:
-# 2007-12-18 mleich - remove the unstable character_set/collation subtests
-#                     from include/datadict-master.inc
-#                   - create this new test
-#
-
-if (`SELECT EXISTS (SELECT 1 FROM information_schema.collations
-                    WHERE collation_name = 'utf8_general_cs')
-       OR (    @@version_comment NOT LIKE '%Source%'
-           AND @@version_comment NOT LIKE '%Enterprise%'
-           AND @@version_comment NOT LIKE '%Classic%'
-           AND @@version_comment NOT LIKE '%Pushbuild%')
-       OR (SELECT count(*) = 0 FROM information_schema.collations
-                    WHERE collation_name = 'ucs2_bin')`)
-{
-  skip Test needs Enterprise, Classic , regular Pushbuild or Source-without-max build;
-}
-
---source suite/funcs_1/datadict/charset_collation.inc
diff --git a/mysql-test/suite/funcs_1/t/charset_collation_2.test b/mysql-test/suite/funcs_1/t/charset_collation_2.test
deleted file mode 100644
index d4924953b7d..00000000000
--- a/mysql-test/suite/funcs_1/t/charset_collation_2.test
+++ /dev/null
@@ -1,24 +0,0 @@
-# Tests checking the content of the information_schema tables
-#      character_sets
-#      collations
-#      collation_character_set_applicability
-#
-# Content variant 2 (compile from source with "max")
-#
-# Please read suite/funcs_1/datadict/charset_collation.inc for
-# additional information.
-#
-# Created:
-# 2007-12-18 mleich - remove the unstable character_set/collation subtests
-#                     from include/datadict-master.inc
-#                   - create this new test
-#
-
-if (`SELECT @@version_comment NOT LIKE '%Source%'
-     OR NOT EXISTS (SELECT 1 FROM information_schema.collations
-                    WHERE collation_name = 'utf8_general_cs')`)
-{
-  skip Test needs Source build with "max";
-}
-
---source suite/funcs_1/datadict/charset_collation.inc
diff --git a/mysql-test/suite/funcs_1/t/charset_collation_3.test b/mysql-test/suite/funcs_1/t/charset_collation_3.test
deleted file mode 100644
index e88b44e4a0f..00000000000
--- a/mysql-test/suite/funcs_1/t/charset_collation_3.test
+++ /dev/null
@@ -1,25 +0,0 @@
-# Tests checking the content of the information_schema tables
-#      character_sets
-#      collations
-#      collation_character_set_applicability
-#
-# Content variant 3 which should fit to
-#    Community and Cluster builds (binaries provided by MySQL)
-#
-# Please read suite/funcs_1/datadict/charset_collation.inc for
-# additional information.
-#
-# Created:
-# 2007-12-18 mleich - remove the unstable character_set/collation subtests
-#                     from include/datadict-master.inc
-#                   - create this new test
-#
-
-if (`SELECT @@version_comment NOT LIKE '%Community%'
-        AND @@version_comment NOT LIKE '%Cluster%'
-        AND @@version_comment NOT LIKE '%Advanced%'`)
-{
-   skip Test needs Community, Cluster or Advanced build;
-}
-
---source suite/funcs_1/datadict/charset_collation.inc
diff --git a/mysql-test/suite/funcs_1/t/disabled.def b/mysql-test/suite/funcs_1/t/disabled.def
index 69a69c60708..3f260ca49ba 100644
--- a/mysql-test/suite/funcs_1/t/disabled.def
+++ b/mysql-test/suite/funcs_1/t/disabled.def
@@ -11,6 +11,3 @@
 ##############################################################################
 
 ndb_trig_1011ext:  Bug#32656 NDB: Duplicate key error aborts transaction in handler. Doesn't talk back to SQL
-charset_collation_1: Bug#38346, Bug#40209, Bug#40545, Bug#40618
-charset_collation_2: Bug#38346, Bug#40209, Bug#40545, Bug#40618
-charset_collation_3: Bug#38346, Bug#40209, Bug#40545, Bug#40618
diff --git a/mysql-test/suite/funcs_1/t/is_basics_mixed.test b/mysql-test/suite/funcs_1/t/is_basics_mixed.test
index 7d03dc5f8b0..235b91c67d0 100644
--- a/mysql-test/suite/funcs_1/t/is_basics_mixed.test
+++ b/mysql-test/suite/funcs_1/t/is_basics_mixed.test
@@ -55,7 +55,6 @@ DROP   USER 'testuser1'@'localhost';
 CREATE USER 'testuser1'@'localhost';
 # Low privileged user
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , test);
 SELECT DATABASE();
diff --git a/mysql-test/suite/funcs_1/t/is_column_privileges.test b/mysql-test/suite/funcs_1/t/is_column_privileges.test
index 925d07b9657..cb8c50c01b7 100644
--- a/mysql-test/suite/funcs_1/t/is_column_privileges.test
+++ b/mysql-test/suite/funcs_1/t/is_column_privileges.test
@@ -132,7 +132,6 @@ WITH GRANT OPTION;
 eval $select;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , db_datadict);
 eval $select;
diff --git a/mysql-test/suite/funcs_1/t/is_column_privileges_is_mysql_test.test b/mysql-test/suite/funcs_1/t/is_column_privileges_is_mysql_test.test
index 98d01c60838..33269fe929c 100644
--- a/mysql-test/suite/funcs_1/t/is_column_privileges_is_mysql_test.test
+++ b/mysql-test/suite/funcs_1/t/is_column_privileges_is_mysql_test.test
@@ -46,7 +46,6 @@ eval $my_show2;
 eval $my_show3;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , db_datadict);
 eval $my_select;
diff --git a/mysql-test/suite/funcs_1/t/is_columns.test b/mysql-test/suite/funcs_1/t/is_columns.test
index 20b832ca5c3..efb52acd48c 100644
--- a/mysql-test/suite/funcs_1/t/is_columns.test
+++ b/mysql-test/suite/funcs_1/t/is_columns.test
@@ -148,7 +148,6 @@ eval $my_show2;
 eval $my_show3;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , db_datadict);
 --source suite/funcs_1/datadict/datadict_bug_12777.inc
diff --git a/mysql-test/suite/funcs_1/t/is_schema_privileges.test b/mysql-test/suite/funcs_1/t/is_schema_privileges.test
index c1fc70b03f7..1f408d71b39 100644
--- a/mysql-test/suite/funcs_1/t/is_schema_privileges.test
+++ b/mysql-test/suite/funcs_1/t/is_schema_privileges.test
@@ -116,7 +116,6 @@ let $show_testuser1 = SHOW GRANTS FOR 'testuser1'@'localhost';
 let $show_testuser2 = SHOW GRANTS FOR 'testuser2'@'localhost';
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , test);
 GRANT SELECT ON db_datadict_4.*  TO 'testuser2'@'localhost';
diff --git a/mysql-test/suite/funcs_1/t/is_schema_privileges_is_mysql_test.test b/mysql-test/suite/funcs_1/t/is_schema_privileges_is_mysql_test.test
index d7b703ed04a..3f60f71fe9a 100644
--- a/mysql-test/suite/funcs_1/t/is_schema_privileges_is_mysql_test.test
+++ b/mysql-test/suite/funcs_1/t/is_schema_privileges_is_mysql_test.test
@@ -46,7 +46,6 @@ eval $my_show2;
 eval $my_show3;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , db_datadict);
 eval $my_select;
diff --git a/mysql-test/suite/funcs_1/t/is_schemata_is_mysql_test.test b/mysql-test/suite/funcs_1/t/is_schemata_is_mysql_test.test
index b5f13ab323c..9bfbf0cf335 100644
--- a/mysql-test/suite/funcs_1/t/is_schemata_is_mysql_test.test
+++ b/mysql-test/suite/funcs_1/t/is_schemata_is_mysql_test.test
@@ -46,7 +46,6 @@ eval $my_show2;
 eval $my_show3;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , db_datadict);
 eval $my_select;
diff --git a/mysql-test/suite/funcs_1/t/is_statistics.test b/mysql-test/suite/funcs_1/t/is_statistics.test
index e202e7392ea..458892a6d91 100644
--- a/mysql-test/suite/funcs_1/t/is_statistics.test
+++ b/mysql-test/suite/funcs_1/t/is_statistics.test
@@ -140,7 +140,6 @@ eval $my_show1;
 eval $my_show2;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , test);
 # nothing visible for testuser1
diff --git a/mysql-test/suite/funcs_1/t/is_table_constraints.test b/mysql-test/suite/funcs_1/t/is_table_constraints.test
index 730e805c91e..a64b3bfaa6e 100644
--- a/mysql-test/suite/funcs_1/t/is_table_constraints.test
+++ b/mysql-test/suite/funcs_1/t/is_table_constraints.test
@@ -132,7 +132,6 @@ eval $my_show1;
 eval $my_show2;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , db_datadict);
 SHOW GRANTS FOR 'testuser1'@'localhost';
diff --git a/mysql-test/suite/funcs_1/t/is_table_privileges.test b/mysql-test/suite/funcs_1/t/is_table_privileges.test
index 27ce22816a2..5ea0dd7c6a7 100644
--- a/mysql-test/suite/funcs_1/t/is_table_privileges.test
+++ b/mysql-test/suite/funcs_1/t/is_table_privileges.test
@@ -116,7 +116,6 @@ WHERE table_name LIKE 'tb%'
 ORDER BY grantee,table_schema,table_name,privilege_type;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , db_datadict);
 --replace_result $other_engine_type <other_engine_type>
diff --git a/mysql-test/suite/funcs_1/t/is_user_privileges.test b/mysql-test/suite/funcs_1/t/is_user_privileges.test
index 5f8c29ca39d..1d0d3e51aae 100644
--- a/mysql-test/suite/funcs_1/t/is_user_privileges.test
+++ b/mysql-test/suite/funcs_1/t/is_user_privileges.test
@@ -114,7 +114,6 @@ eval $my_select1;
 eval $my_select2;
 
 --echo # Establish connection testuser1 (user=testuser1)
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (testuser1, localhost, testuser1, , db_datadict);
 eval $my_select1;
diff --git a/mysql-test/suite/funcs_1/t/myisam_views.test b/mysql-test/suite/funcs_1/t/myisam_views.test
index 461bc0e3549..fe72843cfaf 100644
--- a/mysql-test/suite/funcs_1/t/myisam_views.test
+++ b/mysql-test/suite/funcs_1/t/myisam_views.test
@@ -1,5 +1,7 @@
 #### suite/funcs_1/t/myisam_views.test
 
+--source include/no_valgrind_without_big.inc
+
 # MyISAM tables should be used
 #
 # Set $engine_type
diff --git a/mysql-test/suite/funcs_1/t/ndb_storedproc_06.tes b/mysql-test/suite/funcs_1/t/ndb_storedproc_06.tes
deleted file mode 100644
index ce061da2299..00000000000
--- a/mysql-test/suite/funcs_1/t/ndb_storedproc_06.tes
+++ /dev/null
@@ -1,9 +0,0 @@
-#### suite/funcs_1/t/innodb_storedproc_06.test
-#
-# 1. Check if InnoDB is available
---source include/have_innodb.inc
-
-# 2. Set $engine_type
-let $engine_type= innodb;
-
---source suite/funcs_1/storedproc/storedproc_06.inc
diff --git a/mysql-test/suite/funcs_1/t/ndb_storedproc_08.tes b/mysql-test/suite/funcs_1/t/ndb_storedproc_08.tes
deleted file mode 100644
index c8c289c5f49..00000000000
--- a/mysql-test/suite/funcs_1/t/ndb_storedproc_08.tes
+++ /dev/null
@@ -1,9 +0,0 @@
-#### suite/funcs_1/t/innodb_storedproc_08.test
-#
-# 1. Check if InnoDB is available
---source include/have_innodb.inc
-
-# 2. Set $engine_type
-let $engine_type= innodb;
-
---source suite/funcs_1/storedproc/storedproc_08.inc
diff --git a/mysql-test/suite/funcs_1/t/storedproc.test b/mysql-test/suite/funcs_1/t/storedproc.test
index 6877b751ed2..16c4d61bf58 100644
--- a/mysql-test/suite/funcs_1/t/storedproc.test
+++ b/mysql-test/suite/funcs_1/t/storedproc.test
@@ -10,6 +10,17 @@
 #                  
 ############################################################################
 
+# Bug#37746 - Arithmetic range ("int") is smaller than expected
+# This code is in place to ensure this test is only skipped
+# for the Win64 platform
+if(`SELECT CONVERT(@@version_compile_os using latin1) IN ("Win64")`)
+{
+--skip Bug#37746 2009-07-07 pcrews Arithmetic range ("int") is smaller than expected
+}
+
+
+
+
 # This test cannot be used for the embedded server because we check here
 # privileges.
 --source include/not_embedded.inc
@@ -817,7 +828,6 @@ CREATE PROCEDURE sp11() insert into mysql.t1 values('a');
 --replace_column 13 created 14 modified
 SELECT security_type from mysql.proc where specific_name='sp11';
 
-let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 --replace_result $MASTER_MYPORT MYSQL_PORT $MASTER_MYSOCK MYSQL_SOCK
 connect (u_1, localhost, user_1, , db_storedproc);
 --source suite/funcs_1/include/show_connection.inc
@@ -22430,7 +22440,9 @@ BEGIN
 END//
 delimiter ;//
 
+--disable_warnings
 CALL sp70_n(-1e+40);
+--enable_warnings
 eval CALL sp70_n( $minus_40 );
 
 
@@ -22446,7 +22458,9 @@ BEGIN
 END//
 delimiter ;//
 
+--disable_warnings
 CALL sp71_nu(1.00e+40);
+--enable_warnings
 eval CALL sp71_nu( $plus_40 );
 
 
@@ -22462,7 +22476,9 @@ BEGIN
 END//
 delimiter ;//
 
+--disable_warnings
 CALL sp72_nuz(1.00e+40);
+--enable_warnings
 eval CALL sp72_nuz( $plus_40 );
 
 
@@ -22478,7 +22494,9 @@ BEGIN
 END//
 delimiter ;//
 
+--disable_warnings
 CALL sp73_n_z(1.00e+40);
+--enable_warnings
 eval CALL sp73_n_z( $plus_40 );
 
 
diff --git a/mysql-test/suite/funcs_1/triggers/triggers_03.inc b/mysql-test/suite/funcs_1/triggers/triggers_03.inc
index e5bea5b2005..9ef6a9ac9af 100644
--- a/mysql-test/suite/funcs_1/triggers/triggers_03.inc
+++ b/mysql-test/suite/funcs_1/triggers/triggers_03.inc
@@ -62,7 +62,6 @@ let $message= Testcase 3.5.3.2/6:;
 	grant SELECT on priv_db.t1 to test_yesprivs@localhost;
 	show grants for test_yesprivs@localhost;
 
-        let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
 	connect (no_privs,localhost,test_noprivs,PWD,test,$MASTER_MYPORT,$MASTER_MYSOCK);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
diff --git a/mysql-test/suite/funcs_1/triggers/triggers_03e_columns.inc b/mysql-test/suite/funcs_1/triggers/triggers_03e_columns.inc
index 60928ba8f35..475063587d4 100644
--- a/mysql-test/suite/funcs_1/triggers/triggers_03e_columns.inc
+++ b/mysql-test/suite/funcs_1/triggers/triggers_03e_columns.inc
@@ -36,7 +36,6 @@ let $message= ####### Testcase for column privileges of triggers: #######;
         grant SELECT,UPDATE on priv_db.* to test_noprivs@localhost;
         show grants for test_noprivs@localhost;
 
-	let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
 	connect (yes_privs,localhost,test_yesprivs,PWD,test,$MASTER_MYPORT,$MASTER_MYSOCK);
 
diff --git a/mysql-test/suite/funcs_1/triggers/triggers_03e_db_level.inc b/mysql-test/suite/funcs_1/triggers/triggers_03e_db_level.inc
index cb9d8ddc78b..e5933eb84a8 100644
--- a/mysql-test/suite/funcs_1/triggers/triggers_03e_db_level.inc
+++ b/mysql-test/suite/funcs_1/triggers/triggers_03e_db_level.inc
@@ -37,7 +37,6 @@ let $message= Testcase for db level:;
         show grants for test_noprivs@localhost;
 
 # no trigger privilege->create trigger must fail:
-        let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
 	connect (yes_privs,localhost,test_yesprivs,PWD,test,$MASTER_MYPORT,$MASTER_MYSOCK);
 let $message= no trigger privilege on db level for create:;
diff --git a/mysql-test/suite/funcs_1/triggers/triggers_03e_db_table_mix.inc b/mysql-test/suite/funcs_1/triggers/triggers_03e_db_table_mix.inc
index de9cf61f641..82f4a28f664 100644
--- a/mysql-test/suite/funcs_1/triggers/triggers_03e_db_table_mix.inc
+++ b/mysql-test/suite/funcs_1/triggers/triggers_03e_db_table_mix.inc
@@ -41,7 +41,6 @@ let $message= ####### Testcase for mix of db and table level: #######;
         grant SELECT,INSERT on priv2_db.* to test_noprivs@localhost;
         show grants for test_noprivs@localhost;
 
-        let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
 	connect (yes_privs,localhost,test_yesprivs,PWD,test,$MASTER_MYPORT,$MASTER_MYSOCK);
 	use priv1_db;
diff --git a/mysql-test/suite/funcs_1/triggers/triggers_03e_definer.inc b/mysql-test/suite/funcs_1/triggers/triggers_03e_definer.inc
index 18c8a3ebcd5..f1efff990f1 100644
--- a/mysql-test/suite/funcs_1/triggers/triggers_03e_definer.inc
+++ b/mysql-test/suite/funcs_1/triggers/triggers_03e_definer.inc
@@ -27,7 +27,6 @@ let $message= #########      Testcase for definer:   ########;
 
 	revoke ALL PRIVILEGES, GRANT OPTION FROM test_yesprivs@localhost;
 
-        let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
 	connect (yes_privs,localhost,test_yesprivs,PWD,test,$MASTER_MYPORT,$MASTER_MYSOCK);
 
diff --git a/mysql-test/suite/funcs_1/triggers/triggers_03e_global_db_mix.inc b/mysql-test/suite/funcs_1/triggers/triggers_03e_global_db_mix.inc
index cd90d25aefd..b6f4af7e0a7 100644
--- a/mysql-test/suite/funcs_1/triggers/triggers_03e_global_db_mix.inc
+++ b/mysql-test/suite/funcs_1/triggers/triggers_03e_global_db_mix.inc
@@ -38,7 +38,6 @@ let $message= #### Testcase for mix of user(global) and db level: ####;
         grant SELECT,INSERT  on *.* to test_noprivs@localhost;
         show grants for test_noprivs@localhost;
 
-        let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
 	connect (yes_privs,localhost,test_yesprivs,PWD,test,$MASTER_MYPORT,$MASTER_MYSOCK);
 
diff --git a/mysql-test/suite/funcs_1/triggers/triggers_03e_prepare.inc b/mysql-test/suite/funcs_1/triggers/triggers_03e_prepare.inc
index f1b3bbe2cb4..ea7c385768c 100644
--- a/mysql-test/suite/funcs_1/triggers/triggers_03e_prepare.inc
+++ b/mysql-test/suite/funcs_1/triggers/triggers_03e_prepare.inc
@@ -32,7 +32,6 @@ let $message= #### Testcase for trigger privilege on execution time ########;
 	revoke ALL PRIVILEGES, GRANT OPTION FROM test_yesprivs@localhost;
         revoke ALL PRIVILEGES, GRANT OPTION FROM test_useprivs@localhost;
 
-	let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
 	connect (yes_privs,localhost,test_yesprivs,PWD,test,$MASTER_MYPORT,$MASTER_MYSOCK);
 
diff --git a/mysql-test/suite/funcs_1/triggers/triggers_03e_table_level.inc b/mysql-test/suite/funcs_1/triggers/triggers_03e_table_level.inc
index 9cc272c09bc..94f30fe13c2 100644
--- a/mysql-test/suite/funcs_1/triggers/triggers_03e_table_level.inc
+++ b/mysql-test/suite/funcs_1/triggers/triggers_03e_table_level.inc
@@ -30,7 +30,6 @@ let $message= #########      Testcase for table level:   ########;
         set password for test_noprivs@localhost = password('PWD');
         revoke ALL PRIVILEGES, GRANT OPTION FROM test_noprivs@localhost;
 
-        let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
 	connect (yes_privs,localhost,test_yesprivs,PWD,test,$MASTER_MYPORT,$MASTER_MYSOCK);
 
diff --git a/mysql-test/suite/funcs_1/triggers/triggers_03e_transaction.inc b/mysql-test/suite/funcs_1/triggers/triggers_03e_transaction.inc
index 53ce49c728c..e43f4ce97a3 100644
--- a/mysql-test/suite/funcs_1/triggers/triggers_03e_transaction.inc
+++ b/mysql-test/suite/funcs_1/triggers/triggers_03e_transaction.inc
@@ -27,7 +27,6 @@ let $message= #########      Testcase for transactions:   ########;
 
 	revoke ALL PRIVILEGES, GRANT OPTION FROM test_yesprivs@localhost;
 
-        let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
 	connect (yes_privs,localhost,test_yesprivs,PWD,test,$MASTER_MYPORT,$MASTER_MYSOCK);
 
diff --git a/mysql-test/suite/funcs_1/triggers/triggers_0407.inc b/mysql-test/suite/funcs_1/triggers/triggers_0407.inc
index af45017ae6a..d68b3d79086 100644
--- a/mysql-test/suite/funcs_1/triggers/triggers_0407.inc
+++ b/mysql-test/suite/funcs_1/triggers/triggers_0407.inc
@@ -22,7 +22,6 @@ let $message= Testcase: 3.5:;
 	create User test_super@localhost;
 	set password for test_super@localhost = password('PWD');
 	grant ALL on *.* to test_super@localhost with grant OPTION;
-        let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
 	connect (con1_general,localhost,test_general,PWD,test,$MASTER_MYPORT,$MASTER_MYSOCK);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
diff --git a/mysql-test/suite/funcs_1/triggers/triggers_08.inc b/mysql-test/suite/funcs_1/triggers/triggers_08.inc
index 4b4050b996d..087f18e8e6b 100644
--- a/mysql-test/suite/funcs_1/triggers/triggers_08.inc
+++ b/mysql-test/suite/funcs_1/triggers/triggers_08.inc
@@ -23,7 +23,6 @@ let $message= Testcase: 3.5:;
 	create User test_super@localhost;
 	set password for test_super@localhost = password('PWD');
 	grant ALL on *.* to test_super@localhost with grant OPTION;
-        let $MASTER_MYSOCK= query_get_value(SHOW VARIABLES LIKE 'socket', Value, 1);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
 	connect (con2_general,localhost,test_general,PWD,test,$MASTER_MYPORT,$MASTER_MYSOCK);
 	--replace_result $MASTER_MYPORT MASTER_MYPORT $MASTER_MYSOCK MASTER_MYSOCK
diff --git a/mysql-test/suite/ibmdb2i/include/have_i54.inc b/mysql-test/suite/ibmdb2i/include/have_i54.inc
new file mode 100755
index 00000000000..7054e196153
--- /dev/null
+++ b/mysql-test/suite/ibmdb2i/include/have_i54.inc
@@ -0,0 +1,20 @@
+# Check for IBM i 6.1 or later
+--disable_query_log
+system uname -rv > $MYSQLTEST_VARDIR/tmp/version;
+--disable_warnings
+drop table if exists uname_vr;
+--enable_warnings
+create temporary table uname_vr (r int, v int);
+--disable_warnings
+eval LOAD DATA INFILE "$MYSQLTEST_VARDIR/tmp/version" into table uname_vr fields terminated by ' ';
+--enable_warnings
+let $ok = `select count(*) from uname_vr where v = 5 and r = 4`;
+drop table uname_vr;
+remove_file $MYSQLTEST_VARDIR/tmp/version;
+--enable_query_log
+if (!$ok)
+{
+  skip "Need IBM i 5.4 or later";
+}
+  
+
diff --git a/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_44232.result b/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_44232.result
new file mode 100755
index 00000000000..8276b401073
--- /dev/null
+++ b/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_44232.result
@@ -0,0 +1,4 @@
+create table t1 (c char(1) character set armscii8) engine=ibmdb2i;
+ERROR HY000: Can't create table 'test.t1' (errno: 2504)
+create table t1 (c char(1) character set eucjpms ) engine=ibmdb2i;
+ERROR HY000: Can't create table 'test.t1' (errno: 2504)
diff --git a/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_44610.result b/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_44610.result
new file mode 100755
index 00000000000..311e800e1b0
--- /dev/null
+++ b/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_44610.result
@@ -0,0 +1,18 @@
+create table ABC (i int) engine=ibmdb2i;
+drop table ABC;
+create table `1234567890ABC` (i int) engine=ibmdb2i;
+drop table `1234567890ABC`;
+create table `!@#$%` (i int) engine=ibmdb2i;
+drop table `!@#$%`;
+create table `ABCD#########` (i int) engine=ibmdb2i;
+drop table `ABCD#########`;
+create table `_` (i int) engine=ibmdb2i;
+drop table `_`;
+create table `abc##def` (i int) engine=ibmdb2i;
+drop table `abc##def`;
+set names utf8;
+create table İ (s1 int) engine=ibmdb2i;
+drop table İ;
+create table İİ (s1 int) engine=ibmdb2i;
+drop table İİ;
+set names latin1;
diff --git a/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_45196.result b/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_45196.result
new file mode 100644
index 00000000000..916e1d93ee5
--- /dev/null
+++ b/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_45196.result
@@ -0,0 +1,33 @@
+drop table if exists t1;
+create table t1 (c char(10), index(c)) collate ucs2_czech_ci engine=ibmdb2i;
+insert into t1 values ("ch"),("h"),("i");
+select * from t1 order by c;
+c
+h
+ch
+i
+drop table t1;
+create table t1 (c char(10), index(c)) collate utf8_czech_ci engine=ibmdb2i;
+insert into t1 values ("ch"),("h"),("i");
+select * from t1 order by c;
+c
+h
+ch
+i
+drop table t1;
+create table t1 (c char(10), index(c)) collate ucs2_danish_ci engine=ibmdb2i;
+insert into t1 values("abc"),("abcd"),("aaaa");
+select c from t1 order by c;
+c
+abc
+abcd
+aaaa
+drop table t1;
+create table t1 (c char(10), index(c)) collate utf8_danish_ci engine=ibmdb2i;
+insert into t1 values("abc"),("abcd"),("aaaa");
+select c from t1 order by c;
+c
+abc
+abcd
+aaaa
+drop table t1;
diff --git a/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_45793.result b/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_45793.result
new file mode 100644
index 00000000000..2392b746877
--- /dev/null
+++ b/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_45793.result
@@ -0,0 +1,7 @@
+drop table if exists t1;
+create table t1 (c char(10), index(c)) charset macce engine=ibmdb2i;
+insert into t1 values ("test");
+select * from t1 order by c;
+c
+test
+drop table t1;
diff --git a/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_45983.result b/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_45983.result
new file mode 100644
index 00000000000..b9f4dcfc656
--- /dev/null
+++ b/mysql-test/suite/ibmdb2i/r/ibmdb2i_bug_45983.result
@@ -0,0 +1,20 @@
+set ibmdb2i_create_index_option=1;
+drop schema if exists test1;
+create schema test1;
+use test1;
+CREATE TABLE t1 (f int primary key, index(f)) engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (f char(10) collate utf8_bin primary key, index(f)) engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (f char(10) collate latin1_swedish_ci primary key, index(f)) engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (f char(10) collate latin1_swedish_ci primary key, i int, index i(i,f)) engine=ibmdb2i;
+drop table t1;
+create table fd (SQSSEQ CHAR(10)) engine=ibmdb2i;
+select * from fd;
+SQSSEQ
+*HEX
+*HEX
+*HEX
+*HEX
+drop table fd;
diff --git a/mysql-test/suite/ibmdb2i/r/ibmdb2i_collations.result b/mysql-test/suite/ibmdb2i/r/ibmdb2i_collations.result
new file mode 100644
index 00000000000..4f7d71cab2d
--- /dev/null
+++ b/mysql-test/suite/ibmdb2i/r/ibmdb2i_collations.result
@@ -0,0 +1,1204 @@
+drop table if exists t1, ffd, fd;
+CREATE TABLE t1 (armscii8_bin integer, c char(10), v varchar(20), index(c), index(v)) collate armscii8_bin engine=ibmdb2i;
+CREATE TABLE t1 (armscii8_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate armscii8_general_ci engine=ibmdb2i;
+CREATE TABLE t1 (ascii_bin integer, c char(10), v varchar(20), index(c), index(v)) collate ascii_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (ascii_bin char(10) primary key) collate ascii_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ascii_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ascii_general_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (ascii_general_ci char(10) primary key) collate ascii_general_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (big5_bin integer, c char(10), v varchar(20), index(c), index(v)) collate big5_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (big5_bin char(10) primary key) collate big5_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (big5_chinese_ci integer, c char(10), v varchar(20), index(c), index(v)) collate big5_chinese_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (big5_chinese_ci char(10) primary key) collate big5_chinese_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp1250_bin integer, c char(10), v varchar(20), index(c), index(v)) collate cp1250_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp1250_bin char(10) primary key) collate cp1250_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp1250_croatian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate cp1250_croatian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp1250_croatian_ci char(10) primary key) collate cp1250_croatian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp1250_czech_cs integer, c char(10), v varchar(20), index(c), index(v)) collate cp1250_czech_cs engine=ibmdb2i;
+CREATE TABLE t1 (cp1250_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate cp1250_general_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp1250_general_ci char(10) primary key) collate cp1250_general_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp1250_polish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate cp1250_polish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp1250_polish_ci char(10) primary key) collate cp1250_polish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp1251_bin integer, c char(10), v varchar(20), index(c), index(v)) collate cp1251_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp1251_bin char(10) primary key) collate cp1251_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp1251_bulgarian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate cp1251_bulgarian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp1251_bulgarian_ci char(10) primary key) collate cp1251_bulgarian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp1251_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate cp1251_general_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp1251_general_ci char(10) primary key) collate cp1251_general_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp1251_general_cs integer, c char(10), v varchar(20), index(c), index(v)) collate cp1251_general_cs engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp1251_general_cs char(10) primary key) collate cp1251_general_cs engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp1251_ukrainian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate cp1251_ukrainian_ci engine=ibmdb2i;
+CREATE TABLE t1 (cp1256_bin integer, c char(10), v varchar(20), index(c), index(v)) collate cp1256_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp1256_bin char(10) primary key) collate cp1256_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp1256_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate cp1256_general_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp1256_general_ci char(10) primary key) collate cp1256_general_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp1257_bin integer, c char(10), v varchar(20), index(c), index(v)) collate cp1257_bin engine=ibmdb2i;
+CREATE TABLE t1 (cp1257_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate cp1257_general_ci engine=ibmdb2i;
+CREATE TABLE t1 (cp1257_lithuanian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate cp1257_lithuanian_ci engine=ibmdb2i;
+CREATE TABLE t1 (cp850_bin integer, c char(10), v varchar(20), index(c), index(v)) collate cp850_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp850_bin char(10) primary key) collate cp850_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp850_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate cp850_general_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp850_general_ci char(10) primary key) collate cp850_general_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp852_bin integer, c char(10), v varchar(20), index(c), index(v)) collate cp852_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp852_bin char(10) primary key) collate cp852_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp852_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate cp852_general_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (cp852_general_ci char(10) primary key) collate cp852_general_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp866_bin integer, c char(10), v varchar(20), index(c), index(v)) collate cp866_bin engine=ibmdb2i;
+CREATE TABLE t1 (cp866_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate cp866_general_ci engine=ibmdb2i;
+CREATE TABLE t1 (cp932_bin integer, c char(10), v varchar(20), index(c), index(v)) collate cp932_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (cp932_bin char(10) primary key) collate cp932_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (cp932_japanese_ci integer, c char(10), v varchar(20), index(c), index(v)) collate cp932_japanese_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (cp932_japanese_ci char(10) primary key) collate cp932_japanese_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (dec8_bin integer, c char(10), v varchar(20), index(c), index(v)) collate dec8_bin engine=ibmdb2i;
+CREATE TABLE t1 (dec8_swedish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate dec8_swedish_ci engine=ibmdb2i;
+CREATE TABLE t1 (eucjpms_bin integer, c char(10), v varchar(20), index(c), index(v)) collate eucjpms_bin engine=ibmdb2i;
+CREATE TABLE t1 (eucjpms_japanese_ci integer, c char(10), v varchar(20), index(c), index(v)) collate eucjpms_japanese_ci engine=ibmdb2i;
+CREATE TABLE t1 (euckr_bin integer, c char(10), v varchar(20), index(c), index(v)) collate euckr_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (euckr_bin char(10) primary key) collate euckr_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (euckr_korean_ci integer, c char(10), v varchar(20), index(c), index(v)) collate euckr_korean_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (euckr_korean_ci char(10) primary key) collate euckr_korean_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (gb2312_bin integer, c char(10), v varchar(20), index(c), index(v)) collate gb2312_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (gb2312_bin char(10) primary key) collate gb2312_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (gb2312_chinese_ci integer, c char(10), v varchar(20), index(c), index(v)) collate gb2312_chinese_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (gb2312_chinese_ci char(10) primary key) collate gb2312_chinese_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (gbk_bin integer, c char(10), v varchar(20), index(c), index(v)) collate gbk_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (gbk_bin char(10) primary key) collate gbk_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (gbk_chinese_ci integer, c char(10), v varchar(20), index(c), index(v)) collate gbk_chinese_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (gbk_chinese_ci char(10) primary key) collate gbk_chinese_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (geostd8_bin integer, c char(10), v varchar(20), index(c), index(v)) collate geostd8_bin engine=ibmdb2i;
+CREATE TABLE t1 (geostd8_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate geostd8_general_ci engine=ibmdb2i;
+CREATE TABLE t1 (greek_bin integer, c char(10), v varchar(20), index(c), index(v)) collate greek_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (greek_bin char(10) primary key) collate greek_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (greek_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate greek_general_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (greek_general_ci char(10) primary key) collate greek_general_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (hebrew_bin integer, c char(10), v varchar(20), index(c), index(v)) collate hebrew_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (hebrew_bin char(10) primary key) collate hebrew_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (hebrew_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate hebrew_general_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (hebrew_general_ci char(10) primary key) collate hebrew_general_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (hp8_bin integer, c char(10), v varchar(20), index(c), index(v)) collate hp8_bin engine=ibmdb2i;
+CREATE TABLE t1 (hp8_english_ci integer, c char(10), v varchar(20), index(c), index(v)) collate hp8_english_ci engine=ibmdb2i;
+CREATE TABLE t1 (keybcs2_bin integer, c char(10), v varchar(20), index(c), index(v)) collate keybcs2_bin engine=ibmdb2i;
+CREATE TABLE t1 (keybcs2_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate keybcs2_general_ci engine=ibmdb2i;
+CREATE TABLE t1 (koi8r_bin integer, c char(10), v varchar(20), index(c), index(v)) collate koi8r_bin engine=ibmdb2i;
+CREATE TABLE t1 (koi8r_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate koi8r_general_ci engine=ibmdb2i;
+CREATE TABLE t1 (koi8u_bin integer, c char(10), v varchar(20), index(c), index(v)) collate koi8u_bin engine=ibmdb2i;
+CREATE TABLE t1 (koi8u_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate koi8u_general_ci engine=ibmdb2i;
+CREATE TABLE t1 (latin1_bin integer, c char(10), v varchar(20), index(c), index(v)) collate latin1_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (latin1_bin char(10) primary key) collate latin1_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (latin1_danish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate latin1_danish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (latin1_danish_ci char(10) primary key) collate latin1_danish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (latin1_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate latin1_general_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (latin1_general_ci char(10) primary key) collate latin1_general_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (latin1_general_cs integer, c char(10), v varchar(20), index(c), index(v)) collate latin1_general_cs engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (latin1_general_cs char(10) primary key) collate latin1_general_cs engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (latin1_german1_ci integer, c char(10), v varchar(20), index(c), index(v)) collate latin1_german1_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (latin1_german1_ci char(10) primary key) collate latin1_german1_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (latin1_german2_ci integer, c char(10), v varchar(20), index(c), index(v)) collate latin1_german2_ci engine=ibmdb2i;
+CREATE TABLE t1 (latin1_spanish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate latin1_spanish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (latin1_spanish_ci char(10) primary key) collate latin1_spanish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (latin1_swedish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate latin1_swedish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (latin1_swedish_ci char(10) primary key) collate latin1_swedish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (latin2_bin integer, c char(10), v varchar(20), index(c), index(v)) collate latin2_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (latin2_bin char(10) primary key) collate latin2_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (latin2_croatian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate latin2_croatian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (latin2_croatian_ci char(10) primary key) collate latin2_croatian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (latin2_czech_cs integer, c char(10), v varchar(20), index(c), index(v)) collate latin2_czech_cs engine=ibmdb2i;
+CREATE TABLE t1 (latin2_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate latin2_general_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (latin2_general_ci char(10) primary key) collate latin2_general_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (latin2_hungarian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate latin2_hungarian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (latin2_hungarian_ci char(10) primary key) collate latin2_hungarian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (latin5_bin integer, c char(10), v varchar(20), index(c), index(v)) collate latin5_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (latin5_bin char(10) primary key) collate latin5_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (latin5_turkish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate latin5_turkish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (latin5_turkish_ci char(10) primary key) collate latin5_turkish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (latin7_bin integer, c char(10), v varchar(20), index(c), index(v)) collate latin7_bin engine=ibmdb2i;
+CREATE TABLE t1 (latin7_estonian_cs integer, c char(10), v varchar(20), index(c), index(v)) collate latin7_estonian_cs engine=ibmdb2i;
+CREATE TABLE t1 (latin7_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate latin7_general_ci engine=ibmdb2i;
+CREATE TABLE t1 (latin7_general_cs integer, c char(10), v varchar(20), index(c), index(v)) collate latin7_general_cs engine=ibmdb2i;
+CREATE TABLE t1 (macce_bin integer, c char(10), v varchar(20), index(c), index(v)) collate macce_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (macce_bin char(10) primary key) collate macce_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (macce_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate macce_general_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (macce_general_ci char(10) primary key) collate macce_general_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (macroman_bin integer, c char(10), v varchar(20), index(c), index(v)) collate macroman_bin engine=ibmdb2i;
+CREATE TABLE t1 (macroman_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate macroman_general_ci engine=ibmdb2i;
+CREATE TABLE t1 (sjis_bin integer, c char(10), v varchar(20), index(c), index(v)) collate sjis_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (sjis_bin char(10) primary key) collate sjis_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (sjis_japanese_ci integer, c char(10), v varchar(20), index(c), index(v)) collate sjis_japanese_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (sjis_japanese_ci char(10) primary key) collate sjis_japanese_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (swe7_bin integer, c char(10), v varchar(20), index(c), index(v)) collate swe7_bin engine=ibmdb2i;
+CREATE TABLE t1 (swe7_swedish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate swe7_swedish_ci engine=ibmdb2i;
+CREATE TABLE t1 (tis620_bin integer, c char(10), v varchar(20), index(c), index(v)) collate tis620_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (tis620_bin char(10) primary key) collate tis620_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (tis620_thai_ci integer, c char(10), v varchar(20), index(c), index(v)) collate tis620_thai_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	11	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	23	NULL	6	Using where
+drop table t1;
+create table t1 (tis620_thai_ci char(10) primary key) collate tis620_thai_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_bin integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_bin char(10) primary key) collate ucs2_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_czech_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_czech_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_czech_ci char(10) primary key) collate ucs2_czech_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_danish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_danish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_danish_ci char(10) primary key) collate ucs2_danish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_esperanto_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_esperanto_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_esperanto_ci char(10) primary key) collate ucs2_esperanto_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_estonian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_estonian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_estonian_ci char(10) primary key) collate ucs2_estonian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_general_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_general_ci char(10) primary key) collate ucs2_general_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_hungarian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_hungarian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_hungarian_ci char(10) primary key) collate ucs2_hungarian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_icelandic_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_icelandic_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_icelandic_ci char(10) primary key) collate ucs2_icelandic_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_latvian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_latvian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_latvian_ci char(10) primary key) collate ucs2_latvian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_lithuanian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_lithuanian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_lithuanian_ci char(10) primary key) collate ucs2_lithuanian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_persian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_persian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_persian_ci char(10) primary key) collate ucs2_persian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_polish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_polish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_polish_ci char(10) primary key) collate ucs2_polish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_romanian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_romanian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_romanian_ci char(10) primary key) collate ucs2_romanian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_roman_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_roman_ci engine=ibmdb2i;
+CREATE TABLE t1 (ucs2_slovak_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_slovak_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_slovak_ci char(10) primary key) collate ucs2_slovak_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_slovenian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_slovenian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_slovenian_ci char(10) primary key) collate ucs2_slovenian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_spanish2_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_spanish2_ci engine=ibmdb2i;
+CREATE TABLE t1 (ucs2_spanish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_spanish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_spanish_ci char(10) primary key) collate ucs2_spanish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_swedish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_swedish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_swedish_ci char(10) primary key) collate ucs2_swedish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_turkish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_turkish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_turkish_ci char(10) primary key) collate ucs2_turkish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ucs2_unicode_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ucs2_unicode_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	21	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	43	NULL	6	Using where
+drop table t1;
+create table t1 (ucs2_unicode_ci char(10) primary key) collate ucs2_unicode_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ujis_bin integer, c char(10), v varchar(20), index(c), index(v)) collate ujis_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (ujis_bin char(10) primary key) collate ujis_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (ujis_japanese_ci integer, c char(10), v varchar(20), index(c), index(v)) collate ujis_japanese_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (ujis_japanese_ci char(10) primary key) collate ujis_japanese_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_bin integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_bin engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_bin char(10) primary key) collate utf8_bin engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_czech_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_czech_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_czech_ci char(10) primary key) collate utf8_czech_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_danish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_danish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_danish_ci char(10) primary key) collate utf8_danish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_esperanto_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_esperanto_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_esperanto_ci char(10) primary key) collate utf8_esperanto_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_estonian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_estonian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_estonian_ci char(10) primary key) collate utf8_estonian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_general_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_general_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_general_ci char(10) primary key) collate utf8_general_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_hungarian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_hungarian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_hungarian_ci char(10) primary key) collate utf8_hungarian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_icelandic_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_icelandic_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_icelandic_ci char(10) primary key) collate utf8_icelandic_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_latvian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_latvian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_latvian_ci char(10) primary key) collate utf8_latvian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_lithuanian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_lithuanian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_lithuanian_ci char(10) primary key) collate utf8_lithuanian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_persian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_persian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_persian_ci char(10) primary key) collate utf8_persian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_polish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_polish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_polish_ci char(10) primary key) collate utf8_polish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_romanian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_romanian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_romanian_ci char(10) primary key) collate utf8_romanian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_roman_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_roman_ci engine=ibmdb2i;
+CREATE TABLE t1 (utf8_slovak_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_slovak_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_slovak_ci char(10) primary key) collate utf8_slovak_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_slovenian_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_slovenian_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_slovenian_ci char(10) primary key) collate utf8_slovenian_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_spanish2_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_spanish2_ci engine=ibmdb2i;
+CREATE TABLE t1 (utf8_spanish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_spanish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_spanish_ci char(10) primary key) collate utf8_spanish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_swedish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_swedish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_swedish_ci char(10) primary key) collate utf8_swedish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_turkish_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_turkish_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_turkish_ci char(10) primary key) collate utf8_turkish_ci engine=ibmdb2i;
+drop table t1;
+CREATE TABLE t1 (utf8_unicode_ci integer, c char(10), v varchar(20), index(c), index(v)) collate utf8_unicode_ci engine=ibmdb2i;
+insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+insert into t1 select * from t1;
+explain select c,v from t1 force index(c) where c like "ab%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	31	NULL	6	Using where
+explain select c,v from t1 force index(v) where v like "de%";
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	63	NULL	6	Using where
+drop table t1;
+create table t1 (utf8_unicode_ci char(10) primary key) collate utf8_unicode_ci engine=ibmdb2i;
+drop table t1;
+create table ffd (WHCHD1 CHAR(20), WHCSID decimal(5,0)) engine=ibmdb2i;
+create table fd (SQSSEQ CHAR(10)) engine=ibmdb2i;
+create temporary table intermed (row integer key auto_increment, cs char(30), ccsid integer);
+insert into intermed (cs, ccsid) select * from ffd;
+create temporary table intermed2 (row integer key auto_increment, srtseq char(10));
+insert into intermed2 (srtseq) select * from fd;
+select ccsid, cs, srtseq from intermed inner join intermed2 on intermed.row = intermed2.row;
+ccsid	cs	srtseq
+500	"ascii_bin"	QBLA101F4U
+500	"ascii_general_ci"	QALA101F4S
+1200	"big5_bin"	QBCHT04B0U
+1200	"big5_chinese_ci"	QACHT04B0S
+1153	"cp1250_bin"	QELA20481U
+1153	"cp1250_croatian_ci"	QALA20481S
+1153	"cp1250_general_ci"	QCLA20481S
+1153	"cp1250_polish_ci"	QDLA20481S
+1025	"cp1251_bin"	QCCYR0401U
+1025	"cp1251_bulgarian_ci	QACYR0401S
+1025	"cp1251_general_ci"	QBCYR0401S
+1025	"cp1251_general_cs"	QBCYR0401U
+420	"cp1256_bin"	QBARA01A4U
+420	"cp1256_general_ci"	QAARA01A4S
+500	"cp850_bin"	QDLA101F4U
+500	"cp850_general_ci"	QCLA101F4S
+870	"cp852_bin"	QBLA20366U
+870	"cp852_general_ci"	QALA20366S
+1200	"cp932_bin"	QBJPN04B0U
+1200	"cp932_japanese_ci"	QAJPN04B0S
+1200	"euckr_bin"	QBKOR04B0U
+1200	"euckr_korean_ci"	QAKOR04B0S
+1200	"gb2312_bin"	QBCHS04B0U
+1200	"gb2312_chinese_ci"	QACHS04B0S
+1200	"gbk_bin"	QDCHS04B0U
+1200	"gbk_chinese_ci"	QCCHS04B0S
+875	"greek_bin"	QBELL036BU
+875	"greek_general_ci"	QAELL036BS
+424	"hebrew_bin"	QBHEB01A8U
+424	"hebrew_general_ci"	QAHEB01A8S
+1148	"latin1_bin"	QFLA1047CU
+1148	"latin1_danish_ci"	QALA1047CS
+1148	"latin1_general_ci"	QBLA1047CS
+1148	"latin1_general_cs"	QBLA1047CU
+1148	"latin1_german1_ci"	QCLA1047CS
+1148	"latin1_spanish_ci"	QDLA1047CS
+1148	"latin1_swedish_ci"	QELA1047CS
+870	"latin2_bin"	QGLA20366U
+870	"latin2_croatian_ci"	QCLA20366S
+870	"latin2_general_ci"	QELA20366S
+870	"latin2_hungarian_ci	QFLA20366S
+1026	"latin5_bin"	QBTRK0402U
+1026	"latin5_turkish_ci"	QATRK0402S
+870	"macce_bin"	QILA20366U
+870	"macce_general_ci"	QHLA20366S
+1200	"sjis_bin"	QDJPN04B0U
+1200	"sjis_japanese_ci"	QCJPN04B0S
+838	"tis620_bin"	QBTHA0346U
+838	"tis620_thai_ci"	QATHA0346S
+13488	"ucs2_bin"	*HEX
+13488	"ucs2_czech_ci"	I34ACS_CZ
+13488	"ucs2_danish_ci"	I34ADA_DK
+13488	"ucs2_esperanto_ci"	I34AEO
+13488	"ucs2_estonian_ci"	I34AET
+13488	"ucs2_general_ci"	QAUCS04B0S
+13488	"ucs2_hungarian_ci"	I34AHU
+13488	"ucs2_icelandic_ci"	I34AIS
+13488	"ucs2_latvian_ci"	I34ALV
+13488	"ucs2_lithuanian_ci"	I34ALT
+13488	"ucs2_persian_ci"	I34AFA
+13488	"ucs2_polish_ci"	I34APL
+13488	"ucs2_romanian_ci"	I34ARO
+13488	"ucs2_slovak_ci"	I34ASK
+13488	"ucs2_slovenian_ci"	I34ASL
+13488	"ucs2_spanish_ci"	I34AES
+13488	"ucs2_swedish_ci"	I34ASW
+13488	"ucs2_turkish_ci"	I34ATR
+13488	"ucs2_unicode_ci"	I34AEN
+1200	"ujis_bin"	QFJPN04B0U
+1200	"ujis_japanese_ci"	QEJPN04B0S
+1208	"utf8_bin"	*HEX
+1208	"utf8_czech_ci"	I34ACS_CZ
+1208	"utf8_danish_ci"	I34ADA_DK
+1208	"utf8_esperanto_ci"	I34AEO
+1208	"utf8_estonian_ci"	I34AET
+1200	"utf8_general_ci"	QAUCS04B0S
+1208	"utf8_hungarian_ci"	I34AHU
+1208	"utf8_icelandic_ci"	I34AIS
+1208	"utf8_latvian_ci"	I34ALV
+1208	"utf8_lithuanian_ci"	I34ALT
+1208	"utf8_persian_ci"	I34AFA
+1208	"utf8_polish_ci"	I34APL
+1208	"utf8_romanian_ci"	I34ARO
+1208	"utf8_slovak_ci"	I34ASK
+1208	"utf8_slovenian_ci"	I34ASL
+1208	"utf8_spanish_ci"	I34AES
+1208	"utf8_swedish_ci"	I34ASW
+1208	"utf8_turkish_ci"	I34ATR
+1208	"utf8_unicode_ci"	I34AEN
+drop table ffd, fd;
diff --git a/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_44232.test b/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_44232.test
new file mode 100755
index 00000000000..ea29b5abcd4
--- /dev/null
+++ b/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_44232.test
@@ -0,0 +1,8 @@
+--source suite/ibmdb2i/include/have_ibmdb2i.inc
+--source suite/ibmdb2i/include/have_i54.inc
+
+--error 1005
+create table t1 (c char(1) character set armscii8) engine=ibmdb2i;
+
+--error 1005
+create table t1 (c char(1) character set eucjpms ) engine=ibmdb2i;
diff --git a/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_44610.test b/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_44610.test
new file mode 100755
index 00000000000..da69b5d9148
--- /dev/null
+++ b/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_44610.test
@@ -0,0 +1,28 @@
+source suite/ibmdb2i/include/have_ibmdb2i.inc;
+
+# Test RCDFMT generation for a variety of kinds of table names
+create table ABC (i int) engine=ibmdb2i;
+drop table ABC;
+
+create table `1234567890ABC` (i int) engine=ibmdb2i;
+drop table `1234567890ABC`;
+
+create table `!@#$%` (i int) engine=ibmdb2i;
+drop table `!@#$%`;
+
+create table `ABCD#########` (i int) engine=ibmdb2i;
+drop table `ABCD#########`;
+
+create table `_` (i int) engine=ibmdb2i;
+drop table `_`;
+
+create table `abc##def` (i int) engine=ibmdb2i;
+drop table `abc##def`;
+
+set names utf8;
+create table İ (s1 int) engine=ibmdb2i;
+drop table İ;
+
+create table İİ (s1 int) engine=ibmdb2i;
+drop table İİ;
+set names latin1;
diff --git a/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_45196.test b/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_45196.test
new file mode 100644
index 00000000000..17b1d658975
--- /dev/null
+++ b/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_45196.test
@@ -0,0 +1,26 @@
+source suite/ibmdb2i/include/have_ibmdb2i.inc;
+source suite/ibmdb2i/include/have_i61.inc;
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+create table t1 (c char(10), index(c)) collate ucs2_czech_ci engine=ibmdb2i;
+insert into t1 values ("ch"),("h"),("i");
+select * from t1 order by c;
+drop table t1;
+
+create table t1 (c char(10), index(c)) collate utf8_czech_ci engine=ibmdb2i;
+insert into t1 values ("ch"),("h"),("i");
+select * from t1 order by c;
+drop table t1;
+
+create table t1 (c char(10), index(c)) collate ucs2_danish_ci engine=ibmdb2i;           
+insert into t1 values("abc"),("abcd"),("aaaa"); 
+select c from t1 order by c;
+drop table t1;
+
+create table t1 (c char(10), index(c)) collate utf8_danish_ci engine=ibmdb2i;           
+insert into t1 values("abc"),("abcd"),("aaaa"); 
+select c from t1 order by c;
+drop table t1;
diff --git a/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_45793.test b/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_45793.test
new file mode 100644
index 00000000000..93fb78ff421
--- /dev/null
+++ b/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_45793.test
@@ -0,0 +1,11 @@
+source suite/ibmdb2i/include/have_ibmdb2i.inc;
+source suite/ibmdb2i/include/have_i61.inc;
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+create table t1 (c char(10), index(c)) charset macce engine=ibmdb2i;
+insert into t1 values ("test");
+select * from t1 order by c;
+drop table t1;
diff --git a/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_45983.test b/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_45983.test
new file mode 100644
index 00000000000..695d8e90ada
--- /dev/null
+++ b/mysql-test/suite/ibmdb2i/t/ibmdb2i_bug_45983.test
@@ -0,0 +1,47 @@
+source suite/ibmdb2i/include/have_ibmdb2i.inc;
+
+# Confirm that ibmdb2i_create_index_option causes additional *HEX sorted indexes to be created for all non-binary keys.
+
+set ibmdb2i_create_index_option=1;
+--disable_warnings
+drop schema if exists test1;
+create schema test1;
+use test1;
+--enable_warnings
+
+--disable_abort_on_error
+--error 0,255
+exec system "DLTF QGPL/FDOUT" > /dev/null;
+--enable_abort_on_error
+
+#No additional index because no string fields in key
+CREATE TABLE t1 (f int primary key, index(f)) engine=ibmdb2i;
+--error 255
+exec system "DSPFD FILE(\"test1\"/PRIM0001) TYPE(*SEQ) OUTPUT(*OUTFILE) OUTFILE(QGPL/FDOUT) OUTMBR(*FIRST *ADD)" > /dev/null;
+--error 255
+exec system "DSPFD FILE(\"test1\"/\"f___H_t1\") TYPE(*SEQ) OUTPUT(*OUTFILE) OUTFILE(QGPL/FDOUT) OUTMBR(*FIRST *ADD)" > /dev/null;
+drop table t1;
+
+#No additional index because binary sorting
+CREATE TABLE t1 (f char(10) collate utf8_bin primary key, index(f)) engine=ibmdb2i;
+--error 255
+exec system "DSPFD FILE(\"test1\"/PRIM0001) TYPE(*SEQ) OUTPUT(*OUTFILE) OUTFILE(QGPL/FDOUT) OUTMBR(*FIRST *ADD)" > /dev/null;
+--error 255
+exec system "DSPFD FILE(\"test1\"/\"f___H_t1\") TYPE(*SEQ) OUTPUT(*OUTFILE) OUTFILE(QGPL/FDOUT) OUTMBR(*FIRST *ADD)" > /dev/null;
+drop table t1;
+
+CREATE TABLE t1 (f char(10) collate latin1_swedish_ci primary key, index(f)) engine=ibmdb2i;
+exec system "DSPFD FILE(\"test1\"/PRIM0001) TYPE(*SEQ) OUTPUT(*OUTFILE) OUTFILE(QGPL/FDOUT) OUTMBR(*FIRST *ADD)" > /dev/null;
+exec system "DSPFD FILE(\"test1\"/\"f___H_t1\") TYPE(*SEQ) OUTPUT(*OUTFILE) OUTFILE(QGPL/FDOUT) OUTMBR(*FIRST *ADD)" > /dev/null;
+drop table t1;
+
+CREATE TABLE t1 (f char(10) collate latin1_swedish_ci primary key, i int, index i(i,f)) engine=ibmdb2i;
+exec system "DSPFD FILE(\"test1\"/PRIM0001) TYPE(*SEQ) OUTPUT(*OUTFILE) OUTFILE(QGPL/FDOUT) OUTMBR(*FIRST *ADD)" > /dev/null;
+exec system "DSPFD FILE(\"test1\"/\"i___H_t1\") TYPE(*SEQ) OUTPUT(*OUTFILE) OUTFILE(QGPL/FDOUT) OUTMBR(*FIRST *ADD)" > /dev/null;
+drop table t1;
+
+
+create table fd (SQSSEQ CHAR(10)) engine=ibmdb2i;
+system system "CPYF FROMFILE(QGPL/FDOUT) TOFILE(\"test1\"/\"fd\") mbropt(*replace) fmtopt(*drop *map)" > /dev/null;
+select * from fd;
+drop table fd;
diff --git a/mysql-test/suite/ibmdb2i/t/ibmdb2i_collations.test b/mysql-test/suite/ibmdb2i/t/ibmdb2i_collations.test
new file mode 100644
index 00000000000..899f330d360
--- /dev/null
+++ b/mysql-test/suite/ibmdb2i/t/ibmdb2i_collations.test
@@ -0,0 +1,44 @@
+source suite/ibmdb2i/include/have_ibmdb2i.inc;
+source suite/ibmdb2i/include/have_i61.inc;
+--disable_warnings
+drop table if exists t1, ffd, fd;
+--enable_warnings
+
+--disable_abort_on_error
+--error 0,255
+exec system "DLTF QGPL/FFDOUT" > /dev/null;
+--error 0,255
+exec system "DLTF QGPL/FDOUT" > /dev/null;
+--enable_abort_on_error
+let $count= query_get_value(select count(*) from information_schema.COLLATIONS where COLLATION_NAME <> "binary", count(*),1);
+
+while ($count)
+{
+  let $collation = query_get_value(select COLLATION_NAME from information_schema.COLLATIONS where COLLATION_NAME <> "binary" order by COLLATION_NAME desc, COLLATION_NAME, $count);
+  error 0,1005,2504,2028;
+  eval CREATE TABLE t1 ($collation integer, c char(10), v varchar(20), index(c), index(v)) collate $collation engine=ibmdb2i;
+  if (!$mysql_errno)
+  {
+      insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+      insert into t1 select * from t1;
+      explain select c,v from t1 force index(c) where c like "ab%";
+      explain select c,v from t1 force index(v) where v like "de%";
+      drop table t1;
+      eval create table t1 ($collation char(10) primary key) collate $collation engine=ibmdb2i;
+      system system "DSPFFD FILE(\"test\"/\"t1\") OUTPUT(*OUTFILE) OUTFILE(QGPL/FFDOUT) OUTMBR(*FIRST *ADD)" > /dev/null;
+      system system "DSPFD FILE(\"test\"/\"t1\") TYPE(*SEQ) OUTPUT(*OUTFILE) OUTFILE(QGPL/FDOUT) OUTMBR(*FIRST *ADD)" > /dev/null;
+      drop table t1;
+  }
+  dec $count;
+}
+
+create table ffd (WHCHD1 CHAR(20), WHCSID decimal(5,0)) engine=ibmdb2i;
+system system "CPYF FROMFILE(QGPL/FFDOUT) TOFILE(\"test\"/\"ffd\") mbropt(*replace) fmtopt(*drop *map)"  > /dev/null;
+create table fd (SQSSEQ CHAR(10)) engine=ibmdb2i;
+system system "CPYF FROMFILE(QGPL/FDOUT) TOFILE(\"test\"/\"fd\") mbropt(*replace) fmtopt(*drop *map)" > /dev/null;
+create temporary table intermed (row integer key auto_increment, cs char(30), ccsid integer);
+insert into intermed (cs, ccsid) select * from ffd;
+create temporary table intermed2 (row integer key auto_increment, srtseq char(10));
+insert into intermed2 (srtseq) select * from fd;
+select ccsid, cs, srtseq from intermed inner join intermed2 on intermed.row = intermed2.row;
+drop table ffd, fd;
diff --git a/mysql-test/suite/innodb/include/have_innodb_plugin.inc b/mysql-test/suite/innodb/include/have_innodb_plugin.inc
new file mode 100644
index 00000000000..24af3274ada
--- /dev/null
+++ b/mysql-test/suite/innodb/include/have_innodb_plugin.inc
@@ -0,0 +1,4 @@
+disable_query_log;
+--require r/true.require
+select (PLUGIN_LIBRARY LIKE 'ha_innodb_plugin%') as `TRUE` from information_schema.plugins where PLUGIN_NAME='InnoDB';
+enable_query_log;
diff --git a/mysql-test/suite/innodb/include/innodb-index.inc b/mysql-test/suite/innodb/include/innodb-index.inc
new file mode 100644
index 00000000000..37de3162abe
--- /dev/null
+++ b/mysql-test/suite/innodb/include/innodb-index.inc
@@ -0,0 +1,26 @@
+--eval create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb default charset=$charset
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
+commit;
+--error ER_DUP_ENTRY
+alter table t1 add unique index (b);
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+show create table t1;
+alter table t1 add index (b);
+insert into t1 values(10,10,'kkk','iii');
+select * from t1;
+select * from t1 force index(b) order by b;
+explain select * from t1 force index(b) order by b;
+show create table t1;
+alter table t1 add unique index (c), add index (d);
+insert into t1 values(11,11,'aaa','mmm');
+select * from t1;
+select * from t1 force index(b) order by b;
+select * from t1 force index(c) order by c;
+select * from t1 force index(d) order by d;
+explain select * from t1 force index(b) order by b;
+explain select * from t1 force index(c) order by c;
+explain select * from t1 force index(d) order by d;
+show create table t1;
+check table t1;
+drop table t1;
diff --git a/mysql-test/suite/innodb/r/innodb-analyze.result b/mysql-test/suite/innodb/r/innodb-analyze.result
new file mode 100644
index 00000000000..2aee004a2d6
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-analyze.result
@@ -0,0 +1,2 @@
+Variable_name	Value
+innodb_stats_sample_pages	1
diff --git a/mysql-test/suite/innodb/r/innodb-index.result b/mysql-test/suite/innodb/r/innodb-index.result
new file mode 100644
index 00000000000..a7d66b15300
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-index.result
@@ -0,0 +1,1170 @@
+create table t1(a int not null, b int, c char(10) not null, d varchar(20)) engine = innodb;
+insert into t1 values (5,5,'oo','oo'),(4,4,'tr','tr'),(3,4,'ad','ad'),(2,3,'ak','ak');
+commit;
+alter table t1 add index b (b), add index b (b);
+ERROR 42000: Duplicate key name 'b'
+alter table t1 add index (b,b);
+ERROR 42S21: Duplicate column name 'b'
+alter table t1 add index d2 (d);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  KEY `d2` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+explain select * from t1 force index(d2) order by d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	d2	23	NULL	4	
+select * from t1 force index (d2) order by d;
+a	b	c	d
+3	4	ad	ad
+2	3	ak	ak
+5	5	oo	oo
+4	4	tr	tr
+alter table t1 add unique index (b);
+ERROR 23000: Duplicate entry '4' for key 'b'
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  KEY `d2` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 add index (b);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  KEY `d2` (`d`),
+  KEY `b` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+CREATE TABLE `t1#1`(a INT PRIMARY KEY) ENGINE=InnoDB;
+alter table t1 add unique index (c), add index (d);
+ERROR HY000: Table 'test.t1#1' already exists
+rename table `t1#1` to `t1#2`;
+alter table t1 add unique index (c), add index (d);
+ERROR HY000: Table 'test.t1#2' already exists
+drop table `t1#2`;
+alter table t1 add unique index (c), add index (d);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  UNIQUE KEY `c` (`c`),
+  KEY `d2` (`d`),
+  KEY `b` (`b`),
+  KEY `d` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	10	NULL	4	
+alter table t1 add primary key (a), drop index c;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `d2` (`d`),
+  KEY `b` (`b`),
+  KEY `d` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 add primary key (c);
+ERROR 42000: Multiple primary key defined
+alter table t1 drop primary key, add primary key (b);
+ERROR 23000: Duplicate entry '4' for key 'PRIMARY'
+create unique index c on t1 (c);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `c` (`c`),
+  KEY `d2` (`d`),
+  KEY `b` (`b`),
+  KEY `d` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	10	NULL	4	
+select * from t1 force index(c) order by c;
+a	b	c	d
+3	4	ad	ad
+2	3	ak	ak
+5	5	oo	oo
+4	4	tr	tr
+alter table t1 drop index b, add index (b);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `c` (`c`),
+  KEY `d2` (`d`),
+  KEY `d` (`d`),
+  KEY `b` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+insert into t1 values(6,1,'ggg','ggg');
+select * from t1;
+a	b	c	d
+2	3	ak	ak
+3	4	ad	ad
+4	4	tr	tr
+5	5	oo	oo
+6	1	ggg	ggg
+select * from t1 force index(b) order by b;
+a	b	c	d
+6	1	ggg	ggg
+2	3	ak	ak
+3	4	ad	ad
+4	4	tr	tr
+5	5	oo	oo
+select * from t1 force index(c) order by c;
+a	b	c	d
+3	4	ad	ad
+2	3	ak	ak
+6	1	ggg	ggg
+5	5	oo	oo
+4	4	tr	tr
+select * from t1 force index(d) order by d;
+a	b	c	d
+3	4	ad	ad
+2	3	ak	ak
+6	1	ggg	ggg
+5	5	oo	oo
+4	4	tr	tr
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	5	NULL	5	
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	10	NULL	5	
+explain select * from t1 force index(d) order by d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	d	23	NULL	5	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `c` (`c`),
+  KEY `d2` (`d`),
+  KEY `d` (`d`),
+  KEY `b` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,3,'ad','ad'),(4,4,'afe','afe');
+commit;
+alter table t1 add index (c(2));
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `c` (`c`(2))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 add unique index (d(10));
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `d` (`d`(10)),
+  KEY `c` (`c`(2))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+insert into t1 values(5,1,'ggg','ggg');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	3	ad	ad
+4	4	afe	afe
+5	1	ggg	ggg
+select * from t1 force index(c) order by c;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	3	ad	ad
+4	4	afe	afe
+5	1	ggg	ggg
+select * from t1 force index(d) order by d;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	3	ad	ad
+4	4	afe	afe
+5	1	ggg	ggg
+explain select * from t1 order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	5	Using filesort
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	5	Using filesort
+explain select * from t1 force index(d) order by d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	5	Using filesort
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `d` (`d`(10)),
+  KEY `c` (`c`(2))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 drop index d;
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	3	ad	ad
+4	4	afe	afe
+5	1	ggg	ggg
+8	9	fff	fff
+select * from t1 force index(c) order by c;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	3	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+5	1	ggg	ggg
+explain select * from t1 order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	6	Using filesort
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	6	Using filesort
+explain select * from t1 order by d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	6	Using filesort
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `c` (`c`(2))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
+commit;
+alter table t1 add unique index (b,c);
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	16	NULL	5	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `b` (`b`,`c`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 add index (b,c);
+insert into t1 values(11,11,'kkk','kkk');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+11	11	kkk	kkk
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+11	11	kkk	kkk
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	16	NULL	6	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `b` (`b`,`c`),
+  KEY `b_2` (`b`,`c`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 add unique index (c,d);
+insert into t1 values(13,13,'yyy','aaa');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+11	11	kkk	kkk
+13	13	yyy	aaa
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+11	11	kkk	kkk
+13	13	yyy	aaa
+select * from t1 force index(c) order by c;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+11	11	kkk	kkk
+13	13	yyy	aaa
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	16	NULL	7	
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	34	NULL	7	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `b` (`b`,`c`),
+  UNIQUE KEY `c` (`c`,`d`),
+  KEY `b_2` (`b`,`c`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, b int not null, c int, primary key (a), key (b)) engine = innodb;
+create table t3(a int not null, c int not null, d int, primary key (a), key (c)) engine = innodb;
+create table t4(a int not null, d int not null, e int, primary key (a), key (d)) engine = innodb;
+create table t2(a int not null, b int not null, c int not null, d int not null, e int,
+foreign key (b) references t1(b) on delete cascade,
+foreign key (c) references t3(c), foreign key (d) references t4(d))
+engine = innodb;
+alter table t1 drop index b;
+ERROR HY000: Cannot drop index 'b': needed in a foreign key constraint
+alter table t3 drop index c;
+ERROR HY000: Cannot drop index 'c': needed in a foreign key constraint
+alter table t4 drop index d;
+ERROR HY000: Cannot drop index 'd': needed in a foreign key constraint
+alter table t2 drop index b;
+ERROR HY000: Cannot drop index 'b': needed in a foreign key constraint
+alter table t2 drop index b, drop index c, drop index d;
+ERROR HY000: Cannot drop index 'b': needed in a foreign key constraint
+create unique index dc on t2 (d,c);
+create index dc on t1 (b,c);
+alter table t2 add primary key (a);
+insert into t1 values (1,1,1);
+insert into t3 values (1,1,1);
+insert into t4 values (1,1,1);
+insert into t2 values (1,1,1,1,1);
+commit;
+alter table t4 add constraint dc foreign key (a) references t1(a);
+show create table t4;
+Table	Create Table
+t4	CREATE TABLE `t4` (
+  `a` int(11) NOT NULL,
+  `d` int(11) NOT NULL,
+  `e` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `d` (`d`),
+  CONSTRAINT `dc` FOREIGN KEY (`a`) REFERENCES `t1` (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t3 add constraint dc foreign key (a) references t1(a);
+ERROR HY000: Can't create table '#sql-temporary' (errno: 121)
+show create table t3;
+Table	Create Table
+t3	CREATE TABLE `t3` (
+  `a` int(11) NOT NULL,
+  `c` int(11) NOT NULL,
+  `d` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `c` (`c`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t2 drop index b, add index (b);
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `a` int(11) NOT NULL,
+  `b` int(11) NOT NULL,
+  `c` int(11) NOT NULL,
+  `d` int(11) NOT NULL,
+  `e` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `dc` (`d`,`c`),
+  KEY `c` (`c`),
+  KEY `b` (`b`),
+  CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`b`) REFERENCES `t1` (`b`) ON DELETE CASCADE,
+  CONSTRAINT `t2_ibfk_2` FOREIGN KEY (`c`) REFERENCES `t3` (`c`),
+  CONSTRAINT `t2_ibfk_3` FOREIGN KEY (`d`) REFERENCES `t4` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+delete from t1;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t4`, CONSTRAINT `dc` FOREIGN KEY (`a`) REFERENCES `t1` (`a`))
+drop index dc on t4;
+ERROR 42000: Can't DROP 'dc'; check that column/key exists
+alter table t3 drop foreign key dc;
+ERROR HY000: Error on rename of './test/t3' to '#sql2-temporary' (errno: 152)
+alter table t4 drop foreign key dc;
+select * from t2;
+a	b	c	d	e
+1	1	1	1	1
+delete from t1;
+select * from t2;
+a	b	c	d	e
+drop table t2,t4,t3,t1;
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb default charset=utf8;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
+commit;
+alter table t1 add unique index (b);
+ERROR 23000: Duplicate entry '2' for key 'b'
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8
+alter table t1 add index (b);
+insert into t1 values(10,10,'kkk','iii');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	5	NULL	6	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `b` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8
+alter table t1 add unique index (c), add index (d);
+insert into t1 values(11,11,'aaa','mmm');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+11	11	aaa	mmm
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+11	11	aaa	mmm
+select * from t1 force index(c) order by c;
+a	b	c	d
+11	11	aaa	mmm
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+select * from t1 force index(d) order by d;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+11	11	aaa	mmm
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	5	NULL	7	
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	31	NULL	7	
+explain select * from t1 force index(d) order by d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	d	63	NULL	7	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `c` (`c`),
+  KEY `b` (`b`),
+  KEY `d` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+drop table t1;
+create table t1(a int not null, b int) engine = innodb;
+insert into t1 values (1,1),(1,1),(1,1),(1,1);
+alter table t1 add unique index (a);
+ERROR 23000: Duplicate entry '1' for key 'a'
+alter table t1 add unique index (b);
+ERROR 23000: Duplicate entry '1' for key 'b'
+alter table t1 add unique index (a), add unique index(b);
+ERROR 23000: Duplicate entry '1' for key 'a'
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, c int not null,b int, primary key(a), unique key(c), key(b)) engine = innodb;
+alter table t1 drop index c, drop index b;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `c` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, b int, primary key(a)) engine = innodb;
+alter table t1 add index (b);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `b` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,3,'ac','ac'),(4,4,'afe','afe'),(5,4,'affe','affe');
+alter table t1 add unique index (b), add unique index (c), add unique index (d);
+ERROR 23000: Duplicate entry '4' for key 'b'
+alter table t1 add unique index (c), add unique index (b), add index (d);
+ERROR 23000: Duplicate entry 'ac' for key 'c'
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, b int not null, c int, primary key (a), key(c)) engine=innodb;
+insert into t1 values (5,1,5),(4,2,4),(3,3,3),(2,4,2),(1,5,1);
+alter table t1 add unique index (b);
+insert into t1 values (10,20,20),(11,19,19),(12,18,18),(13,17,17);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) NOT NULL,
+  `c` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `b` (`b`),
+  KEY `c` (`c`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	5	NULL	9	
+explain select * from t1 order by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	9	
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	4	NULL	9	
+select * from t1 order by a;
+a	b	c
+1	5	1
+2	4	2
+3	3	3
+4	2	4
+5	1	5
+10	20	20
+11	19	19
+12	18	18
+13	17	17
+select * from t1 force index(b) order by b;
+a	b	c
+5	1	5
+4	2	4
+3	3	3
+2	4	2
+1	5	1
+13	17	17
+12	18	18
+11	19	19
+10	20	20
+select * from t1 force index(c) order by c;
+a	b	c
+1	5	1
+2	4	2
+3	3	3
+4	2	4
+5	1	5
+13	17	17
+12	18	18
+11	19	19
+10	20	20
+drop table t1;
+create table t1(a int not null, b int not null) engine=innodb;
+insert into t1 values (1,1);
+alter table t1 add primary key(b);
+insert into t1 values (2,2);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) NOT NULL,
+  PRIMARY KEY (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+select * from t1;
+a	b
+1	1
+2	2
+explain select * from t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	2	
+explain select * from t1 order by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	2	Using filesort
+explain select * from t1 order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	2	
+checksum table t1;
+Table	Checksum
+test.t1	582702641
+drop table t1;
+create table t1(a int not null) engine=innodb;
+insert into t1 values (1);
+alter table t1 add primary key(a);
+insert into t1 values (2);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+commit;
+select * from t1;
+a
+1
+2
+explain select * from t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	2	Using index
+explain select * from t1 order by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	2	Using index
+drop table t1;
+create table t2(d varchar(17) primary key) engine=innodb default charset=utf8;
+create table t3(a int primary key) engine=innodb;
+insert into t3 values(22),(44),(33),(55),(66);
+insert into t2 values ('jejdkrun87'),('adfd72nh9k'),
+('adfdpplkeock'),('adfdijnmnb78k'),('adfdijn0loKNHJik');
+create table t1(a int, b blob, c text, d text not null)
+engine=innodb default charset = utf8;
+insert into t1
+select a,left(repeat(d,100*a),65535),repeat(d,20*a),d from t2,t3;
+drop table t2, t3;
+select count(*) from t1 where a=44;
+count(*)
+5
+select a,
+length(b),b=left(repeat(d,100*a),65535),length(c),c=repeat(d,20*a),d from t1;
+a	length(b)	b=left(repeat(d,100*a),65535)	length(c)	c=repeat(d,20*a)	d
+22	22000	1	4400	1	adfd72nh9k
+22	35200	1	7040	1	adfdijn0loKNHJik
+22	28600	1	5720	1	adfdijnmnb78k
+22	26400	1	5280	1	adfdpplkeock
+22	22000	1	4400	1	jejdkrun87
+33	33000	1	6600	1	adfd72nh9k
+33	52800	1	10560	1	adfdijn0loKNHJik
+33	42900	1	8580	1	adfdijnmnb78k
+33	39600	1	7920	1	adfdpplkeock
+33	33000	1	6600	1	jejdkrun87
+44	44000	1	8800	1	adfd72nh9k
+44	65535	1	14080	1	adfdijn0loKNHJik
+44	57200	1	11440	1	adfdijnmnb78k
+44	52800	1	10560	1	adfdpplkeock
+44	44000	1	8800	1	jejdkrun87
+55	55000	1	11000	1	adfd72nh9k
+55	65535	1	17600	1	adfdijn0loKNHJik
+55	65535	1	14300	1	adfdijnmnb78k
+55	65535	1	13200	1	adfdpplkeock
+55	55000	1	11000	1	jejdkrun87
+66	65535	1	13200	1	adfd72nh9k
+66	65535	1	21120	1	adfdijn0loKNHJik
+66	65535	1	17160	1	adfdijnmnb78k
+66	65535	1	15840	1	adfdpplkeock
+66	65535	1	13200	1	jejdkrun87
+alter table t1 add primary key (a), add key (b(20));
+ERROR 23000: Duplicate entry '22' for key 'PRIMARY'
+delete from t1 where a%2;
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+alter table t1 add primary key (a,b(255),c(255)), add key (b(767));
+select count(*) from t1 where a=44;
+count(*)
+5
+select a,
+length(b),b=left(repeat(d,100*a),65535),length(c),c=repeat(d,20*a),d from t1;
+a	length(b)	b=left(repeat(d,100*a),65535)	length(c)	c=repeat(d,20*a)	d
+22	22000	1	4400	1	adfd72nh9k
+22	35200	1	7040	1	adfdijn0loKNHJik
+22	28600	1	5720	1	adfdijnmnb78k
+22	26400	1	5280	1	adfdpplkeock
+22	22000	1	4400	1	jejdkrun87
+44	44000	1	8800	1	adfd72nh9k
+44	65535	1	14080	1	adfdijn0loKNHJik
+44	57200	1	11440	1	adfdijnmnb78k
+44	52800	1	10560	1	adfdpplkeock
+44	44000	1	8800	1	jejdkrun87
+66	65535	1	13200	1	adfd72nh9k
+66	65535	1	21120	1	adfdijn0loKNHJik
+66	65535	1	17160	1	adfdijnmnb78k
+66	65535	1	15840	1	adfdpplkeock
+66	65535	1	13200	1	jejdkrun87
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL DEFAULT '0',
+  `b` blob NOT NULL,
+  `c` text NOT NULL,
+  `d` text NOT NULL,
+  PRIMARY KEY (`a`,`b`(255),`c`(255)),
+  KEY `b` (`b`(767))
+) ENGINE=InnoDB DEFAULT CHARSET=utf8
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+explain select * from t1 where b like 'adfd%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	b	NULL	NULL	NULL	15	Using where
+create table t2(a int, b varchar(255), primary key(a,b)) engine=innodb;
+insert into t2 select a,left(b,255) from t1;
+drop table t1;
+rename table t2 to t1;
+set innodb_lock_wait_timeout=1;
+begin;
+select a from t1 limit 1 for update;
+a
+22
+set innodb_lock_wait_timeout=1;
+create index t1ba on t1 (b,a);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+commit;
+begin;
+select a from t1 limit 1 lock in share mode;
+a
+22
+create index t1ba on t1 (b,a);
+drop index t1ba on t1;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+commit;
+explain select a from t1 order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	t1ba	261	NULL	15	Using index
+select a,sleep(2+a/100) from t1 order by b limit 3;
+select sleep(1);
+sleep(1)
+0
+drop index t1ba on t1;
+a	sleep(2+a/100)
+22	0
+44	0
+66	0
+explain select a from t1 order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	261	NULL	15	Using index; Using filesort
+select a from t1 order by b limit 3;
+a
+22
+66
+44
+commit;
+drop table t1;
+set global innodb_file_per_table=on;
+set global innodb_file_format='Barracuda';
+create table t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob,h blob,
+i blob,j blob,k blob,l blob,m blob,n blob,o blob,p blob,
+q blob,r blob,s blob,t blob,u blob)
+engine=innodb row_format=dynamic;
+create index t1a on t1 (a(1));
+create index t1b on t1 (b(1));
+create index t1c on t1 (c(1));
+create index t1d on t1 (d(1));
+create index t1e on t1 (e(1));
+create index t1f on t1 (f(1));
+create index t1g on t1 (g(1));
+create index t1h on t1 (h(1));
+create index t1i on t1 (i(1));
+create index t1j on t1 (j(1));
+create index t1k on t1 (k(1));
+create index t1l on t1 (l(1));
+create index t1m on t1 (m(1));
+create index t1n on t1 (n(1));
+create index t1o on t1 (o(1));
+create index t1p on t1 (p(1));
+create index t1q on t1 (q(1));
+create index t1r on t1 (r(1));
+create index t1s on t1 (s(1));
+create index t1t on t1 (t(1));
+create index t1u on t1 (u(1));
+ERROR HY000: Too big row
+create index t1ut on t1 (u(1), t(1));
+ERROR HY000: Too big row
+create index t1st on t1 (s(1), t(1));
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` blob,
+  `b` blob,
+  `c` blob,
+  `d` blob,
+  `e` blob,
+  `f` blob,
+  `g` blob,
+  `h` blob,
+  `i` blob,
+  `j` blob,
+  `k` blob,
+  `l` blob,
+  `m` blob,
+  `n` blob,
+  `o` blob,
+  `p` blob,
+  `q` blob,
+  `r` blob,
+  `s` blob,
+  `t` blob,
+  `u` blob,
+  KEY `t1a` (`a`(1)),
+  KEY `t1b` (`b`(1)),
+  KEY `t1c` (`c`(1)),
+  KEY `t1d` (`d`(1)),
+  KEY `t1e` (`e`(1)),
+  KEY `t1f` (`f`(1)),
+  KEY `t1g` (`g`(1)),
+  KEY `t1h` (`h`(1)),
+  KEY `t1i` (`i`(1)),
+  KEY `t1j` (`j`(1)),
+  KEY `t1k` (`k`(1)),
+  KEY `t1l` (`l`(1)),
+  KEY `t1m` (`m`(1)),
+  KEY `t1n` (`n`(1)),
+  KEY `t1o` (`o`(1)),
+  KEY `t1p` (`p`(1)),
+  KEY `t1q` (`q`(1)),
+  KEY `t1r` (`r`(1)),
+  KEY `t1s` (`s`(1)),
+  KEY `t1t` (`t`(1)),
+  KEY `t1st` (`s`(1),`t`(1))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC
+create index t1u on t1 (u(1));
+ERROR HY000: Too big row
+alter table t1 row_format=compact;
+create index t1u on t1 (u(1));
+drop table t1;
+set global innodb_file_per_table=0;
+set global innodb_file_format=Antelope;
+SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
+SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
+CREATE TABLE t1(
+c1	BIGINT(12) NOT NULL,
+PRIMARY KEY (c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+CREATE TABLE t2(
+c1	BIGINT(16) NOT NULL,
+c2	BIGINT(12) NOT NULL,
+c3	BIGINT(12) NOT NULL,
+PRIMARY KEY (c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3) REFERENCES t1(c1);
+SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
+SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`),
+  KEY `fk_t2_ca` (`c3`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+CREATE INDEX i_t2_c3_c2 ON t2(c3, c2);
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`),
+  KEY `i_t2_c3_c2` (`c3`,`c2`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
+SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
+INSERT INTO t2 VALUES(0,0,0);
+ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`))
+INSERT INTO t1 VALUES(0);
+INSERT INTO t2 VALUES(0,0,0);
+DROP TABLE t2;
+CREATE TABLE t2(
+c1	BIGINT(16) NOT NULL,
+c2	BIGINT(12) NOT NULL,
+c3	BIGINT(12) NOT NULL,
+PRIMARY KEY (c1,c2,c3)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3) REFERENCES t1(c1);
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`,`c2`,`c3`),
+  KEY `fk_t2_ca` (`c3`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+CREATE INDEX i_t2_c3_c2 ON t2(c3, c2);
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`,`c2`,`c3`),
+  KEY `i_t2_c3_c2` (`c3`,`c2`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+INSERT INTO t2 VALUES(0,0,1);
+ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`))
+INSERT INTO t2 VALUES(0,0,0);
+DELETE FROM t1;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`))
+DELETE FROM t2;
+DROP TABLE t2;
+DROP TABLE t1;
+CREATE TABLE t1(
+c1	BIGINT(12) NOT NULL,
+c2	INT(4) NOT NULL,
+PRIMARY KEY (c2,c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+CREATE TABLE t2(
+c1	BIGINT(16) NOT NULL,
+c2	BIGINT(12) NOT NULL,
+c3	BIGINT(12) NOT NULL,
+PRIMARY KEY (c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3,c2) REFERENCES t1(c1,c1);
+ERROR HY000: Can't create table '#sql-temporary' (errno: 150)
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3,c2) REFERENCES t1(c1,c2);
+ERROR HY000: Can't create table '#sql-temporary' (errno: 150)
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3,c2) REFERENCES t1(c2,c1);
+ERROR HY000: Can't create table '#sql-temporary' (errno: 150)
+ALTER TABLE t1 MODIFY COLUMN c2 BIGINT(12) NOT NULL;
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3,c2) REFERENCES t1(c1,c2);
+ERROR HY000: Can't create table '#sql-temporary' (errno: 150)
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3,c2) REFERENCES t1(c2,c1);
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `c1` bigint(12) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  PRIMARY KEY (`c2`,`c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`),
+  KEY `fk_t2_ca` (`c3`,`c2`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`, `c2`) REFERENCES `t1` (`c2`, `c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+CREATE INDEX i_t2_c2_c1 ON t2(c2, c1);
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`),
+  KEY `fk_t2_ca` (`c3`,`c2`),
+  KEY `i_t2_c2_c1` (`c2`,`c1`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`, `c2`) REFERENCES `t1` (`c2`, `c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+CREATE INDEX i_t2_c3_c1_c2 ON t2(c3, c1, c2);
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`),
+  KEY `fk_t2_ca` (`c3`,`c2`),
+  KEY `i_t2_c2_c1` (`c2`,`c1`),
+  KEY `i_t2_c3_c1_c2` (`c3`,`c1`,`c2`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`, `c2`) REFERENCES `t1` (`c2`, `c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+CREATE INDEX i_t2_c3_c2 ON t2(c3, c2);
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`),
+  KEY `i_t2_c2_c1` (`c2`,`c1`),
+  KEY `i_t2_c3_c1_c2` (`c3`,`c1`,`c2`),
+  KEY `i_t2_c3_c2` (`c3`,`c2`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`, `c2`) REFERENCES `t1` (`c2`, `c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+DROP TABLE t2;
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (3,'a'),(3,'b'),(1,'c'),(0,'d'),(1,'e');
+BEGIN;
+SELECT * FROM t1;
+a	b
+3	a
+3	b
+1	c
+0	d
+1	e
+CREATE INDEX t1a ON t1(a);
+SELECT * FROM t1;
+a	b
+3	a
+3	b
+1	c
+0	d
+1	e
+SELECT * FROM t1 FORCE INDEX(t1a) ORDER BY a;
+ERROR HY000: Table definition has changed, please retry transaction
+SELECT * FROM t1;
+a	b
+3	a
+3	b
+1	c
+0	d
+1	e
+COMMIT;
+SELECT * FROM t1 FORCE INDEX(t1a) ORDER BY a;
+a	b
+0	d
+1	c
+1	e
+3	a
+3	b
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/r/innodb-index_ucs2.result b/mysql-test/suite/innodb/r/innodb-index_ucs2.result
new file mode 100644
index 00000000000..c8a1e8c7da1
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-index_ucs2.result
@@ -0,0 +1,116 @@
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb default charset=ucs2;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
+commit;
+alter table t1 add unique index (b);
+ERROR 23000: Duplicate entry '2' for key 'b'
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=ucs2
+alter table t1 add index (b);
+insert into t1 values(10,10,'kkk','iii');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	5	NULL	6	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `b` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=ucs2
+alter table t1 add unique index (c), add index (d);
+insert into t1 values(11,11,'aaa','mmm');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+11	11	aaa	mmm
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+11	11	aaa	mmm
+select * from t1 force index(c) order by c;
+a	b	c	d
+11	11	aaa	mmm
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+select * from t1 force index(d) order by d;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+11	11	aaa	mmm
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	5	NULL	7	
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	21	NULL	7	
+explain select * from t1 force index(d) order by d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	d	43	NULL	7	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `c` (`c`),
+  KEY `b` (`b`),
+  KEY `d` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=ucs2
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+drop table t1;
diff --git a/mysql-test/suite/innodb/r/innodb-timeout.result b/mysql-test/suite/innodb/r/innodb-timeout.result
new file mode 100644
index 00000000000..be9a688cd72
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-timeout.result
@@ -0,0 +1,38 @@
+set global innodb_lock_wait_timeout=42;
+select @@innodb_lock_wait_timeout;
+@@innodb_lock_wait_timeout
+42
+set innodb_lock_wait_timeout=1;
+select @@innodb_lock_wait_timeout;
+@@innodb_lock_wait_timeout
+1
+select @@innodb_lock_wait_timeout;
+@@innodb_lock_wait_timeout
+42
+set global innodb_lock_wait_timeout=347;
+select @@innodb_lock_wait_timeout;
+@@innodb_lock_wait_timeout
+42
+set innodb_lock_wait_timeout=1;
+select @@innodb_lock_wait_timeout;
+@@innodb_lock_wait_timeout
+1
+select @@innodb_lock_wait_timeout;
+@@innodb_lock_wait_timeout
+347
+create table t1(a int primary key)engine=innodb;
+begin;
+insert into t1 values(1),(2),(3);
+select * from t1 for update;
+commit;
+a
+1
+2
+3
+begin;
+insert into t1 values(4);
+select * from t1 for update;
+commit;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+drop table t1;
+set global innodb_lock_wait_timeout=50;
diff --git a/mysql-test/suite/innodb/r/innodb-use-sys-malloc.result b/mysql-test/suite/innodb/r/innodb-use-sys-malloc.result
new file mode 100644
index 00000000000..2ec4c7c8130
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-use-sys-malloc.result
@@ -0,0 +1,48 @@
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+@@GLOBAL.innodb_use_sys_malloc
+1
+1 Expected
+SET @@GLOBAL.innodb_use_sys_malloc=0;
+ERROR HY000: Variable 'innodb_use_sys_malloc' is a read only variable
+Expected error 'Read only variable'
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+@@GLOBAL.innodb_use_sys_malloc
+1
+1 Expected
+drop table if exists t1;
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7);
+select * from t1;
+a
+1
+2
+3
+4
+5
+6
+7
+drop table t1;
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+@@GLOBAL.innodb_use_sys_malloc
+1
+1 Expected
+SET @@GLOBAL.innodb_use_sys_malloc=0;
+ERROR HY000: Variable 'innodb_use_sys_malloc' is a read only variable
+Expected error 'Read only variable'
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+@@GLOBAL.innodb_use_sys_malloc
+1
+1 Expected
+drop table if exists t1;
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7);
+select * from t1;
+a
+1
+2
+3
+4
+5
+6
+7
+drop table t1;
diff --git a/mysql-test/suite/innodb/r/innodb-zip.result b/mysql-test/suite/innodb/r/innodb-zip.result
new file mode 100644
index 00000000000..c81401743a5
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-zip.result
@@ -0,0 +1,421 @@
+set global innodb_file_per_table=off;
+set global innodb_file_format=`0`;
+create table t0(a int primary key) engine=innodb row_format=compressed;
+Warnings:
+Warning	1478	InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_per_table.
+Warning	1478	InnoDB: assuming ROW_FORMAT=COMPACT.
+create table t00(a int primary key) engine=innodb
+key_block_size=4 row_format=compressed;
+Warnings:
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=4.
+Warning	1478	InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_per_table.
+Warning	1478	InnoDB: assuming ROW_FORMAT=COMPACT.
+create table t1(a int primary key) engine=innodb row_format=dynamic;
+Warnings:
+Warning	1478	InnoDB: ROW_FORMAT=DYNAMIC requires innodb_file_per_table.
+Warning	1478	InnoDB: assuming ROW_FORMAT=COMPACT.
+create table t2(a int primary key) engine=innodb row_format=redundant;
+create table t3(a int primary key) engine=innodb row_format=compact;
+create table t4(a int primary key) engine=innodb key_block_size=9;
+Warnings:
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=9.
+create table t5(a int primary key) engine=innodb
+key_block_size=1 row_format=redundant;
+Warnings:
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=1.
+set global innodb_file_per_table=on;
+create table t6(a int primary key) engine=innodb
+key_block_size=1 row_format=redundant;
+Warnings:
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=1.
+set global innodb_file_format=`1`;
+create table t7(a int primary key) engine=innodb
+key_block_size=1 row_format=redundant;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED.
+create table t8(a int primary key) engine=innodb
+key_block_size=1 row_format=fixed;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED.
+Warning	1478	InnoDB: assuming ROW_FORMAT=COMPACT.
+create table t9(a int primary key) engine=innodb
+key_block_size=1 row_format=compact;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED.
+create table t10(a int primary key) engine=innodb
+key_block_size=1 row_format=dynamic;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED.
+create table t11(a int primary key) engine=innodb
+key_block_size=1 row_format=compressed;
+create table t12(a int primary key) engine=innodb
+key_block_size=1;
+create table t13(a int primary key) engine=innodb
+row_format=compressed;
+create table t14(a int primary key) engine=innodb key_block_size=9;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=9.
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t0	Compact
+test	t00	Compact
+test	t1	Compact
+test	t10	Dynamic
+test	t11	Compressed
+test	t12	Compressed
+test	t13	Compressed
+test	t14	Compact
+test	t2	Redundant
+test	t3	Compact
+test	t4	Compact
+test	t5	Redundant
+test	t6	Redundant
+test	t7	Redundant
+test	t8	Compact
+test	t9	Compact
+drop table t0,t00,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14;
+alter table t1 key_block_size=0;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=0.
+alter table t1 row_format=dynamic;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t1	Dynamic
+alter table t1 row_format=compact;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t1	Compact
+alter table t1 row_format=redundant;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t1	Redundant
+drop table t1;
+create table t1(a int not null, b text, index(b(10))) engine=innodb
+key_block_size=1;
+create table t2(b text)engine=innodb;
+insert into t2 values(concat('1abcdefghijklmnopqrstuvwxyz', repeat('A',5000)));
+insert into t1 select 1, b from t2;
+commit;
+begin;
+update t1 set b=repeat('B',100);
+select a,left(b,40) from t1 natural join t2;
+a	left(b,40)
+1	1abcdefghijklmnopqrstuvwxyzAAAAAAAAAAAAA
+rollback;
+select a,left(b,40) from t1 natural join t2;
+a	left(b,40)
+1	1abcdefghijklmnopqrstuvwxyzAAAAAAAAAAAAA
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t1	Compressed
+test	t2	Compact
+drop table t1,t2;
+SET SESSION innodb_strict_mode = off;
+CREATE TABLE t1(
+c TEXT NOT NULL, d TEXT NOT NULL,
+PRIMARY KEY (c(767),d(767)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII;
+ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs
+CREATE TABLE t1(
+c TEXT NOT NULL, d TEXT NOT NULL,
+PRIMARY KEY (c(767),d(767)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII;
+ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs
+CREATE TABLE t1(
+c TEXT NOT NULL, d TEXT NOT NULL,
+PRIMARY KEY (c(767),d(767)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII;
+drop table t1;
+CREATE TABLE t1(c TEXT, PRIMARY KEY (c(440)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII;
+ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs
+CREATE TABLE t1(c TEXT, PRIMARY KEY (c(439)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII;
+INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512));
+DROP TABLE t1;
+create table t1( c1 int not null, c2 blob, c3 blob, c4 blob,
+primary key(c1, c2(22), c3(22)))
+engine = innodb row_format = dynamic;
+begin;
+insert into t1 values(1, repeat('A', 20000), repeat('B', 20000),
+repeat('C', 20000));
+update t1 set c3 = repeat('D', 20000) where c1 = 1;
+commit;
+select count(*) from t1 where c2 = repeat('A', 20000);
+count(*)
+1
+select count(*) from t1 where c3 = repeat('D', 20000);
+count(*)
+1
+select count(*) from t1 where c4 = repeat('C', 20000);
+count(*)
+1
+update t1 set c3 = repeat('E', 20000) where c1 = 1;
+drop table t1;
+set global innodb_file_format=`0`;
+select @@innodb_file_format;
+@@innodb_file_format
+Antelope
+set global innodb_file_format=`1`;
+select @@innodb_file_format;
+@@innodb_file_format
+Barracuda
+set global innodb_file_format=`2`;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=`-1`;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=`Antelope`;
+set global innodb_file_format=`Barracuda`;
+set global innodb_file_format=`Cheetah`;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=`abc`;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=`1a`;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=``;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_per_table = on;
+set global innodb_file_format = `1`;
+set innodb_strict_mode = off;
+create table t1 (id int primary key) engine = innodb key_block_size = 0;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=0.
+drop table t1;
+set innodb_strict_mode = on;
+create table t1 (id int primary key) engine = innodb key_block_size = 0;
+ERROR HY000: Can't create table 'test.t1' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: invalid KEY_BLOCK_SIZE = 0. Valid values are [1, 2, 4, 8, 16]
+Error	1005	Can't create table 'test.t1' (errno: 1478)
+create table t2 (id int primary key) engine = innodb key_block_size = 9;
+ERROR HY000: Can't create table 'test.t2' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: invalid KEY_BLOCK_SIZE = 9. Valid values are [1, 2, 4, 8, 16]
+Error	1005	Can't create table 'test.t2' (errno: 1478)
+create table t3 (id int primary key) engine = innodb key_block_size = 1;
+create table t4 (id int primary key) engine = innodb key_block_size = 2;
+create table t5 (id int primary key) engine = innodb key_block_size = 4;
+create table t6 (id int primary key) engine = innodb key_block_size = 8;
+create table t7 (id int primary key) engine = innodb key_block_size = 16;
+create table t8 (id int primary key) engine = innodb row_format = compressed;
+create table t9 (id int primary key) engine = innodb row_format = dynamic;
+create table t10(id int primary key) engine = innodb row_format = compact;
+create table t11(id int primary key) engine = innodb row_format = redundant;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t10	Compact
+test	t11	Redundant
+test	t3	Compressed
+test	t4	Compressed
+test	t5	Compressed
+test	t6	Compressed
+test	t7	Compressed
+test	t8	Compressed
+test	t9	Dynamic
+drop table t3, t4, t5, t6, t7, t8, t9, t10, t11;
+create table t1 (id int primary key) engine = innodb
+key_block_size = 8 row_format = compressed;
+create table t2 (id int primary key) engine = innodb
+key_block_size = 8 row_format = redundant;
+ERROR HY000: Can't create table 'test.t2' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: cannot specify ROW_FORMAT = REDUNDANT with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t2' (errno: 1478)
+create table t3 (id int primary key) engine = innodb
+key_block_size = 8 row_format = compact;
+ERROR HY000: Can't create table 'test.t3' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: cannot specify ROW_FORMAT = COMPACT with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t3' (errno: 1478)
+create table t4 (id int primary key) engine = innodb
+key_block_size = 8 row_format = dynamic;
+ERROR HY000: Can't create table 'test.t4' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: cannot specify ROW_FORMAT = DYNAMIC with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t4' (errno: 1478)
+create table t5 (id int primary key) engine = innodb
+key_block_size = 8 row_format = default;
+ERROR HY000: Can't create table 'test.t5' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: cannot specify ROW_FORMAT = COMPACT with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t5' (errno: 1478)
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t1	Compressed
+drop table t1;
+create table t1 (id int primary key) engine = innodb
+key_block_size = 9 row_format = redundant;
+ERROR HY000: Can't create table 'test.t1' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: invalid KEY_BLOCK_SIZE = 9. Valid values are [1, 2, 4, 8, 16]
+Error	1478	InnoDB: cannot specify ROW_FORMAT = REDUNDANT with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t1' (errno: 1478)
+create table t2 (id int primary key) engine = innodb
+key_block_size = 9 row_format = compact;
+ERROR HY000: Can't create table 'test.t2' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: invalid KEY_BLOCK_SIZE = 9. Valid values are [1, 2, 4, 8, 16]
+Error	1478	InnoDB: cannot specify ROW_FORMAT = COMPACT with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t2' (errno: 1478)
+create table t2 (id int primary key) engine = innodb
+key_block_size = 9 row_format = dynamic;
+ERROR HY000: Can't create table 'test.t2' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: invalid KEY_BLOCK_SIZE = 9. Valid values are [1, 2, 4, 8, 16]
+Error	1478	InnoDB: cannot specify ROW_FORMAT = DYNAMIC with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t2' (errno: 1478)
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+set global innodb_file_per_table = off;
+create table t1 (id int primary key) engine = innodb key_block_size = 1;
+ERROR HY000: Can't create table 'test.t1' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t1' (errno: 1478)
+create table t2 (id int primary key) engine = innodb key_block_size = 2;
+ERROR HY000: Can't create table 'test.t2' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t2' (errno: 1478)
+create table t3 (id int primary key) engine = innodb key_block_size = 4;
+ERROR HY000: Can't create table 'test.t3' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t3' (errno: 1478)
+create table t4 (id int primary key) engine = innodb key_block_size = 8;
+ERROR HY000: Can't create table 'test.t4' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t4' (errno: 1478)
+create table t5 (id int primary key) engine = innodb key_block_size = 16;
+ERROR HY000: Can't create table 'test.t5' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t5' (errno: 1478)
+create table t6 (id int primary key) engine = innodb row_format = compressed;
+ERROR HY000: Can't create table 'test.t6' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t6' (errno: 1478)
+create table t7 (id int primary key) engine = innodb row_format = dynamic;
+ERROR HY000: Can't create table 'test.t7' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: ROW_FORMAT=DYNAMIC requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t7' (errno: 1478)
+create table t8 (id int primary key) engine = innodb row_format = compact;
+create table t9 (id int primary key) engine = innodb row_format = redundant;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t8	Compact
+test	t9	Redundant
+drop table t8, t9;
+set global innodb_file_per_table = on;
+set global innodb_file_format = `0`;
+create table t1 (id int primary key) engine = innodb key_block_size = 1;
+ERROR HY000: Can't create table 'test.t1' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t1' (errno: 1478)
+create table t2 (id int primary key) engine = innodb key_block_size = 2;
+ERROR HY000: Can't create table 'test.t2' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t2' (errno: 1478)
+create table t3 (id int primary key) engine = innodb key_block_size = 4;
+ERROR HY000: Can't create table 'test.t3' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t3' (errno: 1478)
+create table t4 (id int primary key) engine = innodb key_block_size = 8;
+ERROR HY000: Can't create table 'test.t4' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t4' (errno: 1478)
+create table t5 (id int primary key) engine = innodb key_block_size = 16;
+ERROR HY000: Can't create table 'test.t5' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t5' (errno: 1478)
+create table t6 (id int primary key) engine = innodb row_format = compressed;
+ERROR HY000: Can't create table 'test.t6' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t6' (errno: 1478)
+create table t7 (id int primary key) engine = innodb row_format = dynamic;
+ERROR HY000: Can't create table 'test.t7' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: ROW_FORMAT=DYNAMIC requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t7' (errno: 1478)
+create table t8 (id int primary key) engine = innodb row_format = compact;
+create table t9 (id int primary key) engine = innodb row_format = redundant;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t8	Compact
+test	t9	Redundant
+drop table t8, t9;
+set global innodb_file_per_table=0;
+set global innodb_file_format=Antelope;
+set global innodb_file_per_table=on;
+set global innodb_file_format=`Barracuda`;
+set global innodb_file_format_check=`Antelope`;
+create table normal_table (
+c1 int
+) engine = innodb;
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Antelope
+create table zip_table (
+c1 int
+) engine = innodb key_block_size = 8;
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Barracuda
+set global innodb_file_format_check=`Antelope`;
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Antelope
+show table status;
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Barracuda
+drop table normal_table, zip_table;
diff --git a/mysql-test/suite/innodb/r/innodb_bug36169.result b/mysql-test/suite/innodb/r/innodb_bug36169.result
new file mode 100644
index 00000000000..aa80e4d7aa4
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb_bug36169.result
@@ -0,0 +1,2 @@
+SET GLOBAL innodb_file_format='Barracuda';
+SET GLOBAL innodb_file_per_table=ON;
diff --git a/mysql-test/suite/innodb/r/innodb_bug36172.result b/mysql-test/suite/innodb/r/innodb_bug36172.result
new file mode 100644
index 00000000000..195775f74c8
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb_bug36172.result
@@ -0,0 +1 @@
+SET storage_engine=InnoDB;
diff --git a/mysql-test/suite/innodb/r/innodb_bug40360.result b/mysql-test/suite/innodb/r/innodb_bug40360.result
new file mode 100644
index 00000000000..ef4cf463903
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb_bug40360.result
@@ -0,0 +1,4 @@
+SET TX_ISOLATION='READ-COMMITTED';
+CREATE TABLE bug40360 (a INT) engine=innodb;
+INSERT INTO bug40360 VALUES (1);
+DROP TABLE bug40360;
diff --git a/mysql-test/suite/innodb/r/innodb_bug41904.result b/mysql-test/suite/innodb/r/innodb_bug41904.result
new file mode 100644
index 00000000000..6070d32d181
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb_bug41904.result
@@ -0,0 +1,4 @@
+CREATE TABLE bug41904 (id INT PRIMARY KEY, uniquecol CHAR(15)) ENGINE=InnoDB;
+INSERT INTO bug41904 VALUES (1,NULL), (2,NULL);
+CREATE UNIQUE INDEX ui ON bug41904 (uniquecol);
+DROP TABLE bug41904;
diff --git a/mysql-test/suite/innodb/r/innodb_bug44032.result b/mysql-test/suite/innodb/r/innodb_bug44032.result
new file mode 100644
index 00000000000..da2a000b06e
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb_bug44032.result
@@ -0,0 +1,7 @@
+CREATE TABLE bug44032(c CHAR(3) CHARACTER SET UTF8) ROW_FORMAT=REDUNDANT
+ENGINE=InnoDB;
+INSERT INTO bug44032 VALUES('abc'),(0xEFBCA4EFBCA4EFBCA4);
+UPDATE bug44032 SET c='DDD' WHERE c=0xEFBCA4EFBCA4EFBCA4;
+UPDATE bug44032 SET c=NULL WHERE c='DDD';
+UPDATE bug44032 SET c='DDD' WHERE c IS NULL;
+DROP TABLE bug44032;
diff --git a/mysql-test/suite/innodb/r/innodb_file_format.result b/mysql-test/suite/innodb/r/innodb_file_format.result
new file mode 100644
index 00000000000..9cfac5f001c
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb_file_format.result
@@ -0,0 +1,44 @@
+select @@innodb_file_format;
+@@innodb_file_format
+Antelope
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Antelope
+set global innodb_file_format=antelope;
+set global innodb_file_format=barracuda;
+set global innodb_file_format=cheetah;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_file_format;
+@@innodb_file_format
+Barracuda
+set global innodb_file_format=default;
+select @@innodb_file_format;
+@@innodb_file_format
+Antelope
+set global innodb_file_format=on;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=off;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_file_format;
+@@innodb_file_format
+Antelope
+set global innodb_file_format_check=antelope;
+set global innodb_file_format_check=barracuda;
+set global innodb_file_format_check=cheetah;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Barracuda
+set global innodb_file_format_check=default;
+Warnings:
+Warning	1210	Ignoring SET innodb_file_format=on
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Barracuda
+set global innodb_file_format=on;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=off;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Barracuda
diff --git a/mysql-test/suite/innodb/r/innodb_information_schema.result b/mysql-test/suite/innodb/r/innodb_information_schema.result
new file mode 100644
index 00000000000..396cae579ce
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb_information_schema.result
@@ -0,0 +1,23 @@
+lock_mode	lock_type	lock_table	lock_index	lock_rec	lock_data
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	2	'1', 'abc', '''abc', 'abc''', 'a''bc', 'a''bc''', '''abc'''''
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	2	'1', 'abc', '''abc', 'abc''', 'a''bc', 'a''bc''', '''abc'''''
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	3	'2', 'abc', '"abc', 'abc"', 'a"bc', 'a"bc"', '"abc""'
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	3	'2', 'abc', '"abc', 'abc"', 'a"bc', 'a"bc"', '"abc""'
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	4	'3', 'abc', '\\abc', 'abc\\', 'a\\bc', 'a\\bc\\', '\\abc\\\\'
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	4	'3', 'abc', '\\abc', 'abc\\', 'a\\bc', 'a\\bc\\', '\\abc\\\\'
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	5	'4', 'abc', '\0abc', 'abc\0', 'a\0bc', 'a\0bc\0', 'a\0bc\0\0'
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	5	'4', 'abc', '\0abc', 'abc\0', 'a\0bc', 'a\0bc\0', 'a\0bc\0\0'
+X	RECORD	`test`.`t_min`	`PRIMARY`	2	-128, 0, -32768, 0, -8388608, 0, -2147483648, 0, -9223372036854775808, 0
+X	RECORD	`test`.`t_min`	`PRIMARY`	2	-128, 0, -32768, 0, -8388608, 0, -2147483648, 0, -9223372036854775808, 0
+X	RECORD	`test`.`t_max`	`PRIMARY`	2	127, 255, 32767, 65535, 8388607, 16777215, 2147483647, 4294967295, 9223372036854775807, 18446744073709551615
+X	RECORD	`test`.`t_max`	`PRIMARY`	2	127, 255, 32767, 65535, 8388607, 16777215, 2147483647, 4294967295, 9223372036854775807, 18446744073709551615
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	1	supremum pseudo-record
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	1	supremum pseudo-record
+lock_table	COUNT(*)
+`test`.`t_max`	2
+`test`.`t_min`	2
+`test`.```t'\"_str`	10
+lock_table	COUNT(*)
+"test"."t_max"	2
+"test"."t_min"	2
+"test"."`t'\""_str"	10
diff --git a/mysql-test/suite/innodb/t/disabled.def b/mysql-test/suite/innodb/t/disabled.def
new file mode 100644
index 00000000000..baf8c89f539
--- /dev/null
+++ b/mysql-test/suite/innodb/t/disabled.def
@@ -0,0 +1 @@
+innodb-index: InnoDB: Error: table `test`.`t1#1` already exists in InnoDB internal
diff --git a/mysql-test/suite/innodb/t/innodb-analyze.test b/mysql-test/suite/innodb/t/innodb-analyze.test
new file mode 100644
index 00000000000..870e6434797
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-analyze.test
@@ -0,0 +1,66 @@
+#
+# Test that mysqld does not crash when running ANALYZE TABLE with
+# different values of the parameter innodb_stats_sample_pages.
+#
+
+-- source include/have_innodb.inc
+-- source suite/innodb/include/have_innodb_plugin.inc
+
+# we care only that the following SQL commands do not produce errors
+# and do not crash the server
+-- disable_query_log
+-- disable_result_log
+-- enable_warnings
+
+SET GLOBAL innodb_stats_sample_pages=0;
+
+# check that the value has been adjusted to 1
+-- enable_result_log
+SHOW VARIABLES LIKE 'innodb_stats_sample_pages';
+-- disable_result_log
+
+CREATE TABLE innodb_analyze (
+	a INT,
+	b INT,
+	KEY(a),
+	KEY(b,a)
+) ENGINE=InnoDB;
+
+# test with empty table
+
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=2;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=4;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=8;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=16;
+ANALYZE TABLE innodb_analyze;
+
+INSERT INTO innodb_analyze VALUES
+(1,1), (1,1), (1,2), (1,3), (1,4), (1,5),
+(8,1), (8,8), (8,2), (7,1), (1,4), (3,5);
+
+SET GLOBAL innodb_stats_sample_pages=1;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=2;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=4;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=8;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=16;
+ANALYZE TABLE innodb_analyze;
+
+DROP TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=DEFAULT;
diff --git a/mysql-test/suite/innodb/t/innodb-index.test b/mysql-test/suite/innodb/t/innodb-index.test
new file mode 100644
index 00000000000..54aff3a42c0
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-index.test
@@ -0,0 +1,534 @@
+-- source include/have_innodb.inc
+
+create table t1(a int not null, b int, c char(10) not null, d varchar(20)) engine = innodb;
+insert into t1 values (5,5,'oo','oo'),(4,4,'tr','tr'),(3,4,'ad','ad'),(2,3,'ak','ak');
+commit;
+--error ER_DUP_KEYNAME
+alter table t1 add index b (b), add index b (b);
+--error ER_DUP_FIELDNAME
+alter table t1 add index (b,b);
+alter table t1 add index d2 (d);
+show create table t1;
+explain select * from t1 force index(d2) order by d;
+select * from t1 force index (d2) order by d;
+--error ER_DUP_ENTRY
+alter table t1 add unique index (b);
+show create table t1;
+alter table t1 add index (b);
+show create table t1;
+
+# Check how existing tables interfere with temporary tables.
+CREATE TABLE `t1#1`(a INT PRIMARY KEY) ENGINE=InnoDB;
+
+--error 156
+alter table t1 add unique index (c), add index (d);
+rename table `t1#1` to `t1#2`;
+--error 156
+alter table t1 add unique index (c), add index (d);
+drop table `t1#2`;
+
+alter table t1 add unique index (c), add index (d);
+show create table t1;
+explain select * from t1 force index(c) order by c;
+alter table t1 add primary key (a), drop index c;
+show create table t1;
+--error ER_MULTIPLE_PRI_KEY
+alter table t1 add primary key (c);
+--error ER_DUP_ENTRY
+alter table t1 drop primary key, add primary key (b);
+create unique index c on t1 (c);
+show create table t1;
+explain select * from t1 force index(c) order by c;
+select * from t1 force index(c) order by c;
+alter table t1 drop index b, add index (b);
+show create table t1;
+insert into t1 values(6,1,'ggg','ggg');
+select * from t1;
+select * from t1 force index(b) order by b;
+select * from t1 force index(c) order by c;
+select * from t1 force index(d) order by d;
+explain select * from t1 force index(b) order by b;
+explain select * from t1 force index(c) order by c;
+explain select * from t1 force index(d) order by d;
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,3,'ad','ad'),(4,4,'afe','afe');
+commit;
+alter table t1 add index (c(2));
+show create table t1;
+alter table t1 add unique index (d(10));
+show create table t1;
+insert into t1 values(5,1,'ggg','ggg');
+select * from t1;
+select * from t1 force index(c) order by c;
+select * from t1 force index(d) order by d;
+explain select * from t1 order by b;
+explain select * from t1 force index(c) order by c;
+explain select * from t1 force index(d) order by d;
+show create table t1;
+alter table t1 drop index d;
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+select * from t1 force index(c) order by c;
+explain select * from t1 order by b;
+explain select * from t1 force index(c) order by c;
+explain select * from t1 order by d;
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
+commit;
+alter table t1 add unique index (b,c);
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+select * from t1 force index(b) order by b;
+explain select * from t1 force index(b) order by b;
+show create table t1;
+alter table t1 add index (b,c);
+insert into t1 values(11,11,'kkk','kkk');
+select * from t1;
+select * from t1 force index(b) order by b;
+explain select * from t1 force index(b) order by b;
+show create table t1;
+alter table t1 add unique index (c,d);
+insert into t1 values(13,13,'yyy','aaa');
+select * from t1;
+select * from t1 force index(b) order by b;
+select * from t1 force index(c) order by c;
+explain select * from t1 force index(b) order by b;
+explain select * from t1 force index(c) order by c;
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, b int not null, c int, primary key (a), key (b)) engine = innodb;
+create table t3(a int not null, c int not null, d int, primary key (a), key (c)) engine = innodb;
+create table t4(a int not null, d int not null, e int, primary key (a), key (d)) engine = innodb;
+create table t2(a int not null, b int not null, c int not null, d int not null, e int,
+foreign key (b) references t1(b) on delete cascade,
+foreign key (c) references t3(c), foreign key (d) references t4(d))
+engine = innodb;
+--error ER_DROP_INDEX_FK
+alter table t1 drop index b;
+--error ER_DROP_INDEX_FK
+alter table t3 drop index c;
+--error ER_DROP_INDEX_FK
+alter table t4 drop index d;
+--error ER_DROP_INDEX_FK
+alter table t2 drop index b;
+--error ER_DROP_INDEX_FK
+alter table t2 drop index b, drop index c, drop index d;
+# Apparently, the following makes mysql_alter_table() drop index d.
+create unique index dc on t2 (d,c);
+create index dc on t1 (b,c);
+# This should preserve the foreign key constraints.
+alter table t2 add primary key (a);
+insert into t1 values (1,1,1);
+insert into t3 values (1,1,1);
+insert into t4 values (1,1,1);
+insert into t2 values (1,1,1,1,1);
+commit;
+alter table t4 add constraint dc foreign key (a) references t1(a);
+show create table t4;
+--replace_regex /'test\.#sql-[0-9a-f_]*'/'#sql-temporary'/
+# a foreign key 'test/dc' already exists
+--error ER_CANT_CREATE_TABLE
+alter table t3 add constraint dc foreign key (a) references t1(a);
+show create table t3;
+alter table t2 drop index b, add index (b);
+show create table t2;
+--error ER_ROW_IS_REFERENCED_2
+delete from t1;
+--error ER_CANT_DROP_FIELD_OR_KEY
+drop index dc on t4;
+# there is no foreign key dc on t3
+--replace_regex /'\.\/test\/#sql2-[0-9a-f-]*'/'#sql2-temporary'/
+--error ER_ERROR_ON_RENAME
+alter table t3 drop foreign key dc;
+alter table t4 drop foreign key dc;
+select * from t2;
+delete from t1;
+select * from t2;
+
+drop table t2,t4,t3,t1;
+
+-- let charset = utf8
+-- source suite/innodb/include/innodb-index.inc
+
+create table t1(a int not null, b int) engine = innodb;
+insert into t1 values (1,1),(1,1),(1,1),(1,1);
+--error ER_DUP_ENTRY
+alter table t1 add unique index (a);
+--error ER_DUP_ENTRY
+alter table t1 add unique index (b);
+--error ER_DUP_ENTRY
+alter table t1 add unique index (a), add unique index(b);
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, c int not null,b int, primary key(a), unique key(c), key(b)) engine = innodb;
+alter table t1 drop index c, drop index b;
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, b int, primary key(a)) engine = innodb;
+alter table t1 add index (b);
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,3,'ac','ac'),(4,4,'afe','afe'),(5,4,'affe','affe');
+--error ER_DUP_ENTRY
+alter table t1 add unique index (b), add unique index (c), add unique index (d);
+--error ER_DUP_ENTRY
+alter table t1 add unique index (c), add unique index (b), add index (d);
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, b int not null, c int, primary key (a), key(c)) engine=innodb;
+insert into t1 values (5,1,5),(4,2,4),(3,3,3),(2,4,2),(1,5,1);
+alter table t1 add unique index (b);
+insert into t1 values (10,20,20),(11,19,19),(12,18,18),(13,17,17);
+show create table t1;
+check table t1;
+explain select * from t1 force index(c) order by c;
+explain select * from t1 order by a;
+explain select * from t1 force index(b) order by b;
+select * from t1 order by a;
+select * from t1 force index(b) order by b;
+select * from t1 force index(c) order by c;
+drop table t1;
+
+create table t1(a int not null, b int not null) engine=innodb;
+insert into t1 values (1,1);
+alter table t1 add primary key(b);
+insert into t1 values (2,2);
+show create table t1;
+check table t1;
+select * from t1;
+explain select * from t1;
+explain select * from t1 order by a;
+explain select * from t1 order by b;
+checksum table t1;
+drop table t1;
+
+create table t1(a int not null) engine=innodb;
+insert into t1 values (1);
+alter table t1 add primary key(a);
+insert into t1 values (2);
+show create table t1;
+check table t1;
+commit;
+select * from t1;
+explain select * from t1;
+explain select * from t1 order by a;
+drop table t1;
+
+create table t2(d varchar(17) primary key) engine=innodb default charset=utf8;
+create table t3(a int primary key) engine=innodb;
+
+insert into t3 values(22),(44),(33),(55),(66);
+
+insert into t2 values ('jejdkrun87'),('adfd72nh9k'),
+('adfdpplkeock'),('adfdijnmnb78k'),('adfdijn0loKNHJik');
+
+create table t1(a int, b blob, c text, d text not null)
+engine=innodb default charset = utf8;
+
+# r2667 The following test is disabled because MySQL behavior changed.
+# r2667 The test was added with this comment:
+# r2667 
+# r2667 ------------------------------------------------------------------------
+# r2667 r1699 | marko | 2007-08-10 19:53:19 +0300 (Fri, 10 Aug 2007) | 5 lines
+# r2667 
+# r2667 branches/zip: Add changes that accidentally omitted from r1698:
+# r2667 
+# r2667 innodb-index.test, innodb-index.result: Add a test for creating
+# r2667 a PRIMARY KEY on a column that contains a NULL value.
+# r2667 ------------------------------------------------------------------------
+# r2667 
+# r2667 but in BZR-r2667:
+# r2667 http://bazaar.launchpad.net/~mysql/mysql-server/mysql-5.1/revision/davi%40mysql.com-20080617141221-8yre8ys9j4uw3xx5?start_revid=joerg%40mysql.com-20080630105418-7qoe5ehomgrcdb89
+# r2667 MySQL changed the behavior to do full table copy when creating PRIMARY INDEX
+# r2667 on a non-NULL column instead of calling ::add_index() which would fail (and
+# r2667 this is what we were testing here). Before r2667 the code execution path was
+# r2667 like this (when adding PRIMARY INDEX on a non-NULL column with ALTER TABLE):
+# r2667 
+# r2667 mysql_alter_table()
+# r2667   compare_tables()  // would return ALTER_TABLE_INDEX_CHANGED
+# r2667   ::add_index()  // would fail with "primary index cannot contain NULL"
+# r2667 
+# r2667 after r2667 the code execution path is the following:
+# r2667 
+# r2667 mysql_alter_table()
+# r2667   compare_tables()  // returns ALTER_TABLE_DATA_CHANGED
+# r2667   full copy is done, without calling ::add_index()
+# r2667 
+# r2667 To enable, remove "# r2667: " below.
+# r2667 
+# r2667: insert into t1 values (null,null,null,'null');
+insert into t1
+select a,left(repeat(d,100*a),65535),repeat(d,20*a),d from t2,t3;
+drop table t2, t3;
+select count(*) from t1 where a=44;
+select a,
+length(b),b=left(repeat(d,100*a),65535),length(c),c=repeat(d,20*a),d from t1;
+# r2667: --error ER_PRIMARY_CANT_HAVE_NULL
+# r2667: alter table t1 add primary key (a), add key (b(20));
+# r2667: delete from t1 where d='null';
+--error ER_DUP_ENTRY
+alter table t1 add primary key (a), add key (b(20));
+delete from t1 where a%2;
+check table t1;
+alter table t1 add primary key (a,b(255),c(255)), add key (b(767));
+select count(*) from t1 where a=44;
+select a,
+length(b),b=left(repeat(d,100*a),65535),length(c),c=repeat(d,20*a),d from t1;
+show create table t1;
+check table t1;
+explain select * from t1 where b like 'adfd%';
+
+#
+# Test locking
+#
+
+create table t2(a int, b varchar(255), primary key(a,b)) engine=innodb;
+insert into t2 select a,left(b,255) from t1;
+drop table t1;
+rename table t2 to t1;
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+connection a;
+set innodb_lock_wait_timeout=1;
+begin;
+# Obtain an IX lock on the table
+select a from t1 limit 1 for update;
+connection b;
+set innodb_lock_wait_timeout=1;
+# This would require an S lock on the table, conflicting with the IX lock.
+--error ER_LOCK_WAIT_TIMEOUT
+create index t1ba on t1 (b,a);
+connection a;
+commit;
+begin;
+# Obtain an IS lock on the table
+select a from t1 limit 1 lock in share mode;
+connection b;
+# This will require an S lock on the table.  No conflict with the IS lock.
+create index t1ba on t1 (b,a);
+# This would require an X lock on the table, conflicting with the IS lock.
+--error ER_LOCK_WAIT_TIMEOUT
+drop index t1ba on t1;
+connection a;
+commit;
+explain select a from t1 order by b;
+--send
+select a,sleep(2+a/100) from t1 order by b limit 3;
+
+# The following DROP INDEX will succeed, altough the SELECT above has
+# opened a read view.  However, during the execution of the SELECT,
+# MySQL should hold a table lock that should block the execution
+# of the DROP INDEX below.
+
+connection b;
+select sleep(1);
+drop index t1ba on t1;
+
+# After the index was dropped, subsequent SELECTs will use the same
+# read view, but they should not be accessing the dropped index any more.
+
+connection a;
+reap;
+explain select a from t1 order by b;
+select a from t1 order by b limit 3;
+commit;
+
+connection default;
+disconnect a;
+disconnect b;
+
+drop table t1;
+
+let $per_table=`select @@innodb_file_per_table`;
+let $format=`select @@innodb_file_format`;
+set global innodb_file_per_table=on;
+set global innodb_file_format='Barracuda';
+# Test creating a table that could lead to undo log overflow.
+# In the undo log, we write a 768-byte prefix (REC_MAX_INDEX_COL_LEN)
+# of each externally stored column that appears as a column prefix in an index.
+# For this test case, it would suffice to write 1 byte, though.
+create table t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob,h blob,
+		i blob,j blob,k blob,l blob,m blob,n blob,o blob,p blob,
+		q blob,r blob,s blob,t blob,u blob)
+	engine=innodb row_format=dynamic;
+create index t1a on t1 (a(1));
+create index t1b on t1 (b(1));
+create index t1c on t1 (c(1));
+create index t1d on t1 (d(1));
+create index t1e on t1 (e(1));
+create index t1f on t1 (f(1));
+create index t1g on t1 (g(1));
+create index t1h on t1 (h(1));
+create index t1i on t1 (i(1));
+create index t1j on t1 (j(1));
+create index t1k on t1 (k(1));
+create index t1l on t1 (l(1));
+create index t1m on t1 (m(1));
+create index t1n on t1 (n(1));
+create index t1o on t1 (o(1));
+create index t1p on t1 (p(1));
+create index t1q on t1 (q(1));
+create index t1r on t1 (r(1));
+create index t1s on t1 (s(1));
+create index t1t on t1 (t(1));
+--error 139
+create index t1u on t1 (u(1));
+--error 139
+create index t1ut on t1 (u(1), t(1));
+create index t1st on t1 (s(1), t(1));
+show create table t1;
+--error 139
+create index t1u on t1 (u(1));
+alter table t1 row_format=compact;
+create index t1u on t1 (u(1));
+
+drop table t1;
+eval set global innodb_file_per_table=$per_table;
+eval set global innodb_file_format=$format;
+
+#
+# Test to check whether CREATE INDEX handles implicit foreign key
+# constraint modifications (Issue #70, Bug #38786)
+#
+SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
+SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
+
+CREATE TABLE t1(
+  c1	BIGINT(12) NOT NULL,
+  PRIMARY KEY (c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+CREATE TABLE t2(
+  c1	BIGINT(16) NOT NULL,
+  c2	BIGINT(12) NOT NULL,
+  c3	BIGINT(12) NOT NULL,
+  PRIMARY KEY (c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3) REFERENCES t1(c1);
+
+SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
+SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
+
+SHOW CREATE TABLE t2;
+
+CREATE INDEX i_t2_c3_c2 ON t2(c3, c2);
+
+SHOW CREATE TABLE t2;
+
+SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
+SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
+
+--error ER_NO_REFERENCED_ROW_2
+INSERT INTO t2 VALUES(0,0,0);
+INSERT INTO t1 VALUES(0);
+INSERT INTO t2 VALUES(0,0,0);
+
+DROP TABLE t2;
+
+CREATE TABLE t2(
+  c1	BIGINT(16) NOT NULL,
+  c2	BIGINT(12) NOT NULL,
+  c3	BIGINT(12) NOT NULL,
+  PRIMARY KEY (c1,c2,c3)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3) REFERENCES t1(c1);
+
+SHOW CREATE TABLE t2;
+
+CREATE INDEX i_t2_c3_c2 ON t2(c3, c2);
+
+SHOW CREATE TABLE t2;
+--error ER_NO_REFERENCED_ROW_2
+INSERT INTO t2 VALUES(0,0,1);
+INSERT INTO t2 VALUES(0,0,0);
+--error ER_ROW_IS_REFERENCED_2
+DELETE FROM t1;
+DELETE FROM t2;
+
+DROP TABLE t2;
+DROP TABLE t1;
+
+CREATE TABLE t1(
+  c1	BIGINT(12) NOT NULL,
+  c2	INT(4) NOT NULL,
+  PRIMARY KEY (c2,c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+CREATE TABLE t2(
+  c1	BIGINT(16) NOT NULL,
+  c2	BIGINT(12) NOT NULL,
+  c3	BIGINT(12) NOT NULL,
+  PRIMARY KEY (c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+--replace_regex /'test\.#sql-[0-9_a-f-]*'/'#sql-temporary'/
+--error ER_CANT_CREATE_TABLE
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3,c2) REFERENCES t1(c1,c1);
+--replace_regex /'test\.#sql-[0-9_a-f-]*'/'#sql-temporary'/
+--error ER_CANT_CREATE_TABLE
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3,c2) REFERENCES t1(c1,c2);
+--replace_regex /'test\.#sql-[0-9_a-f-]*'/'#sql-temporary'/
+--error ER_CANT_CREATE_TABLE
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3,c2) REFERENCES t1(c2,c1);
+ALTER TABLE t1 MODIFY COLUMN c2 BIGINT(12) NOT NULL;
+--replace_regex /'test\.#sql-[0-9_a-f-]*'/'#sql-temporary'/
+--error ER_CANT_CREATE_TABLE
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3,c2) REFERENCES t1(c1,c2);
+
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3,c2) REFERENCES t1(c2,c1);
+SHOW CREATE TABLE t1;
+SHOW CREATE TABLE t2;
+CREATE INDEX i_t2_c2_c1 ON t2(c2, c1);
+SHOW CREATE TABLE t2;
+CREATE INDEX i_t2_c3_c1_c2 ON t2(c3, c1, c2);
+SHOW CREATE TABLE t2;
+CREATE INDEX i_t2_c3_c2 ON t2(c3, c2);
+SHOW CREATE TABLE t2;
+
+DROP TABLE t2;
+DROP TABLE t1;
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+connection a;
+CREATE TABLE t1 (a INT, b CHAR(1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (3,'a'),(3,'b'),(1,'c'),(0,'d'),(1,'e');
+connection b;
+BEGIN;
+SELECT * FROM t1;
+connection a;
+CREATE INDEX t1a ON t1(a);
+connection b;
+SELECT * FROM t1;
+--error ER_TABLE_DEF_CHANGED
+SELECT * FROM t1 FORCE INDEX(t1a) ORDER BY a;
+SELECT * FROM t1;
+COMMIT;
+SELECT * FROM t1 FORCE INDEX(t1a) ORDER BY a;
+connection default;
+disconnect a;
+disconnect b;
+
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/t/innodb-index_ucs2.test b/mysql-test/suite/innodb/t/innodb-index_ucs2.test
new file mode 100644
index 00000000000..db4626ac346
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-index_ucs2.test
@@ -0,0 +1,5 @@
+-- source include/have_innodb.inc
+-- source include/have_ucs2.inc
+
+-- let charset = ucs2
+-- source suite/innodb/include/innodb-index.inc
diff --git a/mysql-test/suite/innodb/t/innodb-timeout.test b/mysql-test/suite/innodb/t/innodb-timeout.test
new file mode 100644
index 00000000000..1ee1ad63180
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-timeout.test
@@ -0,0 +1,65 @@
+-- source include/have_innodb.inc
+-- source suite/innodb/include/have_innodb_plugin.inc
+
+let $timeout=`select @@innodb_lock_wait_timeout`;
+set global innodb_lock_wait_timeout=42;
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+
+connection a;
+select @@innodb_lock_wait_timeout;
+set innodb_lock_wait_timeout=1;
+select @@innodb_lock_wait_timeout;
+
+connection b;
+select @@innodb_lock_wait_timeout;
+set global innodb_lock_wait_timeout=347;
+select @@innodb_lock_wait_timeout;
+set innodb_lock_wait_timeout=1;
+select @@innodb_lock_wait_timeout;
+
+connect (c,localhost,root,,);
+connection c;
+select @@innodb_lock_wait_timeout;
+connection default;
+disconnect c;
+
+connection a;
+create table t1(a int primary key)engine=innodb;
+begin;
+insert into t1 values(1),(2),(3);
+
+connection b;
+--send
+select * from t1 for update;
+
+connection a;
+commit;
+
+connection b;
+reap;
+
+connection a;
+begin;
+insert into t1 values(4);
+
+connection b;
+--send
+select * from t1 for update;
+
+connection a;
+sleep 2;
+commit;
+
+connection b;
+--error ER_LOCK_WAIT_TIMEOUT
+reap;
+drop table t1;
+
+connection default;
+
+disconnect a;
+disconnect b;
+
+eval set global innodb_lock_wait_timeout=$timeout;
diff --git a/mysql-test/suite/innodb/t/innodb-use-sys-malloc-master.opt b/mysql-test/suite/innodb/t/innodb-use-sys-malloc-master.opt
new file mode 100644
index 00000000000..8ec086387f8
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-use-sys-malloc-master.opt
@@ -0,0 +1,2 @@
+--loose-innodb-use-sys-malloc=true
+--loose-innodb-use-sys-malloc=true
diff --git a/mysql-test/suite/innodb/t/innodb-use-sys-malloc.test b/mysql-test/suite/innodb/t/innodb-use-sys-malloc.test
new file mode 100644
index 00000000000..4df3ca9d27c
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-use-sys-malloc.test
@@ -0,0 +1,49 @@
+--source include/have_innodb.inc
+-- source suite/innodb/include/have_innodb_plugin.inc
+
+#display current value of innodb_use_sys_malloc
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+--echo 1 Expected
+
+#try changing it. Should fail.
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+SET @@GLOBAL.innodb_use_sys_malloc=0;
+--echo Expected error 'Read only variable'
+
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+--echo 1 Expected
+
+
+#do some stuff to see if it works.
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7);
+select * from t1;
+drop table t1;
+--source include/have_innodb.inc
+
+#display current value of innodb_use_sys_malloc
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+--echo 1 Expected
+
+#try changing it. Should fail.
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+SET @@GLOBAL.innodb_use_sys_malloc=0;
+--echo Expected error 'Read only variable'
+
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+--echo 1 Expected
+
+
+#do some stuff to see if it works.
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7);
+select * from t1;
+drop table t1;
diff --git a/mysql-test/suite/innodb/t/innodb-zip.test b/mysql-test/suite/innodb/t/innodb-zip.test
new file mode 100644
index 00000000000..3ee278b7c5a
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-zip.test
@@ -0,0 +1,344 @@
+-- source include/have_innodb.inc
+-- source suite/innodb/include/have_innodb_plugin.inc
+
+let $per_table=`select @@innodb_file_per_table`;
+let $format=`select @@innodb_file_format`;
+let $innodb_file_format_check_orig=`select @@innodb_file_format_check`;
+set global innodb_file_per_table=off;
+set global innodb_file_format=`0`;
+
+create table t0(a int primary key) engine=innodb row_format=compressed;
+create table t00(a int primary key) engine=innodb
+key_block_size=4 row_format=compressed;
+create table t1(a int primary key) engine=innodb row_format=dynamic;
+create table t2(a int primary key) engine=innodb row_format=redundant;
+create table t3(a int primary key) engine=innodb row_format=compact;
+create table t4(a int primary key) engine=innodb key_block_size=9;
+create table t5(a int primary key) engine=innodb
+key_block_size=1 row_format=redundant;
+
+set global innodb_file_per_table=on;
+create table t6(a int primary key) engine=innodb
+key_block_size=1 row_format=redundant;
+set global innodb_file_format=`1`;
+create table t7(a int primary key) engine=innodb
+key_block_size=1 row_format=redundant;
+create table t8(a int primary key) engine=innodb
+key_block_size=1 row_format=fixed;
+create table t9(a int primary key) engine=innodb
+key_block_size=1 row_format=compact;
+create table t10(a int primary key) engine=innodb
+key_block_size=1 row_format=dynamic;
+create table t11(a int primary key) engine=innodb
+key_block_size=1 row_format=compressed;
+create table t12(a int primary key) engine=innodb
+key_block_size=1;
+create table t13(a int primary key) engine=innodb
+row_format=compressed;
+create table t14(a int primary key) engine=innodb key_block_size=9;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+
+drop table t0,t00,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14;
+alter table t1 key_block_size=0;
+alter table t1 row_format=dynamic;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+alter table t1 row_format=compact;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+alter table t1 row_format=redundant;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+drop table t1;
+
+create table t1(a int not null, b text, index(b(10))) engine=innodb
+key_block_size=1;
+
+create table t2(b text)engine=innodb;
+insert into t2 values(concat('1abcdefghijklmnopqrstuvwxyz', repeat('A',5000)));
+
+insert into t1 select 1, b from t2;
+commit;
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+
+connection a;
+begin;
+update t1 set b=repeat('B',100);
+
+connection b;
+select a,left(b,40) from t1 natural join t2;
+
+connection a;
+rollback;
+
+connection b;
+select a,left(b,40) from t1 natural join t2;
+
+connection default;
+disconnect a;
+disconnect b;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+drop table t1,t2;
+
+# The following should fail even in non-strict mode.
+SET SESSION innodb_strict_mode = off;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE t1(
+	c TEXT NOT NULL, d TEXT NOT NULL,
+	PRIMARY KEY (c(767),d(767)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE t1(
+	c TEXT NOT NULL, d TEXT NOT NULL,
+	PRIMARY KEY (c(767),d(767)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII;
+CREATE TABLE t1(
+	c TEXT NOT NULL, d TEXT NOT NULL,
+	PRIMARY KEY (c(767),d(767)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII;
+drop table t1;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE t1(c TEXT, PRIMARY KEY (c(440)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII;
+CREATE TABLE t1(c TEXT, PRIMARY KEY (c(439)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII;
+INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512));
+DROP TABLE t1;
+
+#
+# Test blob column inheritance (mantis issue#36)
+#
+
+create table t1( c1 int not null, c2 blob, c3 blob, c4 blob,
+		primary key(c1, c2(22), c3(22)))
+		engine = innodb row_format = dynamic;
+begin;
+insert into t1 values(1, repeat('A', 20000), repeat('B', 20000),
+			repeat('C', 20000));
+
+update t1 set c3 = repeat('D', 20000) where c1 = 1;
+commit;
+
+# one blob column which is unchanged in update and part of PK
+# one blob column which is changed and part of of PK
+# one blob column which is not part of PK and is unchanged
+select count(*) from t1 where c2 = repeat('A', 20000);
+select count(*) from t1 where c3 = repeat('D', 20000);
+select count(*) from t1 where c4 = repeat('C', 20000);
+
+update t1 set c3 = repeat('E', 20000) where c1 = 1;
+drop table t1;
+
+#
+#
+# Test innodb_file_format
+#
+set global innodb_file_format=`0`;
+select @@innodb_file_format;
+set global innodb_file_format=`1`;
+select @@innodb_file_format;
+-- error ER_WRONG_ARGUMENTS
+set global innodb_file_format=`2`;
+-- error ER_WRONG_ARGUMENTS
+set global innodb_file_format=`-1`;
+set global innodb_file_format=`Antelope`;
+set global innodb_file_format=`Barracuda`;
+-- error ER_WRONG_ARGUMENTS
+set global innodb_file_format=`Cheetah`;
+-- error ER_WRONG_ARGUMENTS
+set global innodb_file_format=`abc`;
+-- error ER_WRONG_ARGUMENTS
+set global innodb_file_format=`1a`;
+-- error ER_WRONG_ARGUMENTS
+set global innodb_file_format=``;
+
+#test strict mode.
+# this does not work anymore, has been removed from mysqltest
+# -- enable_errors
+set global innodb_file_per_table = on;
+set global innodb_file_format = `1`;
+
+set innodb_strict_mode = off;
+create table t1 (id int primary key) engine = innodb key_block_size = 0;
+drop table t1;
+
+#set strict_mode
+set innodb_strict_mode = on;
+
+#Test different values of KEY_BLOCK_SIZE
+
+--error ER_CANT_CREATE_TABLE
+create table t1 (id int primary key) engine = innodb key_block_size = 0;
+show errors;
+
+--error ER_CANT_CREATE_TABLE
+create table t2 (id int primary key) engine = innodb key_block_size = 9;
+show errors;
+
+
+create table t3 (id int primary key) engine = innodb key_block_size = 1;
+create table t4 (id int primary key) engine = innodb key_block_size = 2;
+create table t5 (id int primary key) engine = innodb key_block_size = 4;
+create table t6 (id int primary key) engine = innodb key_block_size = 8;
+create table t7 (id int primary key) engine = innodb key_block_size = 16;
+
+#check various ROW_FORMAT values.
+create table t8 (id int primary key) engine = innodb row_format = compressed;
+create table t9 (id int primary key) engine = innodb row_format = dynamic;
+create table t10(id int primary key) engine = innodb row_format = compact;
+create table t11(id int primary key) engine = innodb row_format = redundant;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+drop table t3, t4, t5, t6, t7, t8, t9, t10, t11;
+
+#test different values of ROW_FORMAT with KEY_BLOCK_SIZE
+create table t1 (id int primary key) engine = innodb
+key_block_size = 8 row_format = compressed;
+
+--error ER_CANT_CREATE_TABLE
+create table t2 (id int primary key) engine = innodb
+key_block_size = 8 row_format = redundant;
+show errors;
+
+--error ER_CANT_CREATE_TABLE
+create table t3 (id int primary key) engine = innodb
+key_block_size = 8 row_format = compact;
+show errors;
+
+--error ER_CANT_CREATE_TABLE
+create table t4 (id int primary key) engine = innodb
+key_block_size = 8 row_format = dynamic;
+show errors;
+
+--error ER_CANT_CREATE_TABLE
+create table t5 (id int primary key) engine = innodb
+key_block_size = 8 row_format = default;
+show errors;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+drop table t1;
+
+#test multiple errors
+--error ER_CANT_CREATE_TABLE
+create table t1 (id int primary key) engine = innodb
+key_block_size = 9 row_format = redundant;
+show errors;
+
+--error ER_CANT_CREATE_TABLE
+create table t2 (id int primary key) engine = innodb
+key_block_size = 9 row_format = compact;
+show errors;
+
+--error ER_CANT_CREATE_TABLE
+create table t2 (id int primary key) engine = innodb
+key_block_size = 9 row_format = dynamic;
+show errors;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+
+#test valid values with innodb_file_per_table unset
+set global innodb_file_per_table = off;
+
+--error ER_CANT_CREATE_TABLE
+create table t1 (id int primary key) engine = innodb key_block_size = 1;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t2 (id int primary key) engine = innodb key_block_size = 2;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t3 (id int primary key) engine = innodb key_block_size = 4;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t4 (id int primary key) engine = innodb key_block_size = 8;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t5 (id int primary key) engine = innodb key_block_size = 16;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t6 (id int primary key) engine = innodb row_format = compressed;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t7 (id int primary key) engine = innodb row_format = dynamic;
+show errors;
+create table t8 (id int primary key) engine = innodb row_format = compact;
+create table t9 (id int primary key) engine = innodb row_format = redundant;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+drop table t8, t9;
+
+#test valid values with innodb_file_format unset
+set global innodb_file_per_table = on;
+set global innodb_file_format = `0`; 
+
+--error ER_CANT_CREATE_TABLE
+create table t1 (id int primary key) engine = innodb key_block_size = 1;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t2 (id int primary key) engine = innodb key_block_size = 2;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t3 (id int primary key) engine = innodb key_block_size = 4;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t4 (id int primary key) engine = innodb key_block_size = 8;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t5 (id int primary key) engine = innodb key_block_size = 16;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t6 (id int primary key) engine = innodb row_format = compressed;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t7 (id int primary key) engine = innodb row_format = dynamic;
+show errors;
+create table t8 (id int primary key) engine = innodb row_format = compact;
+create table t9 (id int primary key) engine = innodb row_format = redundant;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+drop table t8, t9;
+
+eval set global innodb_file_per_table=$per_table;
+eval set global innodb_file_format=$format;
+#
+# Testing of tablespace tagging
+#
+-- disable_info
+set global innodb_file_per_table=on;
+set global innodb_file_format=`Barracuda`;
+set global innodb_file_format_check=`Antelope`;
+create table normal_table (
+  c1 int
+) engine = innodb;
+select @@innodb_file_format_check;
+create table zip_table (
+  c1 int
+) engine = innodb key_block_size = 8;
+select @@innodb_file_format_check;
+set global innodb_file_format_check=`Antelope`;
+select @@innodb_file_format_check;
+-- disable_result_log
+show table status;
+-- enable_result_log
+select @@innodb_file_format_check;
+drop table normal_table, zip_table;
+-- disable_result_log
+
+#
+# restore environment to the state it was before this test execution
+#
+
+-- disable_query_log
+eval set global innodb_file_format=$format;
+eval set global innodb_file_per_table=$per_table;
+eval set global innodb_file_format_check=$innodb_file_format_check_orig;
diff --git a/mysql-test/suite/innodb/t/innodb_bug36169.test b/mysql-test/suite/innodb/t/innodb_bug36169.test
new file mode 100644
index 00000000000..da852b816f4
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb_bug36169.test
@@ -0,0 +1,1159 @@
+#
+# Bug#36169 create innodb compressed table with too large row size crashed
+# http://bugs.mysql.com/36169
+#
+
+-- source include/have_innodb.inc
+-- source suite/innodb/include/have_innodb_plugin.inc
+
+SET GLOBAL innodb_file_format='Barracuda';
+SET GLOBAL innodb_file_per_table=ON;
+
+#
+# The following is copied from http://bugs.mysql.com/36169
+# (http://bugs.mysql.com/file.php?id=9121)
+# Probably it can be simplified but that is not obvious.
+#
+
+# we care only that the following SQL commands do produce errors
+# as expected and do not crash the server
+-- disable_query_log
+-- disable_result_log
+
+# Generating 10 tables
+# Creating a table with 94 columns and 24 indexes
+DROP TABLE IF EXISTS `table0`;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table0`
+(`col0` BOOL,
+`col1` BOOL,
+`col2` TINYINT,
+`col3` DATE,
+`col4` TIME,
+`col5` SET ('test1','test2','test3'),
+`col6` TIME,
+`col7` TEXT,
+`col8` DECIMAL,
+`col9` SET ('test1','test2','test3'),
+`col10` FLOAT,
+`col11` DOUBLE PRECISION,
+`col12` ENUM ('test1','test2','test3'),
+`col13` TINYBLOB,
+`col14` YEAR,
+`col15` SET ('test1','test2','test3'),
+`col16` NUMERIC,
+`col17` NUMERIC,
+`col18` BLOB,
+`col19` DATETIME,
+`col20` DOUBLE PRECISION,
+`col21` DECIMAL,
+`col22` DATETIME,
+`col23` NUMERIC,
+`col24` NUMERIC,
+`col25` LONGTEXT,
+`col26` TINYBLOB,
+`col27` TIME,
+`col28` TINYBLOB,
+`col29` ENUM ('test1','test2','test3'),
+`col30` SMALLINT,
+`col31` REAL,
+`col32` FLOAT,
+`col33` CHAR (175),
+`col34` TINYTEXT,
+`col35` TINYTEXT,
+`col36` TINYBLOB,
+`col37` TINYBLOB,
+`col38` TINYTEXT,
+`col39` MEDIUMBLOB,
+`col40` TIMESTAMP,
+`col41` DOUBLE,
+`col42` SMALLINT,
+`col43` LONGBLOB,
+`col44` VARCHAR (80),
+`col45` MEDIUMTEXT,
+`col46` NUMERIC,
+`col47` BIGINT,
+`col48` DATE,
+`col49` TINYBLOB,
+`col50` DATE,
+`col51` BOOL,
+`col52` MEDIUMINT,
+`col53` FLOAT,
+`col54` TINYBLOB,
+`col55` LONGTEXT,
+`col56` SMALLINT,
+`col57` ENUM ('test1','test2','test3'),
+`col58` DATETIME,
+`col59` MEDIUMTEXT,
+`col60` VARCHAR (232),
+`col61` NUMERIC,
+`col62` YEAR,
+`col63` SMALLINT,
+`col64` TIMESTAMP,
+`col65` BLOB,
+`col66` LONGBLOB,
+`col67` INT,
+`col68` LONGTEXT,
+`col69` ENUM ('test1','test2','test3'),
+`col70` INT,
+`col71` TIME,
+`col72` TIMESTAMP,
+`col73` TIMESTAMP,
+`col74` VARCHAR (170),
+`col75` SET ('test1','test2','test3'),
+`col76` TINYBLOB,
+`col77` BIGINT,
+`col78` NUMERIC,
+`col79` DATETIME,
+`col80` YEAR,
+`col81` NUMERIC,
+`col82` LONGBLOB,
+`col83` TEXT,
+`col84` CHAR (83),
+`col85` DECIMAL,
+`col86` FLOAT,
+`col87` INT,
+`col88` VARCHAR (145),
+`col89` DATE,
+`col90` DECIMAL,
+`col91` DECIMAL,
+`col92` MEDIUMBLOB,
+`col93` TIME,
+KEY `idx0` (`col69`,`col90`,`col8`),
+KEY `idx1` (`col60`),
+KEY `idx2` (`col60`,`col70`,`col74`),
+KEY `idx3` (`col22`,`col32`,`col72`,`col30`),
+KEY `idx4` (`col29`),
+KEY `idx5` (`col19`,`col45`(143)),
+KEY `idx6` (`col46`,`col48`,`col5`,`col39`(118)),
+KEY `idx7` (`col48`,`col61`),
+KEY `idx8` (`col93`),
+KEY `idx9` (`col31`),
+KEY `idx10` (`col30`,`col21`),
+KEY `idx11` (`col67`),
+KEY `idx12` (`col44`,`col6`,`col8`,`col38`(226)),
+KEY `idx13` (`col71`,`col41`,`col15`,`col49`(88)),
+KEY `idx14` (`col78`),
+KEY `idx15` (`col63`,`col67`,`col64`),
+KEY `idx16` (`col17`,`col86`),
+KEY `idx17` (`col77`,`col56`,`col10`,`col55`(24)),
+KEY `idx18` (`col62`),
+KEY `idx19` (`col31`,`col57`,`col56`,`col53`),
+KEY `idx20` (`col46`),
+KEY `idx21` (`col83`(54)),
+KEY `idx22` (`col51`,`col7`(120)),
+KEY `idx23` (`col7`(163),`col31`,`col71`,`col14`)
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+# Creating a table with 10 columns and 32 indexes
+DROP TABLE IF EXISTS `table1`;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table1`
+(`col0` CHAR (113),
+`col1` FLOAT,
+`col2` BIGINT,
+`col3` DECIMAL,
+`col4` BLOB,
+`col5` LONGTEXT,
+`col6` SET ('test1','test2','test3'),
+`col7` BIGINT,
+`col8` BIGINT,
+`col9` TINYBLOB,
+KEY `idx0` (`col5`(101),`col7`,`col8`),
+KEY `idx1` (`col8`),
+KEY `idx2` (`col4`(177),`col9`(126),`col6`,`col3`),
+KEY `idx3` (`col5`(160)),
+KEY `idx4` (`col9`(242)),
+KEY `idx5` (`col4`(139),`col2`,`col3`),
+KEY `idx6` (`col7`),
+KEY `idx7` (`col6`,`col2`,`col0`,`col3`),
+KEY `idx8` (`col9`(66)),
+KEY `idx9` (`col5`(253)),
+KEY `idx10` (`col1`,`col7`,`col2`),
+KEY `idx11` (`col9`(242),`col0`,`col8`,`col5`(163)),
+KEY `idx12` (`col8`),
+KEY `idx13` (`col0`,`col9`(37)),
+KEY `idx14` (`col0`),
+KEY `idx15` (`col5`(111)),
+KEY `idx16` (`col8`,`col0`,`col5`(13)),
+KEY `idx17` (`col4`(139)),
+KEY `idx18` (`col5`(189),`col2`,`col3`,`col9`(136)),
+KEY `idx19` (`col0`,`col3`,`col1`,`col8`),
+KEY `idx20` (`col8`),
+KEY `idx21` (`col0`,`col7`,`col9`(227),`col3`),
+KEY `idx22` (`col0`),
+KEY `idx23` (`col2`),
+KEY `idx24` (`col3`),
+KEY `idx25` (`col2`,`col3`),
+KEY `idx26` (`col0`),
+KEY `idx27` (`col5`(254)),
+KEY `idx28` (`col3`),
+KEY `idx29` (`col3`),
+KEY `idx30` (`col7`,`col3`,`col0`,`col4`(220)),
+KEY `idx31` (`col4`(1),`col0`)
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+# Creating a table with 141 columns and 18 indexes
+DROP TABLE IF EXISTS `table2`;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table2`
+(`col0` BOOL,
+`col1` MEDIUMINT,
+`col2` VARCHAR (209),
+`col3` MEDIUMBLOB,
+`col4` CHAR (13),
+`col5` DOUBLE,
+`col6` TINYTEXT,
+`col7` REAL,
+`col8` SMALLINT,
+`col9` BLOB,
+`col10` TINYINT,
+`col11` DECIMAL,
+`col12` BLOB,
+`col13` DECIMAL,
+`col14` LONGBLOB,
+`col15` SMALLINT,
+`col16` LONGBLOB,
+`col17` TINYTEXT,
+`col18` FLOAT,
+`col19` CHAR (78),
+`col20` MEDIUMTEXT,
+`col21` SET ('test1','test2','test3'),
+`col22` MEDIUMINT,
+`col23` INT,
+`col24` MEDIUMBLOB,
+`col25` ENUM ('test1','test2','test3'),
+`col26` TINYBLOB,
+`col27` VARCHAR (116),
+`col28` TIMESTAMP,
+`col29` BLOB,
+`col30` SMALLINT,
+`col31` DOUBLE PRECISION,
+`col32` DECIMAL,
+`col33` DECIMAL,
+`col34` TEXT,
+`col35` MEDIUMINT,
+`col36` MEDIUMINT,
+`col37` BIGINT,
+`col38` VARCHAR (253),
+`col39` TINYBLOB,
+`col40` MEDIUMBLOB,
+`col41` BIGINT,
+`col42` DOUBLE,
+`col43` TEXT,
+`col44` BLOB,
+`col45` TIME,
+`col46` MEDIUMINT,
+`col47` DOUBLE PRECISION,
+`col48` SET ('test1','test2','test3'),
+`col49` DOUBLE PRECISION,
+`col50` VARCHAR (97),
+`col51` TEXT,
+`col52` NUMERIC,
+`col53` ENUM ('test1','test2','test3'),
+`col54` MEDIUMTEXT,
+`col55` MEDIUMINT,
+`col56` DATETIME,
+`col57` DATETIME,
+`col58` MEDIUMTEXT,
+`col59` CHAR (244),
+`col60` LONGBLOB,
+`col61` MEDIUMBLOB,
+`col62` DOUBLE,
+`col63` SMALLINT,
+`col64` BOOL,
+`col65` SMALLINT,
+`col66` VARCHAR (212),
+`col67` TIME,
+`col68` REAL,
+`col69` BOOL,
+`col70` BIGINT,
+`col71` DATE,
+`col72` TINYINT,
+`col73` ENUM ('test1','test2','test3'),
+`col74` DATE,
+`col75` TIME,
+`col76` DATETIME,
+`col77` BOOL,
+`col78` TINYTEXT,
+`col79` MEDIUMINT,
+`col80` NUMERIC,
+`col81` LONGTEXT,
+`col82` SET ('test1','test2','test3'),
+`col83` DOUBLE PRECISION,
+`col84` NUMERIC,
+`col85` VARCHAR (184),
+`col86` DOUBLE PRECISION,
+`col87` MEDIUMTEXT,
+`col88` MEDIUMBLOB,
+`col89` BOOL,
+`col90` SMALLINT,
+`col91` TINYINT,
+`col92` ENUM ('test1','test2','test3'),
+`col93` BOOL,
+`col94` TIMESTAMP,
+`col95` BOOL,
+`col96` MEDIUMTEXT,
+`col97` DECIMAL,
+`col98` BOOL,
+`col99` DECIMAL,
+`col100` MEDIUMINT,
+`col101` DOUBLE PRECISION,
+`col102` TINYINT,
+`col103` BOOL,
+`col104` MEDIUMINT,
+`col105` DECIMAL,
+`col106` NUMERIC,
+`col107` TIMESTAMP,
+`col108` MEDIUMBLOB,
+`col109` TINYBLOB,
+`col110` SET ('test1','test2','test3'),
+`col111` YEAR,
+`col112` TIMESTAMP,
+`col113` CHAR (201),
+`col114` BOOL,
+`col115` TINYINT,
+`col116` DOUBLE,
+`col117` TINYINT,
+`col118` TIMESTAMP,
+`col119` SET ('test1','test2','test3'),
+`col120` SMALLINT,
+`col121` TINYBLOB,
+`col122` TIMESTAMP,
+`col123` BLOB,
+`col124` DATE,
+`col125` SMALLINT,
+`col126` ENUM ('test1','test2','test3'),
+`col127` MEDIUMBLOB,
+`col128` DOUBLE PRECISION,
+`col129` REAL,
+`col130` VARCHAR (159),
+`col131` MEDIUMBLOB,
+`col132` BIGINT,
+`col133` INT,
+`col134` SET ('test1','test2','test3'),
+`col135` CHAR (198),
+`col136` SET ('test1','test2','test3'),
+`col137` MEDIUMTEXT,
+`col138` SMALLINT,
+`col139` BLOB,
+`col140` LONGBLOB,
+KEY `idx0` (`col14`(139),`col24`(208),`col38`,`col35`),
+KEY `idx1` (`col48`,`col118`,`col29`(131),`col100`),
+KEY `idx2` (`col86`,`col67`,`col43`(175)),
+KEY `idx3` (`col19`),
+KEY `idx4` (`col40`(220),`col67`),
+KEY `idx5` (`col99`,`col56`),
+KEY `idx6` (`col68`,`col28`,`col137`(157)),
+KEY `idx7` (`col51`(160),`col99`,`col45`,`col39`(9)),
+KEY `idx8` (`col15`,`col52`,`col90`,`col94`),
+KEY `idx9` (`col24`(3),`col139`(248),`col108`(118),`col41`),
+KEY `idx10` (`col36`,`col92`,`col114`),
+KEY `idx11` (`col115`,`col9`(116)),
+KEY `idx12` (`col130`,`col93`,`col134`),
+KEY `idx13` (`col123`(65)),
+KEY `idx14` (`col44`(90),`col86`,`col119`),
+KEY `idx15` (`col69`),
+KEY `idx16` (`col132`,`col81`(118),`col18`),
+KEY `idx17` (`col24`(250),`col7`,`col92`,`col45`)
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+# Creating a table with 199 columns and 1 indexes
+DROP TABLE IF EXISTS `table3`;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table3`
+(`col0` SMALLINT,
+`col1` SET ('test1','test2','test3'),
+`col2` TINYTEXT,
+`col3` DOUBLE,
+`col4` NUMERIC,
+`col5` DATE,
+`col6` BIGINT,
+`col7` DOUBLE,
+`col8` TEXT,
+`col9` INT,
+`col10` REAL,
+`col11` TINYINT,
+`col12` NUMERIC,
+`col13` NUMERIC,
+`col14` TIME,
+`col15` DOUBLE,
+`col16` REAL,
+`col17` MEDIUMBLOB,
+`col18` YEAR,
+`col19` TINYTEXT,
+`col20` YEAR,
+`col21` CHAR (250),
+`col22` TINYINT,
+`col23` TINYINT,
+`col24` SMALLINT,
+`col25` DATETIME,
+`col26` MEDIUMINT,
+`col27` LONGBLOB,
+`col28` VARCHAR (106),
+`col29` FLOAT,
+`col30` MEDIUMTEXT,
+`col31` TINYBLOB,
+`col32` BIGINT,
+`col33` YEAR,
+`col34` REAL,
+`col35` MEDIUMBLOB,
+`col36` LONGTEXT,
+`col37` LONGBLOB,
+`col38` BIGINT,
+`col39` FLOAT,
+`col40` TIME,
+`col41` DATETIME,
+`col42` BOOL,
+`col43` BIGINT,
+`col44` SMALLINT,
+`col45` TIME,
+`col46` DOUBLE PRECISION,
+`col47` TIME,
+`col48` TINYTEXT,
+`col49` DOUBLE PRECISION,
+`col50` BIGINT,
+`col51` NUMERIC,
+`col52` TINYBLOB,
+`col53` DATE,
+`col54` DECIMAL,
+`col55` SMALLINT,
+`col56` TINYTEXT,
+`col57` ENUM ('test1','test2','test3'),
+`col58` YEAR,
+`col59` TIME,
+`col60` TINYINT,
+`col61` DECIMAL,
+`col62` DOUBLE,
+`col63` DATE,
+`col64` LONGTEXT,
+`col65` DOUBLE,
+`col66` VARCHAR (88),
+`col67` MEDIUMTEXT,
+`col68` DATE,
+`col69` MEDIUMINT,
+`col70` DECIMAL,
+`col71` MEDIUMTEXT,
+`col72` LONGTEXT,
+`col73` REAL,
+`col74` DOUBLE,
+`col75` TIME,
+`col76` DATE,
+`col77` DECIMAL,
+`col78` MEDIUMBLOB,
+`col79` NUMERIC,
+`col80` BIGINT,
+`col81` YEAR,
+`col82` SMALLINT,
+`col83` MEDIUMINT,
+`col84` TINYINT,
+`col85` MEDIUMBLOB,
+`col86` TIME,
+`col87` MEDIUMBLOB,
+`col88` LONGTEXT,
+`col89` BOOL,
+`col90` BLOB,
+`col91` LONGBLOB,
+`col92` YEAR,
+`col93` BLOB,
+`col94` INT,
+`col95` TINYTEXT,
+`col96` TINYINT,
+`col97` DECIMAL,
+`col98` ENUM ('test1','test2','test3'),
+`col99` MEDIUMINT,
+`col100` TINYINT,
+`col101` MEDIUMBLOB,
+`col102` TINYINT,
+`col103` SET ('test1','test2','test3'),
+`col104` TIMESTAMP,
+`col105` TEXT,
+`col106` DATETIME,
+`col107` MEDIUMTEXT,
+`col108` CHAR (220),
+`col109` TIME,
+`col110` VARCHAR (131),
+`col111` DECIMAL,
+`col112` FLOAT,
+`col113` SMALLINT,
+`col114` BIGINT,
+`col115` LONGBLOB,
+`col116` SET ('test1','test2','test3'),
+`col117` ENUM ('test1','test2','test3'),
+`col118` BLOB,
+`col119` MEDIUMTEXT,
+`col120` SET ('test1','test2','test3'),
+`col121` DATETIME,
+`col122` FLOAT,
+`col123` VARCHAR (242),
+`col124` YEAR,
+`col125` MEDIUMBLOB,
+`col126` TIME,
+`col127` BOOL,
+`col128` TINYBLOB,
+`col129` DOUBLE,
+`col130` TINYINT,
+`col131` BIGINT,
+`col132` SMALLINT,
+`col133` INT,
+`col134` DOUBLE PRECISION,
+`col135` MEDIUMBLOB,
+`col136` SET ('test1','test2','test3'),
+`col137` TINYTEXT,
+`col138` DOUBLE PRECISION,
+`col139` NUMERIC,
+`col140` BLOB,
+`col141` SET ('test1','test2','test3'),
+`col142` INT,
+`col143` VARCHAR (26),
+`col144` BLOB,
+`col145` REAL,
+`col146` SET ('test1','test2','test3'),
+`col147` LONGBLOB,
+`col148` TEXT,
+`col149` BLOB,
+`col150` CHAR (189),
+`col151` LONGTEXT,
+`col152` INT,
+`col153` FLOAT,
+`col154` LONGTEXT,
+`col155` DATE,
+`col156` LONGBLOB,
+`col157` TINYBLOB,
+`col158` REAL,
+`col159` DATE,
+`col160` TIME,
+`col161` YEAR,
+`col162` DOUBLE,
+`col163` VARCHAR (90),
+`col164` FLOAT,
+`col165` NUMERIC,
+`col166` ENUM ('test1','test2','test3'),
+`col167` DOUBLE PRECISION,
+`col168` DOUBLE PRECISION,
+`col169` TINYBLOB,
+`col170` TIME,
+`col171` SMALLINT,
+`col172` TINYTEXT,
+`col173` SMALLINT,
+`col174` DOUBLE,
+`col175` VARCHAR (14),
+`col176` VARCHAR (90),
+`col177` REAL,
+`col178` MEDIUMINT,
+`col179` TINYBLOB,
+`col180` FLOAT,
+`col181` TIMESTAMP,
+`col182` REAL,
+`col183` DOUBLE PRECISION,
+`col184` BIGINT,
+`col185` INT,
+`col186` MEDIUMTEXT,
+`col187` TIME,
+`col188` FLOAT,
+`col189` TIME,
+`col190` INT,
+`col191` FLOAT,
+`col192` MEDIUMINT,
+`col193` TINYINT,
+`col194` MEDIUMTEXT,
+`col195` DATE,
+`col196` TIME,
+`col197` YEAR,
+`col198` CHAR (206),
+KEY `idx0` (`col39`,`col23`)
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+# Creating a table with 133 columns and 16 indexes
+DROP TABLE IF EXISTS `table4`;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table4`
+(`col0` VARCHAR (60),
+`col1` NUMERIC,
+`col2` LONGTEXT,
+`col3` MEDIUMTEXT,
+`col4` LONGTEXT,
+`col5` LONGBLOB,
+`col6` LONGBLOB,
+`col7` DATETIME,
+`col8` TINYTEXT,
+`col9` BLOB,
+`col10` BOOL,
+`col11` BIGINT,
+`col12` TEXT,
+`col13` VARCHAR (213),
+`col14` TINYBLOB,
+`col15` BOOL,
+`col16` MEDIUMTEXT,
+`col17` DOUBLE,
+`col18` TEXT,
+`col19` BLOB,
+`col20` SET ('test1','test2','test3'),
+`col21` TINYINT,
+`col22` DATETIME,
+`col23` TINYINT,
+`col24` ENUM ('test1','test2','test3'),
+`col25` REAL,
+`col26` BOOL,
+`col27` FLOAT,
+`col28` LONGBLOB,
+`col29` DATETIME,
+`col30` FLOAT,
+`col31` SET ('test1','test2','test3'),
+`col32` LONGBLOB,
+`col33` NUMERIC,
+`col34` YEAR,
+`col35` VARCHAR (146),
+`col36` BIGINT,
+`col37` DATETIME,
+`col38` DATE,
+`col39` SET ('test1','test2','test3'),
+`col40` CHAR (112),
+`col41` FLOAT,
+`col42` YEAR,
+`col43` TIME,
+`col44` DOUBLE,
+`col45` NUMERIC,
+`col46` FLOAT,
+`col47` DECIMAL,
+`col48` BIGINT,
+`col49` DECIMAL,
+`col50` YEAR,
+`col51` MEDIUMTEXT,
+`col52` LONGBLOB,
+`col53` SET ('test1','test2','test3'),
+`col54` BLOB,
+`col55` FLOAT,
+`col56` REAL,
+`col57` REAL,
+`col58` TEXT,
+`col59` MEDIUMBLOB,
+`col60` INT,
+`col61` INT,
+`col62` DATE,
+`col63` TEXT,
+`col64` DATE,
+`col65` ENUM ('test1','test2','test3'),
+`col66` DOUBLE PRECISION,
+`col67` TINYTEXT,
+`col68` TINYBLOB,
+`col69` FLOAT,
+`col70` BLOB,
+`col71` DATETIME,
+`col72` DOUBLE,
+`col73` LONGTEXT,
+`col74` TIME,
+`col75` DATETIME,
+`col76` VARCHAR (122),
+`col77` MEDIUMTEXT,
+`col78` MEDIUMTEXT,
+`col79` BOOL,
+`col80` LONGTEXT,
+`col81` TINYTEXT,
+`col82` NUMERIC,
+`col83` DOUBLE PRECISION,
+`col84` DATE,
+`col85` YEAR,
+`col86` BLOB,
+`col87` TINYTEXT,
+`col88` DOUBLE PRECISION,
+`col89` MEDIUMINT,
+`col90` MEDIUMTEXT,
+`col91` NUMERIC,
+`col92` DATETIME,
+`col93` NUMERIC,
+`col94` SET ('test1','test2','test3'),
+`col95` TINYTEXT,
+`col96` SET ('test1','test2','test3'),
+`col97` YEAR,
+`col98` MEDIUMINT,
+`col99` TEXT,
+`col100` TEXT,
+`col101` TIME,
+`col102` VARCHAR (225),
+`col103` TINYTEXT,
+`col104` TEXT,
+`col105` MEDIUMTEXT,
+`col106` TINYINT,
+`col107` TEXT,
+`col108` LONGBLOB,
+`col109` LONGTEXT,
+`col110` TINYTEXT,
+`col111` CHAR (56),
+`col112` YEAR,
+`col113` ENUM ('test1','test2','test3'),
+`col114` TINYBLOB,
+`col115` DATETIME,
+`col116` DATE,
+`col117` TIME,
+`col118` MEDIUMTEXT,
+`col119` DOUBLE PRECISION,
+`col120` FLOAT,
+`col121` TIMESTAMP,
+`col122` MEDIUMINT,
+`col123` YEAR,
+`col124` DATE,
+`col125` TEXT,
+`col126` FLOAT,
+`col127` TINYTEXT,
+`col128` BOOL,
+`col129` NUMERIC,
+`col130` TIMESTAMP,
+`col131` INT,
+`col132` MEDIUMBLOB,
+KEY `idx0` (`col130`),
+KEY `idx1` (`col30`,`col55`,`col19`(31)),
+KEY `idx2` (`col104`(186)),
+KEY `idx3` (`col131`),
+KEY `idx4` (`col64`,`col93`,`col2`(11)),
+KEY `idx5` (`col34`,`col121`,`col22`),
+KEY `idx6` (`col33`,`col55`,`col83`),
+KEY `idx7` (`col17`,`col87`(245),`col99`(17)),
+KEY `idx8` (`col65`,`col120`),
+KEY `idx9` (`col82`),
+KEY `idx10` (`col9`(72)),
+KEY `idx11` (`col88`),
+KEY `idx12` (`col128`,`col9`(200),`col71`,`col66`),
+KEY `idx13` (`col77`(126)),
+KEY `idx14` (`col105`(26),`col13`,`col117`),
+KEY `idx15` (`col4`(246),`col130`,`col115`,`col3`(141))
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+# Creating a table with 176 columns and 13 indexes
+DROP TABLE IF EXISTS `table5`;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table5`
+(`col0` MEDIUMTEXT,
+`col1` VARCHAR (90),
+`col2` TINYTEXT,
+`col3` TIME,
+`col4` BOOL,
+`col5` TINYTEXT,
+`col6` BOOL,
+`col7` TIMESTAMP,
+`col8` TINYBLOB,
+`col9` TINYINT,
+`col10` YEAR,
+`col11` SET ('test1','test2','test3'),
+`col12` TEXT,
+`col13` CHAR (248),
+`col14` BIGINT,
+`col15` TEXT,
+`col16` TINYINT,
+`col17` NUMERIC,
+`col18` SET ('test1','test2','test3'),
+`col19` LONGBLOB,
+`col20` FLOAT,
+`col21` INT,
+`col22` TEXT,
+`col23` BOOL,
+`col24` DECIMAL,
+`col25` DOUBLE PRECISION,
+`col26` FLOAT,
+`col27` TINYBLOB,
+`col28` NUMERIC,
+`col29` MEDIUMBLOB,
+`col30` DATE,
+`col31` LONGTEXT,
+`col32` DATE,
+`col33` FLOAT,
+`col34` BIGINT,
+`col35` TINYTEXT,
+`col36` MEDIUMTEXT,
+`col37` TIME,
+`col38` INT,
+`col39` TINYINT,
+`col40` SET ('test1','test2','test3'),
+`col41` CHAR (130),
+`col42` SMALLINT,
+`col43` INT,
+`col44` MEDIUMTEXT,
+`col45` VARCHAR (126),
+`col46` INT,
+`col47` DOUBLE PRECISION,
+`col48` BIGINT,
+`col49` MEDIUMTEXT,
+`col50` TINYBLOB,
+`col51` MEDIUMINT,
+`col52` TEXT,
+`col53` VARCHAR (208),
+`col54` VARCHAR (207),
+`col55` NUMERIC,
+`col56` DATETIME,
+`col57` ENUM ('test1','test2','test3'),
+`col58` NUMERIC,
+`col59` TINYBLOB,
+`col60` VARCHAR (73),
+`col61` MEDIUMTEXT,
+`col62` TINYBLOB,
+`col63` DATETIME,
+`col64` NUMERIC,
+`col65` MEDIUMINT,
+`col66` DATETIME,
+`col67` NUMERIC,
+`col68` TINYINT,
+`col69` VARCHAR (58),
+`col70` DECIMAL,
+`col71` MEDIUMTEXT,
+`col72` DATE,
+`col73` TIME,
+`col74` DOUBLE PRECISION,
+`col75` DECIMAL,
+`col76` MEDIUMBLOB,
+`col77` REAL,
+`col78` YEAR,
+`col79` YEAR,
+`col80` LONGBLOB,
+`col81` BLOB,
+`col82` BIGINT,
+`col83` ENUM ('test1','test2','test3'),
+`col84` NUMERIC,
+`col85` SET ('test1','test2','test3'),
+`col86` MEDIUMTEXT,
+`col87` LONGBLOB,
+`col88` TIME,
+`col89` ENUM ('test1','test2','test3'),
+`col90` DECIMAL,
+`col91` FLOAT,
+`col92` DATETIME,
+`col93` TINYTEXT,
+`col94` TIMESTAMP,
+`col95` TIMESTAMP,
+`col96` TEXT,
+`col97` REAL,
+`col98` VARCHAR (198),
+`col99` TIME,
+`col100` TINYINT,
+`col101` BIGINT,
+`col102` LONGBLOB,
+`col103` LONGBLOB,
+`col104` MEDIUMINT,
+`col105` MEDIUMTEXT,
+`col106` TIMESTAMP,
+`col107` SMALLINT,
+`col108` NUMERIC,
+`col109` DECIMAL,
+`col110` FLOAT,
+`col111` DECIMAL,
+`col112` REAL,
+`col113` TINYTEXT,
+`col114` FLOAT,
+`col115` VARCHAR (7),
+`col116` LONGTEXT,
+`col117` DATE,
+`col118` BIGINT,
+`col119` TEXT,
+`col120` BIGINT,
+`col121` BLOB,
+`col122` CHAR (110),
+`col123` NUMERIC,
+`col124` MEDIUMBLOB,
+`col125` NUMERIC,
+`col126` NUMERIC,
+`col127` BOOL,
+`col128` TIME,
+`col129` TINYBLOB,
+`col130` TINYBLOB,
+`col131` DATE,
+`col132` INT,
+`col133` VARCHAR (123),
+`col134` CHAR (238),
+`col135` VARCHAR (225),
+`col136` LONGTEXT,
+`col137` LONGBLOB,
+`col138` REAL,
+`col139` TINYBLOB,
+`col140` DATETIME,
+`col141` TINYTEXT,
+`col142` LONGBLOB,
+`col143` BIGINT,
+`col144` VARCHAR (236),
+`col145` TEXT,
+`col146` YEAR,
+`col147` DECIMAL,
+`col148` TEXT,
+`col149` MEDIUMBLOB,
+`col150` TINYINT,
+`col151` BOOL,
+`col152` VARCHAR (72),
+`col153` INT,
+`col154` VARCHAR (165),
+`col155` TINYINT,
+`col156` MEDIUMTEXT,
+`col157` DOUBLE PRECISION,
+`col158` TIME,
+`col159` MEDIUMBLOB,
+`col160` LONGBLOB,
+`col161` DATETIME,
+`col162` DOUBLE PRECISION,
+`col163` BLOB,
+`col164` ENUM ('test1','test2','test3'),
+`col165` TIMESTAMP,
+`col166` DATE,
+`col167` TINYBLOB,
+`col168` TINYBLOB,
+`col169` LONGBLOB,
+`col170` DATETIME,
+`col171` BIGINT,
+`col172` VARCHAR (30),
+`col173` LONGTEXT,
+`col174` TIME,
+`col175` FLOAT,
+KEY `idx0` (`col16`,`col156`(139),`col97`,`col120`),
+KEY `idx1` (`col24`,`col0`(108)),
+KEY `idx2` (`col117`,`col173`(34),`col132`,`col82`),
+KEY `idx3` (`col2`(86)),
+KEY `idx4` (`col2`(43)),
+KEY `idx5` (`col83`,`col35`(87),`col111`),
+KEY `idx6` (`col6`,`col134`,`col92`),
+KEY `idx7` (`col56`),
+KEY `idx8` (`col30`,`col53`,`col129`(66)),
+KEY `idx9` (`col53`,`col113`(211),`col32`,`col15`(75)),
+KEY `idx10` (`col34`),
+KEY `idx11` (`col126`),
+KEY `idx12` (`col24`)
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+# Creating a table with 179 columns and 46 indexes
+DROP TABLE IF EXISTS `table6`;
+-- error ER_TOO_BIG_ROWSIZE
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table6`
+(`col0` ENUM ('test1','test2','test3'),
+`col1` MEDIUMBLOB,
+`col2` MEDIUMBLOB,
+`col3` DATETIME,
+`col4` DATE,
+`col5` YEAR,
+`col6` REAL,
+`col7` NUMERIC,
+`col8` MEDIUMBLOB,
+`col9` TEXT,
+`col10` TIMESTAMP,
+`col11` DOUBLE,
+`col12` DOUBLE,
+`col13` SMALLINT,
+`col14` TIMESTAMP,
+`col15` DECIMAL,
+`col16` DATE,
+`col17` TEXT,
+`col18` LONGBLOB,
+`col19` BIGINT,
+`col20` FLOAT,
+`col21` DATETIME,
+`col22` TINYINT,
+`col23` MEDIUMBLOB,
+`col24` SET ('test1','test2','test3'),
+`col25` TIME,
+`col26` TEXT,
+`col27` LONGTEXT,
+`col28` BIGINT,
+`col29` REAL,
+`col30` YEAR,
+`col31` MEDIUMBLOB,
+`col32` MEDIUMINT,
+`col33` FLOAT,
+`col34` TEXT,
+`col35` DATE,
+`col36` TIMESTAMP,
+`col37` REAL,
+`col38` BLOB,
+`col39` BLOB,
+`col40` BLOB,
+`col41` TINYBLOB,
+`col42` INT,
+`col43` TINYINT,
+`col44` REAL,
+`col45` BIGINT,
+`col46` TIMESTAMP,
+`col47` BLOB,
+`col48` ENUM ('test1','test2','test3'),
+`col49` BOOL,
+`col50` CHAR (109),
+`col51` DOUBLE,
+`col52` DOUBLE PRECISION,
+`col53` ENUM ('test1','test2','test3'),
+`col54` FLOAT,
+`col55` DOUBLE PRECISION,
+`col56` CHAR (166),
+`col57` TEXT,
+`col58` TIME,
+`col59` DECIMAL,
+`col60` TEXT,
+`col61` ENUM ('test1','test2','test3'),
+`col62` LONGTEXT,
+`col63` YEAR,
+`col64` DOUBLE,
+`col65` CHAR (87),
+`col66` DATE,
+`col67` BOOL,
+`col68` MEDIUMBLOB,
+`col69` DATETIME,
+`col70` DECIMAL,
+`col71` TIME,
+`col72` REAL,
+`col73` LONGTEXT,
+`col74` BLOB,
+`col75` REAL,
+`col76` INT,
+`col77` INT,
+`col78` FLOAT,
+`col79` DOUBLE,
+`col80` MEDIUMINT,
+`col81` ENUM ('test1','test2','test3'),
+`col82` VARCHAR (221),
+`col83` BIGINT,
+`col84` TINYINT,
+`col85` BIGINT,
+`col86` FLOAT,
+`col87` MEDIUMBLOB,
+`col88` CHAR (126),
+`col89` MEDIUMBLOB,
+`col90` DATETIME,
+`col91` TINYINT,
+`col92` DOUBLE,
+`col93` NUMERIC,
+`col94` DATE,
+`col95` BLOB,
+`col96` DATETIME,
+`col97` TIME,
+`col98` LONGBLOB,
+`col99` INT,
+`col100` SET ('test1','test2','test3'),
+`col101` TINYBLOB,
+`col102` INT,
+`col103` MEDIUMBLOB,
+`col104` MEDIUMTEXT,
+`col105` FLOAT,
+`col106` TINYBLOB,
+`col107` VARCHAR (26),
+`col108` TINYINT,
+`col109` TIME,
+`col110` TINYBLOB,
+`col111` LONGBLOB,
+`col112` TINYTEXT,
+`col113` FLOAT,
+`col114` TINYINT,
+`col115` NUMERIC,
+`col116` TIME,
+`col117` SET ('test1','test2','test3'),
+`col118` DATE,
+`col119` SMALLINT,
+`col120` BLOB,
+`col121` TINYTEXT,
+`col122` REAL,
+`col123` YEAR,
+`col124` REAL,
+`col125` BOOL,
+`col126` BLOB,
+`col127` REAL,
+`col128` MEDIUMBLOB,
+`col129` TIMESTAMP,
+`col130` LONGBLOB,
+`col131` MEDIUMBLOB,
+`col132` YEAR,
+`col133` YEAR,
+`col134` INT,
+`col135` MEDIUMINT,
+`col136` MEDIUMINT,
+`col137` TINYTEXT,
+`col138` TINYBLOB,
+`col139` BLOB,
+`col140` SET ('test1','test2','test3'),
+`col141` ENUM ('test1','test2','test3'),
+`col142` ENUM ('test1','test2','test3'),
+`col143` TINYTEXT,
+`col144` DATETIME,
+`col145` TEXT,
+`col146` DOUBLE PRECISION,
+`col147` DECIMAL,
+`col148` MEDIUMTEXT,
+`col149` TINYTEXT,
+`col150` SET ('test1','test2','test3'),
+`col151` MEDIUMTEXT,
+`col152` CHAR (126),
+`col153` DOUBLE,
+`col154` CHAR (243),
+`col155` SET ('test1','test2','test3'),
+`col156` SET ('test1','test2','test3'),
+`col157` DATETIME,
+`col158` DOUBLE,
+`col159` NUMERIC,
+`col160` DECIMAL,
+`col161` FLOAT,
+`col162` LONGBLOB,
+`col163` LONGTEXT,
+`col164` INT,
+`col165` TIME,
+`col166` CHAR (27),
+`col167` VARCHAR (63),
+`col168` TEXT,
+`col169` TINYBLOB,
+`col170` TINYBLOB,
+`col171` ENUM ('test1','test2','test3'),
+`col172` INT,
+`col173` TIME,
+`col174` DECIMAL,
+`col175` DOUBLE,
+`col176` MEDIUMBLOB,
+`col177` LONGBLOB,
+`col178` CHAR (43),
+KEY `idx0` (`col131`(219)),
+KEY `idx1` (`col67`,`col122`,`col59`,`col87`(33)),
+KEY `idx2` (`col83`,`col42`,`col57`(152)),
+KEY `idx3` (`col106`(124)),
+KEY `idx4` (`col173`,`col80`,`col165`,`col89`(78)),
+KEY `idx5` (`col174`,`col145`(108),`col23`(228),`col141`),
+KEY `idx6` (`col157`,`col140`),
+KEY `idx7` (`col130`(188),`col15`),
+KEY `idx8` (`col52`),
+KEY `idx9` (`col144`),
+KEY `idx10` (`col155`),
+KEY `idx11` (`col62`(230),`col1`(109)),
+KEY `idx12` (`col151`(24),`col95`(85)),
+KEY `idx13` (`col114`),
+KEY `idx14` (`col42`,`col98`(56),`col146`),
+KEY `idx15` (`col147`,`col39`(254),`col35`),
+KEY `idx16` (`col79`),
+KEY `idx17` (`col65`),
+KEY `idx18` (`col149`(165),`col168`(119),`col32`,`col117`),
+KEY `idx19` (`col64`),
+KEY `idx20` (`col93`),
+KEY `idx21` (`col64`,`col113`,`col104`(182)),
+KEY `idx22` (`col52`,`col111`(189)),
+KEY `idx23` (`col45`),
+KEY `idx24` (`col154`,`col107`,`col110`(159)),
+KEY `idx25` (`col149`(1),`col87`(131)),
+KEY `idx26` (`col58`,`col115`,`col63`),
+KEY `idx27` (`col95`(9),`col0`,`col87`(113)),
+KEY `idx28` (`col92`,`col130`(1)),
+KEY `idx29` (`col151`(129),`col137`(254),`col13`),
+KEY `idx30` (`col49`),
+KEY `idx31` (`col28`),
+KEY `idx32` (`col83`,`col146`),
+KEY `idx33` (`col155`,`col90`,`col17`(245)),
+KEY `idx34` (`col174`,`col169`(44),`col107`),
+KEY `idx35` (`col113`),
+KEY `idx36` (`col52`),
+KEY `idx37` (`col16`,`col120`(190)),
+KEY `idx38` (`col28`),
+KEY `idx39` (`col131`(165)),
+KEY `idx40` (`col135`,`col26`(86)),
+KEY `idx41` (`col69`,`col94`),
+KEY `idx42` (`col105`,`col151`(38),`col97`),
+KEY `idx43` (`col88`),
+KEY `idx44` (`col176`(100),`col42`,`col73`(189),`col94`),
+KEY `idx45` (`col2`(27),`col27`(116))
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+DROP TABLE IF EXISTS table0;
+DROP TABLE IF EXISTS table1;
+DROP TABLE IF EXISTS table2;
+DROP TABLE IF EXISTS table3;
+DROP TABLE IF EXISTS table4;
+DROP TABLE IF EXISTS table5;
+DROP TABLE IF EXISTS table6;
+
+SET GLOBAL innodb_file_per_table=DEFAULT;
+SET GLOBAL innodb_file_format='Antelope';
+SET GLOBAL innodb_file_format_check='Antelope';
diff --git a/mysql-test/suite/innodb/t/innodb_bug36172.test b/mysql-test/suite/innodb/t/innodb_bug36172.test
new file mode 100644
index 00000000000..9e1308d5fc3
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb_bug36172.test
@@ -0,0 +1,31 @@
+#
+# Test case for bug 36172
+#
+
+-- source include/not_embedded.inc
+-- source include/have_innodb.inc
+-- source suite/innodb/include/have_innodb_plugin.inc
+
+SET storage_engine=InnoDB;
+
+# we do not really care about what gets printed, we are only
+# interested in getting success or failure according to our
+# expectations
+
+-- disable_query_log
+-- disable_result_log
+
+SET GLOBAL innodb_file_format='Barracuda';
+SET GLOBAL innodb_file_per_table=on;
+
+DROP TABLE IF EXISTS `table0`;
+CREATE TABLE `table0` (   `col0` tinyint(1) DEFAULT NULL,   `col1` tinyint(1) DEFAULT NULL,   `col2` tinyint(4) DEFAULT NULL,   `col3` date DEFAULT NULL,   `col4` time DEFAULT NULL,   `col5` set('test1','test2','test3') DEFAULT NULL,   `col6` time DEFAULT NULL,   `col7` text,   `col8` decimal(10,0) DEFAULT NULL,   `col9` set('test1','test2','test3') DEFAULT NULL,   `col10` float DEFAULT NULL,   `col11` double DEFAULT NULL,   `col12` enum('test1','test2','test3') DEFAULT NULL,   `col13` tinyblob,   `col14` year(4) DEFAULT NULL,   `col15` set('test1','test2','test3') DEFAULT NULL,   `col16` decimal(10,0) DEFAULT NULL,   `col17` decimal(10,0) DEFAULT NULL,   `col18` blob,   `col19` datetime DEFAULT NULL,   `col20` double DEFAULT NULL,   `col21` decimal(10,0) DEFAULT NULL,   `col22` datetime DEFAULT NULL,   `col23` decimal(10,0) DEFAULT NULL,   `col24` decimal(10,0) DEFAULT NULL,   `col25` longtext,   `col26` tinyblob,   `col27` time DEFAULT NULL,   `col28` tinyblob,   `col29` enum('test1','test2','test3') DEFAULT NULL,   `col30` smallint(6) DEFAULT NULL,   `col31` double DEFAULT NULL,   `col32` float DEFAULT NULL,   `col33` char(175) DEFAULT NULL,   `col34` tinytext,   `col35` tinytext,   `col36` tinyblob,   `col37` tinyblob,   `col38` tinytext,   `col39` mediumblob,   `col40` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,   `col41` double DEFAULT NULL,   `col42` smallint(6) DEFAULT NULL,   `col43` longblob,   `col44` varchar(80) DEFAULT NULL,   `col45` mediumtext,   `col46` decimal(10,0) DEFAULT NULL,   `col47` bigint(20) DEFAULT NULL,   `col48` date DEFAULT NULL,   `col49` tinyblob,   `col50` date DEFAULT NULL,   `col51` tinyint(1) DEFAULT NULL,   `col52` mediumint(9) DEFAULT NULL,   `col53` float DEFAULT NULL,   `col54` tinyblob,   `col55` longtext,   `col56` smallint(6) DEFAULT NULL,   `col57` enum('test1','test2','test3') DEFAULT NULL,   `col58` datetime DEFAULT NULL,   `col59` mediumtext,   `col60` varchar(232) DEFAULT NULL,   `col61` decimal(10,0) DEFAULT NULL,   `col62` year(4) DEFAULT NULL,   `col63` smallint(6) DEFAULT NULL,   `col64` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',   `col65` blob,   `col66` longblob,   `col67` int(11) DEFAULT NULL,   `col68` longtext,   `col69` enum('test1','test2','test3') DEFAULT NULL,   `col70` int(11) DEFAULT NULL,   `col71` time DEFAULT NULL,   `col72` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',   `col73` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',   `col74` varchar(170) DEFAULT NULL,   `col75` set('test1','test2','test3') DEFAULT NULL,   `col76` tinyblob,   `col77` bigint(20) DEFAULT NULL,   `col78` decimal(10,0) DEFAULT NULL,   `col79` datetime DEFAULT NULL,   `col80` year(4) DEFAULT NULL,   `col81` decimal(10,0) DEFAULT NULL,   `col82` longblob,   `col83` text,   `col84` char(83) DEFAULT NULL,   `col85` decimal(10,0) DEFAULT NULL,   `col86` float DEFAULT NULL,   `col87` int(11) DEFAULT NULL,   `col88` varchar(145) DEFAULT NULL,   `col89` date DEFAULT NULL,   `col90` decimal(10,0) DEFAULT NULL,   `col91` decimal(10,0) DEFAULT NULL,   `col92` mediumblob,   `col93` time DEFAULT NULL,   KEY `idx0` (`col69`,`col90`,`col8`),   KEY `idx1` (`col60`),   KEY `idx2` (`col60`,`col70`,`col74`),   KEY `idx3` (`col22`,`col32`,`col72`,`col30`),   KEY `idx4` (`col29`),   KEY `idx5` (`col19`,`col45`(143)),   KEY `idx6` (`col46`,`col48`,`col5`,`col39`(118)),   KEY `idx7` (`col48`,`col61`),   KEY `idx8` (`col93`),   KEY `idx9` (`col31`),   KEY `idx10` (`col30`,`col21`),   KEY `idx11` (`col67`),   KEY `idx12` (`col44`,`col6`,`col8`,`col38`(226)),   KEY `idx13` (`col71`,`col41`,`col15`,`col49`(88)),   KEY `idx14` (`col78`),   KEY `idx15` (`col63`,`col67`,`col64`),   KEY `idx16` (`col17`,`col86`),   KEY `idx17` (`col77`,`col56`,`col10`,`col55`(24)),   KEY `idx18` (`col62`),   KEY `idx19` (`col31`,`col57`,`col56`,`col53`),   KEY `idx20` (`col46`),   KEY `idx21` (`col83`(54)),   KEY `idx22` (`col51`,`col7`(120)),   KEY `idx23` (`col7`(163),`col31`,`col71`,`col14`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2;
+insert ignore into `table0` set  `col23` = 7887371.5084383683, `col24` = 4293854615.6906948000, `col25` = 'vitalist', `col26` = 'widespread', `col27` = '3570490', `col28` = 'habitual', `col30` = -5471, `col31` = 4286985783.6771750000, `col32` = 6354540.9826654866, `col33` = 'defoliation', `col34` = 'logarithms', `col35` = 'tegument\'s', `col36` = 'scouting\'s', `col37` = 'intermittency', `col38` = 'elongates', `col39` = 'prophecies', `col40` = '20560103035939', `col41` = 4292809130.0544143000, `col42` = 22057, `col43` = 'Hess\'s', `col44` = 'bandstand', `col45` = 'phenylketonuria', `col46` = 6338767.4018677324, `col47` = 5310247, `col48` = '12592418', `col49` = 'churchman\'s', `col50` = '32226125', `col51` = -58, `col52` = -6207968, `col53` = 1244839.3255104220, `col54` = 'robotized', `col55` = 'monotonous', `col56` = -26909, `col58` = '20720107023550', `col59` = 'suggestiveness\'s', `col60` = 'gemology', `col61` = 4287800670.2229986000, `col62` = '1944', `col63` = -16827, `col64` = '20700107212324', `col65` = 'Nicolais', `col66` = 'apteryx', `col67` = 6935317, `col68` = 'stroganoff', `col70` = 3316430, `col71` = '3277608', `col72` = '19300511045918', `col73` = '20421201003327', `col74` = 'attenuant', `col75` = '15173', `col76` = 'upstroke\'s', `col77` = 8118987, `col78` = 6791516.2735374002, `col79` = '20780701144624', `col80` = '2134', `col81` = 4290682351.3127537000, `col82` = 'unexplainably', `col83` = 'Storm', `col84` = 'Greyso\'s', `col85` = 4289119212.4306774000, `col86` = 7617575.8796655172, `col87` = -6325335, `col88` = 'fondue\'s', `col89` = '40608940', `col90` = 1659421.8093508712, `col91` = 8346904.6584368423, `col92` = 'reloads', `col93` = '5188366';
+CHECK TABLE table0 EXTENDED;
+INSERT IGNORE INTO `table0` SET `col19` = '19940127002709', `col20` = 2383927.9055146948, `col21` = 4293243420.5621204000, `col22` = '20511211123705', `col23` = 4289899778.6573381000, `col24` = 4293449279.0540481000, `col25` = 'emphysemic', `col26` = 'dentally', `col27` = '2347406', `col28` = 'eruct', `col30` = 1222, `col31` = 4294372994.9941406000, `col32` = 4291385574.1173744000, `col33` = 'borrowing\'s', `col34` = 'septics', `col35` = 'ratter\'s', `col36` = 'Kaye', `col37` = 'Florentia', `col38` = 'allium', `col39` = 'barkeep', `col40` = '19510407003441', `col41` = 4293559200.4215522000, `col42` = 22482, `col43` = 'decussate', `col44` = 'Brom\'s', `col45` = 'violated', `col46` = 4925506.4635456400, `col47` = 930549, `col48` = '51296066', `col49` = 'voluminously', `col50` = '29306676', `col51` = -88, `col52` = -2153690, `col53` = 4290250202.1464887000, `col54` = 'expropriation', `col55` = 'Aberdeen\'s', `col56` = 20343, `col58` = '19640415171532', `col59` = 'extern', `col60` = 'Ubana', `col61` = 4290487961.8539081000, `col62` = '2147', `col63` = -24271, `col64` = '20750801194548', `col65` = 'Cunaxa\'s', `col66` = 'pasticcio', `col67` = 2795817, `col68` = 'Indore\'s', `col70` = 6864127, `col71` = '1817832', `col72` = '20540506114211', `col73` = '20040101012300', `col74` = 'rationalized', `col75` = '45522', `col76` = 'indene', `col77` = -6964559, `col78` = 4247535.5266884370, `col79` = '20720416124357', `col80` = '2143', `col81` = 4292060102.4466386000, `col82` = 'striving', `col83` = 'boneblack\'s', `col84` = 'redolent', `col85` = 6489697.9009369183, `col86` = 4287473465.9731131000, `col87` = 7726015, `col88` = 'perplexed', `col89` = '17153791', `col90` = 5478587.1108127078, `col91` = 4287091404.7004304000, `col92` = 'Boulez\'s', `col93` = '2931278';
+CHECK TABLE table0 EXTENDED;
+DROP TABLE table0;
+
+SET GLOBAL innodb_file_per_table=DEFAULT;
+SET GLOBAL innodb_file_format='Antelope';
+SET GLOBAL innodb_file_format_check='Antelope';
diff --git a/mysql-test/suite/innodb/t/innodb_bug40360.test b/mysql-test/suite/innodb/t/innodb_bug40360.test
new file mode 100644
index 00000000000..e88837aab4f
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb_bug40360.test
@@ -0,0 +1,16 @@
+#
+# Make sure http://bugs.mysql.com/40360 remains fixed.
+#
+
+-- source include/not_embedded.inc
+-- source include/have_innodb.inc
+
+SET TX_ISOLATION='READ-COMMITTED';
+
+# This is the default since MySQL 5.1.29 SET BINLOG_FORMAT='STATEMENT';
+
+CREATE TABLE bug40360 (a INT) engine=innodb;
+
+INSERT INTO bug40360 VALUES (1);
+
+DROP TABLE bug40360;
diff --git a/mysql-test/suite/innodb/t/innodb_bug41904.test b/mysql-test/suite/innodb/t/innodb_bug41904.test
new file mode 100644
index 00000000000..365c5229adc
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb_bug41904.test
@@ -0,0 +1,14 @@
+#
+# Make sure http://bugs.mysql.com/41904 remains fixed.
+#
+
+-- source include/not_embedded.inc
+-- source include/have_innodb.inc
+
+CREATE TABLE bug41904 (id INT PRIMARY KEY, uniquecol CHAR(15)) ENGINE=InnoDB;
+
+INSERT INTO bug41904 VALUES (1,NULL), (2,NULL);
+
+CREATE UNIQUE INDEX ui ON bug41904 (uniquecol);
+
+DROP TABLE bug41904;
diff --git a/mysql-test/suite/innodb/t/innodb_bug44032.test b/mysql-test/suite/innodb/t/innodb_bug44032.test
new file mode 100644
index 00000000000..a963cb8b68f
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb_bug44032.test
@@ -0,0 +1,13 @@
+# Bug44032 no update-in-place of UTF-8 columns in ROW_FORMAT=REDUNDANT
+# (btr_cur_update_in_place not invoked when updating from/to NULL;
+# the update is performed by delete and insert instead)
+
+-- source include/have_innodb.inc
+
+CREATE TABLE bug44032(c CHAR(3) CHARACTER SET UTF8) ROW_FORMAT=REDUNDANT
+ENGINE=InnoDB;
+INSERT INTO bug44032 VALUES('abc'),(0xEFBCA4EFBCA4EFBCA4);
+UPDATE bug44032 SET c='DDD' WHERE c=0xEFBCA4EFBCA4EFBCA4;
+UPDATE bug44032 SET c=NULL WHERE c='DDD';
+UPDATE bug44032 SET c='DDD' WHERE c IS NULL;
+DROP TABLE bug44032;
diff --git a/mysql-test/suite/innodb/t/innodb_file_format.test b/mysql-test/suite/innodb/t/innodb_file_format.test
new file mode 100644
index 00000000000..293e87a413e
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb_file_format.test
@@ -0,0 +1,41 @@
+-- source include/have_innodb.inc
+-- source suite/innodb/include/have_innodb_plugin.inc
+
+let $format=`select @@innodb_file_format`;
+let $innodb_file_format_check_orig=`select @@innodb_file_format_check`;
+
+select @@innodb_file_format;
+select @@innodb_file_format_check;
+set global innodb_file_format=antelope;
+set global innodb_file_format=barracuda;
+--error ER_WRONG_ARGUMENTS
+set global innodb_file_format=cheetah;
+select @@innodb_file_format;
+set global innodb_file_format=default;
+select @@innodb_file_format;
+--error ER_WRONG_ARGUMENTS
+set global innodb_file_format=on;
+--error ER_WRONG_ARGUMENTS
+set global innodb_file_format=off;
+select @@innodb_file_format;
+set global innodb_file_format_check=antelope;
+set global innodb_file_format_check=barracuda;
+--error ER_WRONG_ARGUMENTS
+set global innodb_file_format_check=cheetah;
+select @@innodb_file_format_check;
+set global innodb_file_format_check=default;
+select @@innodb_file_format_check;
+--error ER_WRONG_ARGUMENTS
+set global innodb_file_format=on;
+--error ER_WRONG_ARGUMENTS
+set global innodb_file_format=off;
+select @@innodb_file_format_check;
+
+#
+# restore environment to the state it was before this test execution
+#
+
+-- disable_query_log
+eval set global innodb_file_format=$format;
+eval set global innodb_file_format_check=$innodb_file_format_check_orig;
+-- enable_query_log
diff --git a/mysql-test/suite/innodb/t/innodb_information_schema.test b/mysql-test/suite/innodb/t/innodb_information_schema.test
new file mode 100644
index 00000000000..df65139448c
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb_information_schema.test
@@ -0,0 +1,146 @@
+# 
+# Test that user data is correctly "visualized" in
+# INFORMATION_SCHEMA.innodb_locks.lock_data
+#
+
+-- source include/have_innodb.inc
+-- source suite/innodb/include/have_innodb_plugin.inc
+
+-- disable_query_log
+-- disable_result_log
+
+SET storage_engine=InnoDB;
+
+-- disable_warnings
+DROP TABLE IF EXISTS t_min, t_max;
+-- enable_warnings
+
+let $table_def =
+(
+	c01 TINYINT,
+	c02 TINYINT UNSIGNED,
+	c03 SMALLINT,
+	c04 SMALLINT UNSIGNED,
+	c05 MEDIUMINT,
+	c06 MEDIUMINT UNSIGNED,
+	c07 INT,
+	c08 INT UNSIGNED,
+	c09 BIGINT,
+	c10 BIGINT UNSIGNED,
+	PRIMARY KEY(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10)
+);
+
+-- eval CREATE TABLE t_min $table_def;
+INSERT INTO t_min VALUES
+(-128, 0,
+ -32768, 0,
+ -8388608, 0,
+ -2147483648, 0,
+ -9223372036854775808, 0);
+
+-- eval CREATE TABLE t_max $table_def;
+INSERT INTO t_max VALUES
+(127, 255,
+ 32767, 65535,
+ 8388607, 16777215,
+ 2147483647, 4294967295,
+ 9223372036854775807, 18446744073709551615);
+
+CREATE TABLE ```t'\"_str` (
+	c1 VARCHAR(32),
+	c2 VARCHAR(32),
+	c3 VARCHAR(32),
+	c4 VARCHAR(32),
+	c5 VARCHAR(32),
+	c6 VARCHAR(32),
+	c7 VARCHAR(32),
+	PRIMARY KEY(c1, c2, c3, c4, c5, c6, c7)
+);
+INSERT INTO ```t'\"_str` VALUES
+('1', 'abc', '''abc', 'abc''', 'a''bc', 'a''bc''', '''abc''''');
+INSERT INTO ```t'\"_str` VALUES
+('2', 'abc', '"abc', 'abc"', 'a"bc', 'a"bc"', '"abc""');
+INSERT INTO ```t'\"_str` VALUES
+('3', 'abc', '\\abc', 'abc\\', 'a\\bc', 'a\\bc\\', '\\abc\\\\');
+INSERT INTO ```t'\"_str` VALUES
+('4', 'abc', 0x00616263, 0x61626300, 0x61006263, 0x6100626300, 0x610062630000);
+
+-- connect (con_lock,localhost,root,,)
+-- connect (con_min_trylock,localhost,root,,)
+-- connect (con_max_trylock,localhost,root,,)
+-- connect (con_str_insert_supremum,localhost,root,,)
+-- connect (con_str_lock_row1,localhost,root,,)
+-- connect (con_str_lock_row2,localhost,root,,)
+-- connect (con_str_lock_row3,localhost,root,,)
+-- connect (con_str_lock_row4,localhost,root,,)
+-- connect (con_verify_innodb_locks,localhost,root,,)
+
+-- connection con_lock
+SET autocommit=0;
+SELECT * FROM t_min FOR UPDATE;
+SELECT * FROM t_max FOR UPDATE;
+SELECT * FROM ```t'\"_str` FOR UPDATE;
+
+-- connection con_min_trylock
+-- send
+SELECT * FROM t_min FOR UPDATE;
+
+-- connection con_max_trylock
+-- send
+SELECT * FROM t_max FOR UPDATE;
+
+-- connection con_str_insert_supremum
+-- send
+INSERT INTO ```t'\"_str` VALUES
+('z', 'z', 'z', 'z', 'z', 'z', 'z');
+
+-- connection con_str_lock_row1
+-- send
+SELECT * FROM ```t'\"_str` WHERE c1 = '1' FOR UPDATE;
+
+-- connection con_str_lock_row2
+-- send
+SELECT * FROM ```t'\"_str` WHERE c1 = '2' FOR UPDATE;
+
+-- connection con_str_lock_row3
+-- send
+SELECT * FROM ```t'\"_str` WHERE c1 = '3' FOR UPDATE;
+
+-- connection con_str_lock_row4
+-- send
+SELECT * FROM ```t'\"_str` WHERE c1 = '4' FOR UPDATE;
+
+# Give time to the above 2 queries to execute before continuing.
+# Without this sleep it sometimes happens that the SELECT from innodb_locks
+# executes before some of them, resulting in less than expected number
+# of rows being selected from innodb_locks.
+-- sleep 0.1
+
+-- enable_result_log
+-- connection con_verify_innodb_locks
+SELECT lock_mode, lock_type, lock_table, lock_index, lock_rec, lock_data
+FROM INFORMATION_SCHEMA.INNODB_LOCKS ORDER BY lock_data;
+
+SELECT lock_table,COUNT(*) FROM INFORMATION_SCHEMA.INNODB_LOCKS
+GROUP BY lock_table;
+
+set @save_sql_mode = @@sql_mode;
+SET SQL_MODE='ANSI_QUOTES';
+SELECT lock_table,COUNT(*) FROM INFORMATION_SCHEMA.INNODB_LOCKS
+GROUP BY lock_table;
+SET @@sql_mode=@save_sql_mode;
+-- disable_result_log
+
+-- connection default
+
+-- disconnect con_lock
+-- disconnect con_min_trylock
+-- disconnect con_max_trylock
+-- disconnect con_str_insert_supremum
+-- disconnect con_str_lock_row1
+-- disconnect con_str_lock_row2
+-- disconnect con_str_lock_row3
+-- disconnect con_str_lock_row4
+-- disconnect con_verify_innodb_locks
+
+DROP TABLE t_min, t_max, ```t'\"_str`;
diff --git a/mysql-test/suite/ndb/my.cnf b/mysql-test/suite/ndb/my.cnf
index 60769272ada..a19fdeee302 100644
--- a/mysql-test/suite/ndb/my.cnf
+++ b/mysql-test/suite/ndb/my.cnf
@@ -13,6 +13,7 @@ ndbcluster
 
 [ENV]
 NDB_CONNECTSTRING=             @mysql_cluster.1.ndb_connectstring
+MASTER_MYSOCK=                 @mysqld.1.1.socket
 MASTER_MYPORT=                 @mysqld.1.1.port
 MASTER_MYPORT1=                @mysqld.2.1.port
 
diff --git a/mysql-test/suite/ndb/r/ndb_binlog_format.result b/mysql-test/suite/ndb/r/ndb_binlog_format.result
index bb02002ed58..6e1fc12130f 100644
--- a/mysql-test/suite/ndb/r/ndb_binlog_format.result
+++ b/mysql-test/suite/ndb/r/ndb_binlog_format.result
@@ -15,15 +15,15 @@ COMMIT;
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 mysqld-bin.000001	#	Query	#	#	use `test`; INSERT INTO t1 VALUES (1,1), (1,2), (2,1), (2,2)
-mysqld-bin.000001	#	Query	#	#	use `test`; BEGIN
+mysqld-bin.000001	#	Query	#	#	BEGIN
 mysqld-bin.000001	#	Query	#	#	use `test`; INSERT INTO t2 VALUES (1,1), (1,2), (2,1), (2,2)
-mysqld-bin.000001	#	Query	#	#	use `test`; COMMIT
+mysqld-bin.000001	#	Query	#	#	COMMIT
 mysqld-bin.000001	#	Query	#	#	use `test`; UPDATE t1, t2 SET m = 2, b = 3 WHERE n = c
-mysqld-bin.000001	#	Query	#	#	use `test`; BEGIN
+mysqld-bin.000001	#	Query	#	#	BEGIN
 mysqld-bin.000001	#	Query	#	#	use `test`; INSERT INTO t3 VALUES (1,1), (1,2), (2,1), (2,2)
 mysqld-bin.000001	#	Query	#	#	use `test`; UPDATE t1, t3 SET m = 2, e = 3 WHERE n = f
 mysqld-bin.000001	#	Query	#	#	use `test`; UPDATE t3, t2 SET e = 2, b = 3 WHERE f = c
-mysqld-bin.000001	#	Query	#	#	use `test`; COMMIT
+mysqld-bin.000001	#	Query	#	#	COMMIT
 mysqld-bin.000001	#	Query	#	#	BEGIN
 mysqld-bin.000001	#	Table_map	#	#	table_id: # (test.t3)
 mysqld-bin.000001	#	Table_map	#	#	table_id: # (mysql.ndb_apply_status)
diff --git a/mysql-test/suite/ndb_team/r/rpl_ndb_mix_innodb.result b/mysql-test/suite/ndb_team/r/rpl_ndb_mix_innodb.result
index eba1222ea33..f9c077f38da 100644
--- a/mysql-test/suite/ndb_team/r/rpl_ndb_mix_innodb.result
+++ b/mysql-test/suite/ndb_team/r/rpl_ndb_mix_innodb.result
@@ -28,7 +28,7 @@ from mysql.ndb_apply_status;
 
 show binlog events from <start_pos> limit 1;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	<start_pos>	Query	1	#	use `test`; BEGIN
+master-bin.000001	<start_pos>	Query	1	#	BEGIN
 
 # Now the insert, one step after
 
@@ -53,7 +53,7 @@ from mysql.ndb_apply_status;
 <log_name>	<start_pos>	<end_pos>
 show binlog events from <start_pos> limit 1;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	<start_pos>	Query	1	#	use `test`; BEGIN
+master-bin.000001	<start_pos>	Query	1	#	BEGIN
 
 show binlog events from <start_pos> limit 1,2;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
diff --git a/mysql-test/suite/parts/r/partition_auto_increment_memory.result b/mysql-test/suite/parts/r/partition_auto_increment_memory.result
index 77bab79f020..f4d783825f4 100644
--- a/mysql-test/suite/parts/r/partition_auto_increment_memory.result
+++ b/mysql-test/suite/parts/r/partition_auto_increment_memory.result
@@ -381,12 +381,12 @@ Table	Create Table
 t1	CREATE TABLE `t1` (
   `c1` int(11) NOT NULL AUTO_INCREMENT,
   PRIMARY KEY (`c1`)
-) ENGINE=MEMORY AUTO_INCREMENT=28 DEFAULT CHARSET=latin1
+) ENGINE=MEMORY AUTO_INCREMENT=2 DEFAULT CHARSET=latin1
 /*!50100 PARTITION BY HASH (c1)
 PARTITIONS 2 */
 SELECT * FROM t1 ORDER BY c1;
 c1
-27
+1
 INSERT INTO t1 VALUES (100);
 INSERT INTO t1 VALUES (NULL);
 DELETE FROM t1 WHERE c1 >= 100;
diff --git a/mysql-test/suite/parts/r/partition_auto_increment_myisam.result b/mysql-test/suite/parts/r/partition_auto_increment_myisam.result
index b5b001ec17a..6abf08b68a0 100644
--- a/mysql-test/suite/parts/r/partition_auto_increment_myisam.result
+++ b/mysql-test/suite/parts/r/partition_auto_increment_myisam.result
@@ -381,12 +381,12 @@ Table	Create Table
 t1	CREATE TABLE `t1` (
   `c1` int(11) NOT NULL AUTO_INCREMENT,
   PRIMARY KEY (`c1`)
-) ENGINE=MyISAM AUTO_INCREMENT=28 DEFAULT CHARSET=latin1
+) ENGINE=MyISAM AUTO_INCREMENT=2 DEFAULT CHARSET=latin1
 /*!50100 PARTITION BY HASH (c1)
 PARTITIONS 2 */
 SELECT * FROM t1 ORDER BY c1;
 c1
-27
+1
 INSERT INTO t1 VALUES (100);
 INSERT INTO t1 VALUES (NULL);
 DELETE FROM t1 WHERE c1 >= 100;
diff --git a/mysql-test/suite/rpl/r/rpl_begin_commit_rollback.result b/mysql-test/suite/rpl/r/rpl_begin_commit_rollback.result
new file mode 100644
index 00000000000..23736804784
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_begin_commit_rollback.result
@@ -0,0 +1,109 @@
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+DROP DATABASE IF EXISTS db1;
+CREATE DATABASE db1;
+use db1;
+CREATE TABLE db1.t1 (a INT) ENGINE=InnoDB;
+CREATE TABLE db1.t2 (s CHAR(255)) ENGINE=MyISAM;
+include/stop_slave.inc
+[on master]
+CREATE PROCEDURE db1.p1 ()
+BEGIN
+INSERT INTO t1 VALUES (1);
+INSERT INTO t1 VALUES (2);
+INSERT INTO t1 VALUES (3);
+INSERT INTO t1 VALUES (4);
+INSERT INTO t1 VALUES (5);
+END//
+CREATE PROCEDURE db1.p2 ()
+BEGIN
+INSERT INTO t1 VALUES (6);
+INSERT INTO t1 VALUES (7);
+INSERT INTO t1 VALUES (8);
+INSERT INTO t1 VALUES (9);
+INSERT INTO t1 VALUES (10);
+INSERT INTO t2 VALUES ('executed db1.p2()');
+END//
+INSERT INTO db1.t2 VALUES ('before call db1.p1()');
+use test;
+BEGIN;
+CALL db1.p1();
+COMMIT;
+INSERT INTO db1.t2 VALUES ('after call db1.p1()');
+SELECT * FROM db1.t1;
+a
+1
+2
+3
+4
+5
+SELECT * FROM db1.t2;
+s
+before call db1.p1()
+after call db1.p1()
+[on slave]
+start slave until master_log_file='master-bin.000001', master_log_pos=MASTER_POS;
+#
+# If we got non-zero here, then we're suffering BUG#43263
+#
+SELECT 0 as 'Must be 0';
+Must be 0
+0
+SELECT * from db1.t1;
+a
+1
+2
+3
+4
+5
+SELECT * from db1.t2;
+s
+before call db1.p1()
+[on master]
+INSERT INTO db1.t2 VALUES ('before call db1.p2()');
+BEGIN;
+CALL db1.p2();
+ROLLBACK;
+INSERT INTO db1.t2 VALUES ('after call db1.p2()');
+SELECT * FROM db1.t1;
+a
+1
+2
+3
+4
+5
+SELECT * FROM db1.t2;
+s
+before call db1.p1()
+after call db1.p1()
+before call db1.p2()
+executed db1.p2()
+after call db1.p2()
+[on slave]
+start slave until master_log_file='master-bin.000001', master_log_pos=MASTER_POS;
+#
+# If we got non-zero here, then we're suffering BUG#43263
+#
+SELECT 0 as 'Must be 0';
+Must be 0
+0
+SELECT * from db1.t1;
+a
+1
+2
+3
+4
+5
+SELECT * from db1.t2;
+s
+before call db1.p1()
+executed db1.p2()
+#
+# Clean up
+#
+DROP DATABASE db1;
+DROP DATABASE db1;
diff --git a/mysql-test/suite/rpl/r/rpl_binlog_grant.result b/mysql-test/suite/rpl/r/rpl_binlog_grant.result
index 72dc58effa1..4a789f361c6 100644
--- a/mysql-test/suite/rpl/r/rpl_binlog_grant.result
+++ b/mysql-test/suite/rpl/r/rpl_binlog_grant.result
@@ -23,7 +23,7 @@ master-bin.000001	4	Format_desc	1	106	Server ver: VERSION, Binlog ver: 4
 master-bin.000001	106	Query	1	193	drop database if exists d1
 master-bin.000001	193	Query	1	272	create database d1
 master-bin.000001	272	Query	1	370	use `d1`; create table t (s1 int) engine=innodb
-master-bin.000001	370	Query	1	436	use `d1`; BEGIN
+master-bin.000001	370	Query	1	436	BEGIN
 master-bin.000001	436	Query	1	521	use `d1`; insert into t values (1)
 master-bin.000001	521	Xid	1	548	COMMIT /* XID */
 master-bin.000001	548	Query	1	633	use `d1`; grant select on t to x@y
@@ -44,11 +44,11 @@ master-bin.000001	4	Format_desc	1	106	Server ver: VERSION, Binlog ver: 4
 master-bin.000001	106	Query	1	193	drop database if exists d1
 master-bin.000001	193	Query	1	272	create database d1
 master-bin.000001	272	Query	1	370	use `d1`; create table t (s1 int) engine=innodb
-master-bin.000001	370	Query	1	436	use `d1`; BEGIN
+master-bin.000001	370	Query	1	436	BEGIN
 master-bin.000001	436	Query	1	521	use `d1`; insert into t values (1)
 master-bin.000001	521	Xid	1	548	COMMIT /* XID */
 master-bin.000001	548	Query	1	633	use `d1`; grant select on t to x@y
-master-bin.000001	633	Query	1	699	use `d1`; BEGIN
+master-bin.000001	633	Query	1	699	BEGIN
 master-bin.000001	699	Query	1	784	use `d1`; insert into t values (2)
 master-bin.000001	784	Xid	1	811	COMMIT /* XID */
 master-bin.000001	811	Query	1	899	use `d1`; revoke select on t from x@y
diff --git a/mysql-test/suite/rpl/r/rpl_binlog_max_cache_size.result b/mysql-test/suite/rpl/r/rpl_binlog_max_cache_size.result
new file mode 100644
index 00000000000..0e3f83d0aa5
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_binlog_max_cache_size.result
@@ -0,0 +1,135 @@
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+CREATE TABLE t1(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=Innodb;
+CREATE TABLE t2(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=MyIsam;
+CREATE TABLE t3(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=Innodb;
+########################################################################################
+#                                   1 - SINGLE STATEMENT
+########################################################################################
+*** Single statement on transactional table ***
+Got one of the listed errors
+*** Single statement on non-transactional table ***
+*** After WL#2687 the difference between STATEMENT/MIXED and ROW will not exist. ***
+Got one of the listed errors
+*** Single statement on both transactional and non-transactional tables. ***
+*** After WL#2687 we will be able to change the order of the tables. ***
+Got one of the listed errors
+SET GLOBAL SQL_SLAVE_SKIP_COUNTER = 1;
+START SLAVE SQL_THREAD;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t2;
+TRUNCATE TABLE t3;
+BEGIN;
+Got one of the listed errors
+Got one of the listed errors
+Got one of the listed errors
+BEGIN;
+Got one of the listed errors
+Got one of the listed errors
+Got one of the listed errors
+BEGIN;
+Got one of the listed errors
+Got one of the listed errors
+source include/diff_master_slave.inc;
+########################################################################################
+#                                     3 - BEGIN - COMMIT
+########################################################################################
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t2;
+TRUNCATE TABLE t3;
+BEGIN;
+Got one of the listed errors
+Got one of the listed errors
+Got one of the listed errors
+COMMIT;
+source include/diff_master_slave.inc;
+########################################################################################
+#                                      4 - BEGIN - ROLLBACK
+########################################################################################
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t2;
+TRUNCATE TABLE t3;
+BEGIN;
+Got one of the listed errors
+Got one of the listed errors
+Got one of the listed errors
+ROLLBACK;
+Warnings:
+Warning	1196	Some non-transactional changed tables couldn't be rolled back
+source include/diff_master_slave.inc;
+########################################################################################
+#                                         5 - PROCEDURE 
+########################################################################################
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t2;
+TRUNCATE TABLE t3;
+CREATE PROCEDURE p1(pd VARCHAR(30000))
+BEGIN
+INSERT INTO t1 (a, data) VALUES (1, pd);
+INSERT INTO t1 (a, data) VALUES (2, pd);
+INSERT INTO t1 (a, data) VALUES (3, pd);
+INSERT INTO t1 (a, data) VALUES (4, pd);
+INSERT INTO t1 (a, data) VALUES (5, 's');
+END//
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+BEGIN;
+Got one of the listed errors
+COMMIT;
+TRUNCATE TABLE t1;
+BEGIN;
+Got one of the listed errors
+ROLLBACK;
+source include/diff_master_slave.inc;
+########################################################################################
+#                                           6 - XID
+########################################################################################
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t2;
+TRUNCATE TABLE t3;
+BEGIN;
+Got one of the listed errors
+Got one of the listed errors
+Got one of the listed errors
+ROLLBACK TO sv;
+Warnings:
+Warning	1196	Some non-transactional changed tables couldn't be rolled back
+COMMIT;
+source include/diff_master_slave.inc;
+########################################################################################
+#                                        7 - NON-TRANS TABLE
+########################################################################################
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t2;
+TRUNCATE TABLE t3;
+BEGIN;
+Got one of the listed errors
+Got one of the listed errors
+Got one of the listed errors
+Got one of the listed errors
+Got one of the listed errors
+COMMIT;
+BEGIN;
+Got one of the listed errors
+COMMIT;
+########################################################################################
+#                                        CLEAN
+########################################################################################
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
+DROP TABLE IF EXISTS t4;
+DROP TABLE IF EXISTS t5;
+DROP TABLE IF EXISTS t6;
+DROP PROCEDURE p1;
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
+DROP TABLE IF EXISTS t4;
+DROP TABLE IF EXISTS t5;
+DROP TABLE IF EXISTS t6;
+DROP PROCEDURE p1;
diff --git a/mysql-test/suite/rpl/r/rpl_concurrency_error.result b/mysql-test/suite/rpl/r/rpl_concurrency_error.result
new file mode 100644
index 00000000000..ba617667d5a
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_concurrency_error.result
@@ -0,0 +1,119 @@
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+########################################################################
+#                             Environment
+########################################################################
+CREATE TABLE t (i INT, PRIMARY KEY(i), f CHAR(8)) engine = Innodb;
+CREATE TABLE n (d DATETIME, f CHAR(32)) engine = MyIsam;
+CREATE TRIGGER tr AFTER UPDATE ON t FOR EACH ROW 
+BEGIN 
+INSERT INTO n VALUES ( now(), concat( 'updated t: ', old.f, ' -> ', new.f ) ); 
+END |
+INSERT INTO t VALUES (4,'black'), (2,'red'), (3,'yelow'), (1,'cyan');
+########################################################################
+#                     Testing ER_LOCK_WAIT_TIMEOUT
+########################################################################
+SET AUTOCOMMIT = 1;
+BEGIN;
+UPDATE t SET f = 'yellow 2' WHERE i = 3;
+SET AUTOCOMMIT = 1;
+BEGIN;
+UPDATE t SET f = 'magenta 2' WHERE f = 'red';
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+INSERT INTO t VALUES (5 + (2 * 10),"brown");
+INSERT INTO n VALUES (now(),"brown");
+COMMIT;
+ROLLBACK;
+Warnings:
+Warning	1196	Some non-transactional changed tables couldn't be rolled back
+show binlog events from <binlog_start>;
+Log_name	Pos	Event_type	Server_id	End_log_pos	Info
+master-bin.000001	#	Query	#	#	BEGIN
+master-bin.000001	#	Query	#	#	use `test`; UPDATE t SET f = 'magenta 2' WHERE f = 'red'
+master-bin.000001	#	Query	#	#	ROLLBACK
+master-bin.000001	#	Query	#	#	BEGIN
+master-bin.000001	#	Query	#	#	use `test`; UPDATE t SET f = 'yellow 2' WHERE i = 3
+master-bin.000001	#	Xid	#	#	COMMIT /* XID */
+master-bin.000001	#	Query	#	#	BEGIN
+master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t VALUES (5 + (2 * 10),"brown")
+master-bin.000001	#	Query	#	#	use `test`; INSERT INTO n VALUES (now(),"brown")
+master-bin.000001	#	Query	#	#	ROLLBACK
+SET AUTOCOMMIT = 1;
+BEGIN;
+UPDATE t SET f = 'gray 2' WHERE i = 3;
+SET AUTOCOMMIT = 1;
+BEGIN;
+UPDATE t SET f = 'dark blue 2' WHERE f = 'red';
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+INSERT INTO t VALUES (6 + (2 * 10),"brown");
+INSERT INTO n VALUES (now(),"brown");
+COMMIT;
+COMMIT;
+show binlog events from <binlog_start>;
+Log_name	Pos	Event_type	Server_id	End_log_pos	Info
+master-bin.000001	#	Query	#	#	BEGIN
+master-bin.000001	#	Query	#	#	use `test`; UPDATE t SET f = 'dark blue 2' WHERE f = 'red'
+master-bin.000001	#	Query	#	#	ROLLBACK
+master-bin.000001	#	Query	#	#	BEGIN
+master-bin.000001	#	Query	#	#	use `test`; UPDATE t SET f = 'gray 2' WHERE i = 3
+master-bin.000001	#	Xid	#	#	COMMIT /* XID */
+master-bin.000001	#	Query	#	#	BEGIN
+master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t VALUES (6 + (2 * 10),"brown")
+master-bin.000001	#	Query	#	#	use `test`; INSERT INTO n VALUES (now(),"brown")
+master-bin.000001	#	Xid	#	#	COMMIT /* XID */
+SET AUTOCOMMIT = 0;
+UPDATE t SET f = 'yellow 1' WHERE i = 3;
+SET AUTOCOMMIT = 0;
+UPDATE t SET f = 'magenta 1' WHERE f = 'red';
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+INSERT INTO t VALUES (5 + (1 * 10),"brown");
+INSERT INTO n VALUES (now(),"brown");
+COMMIT;
+ROLLBACK;
+Warnings:
+Warning	1196	Some non-transactional changed tables couldn't be rolled back
+show binlog events from <binlog_start>;
+Log_name	Pos	Event_type	Server_id	End_log_pos	Info
+master-bin.000001	#	Query	#	#	BEGIN
+master-bin.000001	#	Query	#	#	use `test`; UPDATE t SET f = 'magenta 1' WHERE f = 'red'
+master-bin.000001	#	Query	#	#	ROLLBACK
+master-bin.000001	#	Query	#	#	BEGIN
+master-bin.000001	#	Query	#	#	use `test`; UPDATE t SET f = 'yellow 1' WHERE i = 3
+master-bin.000001	#	Xid	#	#	COMMIT /* XID */
+master-bin.000001	#	Query	#	#	BEGIN
+master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t VALUES (5 + (1 * 10),"brown")
+master-bin.000001	#	Query	#	#	use `test`; INSERT INTO n VALUES (now(),"brown")
+master-bin.000001	#	Query	#	#	ROLLBACK
+SET AUTOCOMMIT = 0;
+UPDATE t SET f = 'gray 1' WHERE i = 3;
+SET AUTOCOMMIT = 0;
+UPDATE t SET f = 'dark blue 1' WHERE f = 'red';
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+INSERT INTO t VALUES (6 + (1 * 10),"brown");
+INSERT INTO n VALUES (now(),"brown");
+COMMIT;
+COMMIT;
+show binlog events from <binlog_start>;
+Log_name	Pos	Event_type	Server_id	End_log_pos	Info
+master-bin.000001	#	Query	#	#	BEGIN
+master-bin.000001	#	Query	#	#	use `test`; UPDATE t SET f = 'dark blue 1' WHERE f = 'red'
+master-bin.000001	#	Query	#	#	ROLLBACK
+master-bin.000001	#	Query	#	#	BEGIN
+master-bin.000001	#	Query	#	#	use `test`; UPDATE t SET f = 'gray 1' WHERE i = 3
+master-bin.000001	#	Xid	#	#	COMMIT /* XID */
+master-bin.000001	#	Query	#	#	BEGIN
+master-bin.000001	#	Query	#	#	use `test`; INSERT INTO t VALUES (6 + (1 * 10),"brown")
+master-bin.000001	#	Query	#	#	use `test`; INSERT INTO n VALUES (now(),"brown")
+master-bin.000001	#	Xid	#	#	COMMIT /* XID */
+source include/diff_master_slave.inc;
+source include/diff_master_slave.inc;
+########################################################################
+#                                Cleanup
+########################################################################
+DROP TRIGGER tr;
+DROP TABLE t;
+DROP TABLE n;
diff --git a/mysql-test/suite/rpl/r/rpl_innodb_mixed_dml.result b/mysql-test/suite/rpl/r/rpl_innodb_mixed_dml.result
index 1e795a85ce1..033f71c16b7 100644
--- a/mysql-test/suite/rpl/r/rpl_innodb_mixed_dml.result
+++ b/mysql-test/suite/rpl/r/rpl_innodb_mixed_dml.result
@@ -836,178 +836,178 @@ master-bin.000001	#	Format_desc	1	#	Server ver: #
 master-bin.000001	#	Query	1	#	CREATE DATABASE test_rpl
 master-bin.000001	#	Query	1	#	use `test_rpl`; CREATE TABLE t1 (a int auto_increment not null, b char(254), PRIMARY KEY(a)) ENGINE=innodb
 master-bin.000001	#	Query	1	#	use `test_rpl`; CREATE TABLE t2 (a int auto_increment not null, b char(254), PRIMARY KEY(a)) ENGINE=innodb
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(1, 't1, text 1')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(2, 't1, text 2')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t2 VALUES(1, 't2, text 1')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1 WHERE a = 1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Table_map	1	#	table_id: # (test_rpl.t2)
 master-bin.000001	#	Delete_rows	1	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(1, 't1, text 1')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Table_map	1	#	table_id: # (test_rpl.t1)
 master-bin.000001	#	Write_rows	1	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t2 SELECT * FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t2 VALUES (1, 't1, text 1') ON DUPLICATE KEY UPDATE b = 't2, text 1'
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1 WHERE a = 2
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2 WHERE a = 2
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Begin_load_query	1	#	;file_id=#;block_len=#
 master-bin.000001	#	Execute_load_query	1	#	use `test_rpl`; LOAD DATA INFILE 'MYSQLTEST_VARDIR/std_data/rpl_mixed.dat' INTO TABLE t1 FIELDS TERMINATED BY '|' ;file_id=#
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(1, 't1, text 1')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(2, 't1, text 2')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(3, 't1, text 3')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; REPLACE INTO t1 VALUES(1, 't1, text 11')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Table_map	1	#	table_id: # (test_rpl.t1)
 master-bin.000001	#	Update_rows	1	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; REPLACE INTO t1 SET a=3, b='t1, text 33'
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1 WHERE a = 2
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(1, 't1, text 1')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(1, 'CCC')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(2, 'DDD')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t2 VALUES(1, 'DDD')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t2 VALUES(2, 'CCC')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(1, 't1, text 1')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t2 VALUES(1, 't2, text 1')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(1, 't1, text 1')
 master-bin.000001	#	Xid	1	#	#
 master-bin.000001	#	Query	1	#	use `test_rpl`; TRUNCATE t1
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(1, 't1, text 1')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t2 VALUES(1, 't2, text 1')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; UPDATE t1 SET b = 't1, text 1 updated' WHERE a = 1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; UPDATE t1, t2 SET t1.b = 'test', t2.b = 'test'
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES (1, 'start')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES (3, 'before savepoint s1')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES (5, 'before savepoint s2')
 master-bin.000001	#	Query	1	#	use `test_rpl`; SAVEPOINT s2
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES (6, 'after savepoint s2')
 master-bin.000001	#	Table_map	1	#	table_id: # (test_rpl.t1)
 master-bin.000001	#	Write_rows	1	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1 WHERE a = 7
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
 master-bin.000001	#	Query	1	#	use `test_rpl`; CREATE USER 'user_test_rpl'@'localhost' IDENTIFIED BY PASSWORD '*1111111111111111111111111111111111111111'
@@ -1016,7 +1016,7 @@ master-bin.000001	#	Query	1	#	use `test_rpl`; REVOKE SELECT ON *.* FROM 'user_te
 master-bin.000001	#	Query	1	#	use `test_rpl`; SET PASSWORD FOR 'user_test_rpl'@'localhost'='*0000000000000000000000000000000000000000'
 master-bin.000001	#	Query	1	#	use `test_rpl`; RENAME USER 'user_test_rpl'@'localhost' TO 'user_test_rpl_2'@'localhost'
 master-bin.000001	#	Query	1	#	use `test_rpl`; DROP USER 'user_test_rpl_2'@'localhost'
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(100, 'test')
 master-bin.000001	#	Xid	1	#	#
 master-bin.000001	#	Query	1	#	use `test_rpl`; ANALYZE TABLE t1
@@ -1030,65 +1030,65 @@ master-bin.000001	#	Query	1	#	use `test_rpl`; CREATE DEFINER=`root`@`localhost`
 BEGIN
 UPDATE t1 SET b = UUID() WHERE a = 202;
 END
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT  INTO t1 VALUES(201, 'test 201')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; UPDATE t1 SET b = 'test' WHERE a = 201
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT  INTO t1 VALUES(202, 'test 202')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Table_map	1	#	table_id: # (test_rpl.t1)
 master-bin.000001	#	Update_rows	1	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1 WHERE a = 202
 master-bin.000001	#	Xid	1	#	#
 master-bin.000001	#	Query	1	#	use `test_rpl`; ALTER PROCEDURE p1 COMMENT 'p1'
 master-bin.000001	#	Query	1	#	use `test_rpl`; DROP PROCEDURE p1
 master-bin.000001	#	Query	1	#	use `test_rpl`; DROP PROCEDURE p2
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
 master-bin.000001	#	Query	1	#	use `test_rpl`; CREATE DEFINER=`root`@`localhost` TRIGGER tr1 BEFORE INSERT ON t1
 FOR EACH ROW BEGIN
 INSERT INTO t2 SET a = NEW.a, b = NEW.b;
 END
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Table_map	1	#	table_id: # (test_rpl.t1)
 master-bin.000001	#	Table_map	1	#	table_id: # (test_rpl.t2)
 master-bin.000001	#	Write_rows	1	#	table_id: #
 master-bin.000001	#	Write_rows	1	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
 master-bin.000001	#	Query	1	#	use `test_rpl`; DROP TRIGGER tr1
 master-bin.000001	#	Query	1	#	use `test_rpl`; GRANT EVENT ON *.* TO 'root'@'localhost'
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(1, 'test1')
 master-bin.000001	#	Xid	1	#	#
 master-bin.000001	#	Query	1	#	use `test_rpl`; CREATE EVENT e1 ON SCHEDULE EVERY '1' SECOND COMMENT 'e_second_comment' DO DELETE FROM t1
 master-bin.000001	#	Query	1	#	use `test_rpl`; ALTER EVENT e1 RENAME TO e2
 master-bin.000001	#	Query	1	#	use `test_rpl`; DROP EVENT e2
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(1, 'test1')
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; INSERT INTO t1 VALUES(2, 'test2')
 master-bin.000001	#	Xid	1	#	#
 master-bin.000001	#	Query	1	#	use `test_rpl`; CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS SELECT * FROM t1 WHERE a = 1
@@ -1096,10 +1096,10 @@ master-bin.000001	#	Query	1	#	use `test_rpl`; CREATE ALGORITHM=UNDEFINED DEFINER
 master-bin.000001	#	Query	1	#	use `test_rpl`; ALTER ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS SELECT * FROM t1 WHERE a = 2
 master-bin.000001	#	Query	1	#	use `test_rpl`; DROP VIEW v1
 master-bin.000001	#	Query	1	#	use `test_rpl`; DROP VIEW v2
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t1
 master-bin.000001	#	Xid	1	#	#
-master-bin.000001	#	Query	1	#	use `test_rpl`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Query	1	#	use `test_rpl`; DELETE FROM t2
 master-bin.000001	#	Xid	1	#	#
 drop database test_rpl;
diff --git a/mysql-test/suite/rpl/r/rpl_rbr_to_sbr.result b/mysql-test/suite/rpl/r/rpl_rbr_to_sbr.result
index 0551240eb8a..2e707fb62c1 100644
--- a/mysql-test/suite/rpl/r/rpl_rbr_to_sbr.result
+++ b/mysql-test/suite/rpl/r/rpl_rbr_to_sbr.result
@@ -19,10 +19,10 @@ Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Format_desc	1	#	Server ver: VERSION, Binlog ver: 4
 master-bin.000001	#	Query	1	#	use `test`; CREATE TABLE t1 (a INT, b LONG)
 master-bin.000001	#	Query	1	#	use `test`; INSERT INTO t1 VALUES (1,1), (2,2)
-master-bin.000001	#	Query	1	#	use `test`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Table_map	1	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	1	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	1	#	use `test`; COMMIT
+master-bin.000001	#	Query	1	#	COMMIT
 **** On Slave ****
 SHOW SLAVE STATUS;
 Slave_IO_State	#
@@ -68,9 +68,9 @@ Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 slave-bin.000001	#	Format_desc	2	#	Server ver: VERSION, Binlog ver: 4
 slave-bin.000001	#	Query	1	#	use `test`; CREATE TABLE t1 (a INT, b LONG)
 slave-bin.000001	#	Query	1	#	use `test`; INSERT INTO t1 VALUES (1,1), (2,2)
-slave-bin.000001	#	Query	1	#	use `test`; BEGIN
+slave-bin.000001	#	Query	1	#	BEGIN
 slave-bin.000001	#	Table_map	1	#	table_id: # (test.t1)
 slave-bin.000001	#	Write_rows	1	#	table_id: # flags: STMT_END_F
-slave-bin.000001	#	Query	1	#	use `test`; COMMIT
+slave-bin.000001	#	Query	1	#	COMMIT
 DROP TABLE IF EXISTS t1;
 SET @@global.binlog_format= @old_binlog_format;
diff --git a/mysql-test/suite/rpl/r/rpl_row_basic_11bugs.result b/mysql-test/suite/rpl/r/rpl_row_basic_11bugs.result
index 1504c16bd7e..7920b9a981d 100644
--- a/mysql-test/suite/rpl/r/rpl_row_basic_11bugs.result
+++ b/mysql-test/suite/rpl/r/rpl_row_basic_11bugs.result
@@ -28,10 +28,10 @@ INSERT INTO t2 VALUES (3,3), (4,4);
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; CREATE TABLE t1 (a INT, b INT)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 **** On Slave ****
 SHOW DATABASES;
 Database
@@ -60,10 +60,10 @@ SHOW BINLOG EVENTS;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	4	Format_desc	1	106	Server ver: SERVER_VERSION, Binlog ver: 4
 master-bin.000001	106	Query	1	192	use `test`; CREATE TABLE t1 (a INT)
-master-bin.000001	192	Query	1	260	use `test`; BEGIN
+master-bin.000001	192	Query	1	260	BEGIN
 master-bin.000001	260	Table_map	1	301	table_id: # (test.t1)
 master-bin.000001	301	Write_rows	1	340	table_id: # flags: STMT_END_F
-master-bin.000001	340	Query	1	409	use `test`; COMMIT
+master-bin.000001	340	Query	1	409	COMMIT
 DROP TABLE t1;
 ================ Test for BUG#17620 ================
 drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
diff --git a/mysql-test/suite/rpl/r/rpl_row_create_table.result b/mysql-test/suite/rpl/r/rpl_row_create_table.result
index 29f58632fde..5bed9106009 100644
--- a/mysql-test/suite/rpl/r/rpl_row_create_table.result
+++ b/mysql-test/suite/rpl/r/rpl_row_create_table.result
@@ -150,10 +150,10 @@ a	b
 SHOW BINLOG EVENTS FROM 106;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 #	106	Query	#	206	use `test`; CREATE TABLE t7 (a INT, b INT UNIQUE)
-#	206	Query	#	274	use `test`; BEGIN
+#	206	Query	#	274	BEGIN
 #	274	Table_map	#	316	table_id: # (test.t7)
 #	316	Write_rows	#	372	table_id: # flags: STMT_END_F
-#	372	Query	#	443	use `test`; ROLLBACK
+#	372	Query	#	443	ROLLBACK
 SELECT * FROM t7 ORDER BY a,b;
 a	b
 1	2
@@ -173,10 +173,10 @@ Warnings:
 Warning	1196	Some non-transactional changed tables couldn't be rolled back
 SHOW BINLOG EVENTS FROM 106;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-#	106	Query	#	174	use `test`; BEGIN
+#	106	Query	#	174	BEGIN
 #	174	Table_map	#	216	table_id: # (test.t7)
 #	216	Write_rows	#	272	table_id: # flags: STMT_END_F
-#	272	Query	#	343	use `test`; ROLLBACK
+#	272	Query	#	343	ROLLBACK
 SELECT * FROM t7 ORDER BY a,b;
 a	b
 1	2
@@ -299,35 +299,35 @@ a
 SHOW BINLOG EVENTS FROM 106;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 #	106	Query	#	192	use `test`; CREATE TABLE t1 (a INT)
-#	192	Query	#	260	use `test`; BEGIN
+#	192	Query	#	260	BEGIN
 #	260	Table_map	#	301	table_id: # (test.t1)
 #	301	Write_rows	#	345	table_id: # flags: STMT_END_F
-#	345	Query	#	414	use `test`; COMMIT
-#	414	Query	#	482	use `test`; BEGIN
+#	345	Query	#	414	COMMIT
+#	414	Query	#	482	BEGIN
 #	482	Query	#	607	use `test`; CREATE TABLE `t2` (
   `a` int(11) DEFAULT NULL
 ) ENGINE=InnoDB
 #	607	Table_map	#	648	table_id: # (test.t2)
 #	648	Write_rows	#	692	table_id: # flags: STMT_END_F
 #	692	Xid	#	719	COMMIT /* XID */
-#	719	Query	#	787	use `test`; BEGIN
+#	719	Query	#	787	BEGIN
 #	787	Query	#	912	use `test`; CREATE TABLE `t3` (
   `a` int(11) DEFAULT NULL
 ) ENGINE=InnoDB
 #	912	Table_map	#	953	table_id: # (test.t3)
 #	953	Write_rows	#	997	table_id: # flags: STMT_END_F
 #	997	Xid	#	1024	COMMIT /* XID */
-#	1024	Query	#	1092	use `test`; BEGIN
+#	1024	Query	#	1092	BEGIN
 #	1092	Query	#	1217	use `test`; CREATE TABLE `t4` (
   `a` int(11) DEFAULT NULL
 ) ENGINE=InnoDB
 #	1217	Table_map	#	1258	table_id: # (test.t4)
 #	1258	Write_rows	#	1302	table_id: # flags: STMT_END_F
 #	1302	Xid	#	1329	COMMIT /* XID */
-#	1329	Query	#	1397	use `test`; BEGIN
+#	1329	Query	#	1397	BEGIN
 #	1397	Table_map	#	1438	table_id: # (test.t1)
 #	1438	Write_rows	#	1482	table_id: # flags: STMT_END_F
-#	1482	Query	#	1553	use `test`; ROLLBACK
+#	1482	Query	#	1553	ROLLBACK
 SHOW TABLES;
 Tables_in_test
 t1
@@ -393,12 +393,12 @@ a
 SHOW BINLOG EVENTS FROM 106;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 #	106	Query	#	192	use `test`; CREATE TABLE t1 (a INT)
-#	192	Query	#	260	use `test`; BEGIN
+#	192	Query	#	260	BEGIN
 #	260	Table_map	#	301	table_id: # (test.t1)
 #	301	Write_rows	#	345	table_id: # flags: STMT_END_F
-#	345	Query	#	414	use `test`; COMMIT
+#	345	Query	#	414	COMMIT
 #	414	Query	#	514	use `test`; CREATE TABLE t2 (a INT) ENGINE=INNODB
-#	514	Query	#	582	use `test`; BEGIN
+#	514	Query	#	582	BEGIN
 #	582	Table_map	#	623	table_id: # (test.t2)
 #	623	Write_rows	#	667	table_id: # flags: STMT_END_F
 #	667	Table_map	#	708	table_id: # (test.t2)
@@ -431,12 +431,12 @@ SELECT * FROM t2 ORDER BY a;
 a
 SHOW BINLOG EVENTS FROM 106;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-#	106	Query	#	174	use `test`; BEGIN
+#	106	Query	#	174	BEGIN
 #	174	Table_map	#	215	table_id: # (test.t2)
 #	215	Write_rows	#	259	table_id: # flags: STMT_END_F
 #	259	Table_map	#	300	table_id: # (test.t2)
 #	300	Write_rows	#	339	table_id: # flags: STMT_END_F
-#	339	Query	#	410	use `test`; ROLLBACK
+#	339	Query	#	410	ROLLBACK
 SELECT * FROM t2 ORDER BY a;
 a
 DROP TABLE t1,t2;
@@ -468,12 +468,12 @@ Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	DROP DATABASE IF EXISTS mysqltest1
 master-bin.000001	#	Query	#	#	CREATE DATABASE mysqltest1
 master-bin.000001	#	Query	#	#	use `test`; CREATE TABLE mysqltest1.without_select (f1 BIGINT)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Query	#	#	use `test`; CREATE TABLE `mysqltest1`.`with_select` (
   `f1` int(1) NOT NULL DEFAULT '0'
 )
 master-bin.000001	#	Table_map	#	#	table_id: # (mysqltest1.with_select)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 DROP DATABASE mysqltest1;
 end of the tests
diff --git a/mysql-test/suite/rpl/r/rpl_row_log.result b/mysql-test/suite/rpl/r/rpl_row_log.result
index b76cc6aa15e..9593b009d1f 100644
--- a/mysql-test/suite/rpl/r/rpl_row_log.result
+++ b/mysql-test/suite/rpl/r/rpl_row_log.result
@@ -20,23 +20,23 @@ show binlog events;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Format_desc	1	#	Server ver: VERSION, Binlog ver: 4
 master-bin.000001	#	Query	1	#	use `test`; create table t1(n int not null auto_increment primary key)ENGINE=MyISAM
-master-bin.000001	#	Query	1	#	use `test`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Table_map	1	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	1	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	1	#	use `test`; COMMIT
+master-bin.000001	#	Query	1	#	COMMIT
 master-bin.000001	#	Query	1	#	use `test`; drop table t1
 master-bin.000001	#	Query	1	#	use `test`; create table t1 (word char(20) not null)ENGINE=MyISAM
-master-bin.000001	#	Query	1	#	use `test`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Table_map	1	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	1	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	1	#	use `test`; COMMIT
+master-bin.000001	#	Query	1	#	COMMIT
 show binlog events from 106 limit 1;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	1	#	use `test`; create table t1(n int not null auto_increment primary key)ENGINE=MyISAM
 show binlog events from 106 limit 2;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	1	#	use `test`; create table t1(n int not null auto_increment primary key)ENGINE=MyISAM
-master-bin.000001	#	Query	1	#	use `test`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 show binlog events from 106 limit 2,1;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Table_map	1	#	table_id: # (test.t1)
@@ -192,26 +192,26 @@ insert into t2 values (1);
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; create table t1(n int not null auto_increment primary key)ENGINE=MyISAM
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Query	#	#	use `test`; drop table t1
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (word char(20) not null)ENGINE=MyISAM
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 master-bin.000001	#	Rotate	#	#	master-bin.000002;pos=4
 show binlog events in 'master-bin.000002';
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000002	#	Format_desc	1	#	Server ver: VERSION, Binlog ver: 4
 master-bin.000002	#	Query	1	#	use `test`; create table t3 (a int)ENGINE=MyISAM
 master-bin.000002	#	Query	1	#	use `test`; create table t2 (n int)ENGINE=MyISAM
-master-bin.000002	#	Query	1	#	use `test`; BEGIN
+master-bin.000002	#	Query	1	#	BEGIN
 master-bin.000002	#	Table_map	1	#	table_id: # (test.t2)
 master-bin.000002	#	Write_rows	1	#	table_id: # flags: STMT_END_F
-master-bin.000002	#	Query	1	#	use `test`; COMMIT
+master-bin.000002	#	Query	1	#	COMMIT
 show binary logs;
 Log_name	File_size
 master-bin.000001	#
@@ -224,26 +224,26 @@ show binlog events in 'slave-bin.000001' from 4;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 slave-bin.000001	#	Format_desc	2	#	Server ver: VERSION, Binlog ver: 4
 slave-bin.000001	#	Query	1	#	use `test`; create table t1(n int not null auto_increment primary key)ENGINE=MyISAM
-slave-bin.000001	#	Query	1	#	use `test`; BEGIN
+slave-bin.000001	#	Query	1	#	BEGIN
 slave-bin.000001	#	Table_map	1	#	table_id: # (test.t1)
 slave-bin.000001	#	Write_rows	1	#	table_id: # flags: STMT_END_F
-slave-bin.000001	#	Query	1	#	use `test`; COMMIT
+slave-bin.000001	#	Query	1	#	COMMIT
 slave-bin.000001	#	Query	1	#	use `test`; drop table t1
 slave-bin.000001	#	Query	1	#	use `test`; create table t1 (word char(20) not null)ENGINE=MyISAM
-slave-bin.000001	#	Query	1	#	use `test`; BEGIN
+slave-bin.000001	#	Query	1	#	BEGIN
 slave-bin.000001	#	Table_map	1	#	table_id: # (test.t1)
 slave-bin.000001	#	Write_rows	1	#	table_id: # flags: STMT_END_F
-slave-bin.000001	#	Query	1	#	use `test`; COMMIT
+slave-bin.000001	#	Query	1	#	COMMIT
 slave-bin.000001	#	Query	1	#	use `test`; create table t3 (a int)ENGINE=MyISAM
 slave-bin.000001	#	Rotate	2	#	slave-bin.000002;pos=4
 show binlog events in 'slave-bin.000002' from 4;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 slave-bin.000002	#	Format_desc	2	#	Server ver: VERSION, Binlog ver: 4
 slave-bin.000002	#	Query	1	#	use `test`; create table t2 (n int)ENGINE=MyISAM
-slave-bin.000002	#	Query	1	#	use `test`; BEGIN
+slave-bin.000002	#	Query	1	#	BEGIN
 slave-bin.000002	#	Table_map	1	#	table_id: # (test.t2)
 slave-bin.000002	#	Write_rows	1	#	table_id: # flags: STMT_END_F
-slave-bin.000002	#	Query	1	#	use `test`; COMMIT
+slave-bin.000002	#	Query	1	#	COMMIT
 SHOW SLAVE STATUS;
 Slave_IO_State	#
 Master_Host	127.0.0.1
@@ -301,14 +301,14 @@ insert into t1 values (NULL, last_insert_id()), (NULL, last_insert_id());
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; create table t1(a int auto_increment primary key, b int)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 select * from t1;
 a	b
 1	1
diff --git a/mysql-test/suite/rpl/r/rpl_row_log_innodb.result b/mysql-test/suite/rpl/r/rpl_row_log_innodb.result
index 809c50e1465..8526bad558b 100644
--- a/mysql-test/suite/rpl/r/rpl_row_log_innodb.result
+++ b/mysql-test/suite/rpl/r/rpl_row_log_innodb.result
@@ -20,13 +20,13 @@ show binlog events;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Format_desc	1	#	Server ver: VERSION, Binlog ver: 4
 master-bin.000001	#	Query	1	#	use `test`; create table t1(n int not null auto_increment primary key)ENGINE=InnoDB
-master-bin.000001	#	Query	1	#	use `test`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Table_map	1	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	1	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	1	#	COMMIT /* XID */
 master-bin.000001	#	Query	1	#	use `test`; drop table t1
 master-bin.000001	#	Query	1	#	use `test`; create table t1 (word char(20) not null)ENGINE=InnoDB
-master-bin.000001	#	Query	1	#	use `test`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 master-bin.000001	#	Table_map	1	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	1	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	1	#	COMMIT /* XID */
@@ -36,7 +36,7 @@ master-bin.000001	#	Query	1	#	use `test`; create table t1(n int not null auto_in
 show binlog events from 106 limit 2;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	1	#	use `test`; create table t1(n int not null auto_increment primary key)ENGINE=InnoDB
-master-bin.000001	#	Query	1	#	use `test`; BEGIN
+master-bin.000001	#	Query	1	#	BEGIN
 show binlog events from 106 limit 2,1;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Table_map	1	#	table_id: # (test.t1)
@@ -192,13 +192,13 @@ insert into t2 values (1);
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; create table t1(n int not null auto_increment primary key)ENGINE=InnoDB
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
 master-bin.000001	#	Query	#	#	use `test`; drop table t1
 master-bin.000001	#	Query	#	#	use `test`; create table t1 (word char(20) not null)ENGINE=InnoDB
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
 master-bin.000001	#	Xid	#	#	COMMIT /* XID */
@@ -208,7 +208,7 @@ Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000002	#	Format_desc	1	#	Server ver: VERSION, Binlog ver: 4
 master-bin.000002	#	Query	1	#	use `test`; create table t3 (a int)ENGINE=InnoDB
 master-bin.000002	#	Query	1	#	use `test`; create table t2 (n int)ENGINE=InnoDB
-master-bin.000002	#	Query	1	#	use `test`; BEGIN
+master-bin.000002	#	Query	1	#	BEGIN
 master-bin.000002	#	Table_map	1	#	table_id: # (test.t2)
 master-bin.000002	#	Write_rows	1	#	table_id: # flags: STMT_END_F
 master-bin.000002	#	Xid	1	#	COMMIT /* XID */
@@ -301,14 +301,14 @@ insert into t1 values (NULL, last_insert_id()), (NULL, last_insert_id());
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; create table t1(a int auto_increment primary key, b int)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 select * from t1;
 a	b
 1	1
diff --git a/mysql-test/suite/rpl/r/rpl_row_reset_slave.result b/mysql-test/suite/rpl/r/rpl_row_reset_slave.result
index 6126ec4bacc..fa40d8760a8 100644
--- a/mysql-test/suite/rpl/r/rpl_row_reset_slave.result
+++ b/mysql-test/suite/rpl/r/rpl_row_reset_slave.result
@@ -174,3 +174,26 @@ start slave;
 show status like 'slave_open_temp_tables';
 Variable_name	Value
 Slave_open_temp_tables	0
+stop slave;
+reset slave;
+*** errno must be zero: 0 ***
+change master to master_user='impossible_user_name';
+start slave;
+ONE
+1
+include/stop_slave.inc
+change master to master_user='root';
+include/start_slave.inc
+*** last errno must be  zero: 0 ***
+*** last error must be blank:  ***
+include/stop_slave.inc
+change master to master_user='impossible_user_name';
+start slave;
+ONE
+1
+include/stop_slave.inc
+reset slave;
+*** io  last errno must be  zero: 0  ***
+*** io  last error must be blank:   ***
+*** sql last errno must be  zero: 0 ***
+*** sql last error must be blank:  ***
diff --git a/mysql-test/suite/rpl/r/rpl_slave_load_tmpdir_not_exist.result b/mysql-test/suite/rpl/r/rpl_slave_load_tmpdir_not_exist.result
index a158fb5dfc4..3ed14a9cb6b 100644
--- a/mysql-test/suite/rpl/r/rpl_slave_load_tmpdir_not_exist.result
+++ b/mysql-test/suite/rpl/r/rpl_slave_load_tmpdir_not_exist.result
@@ -3,4 +3,4 @@ MASTER_CONNECT_RETRY=1,
 MASTER_HOST='127.0.0.1',
 MASTER_PORT=MASTER_MYPORT;
 START SLAVE;
-Unable to use slave's temporary directory ../../../error - Can't read dir of '../../../error' (Errcode: 2)
+12
diff --git a/mysql-test/suite/rpl/r/rpl_slave_skip.result b/mysql-test/suite/rpl/r/rpl_slave_skip.result
index 747e8f235a8..6148de5d954 100644
--- a/mysql-test/suite/rpl/r/rpl_slave_skip.result
+++ b/mysql-test/suite/rpl/r/rpl_slave_skip.result
@@ -17,20 +17,20 @@ show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; CREATE TABLE t1 (a INT, b INT)
 master-bin.000001	#	Query	#	#	use `test`; CREATE TABLE t2 (c INT, d INT)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t2)
 master-bin.000001	#	Update_rows	#	#	table_id: #
 master-bin.000001	#	Update_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 SELECT * FROM t1;
 a	b
 1	1
@@ -39,8 +39,8 @@ a	b
 SELECT * FROM t2;
 c	d
 1	2
-2	16
-3	54
+2	8
+3	18
 **** On Slave ****
 START SLAVE UNTIL MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=762;
 SHOW SLAVE STATUS;
@@ -50,7 +50,7 @@ Master_User	root
 Master_Port	MASTER_PORT
 Connect_Retry	1
 Master_Log_File	master-bin.000001
-Read_Master_Log_Pos	1133
+Read_Master_Log_Pos	1115
 Relay_Log_File	#
 Relay_Log_Pos	#
 Relay_Master_Log_File	master-bin.000001
diff --git a/mysql-test/suite/rpl/r/rpl_stm_loadfile.result b/mysql-test/suite/rpl/r/rpl_stm_loadfile.result
index 72f58268d5f..ca76695f4d4 100644
--- a/mysql-test/suite/rpl/r/rpl_stm_loadfile.result
+++ b/mysql-test/suite/rpl/r/rpl_stm_loadfile.result
@@ -10,7 +10,7 @@ CREATE TABLE test.t1 (a INT, blob_column LONGBLOB, PRIMARY KEY(a));
 INSERT INTO test.t1  VALUES(1,'test');
 UPDATE test.t1 SET blob_column=LOAD_FILE('../../std_data/words2.dat') WHERE a=1;
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 create procedure test.p1()
 begin
 INSERT INTO test.t1  VALUES(2,'test');
@@ -18,7 +18,7 @@ UPDATE test.t1 SET blob_column=LOAD_FILE('../../std_data/words2.dat') WHERE a=2;
 end|
 CALL test.p1();
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 SELECT * FROM test.t1 ORDER BY blob_column;
 a	blob_column
 1	abase
diff --git a/mysql-test/suite/rpl/r/rpl_stm_reset_slave.result b/mysql-test/suite/rpl/r/rpl_stm_reset_slave.result
index bb89d150af7..78d9d7c41eb 100644
--- a/mysql-test/suite/rpl/r/rpl_stm_reset_slave.result
+++ b/mysql-test/suite/rpl/r/rpl_stm_reset_slave.result
@@ -174,3 +174,26 @@ start slave;
 show status like 'slave_open_temp_tables';
 Variable_name	Value
 Slave_open_temp_tables	1
+stop slave;
+reset slave;
+*** errno must be zero: 0 ***
+change master to master_user='impossible_user_name';
+start slave;
+ONE
+1
+include/stop_slave.inc
+change master to master_user='root';
+include/start_slave.inc
+*** last errno must be  zero: 0 ***
+*** last error must be blank:  ***
+include/stop_slave.inc
+change master to master_user='impossible_user_name';
+start slave;
+ONE
+1
+include/stop_slave.inc
+reset slave;
+*** io  last errno must be  zero: 0  ***
+*** io  last error must be blank:   ***
+*** sql last errno must be  zero: 0 ***
+*** sql last error must be blank:  ***
diff --git a/mysql-test/suite/rpl/r/rpl_temporary.result b/mysql-test/suite/rpl/r/rpl_temporary.result
index 8a9ddaec9f6..631eb0677b0 100644
--- a/mysql-test/suite/rpl/r/rpl_temporary.result
+++ b/mysql-test/suite/rpl/r/rpl_temporary.result
@@ -6,6 +6,25 @@ drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
 start slave;
 call mtr.add_suppression("Slave: Can\'t find record in \'user\' Error_code: 1032");
 reset master;
+DROP TABLE IF EXISTS t1;
+CREATE TEMPORARY TABLE t1 (a char(1));
+INSERT INTO t1 VALUES ('a');
+include/stop_slave.inc
+include/start_slave.inc
+INSERT INTO t1 VALUES ('b');
+DROP TABLE IF EXISTS t1;
+CREATE TEMPORARY TABLE `t1`(`a` tinyint,`b` char(1))engine=myisam;
+INSERT INTO `t1` set `a`=128,`b`='128';
+Warnings:
+Warning	1264	Out of range value for column 'a' at row 1
+Warning	1265	Data truncated for column 'b' at row 1
+include/stop_slave.inc
+include/start_slave.inc
+INSERT INTO `t1` set `a`=128,`b`='128';
+Warnings:
+Warning	1264	Out of range value for column 'a' at row 1
+Warning	1265	Data truncated for column 'b' at row 1
+DROP TABLE t1;
 SET @save_select_limit=@@session.sql_select_limit;
 SET @@session.sql_select_limit=10, @@session.pseudo_thread_id=100;
 ERROR 42000: Access denied; you need the SUPER privilege for this operation
diff --git a/mysql-test/suite/rpl/r/rpl_udf.result b/mysql-test/suite/rpl/r/rpl_udf.result
index 56df5b30d93..ccf16271d01 100644
--- a/mysql-test/suite/rpl/r/rpl_udf.result
+++ b/mysql-test/suite/rpl/r/rpl_udf.result
@@ -182,19 +182,19 @@ CREATE TABLE t1(sum INT, price FLOAT(24)) ENGINE=MyISAM;
 affected rows: 0
 INSERT INTO t1 VALUES(myfunc_int(100), myfunc_double(50.00));
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 affected rows: 1
 INSERT INTO t1 VALUES(myfunc_int(10), myfunc_double(5.00));
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 affected rows: 1
 INSERT INTO t1 VALUES(myfunc_int(200), myfunc_double(25.00));
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 affected rows: 1
 INSERT INTO t1 VALUES(myfunc_int(1), myfunc_double(500.00));
 Warnings:
-Note	1592	Statement is not safe to log in statement format.
+Note	1592	Statement may not be safe to log in statement format.
 affected rows: 1
 SELECT * FROM t1 ORDER BY sum;
 sum	price
diff --git a/mysql-test/suite/rpl/t/disabled.def b/mysql-test/suite/rpl/t/disabled.def
index b7cb6da8127..af8eef764ed 100644
--- a/mysql-test/suite/rpl/t/disabled.def
+++ b/mysql-test/suite/rpl/t/disabled.def
@@ -10,4 +10,6 @@
 #
 ##############################################################################
 
-rpl_cross_version      : BUG#42311 2009-03-27 joro rpl_cross_version fails on macosx
+rpl_cross_version      : Bug#42311 2009-03-27 joro rpl_cross_version fails on macosx
+rpl_init_slave         : Bug#44920 2009-07006 pcrews MTR2 is not processing master.opt input properly on Windows.  *Must be done this way due to the nature of the bug*
+
diff --git a/mysql-test/suite/rpl/t/rpl_begin_commit_rollback-slave.opt b/mysql-test/suite/rpl/t/rpl_begin_commit_rollback-slave.opt
new file mode 100644
index 00000000000..b4abda5893f
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_begin_commit_rollback-slave.opt
@@ -0,0 +1 @@
+--innodb --replicate-do-db=db1
diff --git a/mysql-test/suite/rpl/t/rpl_begin_commit_rollback.test b/mysql-test/suite/rpl/t/rpl_begin_commit_rollback.test
new file mode 100644
index 00000000000..ec56e6a4f38
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_begin_commit_rollback.test
@@ -0,0 +1,125 @@
+source include/master-slave.inc;
+source include/have_innodb.inc;
+source include/have_binlog_format_statement.inc;
+
+disable_warnings;
+DROP DATABASE IF EXISTS db1;
+enable_warnings;
+
+CREATE DATABASE db1;
+
+use db1;
+
+CREATE TABLE db1.t1 (a INT) ENGINE=InnoDB;
+CREATE TABLE db1.t2 (s CHAR(255)) ENGINE=MyISAM;
+
+sync_slave_with_master;
+source include/stop_slave.inc;
+connection master;
+echo [on master];
+
+DELIMITER //;
+CREATE PROCEDURE db1.p1 ()
+BEGIN
+  INSERT INTO t1 VALUES (1);
+  INSERT INTO t1 VALUES (2);
+  INSERT INTO t1 VALUES (3);
+  INSERT INTO t1 VALUES (4);
+  INSERT INTO t1 VALUES (5);
+END//
+
+CREATE PROCEDURE db1.p2 ()
+BEGIN
+  INSERT INTO t1 VALUES (6);
+  INSERT INTO t1 VALUES (7);
+  INSERT INTO t1 VALUES (8);
+  INSERT INTO t1 VALUES (9);
+  INSERT INTO t1 VALUES (10);
+  INSERT INTO t2 VALUES ('executed db1.p2()');
+END//
+DELIMITER ;//
+
+INSERT INTO db1.t2 VALUES ('before call db1.p1()');
+
+# Note: the master_log_pos is set to be the position of the BEGIN + 1,
+# so before fix of BUG#43263 if the BEGIN is ignored, then all the
+# INSERTS in p1 will be replicated in AUTOCOMMIT=1 mode and the slave
+# SQL thread will stop right before the first INSERT. After fix of
+# BUG#43263, BEGIN will not be ignored by the replication db rules,
+# and then the whole transaction will be executed before slave SQL
+# stop.
+let $master_pos= query_get_value(SHOW MASTER STATUS, Position, 1);
+let $master_pos= `SELECT $master_pos + 1`;
+
+use test;
+BEGIN;
+CALL db1.p1();
+COMMIT;
+
+# The position where the following START SLAVE UNTIL will stop at
+let $master_end_trans_pos= query_get_value(SHOW MASTER STATUS, Position, 1);
+
+INSERT INTO db1.t2 VALUES ('after call db1.p1()');
+SELECT * FROM db1.t1;
+SELECT * FROM db1.t2;
+
+connection slave;
+echo [on slave];
+
+replace_result $master_pos MASTER_POS;
+eval start slave until master_log_file='master-bin.000001', master_log_pos=$master_pos;
+source include/wait_for_slave_sql_to_stop.inc;
+let $slave_sql_stop_pos= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
+let $result= query_get_value(SELECT $slave_sql_stop_pos - $master_end_trans_pos as result, result, 1);
+
+--echo #
+--echo # If we got non-zero here, then we're suffering BUG#43263
+--echo #
+eval SELECT $result as 'Must be 0';
+SELECT * from db1.t1;
+SELECT * from db1.t2;
+
+connection master;
+echo [on master];
+
+INSERT INTO db1.t2 VALUES ('before call db1.p2()');
+
+# See comments above.
+let $master_pos= query_get_value(SHOW MASTER STATUS, Position, 1);
+let $master_pos= `SELECT $master_pos + 1`;
+
+BEGIN;
+CALL db1.p2();
+disable_warnings;
+ROLLBACK;
+enable_warnings;
+let $master_end_trans_pos= query_get_value(SHOW MASTER STATUS, Position, 1);
+
+INSERT INTO db1.t2 VALUES ('after call db1.p2()');
+SELECT * FROM db1.t1;
+SELECT * FROM db1.t2;
+
+connection slave;
+echo [on slave];
+
+replace_result $master_pos MASTER_POS;
+eval start slave until master_log_file='master-bin.000001', master_log_pos=$master_pos;
+source include/wait_for_slave_sql_to_stop.inc;
+
+let $slave_sql_stop_pos= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
+let $result= query_get_value(SELECT $slave_sql_stop_pos - $master_end_trans_pos as result, result, 1);
+
+--echo #
+--echo # If we got non-zero here, then we're suffering BUG#43263
+--echo #
+eval SELECT $result as 'Must be 0';
+SELECT * from db1.t1;
+SELECT * from db1.t2;
+
+--echo #
+--echo # Clean up
+--echo #
+connection master;
+DROP DATABASE db1;
+connection slave;
+DROP DATABASE db1;
diff --git a/mysql-test/suite/rpl/t/rpl_binlog_max_cache_size-master.opt b/mysql-test/suite/rpl/t/rpl_binlog_max_cache_size-master.opt
new file mode 100644
index 00000000000..45631525481
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_binlog_max_cache_size-master.opt
@@ -0,0 +1 @@
+--binlog_cache_size=4096 --max_binlog_cache_size=7680
diff --git a/mysql-test/suite/rpl/t/rpl_binlog_max_cache_size.test b/mysql-test/suite/rpl/t/rpl_binlog_max_cache_size.test
new file mode 100644
index 00000000000..e1f1f8c54bb
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_binlog_max_cache_size.test
@@ -0,0 +1,395 @@
+########################################################################################
+#    This test verifies if the binlog is not corrupted when the cache buffer is not
+#    big enough to accommodate the changes and is divided in five steps:
+#
+#    1 - Single Statements:
+#    1.1 - Single statement on transactional table.
+#    1.2 - Single statement on non-transactional table. 
+#    1.3 - Single statement on both transactional and non-transactional tables.
+#    In both 1.2 and 1.3, an incident event is logged to notify the user that the
+#    master and slave are diverging.
+#
+#    2 - Transactions ended by an implicit commit.
+#
+#    3 - Transactions ended by a COMMIT.
+#
+#    4 - Transactions ended by a ROLLBACK.
+#
+#    5 - Transactions with a failing statement that updates a non-transactional
+#    table. In this case, a failure means that the statement does not get into
+#    the cache and an incident event is logged to notify the user that the master
+#    and slave are diverging.
+#    
+########################################################################################
+
+########################################################################################
+#                                Configuring the environment
+########################################################################################
+--source include/have_innodb.inc
+--source include/master-slave.inc
+--source include/not_embedded.inc
+--source include/not_windows.inc
+
+CREATE TABLE t1(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=Innodb;
+CREATE TABLE t2(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=MyIsam;
+CREATE TABLE t3(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=Innodb;
+
+let $data = `select concat('"', repeat('a',2000), '"')`;
+
+--echo ########################################################################################
+--echo #                                   1 - SINGLE STATEMENT
+--echo ########################################################################################
+
+connection master;
+
+--echo *** Single statement on transactional table ***
+--disable_query_log
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+eval INSERT INTO t1 (a, data) VALUES (1,
+     CONCAT($data, $data, $data, $data, $data));
+--enable_query_log
+
+--echo *** Single statement on non-transactional table ***
+--echo *** After WL#2687 the difference between STATEMENT/MIXED and ROW will not exist. ***
+--disable_query_log
+--disable_warnings
+if (`SELECT @@binlog_format = 'STATEMENT' || @@binlog_format = 'MIXED'`)
+{
+  eval INSERT INTO t2 (a, data) VALUES (2,
+       CONCAT($data, $data, $data, $data, $data, $data));
+  --echo Got one of the listed errors
+}
+if (`SELECT @@binlog_format = 'ROW'`)
+{
+  --error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+  eval INSERT INTO t2 (a, data) VALUES (2,
+       CONCAT($data, $data, $data, $data, $data, $data));
+
+  connection slave;
+  --source include/wait_for_slave_sql_to_stop.inc
+  SET GLOBAL SQL_SLAVE_SKIP_COUNTER = 1;
+  START SLAVE SQL_THREAD;
+  --source include/wait_for_slave_sql_to_start.inc
+}
+--enable_warnings
+--enable_query_log
+
+connection master;
+
+--disable_query_log
+eval INSERT INTO t1 (a, data) VALUES (3, $data);
+eval INSERT INTO t1 (a, data) VALUES (4, $data);
+eval INSERT INTO t1 (a, data) VALUES (5, $data);
+eval INSERT INTO t2 (a, data) VALUES (3, $data);
+eval INSERT INTO t2 (a, data) VALUES (4, $data);
+eval INSERT INTO t2 (a, data) VALUES (5, $data);
+--enable_query_log
+
+--echo *** Single statement on both transactional and non-transactional tables. ***
+--echo *** After WL#2687 we will be able to change the order of the tables. ***
+--disable_query_log
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+eval UPDATE t2, t1 SET t2.data = CONCAT($data, $data, $data, $data),
+                       t1.data = CONCAT($data, $data, $data, $data);
+--enable_query_log
+
+connection slave;
+--source include/wait_for_slave_sql_to_stop.inc
+SET GLOBAL SQL_SLAVE_SKIP_COUNTER = 1;
+START SLAVE SQL_THREAD;
+--source include/wait_for_slave_sql_to_start.inc
+
+#--echo ########################################################################################
+#--echo #                             2 - BEGIN - IMPLICIT COMMIT by DDL
+#--echo ########################################################################################
+
+connection master;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t2;
+TRUNCATE TABLE t3;
+
+BEGIN;
+--disable_query_log
+--eval INSERT INTO t1 (a, data) VALUES (1, $data);
+--eval INSERT INTO t1 (a, data) VALUES (2, $data);
+--eval INSERT INTO t1 (a, data) VALUES (3, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (4, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (5, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (6, $data);
+--eval INSERT INTO t1 (a, data) VALUES (7, 's');
+--eval INSERT INTO t2 (a, data) VALUES (8, 's');
+--eval INSERT INTO t1 (a, data) VALUES (9, 's');
+--enable_query_log
+
+--disable_query_log
+ALTER TABLE t3 ADD COLUMN d int;
+--enable_query_log
+
+--disable_query_log
+--eval INSERT INTO t2 (a, data) VALUES (10, $data);
+--eval INSERT INTO t2 (a, data) VALUES (11, $data);
+--eval INSERT INTO t2 (a, data) VALUES (12, $data);
+--eval INSERT INTO t2 (a, data) VALUES (13, $data);
+--enable_query_log
+
+BEGIN;
+--disable_query_log
+--eval INSERT INTO t1 (a, data) VALUES (14, $data);
+--eval INSERT INTO t1 (a, data) VALUES (15, $data);
+--eval INSERT INTO t1 (a, data) VALUES (16, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (17, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (18, $data);
+--eval INSERT INTO t1 (a, data) VALUES (19, 's');
+--eval INSERT INTO t2 (a, data) VALUES (20, 's');
+--eval INSERT INTO t1 (a, data) VALUES (21, 's');
+--enable_query_log
+
+if (`SELECT @@binlog_format = 'STATEMENT' || @@binlog_format = 'MIXED'`)
+{
+  --disable_query_log
+  CREATE TABLE t4 SELECT * FROM t1;
+  --enable_query_log
+  --echo Got one of the listed errors
+}
+if (`SELECT @@binlog_format = 'ROW'`)
+{
+  --disable_query_log
+  --error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+  CREATE TABLE t4 SELECT * FROM t1;
+  --enable_query_log
+}
+
+--disable_query_log
+--eval INSERT INTO t2 (a, data) VALUES (15, $data);
+--enable_query_log
+
+BEGIN;
+--disable_query_log
+--eval INSERT INTO t1 (a, data) VALUES (22, $data);
+--eval INSERT INTO t1 (a, data) VALUES (23, $data);
+--eval INSERT INTO t1 (a, data) VALUES (24, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (25, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (26, $data);
+--eval INSERT INTO t1 (a, data) VALUES (27, 's');
+--eval INSERT INTO t2 (a, data) VALUES (28, 's');
+--eval INSERT INTO t1 (a, data) VALUES (29, 's');
+--enable_query_log
+
+--disable_query_log
+CREATE TABLE t5 (a int);
+--enable_query_log
+
+let $diff_statement= SELECT * FROM t1;
+--source include/diff_master_slave.inc
+
+--echo ########################################################################################
+--echo #                                     3 - BEGIN - COMMIT
+--echo ########################################################################################
+
+connection master;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t2;
+TRUNCATE TABLE t3;
+
+BEGIN;
+--disable_query_log
+--eval INSERT INTO t1 (a, data) VALUES (1, $data);
+--eval INSERT INTO t1 (a, data) VALUES (2, $data);
+--eval INSERT INTO t1 (a, data) VALUES (3, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (4, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (5, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (6, $data);
+--eval INSERT INTO t1 (a, data) VALUES (7, 's');
+--eval INSERT INTO t2 (a, data) VALUES (8, 's');
+--eval INSERT INTO t1 (a, data) VALUES (9, 's');
+--enable_query_log
+COMMIT;
+
+let $diff_statement= SELECT * FROM t1;
+--source include/diff_master_slave.inc
+
+--echo ########################################################################################
+--echo #                                      4 - BEGIN - ROLLBACK
+--echo ########################################################################################
+
+connection master;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t2;
+TRUNCATE TABLE t3;
+
+BEGIN;
+--disable_query_log
+--eval INSERT INTO t1 (a, data) VALUES (1, $data);
+--eval INSERT INTO t1 (a, data) VALUES (2, $data);
+--eval INSERT INTO t1 (a, data) VALUES (3, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (4, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (5, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (6, $data);
+--eval INSERT INTO t1 (a, data) VALUES (7, 's');
+--eval INSERT INTO t2 (a, data) VALUES (8, 's');
+--eval INSERT INTO t1 (a, data) VALUES (9, 's');
+--enable_query_log
+ROLLBACK;
+
+let $diff_statement= SELECT * FROM t1;
+--source include/diff_master_slave.inc
+
+--echo ########################################################################################
+--echo #                                         5 - PROCEDURE 
+--echo ########################################################################################
+
+connection master;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t2;
+TRUNCATE TABLE t3;
+
+DELIMITER //;
+
+CREATE PROCEDURE p1(pd VARCHAR(30000))
+BEGIN
+  INSERT INTO t1 (a, data) VALUES (1, pd);
+  INSERT INTO t1 (a, data) VALUES (2, pd);
+  INSERT INTO t1 (a, data) VALUES (3, pd);
+  INSERT INTO t1 (a, data) VALUES (4, pd);
+  INSERT INTO t1 (a, data) VALUES (5, 's');
+END//
+
+DELIMITER ;//
+
+TRUNCATE TABLE t1;
+
+--disable_query_log
+eval CALL p1($data);
+--enable_query_log
+
+TRUNCATE TABLE t1;
+
+BEGIN;
+--disable_query_log
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+eval CALL p1($data);
+--enable_query_log
+COMMIT;
+
+TRUNCATE TABLE t1;
+
+BEGIN;
+--disable_query_log
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+eval CALL p1($data);
+--enable_query_log
+ROLLBACK;
+
+let $diff_statement= SELECT * FROM t1;
+--source include/diff_master_slave.inc
+
+--echo ########################################################################################
+--echo #                                           6 - XID
+--echo ########################################################################################
+
+connection master;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t2;
+TRUNCATE TABLE t3;
+
+BEGIN;
+--disable_query_log
+--eval INSERT INTO t1 (a, data) VALUES (1, $data);
+--eval INSERT INTO t1 (a, data) VALUES (2, $data);
+--eval INSERT INTO t1 (a, data) VALUES (3, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (4, $data);
+SAVEPOINT sv;
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (5, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (6, $data);
+--eval INSERT INTO t1 (a, data) VALUES (7, 's');
+--eval INSERT INTO t2 (a, data) VALUES (8, 's');
+--eval INSERT INTO t1 (a, data) VALUES (9, 's');
+--enable_query_log
+ROLLBACK TO sv;
+COMMIT;
+
+let $diff_statement= SELECT * FROM t1;
+--source include/diff_master_slave.inc
+
+--echo ########################################################################################
+--echo #                                        7 - NON-TRANS TABLE
+--echo ########################################################################################
+
+connection master;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t2;
+TRUNCATE TABLE t3;
+
+BEGIN;
+--disable_query_log
+--eval INSERT INTO t1 (a, data) VALUES (1, $data);
+--eval INSERT INTO t1 (a, data) VALUES (2, $data);
+--eval INSERT INTO t2 (a, data) VALUES (3, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (4, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (5, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (6, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (7, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval UPDATE t2 SET data= CONCAT($data, $data);
+--eval INSERT INTO t1 (a, data) VALUES (8, 's');
+--eval INSERT INTO t1 (a, data) VALUES (9, 's');
+--eval INSERT INTO t2 (a, data) VALUES (10, 's');
+--eval INSERT INTO t1 (a, data) VALUES (11, 's');
+--enable_query_log
+COMMIT;
+
+BEGIN;
+--disable_query_log
+--eval INSERT INTO t1 (a, data) VALUES (15, $data);
+--eval INSERT INTO t1 (a, data) VALUES (16, $data);
+--eval INSERT INTO t2 (a, data) VALUES (17, $data);
+--error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
+--eval INSERT INTO t1 (a, data) VALUES (18, $data);
+--enable_query_log
+COMMIT;
+
+connection slave;
+--source include/wait_for_slave_sql_to_stop.inc
+
+--echo ########################################################################################
+--echo #                                        CLEAN
+--echo ########################################################################################
+
+--disable_warnings
+connection master;
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
+DROP TABLE IF EXISTS t4;
+DROP TABLE IF EXISTS t5;
+DROP TABLE IF EXISTS t6;
+DROP PROCEDURE p1;
+connection slave;
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
+DROP TABLE IF EXISTS t4;
+DROP TABLE IF EXISTS t5;
+DROP TABLE IF EXISTS t6;
+DROP PROCEDURE p1;
+--enable_warnings
diff --git a/mysql-test/suite/rpl/t/rpl_concurrency_error-master.opt b/mysql-test/suite/rpl/t/rpl_concurrency_error-master.opt
new file mode 100644
index 00000000000..a6ef074a120
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_concurrency_error-master.opt
@@ -0,0 +1 @@
+--innodb-lock-wait-timeout=1
diff --git a/mysql-test/suite/rpl/t/rpl_concurrency_error.test b/mysql-test/suite/rpl/t/rpl_concurrency_error.test
new file mode 100644
index 00000000000..da2951afb1a
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_concurrency_error.test
@@ -0,0 +1,149 @@
+###############################################################################
+#BUG#44581 Slave stops when transaction with non-transactional table gets
+#lock wait timeout
+#
+# In STMT and MIXED modes, a statement that changes both non-transactional and
+# transactional tables must be written to the binary log whenever there are
+# changes to non-transactional tables. This means that the statement gets into
+# the # binary log even when the changes to the transactional tables fail. In
+# particular, in the presence of a failure such statement is annotated with the
+# error number and wrapped in a begin/rollback. On the slave, while applying
+# the statement, it is expected the same failure and the rollback prevents the
+# transactional changes to be persisted.
+
+# This test aims to verify if a statement that updates both transactional and
+# non-transacitonal tables and fails due to concurrency problems is correctly
+# processed by the slave in the sense that the statements get into the binary
+# log, the error is ignored and only the non-transactional tables are changed.
+###############################################################################
+
+--source include/master-slave.inc
+--source include/have_innodb.inc
+--source include/have_binlog_format_statement.inc
+
+--echo ########################################################################
+--echo #                             Environment
+--echo ########################################################################
+connection master;
+
+CREATE TABLE t (i INT, PRIMARY KEY(i), f CHAR(8)) engine = Innodb;
+CREATE TABLE n (d DATETIME, f CHAR(32)) engine = MyIsam;
+
+DELIMITER |;
+CREATE TRIGGER tr AFTER UPDATE ON t FOR EACH ROW 
+BEGIN 
+  INSERT INTO n VALUES ( now(), concat( 'updated t: ', old.f, ' -> ', new.f ) ); 
+END |
+DELIMITER ;|
+
+INSERT INTO t VALUES (4,'black'), (2,'red'), (3,'yelow'), (1,'cyan');
+
+connect (conn1, 127.0.0.1,root,,);
+connect (conn2, 127.0.0.1,root,,);
+
+--echo ########################################################################
+--echo #                     Testing ER_LOCK_WAIT_TIMEOUT
+--echo ########################################################################
+
+let $type=2;
+
+while ($type)
+{
+  let $binlog_start= query_get_value("SHOW MASTER STATUS", Position, 1);
+  connection conn1;
+  if (`select $type = 2`)
+  {
+    SET AUTOCOMMIT = 1;
+    BEGIN;
+  }
+  if (`select $type = 1`)
+  {
+    SET AUTOCOMMIT = 0;
+  }
+  eval UPDATE t SET f = 'yellow $type' WHERE i = 3;
+  
+  connection conn2;
+  if (`select $type = 2`)
+  {
+    SET AUTOCOMMIT = 1;
+    BEGIN;
+  }
+  if (`select $type = 1`)
+  {
+    SET AUTOCOMMIT = 0;
+  }
+  --error ER_LOCK_WAIT_TIMEOUT
+  eval UPDATE t SET f = 'magenta $type' WHERE f = 'red';
+  eval INSERT INTO t VALUES (5 + ($type * 10),"brown");
+  INSERT INTO n VALUES (now(),"brown");
+  
+  connection conn1;
+  COMMIT;
+  
+  connection conn2;
+  ROLLBACK;
+  --source include/show_binlog_events.inc
+
+  let $binlog_start= query_get_value("SHOW MASTER STATUS", Position, 1);
+  connection conn1;
+  if (`select $type = 2`)
+  {
+    SET AUTOCOMMIT = 1;
+    BEGIN;
+  }
+  if (`select $type = 1`)
+  {
+    SET AUTOCOMMIT = 0;
+  }
+  eval UPDATE t SET f = 'gray $type' WHERE i = 3;
+  
+  connection conn2;
+  if (`select $type = 2`)
+  {
+    SET AUTOCOMMIT = 1;
+    BEGIN;
+  }
+  if (`select $type = 1`)
+  {
+    SET AUTOCOMMIT = 0;
+  }
+  --error ER_LOCK_WAIT_TIMEOUT
+  eval UPDATE t SET f = 'dark blue $type' WHERE f = 'red';
+  eval INSERT INTO t VALUES (6 + ($type * 10),"brown");
+  INSERT INTO n VALUES (now(),"brown");
+  
+  connection conn1;
+  COMMIT;
+  
+  connection conn2;
+  COMMIT;
+  --source include/show_binlog_events.inc
+  
+  dec $type;
+}
+
+connection master;
+sync_slave_with_master;
+
+connection master;
+let $diff_statement= SELECT * FROM t order by i;
+source include/diff_master_slave.inc;
+
+connection master;
+let $diff_statement= SELECT * FROM n order by d, f;
+source include/diff_master_slave.inc;
+
+--echo ########################################################################
+--echo #                                Cleanup
+--echo ########################################################################
+
+connection master;
+DROP TRIGGER tr;
+DROP TABLE t;
+DROP TABLE n;
+
+sync_slave_with_master;
+
+connection master;
+disconnect conn1;
+disconnect conn2;
diff --git a/mysql-test/suite/rpl/t/rpl_incident.test b/mysql-test/suite/rpl/t/rpl_incident.test
index 38fcc116736..66893ebb93f 100644
--- a/mysql-test/suite/rpl/t/rpl_incident.test
+++ b/mysql-test/suite/rpl/t/rpl_incident.test
@@ -14,42 +14,13 @@ REPLACE INTO t1 VALUES (4);
 SELECT * FROM t1;
 
 connection slave;
-source include/wait_for_slave_sql_to_stop.inc;
+# Wait until SQL thread stops with error LOST_EVENT on master
+let $slave_sql_errno= 1590;
+source include/wait_for_slave_sql_error.inc;
 
 # The 4 should not be inserted into the table, since the incident log
 # event should have stop the slave.
 --echo **** On Slave ****
-#### BEGIN DEBUG INFO ADDED BY SVEN 2008-07-18 -- SEE BUG#38077 ####
-let $tables= query_get_value(SHOW TABLES, Tables_in_test, 1);
-if (`SELECT '$tables' != 't1'`)
-{
-  --echo **** TEST CASE BUG! PRINTING DEBUG INFO! ****
-  --echo **** Dear developer, if you see this in the output of a test
-  --echo **** case run, please add all the information below as a
-  --echo **** comment to BUG#38077. If it's a pushbuild failure, please
-  --echo **** include a link to the push page.
-  --echo **** Thank you! /Sven
-  SHOW BINLOG EVENTS;
-  --echo **** master binlog ****
-  --error 0,1
-  --exec $MYSQL_BINLOG --hexdump $MYSQLTEST_VARDIR/log/master-bin.000001
-  --echo **** slave binlog ****
-  --error 0,1
-  --exec $MYSQL_BINLOG --hexdump $MYSQLTEST_VARDIR/log/slave-bin.000001
-  --echo **** slave status ****
-  query_vertical SHOW SLAVE STATUS;
-  --echo **** slave's master status ****
-  SHOW MASTER STATUS;
-  --echo **** slave binlog events ****
-  --echo [on master]
-  connection master;
-  --echo **** master status ****
-  SHOW MASTER STATUS;
-  --echo **** master binlog events ****
-  SHOW BINLOG EVENTS;
-  exit;
-}
-#### END DEBUG INFO ####
 SELECT * FROM t1;
 
 --replace_result $MASTER_MYPORT MASTER_PORT
diff --git a/mysql-test/suite/rpl/t/rpl_slave_load_tmpdir_not_exist.test b/mysql-test/suite/rpl/t/rpl_slave_load_tmpdir_not_exist.test
index 3a80fa43f20..68c41abf537 100644
--- a/mysql-test/suite/rpl/t/rpl_slave_load_tmpdir_not_exist.test
+++ b/mysql-test/suite/rpl/t/rpl_slave_load_tmpdir_not_exist.test
@@ -20,5 +20,5 @@ eval CHANGE MASTER TO MASTER_USER='root',
 START SLAVE;
 
 source include/wait_for_slave_sql_to_stop.inc;
-let $error=query_get_value("show slave status", Last_SQL_Error, 1);
-echo $error;
+let $errno=query_get_value("show slave status", Last_SQL_Errno, 1);
+echo $errno;
diff --git a/mysql-test/suite/rpl/t/rpl_sp.test b/mysql-test/suite/rpl/t/rpl_sp.test
index ec6464fb095..9be630e9ae8 100644
--- a/mysql-test/suite/rpl/t/rpl_sp.test
+++ b/mysql-test/suite/rpl/t/rpl_sp.test
@@ -642,3 +642,6 @@ drop procedure ` mysqltestbug36570_p2`;
 drop function mysqltestbug36570_f1;
 --echo End of 5.0 tests
 --echo End of 5.1 tests
+
+# Cleanup
+sync_slave_with_master;
diff --git a/mysql-test/suite/rpl/t/rpl_temporary.test b/mysql-test/suite/rpl/t/rpl_temporary.test
index 4e83d39710c..a59e4f2fd21 100644
--- a/mysql-test/suite/rpl/t/rpl_temporary.test
+++ b/mysql-test/suite/rpl/t/rpl_temporary.test
@@ -22,6 +22,77 @@ call mtr.add_suppression("Slave: Can\'t find record in \'user\' Error_code: 1032
 
 sync_with_master;
 reset master;
+
+# ##################################################################
+# BUG#41725: slave crashes when inserting into temporary table after
+#            stop/start slave
+#
+#  This test checks that both reported issues (assertion failure and
+#  crash) go away. It is implemented as follows:
+#    
+#    case 1: assertion failure
+#      i) create and insert into temporary table on master
+#     ii) sync slave with master
+#    iii) stop and restart slave
+#     iv) insert into master another value
+#      v) sync slave with master
+#
+#
+#    case 2: crash (SIGSEV)
+#      i) create and insert into temporary table on master (insert
+#         produces warnings)
+#     ii) sync slave with master
+#    iii) stop and restart slave
+#     iv) insert into master more values
+#      v) sync slave with master
+
+# case 1: Assertion in Field_string::store() failed because current 
+#         thread reference differed from table->in_use after slave 
+#         restart
+
+connection master;
+
+disable_warnings;
+DROP TABLE IF EXISTS t1;
+enable_warnings;
+
+CREATE TEMPORARY TABLE t1 (a char(1));
+INSERT INTO t1 VALUES ('a');
+sync_slave_with_master;
+
+source include/stop_slave.inc;
+source include/start_slave.inc;
+
+connection master;
+INSERT INTO t1 VALUES ('b');
+sync_slave_with_master;
+
+# case 2: crash on sp_rcontext::find_handler because it used 
+#         reference to invalid THD object after slave restart
+
+connection master;
+
+disable_warnings;
+DROP TABLE IF EXISTS t1;
+enable_warnings;
+CREATE TEMPORARY TABLE `t1`(`a` tinyint,`b` char(1))engine=myisam;
+INSERT INTO `t1` set `a`=128,`b`='128';
+
+sync_slave_with_master;
+
+source include/stop_slave.inc;
+source include/start_slave.inc;
+
+connection master;
+INSERT INTO `t1` set `a`=128,`b`='128';
+sync_slave_with_master;
+
+# cleanup
+
+connection master;
+DROP TABLE t1;
+sync_slave_with_master;
+
 connection master;
 
 connect (con1,localhost,root,,);
diff --git a/mysql-test/suite/rpl_ndb/r/rpl_ndb_log.result b/mysql-test/suite/rpl_ndb/r/rpl_ndb_log.result
index 5b6ca5f5097..540c430e757 100644
--- a/mysql-test/suite/rpl_ndb/r/rpl_ndb_log.result
+++ b/mysql-test/suite/rpl_ndb/r/rpl_ndb_log.result
@@ -317,14 +317,14 @@ insert into t1 values (NULL, last_insert_id()), (NULL, last_insert_id());
 show binlog events from <binlog_start>;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
 master-bin.000001	#	Query	#	#	use `test`; create table t1(a int auto_increment primary key, b int)
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
-master-bin.000001	#	Query	#	#	use `test`; BEGIN
+master-bin.000001	#	Query	#	#	COMMIT
+master-bin.000001	#	Query	#	#	BEGIN
 master-bin.000001	#	Table_map	#	#	table_id: # (test.t1)
 master-bin.000001	#	Write_rows	#	#	table_id: # flags: STMT_END_F
-master-bin.000001	#	Query	#	#	use `test`; COMMIT
+master-bin.000001	#	Query	#	#	COMMIT
 select * from t1;
 a	b
 1	1
diff --git a/mysql-test/suite/rpl_ndb/r/rpl_ndb_stm_innodb.result b/mysql-test/suite/rpl_ndb/r/rpl_ndb_stm_innodb.result
index db9920dd79f..675a69d17a4 100644
--- a/mysql-test/suite/rpl_ndb/r/rpl_ndb_stm_innodb.result
+++ b/mysql-test/suite/rpl_ndb/r/rpl_ndb_stm_innodb.result
@@ -28,7 +28,7 @@ from mysql.ndb_apply_status;
 
 show binlog events from <start_pos> limit 1;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	<start_pos>	Query	1	#	use `test`; BEGIN
+master-bin.000001	<start_pos>	Query	1	#	BEGIN
 
 # Now the insert, one step after
 
@@ -53,7 +53,7 @@ from mysql.ndb_apply_status;
 <log_name>	<start_pos>	<end_pos>
 show binlog events from <start_pos> limit 1;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
-master-bin.000001	<start_pos>	Query	1	#	use `test`; BEGIN
+master-bin.000001	<start_pos>	Query	1	#	BEGIN
 
 show binlog events from <start_pos> limit 1,2;
 Log_name	Pos	Event_type	Server_id	End_log_pos	Info
diff --git a/mysql-test/suite/rpl_ndb/t/disabled.def b/mysql-test/suite/rpl_ndb/t/disabled.def
index 6908269d014..2f15112515e 100644
--- a/mysql-test/suite/rpl_ndb/t/disabled.def
+++ b/mysql-test/suite/rpl_ndb/t/disabled.def
@@ -11,3 +11,4 @@
 ##############################################################################
 
 # the below testcase have been reworked to avoid the bug, test contains comment, keep bug open
+rpl_ndb_2ndb   : Bug#45974: rpl_ndb_2ndb fails sporadically
diff --git a/mysql-test/suite/rpl_ndb/t/rpl_ndb_2other-slave.opt b/mysql-test/suite/rpl_ndb/t/rpl_ndb_2other-slave.opt
index 188b31efa8a..dff423702b4 100644
--- a/mysql-test/suite/rpl_ndb/t/rpl_ndb_2other-slave.opt
+++ b/mysql-test/suite/rpl_ndb/t/rpl_ndb_2other-slave.opt
@@ -1 +1 @@
---innodb --ndbcluster=0 --log-slave-updates=0
+--innodb --loose-ndbcluster=OFF --log-slave-updates=0
diff --git a/mysql-test/suite/sys_vars/r/innodb_data_home_dir_basic.result b/mysql-test/suite/sys_vars/r/innodb_data_home_dir_basic.result
index e4bdd79b7c3..52a97bb6c95 100644
--- a/mysql-test/suite/sys_vars/r/innodb_data_home_dir_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_data_home_dir_basic.result
@@ -1,16 +1,16 @@
 '#---------------------BS_STVARS_025_01----------------------#'
 SELECT COUNT(@@GLOBAL.innodb_data_home_dir);
 COUNT(@@GLOBAL.innodb_data_home_dir)
-1
-1 Expected
+0
+0 Expected
 '#---------------------BS_STVARS_025_02----------------------#'
 SET @@GLOBAL.innodb_data_home_dir=1;
 ERROR HY000: Variable 'innodb_data_home_dir' is a read only variable
 Expected error 'Read only variable'
 SELECT COUNT(@@GLOBAL.innodb_data_home_dir);
 COUNT(@@GLOBAL.innodb_data_home_dir)
-1
-1 Expected
+0
+0 Expected
 '#---------------------BS_STVARS_025_03----------------------#'
 SELECT @@GLOBAL.innodb_data_home_dir = VARIABLE_VALUE
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
@@ -20,8 +20,8 @@ NULL
 1 Expected
 SELECT COUNT(@@GLOBAL.innodb_data_home_dir);
 COUNT(@@GLOBAL.innodb_data_home_dir)
-1
-1 Expected
+0
+0 Expected
 SELECT COUNT(VARIABLE_VALUE)
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES 
 WHERE VARIABLE_NAME='innodb_data_home_dir';
@@ -36,8 +36,8 @@ NULL
 '#---------------------BS_STVARS_025_05----------------------#'
 SELECT COUNT(@@innodb_data_home_dir);
 COUNT(@@innodb_data_home_dir)
-1
-1 Expected
+0
+0 Expected
 SELECT COUNT(@@local.innodb_data_home_dir);
 ERROR HY000: Variable 'innodb_data_home_dir' is a GLOBAL variable
 Expected error 'Variable is a GLOBAL variable'
@@ -46,8 +46,8 @@ ERROR HY000: Variable 'innodb_data_home_dir' is a GLOBAL variable
 Expected error 'Variable is a GLOBAL variable'
 SELECT COUNT(@@GLOBAL.innodb_data_home_dir);
 COUNT(@@GLOBAL.innodb_data_home_dir)
-1
-1 Expected
+0
+0 Expected
 SELECT innodb_data_home_dir = @@SESSION.innodb_data_home_dir;
 ERROR 42S22: Unknown column 'innodb_data_home_dir' in 'field list'
 Expected error 'Readonly variable'
diff --git a/mysql-test/suite/sys_vars/r/innodb_flush_method_basic.result b/mysql-test/suite/sys_vars/r/innodb_flush_method_basic.result
index 8c8924cdd86..4a85748092d 100644
--- a/mysql-test/suite/sys_vars/r/innodb_flush_method_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_flush_method_basic.result
@@ -1,16 +1,16 @@
 '#---------------------BS_STVARS_029_01----------------------#'
 SELECT COUNT(@@GLOBAL.innodb_flush_method);
 COUNT(@@GLOBAL.innodb_flush_method)
-1
-1 Expected
+0
+0 Expected
 '#---------------------BS_STVARS_029_02----------------------#'
 SET @@GLOBAL.innodb_flush_method=1;
 ERROR HY000: Variable 'innodb_flush_method' is a read only variable
 Expected error 'Read only variable'
 SELECT COUNT(@@GLOBAL.innodb_flush_method);
 COUNT(@@GLOBAL.innodb_flush_method)
-1
-1 Expected
+0
+0 Expected
 '#---------------------BS_STVARS_029_03----------------------#'
 SELECT @@GLOBAL.innodb_flush_method = VARIABLE_VALUE
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
@@ -20,8 +20,8 @@ NULL
 1 Expected
 SELECT COUNT(@@GLOBAL.innodb_flush_method);
 COUNT(@@GLOBAL.innodb_flush_method)
-1
-1 Expected
+0
+0 Expected
 SELECT COUNT(VARIABLE_VALUE)
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES 
 WHERE VARIABLE_NAME='innodb_flush_method';
@@ -36,8 +36,8 @@ NULL
 '#---------------------BS_STVARS_029_05----------------------#'
 SELECT COUNT(@@innodb_flush_method);
 COUNT(@@innodb_flush_method)
-1
-1 Expected
+0
+0 Expected
 SELECT COUNT(@@local.innodb_flush_method);
 ERROR HY000: Variable 'innodb_flush_method' is a GLOBAL variable
 Expected error 'Variable is a GLOBAL variable'
@@ -46,8 +46,8 @@ ERROR HY000: Variable 'innodb_flush_method' is a GLOBAL variable
 Expected error 'Variable is a GLOBAL variable'
 SELECT COUNT(@@GLOBAL.innodb_flush_method);
 COUNT(@@GLOBAL.innodb_flush_method)
-1
-1 Expected
+0
+0 Expected
 SELECT innodb_flush_method = @@SESSION.innodb_flush_method;
 ERROR 42S22: Unknown column 'innodb_flush_method' in 'field list'
 Expected error 'Readonly variable'
diff --git a/mysql-test/suite/sys_vars/r/rpl_init_slave_func.result b/mysql-test/suite/sys_vars/r/rpl_init_slave_func.result
index 5f730bff882..bdb586e84c2 100644
--- a/mysql-test/suite/sys_vars/r/rpl_init_slave_func.result
+++ b/mysql-test/suite/sys_vars/r/rpl_init_slave_func.result
@@ -12,7 +12,7 @@ DROP TABLE IF EXISTS t1;
 CREATE TEMPORARY TABLE t1 AS SELECT @@global.init_slave AS my_column;
 DESCRIBE t1;
 Field	Type	Null	Key	Default	Extra
-my_column	varchar(59)	NO			
+my_column	varchar(59)	YES		NULL	
 DROP TABLE t1;
 SELECT @@global.init_slave = 'SET @@global.max_connections = @@global.max_connections + 1';
 @@global.init_slave = 'SET @@global.max_connections = @@global.max_connections + 1'
diff --git a/mysql-test/suite/sys_vars/r/ssl_capath_basic.result b/mysql-test/suite/sys_vars/r/ssl_capath_basic.result
index 3d161392917..f04b85b956f 100644
--- a/mysql-test/suite/sys_vars/r/ssl_capath_basic.result
+++ b/mysql-test/suite/sys_vars/r/ssl_capath_basic.result
@@ -1,16 +1,16 @@
 '#---------------------BS_STVARS_046_01----------------------#'
 SELECT COUNT(@@GLOBAL.ssl_capath);
 COUNT(@@GLOBAL.ssl_capath)
-1
-1 Expected
+0
+0 Expected
 '#---------------------BS_STVARS_046_02----------------------#'
 SET @@GLOBAL.ssl_capath=1;
 ERROR HY000: Variable 'ssl_capath' is a read only variable
 Expected error 'Read only variable'
 SELECT COUNT(@@GLOBAL.ssl_capath);
 COUNT(@@GLOBAL.ssl_capath)
-1
-1 Expected
+0
+0 Expected
 '#---------------------BS_STVARS_046_03----------------------#'
 SELECT @@GLOBAL.ssl_capath = VARIABLE_VALUE
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
@@ -20,8 +20,8 @@ NULL
 1 Expected
 SELECT COUNT(@@GLOBAL.ssl_capath);
 COUNT(@@GLOBAL.ssl_capath)
-1
-1 Expected
+0
+0 Expected
 SELECT COUNT(VARIABLE_VALUE)
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES 
 WHERE VARIABLE_NAME='ssl_capath';
@@ -36,8 +36,8 @@ NULL
 '#---------------------BS_STVARS_046_05----------------------#'
 SELECT COUNT(@@ssl_capath);
 COUNT(@@ssl_capath)
-1
-1 Expected
+0
+0 Expected
 SELECT COUNT(@@local.ssl_capath);
 ERROR HY000: Variable 'ssl_capath' is a GLOBAL variable
 Expected error 'Variable is a GLOBAL variable'
@@ -46,8 +46,8 @@ ERROR HY000: Variable 'ssl_capath' is a GLOBAL variable
 Expected error 'Variable is a GLOBAL variable'
 SELECT COUNT(@@GLOBAL.ssl_capath);
 COUNT(@@GLOBAL.ssl_capath)
-1
-1 Expected
+0
+0 Expected
 SELECT ssl_capath = @@SESSION.ssl_capath;
 ERROR 42S22: Unknown column 'ssl_capath' in 'field list'
 Expected error 'Readonly variable'
diff --git a/mysql-test/suite/sys_vars/r/ssl_cipher_basic.result b/mysql-test/suite/sys_vars/r/ssl_cipher_basic.result
index df0fc8b5aad..0eed40d0580 100644
--- a/mysql-test/suite/sys_vars/r/ssl_cipher_basic.result
+++ b/mysql-test/suite/sys_vars/r/ssl_cipher_basic.result
@@ -1,16 +1,16 @@
 '#---------------------BS_STVARS_048_01----------------------#'
 SELECT COUNT(@@GLOBAL.ssl_cipher);
 COUNT(@@GLOBAL.ssl_cipher)
-1
-1 Expected
+0
+0 Expected
 '#---------------------BS_STVARS_048_02----------------------#'
 SET @@GLOBAL.ssl_cipher=1;
 ERROR HY000: Variable 'ssl_cipher' is a read only variable
 Expected error 'Read only variable'
 SELECT COUNT(@@GLOBAL.ssl_cipher);
 COUNT(@@GLOBAL.ssl_cipher)
-1
-1 Expected
+0
+0 Expected
 '#---------------------BS_STVARS_048_03----------------------#'
 SELECT @@GLOBAL.ssl_cipher = VARIABLE_VALUE
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
@@ -20,8 +20,8 @@ NULL
 1 Expected
 SELECT COUNT(@@GLOBAL.ssl_cipher);
 COUNT(@@GLOBAL.ssl_cipher)
-1
-1 Expected
+0
+0 Expected
 SELECT COUNT(VARIABLE_VALUE)
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES 
 WHERE VARIABLE_NAME='ssl_cipher';
@@ -36,8 +36,8 @@ NULL
 '#---------------------BS_STVARS_048_05----------------------#'
 SELECT COUNT(@@ssl_cipher);
 COUNT(@@ssl_cipher)
-1
-1 Expected
+0
+0 Expected
 SELECT COUNT(@@local.ssl_cipher);
 ERROR HY000: Variable 'ssl_cipher' is a GLOBAL variable
 Expected error 'Variable is a GLOBAL variable'
@@ -46,8 +46,8 @@ ERROR HY000: Variable 'ssl_cipher' is a GLOBAL variable
 Expected error 'Variable is a GLOBAL variable'
 SELECT COUNT(@@GLOBAL.ssl_cipher);
 COUNT(@@GLOBAL.ssl_cipher)
-1
-1 Expected
+0
+0 Expected
 SELECT ssl_cipher = @@SESSION.ssl_cipher;
 ERROR 42S22: Unknown column 'ssl_cipher' in 'field list'
 Expected error 'Readonly variable'
diff --git a/mysql-test/suite/sys_vars/t/innodb_data_home_dir_basic.test b/mysql-test/suite/sys_vars/t/innodb_data_home_dir_basic.test
index f3b02edf83b..acf3741d5fa 100644
--- a/mysql-test/suite/sys_vars/t/innodb_data_home_dir_basic.test
+++ b/mysql-test/suite/sys_vars/t/innodb_data_home_dir_basic.test
@@ -29,7 +29,7 @@
 #   Displaying default value                                       #
 ####################################################################
 SELECT COUNT(@@GLOBAL.innodb_data_home_dir);
---echo 1 Expected
+--echo 0 Expected
 
 
 --echo '#---------------------BS_STVARS_025_02----------------------#'
@@ -42,7 +42,7 @@ SET @@GLOBAL.innodb_data_home_dir=1;
 --echo Expected error 'Read only variable'
 
 SELECT COUNT(@@GLOBAL.innodb_data_home_dir);
---echo 1 Expected
+--echo 0 Expected
 
 
 
@@ -58,7 +58,7 @@ WHERE VARIABLE_NAME='innodb_data_home_dir';
 --echo 1 Expected
 
 SELECT COUNT(@@GLOBAL.innodb_data_home_dir);
---echo 1 Expected
+--echo 0 Expected
 
 SELECT COUNT(VARIABLE_VALUE)
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES 
@@ -82,7 +82,7 @@ SELECT @@innodb_data_home_dir = @@GLOBAL.innodb_data_home_dir;
 ################################################################################
 
 SELECT COUNT(@@innodb_data_home_dir);
---echo 1 Expected
+--echo 0 Expected
 
 --Error ER_INCORRECT_GLOBAL_LOCAL_VAR
 SELECT COUNT(@@local.innodb_data_home_dir);
@@ -93,7 +93,7 @@ SELECT COUNT(@@SESSION.innodb_data_home_dir);
 --echo Expected error 'Variable is a GLOBAL variable'
 
 SELECT COUNT(@@GLOBAL.innodb_data_home_dir);
---echo 1 Expected
+--echo 0 Expected
 
 --Error ER_BAD_FIELD_ERROR
 SELECT innodb_data_home_dir = @@SESSION.innodb_data_home_dir;
diff --git a/mysql-test/suite/sys_vars/t/innodb_flush_method_basic.test b/mysql-test/suite/sys_vars/t/innodb_flush_method_basic.test
index 531df4a2464..75af00e33af 100644
--- a/mysql-test/suite/sys_vars/t/innodb_flush_method_basic.test
+++ b/mysql-test/suite/sys_vars/t/innodb_flush_method_basic.test
@@ -29,7 +29,7 @@
 #   Displaying default value                                       #
 ####################################################################
 SELECT COUNT(@@GLOBAL.innodb_flush_method);
---echo 1 Expected
+--echo 0 Expected
 
 
 --echo '#---------------------BS_STVARS_029_02----------------------#'
@@ -42,7 +42,7 @@ SET @@GLOBAL.innodb_flush_method=1;
 --echo Expected error 'Read only variable'
 
 SELECT COUNT(@@GLOBAL.innodb_flush_method);
---echo 1 Expected
+--echo 0 Expected
 
 
 
@@ -58,7 +58,7 @@ WHERE VARIABLE_NAME='innodb_flush_method';
 --echo 1 Expected
 
 SELECT COUNT(@@GLOBAL.innodb_flush_method);
---echo 1 Expected
+--echo 0 Expected
 
 SELECT COUNT(VARIABLE_VALUE)
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES 
@@ -82,7 +82,7 @@ SELECT @@innodb_flush_method = @@GLOBAL.innodb_flush_method;
 ################################################################################
 
 SELECT COUNT(@@innodb_flush_method);
---echo 1 Expected
+--echo 0 Expected
 
 --Error ER_INCORRECT_GLOBAL_LOCAL_VAR
 SELECT COUNT(@@local.innodb_flush_method);
@@ -93,7 +93,7 @@ SELECT COUNT(@@SESSION.innodb_flush_method);
 --echo Expected error 'Variable is a GLOBAL variable'
 
 SELECT COUNT(@@GLOBAL.innodb_flush_method);
---echo 1 Expected
+--echo 0 Expected
 
 --Error ER_BAD_FIELD_ERROR
 SELECT innodb_flush_method = @@SESSION.innodb_flush_method;
diff --git a/mysql-test/suite/sys_vars/t/ssl_capath_basic.test b/mysql-test/suite/sys_vars/t/ssl_capath_basic.test
index c32b572fb1b..ece9fe992d9 100644
--- a/mysql-test/suite/sys_vars/t/ssl_capath_basic.test
+++ b/mysql-test/suite/sys_vars/t/ssl_capath_basic.test
@@ -27,7 +27,7 @@
 #   Displaying default value                                       #
 ####################################################################
 SELECT COUNT(@@GLOBAL.ssl_capath);
---echo 1 Expected
+--echo 0 Expected
 
 
 --echo '#---------------------BS_STVARS_046_02----------------------#'
@@ -40,7 +40,7 @@ SET @@GLOBAL.ssl_capath=1;
 --echo Expected error 'Read only variable'
 
 SELECT COUNT(@@GLOBAL.ssl_capath);
---echo 1 Expected
+--echo 0 Expected
 
 
 
@@ -56,7 +56,7 @@ WHERE VARIABLE_NAME='ssl_capath';
 --echo 1 Expected
 
 SELECT COUNT(@@GLOBAL.ssl_capath);
---echo 1 Expected
+--echo 0 Expected
 
 SELECT COUNT(VARIABLE_VALUE)
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES 
@@ -80,7 +80,7 @@ SELECT @@ssl_capath = @@GLOBAL.ssl_capath;
 ################################################################################
 
 SELECT COUNT(@@ssl_capath);
---echo 1 Expected
+--echo 0 Expected
 
 --Error ER_INCORRECT_GLOBAL_LOCAL_VAR
 SELECT COUNT(@@local.ssl_capath);
@@ -91,7 +91,7 @@ SELECT COUNT(@@SESSION.ssl_capath);
 --echo Expected error 'Variable is a GLOBAL variable'
 
 SELECT COUNT(@@GLOBAL.ssl_capath);
---echo 1 Expected
+--echo 0 Expected
 
 --Error ER_BAD_FIELD_ERROR
 SELECT ssl_capath = @@SESSION.ssl_capath;
diff --git a/mysql-test/suite/sys_vars/t/ssl_cipher_basic.test b/mysql-test/suite/sys_vars/t/ssl_cipher_basic.test
index 425f7aae442..c58b22da2d5 100644
--- a/mysql-test/suite/sys_vars/t/ssl_cipher_basic.test
+++ b/mysql-test/suite/sys_vars/t/ssl_cipher_basic.test
@@ -27,7 +27,7 @@
 #   Displaying default value                                       #
 ####################################################################
 SELECT COUNT(@@GLOBAL.ssl_cipher);
---echo 1 Expected
+--echo 0 Expected
 
 
 --echo '#---------------------BS_STVARS_048_02----------------------#'
@@ -40,7 +40,7 @@ SET @@GLOBAL.ssl_cipher=1;
 --echo Expected error 'Read only variable'
 
 SELECT COUNT(@@GLOBAL.ssl_cipher);
---echo 1 Expected
+--echo 0 Expected
 
 
 
@@ -56,7 +56,7 @@ WHERE VARIABLE_NAME='ssl_cipher';
 --echo 1 Expected
 
 SELECT COUNT(@@GLOBAL.ssl_cipher);
---echo 1 Expected
+--echo 0 Expected
 
 SELECT COUNT(VARIABLE_VALUE)
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES 
@@ -80,7 +80,7 @@ SELECT @@ssl_cipher = @@GLOBAL.ssl_cipher;
 ################################################################################
 
 SELECT COUNT(@@ssl_cipher);
---echo 1 Expected
+--echo 0 Expected
 
 --Error ER_INCORRECT_GLOBAL_LOCAL_VAR
 SELECT COUNT(@@local.ssl_cipher);
@@ -91,7 +91,7 @@ SELECT COUNT(@@SESSION.ssl_cipher);
 --echo Expected error 'Variable is a GLOBAL variable'
 
 SELECT COUNT(@@GLOBAL.ssl_cipher);
---echo 1 Expected
+--echo 0 Expected
 
 --Error ER_BAD_FIELD_ERROR
 SELECT ssl_cipher = @@SESSION.ssl_cipher;
diff --git a/mysql-test/t/archive_bitfield.test b/mysql-test/t/archive_bitfield.test
index 1e4692270b5..2e90ce39708 100644
--- a/mysql-test/t/archive_bitfield.test
+++ b/mysql-test/t/archive_bitfield.test
@@ -94,5 +94,11 @@ INSERT INTO `t1` VALUES
 (NULL,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,b'100000',b'010010',b'011111',4,5,5,5,5,5,5,5,5,5,3,2,1),
 (NULL,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,b'000000',b'001100',b'111111',4,5,5,5,5,5,5,5,5,5,3,2,1),
 (NULL,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,b'111111',b'000000',b'000000',4,5,5,5,5,5,5,5,5,5,3,2,1);
+# Determine the number of open sessions
+--source include/count_sessions.inc
 --exec $MYSQL_DUMP --hex-blob --compact --order-by-primary --skip-extended-insert --no-create-info test t1
 drop table t1;
+# Wait till the number of open sessions is <= the number before the run with $MYSQL_DUMP
+# = The session caused by mysqldump has finished its disconnect
+--source include/wait_until_count_sessions.inc
+
diff --git a/mysql-test/t/bug46080-master.opt b/mysql-test/t/bug46080-master.opt
new file mode 100644
index 00000000000..f59740afe60
--- /dev/null
+++ b/mysql-test/t/bug46080-master.opt
@@ -0,0 +1 @@
+--skip-grant-tables --skip-name-resolve --safemalloc-mem-limit=4000000
diff --git a/mysql-test/t/bug46080.test b/mysql-test/t/bug46080.test
new file mode 100644
index 00000000000..7e56e3ce421
--- /dev/null
+++ b/mysql-test/t/bug46080.test
@@ -0,0 +1,22 @@
+--echo #
+--echo # Bug #46080: group_concat(... order by) crashes server when
+--echo #  sort_buffer_size cannot allocate
+--echo #
+
+CREATE TABLE t1(a CHAR(255));
+INSERT INTO t1 VALUES ('a');
+
+SET @@SESSION.sort_buffer_size=5*16*1000000;
+SET @@SESSION.max_heap_table_size=5*1000000;
+
+--echo # Must not crash.
+--disable_result_log
+--error 0,5
+SELECT GROUP_CONCAT(a ORDER BY a) FROM t1 GROUP BY a;
+--enable_result_log
+
+DROP TABLE t1;
+SET @@SESSION.sort_buffer_size=default;
+SET @@SESSION.max_heap_table_size=default;
+
+--echo End of 5.0 tests
diff --git a/mysql-test/t/cast.test b/mysql-test/t/cast.test
index 50865215944..8e60d548c2f 100644
--- a/mysql-test/t/cast.test
+++ b/mysql-test/t/cast.test
@@ -269,3 +269,18 @@ SELECT HOUR(NULL),
 DROP TABLE t1;
 
 --echo End of 5.0 tests
+
+--echo #
+--echo #  Bug #44766: valgrind error when using convert() in a subquery
+--echo #
+
+CREATE TABLE t1(a tinyint);
+INSERT INTO t1 VALUES (127);
+SELECT 1 FROM
+(
+ SELECT CONVERT(t2.a USING UTF8) FROM t1, t1 t2 LIMIT 1
+) AS s LIMIT 1;
+DROP TABLE t1;
+
+
+--echo End of 5.1 tests
diff --git a/mysql-test/t/client_xml.test b/mysql-test/t/client_xml.test
index 739b56f5ab1..0847e2b366b 100644
--- a/mysql-test/t/client_xml.test
+++ b/mysql-test/t/client_xml.test
@@ -18,6 +18,10 @@ create table t1 (
   `a>b` text
 );
 insert into t1 values (1, 2, 'a&b a<b a>b');
+
+# Determine the number of open sessions
+--source include/count_sessions.inc
+
 --exec $MYSQL --xml test -e "select * from t1"
 --exec $MYSQL_DUMP --xml --skip-create test
 
@@ -33,3 +37,8 @@ drop table t1;
 
 # Restore global concurrent_insert value
 set @@global.concurrent_insert= @old_concurrent_insert;
+
+# Wait till the number of open sessions is <= the number before the runs with
+# $MYSQL and $MYSQL_DUMP
+# = The session caused by mysql and mysqldump have finished their disconnect
+--source include/wait_until_count_sessions.inc
diff --git a/mysql-test/t/connect.test b/mysql-test/t/connect.test
index f609240646a..9a29e4ff861 100644
--- a/mysql-test/t/connect.test
+++ b/mysql-test/t/connect.test
@@ -55,7 +55,8 @@ disconnect con4;
 connect (fail_con,localhost,test,,test2);
 --replace_result $MASTER_MYSOCK MASTER_SOCKET $MASTER_MYPORT MASTER_PORT
 --error ER_ACCESS_DENIED_ERROR
-connect (fail_con,localhost,test,,"");
+# Need to protect "" within '' so it's interpreted literally
+connect (fail_con,localhost,test,,'""');
 --replace_result $MASTER_MYSOCK MASTER_SOCKET $MASTER_MYPORT MASTER_PORT
 --error ER_ACCESS_DENIED_ERROR
 connect (fail_con,localhost,test,zorro,test2);
diff --git a/mysql-test/t/consistent_snapshot.test b/mysql-test/t/consistent_snapshot.test
index 82edf2e22b2..fb1f3bc007c 100644
--- a/mysql-test/t/consistent_snapshot.test
+++ b/mysql-test/t/consistent_snapshot.test
@@ -12,9 +12,9 @@ connect (con1,localhost,root,,);
 --echo # Establish connection con2 (user=root)
 connect (con2,localhost,root,,);
 
-### Test 1:
-### - While a consistent snapshot transaction is executed,
-###   no external inserts should be visible to the transaction.
+--echo ### Test 1:
+--echo ### - While a consistent snapshot transaction is executed,
+--echo ###   no external inserts should be visible to the transaction.
 
 --echo # Switch to connection con1
 connection con1;
@@ -31,9 +31,9 @@ SELECT * FROM t1; # if consistent snapshot was set as expected, we
 # should see nothing.
 COMMIT;
 
-### Test 2:
-### - For any non-consistent snapshot transaction, external
-###   committed inserts should be visible to the transaction.
+--echo ### Test 2:
+--echo ### - For any non-consistent snapshot transaction, external
+--echo ###   committed inserts should be visible to the transaction.
 
 DELETE FROM t1;
 START TRANSACTION; # Now we omit WITH CONSISTENT SNAPSHOT
@@ -48,6 +48,24 @@ SELECT * FROM t1; # if consistent snapshot was not set, as expected, we
 # should see 1.
 COMMIT;
 
+--echo ### Test 3:
+--echo ### - Bug#44664: valgrind warning for COMMIT_AND_CHAIN and ROLLBACK_AND_CHAIN
+--echo ###   Chaining a transaction does not retain consistency level.
+
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+DELETE FROM t1;
+COMMIT WORK AND CHAIN;
+
+--echo # Switch to connection con2
+connection con2;
+INSERT INTO t1 VALUES(1);
+
+--echo # Switch to connection con1
+connection con1;
+SELECT * FROM t1; # if consistent snapshot was not set, as expected, we
+# should see 1.
+COMMIT;
+
 --echo # Switch to connection default + close connections con1 and con2
 connection default;
 disconnect con1;
diff --git a/mysql-test/t/count_distinct3.test b/mysql-test/t/count_distinct3.test
index 2f7cf7e5260..ad2bbee95a3 100644
--- a/mysql-test/t/count_distinct3.test
+++ b/mysql-test/t/count_distinct3.test
@@ -3,6 +3,8 @@
 # mysql-4.1
 #
 
+--source include/big_test.inc
+
 --disable_warnings
 DROP TABLE IF EXISTS t1, t2;
 --enable_warnings
diff --git a/mysql-test/t/csv.test b/mysql-test/t/csv.test
index abc161d014c..cdf274190dd 100644
--- a/mysql-test/t/csv.test
+++ b/mysql-test/t/csv.test
@@ -1800,10 +1800,11 @@ connect (con1,localhost,root,,);
 # EE_FILENOTFOUND 29
 --error 29
 select * from t1;
+--disconnect con1
+--source include/wait_until_disconnected.inc
 connection default;
 unlock tables;
 drop table t1;
---disconnect con1
 
 #
 # Bug#41441 repair csv table crashes debug server
diff --git a/mysql-test/t/ctype_cp932_binlog_stm.test b/mysql-test/t/ctype_cp932_binlog_stm.test
index 383009ae7c3..89df33a6df5 100644
--- a/mysql-test/t/ctype_cp932_binlog_stm.test
+++ b/mysql-test/t/ctype_cp932_binlog_stm.test
@@ -22,7 +22,7 @@ CALL bug18293("Foo's a Bar", _cp932 0xED40ED41ED42, 47.93)|
 SELECT HEX(s1),HEX(s2),d FROM t4|
 DROP PROCEDURE bug18293|
 DROP TABLE t4|
-SHOW BINLOG EVENTS FROM 369|
+SHOW BINLOG EVENTS FROM 370|
 delimiter ;|
 
 --echo End of 5.0 tests
@@ -31,7 +31,14 @@ delimiter ;|
 # #28436: Incorrect position in SHOW BINLOG EVENTS causes server coredump 
 # Note: 364 is a magic position (found experimentally, depends on 
 # the log's contents) that caused the server crash.
+
 --error 1220
-SHOW BINLOG EVENTS FROM 364;
+SHOW BINLOG EVENTS FROM 365;
+
+--echo Bug#44352 UPPER/LOWER function doesn't work correctly on cp932 and sjis environment.
+CREATE TABLE t1 (a varchar(16)) character set cp932;
+INSERT INTO t1 VALUES (0x8372835E),(0x8352835E);
+SELECT hex(a), hex(lower(a)), hex(upper(a)) FROM t1 ORDER BY binary(a);
+DROP TABLE t1;
 
 --echo End of 5.1 tests
diff --git a/mysql-test/t/ctype_gbk_binlog.test b/mysql-test/t/ctype_gbk_binlog.test
new file mode 100644
index 00000000000..a8f653d1b1e
--- /dev/null
+++ b/mysql-test/t/ctype_gbk_binlog.test
@@ -0,0 +1,36 @@
+-- source include/have_binlog_format_mixed_or_statement.inc
+-- source include/have_gbk.inc
+
+SET NAMES gbk;
+--character_set gbk
+
+CREATE TABLE t1 (
+  f1 BLOB
+) ENGINE=MyISAM DEFAULT CHARSET=gbk;
+
+delimiter |;
+CREATE PROCEDURE p1(IN val BLOB)
+BEGIN
+     SET @tval = val;
+     SET @sql_cmd = CONCAT_WS(' ', 'insert into t1(f1) values(?)');
+     PREPARE stmt FROM @sql_cmd;
+     EXECUTE stmt USING @tval;
+     DEALLOCATE PREPARE stmt;
+END|
+delimiter ;|
+
+SET @`tcontent`:=_binary 0x50434B000900000000000000E9000000 COLLATE `binary`/*!*/;
+CALL p1(@`tcontent`);
+
+FLUSH LOGS;
+DROP PROCEDURE p1;
+RENAME TABLE t1 to t2;
+
+let $MYSQLD_DATADIR= `select @@datadir`;
+--exec $MYSQL_BINLOG --force-if-open --short-form $MYSQLD_DATADIR/master-bin.000001 | $MYSQL
+SELECT hex(f1) FROM t2;
+SELECT hex(f1) FROM t1;
+
+DROP PROCEDURE p1;
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/mysql-test/t/ctype_ldml.test b/mysql-test/t/ctype_ldml.test
index 73a23a751e8..db9461bfbf7 100644
--- a/mysql-test/t/ctype_ldml.test
+++ b/mysql-test/t/ctype_ldml.test
@@ -37,6 +37,15 @@ UPDATE t1 SET col2=col1;
 SELECT * FROM t1 WHERE col1=col2 ORDER BY col1;
 DROP TABLE t1;
 
+--echo #
+--echo # Bug#43827 Server closes connections and restarts
+--echo #
+# Crash happened with a user-defined utf8 collation,
+# on attempt to insert a string longer than the column can store.
+CREATE TABLE t1 (c1 VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_test_ci);
+INSERT INTO t1 SELECT REPEAT('a',11);
+DROP TABLE t1;
+
 #
 #  Vietnamese experimental collation
 #
diff --git a/mysql-test/t/ctype_sjis.test b/mysql-test/t/ctype_sjis.test
index 27cbdff451b..7de94e34dea 100644
--- a/mysql-test/t/ctype_sjis.test
+++ b/mysql-test/t/ctype_sjis.test
@@ -83,3 +83,13 @@ SET NAMES sjis;
 SELECT HEX('�����@�\') FROM DUAL;
 
 # End of 4.1 tests
+
+--echo # Start of 5.1 tests
+
+--echo Bug#44352 UPPER/LOWER function doesn't work correctly on cp932 and sjis environment.
+CREATE TABLE t1 (a varchar(16)) character set sjis;
+INSERT INTO t1 VALUES (0x8372835E),(0x8352835E);
+SELECT hex(a), hex(lower(a)), hex(upper(a)) FROM t1 ORDER BY binary(a);
+DROP TABLE t1;
+
+--echo # End of 5.1 tests
diff --git a/mysql-test/t/ddl_i18n_koi8r.test b/mysql-test/t/ddl_i18n_koi8r.test
index 2d94a899aad..fecef2f95d5 100644
--- a/mysql-test/t/ddl_i18n_koi8r.test
+++ b/mysql-test/t/ddl_i18n_koi8r.test
@@ -1128,15 +1128,22 @@ SHOW CREATE TABLE mysqltest2.t2|
 #
 # Cleanup.
 #
+delimiter ;|
 
+--connection con2
+--echo
+--echo ---> connection: con2
+--disconnect con2
+--source include/wait_until_disconnected.inc
+--connection con3
+--echo
+--echo ---> connection: con3
+--disconnect con3
+--source include/wait_until_disconnected.inc
 --connection default
 --echo
 --echo ---> connection: default
+USE test;
+DROP DATABASE mysqltest1;
+DROP DATABASE mysqltest2;
 
---disconnect con2
---disconnect con3
-
-use test|
-
-DROP DATABASE mysqltest1|
-DROP DATABASE mysqltest2|
diff --git a/mysql-test/t/ddl_i18n_utf8.test b/mysql-test/t/ddl_i18n_utf8.test
index 1d5415d9373..8788d0604f2 100644
--- a/mysql-test/t/ddl_i18n_utf8.test
+++ b/mysql-test/t/ddl_i18n_utf8.test
@@ -1128,15 +1128,22 @@ SHOW CREATE TABLE mysqltest2.t2|
 #
 # Cleanup.
 #
+delimiter ;|
 
+--connection con2
+--echo
+--echo ---> connection: con2
+--disconnect con2
+--source include/wait_until_disconnected.inc
+--connection con3
+--echo
+--echo ---> connection: con3
+--disconnect con3
+--source include/wait_until_disconnected.inc
 --connection default
 --echo
 --echo ---> connection: default
+USE test;
+DROP DATABASE mysqltest1;
+DROP DATABASE mysqltest2;
 
---disconnect con2
---disconnect con3
-
-use test|
-
-DROP DATABASE mysqltest1|
-DROP DATABASE mysqltest2|
diff --git a/mysql-test/t/derived.test b/mysql-test/t/derived.test
index 4e79fac584f..d28c19bbd18 100644
--- a/mysql-test/t/derived.test
+++ b/mysql-test/t/derived.test
@@ -273,8 +273,32 @@ select t2.* from ((select * from t1) as A inner join t2 on A.ID = t2.FID);
 select t2.* from (select * from t1) as A inner join t2 on A.ID = t2.FID;
 drop table t1, t2;
 
+connection con1;
 disconnect con1;
+--source include/wait_until_disconnected.inc
 connection default;
 drop user mysqltest_1;
 
-# End of 4.1 tests
+--echo # End of 4.1 tests
+
+#
+# Bug #41156: List of derived tables acts like a chain of
+#             mutually-nested subqueries
+#
+
+SELECT 0 FROM
+(SELECT 0) t01, (SELECT 0) t02, (SELECT 0) t03, (SELECT 0) t04, (SELECT 0) t05,
+(SELECT 0) t06, (SELECT 0) t07, (SELECT 0) t08, (SELECT 0) t09, (SELECT 0) t10,
+(SELECT 0) t11, (SELECT 0) t12, (SELECT 0) t13, (SELECT 0) t14, (SELECT 0) t15,
+(SELECT 0) t16, (SELECT 0) t17, (SELECT 0) t18, (SELECT 0) t19, (SELECT 0) t20,
+(SELECT 0) t21, (SELECT 0) t22, (SELECT 0) t23, (SELECT 0) t24, (SELECT 0) t25,
+(SELECT 0) t26, (SELECT 0) t27, (SELECT 0) t28, (SELECT 0) t29, (SELECT 0) t30,
+(SELECT 0) t31, (SELECT 0) t32, (SELECT 0) t33, (SELECT 0) t34, (SELECT 0) t35,
+(SELECT 0) t36, (SELECT 0) t37, (SELECT 0) t38, (SELECT 0) t39, (SELECT 0) t40,
+(SELECT 0) t41, (SELECT 0) t42, (SELECT 0) t43, (SELECT 0) t44, (SELECT 0) t45,
+(SELECT 0) t46, (SELECT 0) t47, (SELECT 0) t48, (SELECT 0) t49, (SELECT 0) t50,
+(SELECT 0) t51, (SELECT 0) t52, (SELECT 0) t53, (SELECT 0) t54, (SELECT 0) t55,
+(SELECT 0) t56, (SELECT 0) t57, (SELECT 0) t58, (SELECT 0) t59, (SELECT 0) t60,
+(SELECT 0) t61; # 61 == MAX_TABLES
+
+--echo # End of 5.0 tests
diff --git a/mysql-test/t/disabled.def b/mysql-test/t/disabled.def
index 760c29bbae6..5436b7166f4 100644
--- a/mysql-test/t/disabled.def
+++ b/mysql-test/t/disabled.def
@@ -10,8 +10,7 @@
 #
 ##############################################################################
 kill                     : Bug#37780 2008-12-03 HHunger need some changes to be robust enough for pushbuild.
-innodb_bug39438          : BUG#42383 2009-01-28 lsoares "This fails in embedded and on windows.  Note that this test is not run on windows and on embedded in PB for main trees currently"
+innodb_bug39438          : Bug#42383 2009-01-28 lsoares "This fails in embedded and on windows.  Note that this test is not run on windows and on embedded in PB for main trees currently"
 query_cache_28249        : Bug#43861 2009-03-25 main.query_cache_28249 fails sporadically
+init_connect             : Bug#44920 2009-07-06 pcrews MTR not processing master.opt input properly on Windows.  *Must be done this way due to the nature of the bug*
 
-#concurrent_innodb_safelog: disabled for embedded server due to bug#43733 Select on processlist let the embedded server crash (concurrent_innodb_safelog).
-#concurrent_innodb_unsafelog: disabled for embedded server due to bug#43733.
diff --git a/mysql-test/t/drop.test b/mysql-test/t/drop.test
index 91345886e93..bb4dd3e11f9 100644
--- a/mysql-test/t/drop.test
+++ b/mysql-test/t/drop.test
@@ -117,8 +117,11 @@ connection addconroot1;
 --reap
 connection addconroot2;
 --reap
-disconnect addconroot1;
 disconnect addconroot2;
+--source include/wait_until_disconnected.inc
+connection addconroot1;
+disconnect addconroot1;
+--source include/wait_until_disconnected.inc
 connection default;
 
 --echo End of 5.0 tests
diff --git a/mysql-test/t/events_grant.test b/mysql-test/t/events_grant.test
index cff2475c5aa..8db4333cc03 100644
--- a/mysql-test/t/events_grant.test
+++ b/mysql-test/t/events_grant.test
@@ -97,7 +97,9 @@ DROP EVENT one_event;
 connection default;
 --echo "One event should not be there"
 SELECT EVENT_CATALOG, EVENT_SCHEMA, EVENT_NAME, DEFINER, EVENT_BODY, EVENT_DEFINITION, EVENT_TYPE, EXECUTE_AT, INTERVAL_VALUE, INTERVAL_FIELD, STATUS,ON_COMPLETION, EVENT_COMMENT FROM INFORMATION_SCHEMA.EVENTS ORDER BY EVENT_SCHEMA, EVENT_NAME;
+connection ev_con1;
 disconnect ev_con1;
+--source include/wait_until_disconnected.inc
 connection default;
 DROP USER ev_test@localhost;
 DROP DATABASE events_test2;
@@ -106,9 +108,6 @@ DROP DATABASE events_test2;
 # End of tests
 #
 
-let $wait_condition=
-  select count(*) = 0 from information_schema.processlist
-  where db='events_test' and command = 'Connect' and user=current_user();
---source include/wait_condition.inc
+--source include/check_events_off.inc
 
 DROP DATABASE events_test;
diff --git a/mysql-test/t/events_stress.test b/mysql-test/t/events_stress.test
index 22959898b43..e51fa734256 100644
--- a/mysql-test/t/events_stress.test
+++ b/mysql-test/t/events_stress.test
@@ -109,7 +109,7 @@ connection conn3;
 --send
 DROP DATABASE events_conn3_db;
 connection default;
---send
+# --send
 DROP DATABASE events_conn1_test2;
 DROP DATABASE events_conn1_test3;
 SET GLOBAL event_scheduler=off;
@@ -135,3 +135,7 @@ DROP USER event_user3@localhost;
 #
 
 DROP DATABASE events_test;
+
+# Cleanup
+SET GLOBAL event_scheduler=off;
+--source include/check_events_off.inc
diff --git a/mysql-test/t/events_trans_notembedded.test b/mysql-test/t/events_trans_notembedded.test
index 3c151dd18b1..0353d183386 100644
--- a/mysql-test/t/events_trans_notembedded.test
+++ b/mysql-test/t/events_trans_notembedded.test
@@ -50,6 +50,7 @@ delete from t1;
 commit work;
 # Cleanup
 disconnect conn1;
+--source include/wait_until_disconnected.inc
 connection default;
 drop user mysqltest_user1@localhost;
 drop database mysqltest_db2;
diff --git a/mysql-test/t/fix_priv_tables.test b/mysql-test/t/fix_priv_tables.test
index c7cd500f8d2..eeda9bc8d15 100644
--- a/mysql-test/t/fix_priv_tables.test
+++ b/mysql-test/t/fix_priv_tables.test
@@ -51,8 +51,13 @@ echo;
 -- disable_query_log
 
 # Run the mysql_fix_privilege_tables.sql using "mysql --force"
+# Determine the number of open sessions
+--source include/count_sessions.inc
 --exec $MYSQL --force mysql < $MYSQL_FIX_PRIVILEGE_TABLES > $MYSQLTEST_VARDIR/tmp/fix_priv_tables.log 2>&1
 --remove_file $MYSQLTEST_VARDIR/tmp/fix_priv_tables.log
+# Wait till the number of open sessions is <= the number before the run with $MYSQL
+# = The session caused by mysql has finished its disconnect
+--source include/wait_until_count_sessions.inc
 
 -- enable_query_log
 -- enable_result_log
diff --git a/mysql-test/t/flush.test b/mysql-test/t/flush.test
index c832cb79158..f27d4cf2fad 100644
--- a/mysql-test/t/flush.test
+++ b/mysql-test/t/flush.test
@@ -171,6 +171,7 @@ set session low_priority_updates=default;
 connect (con1,localhost,root,,);
 send select benchmark(200, (select sin(1))) > 1000;
 disconnect con1;
+--source include/wait_until_disconnected.inc
 connection default;
 
 --echo End of 5.0 tests
diff --git a/mysql-test/t/func_compress.test b/mysql-test/t/func_compress.test
index 0a3a3823fee..d63130f190d 100644
--- a/mysql-test/t/func_compress.test
+++ b/mysql-test/t/func_compress.test
@@ -50,6 +50,7 @@ set @@global.max_allowed_packet=1048576*100;
 --connect (newconn, localhost, root,,)
 eval select compress(repeat('aaaaaaaaaa', IF('$LOW_MEMORY', 10, 10000000))) is null;
 disconnect newconn;
+--source include/wait_until_disconnected.inc
 connection default;
 set @@global.max_allowed_packet=default;
 
@@ -88,4 +89,24 @@ select *, uncompress(a) from t1;
 select *, uncompress(a), uncompress(a) is null from t1;
 drop table t1;
 
+#
+# Bug #44796: valgrind: too many my_longlong10_to_str_8bit warnings after 
+#             uncompressed_length
+#
+
+CREATE TABLE t1 (c1 INT);
+INSERT INTO t1 VALUES (1), (1111), (11111);
+
+# Disable warnings to avoid dependency on max_allowed_packet value
+--disable_warnings
+SELECT UNCOMPRESS(c1), UNCOMPRESSED_LENGTH(c1) FROM t1;
+--enable_warnings
+
+# We do not need the results, just make sure there are no valgrind errors
+--disable_result_log
+EXPLAIN EXTENDED SELECT * FROM (SELECT UNCOMPRESSED_LENGTH(c1) FROM t1) AS s;
+--enable_result_log
+
+DROP TABLE t1;
+
 --echo End of 5.0 tests
diff --git a/mysql-test/t/func_concat.test b/mysql-test/t/func_concat.test
index f2aa0d004e5..1c7e5823fb2 100644
--- a/mysql-test/t/func_concat.test
+++ b/mysql-test/t/func_concat.test
@@ -78,3 +78,37 @@ SELECT * FROM t1 WHERE CONCAT(c1,' ',c2) REGEXP 'First.*';
 DROP TABLE t1;
 
 --echo # End of 5.0 tests
+
+
+--echo #
+--echo # Bug #44743: Join in combination with concat does not always work
+--echo #
+CREATE TABLE t1 (
+  a VARCHAR(100) NOT NULL DEFAULT '0',
+  b VARCHAR(2) NOT NULL DEFAULT '',
+  c VARCHAR(2) NOT NULL DEFAULT '',
+  d TEXT NOT NULL,
+  PRIMARY KEY (a, b, c),
+  KEY (a)
+) DEFAULT CHARSET=utf8;
+
+INSERT INTO t1 VALUES ('gui_A', 'a', 'b', 'str1'),
+  ('gui_AB', 'a', 'b', 'str2'), ('gui_ABC', 'a', 'b', 'str3');
+
+CREATE TABLE t2 (
+  a VARCHAR(100) NOT NULL DEFAULT '',
+  PRIMARY KEY (a)
+) DEFAULT CHARSET=latin1;
+
+INSERT INTO t2 VALUES ('A'), ('AB'), ('ABC');
+
+SELECT CONCAT('gui_', t2.a), t1.d FROM t2 
+  LEFT JOIN t1 ON t1.a = CONCAT('gui_', t2.a) AND t1.b = 'a' AND t1.c = 'b';
+
+EXPLAIN SELECT CONCAT('gui_', t2.a), t1.d FROM t2 
+  LEFT JOIN t1 ON t1.a = CONCAT('gui_', t2.a) AND t1.b = 'a' AND t1.c = 'b';
+
+DROP TABLE t1, t2;
+
+
+--echo # End of 5.1 tests
diff --git a/mysql-test/t/func_crypt.test b/mysql-test/t/func_crypt.test
index cc3cdb9564d..6dedeaa0fef 100644
--- a/mysql-test/t/func_crypt.test
+++ b/mysql-test/t/func_crypt.test
@@ -56,3 +56,15 @@ explain extended select password('idkfa '), old_password('idkfa');
 select encrypt('1234','_.');
 
 # End of 4.1 tests
+
+--echo #
+--echo # Bug #44767: invalid memory reads in password() and old_password() 
+--echo #             functions
+--echo #
+
+CREATE TABLE t1(c1 MEDIUMBLOB);
+INSERT INTO t1 VALUES (REPEAT('a', 1024));
+SELECT OLD_PASSWORD(c1), PASSWORD(c1) FROM t1;
+DROP TABLE t1;
+
+--echo End of 5.0 tests
diff --git a/mysql-test/t/func_in.test b/mysql-test/t/func_in.test
index 3fc1697f146..adc074259ad 100644
--- a/mysql-test/t/func_in.test
+++ b/mysql-test/t/func_in.test
@@ -439,4 +439,21 @@ SELECT CASE c1 WHEN c1 + 1 THEN 1 END, ABS(AVG(c0)) FROM t1;
 
 DROP TABLE t1;
 
+#
+# Bug #44399: crash with statement using TEXT columns, aggregates, GROUP BY, 
+# and HAVING
+#
+
+CREATE TABLE t1(a TEXT, b INT, c INT UNSIGNED, d DECIMAL(12,2), e REAL);
+INSERT INTO t1 VALUES('iynfj', 1, 1, 1, 1);
+INSERT INTO t1 VALUES('innfj', 2, 2, 2, 2);
+SELECT SUM( DISTINCT a ) FROM t1 GROUP BY a HAVING a IN ( AVG( 1 ), 1 + a);
+SELECT SUM( DISTINCT b ) FROM t1 GROUP BY b HAVING b IN ( AVG( 1 ), 1 + b);
+SELECT SUM( DISTINCT c ) FROM t1 GROUP BY c HAVING c IN ( AVG( 1 ), 1 + c);
+SELECT SUM( DISTINCT d ) FROM t1 GROUP BY d HAVING d IN ( AVG( 1 ), 1 + d);
+SELECT SUM( DISTINCT e ) FROM t1 GROUP BY e HAVING e IN ( AVG( 1 ), 1 + e);
+SELECT SUM( DISTINCT e ) FROM t1 GROUP BY b,c,d HAVING (b,c,d) IN 
+  ((AVG( 1 ), 1 + c, 1 + d), (AVG( 1 ), 2 + c, 2 + d));
+DROP TABLE t1;
+
 --echo End of 5.1 tests
diff --git a/mysql-test/t/func_math.test b/mysql-test/t/func_math.test
index e67f5f29e3a..91fdce8addb 100644
--- a/mysql-test/t/func_math.test
+++ b/mysql-test/t/func_math.test
@@ -269,6 +269,15 @@ SELECT a, ROUND(a) FROM t1;
 
 DROP TABLE t1;
 
+#
+# Bug#45152 crash with round() function on longtext column in a derived table
+#
+CREATE TABLE t1(f1 LONGTEXT) engine=myisam;
+INSERT INTO t1 VALUES ('a');
+SELECT 1 FROM (SELECT ROUND(f1) AS a FROM t1) AS s WHERE a LIKE 'a';
+SELECT 1 FROM (SELECT ROUND(f1, f1) AS a FROM t1) AS s WHERE a LIKE 'a';
+DROP TABLE t1;
+
 --echo End of 5.0 tests
 
 #
@@ -282,4 +291,22 @@ SELECT 1e300 / 1e-300;
 SELECT EXP(750);
 SELECT POW(10, 309);
 
+--echo #
+--echo # Bug #44768: SIGFPE crash when selecting rand from a view
+--echo #             containing null
+--echo #
+
+CREATE OR REPLACE VIEW v1 AS SELECT NULL AS a;
+SELECT RAND(a) FROM v1;
+DROP VIEW v1;
+
+SELECT RAND(a) FROM (SELECT NULL AS a) b;
+
+CREATE TABLE t1 (i INT);
+INSERT INTO t1 VALUES (NULL);
+SELECT RAND(i) FROM t1;
+DROP TABLE t1;
+
+--echo #
+
 --echo End of 5.1 tests
diff --git a/mysql-test/t/func_set.test b/mysql-test/t/func_set.test
index 5f37cd2a13e..294efa8caf1 100644
--- a/mysql-test/t/func_set.test
+++ b/mysql-test/t/func_set.test
@@ -84,3 +84,16 @@ engine=myisam default charset=latin1;
 insert into t1 values (''),(null),(null),(''),(''),('');
 select find_in_set(f1,f1) as a from t1,(select find_in_set(f1,f1) as b from t1) a;
 drop table t1;
+#
+# Bug#45168: assertion with convert() and empty set value
+#
+CREATE TABLE t1( a SET('a', 'b', 'c') );
+CREATE TABLE t2( a SET('a', 'b', 'c') );
+
+INSERT INTO t1 VALUES ('d');
+INSERT INTO t2 VALUES ('');
+
+SELECT CONVERT( a USING latin1 ) FROM t1;
+SELECT CONVERT( a USING latin1 ) FROM t2;
+
+DROP TABLE t1, t2;
diff --git a/mysql-test/t/func_str.test b/mysql-test/t/func_str.test
index ef406d2aeca..7cb7f7f72d2 100644
--- a/mysql-test/t/func_str.test
+++ b/mysql-test/t/func_str.test
@@ -1282,6 +1282,16 @@ INSERT INTO t1 VALUES ('2008-12-31','aaaaaa');
 SELECT DATE_FORMAT(c, GET_FORMAT(DATE, 'eur')) h, CONCAT(UPPER(aa),', ', aa) i FROM t1;
 DROP TABLE t1;
 
+
+--echo #
+--echo # BUG#44774: load_file function produces valgrind warnings
+--echo #
+CREATE TABLE t1 (a TINYBLOB);
+INSERT INTO t1 VALUES ('aaaaaaaa');
+SELECT LOAD_FILE(a) FROM t1;
+DROP TABLE t1;
+
+
 --echo End of 5.0 tests
 
 #
diff --git a/mysql-test/t/gis.test b/mysql-test/t/gis.test
index 0dae4509518..4a60e777cc7 100644
--- a/mysql-test/t/gis.test
+++ b/mysql-test/t/gis.test
@@ -667,4 +667,28 @@ desc v1;
 drop view v1;
 drop table t1;
 
+#
+# Bug#44684: valgrind reports invalid reads in 
+# Item_func_spatial_collection::val_str
+#
+SELECT MultiPoint(12345,'');
+SELECT MultiPoint(123451,'');
+SELECT MultiPoint(1234512,'');
+SELECT MultiPoint(12345123,'');
+
+SELECT MultiLineString(12345,'');
+SELECT MultiLineString(123451,'');
+SELECT MultiLineString(1234512,'');
+SELECT MultiLineString(12345123,'');
+
+SELECT LineString(12345,'');
+SELECT LineString(123451,'');
+SELECT LineString(1234512,'');
+SELECT LineString(12345123,'');
+
+SELECT Polygon(12345,'');
+SELECT Polygon(123451,'');
+SELECT Polygon(1234512,'');
+SELECT Polygon(12345123,'');
+
 --echo End of 5.1 tests
diff --git a/mysql-test/t/grant.test b/mysql-test/t/grant.test
index 2e42bdbf06c..bcd393bd6ab 100644
--- a/mysql-test/t/grant.test
+++ b/mysql-test/t/grant.test
@@ -1471,5 +1471,59 @@ DROP DATABASE dbbug33464;
 
 SET @@global.log_bin_trust_function_creators= @old_log_bin_trust_function_creators;
 
+#
+# Bug#44658 Create procedure makes server crash when user does not have ALL privilege
+#
+CREATE USER user1;
+CREATE USER user2;
+GRANT CREATE ON db1.* TO 'user1'@'localhost';
+GRANT CREATE ROUTINE ON db1.* TO 'user1'@'localhost';
+GRANT CREATE ON db1.* TO 'user2'@'%';
+GRANT CREATE ROUTINE ON db1.* TO 'user2'@'%';
+FLUSH PRIVILEGES;
+SHOW GRANTS FOR 'user1'@'localhost';
+connect (con1,localhost,user1,,);
+--echo ** Connect as user1 and create a procedure.
+--echo ** The creation will imply implicitly assigned
+--echo ** EXECUTE and ALTER ROUTINE privileges to
+--echo ** the current user user1@localhost. 
+SELECT @@GLOBAL.sql_mode;
+SELECT @@SESSION.sql_mode;
+CREATE DATABASE db1;
+DELIMITER ||;
+CREATE PROCEDURE db1.proc1(p1 INT)
+ BEGIN
+ SET @x = 0;
+ REPEAT SET @x = @x + 1; UNTIL @x > p1 END REPEAT;
+ END ;||
+DELIMITER ;||
+
+connect (con2,localhost,user2,,);
+--echo ** Connect as user2 and create a procedure.
+--echo ** Implicitly assignment of privileges will
+--echo ** fail because the user2@localhost is an
+--echo ** unknown user.
+DELIMITER ||;
+CREATE PROCEDURE db1.proc2(p1 INT)
+ BEGIN
+ SET @x = 0;
+ REPEAT SET @x = @x + 1; UNTIL @x > p1 END REPEAT;
+ END ;||
+DELIMITER ;||
+
+connection default;
+SHOW GRANTS FOR 'user1'@'localhost';
+SHOW GRANTS FOR 'user2';
+disconnect con1;
+disconnect con2;
+DROP PROCEDURE db1.proc1;
+DROP PROCEDURE db1.proc2;
+REVOKE ALL ON db1.* FROM 'user1'@'localhost';
+REVOKE ALL ON db1.* FROM 'user2'@'%';
+DROP USER 'user1';
+DROP USER 'user1'@'localhost';
+DROP USER 'user2';
+DROP DATABASE db1;
+
 # Wait till we reached the initial number of concurrent sessions
 --source include/wait_until_count_sessions.inc
diff --git a/mysql-test/t/group_min_max.test b/mysql-test/t/group_min_max.test
index adfa77c881c..981be3efece 100644
--- a/mysql-test/t/group_min_max.test
+++ b/mysql-test/t/group_min_max.test
@@ -982,4 +982,39 @@ SELECT DISTINCT c FROM t1 WHERE d=4;
 
 DROP TABLE t1;
 
+--echo #
+--echo # Bug #45386: Wrong query result with MIN function in field list, 
+--echo #  WHERE and GROUP BY clause
+--echo #
+
+CREATE TABLE t (a INT, b INT, INDEX (a,b));
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+
+--echo # test MIN
+--echo #should use range with index for group by
+EXPLAIN
+SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a;
+--echo #should return 1 row
+SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a;
+
+--echo # test MAX
+--echo #should use range with index for group by
+EXPLAIN
+SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a;
+--echo #should return 1 row
+SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a;
+
+--echo # test 3 ranges and use the middle one
+INSERT INTO t SELECT a, 2 FROM t;
+
+--echo #should use range with index for group by
+EXPLAIN
+SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a;
+--echo #should return 1 row
+SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a;
+
+DROP TABLE t;
+
+
 --echo End of 5.0 tests
diff --git a/mysql-test/t/heap_btree.test b/mysql-test/t/heap_btree.test
index b51eeb27331..637c6ba1c81 100644
--- a/mysql-test/t/heap_btree.test
+++ b/mysql-test/t/heap_btree.test
@@ -253,5 +253,13 @@ insert into t1 values (1, 1), (3, 3), (2, 2), (NULL, 1), (NULL, NULL), (0, 0);
 select * from t1 where a is null;
 drop table t1;
 
+-- echo #
+-- echo # bug#39918 - memory (heap) engine crashing while executing self join with delete
+-- echo #
+
+CREATE TABLE t1(a INT, KEY USING BTREE (a)) ENGINE=MEMORY;
+INSERT INTO t1 VALUES(1),(1);
+DELETE a1 FROM t1 AS a1, t1 AS a2 WHERE a1.a=a2.a;
+DROP TABLE t1;
 --echo End of 5.0 tests
 
diff --git a/mysql-test/t/information_schema_db.test b/mysql-test/t/information_schema_db.test
index 6353e94fd51..0ff1d05f364 100644
--- a/mysql-test/t/information_schema_db.test
+++ b/mysql-test/t/information_schema_db.test
@@ -53,7 +53,7 @@ order by table_name;
 end|
 delimiter ;|
 
-create table t1 
+create table t1
 (f1 int(10) unsigned not null,
  f2 varchar(100) not null,
  primary key (f1), unique key (f2));
@@ -105,8 +105,8 @@ drop function f2;
 drop view v1, v2;
 
 #
-# Bug#20543: select on information_schema strange warnings, view, different
-#             schemas/users
+# Bug#20543 select on information_schema strange warnings, view, different
+#           schemas/users
 #
 #
 create database testdb_1;
@@ -125,7 +125,7 @@ grant insert on v1 to testdb_2@localhost;
 create view v5 as select f1 from t1;
 grant show view on v5 to testdb_2@localhost;
 
---error 1227
+--error ER_SPECIFIC_ACCESS_DENIED_ERROR
 create definer=`no_such_user`@`no_such_host` view v6 as select f1 from t1;
 
 connection default;
@@ -169,46 +169,53 @@ use testdb_1;
 revoke show view on v6 from testdb_2@localhost;
 connection testdb_2;
 
---error 1142
+--error ER_TABLEACCESS_DENIED_ERROR
 show fields from testdb_1.v5;
---error 1142
+--error ER_TABLEACCESS_DENIED_ERROR
 show create view testdb_1.v5;
 
---error 1142
+--error ER_TABLEACCESS_DENIED_ERROR
 show fields from testdb_1.v6;
---error 1142
+--error ER_TABLEACCESS_DENIED_ERROR
 show create view testdb_1.v6;
 
---error 1142
+--error ER_TABLEACCESS_DENIED_ERROR
 show fields from testdb_1.v7;
---error 1142
+--error ER_TABLEACCESS_DENIED_ERROR
 show create view testdb_1.v7;
 
---error 1345
+--error ER_VIEW_NO_EXPLAIN
 show create view v4;
-#--error 1345
+#--error ER_VIEW_NO_EXPLAIN
 show fields from v4;
 
 show fields from v2;
 show fields from testdb_1.v1;
 show create view v2;
---error 1142
+--error ER_TABLEACCESS_DENIED_ERROR
 show create view testdb_1.v1;
 
-select table_name from information_schema.columns a 
+select table_name from information_schema.columns a
 where a.table_name = 'v2';
-select view_definition from information_schema.views a 
+select view_definition from information_schema.views a
 where a.table_name = 'v2';
-select view_definition from information_schema.views a 
+select view_definition from information_schema.views a
 where a.table_name = 'testdb_1.v1';
 
---error 1356
+--error ER_VIEW_INVALID
 select * from v2;
 
 connection default;
 use test;
 drop view testdb_1.v1, v2, testdb_1.v3, v4;
 drop database testdb_1;
+connection testdb_1;
+disconnect testdb_1;
+--source include/wait_until_disconnected.inc
+connection testdb_2;
+disconnect testdb_2;
+--source include/wait_until_disconnected.inc
+connection default;
 drop user testdb_1@localhost;
 drop user testdb_2@localhost;
 
@@ -239,4 +246,7 @@ show create view testdb_1.v1;
 connection default;
 drop user mysqltest_1@localhost;
 drop database testdb_1;
+connection user1;
 disconnect user1;
+--source include/wait_until_disconnected.inc
+connection default;
diff --git a/mysql-test/t/init_file.test b/mysql-test/t/init_file.test
index ceb5cae9743..7eb5381651d 100644
--- a/mysql-test/t/init_file.test
+++ b/mysql-test/t/init_file.test
@@ -14,7 +14,7 @@ SELECT * INTO @X FROM init_file.startup limit 0,1;
 SELECT * INTO @Y FROM init_file.startup limit 1,1;
 SELECT YEAR(@X)-YEAR(@Y);
 # Enable this DROP DATABASE only after resolving bug #42507
-# DROP DATABASE init_file;
+DROP DATABASE init_file;
 
 --echo ok
 --echo end of 4.1 tests
@@ -28,4 +28,9 @@ select * from t1;
 #   30, 3, 11, 13
 select * from t2;
 # Enable this DROP TABLE only after resolving bug #42507
-#drop table t1, t2;
+drop table t1, t2;
+
+# MTR will restart server anyway, but by forcing it we avoid being warned
+# about the apparent side effect
+
+call mtr.force_restart();
diff --git a/mysql-test/t/innodb_bug21704.test b/mysql-test/t/innodb_bug21704.test
new file mode 100644
index 00000000000..c649b61034c
--- /dev/null
+++ b/mysql-test/t/innodb_bug21704.test
@@ -0,0 +1,96 @@
+-- source include/have_innodb.inc
+
+--echo #
+--echo # Bug#21704: Renaming column does not update FK definition.
+--echo #
+
+--echo
+--echo # Test that it's not possible to rename columns participating in a
+--echo # foreign key (either in the referencing or referenced table).
+--echo
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ROW_FORMAT=COMPACT ENGINE=INNODB;
+
+CREATE TABLE t2 (a INT PRIMARY KEY, b INT,
+                 CONSTRAINT fk1 FOREIGN KEY (a) REFERENCES t1(a))
+ROW_FORMAT=COMPACT ENGINE=INNODB;
+
+CREATE TABLE t3 (a INT PRIMARY KEY, b INT, KEY(b), C INT,
+  CONSTRAINT fk2 FOREIGN KEY (b) REFERENCES t3 (a))
+ROW_FORMAT=COMPACT ENGINE=INNODB;
+
+INSERT INTO t1 VALUES (1,1),(2,2),(3,3);
+INSERT INTO t2 VALUES (1,1),(2,2),(3,3);
+INSERT INTO t3 VALUES (1,1,1),(2,2,2),(3,3,3);
+
+--echo
+--echo # Test renaming the column in the referenced table.
+--echo
+
+# mysqltest first does replace_regex, then replace_result
+--replace_regex /'[^']*test\/#sql-[0-9a-f_]*'/'#sql-temporary'/
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . mysqld.1/data/ ''
+--error ER_ERROR_ON_RENAME
+ALTER TABLE t1 CHANGE a c INT;
+
+--echo # Ensure that online column rename works.
+
+--enable_info
+ALTER TABLE t1 CHANGE b c INT;
+--disable_info
+
+--echo
+--echo # Test renaming the column in the referencing table
+--echo
+
+# mysqltest first does replace_regex, then replace_result
+--replace_regex /'[^']*test\/#sql-[0-9a-f_]*'/'#sql-temporary'/
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . mysqld.1/data/ ''
+--error ER_ERROR_ON_RENAME
+ALTER TABLE t2 CHANGE a c INT;
+
+--echo # Ensure that online column rename works.
+
+--enable_info
+ALTER TABLE t2 CHANGE b c INT;
+--disable_info
+
+--echo
+--echo # Test with self-referential constraints
+--echo
+
+# mysqltest first does replace_regex, then replace_result
+--replace_regex /'[^']*test\/#sql-[0-9a-f_]*'/'#sql-temporary'/
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . mysqld.1/data/ ''
+--error ER_ERROR_ON_RENAME
+ALTER TABLE t3 CHANGE a d INT;
+
+# mysqltest first does replace_regex, then replace_result
+--replace_regex /'[^']*test\/#sql-[0-9a-f_]*'/'#sql-temporary'/
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . mysqld.1/data/ ''
+--error ER_ERROR_ON_RENAME
+ALTER TABLE t3 CHANGE b d INT;
+
+--echo # Ensure that online column rename works.
+
+--enable_info
+ALTER TABLE t3 CHANGE c d INT;
+--disable_info
+
+--echo
+--echo # Cleanup.
+--echo
+
+DROP TABLE t3;
+DROP TABLE t2;
+DROP TABLE t1;
diff --git a/mysql-test/t/innodb_bug40565.test b/mysql-test/t/innodb_bug40565.test
new file mode 100644
index 00000000000..d7aa0fd514a
--- /dev/null
+++ b/mysql-test/t/innodb_bug40565.test
@@ -0,0 +1,10 @@
+# Bug #40565 Update Query Results in "1 Row Affected" But Should Be "Zero Rows"
+-- source include/have_innodb.inc
+
+create table bug40565(value decimal(4,2)) engine=innodb;
+insert into bug40565 values (1), (null);
+--enable_info
+update bug40565 set value=NULL;
+update bug40565 set value=NULL;
+--disable_info
+drop table bug40565;
diff --git a/mysql-test/t/innodb_bug42101-nonzero-master.opt b/mysql-test/t/innodb_bug42101-nonzero-master.opt
new file mode 100644
index 00000000000..d71dbe17d5b
--- /dev/null
+++ b/mysql-test/t/innodb_bug42101-nonzero-master.opt
@@ -0,0 +1 @@
+--innodb_commit_concurrency=1
diff --git a/mysql-test/t/innodb_bug42101-nonzero.test b/mysql-test/t/innodb_bug42101-nonzero.test
new file mode 100644
index 00000000000..685fdf20489
--- /dev/null
+++ b/mysql-test/t/innodb_bug42101-nonzero.test
@@ -0,0 +1,21 @@
+#
+# Bug#42101  	Race condition in innodb_commit_concurrency
+# http://bugs.mysql.com/42101
+#
+
+-- source include/have_innodb.inc
+
+--error ER_WRONG_ARGUMENTS
+set global innodb_commit_concurrency=0;
+select @@innodb_commit_concurrency;
+set global innodb_commit_concurrency=1;
+select @@innodb_commit_concurrency;
+set global innodb_commit_concurrency=42;
+select @@innodb_commit_concurrency;
+set global innodb_commit_concurrency=DEFAULT;
+select @@innodb_commit_concurrency;
+--error ER_WRONG_ARGUMENTS
+set global innodb_commit_concurrency=0;
+select @@innodb_commit_concurrency;
+set global innodb_commit_concurrency=1;
+select @@innodb_commit_concurrency;
diff --git a/mysql-test/t/innodb_bug42101.test b/mysql-test/t/innodb_bug42101.test
new file mode 100644
index 00000000000..b6536490d48
--- /dev/null
+++ b/mysql-test/t/innodb_bug42101.test
@@ -0,0 +1,19 @@
+#
+# Bug#42101  	Race condition in innodb_commit_concurrency
+# http://bugs.mysql.com/42101
+#
+
+-- source include/have_innodb.inc
+
+set global innodb_commit_concurrency=0;
+select @@innodb_commit_concurrency;
+--error ER_WRONG_ARGUMENTS
+set global innodb_commit_concurrency=1;
+select @@innodb_commit_concurrency;
+--error ER_WRONG_ARGUMENTS
+set global innodb_commit_concurrency=42;
+select @@innodb_commit_concurrency;
+set global innodb_commit_concurrency=0;
+select @@innodb_commit_concurrency;
+set global innodb_commit_concurrency=DEFAULT;
+select @@innodb_commit_concurrency;
diff --git a/mysql-test/t/innodb_bug45357.test b/mysql-test/t/innodb_bug45357.test
new file mode 100644
index 00000000000..81727f352dd
--- /dev/null
+++ b/mysql-test/t/innodb_bug45357.test
@@ -0,0 +1,10 @@
+-- source include/have_innodb.inc
+
+set session transaction isolation level read committed;
+
+create table bug45357(a int, b int,key(b))engine=innodb;
+insert into bug45357 values (25170,6122);
+update bug45357 set a=1 where b=30131;
+delete from bug45357 where b < 20996;
+delete from bug45357 where b < 7001;
+drop table bug45357;
diff --git a/mysql-test/t/innodb_mysql.test b/mysql-test/t/innodb_mysql.test
index ad9e726f5b4..c643465b2f3 100644
--- a/mysql-test/t/innodb_mysql.test
+++ b/mysql-test/t/innodb_mysql.test
@@ -332,4 +332,133 @@ DROP TABLE t1;
 DROP TABLE t2;
 DROP TABLE t3;
 
+#
+# Bug#43580: Issue with Innodb on multi-table update
+#
+CREATE TABLE t1 (a INT, b INT, KEY (a)) ENGINE = INNODB;
+CREATE TABLE t2 (a INT KEY, b INT, KEY (b)) ENGINE = INNODB;
+
+CREATE TABLE t3 (a INT, b INT KEY, KEY (a)) ENGINE = INNODB;
+CREATE TABLE t4 (a INT KEY, b INT, KEY (b)) ENGINE = INNODB;
+
+INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6);
+INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+
+INSERT INTO t3 VALUES (1, 101), (2, 102), (3, 103), (4, 104), (5, 105), (6, 106);
+INSERT INTO t4 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+
+UPDATE t1, t2 SET t1.a = t1.a + 100, t2.b = t1.a + 10 
+WHERE t1.a BETWEEN 2 AND 4 AND t2.a = t1.b;
+--sorted_result
+SELECT * FROM t2;
+
+UPDATE t3, t4 SET t3.a = t3.a + 100, t4.b = t3.a + 10 
+WHERE t3.a BETWEEN 2 AND 4 AND t4.a = t3.b - 100;
+--sorted_result
+SELECT * FROM t4;
+
+DROP TABLE t1, t2, t3, t4;
+
+--echo #
+--echo # Bug#44886: SIGSEGV in test_if_skip_sort_order() -
+--echo #            uninitialized variable used as subscript 
+--echo #
+
+CREATE TABLE t1 (a INT, b INT, c INT, d INT, PRIMARY KEY (b), KEY (a,c))
+  ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1,1,1,0);
+
+CREATE TABLE t2 (a INT, b INT, e INT, KEY (e)) ENGINE=InnoDB;
+INSERT INTO t2 VALUES (1,1,2);
+
+CREATE TABLE t3 (a INT, b INT) ENGINE=MyISAM;
+INSERT INTO t3 VALUES (1, 1);
+
+SELECT * FROM t1, t2, t3
+  WHERE t1.a = t3.a AND (t1.b = t3.b OR t1.d) AND t2.b = t1.b AND t2.e = 2
+  GROUP BY t1.b;
+
+DROP TABLE t1, t2, t3;
+
+--echo #
+--echo # Bug #45828: Optimizer won't use partial primary key if another 
+--echo # index can prevent filesort
+--echo #
+
+# Create the table
+CREATE TABLE `t1` (
+  c1 int NOT NULL,
+  c2 int NOT NULL,
+  c3 int NOT NULL,
+  PRIMARY KEY (c1,c2),
+  KEY  (c3)
+) ENGINE=InnoDB;
+
+# populate with data
+INSERT INTO t1 VALUES (5,2,1246276747);
+INSERT INTO t1 VALUES (2,1,1246281721);
+INSERT INTO t1 VALUES (7,3,1246281756);
+INSERT INTO t1 VALUES (4,2,1246282139);
+INSERT INTO t1 VALUES (3,1,1246282230);
+INSERT INTO t1 VALUES (1,0,1246282712);
+INSERT INTO t1 VALUES (8,3,1246282765);
+INSERT INTO t1 SELECT c1+10,c2+10,c3+10 FROM t1;
+INSERT INTO t1 SELECT c1+100,c2+100,c3+100 from t1;
+INSERT INTO t1 SELECT c1+1000,c2+1000,c3+1000 from t1;
+INSERT INTO t1 SELECT c1+10000,c2+10000,c3+10000 from t1;
+INSERT INTO t1 SELECT c1+100000,c2+100000,c3+100000 from t1;
+INSERT INTO t1 SELECT c1+1000000,c2+1000000,c3+1000000 from t1;
+
+# query and no rows will match the c1 condition, whereas all will match c3
+SELECT * FROM t1 WHERE c1 = 99999999 AND c3 > 1 ORDER BY c3;
+
+# SHOULD use the pk.
+# index on c3 will be used instead of primary key
+EXPLAIN SELECT * FROM t1 WHERE c1 = 99999999 AND c3 > 1 ORDER BY c3;
+
+# if we force the primary key, we can see the estimate is 1 
+EXPLAIN SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE c1 = 99999999 AND c3 > 1 ORDER BY c3;
+
+
+CREATE TABLE t2 (
+  c1 int NOT NULL,
+  c2 int NOT NULL,
+  c3 int NOT NULL,
+  KEY (c1,c2),
+  KEY (c3)
+) ENGINE=InnoDB;
+
+# SHOULD use the pk.
+# if we switch it from a primary key to a regular index, it works correctly as well
+explain SELECT * FROM t2 WHERE c1 = 99999999 AND c3 > 1 ORDER BY c3;
+
+DROP TABLE t1,t2;
+
+
+--echo #
+--echo # 36259: Optimizing with ORDER BY
+--echo #
+
+CREATE TABLE t1 (
+  a INT NOT NULL AUTO_INCREMENT,
+  b INT NOT NULL,
+  c INT NOT NULL,
+  d VARCHAR(5),
+  e INT NOT NULL,
+  PRIMARY KEY (a), KEY i2 (b,c,d)
+) ENGINE=InnoDB;
+
+INSERT INTO t1 (b,c,d,e) VALUES (1,1,'a',1), (2,2,'b',2);
+INSERT INTO t1 (b,c,d,e) SELECT RAND()*10000, RAND()*10000, d, e FROM t1;
+INSERT INTO t1 (b,c,d,e) SELECT RAND()*10000, RAND()*10000, d, e FROM t1;
+INSERT INTO t1 (b,c,d,e) SELECT RAND()*10000, RAND()*10000, d, e FROM t1;
+INSERT INTO t1 (b,c,d,e) SELECT RAND()*10000, RAND()*10000, d, e FROM t1;
+INSERT INTO t1 (b,c,d,e) SELECT RAND()*10000, RAND()*10000, d, e FROM t1;
+INSERT INTO t1 (b,c,d,e) SELECT RAND()*10000, RAND()*10000, d, e FROM t1;
+EXPLAIN SELECT * FROM t1 WHERE b=1 AND c=1 ORDER BY a;
+EXPLAIN SELECT * FROM t1 FORCE INDEX(i2) WHERE b=1 and c=1 ORDER BY a;
+EXPLAIN SELECT * FROM t1 FORCE INDEX(PRIMARY) WHERE b=1 AND c=1 ORDER BY a;
+
+DROP TABLE t1;
+
 --echo End of 5.1 tests
diff --git a/mysql-test/t/insert_select.test b/mysql-test/t/insert_select.test
index 499db086877..f8023fcfc60 100644
--- a/mysql-test/t/insert_select.test
+++ b/mysql-test/t/insert_select.test
@@ -323,6 +323,16 @@ INSERT INTO t2 (f1, f2)
 SELECT * FROM t2;
 DROP TABLE t1, t2;
 
+#
+# Bug#44306: Assertion fail on duplicate key error in 'INSERT ... SELECT' 
+# statements
+#
+CREATE TABLE t1 ( a INT KEY, b INT );
+INSERT INTO t1 VALUES ( 0, 1 );
+--error ER_DUP_ENTRY
+INSERT INTO t1 ( b ) SELECT MAX( b ) FROM t1 WHERE b = 2;
+DROP TABLE t1;
+
 #
 # Bug #26207: inserts don't work with shortened index
 #
diff --git a/mysql-test/t/log_tables_debug.test b/mysql-test/t/log_tables_debug.test
new file mode 100644
index 00000000000..19a62614608
--- /dev/null
+++ b/mysql-test/t/log_tables_debug.test
@@ -0,0 +1,94 @@
+### t/log_tables_debug.test
+#
+# Log-related tests requiring a debug-build server.
+#
+
+# extra clean-up required due to Bug#38124, set to 1 when behavior has
+# changed (see explanation in log_state.test)
+let $fixed_bug38124 = 0;
+
+--source include/have_debug.inc
+
+# Several subtests modify global variables. Save the initial values only here,
+# but reset to the initial values per subtest.
+SET @old_general_log= @@global.general_log;
+SET @old_general_log_file= @@global.general_log_file;
+SET @old_slow_query_log= @@global.slow_query_log;
+SET @old_slow_query_log_file= @@global.slow_query_log_file;
+
+
+--echo #
+--echo # Bug#45387 Information about statement id for prepared
+--echo #           statements missed from general log
+--echo #
+
+let MYSQLD_DATADIR= `SELECT @@datadir`;
+
+# set logging to our specific bug log to control the entries added
+SET @@global.general_log = ON;
+SET @@global.general_log_file = 'bug45387_general.log';
+
+# turn on output of timestamps on all log file entries
+SET SESSION debug='+d,reset_log_last_time';
+
+let CONN_ID= `SELECT CONNECTION_ID()`;
+FLUSH LOGS;
+
+# reset log settings
+SET @@global.general_log = @old_general_log;
+SET @@global.general_log_file = @old_general_log_file;
+SET SESSION debug='-d';
+
+perl;
+  # get the relevant info from the surrounding perl invocation
+  $datadir= $ENV{'MYSQLD_DATADIR'};
+  $conn_id= $ENV{'CONN_ID'};
+
+  # loop through the log file looking for the stmt querying for conn id
+  open(FILE, "$datadir/bug45387_general.log") or
+    die("Unable to read log file $datadir/bug45387_general.log: $!\n");
+  while(<FILE>) {
+    if (/\d{6}\s+\d+:\d+:\d+[ \t]+(\d+)[ \t]+Query[ \t]+SELECT CONNECTION_ID/) {
+      $found= $1;
+      break;
+    }
+  }
+
+  # print the result
+  if ($found == $conn_id) {
+    print "Bug#45387: ID match.\n";
+  } else {
+    print "Bug#45387: Expected ID '$conn_id', found '$found' in log file.\n";
+    print "Contents of log file:\n";
+    seek(FILE, 0, 0);
+    while($line= <FILE>) {
+      print $line;
+    }
+  }
+
+  close(FILE);
+EOF
+
+--remove_file $MYSQLD_DATADIR/bug45387_general.log
+
+--echo End of 5.1 tests
+
+
+--echo #
+--echo # Cleanup
+--echo #
+
+# Reset global system variables to initial values if forgotten somewhere above.
+SET global general_log = @old_general_log;
+SET global general_log_file = @old_general_log_file;
+SET global slow_query_log = @old_slow_query_log;
+SET global slow_query_log_file = @old_slow_query_log_file;
+if(!$fixed_bug38124)
+{
+   --disable_query_log
+   let $my_var = `SELECT @old_general_log_file`;
+   eval SET @@global.general_log_file = '$my_var';
+   let $my_var = `SELECT @old_slow_query_log_file`;
+   eval SET @@global.slow_query_log_file = '$my_var';
+   --enable_query_log
+}
diff --git a/mysql-test/t/lowercase_fs_off.test b/mysql-test/t/lowercase_fs_off.test
index 414027cb485..878564c32ab 100644
--- a/mysql-test/t/lowercase_fs_off.test
+++ b/mysql-test/t/lowercase_fs_off.test
@@ -14,16 +14,18 @@ flush privileges;
 connect (sample,localhost,sample,password,d1);
 connection sample;
 select database();
---error 1044
+--error ER_DBACCESS_DENIED_ERROR
 create database d2;
---error 1044
+--error ER_DBACCESS_DENIED_ERROR
 create database D1;
 disconnect sample;
+--source include/wait_until_disconnected.inc
 
 connection master;
 drop user 'sample'@'localhost';
 drop database if exists d1;
 disconnect master;
+--source include/wait_until_disconnected.inc
 connection default;
 
 # End of 4.1 tests
diff --git a/mysql-test/t/merge.test b/mysql-test/t/merge.test
index 5315c91daa6..8760876b7ee 100644
--- a/mysql-test/t/merge.test
+++ b/mysql-test/t/merge.test
@@ -1535,4 +1535,24 @@ SELECT * FROM m1;
 
 DROP VIEW v1;
 DROP TABLE m1, t1;
+
+--echo #
+--echo # Bug #45796: invalid memory reads and writes when altering merge and 
+--echo #             base tables
+--echo #
+
+CREATE TABLE t1(c1 INT) ENGINE=MyISAM;
+CREATE TABLE m1(c1 INT) ENGINE=MERGE UNION=(t1);
+ALTER TABLE m1 ADD INDEX idx_c1(c1);
+# Open the MERGE table and allocate buffers based on children's definition.
+--error ER_WRONG_MRG_TABLE
+SELECT * FROM m1;
+# Change the child table definition.
+ALTER TABLE t1 ADD INDEX idx_c1(c1);
+# Check that old buffers are not reused
+SELECT * FROM m1;
+
+DROP TABLE m1;
+DROP TABLE t1;
+
 --echo End of 5.1 tests
diff --git a/mysql-test/t/multi_update2.test b/mysql-test/t/multi_update2.test
index 47f9bc7bad7..a04518f4964 100644
--- a/mysql-test/t/multi_update2.test
+++ b/mysql-test/t/multi_update2.test
@@ -2,14 +2,46 @@
 # Test of update statement that uses many tables.
 #
 
+#
+# If we are running with
+# - Valgrind           -> $VALGRIND_TEST <> 0
+# - debug tracing      -> @@session.debug LIKE '%trace%'
+# the resource consumption (storage space needed, runtime) will be extreme.
+# Therefore we require that the option "--big-test" is also set.
+#
+
+let $need_big= 0;
+--disable_query_log
+--error 0,ER_UNKNOWN_SYSTEM_VARIABLE
+SET @aux = @@session.debug;
+if (!$mysql_errno)
+{
+  # We have returncode 0 = the server system variable @@session.debug exists.
+  # But we only need "--big-test" in case of tracing.
+  if (`SELECT @@session.debug LIKE '%trace%'`)
+  {
+    let $need_big= 1;
+  }
+}
+--enable_query_log
+if ($VALGRIND_TEST)
+{
+  # We are running with Valgrind
+  inc $need_big;
+}
+if (`SELECT '$BIG_TEST' = '' AND $need_big = 1`)
+{
+  --skip Need "--big-test" when running with the option "--debug" or "--valgrind"
+}
+
+#
+# Bug#1820 Rows not deleted from second table on multi-table delete
+#
+
 --disable_warnings
 DROP TABLE IF EXISTS t1,t2;
 --enable_warnings
 
-#
-# Bug#1820 Rows not deleted from second table on multi-table delete
-#
-
 CREATE TABLE t1 ( a INT NOT NULL, b INT NOT NULL) ;
 --echo # The protocolling of many inserts into t1 is suppressed.
 --disable_query_log
diff --git a/mysql-test/t/mysql-bug45236.test b/mysql-test/t/mysql-bug45236.test
new file mode 100644
index 00000000000..efc10ed19ea
--- /dev/null
+++ b/mysql-test/t/mysql-bug45236.test
@@ -0,0 +1,45 @@
+#
+# Bug #45236: large blob inserts from mysqldump fail, possible memory issue ?
+#
+# This test consumes a significant amount of resources.
+# Therefore it should be kept separated from other tests.
+# Otherwise we might suffer from problems like
+# Bug#43801 mysql.test takes too long, fails due to expired timeout
+#           on debx86-b in PB
+#
+
+-- source include/not_embedded.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+# Have to change the global variable as the session variable is
+# read-only.
+SET @old_max_allowed_packet= @@global.max_allowed_packet;
+# ~1 MB blob length + some space for the rest of INSERT query
+SET @@global.max_allowed_packet = 1024 * 1024 + 1024;
+
+# Create a new connection since the global max_allowed_packet
+# has no effect onr the current one
+connect (con1, localhost, root,,);
+
+CREATE TABLE t1(data LONGBLOB);
+INSERT INTO t1 SELECT CONCAT(REPEAT('1', 1024*1024 - 27), 
+                             "\'\r dummydb dummyhost");
+
+let $outfile= $MYSQLTEST_VARDIR/tmp/bug41486.sql;
+--error 0,1
+remove_file $outfile;
+--exec $MYSQL_DUMP --compact -t test t1 > $outfile
+# Check that the mysql client does not interpret the "\r" sequence as a command
+--exec $MYSQL --max_allowed_packet=1M test < $outfile 2>&1
+
+DROP TABLE t1;
+
+# Cleanup
+disconnect con1;
+--source include/wait_until_disconnected.inc
+remove_file $outfile;
+connection default;
+SET @@global.max_allowed_packet = @old_max_allowed_packet;
diff --git a/mysql-test/t/mysql.test b/mysql-test/t/mysql.test
index 7e970d5b104..2bb9a02e9d7 100644
--- a/mysql-test/t/mysql.test
+++ b/mysql-test/t/mysql.test
@@ -342,6 +342,21 @@ EOF
 
 remove_file $MYSQLTEST_VARDIR/tmp/bug31060.sql;
 
+#
+# Bug #39101: client -i (--ignore-spaces) option does not seem to work
+#
+--exec $MYSQL -i -e "SELECT COUNT (*)"
+--exec $MYSQL --ignore-spaces -e "SELECT COUNT (*)"
+--exec $MYSQL -b -i -e "SELECT COUNT (*)"
+
+#
+# Bug#37268 'binary' character set makes CLI-internal commands case sensitive
+#
+--replace_regex /\([0-9]*\)/(errno)/
+--error 1
+--exec $MYSQL --default-character-set=binary test -e "CONNECT test invalid_hostname" 2>&1
+--exec $MYSQL --default-character-set=binary test -e "DELIMITER //" 2>&1
+
 --echo End of 5.0 tests
 
 #
@@ -367,4 +382,10 @@ remove_file $MYSQLTEST_VARDIR/tmp/bug31060.sql;
 
 drop tables t1, t2;
 
+#
+# Bug #27884: mysql --html does not quote HTML special characters in output
+# 
+--exec $MYSQL --html test -e "select '< & >' as '<'"
+
+--echo
 --echo End of tests
diff --git a/mysql-test/t/mysqlbinlog.test b/mysql-test/t/mysqlbinlog.test
index 46060649784..7767abe43d0 100644
--- a/mysql-test/t/mysqlbinlog.test
+++ b/mysql-test/t/mysqlbinlog.test
@@ -367,4 +367,16 @@ echo *** Unsigned server_id $s_id_max is found: $s_id_unsigned ***;
 eval SET @@global.server_id= $save_server_id;
 --remove_file $binlog_file
 
+#
+# Bug #41943: mysqlbinlog.exe crashes if --hexdump option is used
+#
+
+RESET MASTER;
+FLUSH LOGS;
+
+# We do not need the results, just make sure that mysqlbinlog does not crash
+--exec $MYSQL_BINLOG --hexdump --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT  master-bin.000001 >/dev/null
+
+--echo End of 5.0 tests
+
 --echo End of 5.1 tests
diff --git a/mysql-test/t/mysqldump.test b/mysql-test/t/mysqldump.test
index fe89d7bdafa..ec96124e14b 100644
--- a/mysql-test/t/mysqldump.test
+++ b/mysql-test/t/mysqldump.test
@@ -1,6 +1,5 @@
 # Embedded server doesn't support external clients
 --source include/not_embedded.inc
---source include/have_log_bin.inc
 
 # Binlog is required
 --source include/have_log_bin.inc
@@ -1395,9 +1394,6 @@ drop table t1;
 drop user mysqltest_1@localhost;
 
 
---echo #
---echo # Bug#21527 mysqldump incorrectly tries to LOCK TABLES on the
---echo #           information_schema database.
 --echo #
 --echo # Bug#21424 mysqldump failing to export/import views
 --echo #
@@ -1464,6 +1460,13 @@ disconnect root;
 --remove_file $MYSQLTEST_VARDIR/tmp/bug21527.sql
 use test;
 
+--echo #
+--echo # Bug #21527 mysqldump incorrectly tries to LOCK TABLES on the 
+--echo # information_schema database.
+--echo #
+--echo # Bug #33762: mysqldump can not dump INFORMATION_SCHEMA
+--echo #
+--exec $MYSQL_DUMP --compact --opt -d information_schema TABLES
 
 --echo #
 --echo # Bug#19745 mysqldump --xml produces invalid xml
@@ -1699,9 +1702,6 @@ DROP TABLE t1;
 # Added for use-thread option
 #
 
-# THIS PART OF THE TEST IS DISABLED UNTIL Bug#32991 IS FIXED
-if ($bug32991_fixed) {
-
 create table t1 (a text , b text);
 create table t2 (a text , b text);
 insert t1 values ("Duck, Duck", "goose");
@@ -1739,8 +1739,6 @@ drop table t2;
 
 drop table words2;
 
-}
-
 --echo #
 --echo # Bug#16853 mysqldump doesn't show events
 --echo #
@@ -1954,6 +1952,59 @@ DROP DATABASE mysqldump_test_db;
 SET @@GLOBAL.CONCURRENT_INSERT = @OLD_CONCURRENT_INSERT;
 
 
+###########################################################################
+
+--echo
+--echo Bug #34861 - mysqldump with --tab gives weird output for triggers.
+--echo
+
+CREATE TABLE t1 (f1 INT);
+CREATE TRIGGER tr1 BEFORE UPDATE ON t1 FOR EACH ROW SET @f1 = 1;
+CREATE PROCEDURE pr1 () SELECT "Meow";
+CREATE EVENT ev1 ON SCHEDULE AT '2030-01-01 00:00:00' DO SELECT "Meow";
+
+--echo
+SHOW TRIGGERS;
+SHOW EVENTS;
+SELECT name,body FROM mysql.proc WHERE NAME = 'pr1';
+
+--echo
+--echo dump table; if anything goes to stdout, it ends up here: ---------------
+--exec $MYSQL_DUMP --compact --routines --triggers --events --result-file=$MYSQLTEST_VARDIR/tmp/test_34861.sql --tab=$MYSQLTEST_VARDIR/tmp/ test
+
+--echo
+--echo drop everything
+DROP EVENT ev1;
+DROP TRIGGER tr1;
+DROP TABLE t1;
+DROP PROCEDURE pr1;
+
+--echo
+--echo reload table; this should restore table and trigger
+--exec $MYSQL test < $MYSQLTEST_VARDIR/tmp/t1.sql
+SHOW TRIGGERS;
+SHOW EVENTS;
+SELECT name,body FROM mysql.proc WHERE NAME = 'pr1';
+
+--echo
+--echo reload db; this should restore routines and events
+--exec $MYSQL test < $MYSQLTEST_VARDIR/tmp/test_34861.sql
+SHOW TRIGGERS;
+SHOW EVENTS;
+SELECT name,body FROM mysql.proc WHERE NAME = 'pr1';
+
+--echo
+--echo cleanup
+--remove_file $MYSQLTEST_VARDIR/tmp/t1.txt
+--remove_file $MYSQLTEST_VARDIR/tmp/t1.sql
+--remove_file $MYSQLTEST_VARDIR/tmp/test_34861.sql
+--disable_warnings
+DROP EVENT IF EXISTS ev1;
+DROP PROCEDURE IF EXISTS pr1;
+DROP TRIGGER IF EXISTS tr1;
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
 ###########################################################################
 
 --echo #
diff --git a/mysql-test/t/mysqldump_restore.test b/mysql-test/t/mysqldump_restore.test
new file mode 100644
index 00000000000..835ee3ee9e9
--- /dev/null
+++ b/mysql-test/t/mysqldump_restore.test
@@ -0,0 +1,111 @@
+###############################################################################
+# mysqldump_restore.test
+#
+# Purpose:  Tests if mysqldump output can be used to successfully restore
+#           tables and data.  
+#           We CREATE a table, mysqldump it to a file, ALTER the original
+#           table's name, recreate the table from the mysqldump file, then
+#           utilize include/diff_tables to compare the original and recreated
+#           tables.
+#
+#           We use several examples from mysqldump.test here and include
+#           the relevant bug numbers and headers from that test.
+#
+# NOTE:     This test is not currently complete and offers only basic
+#           cases of mysqldump output being restored. 
+#           Also, does NOT work with -X (xml) output!
+#           
+# Author:   pcrews
+# Created:  2009-05-21
+# Last Change:
+# Change date:
+###############################################################################
+
+# Embedded server doesn't support external clients
+--source include/not_embedded.inc
+--source include/have_log_bin.inc
+
+--echo # Set concurrent_insert = 0 to prevent random errors
+--echo # will reset to original value at the end of the test
+SET @old_concurrent_insert = @@global.concurrent_insert;
+SET @@global.concurrent_insert = 0;
+
+# Define mysqldumpfile here.  It is used to capture mysqldump output
+# in order to test the output's ability to restore an exact copy of the table
+let $mysqldumpfile = $MYSQLTEST_VARDIR/tmp/mysqldumpfile.sql;
+
+--echo # Pre-test cleanup
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--echo # Begin tests
+--echo #
+--echo # Bug#2005 Long decimal comparison bug.
+--echo #
+CREATE TABLE t1 (a DECIMAL(64, 20));
+INSERT INTO t1 VALUES ("1234567890123456789012345678901234567890"),
+("0987654321098765432109876543210987654321");
+--exec $MYSQL_DUMP --compact test t1 > $mysqldumpfile
+let $table_name = test.t1;
+--source include/mysqldump.inc
+
+--echo #
+--echo # Bug#3361 mysqldump quotes DECIMAL values inconsistently
+--echo #
+CREATE TABLE t1 (a DECIMAL(10,5), b FLOAT);
+# Check at first how mysql work with quoted decimal
+INSERT INTO t1 VALUES (1.2345, 2.3456);
+INSERT INTO t1 VALUES ('1.2345', 2.3456);
+INSERT INTO t1 VALUES ("1.2345", 2.3456);
+SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='ANSI_QUOTES';
+INSERT INTO t1 VALUES (1.2345, 2.3456);
+INSERT INTO t1 VALUES ('1.2345', 2.3456);
+--error ER_BAD_FIELD_ERROR
+INSERT INTO t1 VALUES ("1.2345", 2.3456);
+SET SQL_MODE=@OLD_SQL_MODE;
+
+# check how mysqldump make quoting
+--exec $MYSQL_DUMP --compact test t1 > $mysqldumpfile
+let $table_name = test.t1;
+--source include/mysqldump.inc
+
+--echo #
+--echo # Bug#1994 mysqldump does not correctly dump UCS2 data
+--echo # Bug#4261 mysqldump 10.7 (mysql 4.1.2) --skip-extended-insert drops NULL from inserts
+--echo #
+CREATE TABLE t1 (a  VARCHAR(255)) DEFAULT CHARSET koi8r;
+INSERT INTO t1  VALUES (_koi8r x'C1C2C3C4C5'), (NULL);
+--exec $MYSQL_DUMP --skip-comments --skip-extended-insert test t1 > $mysqldumpfile
+let $table_name = test.t1;
+--source include/mysqldump.inc
+
+--echo #
+--echo # WL#2319 Exclude Tables from dump
+--echo #
+CREATE TABLE t1 (a INT);
+CREATE TABLE t2 (a INT);
+INSERT INTO t1 VALUES (1),(2),(3);
+INSERT INTO t2 VALUES (4),(5),(6);
+--exec $MYSQL_DUMP --skip-comments --ignore-table=test.t1 test > $mysqldumpfile
+let $table_name = test.t2;
+--source include/mysqldump.inc
+DROP TABLE t1;
+
+--echo #
+--echo # Bug#8830 mysqldump --skip-extended-insert causes --hex-blob to dump wrong values
+--echo #
+CREATE TABLE t1 (`b` blob);
+INSERT INTO `t1` VALUES (0x602010000280100005E71A);
+--exec $MYSQL_DUMP --skip-extended-insert --hex-blob test --skip-comments t1 > $mysqldumpfile
+let $table_name = test.t1;
+--source include/mysqldump.inc
+
+--echo # End tests
+
+--echo # Cleanup
+--echo # Reset concurrent_insert to its original value
+SET @@global.concurrent_insert = @old_concurrent_insert;
+--echo # remove mysqldumpfile
+--error 0,1
+--remove_file $mysqldumpfile
diff --git a/mysql-test/t/mysqltest.test b/mysql-test/t/mysqltest.test
index 55cd041aaf5..578b2bf5c6c 100644
--- a/mysql-test/t/mysqltest.test
+++ b/mysql-test/t/mysqltest.test
@@ -1780,6 +1780,56 @@ remove_file $MYSQLTEST_VARDIR/tmp/file2.tmp;
 --error 1
 --exec echo "copy_file from_file;" | $MYSQL_TEST 2>&1
 
+
+# ----------------------------------------------------------------------------
+# test for move_file
+# ----------------------------------------------------------------------------
+
+# - Check that if source file does not exist, nothing will be created.
+
+--error 1
+file_exists $MYSQLTEST_VARDIR/tmp/file1.tmp;
+--error 1
+file_exists $MYSQLTEST_VARDIR/tmp/file2.tmp;
+--error 1
+move_file $MYSQLTEST_VARDIR/tmp/file1.tmp $MYSQLTEST_VARDIR/tmp/file2.tmp;
+--error 1
+file_exists $MYSQLTEST_VARDIR/tmp/file1.tmp;
+--error 1
+file_exists $MYSQLTEST_VARDIR/tmp/file2.tmp;
+
+# - Check that if source file exists, everything works properly.
+
+--write_file $MYSQLTEST_VARDIR/tmp/file1.tmp
+file1
+EOF
+
+move_file $MYSQLTEST_VARDIR/tmp/file1.tmp $MYSQLTEST_VARDIR/tmp/file2.tmp;
+--error 1
+file_exists $MYSQLTEST_VARDIR/tmp/file1.tmp;
+file_exists $MYSQLTEST_VARDIR/tmp/file2.tmp;
+
+# - Check that if destination file exists, everything works properly.
+#   (file2.tmp exists from the previous check; file1.tmp needs to be created)
+
+--write_file $MYSQLTEST_VARDIR/tmp/file1.tmp
+file1
+EOF
+
+move_file $MYSQLTEST_VARDIR/tmp/file1.tmp $MYSQLTEST_VARDIR/tmp/file2.tmp;
+--error 1
+file_exists $MYSQLTEST_VARDIR/tmp/file1.tmp;
+file_exists $MYSQLTEST_VARDIR/tmp/file2.tmp;
+remove_file $MYSQLTEST_VARDIR/tmp/file2.tmp;
+
+# - Check usage.
+
+--error 1
+--exec echo "move_file ;" | $MYSQL_TEST 2>&1
+
+--error 1
+--exec echo "move_file from_file;" | $MYSQL_TEST 2>&1
+
 # ----------------------------------------------------------------------------
 # test for chmod
 # ----------------------------------------------------------------------------
@@ -2037,6 +2087,10 @@ let $value= query_get_value(SELECT 'A B' AS "MyColumn", MyColumn, 1);
 let $value= query_get_value(SELECT 1 AS "My Column", My Column, 1);
 --echo value= $value
 #
+# 4.1 Query containing , protected by quotes, quotes also on column
+let $value= query_get_value('SELECT 1 as a, 2 as b', "b", 1);
+--echo value= $value
+#
 #------------ Negative tests ------------
 # 5. Incomplete statement including missing parameters
 # 5.1 incomplete statement
diff --git a/mysql-test/t/openssl_1.test b/mysql-test/t/openssl_1.test
index 240a977fdca..baa1603faab 100644
--- a/mysql-test/t/openssl_1.test
+++ b/mysql-test/t/openssl_1.test
@@ -238,7 +238,18 @@ DROP TABLE t1;
 --enable_query_log
 select 'is still running; no cipher request crashed the server' as result from dual;
 
-##
+#
+# Bug#42158: leak: SSL_get_peer_certificate() doesn't have matching X509_free()
+#
+
+GRANT SELECT ON test.* TO bug42158@localhost REQUIRE X509;
+FLUSH PRIVILEGES;
+connect(con1,localhost,bug42158,,,,,SSL);
+SHOW STATUS LIKE 'Ssl_cipher';
+disconnect con1;
+connection default;
+DROP USER bug42158@localhost;
+
 --echo End of 5.1 tests
 
 # Wait till we reached the initial number of concurrent sessions
diff --git a/mysql-test/t/partition.test b/mysql-test/t/partition.test
index ce89609de39..8b4af201af2 100644
--- a/mysql-test/t/partition.test
+++ b/mysql-test/t/partition.test
@@ -1935,7 +1935,58 @@ INSERT INTO t1 VALUES (10), (100), (200), (300), (400);
 EXPLAIN PARTITIONS SELECT * FROM t1 WHERE a>=200;
 DROP TABLE t1;
 
+#
+# Bug#44821: select distinct on partitioned table returns wrong results
+#
+CREATE TABLE t1 ( a INT, b INT, c INT, KEY bc(b, c) )
+PARTITION BY KEY (a, b) PARTITIONS 3
+;
+
+INSERT INTO t1 VALUES
+(17, 1, -8),
+(3,  1, -7),
+(23, 1, -6),
+(22, 1, -5),
+(11, 1, -4),
+(21, 1, -3),
+(19, 1, -2),
+(30, 1, -1),
+
+(20, 1, 1),
+(16, 1, 2),
+(18, 1, 3),
+(9,  1, 4),
+(15, 1, 5),
+(28, 1, 6),
+(29, 1, 7),
+(25, 1, 8),
+(10, 1, 9),
+(13, 1, 10),
+(27, 1, 11),
+(24, 1, 12),
+(12, 1, 13),
+(26, 1, 14),
+(14, 1, 15)
+;
+
+SELECT b, c FROM t1 WHERE b = 1 GROUP BY b, c;
+
+EXPLAIN
+SELECT b, c FROM t1 WHERE b = 1 GROUP BY b, c;
+
+DROP TABLE t1;
+
+--echo #
+--echo # Bug #45807: crash accessing partitioned table and sql_mode 
+--echo #   contains ONLY_FULL_GROUP_BY
+--echo #
+
+SET SESSION SQL_MODE='ONLY_FULL_GROUP_BY';
+CREATE TABLE t1(id INT,KEY(id)) ENGINE=MYISAM 
+  PARTITION BY HASH(id) PARTITIONS 2;
+DROP TABLE t1;
+SET SESSION SQL_MODE=DEFAULT;
+
 --echo End of 5.1 tests
 
-
 SET @@global.general_log= @old_general_log;
diff --git a/mysql-test/t/plugin.test b/mysql-test/t/plugin.test
index 0635a58a4a6..7fc62b445c9 100644
--- a/mysql-test/t/plugin.test
+++ b/mysql-test/t/plugin.test
@@ -3,13 +3,16 @@
 CREATE TABLE t1(a int) ENGINE=EXAMPLE;
 DROP TABLE t1;
 
-INSTALL PLUGIN example SONAME 'ha_example.so';
+--replace_regex /\.dll/.so/
+eval INSTALL PLUGIN example SONAME $HA_EXAMPLE_SO;
+--replace_regex /\.dll/.so/
 --error 1125
-INSTALL PLUGIN EXAMPLE SONAME 'ha_example.so';
+eval INSTALL PLUGIN EXAMPLE SONAME $HA_EXAMPLE_SO;
 
 UNINSTALL PLUGIN example;
 
-INSTALL PLUGIN example SONAME 'ha_example.so';
+--replace_regex /\.dll/.so/
+eval INSTALL PLUGIN example SONAME $HA_EXAMPLE_SO;
 
 CREATE TABLE t1(a int) ENGINE=EXAMPLE;
 
@@ -30,8 +33,8 @@ UNINSTALL PLUGIN non_exist;
 --echo # Bug#32034: check_func_enum() does not check correct values but set it
 --echo #            to impossible int val
 --echo #
-
-INSTALL PLUGIN example SONAME 'ha_example.so';
+--replace_regex /\.dll/.so/
+eval INSTALL PLUGIN example SONAME $HA_EXAMPLE_SO;
 
 SET GLOBAL example_enum_var= e1;
 SET GLOBAL example_enum_var= e2;
@@ -45,7 +48,8 @@ UNINSTALL PLUGIN example;
 #
 # Bug #32757 hang with sql_mode set when setting some global variables
 #
-INSTALL PLUGIN example SONAME 'ha_example.so';
+--replace_regex /\.dll/.so/
+eval INSTALL PLUGIN example SONAME $HA_EXAMPLE_SO;
 
 select @@session.sql_mode into @old_sql_mode;
 
diff --git a/mysql-test/t/plugin_load-master.opt b/mysql-test/t/plugin_load-master.opt
index 66637841f16..bb7831c5769 100644
--- a/mysql-test/t/plugin_load-master.opt
+++ b/mysql-test/t/plugin_load-master.opt
@@ -1,3 +1,3 @@
 $EXAMPLE_PLUGIN_OPT
-"--plugin-load=;EXAMPLE=ha_example.so;"
+$EXAMPLE_PLUGIN_LOAD
 --loose-plugin-example-enum-var=e2
diff --git a/mysql-test/t/query_cache_debug.test b/mysql-test/t/query_cache_debug.test
index 8cf5e9d4b16..d30cd458e99 100644
--- a/mysql-test/t/query_cache_debug.test
+++ b/mysql-test/t/query_cache_debug.test
@@ -112,3 +112,148 @@ DROP TABLE t1,t2;
 SET GLOBAL concurrent_insert= DEFAULT;
 SET GLOBAL query_cache_size= DEFAULT;
 SET GLOBAL query_cache_type= DEFAULT;
+
+
+--echo #
+--echo # Bug43758 Query cache can lock up threads in 'freeing items' state
+--echo #
+FLUSH STATUS;
+SET GLOBAL query_cache_type=DEMAND;
+SET GLOBAL query_cache_size= 1024*768;
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2,t3,t4,t5;
+--enable_warnings
+CREATE TABLE t1 (a VARCHAR(100));
+CREATE TABLE t2 (a VARCHAR(100));
+CREATE TABLE t3 (a VARCHAR(100));
+CREATE TABLE t4 (a VARCHAR(100));
+CREATE TABLE t5 (a VARCHAR(100));
+
+INSERT INTO t1 VALUES ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb');
+INSERT INTO t2 VALUES ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb');
+INSERT INTO t3 VALUES ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb');
+INSERT INTO t4 VALUES ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb');
+INSERT INTO t5 VALUES ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),('bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb');
+
+connect (thd2, localhost, root, ,test);
+connect (thd3, localhost, root, ,test);
+connect (thd1, localhost, root, ,test);
+
+connection thd1;
+--echo =================================== Connection thd1
+--echo **
+--echo ** Load Query Cache with a result set and one table.
+--echo **
+SELECT SQL_CACHE * FROM t1;
+--echo *************************************************************************
+--echo ** We want to accomplish the following state:
+--echo **  - Query cache status: TABLE_FLUSH_IN_PROGRESS
+--echo **  - THD1: invalidate_table_internal (iterating query blocks)
+--echo **  - THD2: query_cache_insert (cond_wait)
+--echo **  - THD3: query_cache_insert (cond_wait)
+--echo **  - No thread should be holding the structure_guard_mutex.
+--echo **
+--echo ** First step is to place a DELETE-statement on the debug hook just
+--echo ** before the mutex lock in invalidate_table_internal.
+--echo ** This will allow new result sets to be written into the QC.
+--echo ** 
+SET SESSION debug='+d,wait_in_query_cache_invalidate1';
+SET SESSION debug='+d,wait_in_query_cache_invalidate2';
+--send DELETE FROM t1 WHERE a like '%a%';
+
+connection default;
+--echo =================================== Connection default
+--echo ** Assert that the expect process status is obtained.
+LET $wait_condition= SELECT SQL_NO_CACHE COUNT(*)= 1 FROM information_schema.processlist WHERE state= 'wait_in_query_cache_invalidate1';
+--source include/wait_condition.inc
+-- echo **
+
+connection thd2;
+--echo =================================== Connection thd2
+--echo ** On THD2: Insert a result into the cache. This attempt will be blocked
+--echo ** because of a debug hook placed just before the mutex lock after which
+--echo ** the first part of the result set is written.
+SET SESSION debug='+d,wait_in_query_cache_insert';
+--send SELECT SQL_CACHE * FROM t2 UNION SELECT * FROM t3
+
+connection thd3;
+--echo =================================== Connection thd3
+--echo ** On THD3: Insert another result into the cache and block on the same
+--echo ** debug hook.
+SET SESSION debug='+d,wait_in_query_cache_insert';
+--send SELECT SQL_CACHE * FROM t4 UNION SELECT * FROM t5;
+
+connection default;
+--echo =================================== Connection default
+--echo ** Assert that the two SELECT-stmt threads to reach the hook.
+LET $wait_condition= SELECT SQL_NO_CACHE COUNT(*)= 2 FROM information_schema.processlist WHERE state='wait_in_query_cache_insert';
+--source include/wait_condition.inc
+--echo **
+--echo **
+
+--echo ** Signal the DELETE thread, THD1, to continue. It will enter the mutex
+--echo ** lock and set query cache status to TABLE_FLUSH_IN_PROGRESS and then
+--echo ** unlock the mutex before stopping on the next debug hook.
+SELECT SQL_NO_CACHE id FROM information_schema.processlist WHERE state='wait_in_query_cache_invalidate1' LIMIT 1 INTO @flush_thread_id;
+KILL QUERY @flush_thread_id;
+--echo ** Assert that we reach the next debug hook.
+LET $wait_condition= SELECT SQL_NO_CACHE COUNT(*)= 1 FROM information_schema.processlist WHERE state='wait_in_query_cache_invalidate2';
+--source include/wait_condition.inc
+
+--echo **
+--echo ** Signal the remaining debug hooks blocking THD2 and THD3.
+--echo ** The threads will grab the guard mutex enter the wait condition and
+--echo ** and finally release the mutex. The threads will continue to wait
+--echo ** until a broadcast signal reaches them causing both threads to 
+--echo ** come alive and check the condition.
+SELECT SQL_NO_CACHE id FROM information_schema.processlist WHERE state='wait_in_query_cache_insert' ORDER BY id ASC LIMIT 1 INTO @thread_id;
+KILL QUERY @thread_id;
+SELECT SQL_NO_CACHE id FROM information_schema.processlist WHERE state='wait_in_query_cache_insert' ORDER BY id DESC LIMIT 1 INTO @thread_id;
+KILL QUERY @thread_id;
+
+--echo **
+--echo ** Finally signal the DELETE statement on THD1 one last time.
+--echo ** The stmt will complete the query cache invalidation and return 
+--echo ** cache status to NO_FLUSH_IN_PROGRESS. On the status change
+--echo ** One signal will be sent to the thread group waiting for executing
+--echo ** invalidations and a broadcast signal will be sent to the thread 
+--echo ** group holding result set writers.
+SELECT SQL_NO_CACHE id FROM information_schema.processlist WHERE state='wait_in_query_cache_invalidate2' LIMIT 1 INTO @flush_thread_id;
+KILL QUERY @flush_thread_id;
+
+--echo **
+--echo *************************************************************************
+--echo ** No tables should be locked
+connection thd2;
+--echo =================================== Connection thd2
+reap;
+DELETE FROM t1;
+DELETE FROM t2;
+DELETE FROM t3;
+
+connection thd3;
+--echo =================================== Connection thd3
+reap;
+DELETE FROM t4;
+DELETE FROM t5;
+
+connection thd1;
+--echo =================================== Connection thd1
+reap;
+
+--echo ** Done.
+
+connection default;
+disconnect thd1;
+disconnect thd2;
+disconnect thd3;
+SET GLOBAL query_cache_size= 0;
+
+connection default;
+--echo # Restore defaults
+RESET QUERY CACHE;
+FLUSH STATUS;
+DROP TABLE t1,t2,t3,t4,t5;
+SET GLOBAL query_cache_size= DEFAULT;
+SET GLOBAL query_cache_type= DEFAULT;
+exit;
diff --git a/mysql-test/t/select.test b/mysql-test/t/select.test
index 715bdf0e667..7d3785ecccc 100644
--- a/mysql-test/t/select.test
+++ b/mysql-test/t/select.test
@@ -3799,4 +3799,90 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE (a=a AND a=a AND b=b) OR b > 20;
 EXPLAIN EXTENDED SELECT * FROM t1 WHERE (a=a AND b=b AND a=a) OR b > 20;
 DROP TABLE t1;
 
+
+--echo #
+--echo # Bug#45266: Uninitialized variable lead to an empty result.
+--echo #
+--disable_warnings
+drop table if exists A,AA,B,BB;
+CREATE TABLE `A` (
+  `pk` int(11) NOT NULL AUTO_INCREMENT,
+  `date_key` date NOT NULL,
+  `date_nokey` date NOT NULL,
+  `datetime_key` datetime NOT NULL,
+  `int_nokey` int(11) NOT NULL,
+  `time_key` time NOT NULL,
+  `time_nokey` time NOT NULL,
+  PRIMARY KEY (`pk`),
+  KEY `date_key` (`date_key`),
+  KEY `time_key` (`time_key`),
+  KEY `datetime_key` (`datetime_key`)
+);
+
+CREATE TABLE `AA` (
+  `pk` int(11) NOT NULL AUTO_INCREMENT,
+  `int_nokey` int(11) NOT NULL,
+  `time_key` time NOT NULL,
+  KEY `time_key` (`time_key`),
+  PRIMARY KEY (`pk`)
+);
+
+CREATE TABLE `B` (
+  `date_nokey` date NOT NULL,
+  `date_key` date NOT NULL,
+  `time_key` time NOT NULL,
+  `datetime_nokey` datetime NOT NULL,
+  `varchar_key` varchar(1) NOT NULL,
+  KEY `date_key` (`date_key`),
+  KEY `time_key` (`time_key`),
+  KEY `varchar_key` (`varchar_key`)
+);
+
+INSERT INTO `B` VALUES ('2003-07-28','2003-07-28','15:13:38','0000-00-00 00:00:00','f'),('0000-00-00','0000-00-00','00:05:48','2004-07-02 14:34:13','x');
+
+CREATE TABLE `BB` (
+  `pk` int(11) NOT NULL AUTO_INCREMENT,
+  `int_nokey` int(11) NOT NULL,
+  `date_key` date NOT NULL,
+  `varchar_nokey` varchar(1) NOT NULL,
+  `date_nokey` date NOT NULL,
+  PRIMARY KEY (`pk`),
+  KEY `date_key` (`date_key`)
+);
+
+INSERT INTO `BB` VALUES (10,8,'0000-00-00','i','0000-00-00'),(11,0,'2005-08-18','','2005-08-18');
+# Test #1
+SELECT table1 . `pk` AS field1 
+  FROM 
+    (BB AS table1 INNER JOIN 
+      (AA AS table2 STRAIGHT_JOIN A AS table3 
+        ON ( table3 . `date_key` = table2 . `pk` ))
+       ON ( table3 . `datetime_key` = table2 . `int_nokey` ))
+  WHERE  ( table3 . `date_key` <= 4 AND table2 . `pk` = table1 . `varchar_nokey`)
+  GROUP BY field1 ;
+
+SELECT table3 .`date_key` field1
+  FROM
+    B table1 LEFT JOIN B table3 JOIN
+      (BB table6 JOIN A table7 ON table6 .`varchar_nokey`)
+       ON table6 .`int_nokey` ON table6 .`date_key`
+  WHERE  NOT ( table1 .`varchar_key`  AND table7 .`pk`) GROUP  BY field1;
+
+# Test #2
+SELECT table4 . `time_nokey` AS field1 FROM 
+  (AA AS table1 CROSS JOIN 
+    (AA AS table2 STRAIGHT_JOIN 
+      (B AS table3 STRAIGHT_JOIN A AS table4 
+       ON ( table4 . `date_key` = table3 . `time_key` ))
+     ON ( table4 . `pk` = table3 . `date_nokey` ))
+   ON ( table4 . `time_key` = table3 . `datetime_nokey` ))
+  WHERE  ( table4 . `time_key` < table1 . `time_key` AND
+            table1 . `int_nokey` != 'f')
+  GROUP BY field1  ORDER BY field1 , field1;
+
+SELECT table1 .`time_key` field2  FROM B table1  LEFT JOIN  BB JOIN A table5 ON table5 .`date_nokey`  ON table5 .`int_nokey` GROUP  BY field2;
+--enable_warnings
+
+drop table A,AA,B,BB;
+--echo #end of test for bug#45266
 --echo End of 5.1 tests
diff --git a/mysql-test/t/sp-error.test b/mysql-test/t/sp-error.test
index 8d7c6d75a34..66b960c938f 100644
--- a/mysql-test/t/sp-error.test
+++ b/mysql-test/t/sp-error.test
@@ -2435,3 +2435,16 @@ delimiter ;$$
 #
 LOAD DATA INFILE '../../tmp/proc.txt' INTO TABLE mysql.proc;
 remove_file $MYSQLTEST_VARDIR/tmp/proc.txt;
+
+#
+# Bug #38159: Function parsing problem generates misleading error message
+#
+
+CREATE TABLE t1 (a INT, b INT);
+INSERT INTO t1 VALUES (1,1), (2,2);
+--error ER_FUNC_INEXISTENT_NAME_COLLISION
+SELECT MAX (a) FROM t1 WHERE b = 999999;
+SELECT AVG (a) FROM t1 WHERE b = 999999;
+--error ER_SP_DOES_NOT_EXIST
+SELECT non_existent (a) FROM t1 WHERE b = 999999;
+DROP TABLE t1;
diff --git a/mysql-test/t/sp-fib.test b/mysql-test/t/sp-fib.test
new file mode 100644
index 00000000000..24a51b99c2d
--- /dev/null
+++ b/mysql-test/t/sp-fib.test
@@ -0,0 +1,54 @@
+# Fibonacci, for recursion test. (Yet Another Numerical series :)
+# Split from main.sp due to problems reported in Bug#15866
+
+--disable_warnings
+drop table if exists t3;
+--enable_warnings
+create table t3 ( f bigint unsigned not null );
+
+# We deliberately do it the awkward way, fetching the last two
+# values from the table, in order to exercise various statements
+# and table accesses at each turn.
+--disable_warnings
+drop procedure if exists fib;
+--enable_warnings
+
+# Now for multiple statements...
+delimiter |;
+
+create procedure fib(n int unsigned)
+begin
+  if n > 1 then
+    begin
+      declare x, y bigint unsigned;
+      declare c cursor for select f from t3 order by f desc limit 2;
+      open c;
+      fetch c into y;
+      fetch c into x;
+      insert into t3 values (x+y);
+      call fib(n-1);
+      ## Close the cursor AFTER the recursion to ensure that the stack
+      ## frame is somewhat intact.
+      close c;
+    end;
+  end if;
+end|
+
+# Enable recursion
+set @@max_sp_recursion_depth= 20|
+
+insert into t3 values (0), (1)|
+
+# The small number of recursion levels is intentional.
+# We need to avoid
+# Bug#15866 main.sp fails (thread stack limit
+#           insufficient for recursive call "fib(20)")
+# which affects some platforms.
+call fib(4)|
+
+select * from t3 order by f asc|
+
+drop table t3|
+drop procedure fib|
+set @@max_sp_recursion_depth= 0|
+
diff --git a/mysql-test/t/sp.test b/mysql-test/t/sp.test
index fdf6ed8f382..5eeac457958 100644
--- a/mysql-test/t/sp.test
+++ b/mysql-test/t/sp.test
@@ -1561,61 +1561,6 @@ drop procedure ip|
 show procedure status where name like '%p%' and db='test'|
 
 
-# Fibonacci, for recursion test. (Yet Another Numerical series :)
-#
---disable_warnings
-drop table if exists t3|
---enable_warnings
-create table t3 ( f bigint unsigned not null )|
-
-# We deliberately do it the awkward way, fetching the last two
-# values from the table, in order to exercise various statements
-# and table accesses at each turn.
---disable_warnings
-drop procedure if exists fib|
---enable_warnings
-create procedure fib(n int unsigned)
-begin
-  if n > 1 then
-    begin
-      declare x, y bigint unsigned;
-      declare c cursor for select f from t3 order by f desc limit 2;
-
-      open c;
-      fetch c into y;
-      fetch c into x;
-      close c;
-      insert into t3 values (x+y);
-      call fib(n-1);
-    end;
-  end if;
-end|
-
-# Enable recursion
-set @@max_sp_recursion_depth= 20|
-
-# Minimum test: recursion of 3 levels
-
-insert into t3 values (0), (1)|
-
-call fib(3)|
-
-select * from t3 order by f asc|
-
-truncate table t3|
-
-# The original test, 20 levels, ran into memory limits on some machines
-# and builds. Try 10 instead...
-
-insert into t3 values (0), (1)|
-
-call fib(10)|
-
-select * from t3 order by f asc|
-drop table t3|
-drop procedure fib|
-set @@max_sp_recursion_depth= 0|
-
 #
 # Comment & suid
 #
diff --git a/mysql-test/t/sp_notembedded.test b/mysql-test/t/sp_notembedded.test
index f540126c405..ecb37c1299c 100644
--- a/mysql-test/t/sp_notembedded.test
+++ b/mysql-test/t/sp_notembedded.test
@@ -345,6 +345,32 @@ drop procedure p1;
 drop table t1;
 set session low_priority_updates=default;
 
+#
+# Bug#44798 MySQL engine crashes when creating stored procedures with execute_priv=N
+#
+INSERT INTO mysql.user (Host, User, Password, Select_priv, Insert_priv, Update_priv,
+Delete_priv, Create_priv, Drop_priv, Reload_priv, Shutdown_priv, Process_priv, File_priv,
+Grant_priv, References_priv, Index_priv, Alter_priv, Show_db_priv, Super_priv,
+Create_tmp_table_priv, Lock_tables_priv, Execute_priv, Repl_slave_priv, Repl_client_priv,
+Create_view_priv, Show_view_priv, Create_routine_priv, Alter_routine_priv,
+Create_user_priv, ssl_type, ssl_cipher, x509_issuer, x509_subject, max_questions,
+max_updates, max_connections, max_user_connections) 
+VALUES('%', 'mysqltest_1', password(''), 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'N', 'N', 'N',
+'N', 'N', 'N', 'Y', 'Y', 'N', 'N', 'Y', 'Y', 'N', 'N', 'N', 'N', 'N', 'Y', 'Y', 'N', '',
+'', '', '', '0', '0', '0', '0');
+FLUSH PRIVILEGES;
+
+connect (con1, localhost, mysqltest_1,,);
+connection con1;
+CREATE PROCEDURE p1(i INT) BEGIN END;
+disconnect con1;
+connection default;
+DROP PROCEDURE p1;
+
+DELETE FROM mysql.user WHERE User='mysqltest_1';
+FLUSH PRIVILEGES;
+
+
 #
 # Restore global concurrent_insert value. Keep in the end of the test file.
 #
diff --git a/mysql-test/t/sql_mode.test b/mysql-test/t/sql_mode.test
index acc9cc7979e..4a9f34443cb 100644
--- a/mysql-test/t/sql_mode.test
+++ b/mysql-test/t/sql_mode.test
@@ -308,3 +308,39 @@ flush privileges;
 
 --connection default
 drop user mysqltest_32753@localhost;
+
+#
+# Bug#45100: Incomplete DROP USER in case of SQL_MODE = 'PAD_CHAR_TO_FULL_LENGTH'
+#
+
+--disable_warnings
+DROP TABLE IF EXISTS t1,t2;
+--enable_warnings
+
+# Generate some prerequisites
+CREATE USER 'user_PCTFL'@'localhost' identified by 'PWD';
+CREATE USER 'user_no_PCTFL'@'localhost' identified by 'PWD';
+
+CREATE TABLE t1 (f1 BIGINT);
+CREATE TABLE t2 (f1 CHAR(3) NOT NULL, f2 CHAR(20));
+
+# Grant privilege on a TABLE
+GRANT ALL ON t1 TO 'user_PCTFL'@'localhost','user_no_PCTFL'@'localhost';
+# Grant privilege on some COLUMN of a table
+GRANT SELECT(f1) ON t2 TO 'user_PCTFL'@'localhost','user_no_PCTFL'@'localhost';
+
+SET @OLD_SQL_MODE = @@SESSION.SQL_MODE;
+SET SESSION SQL_MODE = 'PAD_CHAR_TO_FULL_LENGTH';
+DROP USER 'user_PCTFL'@'localhost';
+SET SESSION SQL_MODE = @OLD_SQL_MODE;
+DROP USER 'user_no_PCTFL'@'localhost';
+
+FLUSH PRIVILEGES;
+
+SELECT * FROM mysql.db WHERE Host = 'localhost' AND User LIKE 'user_%PCTFL';
+SELECT * FROM mysql.tables_priv WHERE Host = 'localhost' AND User LIKE 'user_%PCTFL';
+SELECT * FROM mysql.columns_priv WHERE Host = 'localhost' AND User LIKE 'user_%PCTFL';
+
+# Cleanup
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/mysql-test/t/status.test b/mysql-test/t/status.test
index 5842f59af5c..5da210f5a69 100644
--- a/mysql-test/t/status.test
+++ b/mysql-test/t/status.test
@@ -12,6 +12,12 @@
 set @old_concurrent_insert= @@global.concurrent_insert;
 set @@global.concurrent_insert= 0;
 
+# Disable logging to table, since this will also cause table locking and unlocking, which will
+# show up in SHOW STATUS and may cause sporadic failures
+
+SET @old_log_output = @@global.log_output;
+SET GLOBAL LOG_OUTPUT = 'FILE';
+
 # PS causes different statistics
 --disable_ps_protocol
 
@@ -350,6 +356,7 @@ DROP FUNCTION f1;
 # Restore global concurrent_insert value. Keep in the end of the test file.
 --connection default
 set @@global.concurrent_insert= @old_concurrent_insert;
+SET GLOBAL log_output = @old_log_output;
 
 # Wait till we reached the initial number of concurrent sessions
 --source include/wait_until_count_sessions.inc
diff --git a/mysql-test/t/subselect3.test b/mysql-test/t/subselect3.test
index bf461f83a20..7a2a9f328ef 100644
--- a/mysql-test/t/subselect3.test
+++ b/mysql-test/t/subselect3.test
@@ -669,6 +669,25 @@ SELECT ROW(1,2) = (SELECT NULL,    1), ROW(1,2) IN (SELECT NULL,    1);
 SELECT ROW(1,2) = (SELECT    1,    1), ROW(1,2) IN (SELECT    1,    1);
 SELECT ROW(1,2) = (SELECT    1,    2), ROW(1,2) IN (SELECT    1,    2);
 
+#
+# Bug #37362      Crash in do_field_eq
+#
+CREATE TABLE t1 (a INT, b INT, c INT);
+INSERT INTO t1 VALUES (1,1,1), (1,1,1);
+
+--error 1054
+EXPLAIN EXTENDED 
+  SELECT c FROM 
+    ( SELECT 
+      (SELECT COUNT(a) FROM 
+        (SELECT COUNT(b) FROM t1) AS x GROUP BY c
+      ) FROM t1 GROUP BY b
+    ) AS y;
+SHOW WARNINGS;
+
+DROP TABLE t1;
+
+
 --echo End of 5.0 tests
 
 #
diff --git a/mysql-test/t/trigger.test b/mysql-test/t/trigger.test
index 9a5556c518d..1e55f9d5993 100644
--- a/mysql-test/t/trigger.test
+++ b/mysql-test/t/trigger.test
@@ -2370,4 +2370,30 @@ drop trigger trg1;
 drop trigger trg2;
 drop table t1, t2;
 
+#
+# Bug#44653: Server crash noticed when executing random queries with partitions.
+#
+CREATE TABLE t1 ( a INT, b INT );
+CREATE TABLE t2 ( a INT AUTO_INCREMENT KEY, b INT );
+
+INSERT INTO t1 (a) VALUES (1);
+
+delimiter //;
+CREATE TRIGGER tr1
+BEFORE INSERT ON t2
+FOR EACH ROW 
+BEGIN 
+  UPDATE a_nonextisting_table SET a = 1;
+END//
+delimiter ;//
+
+--disable_abort_on_error
+CREATE TABLE IF NOT EXISTS t2 ( a INT, b INT ) SELECT a, b FROM t1;
+--enable_abort_on_error
+
+# Caused failed assertion
+SELECT * FROM t2;
+
+DROP TABLE t1, t2;
+
 --echo End of 5.1 tests.
diff --git a/mysql-test/t/trigger_notembedded.test b/mysql-test/t/trigger_notembedded.test
index 9588ec6e3ed..7a7e6c6bc85 100644
--- a/mysql-test/t/trigger_notembedded.test
+++ b/mysql-test/t/trigger_notembedded.test
@@ -909,4 +909,27 @@ select * from t1;
 drop table t1;
 disconnect flush;
 
+#
+# Bug#45412 SHOW CREATE TRIGGER does not require privileges to disclose trigger data
+#
+CREATE DATABASE db1;
+CREATE TABLE db1.t1 (a char(30)) ENGINE=MEMORY;
+CREATE TRIGGER db1.trg AFTER INSERT ON db1.t1 FOR EACH ROW
+ INSERT INTO db1.t1 VALUES('Some very sensitive data goes here');
+
+CREATE USER 'no_rights'@'localhost';
+REVOKE ALL ON *.* FROM 'no_rights'@'localhost';
+FLUSH PRIVILEGES;
+
+connect (con1,localhost,no_rights,,);
+SELECT trigger_name FROM INFORMATION_SCHEMA.TRIGGERS
+ WHERE trigger_schema = 'db1';
+--error ER_SPECIFIC_ACCESS_DENIED_ERROR
+SHOW CREATE TRIGGER db1.trg;
+
+connection default;
+disconnect con1;
+DROP USER 'no_rights'@'localhost';
+DROP DATABASE db1;
+
 --echo End of 5.1 tests.
diff --git a/mysql-test/t/type_newdecimal.test b/mysql-test/t/type_newdecimal.test
index 4cf9ea63dad..cd3c3f81510 100644
--- a/mysql-test/t/type_newdecimal.test
+++ b/mysql-test/t/type_newdecimal.test
@@ -1257,3 +1257,32 @@ select cast(-3.4 as decimal(2,1));
 select cast(99.6 as decimal(2,0));
 select cast(-13.4 as decimal(2,1));
 select cast(98.6 as decimal(2,0));
+
+--echo #
+--echo # Bug #45262: Bad effects with CREATE TABLE and DECIMAL
+--echo #
+
+CREATE TABLE t1 SELECT .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS my_col;
+DESCRIBE t1;
+SELECT my_col FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 SELECT 1 + .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS my_col;
+DESCRIBE t1;
+SELECT my_col FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 SELECT 1 * .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS my_col;
+DESCRIBE t1;
+SELECT my_col FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 SELECT 1 / .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS my_col;
+DESCRIBE t1;
+SELECT my_col FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 SELECT 1 % .123456789123456789123456789123456789123456789123456789123456789123456789123456789 AS my_col;
+DESCRIBE t1;
+SELECT my_col FROM t1;
+DROP TABLE t1;
diff --git a/mysql-test/t/type_time.test b/mysql-test/t/type_time.test
index 5fc763be7fe..5bb521601e5 100644
--- a/mysql-test/t/type_time.test
+++ b/mysql-test/t/type_time.test
@@ -77,3 +77,16 @@ insert into t1 values('2007-07-02', 1);
 insert into t1 values('2007-07-02', 2);
 SELECT sum(f3) FROM t1 where f2='2007-07-01 00:00:00' group by f2;
 drop table t1;
+
+
+--echo #
+--echo # Bug #44792: valgrind warning when casting from time to time
+--echo #
+
+CREATE TABLE t1 (c TIME);
+INSERT INTO t1 VALUES ('0:00:00');
+SELECT CAST(c AS TIME) FROM t1;
+DROP TABLE t1;
+
+
+--echo End of 5.0 tests
diff --git a/mysql-test/t/union.test b/mysql-test/t/union.test
index ece7099f66e..ec169838d59 100644
--- a/mysql-test/t/union.test
+++ b/mysql-test/t/union.test
@@ -1089,4 +1089,16 @@ CREATE TABLE t2 AS SELECT d FROM t1 UNION SELECT d FROM t1;
 SHOW FIELDS FROM t2;
 DROP TABLE t1, t2;
 
+#
+# Bug#43612 crash with explain extended, union, order by
+#
+CREATE TABLE t1(a INT);
+EXPLAIN EXTENDED
+SELECT a FROM t1
+UNION
+SELECT a FROM t1
+ORDER BY a;
+DROP TABLE t1;
+
+
 --echo End of 5.0 tests
diff --git a/mysql-test/t/user_var.test b/mysql-test/t/user_var.test
index fd4e538ea6c..c0740458a88 100644
--- a/mysql-test/t/user_var.test
+++ b/mysql-test/t/user_var.test
@@ -285,6 +285,18 @@ set @lastid=-1;
 select @lastid != id, @lastid, @lastid := id from t1;
 drop table t1;
 
+#
+# Bug#42009: SELECT into variable gives different results to direct SELECT
+#
+CREATE TABLE t1(a INT, b INT);
+INSERT INTO t1 VALUES (0, 0), (2, 1), (2, 3), (1, 1), (30, 20);
+SELECT a, b INTO @a, @b FROM t1 WHERE a=2 AND b=3 GROUP BY a, b;
+SELECT @a, @b;
+SELECT a, b FROM t1 WHERE a=2 AND b=3 GROUP BY a, b;
+DROP TABLE t1;
+
+--echo End of 5.0 tests
+
 #
 # Bug#42188: crash and/or memory corruption with user variables in trigger
 #
diff --git a/mysql-test/t/variables.test b/mysql-test/t/variables.test
index 6da20409639..1580d7f36d7 100644
--- a/mysql-test/t/variables.test
+++ b/mysql-test/t/variables.test
@@ -1201,4 +1201,29 @@ SET GLOBAL server_id = -1;
 SELECT @@GLOBAL.server_id;
 SET GLOBAL server_id = @old_server_id;
 
+#
+# Bug #42778: delete order by null global variable causes 
+#             assertion .\filesort.cc, line 797
+#
+
+SELECT @@GLOBAL.INIT_FILE, @@GLOBAL.INIT_FILE IS NULL;
+
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES ();
+SET @bug42778= @@sql_safe_updates;
+SET @@sql_safe_updates= 0;
+DELETE FROM t1 ORDER BY (@@GLOBAL.INIT_FILE) ASC LIMIT 10;
+SET @@sql_safe_updates= @bug42778;
+
+DROP TABLE t1;
+
+--echo #
+--echo # BUG#10206 - InnoDB: Transaction requiring Max_BinLog_Cache_size > 4GB always rollsback
+--echo #
+
+SET @old_max_binlog_cache_size = @@GLOBAL.max_binlog_cache_size;
+--echo # Set the max_binlog_cache_size to size more than 4GB. 
+SET GLOBAL max_binlog_cache_size = 5 * 1024 * 1024 * 1024;
+SELECT @@GLOBAL.max_binlog_cache_size;
+SET GLOBAL max_binlog_cache_size = @old_max_binlog_cache_size;
 --echo End of 5.1 tests
diff --git a/mysql-test/t/view.test b/mysql-test/t/view.test
index c9d01266e9e..7bec02e6fb6 100644
--- a/mysql-test/t/view.test
+++ b/mysql-test/t/view.test
@@ -3680,6 +3680,61 @@ SELECT * FROM v1 IGNORE INDEX (c2) WHERE c2=2;
 DROP VIEW v1;
 DROP TABLE t1;
 
+--echo # -----------------------------------------------------------------
+--echo # -- Bug#40825: Error 1356 while selecting from a view 
+--echo # --            with a "HAVING" clause though query works
+--echo # -----------------------------------------------------------------
+--echo
+
+CREATE TABLE t1 (c INT);
+
+--echo
+
+CREATE VIEW v1 (view_column) AS SELECT c AS alias FROM t1 HAVING alias;
+SHOW CREATE VIEW v1;
+SELECT * FROM v1;
+
+--echo
+
+DROP VIEW v1;
+DROP TABLE t1;
+
+--echo
+--echo # -- End of test case for Bug#40825
+--echo
+
+--echo # 
+--echo # Bug #45806 crash when replacing into a view with a join!
+--echo # 
+CREATE TABLE t1(a INT UNIQUE);
+CREATE VIEW v1 AS SELECT t1.a FROM t1, t1 AS a;
+INSERT INTO t1 VALUES (1), (2);
+
+REPLACE INTO v1(a) SELECT 1 FROM t1,t1 AS c;
+SELECT * FROM v1;
+REPLACE INTO v1(a) SELECT 3 FROM t1,t1 AS c;
+SELECT * FROM v1;
+DELETE FROM t1 WHERE a=3;
+INSERT INTO v1(a) SELECT 1 FROM t1,t1 AS c
+ON DUPLICATE KEY UPDATE `v1`.`a`= 1;
+SELECT * FROM v1;
+
+CREATE VIEW v2 AS SELECT t1.a FROM t1, v1 AS a;
+
+REPLACE INTO v2(a) SELECT 1 FROM t1,t1 AS c;
+SELECT * FROM v2;
+REPLACE INTO v2(a) SELECT 3 FROM t1,t1 AS c;
+SELECT * FROM v2;
+INSERT INTO v2(a) SELECT 1 FROM t1,t1 AS c
+ON DUPLICATE KEY UPDATE `v2`.`a`= 1;
+SELECT * FROM v2;
+
+DROP VIEW v1;
+DROP VIEW v2;
+DROP TABLE t1;
+
+--echo # -- End of test case for Bug#45806
+
 --echo # -----------------------------------------------------------------
 --echo # -- End of 5.0 tests.
 --echo # -----------------------------------------------------------------
@@ -3836,6 +3891,17 @@ drop procedure p;
 
 ###########################################################################
 
+
+--echo #
+--echo # Bug #44860: ALTER TABLE on view crashes server
+--echo #
+CREATE TABLE t1 (a INT);
+CREATE VIEW v1 AS SELECT a FROM t1;
+ALTER TABLE v1;
+DROP VIEW v1;
+DROP TABLE t1;
+
+
 --echo # -----------------------------------------------------------------
 --echo # -- End of 5.1 tests.
 --echo # -----------------------------------------------------------------
diff --git a/mysql-test/t/xa.test b/mysql-test/t/xa.test
index 04ecf518577..7b1c6a268d5 100644
--- a/mysql-test/t/xa.test
+++ b/mysql-test/t/xa.test
@@ -124,6 +124,31 @@ drop table t1;
 
 --echo End of 5.0 tests
 
+#
+# Bug#44672: Assertion failed: thd->transaction.xid_state.xid.is_null()
+#
+
+xa start 'a';
+xa end 'a';
+xa rollback 'a';
+xa start 'a';
+xa end 'a';
+xa rollback 'a';
+
+#
+# Bug#45548: XA transaction without access to InnoDB tables crashes the server
+#
+
+xa start 'a';
+xa end 'a';
+xa prepare 'a';
+xa commit 'a';
+
+xa start 'a';
+xa end 'a';
+xa prepare 'a';
+xa commit 'a';
+
 # Wait till all disconnects are completed
 --source include/wait_until_count_sessions.inc
 
diff --git a/mysys/charset.c b/mysys/charset.c
index 7a7ef0ad3ea..b23ab084e90 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -384,7 +384,7 @@ char *get_charsets_dir(char *buf)
   DBUG_RETURN(res);
 }
 
-CHARSET_INFO *all_charsets[256];
+CHARSET_INFO *all_charsets[256]={NULL};
 CHARSET_INFO *default_charset_info = &my_charset_latin1;
 
 void add_compiled_collation(CHARSET_INFO *cs)
diff --git a/mysys/hash.c b/mysys/hash.c
index e7b5352af34..63933abb085 100644
--- a/mysys/hash.c
+++ b/mysys/hash.c
@@ -45,6 +45,32 @@ static uint calc_hash(const HASH *hash, const uchar *key, size_t length)
   return nr1;
 }
 
+/**
+  @brief Initialize the hash
+  
+  @details
+
+  Initialize the hash, by defining and giving valid values for
+  its elements. The failure to allocate memory for the
+  hash->array element will not result in a fatal failure. The
+  dynamic array that is part of the hash will allocate memory
+  as required during insertion.
+
+  @param[in,out] hash         The hash that is initialized
+  @param[in]     charset      The charater set information
+  @param[in]     size         The hash size
+  @param[in]     key_offest   The key offset for the hash
+  @param[in]     key_length   The length of the key used in
+                              the hash
+  @param[in]     get_key      get the key for the hash
+  @param[in]     free_element pointer to the function that
+                              does cleanup
+  @param[in]     CALLER_INFO_PROTO flag that define the behaviour 
+                                   of the hash
+  @return        inidicates success or failure of initialization
+    @retval 0 success
+    @retval 1 failure
+*/
 my_bool
 _my_hash_init(HASH *hash, uint growth_size, CHARSET_INFO *charset,
               ulong size, size_t key_offset, size_t key_length,
@@ -55,12 +81,6 @@ _my_hash_init(HASH *hash, uint growth_size, CHARSET_INFO *charset,
   DBUG_PRINT("enter",("hash: 0x%lx  size: %u", (long) hash, (uint) size));
 
   hash->records=0;
-  if (my_init_dynamic_array_ci(&hash->array, sizeof(HASH_LINK), size,
-                               growth_size))
-  {
-    hash->free=0;				/* Allow call to my_hash_free */
-    DBUG_RETURN(1);
-  }
   hash->key_offset=key_offset;
   hash->key_length=key_length;
   hash->blength=1;
@@ -68,7 +88,8 @@ _my_hash_init(HASH *hash, uint growth_size, CHARSET_INFO *charset,
   hash->free=free_element;
   hash->flags=flags;
   hash->charset=charset;
-  DBUG_RETURN(0);
+  DBUG_RETURN(my_init_dynamic_array_ci(&hash->array, 
+                                       sizeof(HASH_LINK), size, growth_size));
 }
 
 
@@ -114,6 +135,7 @@ void my_hash_free(HASH *hash)
   my_hash_free_elements(hash);
   hash->free= 0;
   delete_dynamic(&hash->array);
+  hash->blength= 0;
   DBUG_VOID_RETURN;
 }
 
diff --git a/mysys/mf_format.c b/mysys/mf_format.c
index f199132626b..6afa2938fa3 100644
--- a/mysys/mf_format.c
+++ b/mysys/mf_format.c
@@ -79,7 +79,7 @@ char * fn_format(char * to, const char *name, const char *dir,
     /* To long path, return original or NULL */
     size_t tmp_length;
     if (flag & MY_SAFE_PATH)
-      return NullS;
+      DBUG_RETURN(NullS);
     tmp_length= strlength(startpos);
     DBUG_PRINT("error",("dev: '%s'  ext: '%s'  length: %u",dev,ext,
                         (uint) length));
diff --git a/mysys/mf_getdate.c b/mysys/mf_getdate.c
index 3a8e1be6a0b..9475bebd107 100644
--- a/mysys/mf_getdate.c
+++ b/mysys/mf_getdate.c
@@ -45,15 +45,15 @@ void get_date(register char * to, int flag, time_t date)
    skr=date ? (time_t) date : my_time(0);
 #if defined(HAVE_LOCALTIME_R) && defined(_REENTRANT)
    if (flag & GETDATE_GMT)
-     localtime_r(&skr,&tm_tmp);
-   else
      gmtime_r(&skr,&tm_tmp);
+   else
+     localtime_r(&skr,&tm_tmp);
    start_time= &tm_tmp;
 #else
    if (flag & GETDATE_GMT)
-     start_time= localtime(&skr);
-   else
      start_time= gmtime(&skr);
+   else
+     start_time= localtime(&skr);
 #endif
    if (flag & GETDATE_SHORT_DATE)
      sprintf(to,"%02d%02d%02d",
diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c
index 4b74cdbf266..fd3c2501226 100644
--- a/mysys/my_getopt.c
+++ b/mysys/my_getopt.c
@@ -20,6 +20,7 @@
 #include <mysys_err.h>
 #include <my_getopt.h>
 #include <errno.h>
+#include <m_string.h>
 
 typedef void (*init_func_p)(const struct my_option *option, uchar* *variable,
                             longlong value);
@@ -410,7 +411,8 @@ invalid value '%s'",
 	  argument= optend;
 	}
 	else if (optp->arg_type == OPT_ARG &&
-		 (optp->var_type & GET_TYPE_MASK) == GET_BOOL)
+		 (((optp->var_type & GET_TYPE_MASK) == GET_BOOL) ||
+                   (optp->var_type & GET_TYPE_MASK) == GET_ENUM))
 	{
 	  if (optend == disabled_my_option)
 	    *((my_bool*) value)= (my_bool) 0;
@@ -648,8 +650,18 @@ static int setval(const struct my_option *opts, uchar* *value, char *argument,
 	return EXIT_OUT_OF_MEMORY;
       break;
     case GET_ENUM:
-      if (((*(int*)result_pos)= find_type(argument, opts->typelib, 2) - 1) < 0)
-        return EXIT_ARGUMENT_INVALID;
+      if (((*(int*)result_pos)=
+             find_type(argument, opts->typelib, 2) - 1) < 0)
+      {
+        /*
+          Accept an integer representation of the enumerated item.
+        */
+        char *endptr;
+        unsigned int arg= (unsigned int) strtol(argument, &endptr, 10);
+        if (*endptr || arg >= opts->typelib->count)
+          return EXIT_ARGUMENT_INVALID;
+        *(int*)result_pos= arg;
+      }
       break;
     case GET_SET:
       *((ulonglong*)result_pos)= find_typeset(argument, opts->typelib, &err);
diff --git a/mysys/safemalloc.c b/mysys/safemalloc.c
index 36d07b475e9..c484f1d4c54 100644
--- a/mysys/safemalloc.c
+++ b/mysys/safemalloc.c
@@ -174,7 +174,7 @@ void *_mymalloc(size_t size, const char *filename, uint lineno, myf MyFlags)
   data[size + 3]= MAGICEND3;
   irem->filename= (char *) filename;
   irem->linenum= lineno;
-  irem->datasize= (uint32) size;
+  irem->datasize= size;
   irem->prev=	  NULL;
 
   /* Add this remember structure to the linked list */
diff --git a/scripts/mysql_convert_table_format.sh b/scripts/mysql_convert_table_format.sh
index d15c7b28410..6f586d0e8e0 100644
--- a/scripts/mysql_convert_table_format.sh
+++ b/scripts/mysql_convert_table_format.sh
@@ -23,18 +23,30 @@ $opt_help=$opt_version=$opt_verbose=$opt_force=0;
 $opt_user=$opt_database=$opt_password=undef;
 $opt_host="localhost";
 $opt_socket="";
-$opt_type="MYISAM";
+$opt_engine="MYISAM";
 $opt_port=0;
 $exit_status=0;
 
-GetOptions("force","help","host=s","password=s","user=s","type=s","verbose","version","socket=s", "port=i") || 
-  usage(0);
+GetOptions(
+  "e|engine|type=s"       => \$opt_type,
+  "f|force"               => \$opt_force,
+  "help|?"               => \$opt_help,
+  "h|host=s"              => \$opt_host,
+  "p|password=s"          => \$opt_password,
+  "u|user=s"              => \$opt_user,
+  "v|verbose"             => \$opt_verbose,
+  "V|version"             => \$opt_version,
+  "S|socket=s"            => \$opt_socket, 
+  "P|port=i"              => \$opt_port
+) || usage(0);
+
 usage($opt_version) if ($#ARGV < 0 || $opt_help || $opt_version);
+
 $opt_database=shift(@ARGV);
 
-if (uc($opt_type) eq "HEAP")
+if (grep { /^$opt_engine$/i } qw(HEAP MEMORY BLACKHOLE))
 {
-  print "Converting to type HEAP would delete your tables; aborting\n";
+  print "Converting to '$opt_engine' would delete your data; aborting\n";
   exit(1);
 }
 
@@ -54,21 +66,29 @@ $dbh = DBI->connect("DBI:mysql:$opt_database:${opt_host}$connect_opt",
 		    { PrintError => 0})
   || die "Can't connect to database $opt_database: $DBI::errstr\n";
 
-if ($#ARGV < 0)
+my @tables;
+
+push(@ARGV, "%") if(!@ARGV);
+
+foreach $pattern (@ARGV)
 {
-  # Fetch all table names from the database
   my ($sth,$row);
-  $sth=$dbh->prepare("show tables");
-  $sth->execute || die "Can't get tables from $opt_database; $DBI::errstr\n";
+  $sth=$dbh->prepare("SHOW TABLES LIKE ?");
+  $rv= $sth->execute($pattern);
+  if(!int($rv))
+  {
+    warn "Can't get tables matching '$pattern' from $opt_database; $DBI::errstr\n"; 
+    exit(1) unless $opt_force;
+  }
   while (($row = $sth->fetchrow_arrayref))
   {
-    push(@ARGV,$row->[0]);
+    push(@tables, $row->[0]);
   }
   $sth->finish;
 }
 
 print "Converting tables:\n" if ($opt_verbose);
-foreach $table (@ARGV)
+foreach $table (@tables)
 {
   my ($sth,$row);
 
@@ -76,14 +96,15 @@ foreach $table (@ARGV)
   $sth=$dbh->prepare("show table status like '$table'");  
   if ($sth->execute && ($row = $sth->fetchrow_arrayref))
   {
-    if (uc($row->[1]) eq uc($opt_type))
+    if (uc($row->[1]) eq uc($opt_engine))
     {
-      print "$table is already of type $opt_type;  Ignored\n";
+      print "$table already uses the '$opt_engine' engine;  Ignored\n";
       next;
     }
   }
   print "converting $table\n" if ($opt_verbose);
-  if (!$dbh->do("ALTER TABLE $table ENGINE=$opt_type"))
+  $table=~ s/`/``/g;
+  if (!$dbh->do("ALTER TABLE `$table` ENGINE=$opt_engine"))
   {
     print STDERR "Can't convert $table: Error $DBI::errstr\n";
     exit(1) if (!$opt_force);
@@ -103,43 +124,43 @@ sub usage
 
   print <<EOF;
 
-Conversion of a MySQL tables to other table types.
+Conversion of a MySQL tables to other storage engines
 
- Usage: $0 database [tables]
+ Usage: $0 database [table[ table ...]]
  If no tables has been specifed, all tables in the database will be converted.
+ You can also use wildcards, ie "my%"
 
  The following options are available:
 
---force
+-f, --force
   Continue even if there is some error.
 
---help or --Information
+-?, --help
   Shows this help
 
---host='host name' (Default $opt_host)
-  Host name where the database server is located.
+-e, --engine=ENGINE
+  Converts tables to the given storage engine (Default: $opt_engine)
 
---password='password'
+-h, --host=HOST
+  Host name where the database server is located. (Default: $opt_host)
+
+-p, --password=PASSWORD
   Password for the current user.
 
---port=port
+-P, --port=PORT
   TCP/IP port to connect to if host is not "localhost".
 
---socket='/path/to/socket'
+-S, --socket=SOCKET
   Socket to connect with.
 
---ENGINE='table-type'
-  Converts tables to the given table type (Default: $opt_type)
-  MySQL 3.23 supports at least the BDB, ISAM and MYISAM types.
-
---user='user_name'
+-u, --user=USER
   User name to log into the SQL server.
 
---verbose
+-v, --verbose
   This is a test specific option that is only used when debugging a test.
   Print more information about what is going on.
 
---version
+-V, --version
   Shows the version of this program.
 EOF
   exit(1);
diff --git a/scripts/mysql_find_rows.sh b/scripts/mysql_find_rows.sh
index 77eacc8a9b4..967a8196ebd 100644
--- a/scripts/mysql_find_rows.sh
+++ b/scripts/mysql_find_rows.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000, 2004 MySQL AB
 # 
 # This program is free software; you can redistribute it and/or modify
diff --git a/scripts/mysql_fix_extensions.sh b/scripts/mysql_fix_extensions.sh
index fbc72406f5e..6d4e017f678 100644
--- a/scripts/mysql_fix_extensions.sh
+++ b/scripts/mysql_fix_extensions.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # This is a utility for MySQL. It is not needed by any standard part
 # of MySQL.
 
diff --git a/scripts/mysql_setpermission.sh b/scripts/mysql_setpermission.sh
index b1ea26a9b7d..5fa6b969e39 100644
--- a/scripts/mysql_setpermission.sh
+++ b/scripts/mysql_setpermission.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 ## Emacs, this is -*- perl -*- mode? :-)
 ##
 ##        Permission setter for MySQL
diff --git a/scripts/mysql_zap.sh b/scripts/mysql_zap.sh
index 6c05afb772c..f78212e2578 100644
--- a/scripts/mysql_zap.sh
+++ b/scripts/mysql_zap.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000-2002, 2004 MySQL AB
 # 
 # This program is free software; you can redistribute it and/or modify
@@ -27,8 +27,8 @@ $opt_f= 0;
 $opt_t= 0;
 $opt_a = "";
 
-$BSD = -f '/vmunix' || $ENV{"OS"} eq "SunOS4" || $^O eq 'darwin';
-$LINUX = $^O eq 'linux';
+$BSD = -f '/vmunix' || $ENV{"OS"} eq "SunOS4";
+$LINUX = $^O eq 'linux' || $^O eq 'darwin';
 $pscmd = $BSD ? "/bin/ps -auxww" : $LINUX ? "/bin/ps axuw" : "/bin/ps -ef";
 
 open(TTYIN, "</dev/tty") || die "can't read /dev/tty: $!";
diff --git a/scripts/mysqlaccess.sh b/scripts/mysqlaccess.sh
index bcaf9f8af8e..0153a3afa7c 100644
--- a/scripts/mysqlaccess.sh
+++ b/scripts/mysqlaccess.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # ****************************
 package MySQLaccess;
 #use strict;
diff --git a/scripts/mysqld_multi.sh b/scripts/mysqld_multi.sh
index 3cb4665eb1c..430c74874eb 100644
--- a/scripts/mysqld_multi.sh
+++ b/scripts/mysqld_multi.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/perl
 
 use Getopt::Long;
-use POSIX qw(strftime);
+use POSIX qw(strftime getcwd);
 
 $|=1;
 $VER="2.16";
@@ -295,6 +295,7 @@ sub start_mysqlds()
   {
     @options = defaults_for_group($groups[$i]);
 
+    $basedir_found= 0; # The default
     $mysqld_found= 1; # The default
     $mysqld_found= 0 if (!length($mysqld));
     $com= "$mysqld";
@@ -310,17 +311,25 @@ sub start_mysqlds()
 	$com= $options[$j];
         $mysqld_found= 1;
       }
+      elsif ("--basedir=" eq substr($options[$j], 0, 10))
+      {
+        $basedir= $options[$j];
+        $basedir =~ s/^--basedir=//;
+        $basedir_found= 1;
+        $options[$j]= quote_shell_word($options[$j]);
+        $tmp.= " $options[$j]";
+      }
       else
       {
 	$options[$j]= quote_shell_word($options[$j]);
 	$tmp.= " $options[$j]";
       }
     }
-    if ($opt_verbose && $com =~ m/\/safe_mysqld$/ && !$info_sent)
+    if ($opt_verbose && $com =~ m/\/(safe_mysqld|mysqld_safe)$/ && !$info_sent)
     {
-      print "WARNING: safe_mysqld is being used to start mysqld. In this case you ";
+      print "WARNING: $1 is being used to start mysqld. In this case you ";
       print "may need to pass\n\"ledir=...\" under groups [mysqldN] to ";
-      print "safe_mysqld in order to find the actual mysqld binary.\n";
+      print "$1 in order to find the actual mysqld binary.\n";
       print "ledir (library executable directory) should be the path to the ";
       print "wanted mysqld binary.\n\n";
       $info_sent= 1;
@@ -337,7 +346,16 @@ sub start_mysqlds()
       print "group [$groups[$i]] separately.\n";
       exit(1);
     }
+    if ($basedir_found)
+    {
+      $curdir=getcwd();
+      chdir($basedir) or die "Can't change to datadir $basedir";
+    }
     system($com);
+    if ($basedir_found)
+    {
+      chdir($curdir) or die "Can't change back to original dir $curdir";
+    }
   }
   if (!$i && !$opt_no_log)
   {
@@ -670,9 +688,9 @@ language   = @datadir@/mysql/english
 user       = unix_user1
 
 [mysqld3]
-mysqld     = /path/to/safe_mysqld/safe_mysqld
+mysqld     = /path/to/mysqld_safe
 ledir      = /path/to/mysqld-binary/
-mysqladmin = /path/to/mysqladmin/mysqladmin
+mysqladmin = /path/to/mysqladmin
 socket     = /tmp/mysql.sock3
 port       = 3308
 pid-file   = @localstatedir@3/hostname.pid3
diff --git a/scripts/mysqld_safe.sh b/scripts/mysqld_safe.sh
index 960c3e39bab..23b5efcaf2b 100644
--- a/scripts/mysqld_safe.sh
+++ b/scripts/mysqld_safe.sh
@@ -67,7 +67,7 @@ my_which ()
   ret=0
   for file
   do
-    for dir in "$PATH"
+    for dir in $PATH
     do
       if [ -f "$dir/$file" ]
       then
@@ -391,8 +391,8 @@ then
   fi
   # Change the err log to the right user, if it is in use
   if [ $want_syslog -eq 0 ]; then
-    touch $err_log
-    chown $user $err_log
+    touch "$err_log"
+    chown $user "$err_log"
   fi
   if test -n "$open_files"
   then
@@ -509,9 +509,9 @@ fi
 #
 # If there exists an old pid file, check if the daemon is already running
 # Note: The switches to 'ps' may depend on your operating system
-if test -f $pid_file
+if test -f "$pid_file"
 then
-  PID=`cat $pid_file`
+  PID=`cat "$pid_file"`
   if @CHECK_PID@
   then
     if @FIND_PROC@
@@ -520,8 +520,8 @@ then
       exit 1
     fi
   fi
-  rm -f $pid_file
-  if test -f $pid_file
+  rm -f "$pid_file"
+  if test -f "$pid_file"
   then
     log_error "Fatal error: Can't remove the pid file:
 $pid_file
@@ -563,11 +563,11 @@ test -n "$NOHUP_NICENESS" && cmd="$cmd < /dev/null"
 log_notice "Starting $MYSQLD daemon with databases from $DATADIR"
 while true
 do
-  rm -f $safe_mysql_unix_port $pid_file	# Some extra safety
+  rm -f $safe_mysql_unix_port "$pid_file"	# Some extra safety
 
   eval_log_error "$cmd"
 
-  if test ! -f $pid_file		# This is removed if normal shutdown
+  if test ! -f "$pid_file"		# This is removed if normal shutdown
   then
     break
   fi
diff --git a/scripts/mysqldumpslow.sh b/scripts/mysqldumpslow.sh
index ce2670b2abd..8580b8e6203 100644
--- a/scripts/mysqldumpslow.sh
+++ b/scripts/mysqldumpslow.sh
@@ -20,7 +20,7 @@ GetOptions(\%opt,
     'v|verbose+',# verbose
     'help+',	# write usage info
     'd|debug+',	# debug
-    's=s',	# what to sort by (t, at, l, al, r, ar etc)
+    's=s',	# what to sort by (al, at, ar, c, t, l, r)
     'r!',	# reverse the sort order (largest last instead of first)
     't=i',	# just show the top n queries
     'a!',	# don't abstract all numbers to N and strings to 'S'
@@ -163,7 +163,14 @@ Parse and summarize the MySQL slow query log. Options are
 
   -v           verbose
   -d           debug
-  -s ORDER     what to sort by (t, at, l, al, r, ar etc), 'at' is default
+  -s ORDER     what to sort by (al, at, ar, c, l, r, t), 'at' is default
+                al: average lock time
+                ar: average rows sent
+                at: average query time
+                 c: count
+                 l: lock time
+                 r: rows sent
+                 t: query time  
   -r           reverse the sort order (largest last instead of first)
   -t NUM       just show the top n queries
   -a           don't abstract all numbers to N and strings to 'S'
diff --git a/sql-bench/as3ap.sh b/sql-bench/as3ap.sh
index d84219a37eb..1c672377fd3 100644
--- a/sql-bench/as3ap.sh
+++ b/sql-bench/as3ap.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2001, 2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/bench-count-distinct.sh b/sql-bench/bench-count-distinct.sh
index 31558aa0b2e..5cc9fb555af 100644
--- a/sql-bench/bench-count-distinct.sh
+++ b/sql-bench/bench-count-distinct.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2001, 2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/bench-init.pl.sh b/sql-bench/bench-init.pl.sh
index 588e518a648..919ddcedf16 100644
--- a/sql-bench/bench-init.pl.sh
+++ b/sql-bench/bench-init.pl.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000-2003, 2005 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/compare-results.sh b/sql-bench/compare-results.sh
index 145c4894ca2..fec65497c57 100644
--- a/sql-bench/compare-results.sh
+++ b/sql-bench/compare-results.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000-2001, 2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/copy-db.sh b/sql-bench/copy-db.sh
index f74fa68a081..e0c290d2453 100644
--- a/sql-bench/copy-db.sh
+++ b/sql-bench/copy-db.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000, 2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/crash-me.sh b/sql-bench/crash-me.sh
index b28bdba7f9f..cc8659513c2 100644
--- a/sql-bench/crash-me.sh
+++ b/sql-bench/crash-me.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # -*- perl -*-
 # Copyright (C) 2000-2006 MySQL AB
 #
diff --git a/sql-bench/innotest1.sh b/sql-bench/innotest1.sh
index 8675de19ae4..1c5450a1d9e 100644
--- a/sql-bench/innotest1.sh
+++ b/sql-bench/innotest1.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 ############################################################################
 #     Stress test for MySQL/InnoDB combined database
 #     (c) 2002 Innobase Oy & MySQL AB
diff --git a/sql-bench/innotest1a.sh b/sql-bench/innotest1a.sh
index 93f8a2a443b..876100e5de4 100644
--- a/sql-bench/innotest1a.sh
+++ b/sql-bench/innotest1a.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 ############################################################################
 #     Stress test for MySQL/InnoDB combined database
 #     (c) 2002 Innobase Oy & MySQL AB
diff --git a/sql-bench/innotest1b.sh b/sql-bench/innotest1b.sh
index 48fe96ebe7d..3f6c9f5bd5f 100644
--- a/sql-bench/innotest1b.sh
+++ b/sql-bench/innotest1b.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 ############################################################################
 #     Stress test for MySQL/InnoDB combined database
 #     (c) 2002 Innobase Oy & MySQL AB
diff --git a/sql-bench/innotest2.sh b/sql-bench/innotest2.sh
index aea44003903..cfeb0527970 100644
--- a/sql-bench/innotest2.sh
+++ b/sql-bench/innotest2.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 ############################################################################
 #     Stress test for MySQL/InnoDB combined database
 #     (c) 2002 Innobase Oy & MySQL AB
diff --git a/sql-bench/innotest2a.sh b/sql-bench/innotest2a.sh
index 3d4bb9933da..f77ed3ddadd 100644
--- a/sql-bench/innotest2a.sh
+++ b/sql-bench/innotest2a.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 ############################################################################
 #     Stress test for MySQL/Innobase combined database
 #     (c) 2000 Innobase Oy & MySQL AB
diff --git a/sql-bench/innotest2b.sh b/sql-bench/innotest2b.sh
index 272b6dcffd0..72a71d06c73 100644
--- a/sql-bench/innotest2b.sh
+++ b/sql-bench/innotest2b.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 ############################################################################
 #     Stress test for MySQL/Innobase combined database
 #     (c) 2000 Innobase Oy & MySQL AB
diff --git a/sql-bench/run-all-tests.sh b/sql-bench/run-all-tests.sh
index 50ac8d0cbe3..a4b03428d94 100644
--- a/sql-bench/run-all-tests.sh
+++ b/sql-bench/run-all-tests.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000-2001, 2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/server-cfg.sh b/sql-bench/server-cfg.sh
index a7492f67d1a..5ed7fdf482c 100644
--- a/sql-bench/server-cfg.sh
+++ b/sql-bench/server-cfg.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # -*- perl -*-
 # Copyright (C) 2000-2006 MySQL AB
 #
diff --git a/sql-bench/test-ATIS.sh b/sql-bench/test-ATIS.sh
index 6d102fd3977..79b38a95506 100644
--- a/sql-bench/test-ATIS.sh
+++ b/sql-bench/test-ATIS.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000-2001, 2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/test-alter-table.sh b/sql-bench/test-alter-table.sh
index eb06582dc0b..36db26f4bf3 100644
--- a/sql-bench/test-alter-table.sh
+++ b/sql-bench/test-alter-table.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000-2001, 2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/test-big-tables.sh b/sql-bench/test-big-tables.sh
index 0226967bc54..33694a42e17 100644
--- a/sql-bench/test-big-tables.sh
+++ b/sql-bench/test-big-tables.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000-2001, 2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/test-connect.sh b/sql-bench/test-connect.sh
index 84175c357aa..d0f3f0791a4 100644
--- a/sql-bench/test-connect.sh
+++ b/sql-bench/test-connect.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000-2001, 2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/test-create.sh b/sql-bench/test-create.sh
index 63672519e61..40fd9c49ae8 100644
--- a/sql-bench/test-create.sh
+++ b/sql-bench/test-create.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000-2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/test-insert.sh b/sql-bench/test-insert.sh
index badc52c99d6..387cb48e494 100644
--- a/sql-bench/test-insert.sh
+++ b/sql-bench/test-insert.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000-2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/test-select.sh b/sql-bench/test-select.sh
index 809755ab4d7..41e8205196e 100644
--- a/sql-bench/test-select.sh
+++ b/sql-bench/test-select.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000-2001, 2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/test-transactions.sh b/sql-bench/test-transactions.sh
index 5723c856564..edbfef0e3ce 100644
--- a/sql-bench/test-transactions.sh
+++ b/sql-bench/test-transactions.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2001, 2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-bench/test-wisconsin.sh b/sql-bench/test-wisconsin.sh
index 38ab93e7f4a..1974461a7c4 100644
--- a/sql-bench/test-wisconsin.sh
+++ b/sql-bench/test-wisconsin.sh
@@ -1,4 +1,4 @@
-#!@PERL@
+#!/usr/bin/perl
 # Copyright (C) 2000-2001, 2003 MySQL AB
 #
 # This library is free software; you can redistribute it and/or
diff --git a/sql-common/client.c b/sql-common/client.c
index d2c7e02551d..2cbc15ad746 100644
--- a/sql-common/client.c
+++ b/sql-common/client.c
@@ -120,6 +120,7 @@ const char 	*def_shared_memory_base_name= default_shared_memory_base_name;
 
 static void mysql_close_free_options(MYSQL *mysql);
 static void mysql_close_free(MYSQL *mysql);
+static void mysql_prune_stmt_list(MYSQL *mysql);
 
 #if !(defined(__WIN__) || defined(__NETWARE__))
 static int wait_for_data(my_socket fd, uint timeout);
@@ -476,6 +477,9 @@ HANDLE create_shared_memory(MYSQL *mysql,NET *net, uint connect_timeout)
   DWORD error_code = 0;
   DWORD event_access_rights= SYNCHRONIZE | EVENT_MODIFY_STATE;
   char *shared_memory_base_name = mysql->options.shared_memory_base_name;
+  static const char *name_prefixes[] = {"","Global\\"};
+  const char *prefix;
+  int i;
 
   /*
      get enough space base-name + '_' + longest suffix we might ever send
@@ -490,9 +494,18 @@ HANDLE create_shared_memory(MYSQL *mysql,NET *net, uint connect_timeout)
     shared_memory_base_name is unique value for each server
     unique_part is uniquel value for each object (events and file-mapping)
   */
-  suffix_pos = strxmov(tmp, "Global\\", shared_memory_base_name, "_", NullS);
-  strmov(suffix_pos, "CONNECT_REQUEST");
-  if (!(event_connect_request= OpenEvent(event_access_rights, FALSE, tmp)))
+  for (i = 0; i< array_elements(name_prefixes); i++)
+  {
+    prefix= name_prefixes[i];
+    suffix_pos = strxmov(tmp, prefix , shared_memory_base_name, "_", NullS);
+    strmov(suffix_pos, "CONNECT_REQUEST");
+    event_connect_request= OpenEvent(event_access_rights, FALSE, tmp);
+    if (event_connect_request)
+    {
+      break;
+    }
+  }
+  if (!event_connect_request)
   {
     error_allow = CR_SHARED_MEMORY_CONNECT_REQUEST_ERROR;
     goto err;
@@ -544,7 +557,7 @@ HANDLE create_shared_memory(MYSQL *mysql,NET *net, uint connect_timeout)
     unique_part is uniquel value for each object (events and file-mapping)
     number_of_connection is number of connection between server and client
   */
-  suffix_pos = strxmov(tmp, "Global\\", shared_memory_base_name, "_", connect_number_char,
+  suffix_pos = strxmov(tmp, prefix , shared_memory_base_name, "_", connect_number_char,
 		       "_", NullS);
   strmov(suffix_pos, "DATA");
   if ((handle_file_map = OpenFileMapping(FILE_MAP_WRITE,FALSE,tmp)) == NULL)
@@ -924,6 +937,7 @@ void end_server(MYSQL *mysql)
     vio_delete(mysql->net.vio);
     reset_sigpipe(mysql);
     mysql->net.vio= 0;          /* Marker */
+    mysql_prune_stmt_list(mysql);
   }
   net_end(&mysql->net);
   free_old_query(mysql);
@@ -2526,30 +2540,9 @@ my_bool mysql_reconnect(MYSQL *mysql)
   tmp_mysql.reconnect= 1;
   tmp_mysql.free_me= mysql->free_me;
 
-  /*
-    For each stmt in mysql->stmts, move it to tmp_mysql if it is
-    in state MYSQL_STMT_INIT_DONE, otherwise close it.
-  */
-  {
-    LIST *element= mysql->stmts;
-    for (; element; element= element->next)
-    {
-      MYSQL_STMT *stmt= (MYSQL_STMT *) element->data;
-      if (stmt->state != MYSQL_STMT_INIT_DONE)
-      {
-        stmt->mysql= 0;
-        stmt->last_errno= CR_SERVER_LOST;
-        strmov(stmt->last_error, ER(CR_SERVER_LOST));
-        strmov(stmt->sqlstate, unknown_sqlstate);
-      }
-      else
-      {
-        tmp_mysql.stmts= list_add(tmp_mysql.stmts, &stmt->list);
-      }
-      /* No need to call list_delete for statement here */
-    }
-    mysql->stmts= NULL;
-  }
+  /* Move prepared statements (if any) over to the new mysql object */
+  tmp_mysql.stmts= mysql->stmts;
+  mysql->stmts= 0;
 
   /* Don't free options as these are now used in tmp_mysql */
   bzero((char*) &mysql->options,sizeof(mysql->options));
@@ -2639,6 +2632,46 @@ static void mysql_close_free(MYSQL *mysql)
 }
 
 
+/**
+  For use when the connection to the server has been lost (in which case 
+  the server has discarded all information about prepared statements
+  associated with the connection).
+
+  Mark all statements in mysql->stmts by setting stmt->mysql= 0 if the
+  statement has transitioned beyond the MYSQL_STMT_INIT_DONE state, and
+  unlink the statement from the mysql->stmts list.
+
+  The remaining pruned list of statements (if any) is kept in mysql->stmts.
+
+  @param mysql       pointer to the MYSQL object
+
+  @return none
+*/
+static void mysql_prune_stmt_list(MYSQL *mysql)
+{
+  LIST *element= mysql->stmts;
+  LIST *pruned_list= 0;
+
+  for (; element; element= element->next)
+  {
+    MYSQL_STMT *stmt= (MYSQL_STMT *) element->data;
+    if (stmt->state != MYSQL_STMT_INIT_DONE)
+    {
+      stmt->mysql= 0;
+      stmt->last_errno= CR_SERVER_LOST;
+      strmov(stmt->last_error, ER(CR_SERVER_LOST));
+      strmov(stmt->sqlstate, unknown_sqlstate);
+    }
+    else
+    {
+      pruned_list= list_add(pruned_list, element);
+    }
+  }
+
+  mysql->stmts= pruned_list;
+}
+
+
 /*
   Clear connection pointer of every statement: this is necessary
   to give error on attempt to use a prepared statement of closed
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index cfd049e1864..6f162f4d84d 100755
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -39,7 +39,8 @@ SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/sql/sql_yacc.h
 
 ADD_DEFINITIONS(-DMYSQL_SERVER -D_CONSOLE -DHAVE_DLOPEN -DHAVE_EVENT_SCHEDULER)
 
-ADD_EXECUTABLE(mysqld
+
+SET (SQL_SOURCE
                ../sql-common/client.c derror.cc des_key_file.cc
                discover.cc ../libmysql/errmsg.c field.cc  field_conv.cc 
                filesort.cc gstream.cc
@@ -82,54 +83,45 @@ ADD_EXECUTABLE(mysqld
                ${PROJECT_SOURCE_DIR}/include/mysql_version.h 
                ${PROJECT_SOURCE_DIR}/sql/sql_builtin.cc
                ${PROJECT_SOURCE_DIR}/sql/lex_hash.h)
+ADD_LIBRARY(sql ${SQL_SOURCE})
 
-TARGET_LINK_LIBRARIES(mysqld
-                      heap myisam myisammrg mysys yassl zlib debug dbug yassl 
-                      taocrypt strings vio regex wsock32 ws2_32)
+IF (NOT EXISTS cmake_dummy.cc)
+  FILE (WRITE cmake_dummy.cc "")
+ENDIF (NOT EXISTS cmake_dummy.cc)
+ADD_EXECUTABLE(mysqld cmake_dummy.cc)
 
 SET_TARGET_PROPERTIES(mysqld PROPERTIES OUTPUT_NAME mysqld${MYSQLD_EXE_SUFFIX})
+SET_TARGET_PROPERTIES(mysqld PROPERTIES ENABLE_EXPORTS TRUE)
 
-IF(cmake_version EQUAL 20406)
-# Work around for 2.4.6 bug, OUTPUT_NAME will not set the right .PDB
-# file name. Note that COMPILE_FLAGS set some temporary pdb during build,
-# LINK_FLAGS sets the real one.
-SET_TARGET_PROPERTIES(mysqld PROPERTIES
-                      COMPILE_FLAGS "/Fd${CMAKE_CFG_INTDIR}/mysqld${MYSQLD_EXE_SUFFIX}.pdb"
-                      LINK_FLAGS  "/PDB:${CMAKE_CFG_INTDIR}/mysqld${MYSQLD_EXE_SUFFIX}.pdb")
-ENDIF(cmake_version EQUAL 20406)
+SET (MYSQLD_CORE_LIBS mysys zlib dbug strings yassl taocrypt vio regex sql)
+TARGET_LINK_LIBRARIES(mysqld ${MYSQLD_CORE_LIBS} ${MYSQLD_STATIC_ENGINE_LIBS})
+TARGET_LINK_LIBRARIES(mysqld ws2_32.lib)
 
-IF(EMBED_MANIFESTS)
-  MYSQL_EMBED_MANIFEST("mysqld" "asInvoker")
-ENDIF(EMBED_MANIFESTS)
-IF(WITH_ARCHIVE_STORAGE_ENGINE)
-  TARGET_LINK_LIBRARIES(mysqld archive)
-ENDIF(WITH_ARCHIVE_STORAGE_ENGINE)
-IF(WITH_BLACKHOLE_STORAGE_ENGINE)
-  TARGET_LINK_LIBRARIES(mysqld blackhole)
-ENDIF(WITH_BLACKHOLE_STORAGE_ENGINE)
-IF(WITH_CSV_STORAGE_ENGINE)
-  TARGET_LINK_LIBRARIES(mysqld csv)
-ENDIF(WITH_CSV_STORAGE_ENGINE)
-IF(WITH_EXAMPLE_STORAGE_ENGINE)
-  TARGET_LINK_LIBRARIES(mysqld example)
-ENDIF(WITH_EXAMPLE_STORAGE_ENGINE)
-IF(WITH_FEDERATED_STORAGE_ENGINE)
-  TARGET_LINK_LIBRARIES(mysqld federated)
-ENDIF(WITH_FEDERATED_STORAGE_ENGINE)
-IF(WITH_INNOBASE_STORAGE_ENGINE)
-  TARGET_LINK_LIBRARIES(mysqld innobase)
-ENDIF(WITH_INNOBASE_STORAGE_ENGINE)
 
-ADD_DEPENDENCIES(mysqld GenError)
+IF(MSVC AND NOT WITHOUT_DYNAMIC_PLUGINS)
+  # Set module definition file. Also use non-incremental linker, 
+  # incremental appears to crash from time to time,if used with /DEF option
+  SET_TARGET_PROPERTIES(mysqld PROPERTIES LINK_FLAGS "/DEF:mysqld.def /INCREMENTAL:NO")
 
-# NOTE CMake 2.4.6 creates strange dependencies between files in OUTPUT,
-# so for now we only list one if more than one
+  FOREACH (CORELIB ${MYSQLD_CORE_LIBS})
+    GET_TARGET_PROPERTY(LOC ${CORELIB} LOCATION)
+    FILE(TO_NATIVE_PATH ${LOC} LOC)
+    SET (LIB_LOCATIONS ${LIB_LOCATIONS} ${LOC}) 
+  ENDFOREACH (CORELIB ${MYSQLD_CORE_LIBS})
+ 
+  ADD_CUSTOM_COMMAND(TARGET mysqld PRE_LINK
+    COMMAND cscript ARGS //nologo ${PROJECT_SOURCE_DIR}/win/create_def_file.js
+                  ${PLATFORM}  ${LIB_LOCATIONS} > mysqld.def 
+    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/sql)
+ENDIF(MSVC AND NOT WITHOUT_DYNAMIC_PLUGINS)
+
+ADD_DEPENDENCIES(sql GenError)
 
 # Sql Parser custom command
 ADD_CUSTOM_COMMAND(
         OUTPUT ${PROJECT_SOURCE_DIR}/sql/sql_yacc.h
-#              ${PROJECT_SOURCE_DIR}/sql/sql_yacc.cc
-        COMMAND bison.exe ARGS -y -p MYSQL --defines=sql_yacc.h
+               ${PROJECT_SOURCE_DIR}/sql/sql_yacc.cc
+        COMMAND bison ARGS -y -p MYSQL --defines=sql_yacc.h
                                --output=sql_yacc.cc sql_yacc.yy
         DEPENDS ${PROJECT_SOURCE_DIR}/sql/sql_yacc.yy)
 
@@ -146,16 +138,16 @@ ADD_CUSTOM_COMMAND(
 ADD_CUSTOM_TARGET(
         GenServerSource ALL
         DEPENDS ${PROJECT_SOURCE_DIR}/sql/sql_yacc.h
-#               ${PROJECT_SOURCE_DIR}/sql/sql_yacc.cc
+                ${PROJECT_SOURCE_DIR}/sql/sql_yacc.cc
                 ${PROJECT_SOURCE_DIR}/sql/message.h
-#               ${PROJECT_SOURCE_DIR}/sql/message.rc
+                ${PROJECT_SOURCE_DIR}/sql/message.rc
                 ${PROJECT_SOURCE_DIR}/sql/lex_hash.h)
 
 ADD_DEPENDENCIES(mysqld GenServerSource)
 
 # Remove the auto-generated files as part of 'Clean Solution'
 SET_DIRECTORY_PROPERTIES(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES 
-                         "lex_hash.h;sql_yacc.h;sql_yacc.cc")
+  "lex_hash.h;sql_yacc.h;sql_yacc.cc;mysqld.def")
 
 ADD_LIBRARY(udf_example MODULE udf_example.c udf_example.def)
 ADD_DEPENDENCIES(udf_example strings GenError)
diff --git a/sql/field.cc b/sql/field.cc
index d11b509075b..ed085de1db3 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -1165,7 +1165,7 @@ bool Field_num::get_int(CHARSET_INFO *cs, const char *from, uint len,
   if (unsigned_flag)
   {
 
-    if (((ulonglong) *rnd > unsigned_max) && (*rnd= (longlong) unsigned_max) ||
+    if ((((ulonglong) *rnd > unsigned_max) && (*rnd= (longlong) unsigned_max)) ||
         error == MY_ERRNO_ERANGE)
     {
       goto out_of_range;
@@ -1350,7 +1350,7 @@ void Field::copy_from_tmp(int row_offset)
   if (null_ptr)
   {
     *null_ptr= (uchar) ((null_ptr[0] & (uchar) ~(uint) null_bit) |
-			null_ptr[row_offset] & (uchar) null_bit);
+			(null_ptr[row_offset] & (uchar) null_bit));
   }
 }
 
@@ -4081,8 +4081,8 @@ int Field_float::store(const char *from,uint len,CHARSET_INFO *cs)
   int error;
   char *end;
   double nr= my_strntod(cs,(char*) from,len,&end,&error);
-  if (error || (!len || (uint) (end-from) != len &&
-                table->in_use->count_cuted_fields))
+  if (error || (!len || ((uint) (end-from) != len &&
+                table->in_use->count_cuted_fields)))
   {
     set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
                 (error ? ER_WARN_DATA_OUT_OF_RANGE : WARN_DATA_TRUNCATED), 1);
@@ -4343,8 +4343,8 @@ int Field_double::store(const char *from,uint len,CHARSET_INFO *cs)
   int error;
   char *end;
   double nr= my_strntod(cs,(char*) from, len, &end, &error);
-  if (error || (!len || (uint) (end-from) != len &&
-                table->in_use->count_cuted_fields))
+  if (error || (!len || ((uint) (end-from) != len &&
+                table->in_use->count_cuted_fields)))
   {
     set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
                 (error ? ER_WARN_DATA_OUT_OF_RANGE : WARN_DATA_TRUNCATED), 1);
@@ -5196,7 +5196,7 @@ int Field_time::store(longlong nr, bool unsigned_val)
                          MYSQL_TIMESTAMP_TIME, 1);
     error= 1;
   }
-  else if (nr > (longlong) TIME_MAX_VALUE || nr < 0 && unsigned_val)
+  else if (nr > (longlong) TIME_MAX_VALUE || (nr < 0 && unsigned_val))
   {
     tmp= TIME_MAX_VALUE;
     set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
@@ -5307,7 +5307,7 @@ bool Field_time::get_time(MYSQL_TIME *ltime)
     ltime->neg= 1;
     tmp=-tmp;
   }
-  ltime->day= 0;
+  ltime->year= ltime->month= ltime->day= 0;
   ltime->hour=   (int) (tmp/10000);
   tmp-=ltime->hour*10000;
   ltime->minute= (int) tmp/100;
@@ -5361,7 +5361,7 @@ int Field_year::store(const char *from, uint len,CHARSET_INFO *cs)
   int error;
   longlong nr= cs->cset->strntoull10rnd(cs, from, len, 0, &end, &error);
 
-  if (nr < 0 || nr >= 100 && nr <= 1900 || nr > 2155 || 
+  if (nr < 0 || (nr >= 100 && nr <= 1900) || nr > 2155 ||
       error == MY_ERRNO_ERANGE)
   {
     *ptr=0;
@@ -5405,7 +5405,7 @@ int Field_year::store(double nr)
 int Field_year::store(longlong nr, bool unsigned_val)
 {
   ASSERT_COLUMN_MARKED_FOR_WRITE;
-  if (nr < 0 || nr >= 100 && nr <= 1900 || nr > 2155)
+  if (nr < 0 || (nr >= 100 && nr <= 1900) || nr > 2155)
   {
     *ptr= 0;
     set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
@@ -6429,16 +6429,16 @@ int Field_str::store(double nr)
   /* Calculate the exponent from the 'e'-format conversion */
   if (anr < 1.0 && anr > 0)
   {
-    for (exp= 0; anr < 1e-100; exp-= 100, anr*= 1e100);
-    for (; anr < 1e-10; exp-= 10, anr*= 1e10);
-    for (i= 1; anr < 1 / log_10[i]; exp--, i++);
+    for (exp= 0; anr < 1e-100; exp-= 100, anr*= 1e100) ;
+    for (; anr < 1e-10; exp-= 10, anr*= 1e10) ;
+    for (i= 1; anr < 1 / log_10[i]; exp--, i++) ;
     exp--;
   }
   else
   {
-    for (exp= 0; anr > 1e100; exp+= 100, anr/= 1e100);
-    for (; anr > 1e10; exp+= 10, anr/= 1e10);
-    for (i= 1; anr > log_10[i]; exp++, i++);
+    for (exp= 0; anr > 1e100; exp+= 100, anr/= 1e100) ;
+    for (; anr > 1e10; exp+= 10, anr/= 1e10) ;
+    for (i= 1; anr > log_10[i]; exp++, i++) ;
   }
 
   max_length= local_char_length - neg;
@@ -8833,7 +8833,7 @@ bool Field_num::eq_def(Field *field)
   Field_num *from_num= (Field_num*) field;
 
   if (unsigned_flag != from_num->unsigned_flag ||
-      zerofill && !from_num->zerofill && !zero_pack() ||
+      (zerofill && !from_num->zerofill && !zero_pack()) ||
       dec != from_num->dec)
     return 0;
   return 1;
@@ -8974,7 +8974,7 @@ int Field_bit::store(const char *from, uint length, CHARSET_INFO *cs)
   ASSERT_COLUMN_MARKED_FOR_WRITE;
   int delta;
 
-  for (; length && !*from; from++, length--);          // skip left 0's
+  for (; length && !*from; from++, length--) ;         // skip left 0's
   delta= bytes_in_rec - length;
 
   if (delta < -1 ||
@@ -9306,7 +9306,7 @@ Field_bit::unpack(uchar *to, const uchar *from, uint param_data,
     and slave have the same sizes, then use the old unpack() method.
   */
   if (param_data == 0 ||
-      (from_bit_len == bit_len) && (from_len == bytes_in_rec))
+      ((from_bit_len == bit_len) && (from_len == bytes_in_rec)))
   {
     if (bit_len > 0)
     {
@@ -9385,7 +9385,7 @@ int Field_bit_as_char::store(const char *from, uint length, CHARSET_INFO *cs)
   int delta;
   uchar bits= (uchar) (field_length & 7);
 
-  for (; length && !*from; from++, length--);          // skip left 0's
+  for (; length && !*from; from++, length--) ;         // skip left 0's
   delta= bytes_in_rec - length;
 
   if (delta < 0 ||
diff --git a/sql/field_conv.cc b/sql/field_conv.cc
index 11d0bb9cc82..3574534722e 100644
--- a/sql/field_conv.cc
+++ b/sql/field_conv.cc
@@ -99,7 +99,7 @@ static void do_field_to_null_str(Copy_field *copy)
 static void do_outer_field_to_null_str(Copy_field *copy)
 {
   if (*copy->null_row ||
-      copy->from_null_ptr && (*copy->from_null_ptr & copy->from_bit))
+      (copy->from_null_ptr && (*copy->from_null_ptr & copy->from_bit)))
   {
     bzero(copy->to_ptr,copy->from_length);
     copy->to_null_ptr[0]=1;			// Always bit 1
@@ -212,7 +212,7 @@ static void do_copy_null(Copy_field *copy)
 static void do_outer_field_null(Copy_field *copy)
 {
   if (*copy->null_row ||
-      copy->from_null_ptr && (*copy->from_null_ptr & copy->from_bit))
+      (copy->from_null_ptr && (*copy->from_null_ptr & copy->from_bit)))
   {
     *copy->to_null_ptr|=copy->to_bit;
     copy->to_field->reset();
@@ -665,9 +665,9 @@ Copy_field::get_copy_func(Field *to,Field *from)
       */
       if (to->real_type() != from->real_type() ||
           !compatible_db_low_byte_first ||
-          ((to->table->in_use->variables.sql_mode &
+          (((to->table->in_use->variables.sql_mode &
             (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE | MODE_INVALID_DATES)) &&
-           to->type() == MYSQL_TYPE_DATE ||
+           to->type() == MYSQL_TYPE_DATE) ||
            to->type() == MYSQL_TYPE_DATETIME))
       {
 	if (from->real_type() == MYSQL_TYPE_ENUM ||
@@ -770,8 +770,8 @@ int field_conv(Field *to,Field *from)
 	to->table->s->db_low_byte_first == from->table->s->db_low_byte_first &&
         (!(to->table->in_use->variables.sql_mode &
            (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE | MODE_INVALID_DATES)) ||
-         to->type() != MYSQL_TYPE_DATE &&
-         to->type() != MYSQL_TYPE_DATETIME) &&
+         (to->type() != MYSQL_TYPE_DATE &&
+          to->type() != MYSQL_TYPE_DATETIME)) &&
         (from->real_type() != MYSQL_TYPE_VARCHAR ||
          ((Field_varstring*)from)->length_bytes ==
           ((Field_varstring*)to)->length_bytes))
diff --git a/sql/gstream.cc b/sql/gstream.cc
index 0c8011549f3..e2bb41b8541 100644
--- a/sql/gstream.cc
+++ b/sql/gstream.cc
@@ -75,7 +75,7 @@ bool Gis_read_stream::get_next_number(double *d)
   skip_space();
 
   if ((m_cur >= m_limit) ||
-      (*m_cur < '0' || *m_cur > '9') && *m_cur != '-' && *m_cur != '+')
+      ((*m_cur < '0' || *m_cur > '9') && *m_cur != '-' && *m_cur != '+'))
   {
     set_error_msg("Numeric constant expected");
     return 1;
diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc
index 03bae38d94c..264e5649ea9 100644
--- a/sql/ha_ndbcluster.cc
+++ b/sql/ha_ndbcluster.cc
@@ -1263,7 +1263,7 @@ int ha_ndbcluster::add_index_handle(THD *thd, NDBDICT *dict, KEY *key_info,
   }
   if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
   {
-    char unique_index_name[FN_LEN];
+    char unique_index_name[FN_LEN + 1];
     static const char* unique_suffix= "$unique";
     m_has_unique_index= TRUE;
     strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS);
@@ -5205,7 +5205,7 @@ int ha_ndbcluster::create(const char *name,
   uchar *data= NULL, *pack_data= NULL;
   bool create_from_engine= (create_info->table_options & HA_OPTION_CREATE_FROM_ENGINE);
   bool is_truncate= (thd->lex->sql_command == SQLCOM_TRUNCATE);
-  char tablespace[FN_LEN];
+  char tablespace[FN_LEN + 1];
   NdbDictionary::Table::SingleUserMode single_user_mode= NdbDictionary::Table::SingleUserModeLocked;
 
   DBUG_ENTER("ha_ndbcluster::create");
@@ -5668,7 +5668,7 @@ int ha_ndbcluster::create_index(const char *name, KEY *key_info,
                                 NDB_INDEX_TYPE idx_type, uint idx_no)
 {
   int error= 0;
-  char unique_name[FN_LEN];
+  char unique_name[FN_LEN + 1];
   static const char* unique_suffix= "$unique";
   DBUG_ENTER("ha_ndbcluster::create_ordered_index");
   DBUG_PRINT("info", ("Creating index %u: %s", idx_no, name));  
@@ -6716,7 +6716,7 @@ int ndbcluster_discover(handlerton *hton, THD* thd, const char *db,
   size_t len;
   uchar* data= NULL;
   Ndb* ndb;
-  char key[FN_REFLEN];
+  char key[FN_REFLEN + 1];
   DBUG_ENTER("ndbcluster_discover");
   DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); 
 
@@ -6727,7 +6727,7 @@ int ndbcluster_discover(handlerton *hton, THD* thd, const char *db,
     ERR_RETURN(ndb->getNdbError());
   }
   NDBDICT* dict= ndb->getDictionary();
-  build_table_filename(key, sizeof(key), db, name, "", 0);
+  build_table_filename(key, sizeof(key) - 1, db, name, "", 0);
   /* ndb_share reference temporary */
   NDB_SHARE *share= get_share(key, 0, FALSE);
   if (share)
@@ -6892,9 +6892,9 @@ int ndbcluster_drop_database_impl(const char *path)
     drop_list.push_back(thd->strdup(elmt.name));
   }
   // Drop any tables belonging to database
-  char full_path[FN_REFLEN];
+  char full_path[FN_REFLEN + 1];
   char *tmp= full_path +
-    build_table_filename(full_path, sizeof(full_path), dbname, "", "", 0);
+    build_table_filename(full_path, sizeof(full_path) - 1, dbname, "", "", 0);
   if (ndb->setDatabaseName(dbname))
   {
     ERR_RETURN(ndb->getNdbError());
@@ -6963,7 +6963,7 @@ int ndb_create_table_from_engine(THD *thd, const char *db,
 int ndbcluster_find_all_files(THD *thd)
 {
   Ndb* ndb;
-  char key[FN_REFLEN];
+  char key[FN_REFLEN + 1];
   NDBDICT *dict;
   int unhandled, retries= 5, skipped;
   DBUG_ENTER("ndbcluster_find_all_files");
@@ -7021,7 +7021,7 @@ int ndbcluster_find_all_files(THD *thd)
     
       /* check if database exists */
       char *end= key +
-        build_table_filename(key, sizeof(key), elmt.database, "", "", 0);
+        build_table_filename(key, sizeof(key) - 1, elmt.database, "", "", 0);
       if (my_access(key, F_OK))
       {
         /* no such database defined, skip table */
@@ -7102,7 +7102,7 @@ int ndbcluster_find_files(handlerton *hton, THD *thd,
   { // extra bracket to avoid gcc 2.95.3 warning
   uint i;
   Ndb* ndb;
-  char name[FN_REFLEN];
+  char name[FN_REFLEN + 1];
   HASH ndb_tables, ok_tables;
   NDBDICT::List list;
 
@@ -7172,7 +7172,8 @@ int ndbcluster_find_files(handlerton *hton, THD *thd,
     DBUG_PRINT("info", ("%s", file_name->str));
     if (hash_search(&ndb_tables, (uchar*) file_name->str, file_name->length))
     {
-      build_table_filename(name, sizeof(name), db, file_name->str, reg_ext, 0);
+      build_table_filename(name, sizeof(name) - 1, db,
+                           file_name->str, reg_ext, 0);
       if (my_access(name, F_OK))
       {
         pthread_mutex_lock(&LOCK_open);
@@ -7194,7 +7195,8 @@ int ndbcluster_find_files(handlerton *hton, THD *thd,
     }
     
     // Check for .ndb file with this name
-    build_table_filename(name, sizeof(name), db, file_name->str, ha_ndb_ext, 0);
+    build_table_filename(name, sizeof(name) - 1, db,
+                         file_name->str, ha_ndb_ext, 0);
     DBUG_PRINT("info", ("Check access for %s", name));
     if (my_access(name, F_OK))
     {
@@ -7237,7 +7239,7 @@ int ndbcluster_find_files(handlerton *hton, THD *thd,
   /* setup logging to binlog for all discovered tables */
   {
     char *end, *end1= name +
-      build_table_filename(name, sizeof(name), db, "", "", 0);
+      build_table_filename(name, sizeof(name) - 1, db, "", "", 0);
     for (i= 0; i < ok_tables.records; i++)
     {
       file_name_str= (char*)hash_element(&ok_tables, i);
@@ -7259,7 +7261,8 @@ int ndbcluster_find_files(handlerton *hton, THD *thd,
     file_name_str= (char*) hash_element(&ndb_tables, i);
     if (!hash_search(&ok_tables, (uchar*) file_name_str, strlen(file_name_str)))
     {
-      build_table_filename(name, sizeof(name), db, file_name_str, reg_ext, 0);
+      build_table_filename(name, sizeof(name) - 1,
+                           db, file_name_str, reg_ext, 0);
       if (my_access(name, F_OK))
       {
         DBUG_PRINT("info", ("%s must be discovered", file_name_str));
@@ -7639,7 +7642,7 @@ void ndbcluster_print_error(int error, const NdbOperation *error_op)
 void ha_ndbcluster::set_dbname(const char *path_name, char *dbname)
 {
   char *end, *ptr, *tmp_name;
-  char tmp_buff[FN_REFLEN];
+  char tmp_buff[FN_REFLEN + 1];
  
   tmp_name= tmp_buff;
   /* Scan name from the end */
@@ -7665,7 +7668,7 @@ void ha_ndbcluster::set_dbname(const char *path_name, char *dbname)
     ptr++;
   }
 #endif
-  filename_to_tablename(tmp_name, dbname, FN_REFLEN);
+  filename_to_tablename(tmp_name, dbname, sizeof(tmp_buff) - 1);
 }
 
 /**
@@ -7685,7 +7688,7 @@ void
 ha_ndbcluster::set_tabname(const char *path_name, char * tabname)
 {
   char *end, *ptr, *tmp_name;
-  char tmp_buff[FN_REFLEN];
+  char tmp_buff[FN_REFLEN + 1];
 
   tmp_name= tmp_buff;
   /* Scan name from the end */
@@ -7706,7 +7709,7 @@ ha_ndbcluster::set_tabname(const char *path_name, char * tabname)
     ptr++;
   }
 #endif
-  filename_to_tablename(tmp_name, tabname, FN_REFLEN);
+  filename_to_tablename(tmp_name, tabname, sizeof(tmp_buff) - 1);
 }
 
 /**
@@ -7892,11 +7895,12 @@ uint8 ha_ndbcluster::table_cache_type()
 uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname,
                          Uint64 *commit_count)
 {
-  char name[FN_REFLEN];
+  char name[FN_REFLEN + 1];
   NDB_SHARE *share;
   DBUG_ENTER("ndb_get_commitcount");
 
-  build_table_filename(name, sizeof(name), dbname, tabname, "", 0);
+  build_table_filename(name, sizeof(name) - 1,
+                       dbname, tabname, "", 0);
   DBUG_PRINT("enter", ("name: %s", name));
   pthread_mutex_lock(&ndbcluster_mutex);
   if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables,
@@ -10040,7 +10044,7 @@ bool ha_ndbcluster::check_if_incompatible_data(HA_CREATE_INFO *create_info,
       ai=1;
   }
 
-  char tablespace_name[FN_LEN]; 
+  char tablespace_name[FN_LEN + 1]; 
   if (get_tablespace_name(current_thd, tablespace_name, FN_LEN))
   {
     if (create_info->tablespace) 
diff --git a/sql/ha_ndbcluster_binlog.cc b/sql/ha_ndbcluster_binlog.cc
index baf86d739eb..f705af8bf47 100644
--- a/sql/ha_ndbcluster_binlog.cc
+++ b/sql/ha_ndbcluster_binlog.cc
@@ -788,7 +788,7 @@ static int ndbcluster_create_ndb_apply_status_table(THD *thd)
   if (g_ndb_cluster_connection->get_no_ready() <= 0)
     DBUG_RETURN(0);
 
-  char buf[1024], *end;
+  char buf[1024 + 1], *end;
 
   if (ndb_extra_logging)
     sql_print_information("NDB: Creating " NDB_REP_DB "." NDB_APPLY_TABLE);
@@ -798,7 +798,7 @@ static int ndbcluster_create_ndb_apply_status_table(THD *thd)
     if so, remove it since there is none in Ndb
   */
   {
-    build_table_filename(buf, sizeof(buf),
+    build_table_filename(buf, sizeof(buf) - 1,
                          NDB_REP_DB, NDB_APPLY_TABLE, reg_ext, 0);
     my_delete(buf, MYF(0));
   }
@@ -846,7 +846,7 @@ static int ndbcluster_create_schema_table(THD *thd)
   if (g_ndb_cluster_connection->get_no_ready() <= 0)
     DBUG_RETURN(0);
 
-  char buf[1024], *end;
+  char buf[1024 + 1], *end;
 
   if (ndb_extra_logging)
     sql_print_information("NDB: Creating " NDB_REP_DB "." NDB_SCHEMA_TABLE);
@@ -856,7 +856,7 @@ static int ndbcluster_create_schema_table(THD *thd)
     if so, remove it since there is none in Ndb
   */
   {
-    build_table_filename(buf, sizeof(buf),
+    build_table_filename(buf, sizeof(buf) - 1,
                          NDB_REP_DB, NDB_SCHEMA_TABLE, reg_ext, 0);
     my_delete(buf, MYF(0));
   }
@@ -1321,8 +1321,8 @@ int ndbcluster_log_schema_op(THD *thd, NDB_SHARE *share,
 
   NDB_SCHEMA_OBJECT *ndb_schema_object;
   {
-    char key[FN_REFLEN];
-    build_table_filename(key, sizeof(key), db, table_name, "", 0);
+    char key[FN_REFLEN + 1];
+    build_table_filename(key, sizeof(key) - 1, db, table_name, "", 0);
     ndb_schema_object= ndb_get_schema_object(key, TRUE, FALSE);
   }
 
@@ -1674,7 +1674,7 @@ ndb_handle_schema_change(THD *thd, Ndb *ndb, NdbEventOperation *pOp,
   if (is_remote_change && is_online_alter_table)
   {
     const char *tabname= table_share->table_name.str;
-    char key[FN_REFLEN];
+    char key[FN_REFLEN + 1];
     uchar *data= 0, *pack_data= 0;
     size_t length, pack_length;
     int error;
@@ -1683,7 +1683,7 @@ ndb_handle_schema_change(THD *thd, Ndb *ndb, NdbEventOperation *pOp,
     
     DBUG_PRINT("info", ("Detected frm change of table %s.%s",
                         dbname, tabname));
-    build_table_filename(key, FN_LEN-1, dbname, tabname, NullS, 0);
+    build_table_filename(key, FN_LEN - 1, dbname, tabname, NullS, 0);
     /*
       If the there is no local table shadowing the altered table and 
       it has an frm that is different than the one on disk then 
@@ -1855,9 +1855,11 @@ static void ndb_binlog_query(THD *thd, Cluster_schema *schema)
   else
     thd->server_id= schema->any_value;
   thd->db= schema->db;
+  int errcode = query_error_code(thd, thd->killed == THD::NOT_KILLED);
   thd->binlog_query(THD::STMT_QUERY_TYPE, schema->query,
                     schema->query_length, FALSE,
-                    schema->name[0] == 0 || thd->db[0] == 0);
+                    schema->name[0] == 0 || thd->db[0] == 0,
+                    errcode);
   thd->server_id= thd_server_id_save;
   thd->db= thd_db_save;
 }
@@ -1924,8 +1926,8 @@ ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb,
           break;
 	case SOT_TRUNCATE_TABLE:
         {
-          char key[FN_REFLEN];
-          build_table_filename(key, sizeof(key),
+          char key[FN_REFLEN + 1];
+          build_table_filename(key, sizeof(key) - 1,
                                schema->db, schema->name, "", 0);
           /* ndb_share reference temporary, free below */
           NDB_SHARE *share= get_share(key, 0, FALSE, FALSE);
@@ -2171,8 +2173,8 @@ ndb_binlog_thread_handle_schema_event_post_epoch(THD *thd,
     int log_query= 0;
     {
       enum SCHEMA_OP_TYPE schema_type= (enum SCHEMA_OP_TYPE)schema->type;
-      char key[FN_REFLEN];
-      build_table_filename(key, sizeof(key), schema->db, schema->name, "", 0);
+      char key[FN_REFLEN + 1];
+      build_table_filename(key, sizeof(key) - 1, schema->db, schema->name, "", 0);
       if (schema_type == SOT_CLEAR_SLOCK)
       {
         pthread_mutex_lock(&ndbcluster_mutex);
@@ -2506,8 +2508,8 @@ ndb_rep_event_name(String *event_name,const char *db, const char *tbl)
 bool
 ndbcluster_check_if_local_table(const char *dbname, const char *tabname)
 {
-  char key[FN_REFLEN];
-  char ndb_file[FN_REFLEN];
+  char key[FN_REFLEN + 1];
+  char ndb_file[FN_REFLEN + 1];
 
   DBUG_ENTER("ndbcluster_check_if_local_table");
   build_table_filename(key, FN_LEN-1, dbname, tabname, reg_ext, 0);
@@ -2532,9 +2534,9 @@ ndbcluster_check_if_local_tables_in_db(THD *thd, const char *dbname)
   DBUG_PRINT("info", ("Looking for files in directory %s", dbname));
   LEX_STRING *tabname;
   List<LEX_STRING> files;
-  char path[FN_REFLEN];
+  char path[FN_REFLEN + 1];
 
-  build_table_filename(path, sizeof(path), dbname, "", "", 0);
+  build_table_filename(path, sizeof(path) - 1, dbname, "", "", 0);
   if (find_files(thd, &files, dbname, path, NullS, 0) != FIND_FILES_OK)
   {
     DBUG_PRINT("info", ("Failed to find files"));
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index 67bc3156260..74742f58028 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -3179,6 +3179,7 @@ int ha_partition::delete_row(const uchar *buf)
 int ha_partition::delete_all_rows()
 {
   int error;
+  bool truncate= FALSE;
   handler **file;
   THD *thd= ha_thd();
   DBUG_ENTER("ha_partition::delete_all_rows");
@@ -3190,12 +3191,16 @@ int ha_partition::delete_all_rows()
     ha_data->next_auto_inc_val= 0;
     ha_data->auto_inc_initialized= FALSE;
     unlock_auto_increment();
+    truncate= TRUE;
   }
   file= m_file;
   do
   {
     if ((error= (*file)->ha_delete_all_rows()))
       DBUG_RETURN(error);
+    /* Ignore the error */
+    if (truncate)
+      (void) (*file)->ha_reset_auto_increment(0);
   } while (*(++file));
   DBUG_RETURN(0);
 }
diff --git a/sql/handler.cc b/sql/handler.cc
index 0de7718d113..285690ea66a 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -1074,6 +1074,13 @@ int ha_commit_trans(THD *thd, bool all)
     user, or an implicit commit issued by a DDL.
   */
   THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
+  /*
+    "real" is a nick name for a transaction for which a commit will
+    make persistent changes. E.g. a 'stmt' transaction inside a 'all'
+    transation is not 'real': even though it's possible to commit it,
+    the changes are not durable as they might be rolled back if the
+    enclosing 'all' transaction is rolled back.
+  */
   bool is_real_trans= all || thd->transaction.all.ha_list == 0;
   Ha_trx_info *ha_info= trans->ha_list;
   my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
@@ -1185,16 +1192,9 @@ end:
     if (rw_trans)
       start_waiting_global_read_lock(thd);
   }
-  else if (all)
-  {
-    /*
-      A COMMIT of an empty transaction. There may be savepoints.
-      Destroy them. If the transaction is not empty
-      savepoints are cleared in ha_commit_one_phase()
-      or ha_rollback_trans().
-    */
+  /* Free resources and perform other cleanup even for 'empty' transactions. */
+  else if (is_real_trans)
     thd->transaction.cleanup();
-  }
 #endif /* USING_TRANSACTIONS */
   DBUG_RETURN(error);
 }
@@ -1207,6 +1207,13 @@ int ha_commit_one_phase(THD *thd, bool all)
 {
   int error=0;
   THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
+  /*
+    "real" is a nick name for a transaction for which a commit will
+    make persistent changes. E.g. a 'stmt' transaction inside a 'all'
+    transation is not 'real': even though it's possible to commit it,
+    the changes are not durable as they might be rolled back if the
+    enclosing 'all' transaction is rolled back.
+  */
   bool is_real_trans=all || thd->transaction.all.ha_list == 0;
   Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
   DBUG_ENTER("ha_commit_one_phase");
@@ -1228,8 +1235,6 @@ int ha_commit_one_phase(THD *thd, bool all)
     }
     trans->ha_list= 0;
     trans->no_2pc=0;
-    if (is_real_trans)
-      thd->transaction.xid_state.xid.null();
     if (all)
     {
 #ifdef HAVE_QUERY_CACHE
@@ -1237,9 +1242,11 @@ int ha_commit_one_phase(THD *thd, bool all)
         query_cache.invalidate(thd->transaction.changed_tables);
 #endif
       thd->variables.tx_isolation=thd->session_tx_isolation;
-      thd->transaction.cleanup();
     }
   }
+  /* Free resources and perform other cleanup even for 'empty' transactions. */
+  if (is_real_trans)
+    thd->transaction.cleanup();
 #endif /* USING_TRANSACTIONS */
   DBUG_RETURN(error);
 }
@@ -1250,6 +1257,13 @@ int ha_rollback_trans(THD *thd, bool all)
   int error=0;
   THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
   Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
+  /*
+    "real" is a nick name for a transaction for which a commit will
+    make persistent changes. E.g. a 'stmt' transaction inside a 'all'
+    transation is not 'real': even though it's possible to commit it,
+    the changes are not durable as they might be rolled back if the
+    enclosing 'all' transaction is rolled back.
+  */
   bool is_real_trans=all || thd->transaction.all.ha_list == 0;
   DBUG_ENTER("ha_rollback_trans");
 
@@ -1295,18 +1309,13 @@ int ha_rollback_trans(THD *thd, bool all)
     }
     trans->ha_list= 0;
     trans->no_2pc=0;
-    if (is_real_trans)
-    {
-      if (thd->transaction_rollback_request)
-        thd->transaction.xid_state.rm_error= thd->main_da.sql_errno();
-      else
-        thd->transaction.xid_state.xid.null();
-    }
+    if (is_real_trans && thd->transaction_rollback_request)
+      thd->transaction.xid_state.rm_error= thd->main_da.sql_errno();
     if (all)
       thd->variables.tx_isolation=thd->session_tx_isolation;
   }
   /* Always cleanup. Even if there nht==0. There may be savepoints. */
-  if (all)
+  if (is_real_trans)
     thd->transaction.cleanup();
 #endif /* USING_TRANSACTIONS */
   if (all)
@@ -2284,8 +2293,8 @@ int handler::update_auto_increment()
   DBUG_ASSERT(next_insert_id >= auto_inc_interval_for_cur_row.minimum());
 
   if ((nr= table->next_number_field->val_int()) != 0 ||
-      table->auto_increment_field_not_null &&
-      thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO)
+      (table->auto_increment_field_not_null &&
+      thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
   {
     /*
       Update next_insert_id if we had already generated a value in this
@@ -3575,7 +3584,7 @@ int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
   int error;
   uchar *frmblob;
   size_t frmlen;
-  char path[FN_REFLEN];
+  char path[FN_REFLEN + 1];
   HA_CREATE_INFO create_info;
   TABLE table;
   TABLE_SHARE share;
@@ -3594,7 +3603,7 @@ int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
     frmblob and frmlen are set, write the frm to disk
   */
 
-  build_table_filename(path, FN_REFLEN-1, db, name, "", 0);
+  build_table_filename(path, sizeof(path) - 1, db, name, "", 0);
   // Save the frm file
   error= writefrm(path, frmblob, frmlen);
   my_free(frmblob, MYF(0));
@@ -4821,7 +4830,7 @@ fl_log_iterator_buffer_init(struct handler_iterator *iterator)
   if ((ptr= (uchar*)my_malloc(ALIGN_SIZE(sizeof(fl_buff)) +
                              ((ALIGN_SIZE(sizeof(LEX_STRING)) +
                                sizeof(enum log_status) +
-                               + FN_REFLEN) *
+                               + FN_REFLEN + 1) *
                               (uint) dirp->number_off_files),
                              MYF(0))) == 0)
   {
@@ -4849,7 +4858,7 @@ fl_log_iterator_buffer_init(struct handler_iterator *iterator)
     name_ptr= strxnmov(buff->names[buff->entries].str= name_ptr,
                        FN_REFLEN, fl_dir, file->name, NullS);
     buff->names[buff->entries].length= (name_ptr -
-                                        buff->names[buff->entries].str) - 1;
+                                        buff->names[buff->entries].str);
     buff->statuses[buff->entries]= st;
     buff->entries++;
   }
diff --git a/sql/handler.h b/sql/handler.h
index d43fc4725dd..11d0f71fc72 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -876,9 +876,9 @@ typedef struct {
   ulonglong delete_length;
   ha_rows records;
   ulong mean_rec_length;
-  time_t create_time;
-  time_t check_time;
-  time_t update_time;
+  ulong create_time;
+  ulong check_time;
+  ulong update_time;
   ulonglong check_sum;
 } PARTITION_INFO;
 
@@ -1037,9 +1037,9 @@ public:
   ha_rows records;
   ha_rows deleted;			/* Deleted records */
   ulong mean_rec_length;		/* physical reclength */
-  time_t create_time;			/* When table was created */
-  time_t check_time;
-  time_t update_time;
+  ulong create_time;			/* When table was created */
+  ulong check_time;
+  ulong update_time;
   uint block_size;			/* index block size */
 
   ha_statistics():
@@ -1932,8 +1932,8 @@ private:
 	/* Some extern variables used with handlers */
 
 extern const char *ha_row_type[];
-extern const char *tx_isolation_names[];
-extern const char *binlog_format_names[];
+extern MYSQL_PLUGIN_IMPORT const char *tx_isolation_names[];
+extern MYSQL_PLUGIN_IMPORT const char *binlog_format_names[];
 extern TYPELIB tx_isolation_typelib;
 extern TYPELIB myisam_stats_method_typelib;
 extern ulong total_ha, total_ha_2pc;
diff --git a/sql/item.cc b/sql/item.cc
index d1418b9a137..4d9004fff26 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -417,6 +417,7 @@ Item::Item(THD *thd, Item *item):
   name(item->name),
   orig_name(item->orig_name),
   max_length(item->max_length),
+  name_length(item->name_length),
   marker(item->marker),
   decimals(item->decimals),
   maybe_null(item->maybe_null),
@@ -424,7 +425,9 @@ Item::Item(THD *thd, Item *item):
   unsigned_flag(item->unsigned_flag),
   with_sum_func(item->with_sum_func),
   fixed(item->fixed),
+  is_autogenerated_name(item->is_autogenerated_name),
   collation(item->collation),
+  with_subselect(item->with_subselect),
   cmp_context(item->cmp_context)
 {
   next= thd->free_list;				// Put in free list
@@ -1504,10 +1507,6 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
       {
         set(dt);
       }
-      else
-      {
-        // Do nothing
-      }
     }
     else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
              left_is_superset(this, &dt))
@@ -2261,8 +2260,10 @@ Item_decimal::Item_decimal(const char *str_arg, uint length,
   name= (char*) str_arg;
   decimals= (uint8) decimal_value.frac;
   fixed= 1;
-  max_length= my_decimal_precision_to_length(decimal_value.intg + decimals,
-                                             decimals, unsigned_flag);
+  max_length= my_decimal_precision_to_length_no_truncation(decimal_value.intg +
+                                                           decimals,
+                                                           decimals,
+                                                           unsigned_flag);
 }
 
 Item_decimal::Item_decimal(longlong val, bool unsig)
@@ -2270,8 +2271,10 @@ Item_decimal::Item_decimal(longlong val, bool unsig)
   int2my_decimal(E_DEC_FATAL_ERROR, val, unsig, &decimal_value);
   decimals= (uint8) decimal_value.frac;
   fixed= 1;
-  max_length= my_decimal_precision_to_length(decimal_value.intg + decimals,
-                                             decimals, unsigned_flag);
+  max_length= my_decimal_precision_to_length_no_truncation(decimal_value.intg +
+                                                           decimals,
+                                                           decimals,
+                                                           unsigned_flag);
 }
 
 
@@ -2280,8 +2283,10 @@ Item_decimal::Item_decimal(double val, int precision, int scale)
   double2my_decimal(E_DEC_FATAL_ERROR, val, &decimal_value);
   decimals= (uint8) decimal_value.frac;
   fixed= 1;
-  max_length= my_decimal_precision_to_length(decimal_value.intg + decimals,
-                                             decimals, unsigned_flag);
+  max_length= my_decimal_precision_to_length_no_truncation(decimal_value.intg +
+                                                           decimals,
+                                                           decimals,
+                                                           unsigned_flag);
 }
 
 
@@ -2301,8 +2306,10 @@ Item_decimal::Item_decimal(my_decimal *value_par)
   my_decimal2decimal(value_par, &decimal_value);
   decimals= (uint8) decimal_value.frac;
   fixed= 1;
-  max_length= my_decimal_precision_to_length(decimal_value.intg + decimals,
-                                             decimals, unsigned_flag);
+  max_length= my_decimal_precision_to_length_no_truncation(decimal_value.intg +
+                                                           decimals,
+                                                           decimals,
+                                                           unsigned_flag);
 }
 
 
@@ -2312,8 +2319,8 @@ Item_decimal::Item_decimal(const uchar *bin, int precision, int scale)
                     &decimal_value, precision, scale);
   decimals= (uint8) decimal_value.frac;
   fixed= 1;
-  max_length= my_decimal_precision_to_length(precision, decimals,
-                                             unsigned_flag);
+  max_length= my_decimal_precision_to_length_no_truncation(precision, decimals,
+                                                           unsigned_flag);
 }
 
 
@@ -2368,8 +2375,10 @@ void Item_decimal::set_decimal_value(my_decimal *value_par)
   my_decimal2decimal(value_par, &decimal_value);
   decimals= (uint8) decimal_value.frac;
   unsigned_flag= !decimal_value.sign();
-  max_length= my_decimal_precision_to_length(decimal_value.intg + decimals,
-                                             decimals, unsigned_flag);
+  max_length= my_decimal_precision_to_length_no_truncation(decimal_value.intg +
+                                                           decimals,
+                                                           decimals,
+                                                           unsigned_flag);
 }
 
 
@@ -2472,8 +2481,9 @@ longlong_from_string_with_check (CHARSET_INFO *cs, const char *cptr, char *end)
     TODO: Give error if we wanted a signed integer and we got an unsigned
     one
   */
-  if (err > 0 ||
-      (end != org_end && !check_if_only_end_space(cs, end, org_end)))
+  if (!current_thd->no_errors &&
+      (err > 0 ||
+       (end != org_end && !check_if_only_end_space(cs, end, org_end))))
   {
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                         ER_TRUNCATED_WRONG_VALUE,
@@ -2640,8 +2650,9 @@ void Item_param::set_decimal(const char *str, ulong length)
   str2my_decimal(E_DEC_FATAL_ERROR, str, &decimal_value, &end);
   state= DECIMAL_VALUE;
   decimals= decimal_value.frac;
-  max_length= my_decimal_precision_to_length(decimal_value.precision(),
-                                             decimals, unsigned_flag);
+  max_length=
+    my_decimal_precision_to_length_no_truncation(decimal_value.precision(),
+                                                 decimals, unsigned_flag);
   maybe_null= 0;
   DBUG_VOID_RETURN;
 }
@@ -2771,8 +2782,8 @@ bool Item_param::set_from_user_var(THD *thd, const user_var_entry *entry)
       CHARSET_INFO *tocs= thd->variables.collation_connection;
       uint32 dummy_offset;
 
-      value.cs_info.character_set_of_placeholder= 
-        value.cs_info.character_set_client= fromcs;
+      value.cs_info.character_set_of_placeholder= fromcs;
+      value.cs_info.character_set_client= thd->variables.character_set_client;
       /*
         Setup source and destination character sets so that they
         are different only if conversion is necessary: this will
@@ -2797,8 +2808,9 @@ bool Item_param::set_from_user_var(THD *thd, const user_var_entry *entry)
       my_decimal2decimal(ent_value, &decimal_value);
       state= DECIMAL_VALUE;
       decimals= ent_value->frac;
-      max_length= my_decimal_precision_to_length(ent_value->precision(),
-                                                 decimals, unsigned_flag);
+      max_length=
+        my_decimal_precision_to_length_no_truncation(ent_value->precision(),
+                                                     decimals, unsigned_flag);
       item_type= Item::DECIMAL_ITEM;
       break;
     }
@@ -3267,10 +3279,58 @@ Item_param::set_param_type_and_swap_value(Item_param *src)
   str_value_ptr.swap(src->str_value_ptr);
 }
 
+/****************************************************************************
+  Item_copy
+****************************************************************************/
+Item_copy *Item_copy::create (Item *item)
+{
+  switch (item->result_type())
+  {
+    case STRING_RESULT:
+      return new Item_copy_string (item);
+    case REAL_RESULT: 
+      return new Item_copy_float (item);
+    case INT_RESULT:
+      return item->unsigned_flag ? 
+        new Item_copy_uint (item) : new Item_copy_int (item);
+    case DECIMAL_RESULT:
+      return new Item_copy_decimal (item);
+
+    case ROW_RESULT:
+      DBUG_ASSERT (0);
+  }
+  /* should not happen */
+  return NULL;
+}
+
 /****************************************************************************
   Item_copy_string
 ****************************************************************************/
 
+double Item_copy_string::val_real()
+{
+  int err_not_used;
+  char *end_not_used;
+  return (null_value ? 0.0 :
+          my_strntod(str_value.charset(), (char*) str_value.ptr(),
+                     str_value.length(), &end_not_used, &err_not_used));
+}
+
+longlong Item_copy_string::val_int()
+{
+  int err;
+  return null_value ? LL(0) : my_strntoll(str_value.charset(),str_value.ptr(),
+                                          str_value.length(),10, (char**) 0,
+                                          &err); 
+}
+
+
+int Item_copy_string::save_in_field(Field *field, bool no_conversions)
+{
+  return save_str_value_in_field(field, &str_value);
+}
+
+
 void Item_copy_string::copy()
 {
   String *res=item->val_str(&str_value);
@@ -3293,12 +3353,163 @@ my_decimal *Item_copy_string::val_decimal(my_decimal *decimal_value)
 {
   // Item_copy_string is used without fix_fields call
   if (null_value)
-    return 0;
+    return (my_decimal *) 0;
   string2my_decimal(E_DEC_FATAL_ERROR, &str_value, decimal_value);
   return (decimal_value);
 }
 
 
+/****************************************************************************
+  Item_copy_int
+****************************************************************************/
+
+void Item_copy_int::copy()
+{
+  cached_value= item->val_int();
+  null_value=item->null_value;
+}
+
+static int save_int_value_in_field (Field *field, longlong nr, 
+                                    bool null_value, bool unsigned_flag);
+
+int Item_copy_int::save_in_field(Field *field, bool no_conversions)
+{
+  return save_int_value_in_field(field, cached_value, 
+                                 null_value, unsigned_flag);
+}
+
+
+String *Item_copy_int::val_str(String *str)
+{
+  if (null_value)
+    return (String *) 0;
+
+  str->set(cached_value, &my_charset_bin);
+  return str;
+}
+
+
+my_decimal *Item_copy_int::val_decimal(my_decimal *decimal_value)
+{
+  if (null_value)
+    return (my_decimal *) 0;
+
+  int2my_decimal(E_DEC_FATAL_ERROR, cached_value, unsigned_flag, decimal_value);
+  return decimal_value;
+}
+
+
+/****************************************************************************
+  Item_copy_uint
+****************************************************************************/
+
+String *Item_copy_uint::val_str(String *str)
+{
+  if (null_value)
+    return (String *) 0;
+
+  str->set((ulonglong) cached_value, &my_charset_bin);
+  return str;
+}
+
+
+/****************************************************************************
+  Item_copy_float
+****************************************************************************/
+
+String *Item_copy_float::val_str(String *str)
+{
+  if (null_value)
+    return (String *) 0;
+  else
+  {
+    double nr= val_real();
+    str->set_real(nr,decimals, &my_charset_bin);
+    return str;
+  }
+}
+
+
+my_decimal *Item_copy_float::val_decimal(my_decimal *decimal_value)
+{
+  if (null_value)
+    return (my_decimal *) 0;
+  else
+  {
+    double nr= val_real();
+    double2my_decimal(E_DEC_FATAL_ERROR, nr, decimal_value);
+    return decimal_value;
+  }
+}
+
+
+int Item_copy_float::save_in_field(Field *field, bool no_conversions)
+{
+  if (null_value)
+    return set_field_to_null(field);
+  field->set_notnull();
+  return field->store(cached_value);
+}
+
+
+/****************************************************************************
+  Item_copy_decimal
+****************************************************************************/
+
+int Item_copy_decimal::save_in_field(Field *field, bool no_conversions)
+{
+  if (null_value)
+    return set_field_to_null(field);
+  field->set_notnull();
+  return field->store_decimal(&cached_value);
+}
+
+
+String *Item_copy_decimal::val_str(String *result)
+{
+  if (null_value)
+    return (String *) 0;
+  result->set_charset(&my_charset_bin);
+  my_decimal2string(E_DEC_FATAL_ERROR, &cached_value, 0, 0, 0, result);
+  return result;
+}
+
+
+double Item_copy_decimal::val_real()
+{
+  if (null_value)
+    return 0.0;
+  else
+  {
+    double result;
+    my_decimal2double(E_DEC_FATAL_ERROR, &cached_value, &result);
+    return result;
+  }
+}
+
+
+longlong Item_copy_decimal::val_int()
+{
+  if (null_value)
+    return LL(0);
+  else
+  {
+    longlong result;
+    my_decimal2int(E_DEC_FATAL_ERROR, &cached_value, unsigned_flag, &result);
+    return result;
+  }
+}
+
+
+void Item_copy_decimal::copy()
+{
+  my_decimal *nr= item->val_decimal(&cached_value);
+  if (nr && nr != &cached_value)
+    memcpy (&cached_value, nr, sizeof (my_decimal)); 
+  null_value= item->null_value;
+}
+
+
 /*
   Functions to convert item to field (for send_fields)
 */
@@ -3390,14 +3601,12 @@ static void mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
   current->mark_as_dependent(last);
   if (thd->lex->describe & DESCRIBE_EXTENDED)
   {
-    char warn_buff[MYSQL_ERRMSG_SIZE];
-    sprintf(warn_buff, ER(ER_WARN_FIELD_RESOLVED),
-            db_name, (db_name[0] ? "." : ""),
-            table_name, (table_name [0] ? "." : ""),
-            resolved_item->field_name,
-	    current->select_number, last->select_number);
-    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
-		 ER_WARN_FIELD_RESOLVED, warn_buff);
+    push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
+		 ER_WARN_FIELD_RESOLVED, ER(ER_WARN_FIELD_RESOLVED),
+                 db_name, (db_name[0] ? "." : ""),
+                 table_name, (table_name [0] ? "." : ""),
+                 resolved_item->field_name,
+                 current->select_number, last->select_number);
   }
 }
 
@@ -4209,16 +4418,22 @@ mark_non_agg_field:
       Fields from outer selects added to the aggregate function
       outer_fields list as its unknown at the moment whether it's
       aggregated or not.
+      We're using either the select lex of the cached table (if present)
+      or the field's resolution context. context->select_lex is 
+      safe for use because it's either the SELECT we want to use 
+      (the current level) or a stub added by non-SELECT queries.
     */
+    SELECT_LEX *select_lex= cached_table ? 
+      cached_table->select_lex : context->select_lex;
     if (!thd->lex->in_sum_func)
-      cached_table->select_lex->full_group_by_flag|= NON_AGG_FIELD_USED;
+      select_lex->full_group_by_flag|= NON_AGG_FIELD_USED;
     else
     {
       if (outer_fixed)
         thd->lex->in_sum_func->outer_fields.push_back(this);
       else if (thd->lex->in_sum_func->nest_level !=
           thd->lex->current_select->nest_level)
-        cached_table->select_lex->full_group_by_flag|= NON_AGG_FIELD_USED;
+        select_lex->full_group_by_flag|= NON_AGG_FIELD_USED;
     }
   }
   return FALSE;
@@ -4875,7 +5090,10 @@ int Item_null::save_safe_in_field(Field *field)
 /*
   This implementation can lose str_value content, so if the
   Item uses str_value to store something, it should
-  reimplement it's ::save_in_field() as Item_string, for example, does
+  reimplement it's ::save_in_field() as Item_string, for example, does.
+
+  Note: all Item_XXX::val_str(str) methods must NOT rely on the fact that
+  str != str_value. For example, see fix for bug #44743.
 */
 
 int Item::save_in_field(Field *field, bool no_conversions)
@@ -4945,10 +5163,9 @@ int Item_uint::save_in_field(Field *field, bool no_conversions)
   return Item_int::save_in_field(field, no_conversions);
 }
 
-
-int Item_int::save_in_field(Field *field, bool no_conversions)
+static int save_int_value_in_field (Field *field, longlong nr, 
+                                    bool null_value, bool unsigned_flag)
 {
-  longlong nr=val_int();
   if (null_value)
     return set_field_to_null(field);
   field->set_notnull();
@@ -4956,6 +5173,12 @@ int Item_int::save_in_field(Field *field, bool no_conversions)
 }
 
 
+int Item_int::save_in_field(Field *field, bool no_conversions)
+{
+  return save_int_value_in_field (field, val_int(), null_value, unsigned_flag);
+}
+
+
 int Item_decimal::save_in_field(Field *field, bool no_conversions)
 {
   field->set_notnull();
@@ -5812,7 +6035,8 @@ void Item_ref::print(String *str, enum_query_type query_type)
         !table_name && name && alias_name_used)
     {
       THD *thd= current_thd;
-      append_identifier(thd, str, name, (uint) strlen(name));
+      append_identifier(thd, str, (*ref)->real_item()->name,
+                        (*ref)->real_item()->name_length);
     }
     else
       (*ref)->print(str, query_type);
@@ -7084,8 +7308,9 @@ bool Item_type_holder::join_types(THD *thd, Item *item)
     int item_prec = max(prev_decimal_int_part, item_int_part) + decimals;
     int precision= min(item_prec, DECIMAL_MAX_PRECISION);
     unsigned_flag&= item->unsigned_flag;
-    max_length= my_decimal_precision_to_length(precision, decimals,
-                                               unsigned_flag);
+    max_length= my_decimal_precision_to_length_no_truncation(precision,
+                                                             decimals,
+                                                             unsigned_flag);
   }
 
   switch (Field::result_merge_type(fld_type))
diff --git a/sql/item.h b/sql/item.h
index 96a4e9f7a31..3dfcd7c2612 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -2443,48 +2443,203 @@ public:
 #include "item_xmlfunc.h"
 #endif
 
-class Item_copy_string :public Item
+/**
+  Base class to implement typed value caching Item classes
+
+  Item_copy_ classes are very similar to the corresponding Item_
+  classes (e.g. Item_copy_int is similar to Item_int) but they add
+  the following additional functionality to Item_ :
+    1. Nullability
+    2. Possibility to store the value not only on instantiation time,
+       but also later.
+  Item_copy_ classes are a functionality subset of Item_cache_ 
+  classes, as e.g. they don't support comparisons with the original Item
+  as Item_cache_ classes do.
+  Item_copy_ classes are used in GROUP BY calculation.
+  TODO: Item_copy should be made an abstract interface and Item_copy_
+  classes should inherit both the respective Item_ class and the interface.
+  Ideally we should drop Item_copy_ classes altogether and merge 
+  their functionality to Item_cache_ (and these should be made to inherit
+  from Item_).
+*/
+
+class Item_copy :public Item
 {
+protected:  
+
+  /**
+    Stores the type of the resulting field that would be used to store the data
+    in the cache. This is to avoid calls to the original item.
+  */
   enum enum_field_types cached_field_type;
-public:
+
+  /** The original item that is copied */
   Item *item;
-  Item_copy_string(Item *i) :item(i)
+
+  /**
+    Stores the result type of the original item, so it can be returned
+    without calling the original item's method
+  */
+  Item_result cached_result_type;
+
+  /**
+    Constructor of the Item_copy class
+
+    stores metadata information about the original class as well as a 
+    pointer to it.
+  */
+  Item_copy(Item *i)
   {
+    item= i;
     null_value=maybe_null=item->maybe_null;
     decimals=item->decimals;
     max_length=item->max_length;
     name=item->name;
     cached_field_type= item->field_type();
+    cached_result_type= item->result_type();
+    unsigned_flag= item->unsigned_flag;
   }
+
+public:
+  /** 
+    Factory method to create the appropriate subclass dependent on the type of 
+    the original item.
+
+    @param item      the original item.
+  */  
+  static Item_copy *create (Item *item);
+
+  /** 
+    Update the cache with the value of the original item
+   
+    This is the method that updates the cached value.
+    It must be explicitly called by the user of this class to store the value 
+    of the orginal item in the cache.
+  */  
+  virtual void copy() = 0;
+
+  Item *get_item() { return item; }
+  /** All of the subclasses should have the same type tag */
   enum Type type() const { return COPY_STR_ITEM; }
-  enum Item_result result_type () const { return STRING_RESULT; }
   enum_field_types field_type() const { return cached_field_type; }
-  double val_real()
-  {
-    int err_not_used;
-    char *end_not_used;
-    return (null_value ? 0.0 :
-	    my_strntod(str_value.charset(), (char*) str_value.ptr(),
-		       str_value.length(), &end_not_used, &err_not_used));
-  }
-  longlong val_int()
-  {
-    int err;
-    return null_value ? LL(0) : my_strntoll(str_value.charset(),str_value.ptr(),
-                                            str_value.length(),10, (char**) 0,
-                                            &err); 
-  }
-  String *val_str(String*);
-  my_decimal *val_decimal(my_decimal *);
+  enum Item_result result_type () const { return cached_result_type; }
+
   void make_field(Send_field *field) { item->make_field(field); }
-  void copy();
-  int save_in_field(Field *field, bool no_conversions)
-  {
-    return save_str_value_in_field(field, &str_value);
-  }
   table_map used_tables() const { return (table_map) 1L; }
   bool const_item() const { return 0; }
   bool is_null() { return null_value; }
+
+  /*  
+    Override the methods below as pure virtual to make sure all the 
+    sub-classes implement them.
+  */  
+
+  virtual String *val_str(String*) = 0;
+  virtual my_decimal *val_decimal(my_decimal *) = 0;
+  virtual double val_real() = 0;
+  virtual longlong val_int() = 0;
+  virtual int save_in_field(Field *field, bool no_conversions) = 0;
+};
+
+/**
+ Implementation of a string cache.
+ 
+ Uses Item::str_value for storage
+*/ 
+class Item_copy_string : public Item_copy
+{
+public:
+  Item_copy_string (Item *item) : Item_copy(item) {}
+
+  String *val_str(String*);
+  my_decimal *val_decimal(my_decimal *);
+  double val_real();
+  longlong val_int();
+  void copy();
+  int save_in_field(Field *field, bool no_conversions);
+};
+
+
+class Item_copy_int : public Item_copy
+{
+protected:  
+  longlong cached_value; 
+public:
+  Item_copy_int (Item *i) : Item_copy(i) {}
+  int save_in_field(Field *field, bool no_conversions);
+
+  virtual String *val_str(String*);
+  virtual my_decimal *val_decimal(my_decimal *);
+  virtual double val_real()
+  {
+    return null_value ? 0.0 : (double) cached_value;
+  }
+  virtual longlong val_int()
+  {
+    return null_value ? LL(0) : cached_value;
+  }
+  virtual void copy();
+};
+
+
+class Item_copy_uint : public Item_copy_int
+{
+public:
+  Item_copy_uint (Item *item) : Item_copy_int(item) 
+  {
+    unsigned_flag= 1;
+  }
+
+  String *val_str(String*);
+  double val_real()
+  {
+    return null_value ? 0.0 : (double) (ulonglong) cached_value;
+  }
+};
+
+
+class Item_copy_float : public Item_copy
+{
+protected:  
+  double cached_value; 
+public:
+  Item_copy_float (Item *i) : Item_copy(i) {}
+  int save_in_field(Field *field, bool no_conversions);
+
+  String *val_str(String*);
+  my_decimal *val_decimal(my_decimal *);
+  double val_real()
+  {
+    return null_value ? 0.0 : cached_value;
+  }
+  longlong val_int()
+  {
+    return (longlong) rint(val_real());
+  }
+  void copy()
+  {
+    cached_value= item->val_real();
+    null_value= item->null_value;
+  }
+};
+
+
+class Item_copy_decimal : public Item_copy
+{
+protected:  
+  my_decimal cached_value;
+public:
+  Item_copy_decimal (Item *i) : Item_copy(i) {}
+  int save_in_field(Field *field, bool no_conversions);
+
+  String *val_str(String*);
+  my_decimal *val_decimal(my_decimal *) 
+  { 
+    return null_value ? NULL: &cached_value; 
+  }
+  double val_real();
+  longlong val_int();
+  void copy();
 };
 
 
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index a9bfea1b806..1ff9ca6a419 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -1484,8 +1484,8 @@ longlong Item_func_truth::val_int()
 
 bool Item_in_optimizer::fix_left(THD *thd, Item **ref)
 {
-  if (!args[0]->fixed && args[0]->fix_fields(thd, args) ||
-      !cache && !(cache= Item_cache::get_cache(args[0])))
+  if ((!args[0]->fixed && args[0]->fix_fields(thd, args)) ||
+      (!cache && !(cache= Item_cache::get_cache(args[0]))))
     return 1;
 
   cache->setup(args[0]);
@@ -2724,16 +2724,6 @@ void Item_func_case::fix_length_and_dec()
     nagg++;
     if (!(found_types= collect_cmp_types(agg, nagg)))
       return;
-    if (with_sum_func || current_thd->lex->current_select->group_list.elements)
-    {
-      /*
-        See TODO commentary in the setup_copy_fields function:
-        item in a group may be wrapped with an Item_copy_string item.
-        That item has a STRING_RESULT result type, so we need
-        to take this type into account.
-      */
-      found_types |= (1 << item_cmp_type(left_result_type, STRING_RESULT));
-    }
 
     for (i= 0; i <= (uint)DECIMAL_RESULT; i++)
     {
@@ -2770,8 +2760,9 @@ void Item_func_case::fix_length_and_dec()
       agg_num_lengths(args[i + 1]);
     if (else_expr_num != -1) 
       agg_num_lengths(args[else_expr_num]);
-    max_length= my_decimal_precision_to_length(max_length + decimals, decimals,
-                                               unsigned_flag);
+    max_length= my_decimal_precision_to_length_no_truncation(max_length +
+                                                             decimals, decimals,
+                                                             unsigned_flag);
   }
 }
 
@@ -3006,8 +2997,8 @@ int cmp_longlong(void *cmp_arg,
       One of the args is unsigned and is too big to fit into the 
       positive signed range. Report no match.
     */  
-    if (a->unsigned_flag && ((ulonglong) a->val) > (ulonglong) LONGLONG_MAX ||
-        b->unsigned_flag && ((ulonglong) b->val) > (ulonglong) LONGLONG_MAX)
+    if ((a->unsigned_flag && ((ulonglong) a->val) > (ulonglong) LONGLONG_MAX) ||
+        (b->unsigned_flag && ((ulonglong) b->val) > (ulonglong) LONGLONG_MAX))
       return a->unsigned_flag ? 1 : -1;
     /*
       Although the signedness differs both args can fit into the signed 
diff --git a/sql/item_func.cc b/sql/item_func.cc
index 876aee719a3..0af3c4954cd 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -452,11 +452,45 @@ Field *Item_func::tmp_table_field(TABLE *table)
     return make_string_field(table);
     break;
   case DECIMAL_RESULT:
-    field= new Field_new_decimal(my_decimal_precision_to_length(decimal_precision(),
-                                                                decimals,
-                                                                unsigned_flag),
-                                 maybe_null, name, decimals, unsigned_flag);
+  {
+    uint8 dec= decimals;
+    uint8 intg= decimal_precision() - dec;
+    uint32 len= max_length;
+
+    /*
+      Trying to put too many digits overall in a DECIMAL(prec,dec)
+      will always throw a warning. We must limit dec to
+      DECIMAL_MAX_SCALE however to prevent an assert() later.
+    */
+
+    if (dec > 0)
+    {
+      int overflow;
+
+      dec= min(dec, DECIMAL_MAX_SCALE);
+
+      /*
+        If the value still overflows the field with the corrected dec,
+        we'll throw out decimals rather than integers. This is still
+        bad and of course throws a truncation warning.
+      */
+
+      const int required_length=
+        my_decimal_precision_to_length(intg + dec, dec,
+                                                     unsigned_flag);
+
+      overflow= required_length - len;
+
+      if (overflow > 0)
+        dec= max(0, dec - overflow);            // too long, discard fract
+      else
+        /* Corrected value fits. */
+        len= required_length;
+    }
+
+    field= new Field_new_decimal(len, maybe_null, name, dec, unsigned_flag);
     break;
+  }
   case ROW_RESULT:
   default:
     // This case should never be chosen
@@ -545,8 +579,8 @@ void Item_func::count_decimal_length()
     set_if_smaller(unsigned_flag, args[i]->unsigned_flag);
   }
   int precision= min(max_int_part + decimals, DECIMAL_MAX_PRECISION);
-  max_length= my_decimal_precision_to_length(precision, decimals,
-                                             unsigned_flag);
+  max_length= my_decimal_precision_to_length_no_truncation(precision, decimals,
+                                                           unsigned_flag);
 }
 
 
@@ -1141,16 +1175,15 @@ void Item_func_additive_op::result_precision()
   decimals= max(args[0]->decimals, args[1]->decimals);
   int arg1_int= args[0]->decimal_precision() - args[0]->decimals;
   int arg2_int= args[1]->decimal_precision() - args[1]->decimals;
-  int est_prec= max(arg1_int, arg2_int) + 1 + decimals;
-  int precision= min(est_prec, DECIMAL_MAX_PRECISION);
+  int precision= max(arg1_int, arg2_int) + 1 + decimals;
 
   /* Integer operations keep unsigned_flag if one of arguments is unsigned */
   if (result_type() == INT_RESULT)
     unsigned_flag= args[0]->unsigned_flag | args[1]->unsigned_flag;
   else
     unsigned_flag= args[0]->unsigned_flag & args[1]->unsigned_flag;
-  max_length= my_decimal_precision_to_length(precision, decimals,
-                                             unsigned_flag);
+  max_length= my_decimal_precision_to_length_no_truncation(precision, decimals,
+                                                           unsigned_flag);
 }
 
 
@@ -1255,7 +1288,8 @@ void Item_func_mul::result_precision()
   decimals= min(args[0]->decimals + args[1]->decimals, DECIMAL_MAX_SCALE);
   uint est_prec = args[0]->decimal_precision() + args[1]->decimal_precision();
   uint precision= min(est_prec, DECIMAL_MAX_PRECISION);
-  max_length= my_decimal_precision_to_length(precision, decimals,unsigned_flag);
+  max_length= my_decimal_precision_to_length_no_truncation(precision, decimals,
+                                                           unsigned_flag);
 }
 
 
@@ -1311,8 +1345,8 @@ void Item_func_div::result_precision()
   else
     unsigned_flag= args[0]->unsigned_flag & args[1]->unsigned_flag;
   decimals= min(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
-  max_length= my_decimal_precision_to_length(precision, decimals,
-                                             unsigned_flag);
+  max_length= my_decimal_precision_to_length_no_truncation(precision, decimals,
+                                                           unsigned_flag);
 }
 
 
@@ -1944,8 +1978,8 @@ void Item_func_round::fix_length_and_dec()
   unsigned_flag= args[0]->unsigned_flag;
   if (!args[1]->const_item())
   {
-    max_length= args[0]->max_length;
     decimals= args[0]->decimals;
+    max_length= float_length(decimals);
     if (args[0]->result_type() == DECIMAL_RESULT)
     {
       max_length++;
@@ -1965,8 +1999,8 @@ void Item_func_round::fix_length_and_dec()
 
   if (args[0]->decimals == NOT_FIXED_DEC)
   {
-    max_length= args[0]->max_length;
     decimals= min(decimals_to_set, NOT_FIXED_DEC);
+    max_length= float_length(decimals);
     hybrid_type= REAL_RESULT;
     return;
   }
@@ -1999,8 +2033,9 @@ void Item_func_round::fix_length_and_dec()
 
     precision-= decimals_delta - length_increase;
     decimals= min(decimals_to_set, DECIMAL_MAX_SCALE);
-    max_length= my_decimal_precision_to_length(precision, decimals,
-                                               unsigned_flag);
+    max_length= my_decimal_precision_to_length_no_truncation(precision,
+                                                             decimals,
+                                                             unsigned_flag);
     break;
   }
   default:
@@ -2143,9 +2178,6 @@ bool Item_func_rand::fix_fields(THD *thd,Item **ref)
     if (!rand && !(rand= (struct rand_struct*)
                    thd->stmt_arena->alloc(sizeof(*rand))))
       return TRUE;
-
-    if (args[0]->const_item())
-      seed_random (args[0]);
   }
   else
   {
@@ -2175,8 +2207,21 @@ void Item_func_rand::update_used_tables()
 double Item_func_rand::val_real()
 {
   DBUG_ASSERT(fixed == 1);
-  if (arg_count && !args[0]->const_item())
-    seed_random (args[0]);
+  if (arg_count)
+  {
+    if (!args[0]->const_item())
+      seed_random(args[0]);
+    else if (first_eval)
+    {
+      /*
+        Constantness of args[0] may be set during JOIN::optimize(), if arg[0]
+        is a field item of "constant" table. Thus, we have to evaluate
+        seed_random() for constant arg there but not at the fix_fields method.
+      */
+      first_eval= FALSE;
+      seed_random(args[0]);
+    }
+  }
   return my_rnd(rand);
 }
 
@@ -2233,8 +2278,9 @@ void Item_func_min_max::fix_length_and_dec()
     }
   }
   else if ((cmp_type == DECIMAL_RESULT) || (cmp_type == INT_RESULT))
-    max_length= my_decimal_precision_to_length(max_int_part+decimals, decimals,
-                                            unsigned_flag);
+    max_length= my_decimal_precision_to_length_no_truncation(max_int_part +
+                                                             decimals, decimals,
+                                                             unsigned_flag);
   cached_field_type= agg_field_type(args, arg_count);
 }
 
@@ -4177,6 +4223,41 @@ Item_func_set_user_var::check(bool use_result_field)
 }
 
 
+/**
+  @brief Evaluate and store item's result.
+  This function is invoked on "SELECT ... INTO @var ...".
+  
+  @param    item    An item to get value from.
+*/
+
+void Item_func_set_user_var::save_item_result(Item *item)
+{
+  DBUG_ENTER("Item_func_set_user_var::save_item_result");
+
+  switch (cached_result_type) {
+  case REAL_RESULT:
+    save_result.vreal= item->val_result();
+    break;
+  case INT_RESULT:
+    save_result.vint= item->val_int_result();
+    unsigned_flag= item->unsigned_flag;
+    break;
+  case STRING_RESULT:
+    save_result.vstr= item->str_result(&value);
+    break;
+  case DECIMAL_RESULT:
+    save_result.vdec= item->val_decimal_result(&decimal_buff);
+    break;
+  case ROW_RESULT:
+  default:
+    // Should never happen
+    DBUG_ASSERT(0);
+    break;
+  }
+  DBUG_VOID_RETURN;
+}
+
+
 /**
   This functions is invoked on
   SET \@variable or \@variable:= expression.
@@ -4418,8 +4499,8 @@ int Item_func_set_user_var::save_in_field(Field *field, bool no_conversions,
   update();
 
   if (result_type() == STRING_RESULT ||
-      result_type() == REAL_RESULT &&
-      field->result_type() == STRING_RESULT)
+      (result_type() == REAL_RESULT &&
+      field->result_type() == STRING_RESULT))
   {
     String *result;
     CHARSET_INFO *cs= collation.collation;
@@ -4835,10 +4916,20 @@ bool Item_func_get_system_var::is_written_to_binlog()
 }
 
 
+void Item_func_get_system_var::update_null_value()
+{
+  THD *thd= current_thd;
+  int save_no_errors= thd->no_errors;
+  thd->no_errors= TRUE;
+  Item::update_null_value();
+  thd->no_errors= save_no_errors;
+}
+
+
 void Item_func_get_system_var::fix_length_and_dec()
 {
   char *cptr;
-  maybe_null=0;
+  maybe_null= TRUE;
   max_length= 0;
 
   if (var->check_type(var_type))
@@ -5747,6 +5838,14 @@ Item_func_sp::func_name() const
 }
 
 
+int my_missing_function_error(const LEX_STRING &token, const char *func_name)
+{
+  if (token.length && is_lex_native_function (&token))
+    return my_error(ER_FUNC_INEXISTENT_NAME_COLLISION, MYF(0), func_name);
+  else
+    return my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "FUNCTION", func_name);
+}
+
 
 /**
   @brief Initialize the result field by creating a temporary dummy table
@@ -5779,7 +5878,7 @@ Item_func_sp::init_result_field(THD *thd)
   if (!(m_sp= sp_find_routine(thd, TYPE_ENUM_FUNCTION, m_name,
                                &thd->sp_func_cache, TRUE)))
   {
-    my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "FUNCTION", m_name->m_qname.str);
+    my_missing_function_error (m_name->m_name, m_name->m_qname.str);
     context->process_error(thd);
     DBUG_RETURN(TRUE);
   }
diff --git a/sql/item_func.h b/sql/item_func.h
index d23d821baf6..514f93a39ea 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -378,7 +378,8 @@ public:
   Item_decimal_typecast(Item *a, int len, int dec) :Item_func(a)
   {
     decimals= dec;
-    max_length= my_decimal_precision_to_length(len, dec, unsigned_flag);
+    max_length= my_decimal_precision_to_length_no_truncation(len, dec,
+                                                             unsigned_flag);
   }
   String *val_str(String *str);
   double val_real();
@@ -696,14 +697,16 @@ public:
 class Item_func_rand :public Item_real_func
 {
   struct rand_struct *rand;
+  bool first_eval; // TRUE if val_real() is called 1st time
 public:
-  Item_func_rand(Item *a) :Item_real_func(a), rand(0) {}
+  Item_func_rand(Item *a) :Item_real_func(a), rand(0), first_eval(TRUE) {}
   Item_func_rand()	  :Item_real_func() {}
   double val_real();
   const char *func_name() const { return "rand"; }
   bool const_item() const { return 0; }
   void update_used_tables();
   bool fix_fields(THD *thd, Item **ref);
+  void cleanup() { first_eval= TRUE; Item_real_func::cleanup(); }
 private:
   void seed_random (Item * val);  
 };
@@ -1341,6 +1344,7 @@ public:
   bool send(Protocol *protocol, String *str_arg);
   void make_field(Send_field *tmp_field);
   bool check(bool use_result_field);
+  void save_item_result(Item *item);
   bool update();
   enum Item_result result_type () const { return cached_result_type; }
   bool fix_fields(THD *thd, Item **ref);
@@ -1452,6 +1456,7 @@ public:
                            LEX_STRING *component_arg, const char *name_arg,
                            size_t name_len_arg);
   enum Functype functype() const { return GSYSVAR_FUNC; }
+  void update_null_value();
   void fix_length_and_dec();
   void print(String *str, enum_query_type query_type);
   bool const_item() const { return true; }
diff --git a/sql/item_geofunc.cc b/sql/item_geofunc.cc
index 24a92c78e9c..a34204b7181 100644
--- a/sql/item_geofunc.cc
+++ b/sql/item_geofunc.cc
@@ -416,7 +416,10 @@ String *Item_func_spatial_collection::val_str(String *str)
     else
     {
       enum Geometry::wkbType wkb_type;
-      const char *data= res->ptr() + 4/*SRID*/ + 1;
+      const uint data_offset= 4/*SRID*/ + 1;
+      if (res->length() < data_offset + sizeof(uint32))
+        goto err;
+      const char *data= res->ptr() + data_offset;
 
       /*
 	In the case of named collection we must check that items
@@ -439,7 +442,7 @@ String *Item_func_spatial_collection::val_str(String *str)
 	break;
 
       case Geometry::wkb_linestring:
-	if (str->append(data, POINT_DATA_SIZE, 512))
+	if (len < POINT_DATA_SIZE || str->append(data, POINT_DATA_SIZE, 512))
 	  goto err;
 	break;
       case Geometry::wkb_polygon:
@@ -448,11 +451,15 @@ String *Item_func_spatial_collection::val_str(String *str)
 	double x1, y1, x2, y2;
 	const char *org_data= data;
 
-	if (len < 4 + 2 * POINT_DATA_SIZE)
+	if (len < 4)
 	  goto err;
 
 	n_points= uint4korr(data);
 	data+= 4;
+
+        if (n_points < 2 || len < 4 + n_points * POINT_DATA_SIZE)
+          goto err;
+        
 	float8get(x1, data);
 	data+= SIZEOF_STORED_DOUBLE;
 	float8get(y1, data);
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 267036e4a3d..be94f19f597 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -1629,16 +1629,17 @@ String *Item_func_password::val_str(String *str)
     return 0;
   if (res->length() == 0)
     return &my_empty_string;
-  make_scrambled_password(tmp_value, res->c_ptr());
+  my_make_scrambled_password(tmp_value, res->ptr(), res->length());
   str->set(tmp_value, SCRAMBLED_PASSWORD_CHAR_LENGTH, res->charset());
   return str;
 }
 
-char *Item_func_password::alloc(THD *thd, const char *password)
+char *Item_func_password::alloc(THD *thd, const char *password,
+                                size_t pass_len)
 {
   char *buff= (char *) thd->alloc(SCRAMBLED_PASSWORD_CHAR_LENGTH+1);
   if (buff)
-    make_scrambled_password(buff, password);
+    my_make_scrambled_password(buff, password, pass_len);
   return buff;
 }
 
@@ -1652,16 +1653,17 @@ String *Item_func_old_password::val_str(String *str)
     return 0;
   if (res->length() == 0)
     return &my_empty_string;
-  make_scrambled_password_323(tmp_value, res->c_ptr());
+  my_make_scrambled_password_323(tmp_value, res->ptr(), res->length());
   str->set(tmp_value, SCRAMBLED_PASSWORD_CHAR_LENGTH_323, res->charset());
   return str;
 }
 
-char *Item_func_old_password::alloc(THD *thd, const char *password)
+char *Item_func_old_password::alloc(THD *thd, const char *password,
+                                    size_t pass_len)
 {
   char *buff= (char *) thd->alloc(SCRAMBLED_PASSWORD_CHAR_LENGTH_323+1);
   if (buff)
-    make_scrambled_password_323(buff, password);
+    my_make_scrambled_password_323(buff, password, pass_len);
   return buff;
 }
 
@@ -2706,7 +2708,13 @@ String *Item_func_conv_charset::val_str(String *str)
   DBUG_ASSERT(fixed == 1);
   if (use_cached_value)
     return null_value ? 0 : &str_value;
-  String *arg= args[0]->val_str(str);
+  /* 
+    Here we don't pass 'str' as a parameter to args[0]->val_str()
+    as 'str' may point to 'str_value' (e.g. see Item::save_in_field()),
+    which we use below to convert string. 
+    Use argument's 'str_value' instead.
+  */
+  String *arg= args[0]->val_str(&args[0]->str_value);
   uint dummy_errors;
   if (!arg)
   {
@@ -2943,7 +2951,7 @@ String *Item_load_file::val_str(String *str)
       )
     goto err;
 
-  (void) fn_format(path, file_name->c_ptr(), mysql_real_data_home, "",
+  (void) fn_format(path, file_name->c_ptr_safe(), mysql_real_data_home, "",
 		   MY_RELATIVE_PATH | MY_UNPACK_FILENAME);
 
   /* Read only allowed from within dir specified by secure_file_priv */
@@ -2969,7 +2977,7 @@ String *Item_load_file::val_str(String *str)
   }
   if (tmp_value.alloc(stat_info.st_size))
     goto err;
-  if ((file = my_open(file_name->c_ptr(), O_RDONLY, MYF(0))) < 0)
+  if ((file = my_open(file_name->ptr(), O_RDONLY, MYF(0))) < 0)
     goto err;
   if (my_read(file, (uchar*) tmp_value.ptr(), stat_info.st_size, MYF(MY_NABP)))
   {
@@ -3219,7 +3227,21 @@ longlong Item_func_uncompressed_length::val_int()
   if (res->is_empty()) return 0;
 
   /*
-    res->ptr() using is safe because we have tested that string is not empty,
+    If length is <= 4 bytes, data is corrupt. This is the best we can do
+    to detect garbage input without decompressing it.
+  */
+  if (res->length() <= 4)
+  {
+    push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
+                        ER_ZLIB_Z_DATA_ERROR,
+                        ER(ER_ZLIB_Z_DATA_ERROR));
+    null_value= 1;
+    return 0;
+  }
+
+ /*
+    res->ptr() using is safe because we have tested that string is at least
+    5 bytes long.
     res->c_ptr() is not used because:
       - we do not need \0 terminated string to get first 4 bytes
       - c_ptr() tests simbol after string end (uninitialiozed memory) which
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index 5265f608344..2cdb45100ae 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -268,7 +268,7 @@ public:
   String *val_str(String *str);
   void fix_length_and_dec() { max_length= SCRAMBLED_PASSWORD_CHAR_LENGTH; }
   const char *func_name() const { return "password"; }
-  static char *alloc(THD *thd, const char *password);
+  static char *alloc(THD *thd, const char *password, size_t pass_len);
 };
 
 
@@ -287,7 +287,7 @@ public:
   String *val_str(String *str);
   void fix_length_and_dec() { max_length= SCRAMBLED_PASSWORD_CHAR_LENGTH_323; } 
   const char *func_name() const { return "old_password"; }
-  static char *alloc(THD *thd, const char *password);
+  static char *alloc(THD *thd, const char *password, size_t pass_len);
 };
 
 
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 3981b91a27c..00c09679737 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -1314,8 +1314,8 @@ Item_in_subselect::row_value_transformer(JOIN *join)
     Item *item_having_part2= 0;
     for (uint i= 0; i < cols_num; i++)
     {
-      DBUG_ASSERT(left_expr->fixed &&
-                  select_lex->ref_pointer_array[i]->fixed ||
+      DBUG_ASSERT((left_expr->fixed &&
+                  select_lex->ref_pointer_array[i]->fixed) ||
                   (select_lex->ref_pointer_array[i]->type() == REF_ITEM &&
                    ((Item_ref*)(select_lex->ref_pointer_array[i]))->ref_type() ==
                     Item_ref::OUTER_REF));
@@ -1392,8 +1392,8 @@ Item_in_subselect::row_value_transformer(JOIN *join)
     for (uint i= 0; i < cols_num; i++)
     {
       Item *item, *item_isnull;
-      DBUG_ASSERT(left_expr->fixed &&
-                  select_lex->ref_pointer_array[i]->fixed ||
+      DBUG_ASSERT((left_expr->fixed &&
+                  select_lex->ref_pointer_array[i]->fixed) ||
                   (select_lex->ref_pointer_array[i]->type() == REF_ITEM &&
                    ((Item_ref*)(select_lex->ref_pointer_array[i]))->ref_type() ==
                     Item_ref::OUTER_REF));
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index a6d8bb8a52d..38251294053 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -798,8 +798,9 @@ void Item_sum_sum::fix_length_and_dec()
   {
     /* SUM result can't be longer than length(arg) + length(MAX_ROWS) */
     int precision= args[0]->decimal_precision() + DECIMAL_LONGLONG_DIGITS;
-    max_length= my_decimal_precision_to_length(precision, decimals,
-                                               unsigned_flag);
+    max_length= my_decimal_precision_to_length_no_truncation(precision,
+                                                             decimals,
+                                                             unsigned_flag);
     curr_dec_buff= 0;
     hybrid_type= DECIMAL_RESULT;
     my_decimal_set_zero(dec_buffs);
@@ -1233,8 +1234,9 @@ void Item_sum_avg::fix_length_and_dec()
   {
     int precision= args[0]->decimal_precision() + prec_increment;
     decimals= min(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
-    max_length= my_decimal_precision_to_length(precision, decimals,
-                                               unsigned_flag);
+    max_length= my_decimal_precision_to_length_no_truncation(precision,
+                                                             decimals,
+                                                             unsigned_flag);
     f_precision= min(precision+DECIMAL_LONGLONG_DIGITS, DECIMAL_MAX_PRECISION);
     f_scale=  args[0]->decimals;
     dec_bin_size= my_decimal_get_binary_size(f_precision, f_scale);
@@ -1439,8 +1441,9 @@ void Item_sum_variance::fix_length_and_dec()
   {
     int precision= args[0]->decimal_precision()*2 + prec_increment;
     decimals= min(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
-    max_length= my_decimal_precision_to_length(precision, decimals,
-                                               unsigned_flag);
+    max_length= my_decimal_precision_to_length_no_truncation(precision,
+                                                             decimals,
+                                                             unsigned_flag);
 
     break;
   }
@@ -2640,8 +2643,8 @@ bool Item_sum_count_distinct::setup(THD *thd)
       enum enum_field_types f_type= f->type();
       tree_key_length+= f->pack_length();
       if ((f_type == MYSQL_TYPE_VARCHAR) ||
-          !f->binary() && (f_type == MYSQL_TYPE_STRING ||
-                           f_type == MYSQL_TYPE_VAR_STRING))
+          (!f->binary() && (f_type == MYSQL_TYPE_STRING ||
+                           f_type == MYSQL_TYPE_VAR_STRING)))
       {
         all_binary= FALSE;
         break;
@@ -3324,8 +3327,13 @@ bool Item_func_group_concat::add()
 
   TREE_ELEMENT *el= 0;                          // Only for safety
   if (row_eligible && tree)
+  {
     el= tree_insert(tree, table->record[0] + table->s->null_bytes, 0,
                     tree->custom_arg);
+    /* check if there was enough memory to insert the row */
+    if (!el)
+      return 1;
+  }
   /*
     If the row is not a duplicate (el->count == 1)
     we can dump the row here in case of GROUP_CONCAT(DISTINCT...)
diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc
index 8caff22eab9..d79b0b02998 100644
--- a/sql/item_timefunc.cc
+++ b/sql/item_timefunc.cc
@@ -446,7 +446,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
         strict_week_number= (*ptr=='V' || *ptr=='v');
 	tmp= (char*) val + min(val_len, 2);
 	if ((week_number= (int) my_strtoll10(val, &tmp, &error)) < 0 ||
-            strict_week_number && !week_number ||
+            (strict_week_number && !week_number) ||
             week_number > 53)
           goto err;
 	val= tmp;
@@ -542,10 +542,10 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
       %V,%v require %X,%x resprectively,
       %U,%u should be used with %Y and not %X or %x
     */
-    if (strict_week_number &&
+    if ((strict_week_number &&
         (strict_week_number_year < 0 ||
-         strict_week_number_year_type != sunday_first_n_first_week_non_iso) ||
-        !strict_week_number && strict_week_number_year >= 0)
+         strict_week_number_year_type != sunday_first_n_first_week_non_iso)) ||
+        (!strict_week_number && strict_week_number_year >= 0))
       goto err;
 
     /* Number of days since year 0 till 1st Jan of this year */
diff --git a/sql/log.cc b/sql/log.cc
index ed2eff6625d..8bb6ba8e9c6 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -153,7 +153,8 @@ private:
 class binlog_trx_data {
 public:
   binlog_trx_data()
-    : at_least_one_stmt(0), m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF)
+    : at_least_one_stmt(0), incident(FALSE), m_pending(0),
+    before_stmt_pos(MY_OFF_T_UNDEF)
   {
     trans_log.end_of_file= max_binlog_cache_size;
   }
@@ -184,6 +185,7 @@ public:
     delete pending();
     set_pending(0);
     reinit_io_cache(&trans_log, WRITE_CACHE, pos, 0, 0);
+    trans_log.end_of_file= max_binlog_cache_size;
     if (pos < before_stmt_pos)
       before_stmt_pos= MY_OFF_T_UNDEF;
 
@@ -206,6 +208,7 @@ public:
     if (!empty())
       truncate(0);
     before_stmt_pos= MY_OFF_T_UNDEF;
+    incident= FALSE;
     trans_log.end_of_file= max_binlog_cache_size;
     DBUG_ASSERT(empty());
   }
@@ -222,11 +225,22 @@ public:
 
   IO_CACHE trans_log;                         // The transaction cache
 
+  void set_incident(void)
+  {
+    incident= TRUE;
+  }
+  
+  bool has_incident(void)
+  {
+    return(incident);
+  }
+
   /**
     Boolean that is true if there is at least one statement in the
     transaction cache.
   */
   bool at_least_one_stmt;
+  bool incident;
 
 private:
   /*
@@ -942,7 +956,7 @@ bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
   bool error= FALSE;
   Log_event_handler **current_handler;
   bool is_command= FALSE;
-  char user_host_buff[MAX_USER_HOST_SIZE];
+  char user_host_buff[MAX_USER_HOST_SIZE + 1];
   Security_context *sctx= thd->security_ctx;
   uint user_host_len= 0;
   ulonglong query_utime, lock_utime;
@@ -1008,7 +1022,7 @@ bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
 {
   bool error= FALSE;
   Log_event_handler **current_handler= general_log_handler_list;
-  char user_host_buff[MAX_USER_HOST_SIZE];
+  char user_host_buff[MAX_USER_HOST_SIZE + 1];
   Security_context *sctx= thd->security_ctx;
   ulong id;
   uint user_host_len= 0;
@@ -1391,7 +1405,8 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
   */
   if (end_ev != NULL)
   {
-    thd->binlog_flush_pending_rows_event(TRUE);
+    if (thd->binlog_flush_pending_rows_event(TRUE))
+      DBUG_RETURN(1);
     /*
       Doing a commit or a rollback including non-transactional tables,
       i.e., ending a transaction where we might write the transaction
@@ -1402,7 +1417,8 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
       were, we would have to ensure that we're not ending a statement
       inside a stored function.
      */
-    error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev);
+    error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev,
+                               trx_data->has_incident());
     trx_data->reset();
 
     /*
@@ -1428,7 +1444,11 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
      */
     thd->binlog_remove_pending_rows_event(TRUE);
     if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
+    {
+      if (trx_data->has_incident())
+        mysql_bin_log.write_incident(thd, TRUE);
       trx_data->reset();
+    }
     else                                        // ...statement
       trx_data->truncate(trx_data->before_stmt_pos);
 
@@ -1502,8 +1522,7 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all)
               YESNO(thd->transaction.stmt.modified_non_trans_table)));
   if (!in_transaction || all)
   {
-    Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, FALSE);
-    qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE)
+    Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, TRUE, 0);
     error= binlog_end_trans(thd, trx_data, &qev, all);
     goto end;
   }
@@ -1545,9 +1564,11 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
                        YESNO(all),
                        YESNO(thd->transaction.all.modified_non_trans_table),
                        YESNO(thd->transaction.stmt.modified_non_trans_table)));
-  if (all && thd->transaction.all.modified_non_trans_table ||
-      !all && thd->transaction.stmt.modified_non_trans_table ||
-      (thd->options & OPTION_KEEP_LOG))
+  if ((all && thd->transaction.all.modified_non_trans_table) ||
+      (!all && thd->transaction.stmt.modified_non_trans_table &&
+       !mysql_bin_log.check_write_error(thd)) ||
+      ((thd->options & OPTION_KEEP_LOG) &&
+        !mysql_bin_log.check_write_error(thd)))
   {
     /*
       We write the transaction cache with a rollback last if we have
@@ -1557,18 +1578,25 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
       transactional table in that statement as well, which needs to be
       rolled back on the slave.
     */
-    Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, FALSE);
-    qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE)
+    Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, TRUE, 0);
     error= binlog_end_trans(thd, trx_data, &qev, all);
   }
-  else if (all && !thd->transaction.all.modified_non_trans_table ||
-           !all && !thd->transaction.stmt.modified_non_trans_table)
+  else
   {
     /*
-      If we have modified only transactional tables, we can truncate
-      the transaction cache without writing anything to the binary
-      log.
+      We reach this point if either only transactional tables were modified or
+      the effect of a statement that did not get into the binlog needs to be
+      rolled back. In the latter case, if a statement changed non-transactional
+      tables or had the OPTION_KEEP_LOG associated, we write an incident event
+      to the binlog in order to stop slaves and notify users that some changes
+      on the master did not get into the binlog and slaves will be inconsistent.
+      On the other hand, if a statement is transactional, we just safely roll it
+      back.
      */
+    if ((thd->transaction.stmt.modified_non_trans_table ||
+        (thd->options & OPTION_KEEP_LOG)) &&
+        mysql_bin_log.check_write_error(thd))
+      trx_data->set_incident();
     error= binlog_end_trans(thd, trx_data, 0, all);
   }
   if (!all)
@@ -1576,6 +1604,44 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
   DBUG_RETURN(error);
 }
 
+void MYSQL_BIN_LOG::set_write_error(THD *thd)
+{
+  DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
+
+  write_error= 1;
+
+  if (check_write_error(thd))
+    DBUG_VOID_RETURN;
+
+  if (my_errno == EFBIG)
+    my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(MY_WME));
+  else
+    my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name, errno);
+
+  DBUG_VOID_RETURN;
+}
+
+bool MYSQL_BIN_LOG::check_write_error(THD *thd)
+{
+  DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
+
+  bool checked= FALSE;
+
+  if (!thd->is_error())
+    DBUG_RETURN(checked);
+
+  switch (thd->main_da.sql_errno())
+  {
+    case ER_TRANS_CACHE_FULL:
+    case ER_ERROR_ON_WRITE:
+    case ER_BINLOG_LOGGING_IMPOSSIBLE:
+      checked= TRUE;
+    break;
+  }
+
+  DBUG_RETURN(checked);
+}
+
 /**
   @note
   How do we handle this (unlikely but legal) case:
@@ -1606,10 +1672,11 @@ static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
 
   binlog_trans_log_savepos(thd, (my_off_t*) sv);
   /* Write it to the binary log */
-  
+
+  int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
   int const error=
     thd->binlog_query(THD::STMT_QUERY_TYPE,
-                      thd->query, thd->query_length, TRUE, FALSE);
+                      thd->query, thd->query_length, TRUE, FALSE, errcode);
   DBUG_RETURN(error);
 }
 
@@ -1625,9 +1692,10 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
   if (unlikely(thd->transaction.all.modified_non_trans_table || 
                (thd->options & OPTION_KEEP_LOG)))
   {
+    int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
     int error=
       thd->binlog_query(THD::STMT_QUERY_TYPE,
-                        thd->query, thd->query_length, TRUE, FALSE);
+                        thd->query, thd->query_length, TRUE, FALSE, errcode);
     DBUG_RETURN(error);
   }
   binlog_trans_log_truncate(thd, *(my_off_t*)sv);
@@ -2065,6 +2133,9 @@ bool MYSQL_QUERY_LOG::write(time_t event_time, const char *user_host,
   /* Test if someone closed between the is_open test and lock */
   if (is_open())
   {
+    /* for testing output of timestamp and thread id */
+    DBUG_EXECUTE_IF("reset_log_last_time", last_time= 0;);
+
     /* Note that my_b_write() assumes it knows the length for this */
       if (event_time != last_time)
       {
@@ -2073,7 +2144,7 @@ bool MYSQL_QUERY_LOG::write(time_t event_time, const char *user_host,
         localtime_r(&event_time, &start);
 
         time_buff_len= my_snprintf(local_time_buff, MAX_TIME_SIZE,
-                                   "%02d%02d%02d %2d:%02d:%02d",
+                                   "%02d%02d%02d %2d:%02d:%02d\t",
                                    start.tm_year % 100, start.tm_mon + 1,
                                    start.tm_mday, start.tm_hour,
                                    start.tm_min, start.tm_sec);
@@ -3851,6 +3922,7 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
     if (pending->write(file))
     {
       pthread_mutex_unlock(&LOCK_log);
+      set_write_error(thd);
       DBUG_RETURN(1);
     }
 
@@ -3925,7 +3997,8 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
   */
   bool const end_stmt=
     thd->prelocked_mode && thd->lex->requires_prelocking();
-  thd->binlog_flush_pending_rows_event(end_stmt);
+  if (thd->binlog_flush_pending_rows_event(end_stmt))
+    DBUG_RETURN(error);
 
   pthread_mutex_lock(&LOCK_log);
 
@@ -3976,8 +4049,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
         DBUG_PRINT("info", ("Using trans_log: cache: %d, trans_log_pos: %lu",
                             event_info->get_cache_stmt(),
                             (ulong) trans_log_pos));
-        if (trans_log_pos == 0)
-          thd->binlog_start_trans_and_stmt();
+        thd->binlog_start_trans_and_stmt();
         file= trans_log;
       }
       /*
@@ -4055,7 +4127,8 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
        Write the SQL command
      */
 
-    if (event_info->write(file))
+    if (event_info->write(file) || 
+        DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
       goto err;
 
     if (file == &log_file) // we are writing to the real log (disk)
@@ -4069,13 +4142,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
 
 err:
     if (error)
-    {
-      if (my_errno == EFBIG)
-	my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(0));
-      else
-	my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
-      write_error=1;
-    }
+      set_write_error(thd);
   }
 
   if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F)
@@ -4327,6 +4394,58 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
   return 0;                                     // All OK
 }
 
+/*
+  Helper function to get the error code of the query to be binlogged.
+ */
+int query_error_code(THD *thd, bool not_killed)
+{
+  int error;
+  
+  if (not_killed)
+  {
+    error= thd->is_error() ? thd->main_da.sql_errno() : 0;
+
+    /* thd->main_da.sql_errno() might be ER_SERVER_SHUTDOWN or
+       ER_QUERY_INTERRUPTED, So here we need to make sure that error
+       is not set to these errors when specified not_killed by the
+       caller.
+    */
+    if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED)
+      error= 0;
+  }
+  else
+  {
+    /* killed status for DELAYED INSERT thread should never be used */
+    DBUG_ASSERT(!(thd->system_thread & SYSTEM_THREAD_DELAYED_INSERT));
+    error= thd->killed_errno();
+  }
+
+  return error;
+}
+
+bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock)
+{
+  uint error= 0;
+  DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
+  LEX_STRING const write_error_msg=
+    { C_STRING_WITH_LEN("error writing to the binary log") };
+  Incident incident= INCIDENT_LOST_EVENTS;
+  Incident_log_event ev(thd, incident, write_error_msg);
+  if (lock)
+    pthread_mutex_lock(&LOCK_log);
+  ev.write(&log_file);
+  if (lock)
+  {
+    if (!error && !(error= flush_and_sync()))
+    {
+      signal_update();
+      rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
+    }
+    pthread_mutex_unlock(&LOCK_log);
+  }
+  DBUG_RETURN(error);
+}
+
 /**
   Write a cached log entry to the binary log.
   - To support transaction over replication, we wrap the transaction
@@ -4339,6 +4458,9 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
   @param cache		The cache to copy to the binlog
   @param commit_event   The commit event to print after writing the
                         contents of the cache.
+  @param incident       Defines if an incident event should be created to
+                        notify that some non-transactional changes did
+                        not get into the binlog.
 
   @note
     We only come here if there is something in the cache.
@@ -4348,7 +4470,8 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
     'cache' needs to be reinitialized after this functions returns.
 */
 
-bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event)
+bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event,
+                          bool incident)
 {
   DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)");
   VOID(pthread_mutex_lock(&LOCK_log));
@@ -4370,19 +4493,8 @@ bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event)
         transaction is either a BEGIN..COMMIT block or a single
         statement in autocommit mode.
       */
-      Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE);
-      /*
-        Imagine this is rollback due to net timeout, after all
-        statements of the transaction succeeded. Then we want a
-        zero-error code in BEGIN.  In other words, if there was a
-        really serious error code it's already in the statement's
-        events, there is no need to put it also in this internally
-        generated event, and as this event is generated late it would
-        lead to false alarms.
+      Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, TRUE, 0);
 
-        This is safer than thd->clear_error() against kills at shutdown.
-      */
-      qinfo.error_code= 0;
       /*
         Now this Query_log_event has artificial log_pos 0. It must be
         adjusted to reflect the real position in the log. Not doing it
@@ -4408,6 +4520,10 @@ bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event)
 
       if (commit_event && commit_event->write(&log_file))
         goto err;
+
+      if (incident && write_incident(thd, FALSE))
+        goto err;
+
       if (flush_and_sync())
         goto err;
       DBUG_EXECUTE_IF("half_binlogged_transaction", abort(););
diff --git a/sql/log.h b/sql/log.h
index d54df8add3b..d306d6f7182 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -356,9 +356,12 @@ public:
   void new_file();
 
   bool write(Log_event* event_info); // binary log write
-  bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event);
+  bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
+  bool write_incident(THD *thd, bool lock);
 
   int  write_cache(IO_CACHE *cache, bool lock_log, bool flush_and_sync);
+  void set_write_error(THD *thd);
+  bool check_write_error(THD *thd);
 
   void start_union_events(THD *thd, query_id_t query_id_param);
   void stop_union_events(THD *thd);
@@ -581,4 +584,6 @@ enum enum_binlog_format {
 };
 extern TYPELIB binlog_format_typelib;
 
+int query_error_code(THD *thd, bool not_killed);
+
 #endif /* LOG_H */
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 1a8cb8ee4fa..5f77ab3dcc4 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -369,6 +369,34 @@ int convert_handler_error(int error, THD* thd, TABLE *table)
   return (actual_error);
 }
 
+inline bool concurrency_error_code(int error)
+{
+  switch (error)
+  {
+  case ER_LOCK_WAIT_TIMEOUT:
+  case ER_LOCK_DEADLOCK:
+  case ER_XA_RBDEADLOCK:
+    return TRUE;
+  default: 
+    return (FALSE);
+  }
+}
+
+inline bool unexpected_error_code(int unexpected_error)
+{
+  switch (unexpected_error) 
+  {
+  case ER_NET_READ_ERROR:
+  case ER_NET_ERROR_ON_WRITE:
+  case ER_QUERY_INTERRUPTED:
+  case ER_SERVER_SHUTDOWN:
+  case ER_NEW_ABORTING_CONNECTION:
+    return(TRUE);
+  default:
+    return(FALSE);
+  }
+}
+
 /*
   pretty_print_str()
 */
@@ -791,8 +819,8 @@ Log_event::do_shall_skip(Relay_log_info *rli)
                       (ulong) server_id, (ulong) ::server_id,
                       rli->replicate_same_server_id,
                       rli->slave_skip_counter));
-  if (server_id == ::server_id && !rli->replicate_same_server_id ||
-      rli->slave_skip_counter == 1 && rli->is_in_group())
+  if ((server_id == ::server_id && !rli->replicate_same_server_id) ||
+      (rli->slave_skip_counter == 1 && rli->is_in_group()))
     return EVENT_SKIP_IGNORE;
   else if (rli->slave_skip_counter > 0)
     return EVENT_SKIP_COUNT;
@@ -2316,19 +2344,16 @@ Query_log_event::Query_log_event()
       query_length      - size of the  `query_arg' array
       using_trans       - there is a modified transactional table
       suppress_use      - suppress the generation of 'USE' statements
-      killed_status_arg - an optional with default to THD::KILLED_NO_VALUE
-                          if the value is different from the default, the arg
-                          is set to the current thd->killed value.
-                          A caller might need to masquerade thd->killed with
-                          THD::NOT_KILLED.
+      errcode           - the error code of the query
+      
   DESCRIPTION
   Creates an event for binlogging
-  The value for local `killed_status' can be supplied by caller.
+  The value for `errcode' should be supplied by caller.
 */
 Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg,
 				 ulong query_length, bool using_trans,
-				 bool suppress_use,
-                                 THD::killed_state killed_status_arg)
+				 bool suppress_use, int errcode)
+
   :Log_event(thd_arg,
              (thd_arg->thread_specific_used ? LOG_EVENT_THREAD_SPECIFIC_F :
               0) |
@@ -2349,22 +2374,7 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg,
 {
   time_t end_time;
 
-  if (killed_status_arg == THD::KILLED_NO_VALUE)
-    killed_status_arg= thd_arg->killed;
-  error_code=
-    (killed_status_arg == THD::NOT_KILLED) ?
-    (thd_arg->is_error() ? thd_arg->main_da.sql_errno() : 0) :
-    ((thd_arg->system_thread & SYSTEM_THREAD_DELAYED_INSERT) ? 0 :
-     thd_arg->killed_errno());
-
-  /* thd_arg->main_da.sql_errno() might be ER_SERVER_SHUTDOWN or
-     ER_QUERY_INTERRUPTED, So here we need to make sure that
-     error_code is not set to these errors when specified NOT_KILLED
-     by the caller
-  */
-  if ((killed_status_arg == THD::NOT_KILLED) &&
-      (error_code == ER_SERVER_SHUTDOWN || error_code == ER_QUERY_INTERRUPTED))
-    error_code= 0;
+  error_code= errcode;
 
   time(&end_time);
   exec_time = (ulong) (end_time  - thd_arg->start_time);
@@ -2751,7 +2761,8 @@ void Query_log_event::print_query_header(IO_CACHE* file,
 
   if (!(flags & LOG_EVENT_SUPPRESS_USE_F) && db)
   {
-    if ((different_db= memcmp(print_event_info->db, db, db_len + 1)))
+    different_db= memcmp(print_event_info->db, db, db_len + 1);
+    if (different_db)
       memcpy(print_event_info->db, db, db_len + 1);
     if (db[0] && different_db) 
       my_b_printf(file, "use %s%s\n", db, print_event_info->delimiter);
@@ -3008,7 +3019,10 @@ int Query_log_event::do_apply_event(Relay_log_info const *rli,
             ::do_apply_event(), then the companion SET also have so
             we don't need to reset_one_shot_variables().
   */
-  if (rpl_filter->db_ok(thd->db))
+  if (!strncmp(query_arg, "BEGIN", q_len_arg) ||
+      !strncmp(query_arg, "COMMIT", q_len_arg) ||
+      !strncmp(query_arg, "ROLLBACK", q_len_arg) ||
+      rpl_filter->db_ok(thd->db))
   {
     thd->set_time((time_t)when);
     thd->query_length= q_len_arg;
@@ -3020,7 +3034,7 @@ int Query_log_event::do_apply_event(Relay_log_info const *rli,
     DBUG_PRINT("query",("%s",thd->query));
 
     if (ignored_error_code((expected_error= error_code)) ||
-	!check_expected_error(thd,rli,expected_error))
+	!unexpected_error_code(expected_error))
     {
       if (flags2_inited)
         /*
@@ -3152,8 +3166,8 @@ compare_errors:
     actual_error= thd->is_error() ? thd->main_da.sql_errno() : 0;
     DBUG_PRINT("info",("expected_error: %d  sql_errno: %d",
  		       expected_error, actual_error));
-    if ((expected_error != actual_error) &&
- 	expected_error &&
+    if ((expected_error && expected_error != actual_error &&
+         !concurrency_error_code(expected_error)) &&
  	!ignored_error_code(actual_error) &&
  	!ignored_error_code(expected_error))
     {
@@ -3172,7 +3186,8 @@ Default database: '%s'. Query: '%s'",
     /*
       If we get the same error code as expected, or they should be ignored. 
     */
-    else if (expected_error == actual_error ||
+    else if ((expected_error == actual_error && 
+              !concurrency_error_code(expected_error)) ||
  	     ignored_error_code(actual_error))
     {
       DBUG_PRINT("info",("error ignored"));
@@ -3344,8 +3359,8 @@ void Start_log_event_v3::print(FILE* file, PRINT_EVENT_INFO* print_event_info)
       my_b_printf(&cache," at startup");
     my_b_printf(&cache, "\n");
     if (flags & LOG_EVENT_BINLOG_IN_USE_F)
-      my_b_printf(&cache, "# Warning: this binlog was not closed properly. "
-                  "Most probably mysqld crashed writing it.\n");
+      my_b_printf(&cache, "# Warning: this binlog is either in use or was not "
+                  "closed properly.\n");
   }
   if (!is_artificial_event() && created)
   {
@@ -4366,7 +4381,7 @@ void Load_log_event::print(FILE* file_arg, PRINT_EVENT_INFO* print_event_info,
     {
       if (i)
 	my_b_printf(&cache, ",");
-      my_b_printf(&cache, field);
+      my_b_printf(&cache, "%s", field);
 	  
       field += field_lens[i]  + 1;
     }
@@ -6619,9 +6634,9 @@ Execute_load_query_log_event(THD *thd_arg, const char* query_arg,
                              uint fn_pos_end_arg,
                              enum_load_dup_handling dup_handling_arg,
                              bool using_trans, bool suppress_use,
-                             THD::killed_state killed_err_arg):
+                             int errcode):
   Query_log_event(thd_arg, query_arg, query_length_arg, using_trans,
-                  suppress_use, killed_err_arg),
+                  suppress_use, errcode),
   file_id(thd_arg->file_id), fn_pos_start(fn_pos_start_arg),
   fn_pos_end(fn_pos_end_arg), dup_handling(dup_handling_arg)
 {
@@ -6694,7 +6709,7 @@ void Execute_load_query_log_event::print(FILE* file,
   {
     my_b_write(&cache, (uchar*) query, fn_pos_start);
     my_b_printf(&cache, " LOCAL INFILE \'");
-    my_b_printf(&cache, local_fname);
+    my_b_printf(&cache, "%s", local_fname);
     my_b_printf(&cache, "\'");
     if (dup_handling == LOAD_DUP_REPLACE)
       my_b_printf(&cache, " REPLACE");
@@ -6912,8 +6927,8 @@ Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg, ulong tid,
     solution, to be able to terminate a started statement in the
     binary log: the extraneous events will be removed in the future.
    */
-  DBUG_ASSERT(tbl_arg && tbl_arg->s && tid != ~0UL ||
-              !tbl_arg && !cols && tid == ~0UL);
+  DBUG_ASSERT((tbl_arg && tbl_arg->s && tid != ~0UL) ||
+              (!tbl_arg && !cols && tid == ~0UL));
 
   if (thd_arg->options & OPTION_NO_FOREIGN_KEY_CHECKS)
       set_flags(NO_FOREIGN_KEY_CHECKS_F);
@@ -7105,7 +7120,7 @@ int Rows_log_event::do_add_row_data(uchar *row_data, size_t length)
 #endif
 
   DBUG_ASSERT(m_rows_buf <= m_rows_cur);
-  DBUG_ASSERT(!m_rows_buf || m_rows_end && m_rows_buf < m_rows_end);
+  DBUG_ASSERT(!m_rows_buf || (m_rows_end && m_rows_buf < m_rows_end));
   DBUG_ASSERT(m_rows_cur <= m_rows_end);
 
   /* The cast will always work since m_rows_cur <= m_rows_end */
@@ -7862,10 +7877,11 @@ Table_map_log_event::Table_map_log_event(THD *thd, TABLE *tbl, ulong tid,
 
   /*
     Now set the size of the data to the size of the field metadata array
-    plus one or two bytes for number of elements in the field metadata array.
+    plus one or three bytes (see pack.c:net_store_length) for number of 
+    elements in the field metadata array.
   */
   if (m_field_metadata_size > 255)
-    m_data_size+= m_field_metadata_size + 2; 
+    m_data_size+= m_field_metadata_size + 3; 
   else
     m_data_size+= m_field_metadata_size + 1; 
 
@@ -9312,7 +9328,7 @@ Incident_log_event::print(FILE *file,
 
   Write_on_release_cache cache(&print_event_info->head_cache, file);
   print_header(&cache, print_event_info, FALSE);
-  my_b_printf(&cache, "\n# Incident: %s", description());
+  my_b_printf(&cache, "\n# Incident: %s\nRELOAD DATABASE; # Shall generate syntax error\n", description());
 }
 #endif
 
diff --git a/sql/log_event.h b/sql/log_event.h
index bda53da8ab0..8202dddcc76 100644
--- a/sql/log_event.h
+++ b/sql/log_event.h
@@ -676,6 +676,7 @@ typedef struct st_print_event_info
 #ifdef MYSQL_CLIENT
   uint verbose;
   table_mapping m_table_map;
+  table_mapping m_table_map_ignored;
 #endif
 
   /*
@@ -1623,8 +1624,7 @@ public:
 #ifndef MYSQL_CLIENT
 
   Query_log_event(THD* thd_arg, const char* query_arg, ulong query_length,
-                  bool using_trans, bool suppress_use,
-                  THD::killed_state killed_err_arg= THD::KILLED_NO_VALUE);
+                  bool using_trans, bool suppress_use, int error);
   const char* get_db() { return db; }
 #ifdef HAVE_REPLICATION
   void pack_info(Protocol* protocol);
@@ -2875,8 +2875,7 @@ public:
                                uint fn_pos_end_arg,
                                enum_load_dup_handling dup_handling_arg,
                                bool using_trans, bool suppress_use,
-                               THD::killed_state
-                               killed_err_arg= THD::KILLED_NO_VALUE);
+                               int errcode);
 #ifdef HAVE_REPLICATION
   void pack_info(Protocol* protocol);
 #endif /* HAVE_REPLICATION */
diff --git a/sql/my_decimal.h b/sql/my_decimal.h
index 0e79f70ab4e..21669e82c44 100644
--- a/sql/my_decimal.h
+++ b/sql/my_decimal.h
@@ -183,6 +183,19 @@ inline uint my_decimal_length_to_precision(uint length, uint scale,
                  (unsigned_flag || !length ? 0:1));
 }
 
+inline uint32 my_decimal_precision_to_length_no_truncation(uint precision,
+                                                           uint8 scale,
+                                                           bool unsigned_flag)
+{
+  /*
+    When precision is 0 it means that original length was also 0. Thus
+    unsigned_flag is ignored in this case.
+  */
+  DBUG_ASSERT(precision || !scale);
+  return (uint32)(precision + (scale > 0 ? 1 : 0) +
+                  (unsigned_flag || !precision ? 0 : 1));
+}
+
 inline uint32 my_decimal_precision_to_length(uint precision, uint8 scale,
                                              bool unsigned_flag)
 {
@@ -192,8 +205,8 @@ inline uint32 my_decimal_precision_to_length(uint precision, uint8 scale,
   */
   DBUG_ASSERT(precision || !scale);
   set_if_smaller(precision, DECIMAL_MAX_PRECISION);
-  return (uint32)(precision + (scale>0 ? 1:0) +
-                  (unsigned_flag || !precision ? 0:1));
+  return my_decimal_precision_to_length_no_truncation(precision, scale,
+                                                      unsigned_flag);
 }
 
 inline
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index 949fabe3928..8a7501ba108 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -82,9 +82,9 @@ extern query_id_t global_query_id;
 inline query_id_t next_query_id() { return global_query_id++; }
 
 /* useful constants */
-extern const key_map key_map_empty;
-extern key_map key_map_full;          /* Should be threaded as const */
-extern const char *primary_key_name;
+extern MYSQL_PLUGIN_IMPORT const key_map key_map_empty;
+extern MYSQL_PLUGIN_IMPORT key_map key_map_full;          /* Should be threaded as const */
+extern MYSQL_PLUGIN_IMPORT const char *primary_key_name;
 
 #include "mysql_com.h"
 #include <violite.h>
@@ -123,8 +123,10 @@ char* query_table_status(THD *thd,const char *db,const char *table_name);
                         "in MySQL %s. Please use %s instead.", (Old), (Ver), (New)); \
   } while(0)
 
-extern CHARSET_INFO *system_charset_info, *files_charset_info ;
-extern CHARSET_INFO *national_charset_info, *table_alias_charset;
+extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *system_charset_info;
+extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *files_charset_info ;
+extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *national_charset_info;
+extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *table_alias_charset;
 
 
 enum Derivation
@@ -690,14 +692,19 @@ typedef struct st_sql_list {
   }
 } SQL_LIST;
 
-
+#if defined(MYSQL_DYNAMIC_PLUGIN) && defined(_WIN32)
+extern "C" THD *_current_thd_noinline();
+#define _current_thd() _current_thd_noinline()
+#else
 extern pthread_key(THD*, THR_THD);
 inline THD *_current_thd(void)
 {
   return my_pthread_getspecific_ptr(THD*,THR_THD);
 }
+#endif
 #define current_thd _current_thd()
 
+
 /** 
   The meat of thd_proc_info(THD*, char*), a macro that packs the last
   three calling-info parameters. 
@@ -1415,14 +1422,14 @@ enum enum_schema_tables get_schema_table_idx(ST_SCHEMA_TABLE *schema_table);
 
 /* sql_prepare.cc */
 
-void mysql_stmt_prepare(THD *thd, const char *packet, uint packet_length);
-void mysql_stmt_execute(THD *thd, char *packet, uint packet_length);
-void mysql_stmt_close(THD *thd, char *packet);
+void mysqld_stmt_prepare(THD *thd, const char *packet, uint packet_length);
+void mysqld_stmt_execute(THD *thd, char *packet, uint packet_length);
+void mysqld_stmt_close(THD *thd, char *packet);
 void mysql_sql_stmt_prepare(THD *thd);
 void mysql_sql_stmt_execute(THD *thd);
 void mysql_sql_stmt_close(THD *thd);
-void mysql_stmt_fetch(THD *thd, char *packet, uint packet_length);
-void mysql_stmt_reset(THD *thd, char *packet);
+void mysqld_stmt_fetch(THD *thd, char *packet, uint packet_length);
+void mysqld_stmt_reset(THD *thd, char *packet);
 void mysql_stmt_get_longdata(THD *thd, char *pos, ulong packet_length);
 void reinit_stmt_before_use(THD *thd, LEX *lex);
 
@@ -1884,8 +1891,12 @@ extern time_t server_start_time, flush_status_time;
 #endif /* MYSQL_SERVER */
 #if defined MYSQL_SERVER || defined INNODB_COMPATIBILITY_HOOKS
 extern uint mysql_data_home_len;
-extern char *mysql_data_home,server_version[SERVER_VERSION_LENGTH],
-            mysql_real_data_home[], mysql_unpacked_real_data_home[];
+
+extern MYSQL_PLUGIN_IMPORT char  *mysql_data_home;
+extern char server_version[SERVER_VERSION_LENGTH];
+extern MYSQL_PLUGIN_IMPORT char mysql_real_data_home[];
+extern char mysql_unpacked_real_data_home[];
+
 extern CHARSET_INFO *character_set_filesystem;
 #endif /* MYSQL_SERVER || INNODB_COMPATIBILITY_HOOKS */
 #ifdef MYSQL_SERVER
@@ -1893,10 +1904,13 @@ extern char *opt_mysql_tmpdir, mysql_charsets_dir[],
             def_ft_boolean_syntax[sizeof(ft_boolean_syntax)];
 extern int mysql_unpacked_real_data_home_len;
 #define mysql_tmpdir (my_tmpdir(&mysql_tmpdir_list))
-extern MY_TMPDIR mysql_tmpdir_list;
+extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list;
 extern const LEX_STRING command_name[];
-extern const char *first_keyword, *my_localhost, *delayed_user, *binary_keyword;
-extern const char **errmesg;			/* Error messages */
+
+extern const char *first_keyword, *delayed_user, *binary_keyword;
+extern MYSQL_PLUGIN_IMPORT const char  *my_localhost;
+extern MYSQL_PLUGIN_IMPORT const char **errmesg;			/* Error messages */
+
 extern const char *myisam_recover_options_str;
 extern const char *in_left_expr_name, *in_additional_cond, *in_having_cond;
 extern const char * const TRG_EXT;
@@ -1910,8 +1924,8 @@ extern Le_creator le_creator;
 extern char language[FN_REFLEN];
 #endif /* MYSQL_SERVER */
 #if defined MYSQL_SERVER || defined INNODB_COMPATIBILITY_HOOKS
-extern char reg_ext[FN_EXTLEN];
-extern uint reg_ext_length;
+extern MYSQL_PLUGIN_IMPORT char reg_ext[FN_EXTLEN];
+extern MYSQL_PLUGIN_IMPORT uint reg_ext_length;
 #endif /* MYSQL_SERVER || INNODB_COMPATIBILITY_HOOKS */
 #ifdef MYSQL_SERVER
 extern char glob_hostname[FN_REFLEN], mysql_home[FN_REFLEN];
@@ -1931,20 +1945,22 @@ extern ulong slave_open_temp_tables;
 extern ulong query_cache_size, query_cache_min_res_unit;
 extern ulong slow_launch_threads, slow_launch_time;
 extern ulong table_cache_size, table_def_size;
-extern ulong max_connections,max_connect_errors, connect_timeout;
+extern MYSQL_PLUGIN_IMPORT ulong max_connections;
+extern ulong max_connect_errors, connect_timeout;
 extern ulong slave_net_timeout, slave_trans_retries;
 extern uint max_user_connections;
 extern ulong what_to_log,flush_time;
 extern ulong query_buff_size;
 extern ulong max_prepared_stmt_count, prepared_stmt_count;
-extern ulong binlog_cache_size, max_binlog_cache_size, open_files_limit;
+extern ulong binlog_cache_size, open_files_limit;
+extern ulonglong max_binlog_cache_size;
 extern ulong max_binlog_size, max_relay_log_size;
 extern ulong opt_binlog_rows_event_max_size;
 extern ulong rpl_recovery_rank, thread_cache_size, thread_pool_size;
 extern ulong back_log;
 #endif /* MYSQL_SERVER */
 #if defined MYSQL_SERVER || defined INNODB_COMPATIBILITY_HOOKS
-extern ulong specialflag;
+extern ulong MYSQL_PLUGIN_IMPORT specialflag;
 #endif /* MYSQL_SERVER || INNODB_COMPATIBILITY_HOOKS */
 #ifdef MYSQL_SERVER
 extern ulong current_pid;
@@ -1957,7 +1973,7 @@ extern uint protocol_version, mysqld_port, dropping_tables;
 extern uint delay_key_write_options;
 #endif /* MYSQL_SERVER */
 #if defined MYSQL_SERVER || defined INNODB_COMPATIBILITY_HOOKS
-extern uint lower_case_table_names;
+extern MYSQL_PLUGIN_IMPORT uint lower_case_table_names;
 #endif /* MYSQL_SERVER || INNODB_COMPATIBILITY_HOOKS */
 #ifdef MYSQL_SERVER
 extern bool opt_endinfo, using_udf_functions;
@@ -1965,7 +1981,7 @@ extern my_bool locked_in_memory;
 extern bool opt_using_transactions;
 #endif /* MYSQL_SERVER */
 #if defined MYSQL_SERVER || defined INNODB_COMPATIBILITY_HOOKS
-extern bool mysqld_embedded;
+extern MYSQL_PLUGIN_IMPORT bool mysqld_embedded;
 #endif /* MYSQL_SERVER || INNODB_COMPATIBILITY_HOOKS */
 #ifdef MYSQL_SERVER
 extern bool opt_large_files, server_id_supplied;
@@ -2003,7 +2019,7 @@ extern uint opt_large_page_size;
 extern char *opt_logname, *opt_slow_logname;
 extern const char *log_output_str;
 
-extern MYSQL_BIN_LOG mysql_bin_log;
+extern MYSQL_PLUGIN_IMPORT MYSQL_BIN_LOG mysql_bin_log;
 extern LOGGER logger;
 extern TABLE_LIST general_log, slow_log;
 extern FILE *bootstrap_file;
@@ -2011,13 +2027,14 @@ extern int bootstrap_error;
 extern FILE *stderror_file;
 extern pthread_key(MEM_ROOT**,THR_MALLOC);
 extern pthread_mutex_t LOCK_mysql_create_db,LOCK_Acl,LOCK_open, LOCK_lock_db,
-       LOCK_thread_count,LOCK_mapped_file,LOCK_user_locks, LOCK_status,
+       LOCK_mapped_file,LOCK_user_locks, LOCK_status,
        LOCK_error_log, LOCK_delayed_insert, LOCK_uuid_generator,
        LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone,
        LOCK_slave_list, LOCK_active_mi, LOCK_manager, LOCK_global_read_lock,
        LOCK_global_system_variables, LOCK_user_conn,
        LOCK_prepared_stmt_count,
        LOCK_bytes_sent, LOCK_bytes_received, LOCK_connection_count;
+extern MYSQL_PLUGIN_IMPORT pthread_mutex_t LOCK_thread_count;
 #ifdef HAVE_OPENSSL
 extern pthread_mutex_t LOCK_des_key_file;
 #endif
@@ -2037,7 +2054,7 @@ extern const String my_null_string;
 extern SHOW_VAR status_vars[];
 #endif /* MYSQL_SERVER */
 #if defined MYSQL_SERVER || defined INNODB_COMPATIBILITY_HOOKS
-extern struct system_variables global_system_variables;
+extern MYSQL_PLUGIN_IMPORT struct system_variables global_system_variables;
 #endif /* MYSQL_SERVER || INNODB_COMPATIBILITY_HOOKS */
 #ifdef MYSQL_SERVER
 extern struct system_variables max_system_variables;
@@ -2255,6 +2272,16 @@ char *fn_rext(char *name);
 #if defined MYSQL_SERVER || defined INNODB_COMPATIBILITY_HOOKS
 uint strconvert(CHARSET_INFO *from_cs, const char *from,
                 CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors);
+/* depends on errmsg.txt Database `db`, Table `t` ... */
+#define EXPLAIN_FILENAME_MAX_EXTRA_LENGTH 63
+enum enum_explain_filename_mode
+{
+  EXPLAIN_ALL_VERBOSE= 0,
+  EXPLAIN_PARTITIONS_VERBOSE,
+  EXPLAIN_PARTITIONS_AS_COMMENT
+};
+uint explain_filename(const char *from, char *to, uint to_length,
+                      enum_explain_filename_mode explain_mode);
 uint filename_to_tablename(const char *from, char *to, uint to_length);
 uint tablename_to_filename(const char *from, char *to, uint to_length);
 uint check_n_cut_mysql50_prefix(const char *from, char *to, uint to_length);
@@ -2313,6 +2340,12 @@ extern void turn_parser_debug_on();
 SQL_CRYPT *get_crypt_for_frm(void);
 #endif
 
+/* password.c */
+extern "C" void my_make_scrambled_password_323(char *to, const char *password,
+                                               size_t pass_len);
+extern "C" void my_make_scrambled_password(char *to, const char *password,
+                                           size_t pass_len);
+
 #include "sql_view.h"
 
 /* Some inline functions for more speed */
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 140b7d1490d..83101f6ed0e 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -519,7 +519,8 @@ ulong slave_net_timeout, slave_trans_retries;
 ulong slave_exec_mode_options;
 const char *slave_exec_mode_str= "STRICT";
 ulong thread_cache_size=0, thread_pool_size= 0;
-ulong binlog_cache_size=0, max_binlog_cache_size=0;
+ulong binlog_cache_size=0;
+ulonglong  max_binlog_cache_size=0;
 ulong query_cache_size=0;
 ulong refresh_version;  /* Increments on each reload */
 query_id_t global_query_id;
@@ -994,6 +995,7 @@ static void close_connections(void)
   }
   (void) pthread_mutex_unlock(&LOCK_thread_count);
 
+  close_active_mi();
   DBUG_PRINT("quit",("close_connections thread"));
   DBUG_VOID_RETURN;
 }
@@ -1675,7 +1677,6 @@ static void network_init(void)
       opt_enable_named_pipe)
   {
     
-    pipe_name[sizeof(pipe_name)-1]= 0;		/* Safety if too long string */
     strxnmov(pipe_name, sizeof(pipe_name)-1, "\\\\.\\pipe\\",
 	     mysqld_unix_port, NullS);
     bzero((char*) &saPipeSecurity, sizeof(saPipeSecurity));
@@ -4870,8 +4871,9 @@ void handle_connection_in_main_thread(THD *thd)
   safe_mutex_assert_owner(&LOCK_thread_count);
   thread_cache_size=0;			// Safety
   threads.append(thd);
-  (void) pthread_mutex_unlock(&LOCK_thread_count);
-  handle_one_connection((void*) thd);
+  pthread_mutex_unlock(&LOCK_thread_count);
+  thd->start_utime= my_micro_time();
+  handle_one_connection(thd);
 }
 
 
@@ -4896,7 +4898,7 @@ void create_thread_to_handle_connection(THD *thd)
     thread_created++;
     threads.append(thd);
     DBUG_PRINT("info",(("creating thread %lu"), thd->thread_id));
-    thd->connect_utime= thd->start_utime= my_micro_time();
+    thd->prior_thr_create_utime= thd->start_utime= my_micro_time();
     if ((error=pthread_create(&thd->real_id,&connection_attrib,
                               handle_one_connection,
                               (void*) thd)))
@@ -6679,7 +6681,7 @@ log and this option does nothing anymore.",
   {"max_binlog_cache_size", OPT_MAX_BINLOG_CACHE_SIZE,
    "Can be used to restrict the total size used to cache a multi-transaction query.",
    (uchar**) &max_binlog_cache_size, (uchar**) &max_binlog_cache_size, 0,
-   GET_ULONG, REQUIRED_ARG, ULONG_MAX, IO_SIZE, ULONG_MAX, 0, IO_SIZE, 0},
+   GET_ULL, REQUIRED_ARG, ULONG_MAX, IO_SIZE, ULONGLONG_MAX, 0, IO_SIZE, 0},
   {"max_binlog_size", OPT_MAX_BINLOG_SIZE,
    "Binary log will be rotated automatically when the size exceeds this \
 value. Will also apply to relay logs if max_relay_log_size is 0. \
@@ -6862,7 +6864,7 @@ The minimum value for this variable is 4096.",
    (uchar**) &opt_plugin_dir_ptr, (uchar**) &opt_plugin_dir_ptr, 0,
    GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
   {"plugin-load", OPT_PLUGIN_LOAD,
-   "Optional colon-separated list of plugins to load, where each plugin is "
+   "Optional semicolon-separated list of plugins to load, where each plugin is "
    "identified as name=library, where name is the plugin name and library "
    "is the plugin library in plugin_dir.",
    (uchar**) &opt_plugin_load, (uchar**) &opt_plugin_load, 0,
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 147874611ce..e3aef02637f 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3760,8 +3760,8 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
 
   DBUG_PRINT("info", ("index_merge scans cost %g", imerge_cost));
   if (imerge_too_expensive || (imerge_cost > read_time) ||
-      (non_cpk_scan_records+cpk_scan_records >= param->table->file->stats.records) &&
-      read_time != DBL_MAX)
+      ((non_cpk_scan_records+cpk_scan_records >= param->table->file->stats.records) &&
+      read_time != DBL_MAX))
   {
     /*
       Bail out if it is obvious that both index_merge and ROR-union will be
@@ -7950,7 +7950,7 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
     goto err;
   quick->records= records;
 
-  if (cp_buffer_from_ref(thd, table, ref) && thd->is_fatal_error ||
+  if ((cp_buffer_from_ref(thd, table, ref) && thd->is_fatal_error) ||
       !(range= new(alloc) QUICK_RANGE()))
     goto err;                                   // out of memory
 
@@ -8556,7 +8556,7 @@ int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
     result= file->read_range_first(last_range->min_keypart_map ? &start_key : 0,
 				   last_range->max_keypart_map ? &end_key : 0,
                                    test(last_range->flag & EQ_RANGE),
-				   sorted);
+				   TRUE);
     if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
       last_range= 0;			// Stop searching
 
@@ -8826,7 +8826,7 @@ int QUICK_RANGE_SELECT::cmp_prev(QUICK_RANGE *range_arg)
 
   cmp= key_cmp(key_part_info, range_arg->min_key,
                range_arg->min_length);
-  if (cmp > 0 || cmp == 0 && !(range_arg->flag & NEAR_MIN))
+  if (cmp > 0 || (cmp == 0 && !(range_arg->flag & NEAR_MIN)))
     return 0;
   return 1;                                     // outside of range
 }
@@ -10886,8 +10886,14 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
       /* Compare the found key with max_key. */
       int cmp_res= key_cmp(index_info->key_part, max_key,
                            real_prefix_len + min_max_arg_len);
-      if (!((cur_range->flag & NEAR_MAX) && (cmp_res == -1) ||
-            (cmp_res <= 0)))
+      /*
+        The key is outside of the range if: 
+        the interval is open and the key is equal to the maximum boundry
+        or
+        the key is greater than the maximum
+      */
+      if (((cur_range->flag & NEAR_MAX) && cmp_res == 0) ||
+          cmp_res > 0)
       {
         result= HA_ERR_KEY_NOT_FOUND;
         continue;
@@ -11004,8 +11010,14 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
       /* Compare the found key with min_key. */
       int cmp_res= key_cmp(index_info->key_part, min_key,
                            real_prefix_len + min_max_arg_len);
-      if (!((cur_range->flag & NEAR_MIN) && (cmp_res == 1) ||
-            (cmp_res >= 0)))
+      /*
+        The key is outside of the range if: 
+        the interval is open and the key is equal to the minimum boundry
+        or
+        the key is less than the minimum
+      */
+      if (((cur_range->flag & NEAR_MIN) && cmp_res == 0) ||
+          cmp_res < 0)
         continue;
     }
     /* If we got to this point, the current key qualifies as MAX. */
diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc
index 3ccc1e5cf41..8e7265ba1ad 100644
--- a/sql/opt_sum.cc
+++ b/sql/opt_sum.cc
@@ -744,8 +744,8 @@ static bool matching_cond(bool max_fl, TABLE_REF *ref, KEY *keyinfo,
   }
   else if (eq_type)
   {
-    if (!is_null && !cond->val_int() ||
-        is_null && !test(part->field->is_null()))  
+    if ((!is_null && !cond->val_int()) ||
+        (is_null && !test(part->field->is_null())))
      return 0;                       // Impossible test
   }
   else if (is_field_part)
diff --git a/sql/parse_file.cc b/sql/parse_file.cc
index f2dbeba1bbf..3d65fa1de31 100644
--- a/sql/parse_file.cc
+++ b/sql/parse_file.cc
@@ -231,7 +231,7 @@ sql_create_definition_file(const LEX_STRING *dir, const LEX_STRING *file_name,
       including dir name, file name itself, and an extension,
       and with unpack_filename() executed over it.
     */    
-    path_end= strxnmov(path, FN_REFLEN, file_name->str, NullS) - path;
+    path_end= strxnmov(path, sizeof(path) - 1, file_name->str, NullS) - path;
   }
 
   // temporary file name
@@ -314,7 +314,7 @@ my_bool rename_in_schema_file(THD *thd,
                               const char *schema, const char *old_name, 
                               const char *new_db, const char *new_name)
 {
-  char old_path[FN_REFLEN], new_path[FN_REFLEN], arc_path[FN_REFLEN];
+  char old_path[FN_REFLEN + 1], new_path[FN_REFLEN + 1], arc_path[FN_REFLEN + 1];
 
   build_table_filename(old_path, sizeof(old_path) - 1,
                        schema, old_name, reg_ext, 0);
diff --git a/sql/password.c b/sql/password.c
index 1ff67888ea4..9204c660b77 100644
--- a/sql/password.c
+++ b/sql/password.c
@@ -137,16 +137,35 @@ void hash_password(ulong *result, const char *password, uint password_len)
     Create password to be stored in user database from raw string
     Used for pre-4.1 password handling
   SYNOPSIS
-    make_scrambled_password_323()
+    my_make_scrambled_password_323()
     to        OUT store scrambled password here
     password  IN  user-supplied password
+    pass_len  IN  length of password string
+*/
+
+void my_make_scrambled_password_323(char *to, const char *password,
+                                    size_t pass_len)
+{
+  ulong hash_res[2];
+  hash_password(hash_res, password, (uint) pass_len);
+  sprintf(to, "%08lx%08lx", hash_res[0], hash_res[1]);
+}
+
+
+/*
+  Wrapper around my_make_scrambled_password_323() to maintain client lib ABI
+  compatibility.
+  In server code usage of my_make_scrambled_password_323() is preferred to
+  avoid strlen().
+  SYNOPSIS
+    make_scrambled_password_323()
+    to        OUT store scrambled password here
+    password  IN  NULL-terminated string with user-supplied password
 */
 
 void make_scrambled_password_323(char *to, const char *password)
 {
-  ulong hash_res[2];
-  hash_password(hash_res, password, (uint) strlen(password));
-  sprintf(to, "%08lx%08lx", hash_res[0], hash_res[1]);
+  my_make_scrambled_password_323(to, password, strlen(password));
 }
 
 
@@ -383,20 +402,21 @@ my_crypt(char *to, const uchar *s1, const uchar *s2, uint len)
     The result of this function is used as return value from PASSWORD() and
     is stored in the database.
   SYNOPSIS
-    make_scrambled_password()
+    my_make_scrambled_password()
     buf       OUT buffer of size 2*SHA1_HASH_SIZE + 2 to store hex string
-    password  IN  NULL-terminated password string
+    password  IN  password string
+    pass_len  IN  length of password string
 */
 
-void
-make_scrambled_password(char *to, const char *password)
+void my_make_scrambled_password(char *to, const char *password,
+                                size_t pass_len)
 {
   SHA1_CONTEXT sha1_context;
   uint8 hash_stage2[SHA1_HASH_SIZE];
 
   mysql_sha1_reset(&sha1_context);
   /* stage 1: hash password */
-  mysql_sha1_input(&sha1_context, (uint8 *) password, (uint) strlen(password));
+  mysql_sha1_input(&sha1_context, (uint8 *) password, (uint) pass_len);
   mysql_sha1_result(&sha1_context, (uint8 *) to);
   /* stage 2: hash stage1 output */
   mysql_sha1_reset(&sha1_context);
@@ -409,6 +429,23 @@ make_scrambled_password(char *to, const char *password)
 }
   
 
+/*
+  Wrapper around my_make_scrambled_password() to maintain client lib ABI
+  compatibility.
+  In server code usage of my_make_scrambled_password() is preferred to
+  avoid strlen().
+  SYNOPSIS
+    make_scrambled_password()
+    buf       OUT buffer of size 2*SHA1_HASH_SIZE + 2 to store hex string
+    password  IN  NULL-terminated password string
+*/
+
+void make_scrambled_password(char *to, const char *password)
+{
+  my_make_scrambled_password(to, password, strlen(password));
+}
+
+
 /*
     Produce an obscure octet sequence from password and random
     string, recieved from the server. This sequence corresponds to the
diff --git a/sql/rpl_reporting.cc b/sql/rpl_reporting.cc
index 28f257790c7..a09140de3c4 100644
--- a/sql/rpl_reporting.cc
+++ b/sql/rpl_reporting.cc
@@ -13,6 +13,7 @@ Slave_reporting_capability::report(loglevel level, int err_code,
   va_list args;
   va_start(args, msg);
 
+  pthread_mutex_lock(&err_lock);
   switch (level)
   {
   case ERROR_LEVEL:
@@ -38,6 +39,7 @@ Slave_reporting_capability::report(loglevel level, int err_code,
 
   my_vsnprintf(pbuff, pbuffsize, msg, args);
 
+  pthread_mutex_unlock(&err_lock);
   va_end(args);
 
   /* If the msg string ends with '.', do not add a ',' it would be ugly */
@@ -46,3 +48,8 @@ Slave_reporting_capability::report(loglevel level, int err_code,
                   (pbuff[0] && *(strend(pbuff)-1) == '.') ? "" : ",",
                   err_code);
 }
+
+Slave_reporting_capability::~Slave_reporting_capability()
+{
+  pthread_mutex_destroy(&err_lock);
+}
diff --git a/sql/rpl_reporting.h b/sql/rpl_reporting.h
index 2e3fa3cea83..ce33407e516 100644
--- a/sql/rpl_reporting.h
+++ b/sql/rpl_reporting.h
@@ -16,6 +16,8 @@
 class Slave_reporting_capability
 {
 public:
+  /** lock used to synchronize m_last_error on 'SHOW SLAVE STATUS' **/
+  mutable pthread_mutex_t err_lock;
   /**
      Constructor.
 
@@ -24,6 +26,7 @@ public:
   Slave_reporting_capability(char const *thread_name)
     : m_thread_name(thread_name)
   {
+      pthread_mutex_init(&err_lock, MY_MUTEX_INIT_FAST);
   }
 
   /**
@@ -44,7 +47,9 @@ public:
      STATUS</code>.
    */
   void clear_error() {
+    pthread_mutex_lock(&err_lock);
     m_last_error.clear();
+    pthread_mutex_unlock(&err_lock);
   }
 
   /**
@@ -72,6 +77,7 @@ public:
 
   Error const& last_error() const { return m_last_error; }
 
+  virtual ~Slave_reporting_capability()= 0;
 private:
   /**
      Last error produced by the I/O or SQL thread respectively.
@@ -79,6 +85,10 @@ private:
   mutable Error m_last_error;
 
   char const *const m_thread_name;
+
+  // not implemented
+  Slave_reporting_capability(const Slave_reporting_capability& rhs);
+  Slave_reporting_capability& operator=(const Slave_reporting_capability& rhs);
 };
 
 #endif // RPL_REPORTING_H
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index e93417374fe..18fbae9bb9d 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -104,9 +104,16 @@ int init_relay_log_info(Relay_log_info* rli,
   rli->tables_to_lock= 0;
   rli->tables_to_lock_count= 0;
 
-  fn_format(rli->slave_patternload_file, PREFIX_SQL_LOAD, slave_load_tmpdir, "",
-            MY_PACK_FILENAME | MY_UNPACK_FILENAME |
-            MY_RETURN_REAL_PATH);
+  char pattern[FN_REFLEN];
+  if (fn_format(pattern, PREFIX_SQL_LOAD, slave_load_tmpdir, "",
+            MY_SAFE_PATH | MY_RETURN_REAL_PATH) == NullS)
+  {
+    pthread_mutex_unlock(&rli->data_lock);
+    sql_print_error("Unable to use slave's temporary directory %s",
+                    slave_load_tmpdir);
+    DBUG_RETURN(1);
+  }
+  unpack_filename(rli->slave_patternload_file, pattern);
   rli->slave_patternload_file_size= strlen(rli->slave_patternload_file);
 
   /*
@@ -940,6 +947,7 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset,
   if (count_relay_log_space(rli))
   {
     *errmsg= "Error counting relay log space";
+    error=1;
     goto err;
   }
   if (!just_reset)
diff --git a/sql/set_var.cc b/sql/set_var.cc
index bc8c91342e6..0b89333ce03 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -359,8 +359,8 @@ static sys_var_const    sys_lower_case_table_names(&vars,
                                                    &lower_case_table_names);
 static sys_var_thd_ulong_session_readonly sys_max_allowed_packet(&vars, "max_allowed_packet",
 					       &SV::max_allowed_packet);
-static sys_var_long_ptr	sys_max_binlog_cache_size(&vars, "max_binlog_cache_size",
-						  &max_binlog_cache_size);
+static sys_var_ulonglong_ptr sys_max_binlog_cache_size(&vars, "max_binlog_cache_size",
+                                                       &max_binlog_cache_size);
 static sys_var_long_ptr	sys_max_binlog_size(&vars, "max_binlog_size",
 					    &max_binlog_size,
                                             fix_max_binlog_size);
diff --git a/sql/share/errmsg.txt b/sql/share/errmsg.txt
index f9b66990e93..42bca02984d 100644
--- a/sql/share/errmsg.txt
+++ b/sql/share/errmsg.txt
@@ -6076,7 +6076,7 @@ ER_SLAVE_INCIDENT
 ER_NO_PARTITION_FOR_GIVEN_VALUE_SILENT
         eng "Table has no partition for some existing values"
 ER_BINLOG_UNSAFE_STATEMENT
-        eng "Statement is not safe to log in statement format."
+        eng "Statement may not be safe to log in statement format."
         swe "Detta �r inte s�kert att logga i statement-format."
 ER_SLAVE_FATAL_ERROR
         eng "Fatal error: %s"
@@ -6177,3 +6177,27 @@ ER_TOO_LONG_TABLE_COMMENT
 ER_TOO_LONG_FIELD_COMMENT
   eng "Comment for field '%-.64s' is too long (max = %lu)"
   por "Coment�rio para o campo '%-.64s' � longo demais (max = %lu)"
+
+ER_FUNC_INEXISTENT_NAME_COLLISION 42000 
+  eng "FUNCTION %s does not exist. Check the 'Function Name Parsing and Resolution' section in the Reference Manual"
+
+# When updating these, please update EXPLAIN_FILENAME_MAX_EXTRA_LENGTH in
+# mysql_priv.h with the new maximal additional length for explain_filename.
+ER_DATABASE_NAME
+  eng "Database `%s`"
+  swe "Databas `%s`"
+ER_TABLE_NAME
+  eng "Table `%s`"
+  swe "Tabell `%s`"
+ER_PARTITION_NAME
+  eng "Partition `%s`"
+  swe "Partition `%s`"
+ER_SUBPARTITION_NAME
+  eng "Subpartition `%s`"
+  swe "Subpartition `%s`"
+ER_TEMPORARY_NAME
+  eng "Temporary"
+  swe "Tempor�r"
+ER_RENAMED_NAME
+  eng "Renamed"
+  swe "Namn�ndrad"
diff --git a/sql/slave.cc b/sql/slave.cc
index 81c18c5e04b..81be7064f89 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -389,6 +389,13 @@ void init_slave_skip_errors(const char* arg)
   DBUG_VOID_RETURN;
 }
 
+static void set_thd_in_use_temporary_tables(Relay_log_info *rli)
+{
+  TABLE *table;
+
+  for (table= rli->save_temporary_tables ; table ; table= table->next)
+    table->in_use= rli->sql_thd;
+}
 
 int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock)
 {
@@ -661,7 +668,7 @@ static int end_slave_on_walk(Master_info* mi, uchar* /*unused*/)
 
 
 /*
-  Free all resources used by slave
+  Release slave threads at time of executing shutdown.
 
   SYNOPSIS
     end_slave()
@@ -687,14 +694,31 @@ void end_slave()
       once multi-master code is ready.
     */
     terminate_slave_threads(active_mi,SLAVE_FORCE_ALL);
-    end_master_info(active_mi);
-    delete active_mi;
-    active_mi= 0;
   }
   pthread_mutex_unlock(&LOCK_active_mi);
   DBUG_VOID_RETURN;
 }
 
+/**
+   Free all resources used by slave threads at time of executing shutdown.
+   The routine must be called after all possible users of @c active_mi
+   have left.
+
+   SYNOPSIS
+     close_active_mi()
+
+*/
+void close_active_mi()
+{
+  pthread_mutex_lock(&LOCK_active_mi);
+  if (active_mi)
+  {
+    end_master_info(active_mi);
+    delete active_mi;
+    active_mi= 0;
+  }
+  pthread_mutex_unlock(&LOCK_active_mi);
+}
 
 static bool io_slave_killed(THD* thd, Master_info* mi)
 {
@@ -803,7 +827,7 @@ int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
         up to and including newline.
       */
       int c;
-      while (((c=my_b_get(f)) != '\n' && c != my_b_EOF));
+      while (((c=my_b_get(f)) != '\n' && c != my_b_EOF)) ;
     }
     DBUG_RETURN(0);
   }
@@ -1486,6 +1510,8 @@ bool show_master_info(THD* thd, Master_info* mi)
 
     pthread_mutex_lock(&mi->data_lock);
     pthread_mutex_lock(&mi->rli.data_lock);
+    pthread_mutex_lock(&mi->err_lock);
+    pthread_mutex_lock(&mi->rli.err_lock);
     protocol->store(mi->host, &my_charset_bin);
     protocol->store(mi->user, &my_charset_bin);
     protocol->store((uint32) mi->port);
@@ -1585,6 +1611,8 @@ bool show_master_info(THD* thd, Master_info* mi)
     // Last_SQL_Error
     protocol->store(mi->rli.last_error().message, &my_charset_bin);
 
+    pthread_mutex_unlock(&mi->rli.err_lock);
+    pthread_mutex_unlock(&mi->err_lock);
     pthread_mutex_unlock(&mi->rli.data_lock);
     pthread_mutex_unlock(&mi->data_lock);
 
@@ -1856,25 +1884,6 @@ static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings)
   DBUG_RETURN(len - 1);
 }
 
-
-int check_expected_error(THD* thd, Relay_log_info const *rli,
-                         int expected_error)
-{
-  DBUG_ENTER("check_expected_error");
-
-  switch (expected_error) {
-  case ER_NET_READ_ERROR:
-  case ER_NET_ERROR_ON_WRITE:
-  case ER_QUERY_INTERRUPTED:
-  case ER_SERVER_SHUTDOWN:
-  case ER_NEW_ABORTING_CONNECTION:
-    DBUG_RETURN(1);
-  default:
-    DBUG_RETURN(0);
-  }
-}
-
-
 /*
   Check if the current error is of temporary nature of not.
   Some errors are temporary in nature, such as
@@ -2228,7 +2237,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
                           "the slave_transaction_retries variable.",
                           slave_trans_retries);
       }
-      else if (exec_res && !temp_err ||
+      else if ((exec_res && !temp_err) ||
                (opt_using_transactions &&
                 rli->group_relay_log_pos == rli->event_relay_log_pos))
       {
@@ -2390,6 +2399,7 @@ pthread_handler_t handle_slave_io(void *arg)
 
   pthread_detach_this_thread();
   thd->thread_stack= (char*) &thd; // remember where our stack is
+  mi->clear_error();
   if (init_slave_thread(thd, SLAVE_THD_IO))
   {
     pthread_cond_broadcast(&mi->start_cond);
@@ -2504,6 +2514,7 @@ requesting master dump") ||
         goto connected;
       });
 
+    DBUG_ASSERT(mi->last_error().number == 0);
     while (!io_slave_killed(thd,mi))
     {
       ulong event_len;
@@ -2668,13 +2679,20 @@ err:
   LOAD DATA INFILE.
  */
 static 
-int check_temp_dir(char* tmp_dir, char *tmp_file)
+int check_temp_dir(char* tmp_file)
 {
   int fd;
   MY_DIR *dirp;
+  char tmp_dir[FN_REFLEN];
+  size_t tmp_dir_size;
 
   DBUG_ENTER("check_temp_dir");
 
+  /*
+    Get the directory from the temporary file.
+  */
+  dirname_part(tmp_dir, tmp_file, &tmp_dir_size);
+
   /*
     Check if the directory exists.
    */
@@ -2750,6 +2768,7 @@ pthread_handler_t handle_slave_sql(void *arg)
   }
   thd->init_for_queries();
   thd->temporary_tables = rli->save_temporary_tables; // restore temp tables
+  set_thd_in_use_temporary_tables(rli);   // (re)set sql_thd in use for saved temp tables
   pthread_mutex_lock(&LOCK_thread_count);
   threads.append(thd);
   pthread_mutex_unlock(&LOCK_thread_count);
@@ -2830,7 +2849,7 @@ log '%s' at position %s, relay log '%s' position: %s", RPL_LOG_NAME,
                     llstr(rli->group_master_log_pos,llbuff),rli->group_relay_log_name,
                     llstr(rli->group_relay_log_pos,llbuff1));
 
-  if (check_temp_dir(slave_load_tmpdir, rli->slave_patternload_file))
+  if (check_temp_dir(rli->slave_patternload_file))
   {
     rli->report(ERROR_LEVEL, thd->main_da.sql_errno(), 
                 "Unable to use slave's temporary directory %s - %s", 
@@ -2996,6 +3015,7 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
   DBUG_ASSERT(rli->sql_thd == thd);
   THD_CHECK_SENTRY(thd);
   rli->sql_thd= 0;
+  set_thd_in_use_temporary_tables(rli);  // (re)set sql_thd in use for saved temp tables
   pthread_mutex_lock(&LOCK_thread_count);
   THD_CHECK_SENTRY(thd);
   delete thd;
@@ -3694,6 +3714,7 @@ static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
 
   if (!slave_was_killed)
   {
+    mi->clear_error(); // clear possible left over reconnect error
     if (reconnect)
     {
       if (!suppress_warnings && global_system_variables.log_warnings)
diff --git a/sql/slave.h b/sql/slave.h
index abd63315e62..a44a7eed83e 100644
--- a/sql/slave.h
+++ b/sql/slave.h
@@ -171,10 +171,10 @@ bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
 bool rpl_master_erroneous_autoinc(THD* thd);
 
 const char *print_slave_db_safe(const char *db);
-int check_expected_error(THD* thd, Relay_log_info const *rli, int error_code);
 void skip_load_data_infile(NET* net);
 
-void end_slave(); /* clean up */
+void end_slave(); /* release slave threads */
+void close_active_mi(); /* clean up slave threads data */
 void clear_until_condition(Relay_log_info* rli);
 void clear_slave_error(Relay_log_info* rli);
 void end_relay_log_info(Relay_log_info* rli);
diff --git a/sql/sp.cc b/sql/sp.cc
index 8c8149d0afc..29e228f5e45 100644
--- a/sql/sp.cc
+++ b/sql/sp.cc
@@ -941,7 +941,7 @@ sp_create_routine(THD *thd, int type, sp_head *sp)
       /* Such a statement can always go directly to binlog, no trans cache */
       thd->binlog_query(THD::MYSQL_QUERY_TYPE,
                         log_query.c_ptr(), log_query.length(),
-                        FALSE, FALSE, THD::NOT_KILLED);
+                        FALSE, FALSE, 0);
       thd->variables.sql_mode= 0;
     }
 
@@ -1308,13 +1308,20 @@ sp_find_routine(THD *thd, int type, sp_name *name, sp_cache **cp,
 /**
   This is used by sql_acl.cc:mysql_routine_grant() and is used to find
   the routines in 'routines'.
+
+  @param thd Thread handler
+  @param routines List of needles in the hay stack
+  @param any Any of the needles are good enough
+
+  @return
+    @retval FALSE Found.
+    @retval TRUE  Not found
 */
 
-int
-sp_exist_routines(THD *thd, TABLE_LIST *routines, bool any, bool no_error)
+bool
+sp_exist_routines(THD *thd, TABLE_LIST *routines, bool any)
 {
   TABLE_LIST *routine;
-  bool result= 0;
   bool sp_object_found;
   DBUG_ENTER("sp_exists_routine");
   for (routine= routines; routine; routine= routine->next_global)
@@ -1336,21 +1343,16 @@ sp_exist_routines(THD *thd, TABLE_LIST *routines, bool any, bool no_error)
     if (sp_object_found)
     {
       if (any)
-        DBUG_RETURN(1);
-      result= 1;
+        break;
     }
     else if (!any)
     {
-      if (!no_error)
-      {
-	my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "FUNCTION or PROCEDURE", 
-		 routine->table_name);
-	DBUG_RETURN(-1);
-      }
-      DBUG_RETURN(0);
+      my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "FUNCTION or PROCEDURE",
+               routine->table_name);
+      DBUG_RETURN(TRUE);
     }
   }
-  DBUG_RETURN(result);
+  DBUG_RETURN(FALSE);
 }
 
 
diff --git a/sql/sp.h b/sql/sp.h
index 75088ea0b83..75c6856f64b 100644
--- a/sql/sp.h
+++ b/sql/sp.h
@@ -39,8 +39,8 @@ sp_head *
 sp_find_routine(THD *thd, int type, sp_name *name,
                 sp_cache **cp, bool cache_only);
 
-int
-sp_exist_routines(THD *thd, TABLE_LIST *procs, bool any, bool no_error);
+bool
+sp_exist_routines(THD *thd, TABLE_LIST *procs, bool any);
 
 int
 sp_routine_exists_in_table(THD *thd, int type, sp_name *name);
diff --git a/sql/sp_head.cc b/sql/sp_head.cc
index d6984bf5ad0..b7ad8c5c906 100644
--- a/sql/sp_head.cc
+++ b/sql/sp_head.cc
@@ -1780,8 +1780,9 @@ sp_head::execute_function(THD *thd, Item **argp, uint argcount,
     thd->options= binlog_save_options;
     if (thd->binlog_evt_union.unioned_events)
     {
+      int errcode = query_error_code(thd, thd->killed == THD::NOT_KILLED);
       Query_log_event qinfo(thd, binlog_buf.ptr(), binlog_buf.length(),
-                            thd->binlog_evt_union.unioned_events_trans, FALSE);
+                            thd->binlog_evt_union.unioned_events_trans, FALSE, errcode);
       if (mysql_bin_log.write(&qinfo) &&
           thd->binlog_evt_union.unioned_events_trans)
       {
diff --git a/sql/spatial.h b/sql/spatial.h
index dbf5da6665b..86c2ed8c197 100644
--- a/sql/spatial.h
+++ b/sql/spatial.h
@@ -116,12 +116,12 @@ struct MBR
   int touches(const MBR *mbr)
   {
     /* The following should be safe, even if we compare doubles */
-    return ((((mbr->xmin == xmax) || (mbr->xmax == xmin)) &&
-	     (((mbr->ymin >= ymin) && (mbr->ymin <= ymax)) ||
-	      ((mbr->ymax >= ymin) && (mbr->ymax <= ymax)))) ||
-	    (((mbr->ymin == ymax) || (mbr->ymax == ymin)) &&
-	     (((mbr->xmin >= xmin) && (mbr->xmin <= xmax)) ||
-	      ((mbr->xmax >= xmin) && (mbr->xmax <= xmax)))));
+    return ((mbr->xmin == xmax || mbr->xmax == xmin) &&
+            ((mbr->ymin >= ymin && mbr->ymin <= ymax) ||
+             (mbr->ymax >= ymin && mbr->ymax <= ymax))) ||
+           ((mbr->ymin == ymax || mbr->ymax == ymin) &&
+            ((mbr->xmin >= xmin && mbr->xmin <= xmax) ||
+             (mbr->xmax >= xmin && mbr->xmax <= xmax)));
   }
 
   int within(const MBR *mbr)
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
index b1dbb7031ce..ab18a2d1d04 100644
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@@ -936,6 +936,7 @@ int acl_getroot(THD *thd, USER_RESOURCES  *mqh,
 #ifdef HAVE_OPENSSL
     Vio *vio=thd->net.vio;
     SSL *ssl= (SSL*) vio->ssl_arg;
+    X509 *cert;
 #endif
 
     /*
@@ -964,8 +965,11 @@ int acl_getroot(THD *thd, USER_RESOURCES  *mqh,
       */
       if (vio_type(vio) == VIO_TYPE_SSL &&
 	  SSL_get_verify_result(ssl) == X509_V_OK &&
-	  SSL_get_peer_certificate(ssl))
+	  (cert= SSL_get_peer_certificate(ssl)))
+      {
 	user_access= acl_user->access;
+        X509_free(cert);
+      }
       break;
     case SSL_TYPE_SPECIFIED: /* Client should have specified attrib */
       /*
@@ -974,7 +978,6 @@ int acl_getroot(THD *thd, USER_RESOURCES  *mqh,
 	If cipher name is specified, we compare it to actual cipher in
 	use.
       */
-      X509 *cert;
       if (vio_type(vio) != VIO_TYPE_SSL ||
 	  SSL_get_verify_result(ssl) != X509_V_OK)
 	break;
@@ -1014,6 +1017,7 @@ int acl_getroot(THD *thd, USER_RESOURCES  *mqh,
             sql_print_information("X509 issuer mismatch: should be '%s' "
 			      "but is '%s'", acl_user->x509_issuer, ptr);
           free(ptr);
+          X509_free(cert);
           user_access=NO_ACCESS;
           break;
         }
@@ -1033,12 +1037,15 @@ int acl_getroot(THD *thd, USER_RESOURCES  *mqh,
             sql_print_information("X509 subject mismatch: should be '%s' but is '%s'",
                             acl_user->x509_subject, ptr);
           free(ptr);
+          X509_free(cert);
           user_access=NO_ACCESS;
           break;
         }
         user_access= acl_user->access;
         free(ptr);
       }
+      /* Deallocate the X509 certificate. */
+      X509_free(cert);
       break;
 #else  /* HAVE_OPENSSL */
     default:
@@ -1185,12 +1192,12 @@ static void acl_update_user(const char *user, const char *host,
   for (uint i=0 ; i < acl_users.elements ; i++)
   {
     ACL_USER *acl_user=dynamic_element(&acl_users,i,ACL_USER*);
-    if (!acl_user->user && !user[0] ||
-	acl_user->user && !strcmp(user,acl_user->user))
+    if ((!acl_user->user && !user[0]) ||
+	(acl_user->user && !strcmp(user,acl_user->user)))
     {
-      if (!acl_user->host.hostname && !host[0] ||
-	  acl_user->host.hostname &&
-	  !my_strcasecmp(system_charset_info, host, acl_user->host.hostname))
+      if ((!acl_user->host.hostname && !host[0]) ||
+	  (acl_user->host.hostname &&
+	  !my_strcasecmp(system_charset_info, host, acl_user->host.hostname)))
       {
 	acl_user->access=privileges;
 	if (mqh->specified_limits & USER_RESOURCES::QUERIES_PER_HOUR)
@@ -1268,16 +1275,16 @@ static void acl_update_db(const char *user, const char *host, const char *db,
   for (uint i=0 ; i < acl_dbs.elements ; i++)
   {
     ACL_DB *acl_db=dynamic_element(&acl_dbs,i,ACL_DB*);
-    if (!acl_db->user && !user[0] ||
-	acl_db->user &&
-	!strcmp(user,acl_db->user))
+    if ((!acl_db->user && !user[0]) ||
+	(acl_db->user &&
+	!strcmp(user,acl_db->user)))
     {
-      if (!acl_db->host.hostname && !host[0] ||
-	  acl_db->host.hostname &&
-          !strcmp(host, acl_db->host.hostname))
+      if ((!acl_db->host.hostname && !host[0]) ||
+	  (acl_db->host.hostname &&
+          !strcmp(host, acl_db->host.hostname)))
       {
-	if (!acl_db->db && !db[0] ||
-	    acl_db->db && !strcmp(db,acl_db->db))
+	if ((!acl_db->db && !db[0]) ||
+	    (acl_db->db && !strcmp(db,acl_db->db)))
 	{
 	  if (privileges)
 	    acl_db->access=privileges;
@@ -1486,8 +1493,8 @@ bool acl_check_host(const char *host, const char *ip)
     return 0;
   VOID(pthread_mutex_lock(&acl_cache->lock));
 
-  if (host && hash_search(&acl_check_hosts,(uchar*) host,strlen(host)) ||
-      ip && hash_search(&acl_check_hosts,(uchar*) ip, strlen(ip)))
+  if ((host && hash_search(&acl_check_hosts,(uchar*) host,strlen(host))) ||
+      (ip && hash_search(&acl_check_hosts,(uchar*) ip, strlen(ip))))
   {
     VOID(pthread_mutex_unlock(&acl_cache->lock));
     return 0;					// Found host
@@ -1648,7 +1655,7 @@ bool change_password(THD *thd, const char *host, const char *user,
                   new_password));
     thd->clear_error();
     thd->binlog_query(THD::MYSQL_QUERY_TYPE, buff, query_length,
-                      FALSE, FALSE, THD::NOT_KILLED);
+                      FALSE, FALSE, 0);
   }
 end:
   close_thread_tables(thd);
@@ -1704,8 +1711,8 @@ find_acl_user(const char *host, const char *user, my_bool exact)
                        host,
                        acl_user->host.hostname ? acl_user->host.hostname :
                        ""));
-    if (!acl_user->user && !user[0] ||
-	acl_user->user && !strcmp(user,acl_user->user))
+    if ((!acl_user->user && !user[0]) ||
+	(acl_user->user && !strcmp(user,acl_user->user)))
     {
       if (exact ? !my_strcasecmp(system_charset_info, host,
                                  acl_user->host.hostname ?
@@ -2988,8 +2995,8 @@ int mysql_table_grant(THD *thd, TABLE_LIST *table_list,
     {
       if (!(rights & CREATE_ACL))
       {
-        char buf[FN_REFLEN];
-        build_table_filename(buf, sizeof(buf), table_list->db,
+        char buf[FN_REFLEN + 1];
+        build_table_filename(buf, sizeof(buf) - 1, table_list->db,
                              table_list->table_name, reg_ext, 0);
         fn_format(buf, buf, "", "", MY_UNPACK_FILENAME  | MY_RESOLVE_SYMLINKS |
                                     MY_RETURN_REAL_PATH | MY_APPEND_EXT);
@@ -3191,26 +3198,24 @@ int mysql_table_grant(THD *thd, TABLE_LIST *table_list,
 }
 
 
-/*
+/**
   Store routine level grants in the privilege tables
 
-  SYNOPSIS
-    mysql_routine_grant()
-    thd			Thread handle
-    table_list		List of routines to give grant
-    is_proc             true indicates routine list are procedures
-    user_list		List of users to give grant
-    rights		Table level grant
-    revoke_grant	Set to 1 if this is a REVOKE command
+  @param thd Thread handle
+  @param table_list List of routines to give grant
+  @param is_proc Is this a list of procedures?
+  @param user_list List of users to give grant
+  @param rights Table level grant
+  @param revoke_grant Is this is a REVOKE command?
 
-  RETURN
-    0	ok
-    1	error
+  @return
+    @retval FALSE Success.
+    @retval TRUE An error occurred.
 */
 
 bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc,
 			 List <LEX_USER> &user_list, ulong rights,
-			 bool revoke_grant, bool no_error)
+			 bool revoke_grant, bool write_to_binlog)
 {
   List_iterator <LEX_USER> str_list (user_list);
   LEX_USER *Str, *tmp_Str;
@@ -3221,22 +3226,20 @@ bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc,
 
   if (!initialized)
   {
-    if (!no_error)
-      my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0),
-               "--skip-grant-tables");
+    my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0),
+             "--skip-grant-tables");
     DBUG_RETURN(TRUE);
   }
   if (rights & ~PROC_ACLS)
   {
-    if (!no_error)
-      my_message(ER_ILLEGAL_GRANT_FOR_TABLE, ER(ER_ILLEGAL_GRANT_FOR_TABLE),
-        	 MYF(0));
+    my_message(ER_ILLEGAL_GRANT_FOR_TABLE, ER(ER_ILLEGAL_GRANT_FOR_TABLE),
+               MYF(0));
     DBUG_RETURN(TRUE);
   }
 
   if (!revoke_grant)
   {
-    if (sp_exist_routines(thd, table_list, is_proc, no_error)<0)
+    if (sp_exist_routines(thd, table_list, is_proc))
       DBUG_RETURN(TRUE);
   }
 
@@ -3317,9 +3320,8 @@ bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc,
     {
       if (revoke_grant)
       {
-        if (!no_error)
-          my_error(ER_NONEXISTING_PROC_GRANT, MYF(0),
-		   Str->user.str, Str->host.str, table_name);
+        my_error(ER_NONEXISTING_PROC_GRANT, MYF(0),
+	         Str->user.str, Str->host.str, table_name);
 	result= TRUE;
 	continue;
       }
@@ -3344,16 +3346,14 @@ bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc,
   }
   thd->mem_root= old_root;
   pthread_mutex_unlock(&acl_cache->lock);
-  if (!result && !no_error)
+
+  if (write_to_binlog)
   {
     write_bin_log(thd, TRUE, thd->query, thd->query_length);
   }
 
   rw_unlock(&LOCK_grant);
 
-  if (!result && !no_error)
-    my_ok(thd);
-
   /* Tables are automatically closed */
   DBUG_RETURN(result);
 }
@@ -5319,16 +5319,13 @@ static int handle_grant_struct(uint struct_no, bool drop,
   uint elements;
   const char *user;
   const char *host;
-  ACL_USER *acl_user;
-  ACL_DB *acl_db;
-  GRANT_NAME *grant_name;
+  ACL_USER *acl_user= NULL;
+  ACL_DB *acl_db= NULL;
+  GRANT_NAME *grant_name= NULL;
   DBUG_ENTER("handle_grant_struct");
   DBUG_PRINT("info",("scan struct: %u  search: '%s'@'%s'",
                      struct_no, user_from->user.str, user_from->host.str));
 
-  LINT_INIT(acl_user);
-  LINT_INIT(acl_db);
-  LINT_INIT(grant_name);
   LINT_INIT(user);
   LINT_INIT(host);
 
@@ -5696,6 +5693,7 @@ bool mysql_drop_user(THD *thd, List <LEX_USER> &list)
   List_iterator <LEX_USER> user_list(list);
   TABLE_LIST tables[GRANT_TABLES];
   bool some_users_deleted= FALSE;
+  ulong old_sql_mode= thd->variables.sql_mode;
   DBUG_ENTER("mysql_drop_user");
 
   /*
@@ -5709,6 +5707,8 @@ bool mysql_drop_user(THD *thd, List <LEX_USER> &list)
   if ((result= open_grant_tables(thd, tables)))
     DBUG_RETURN(result != 1);
 
+  thd->variables.sql_mode&= ~MODE_PAD_CHAR_TO_FULL_LENGTH;
+
   rw_wrlock(&LOCK_grant);
   VOID(pthread_mutex_lock(&acl_cache->lock));
 
@@ -5741,6 +5741,7 @@ bool mysql_drop_user(THD *thd, List <LEX_USER> &list)
 
   rw_unlock(&LOCK_grant);
   close_thread_tables(thd);
+  thd->variables.sql_mode= old_sql_mode;
   DBUG_RETURN(result);
 }
 
@@ -6150,21 +6151,20 @@ bool sp_revoke_privileges(THD *thd, const char *sp_db, const char *sp_name,
 }
 
 
-/*
+/**
   Grant EXECUTE,ALTER privilege for a stored procedure
 
-  SYNOPSIS
-    sp_grant_privileges()
-    thd                         The current thread.
-    db				DB of the stored procedure
-    name			Name of the stored procedure
+  @param thd The current thread.
+  @param sp_db
+  @param sp_name
+  @param is_proc
 
-  RETURN
-    0           OK.
-    < 0         Error. Error message not yet sent.
+  @return
+    @retval FALSE Success
+    @retval TRUE An error occured. Error message not yet sent.
 */
 
-int sp_grant_privileges(THD *thd, const char *sp_db, const char *sp_name,
+bool sp_grant_privileges(THD *thd, const char *sp_db, const char *sp_name,
                          bool is_proc)
 {
   Security_context *sctx= thd->security_ctx;
@@ -6174,6 +6174,7 @@ int sp_grant_privileges(THD *thd, const char *sp_db, const char *sp_name,
   bool result;
   ACL_USER *au;
   char passwd_buff[SCRAMBLED_PASSWORD_CHAR_LENGTH+1];
+  Dummy_error_handler error_handler;
   DBUG_ENTER("sp_grant_privileges");
 
   if (!(combo=(LEX_USER*) thd->alloc(sizeof(st_lex_user))))
@@ -6224,8 +6225,11 @@ int sp_grant_privileges(THD *thd, const char *sp_db, const char *sp_name,
     }
     else
     {
-      my_error(ER_PASSWD_LENGTH, MYF(0), SCRAMBLED_PASSWORD_CHAR_LENGTH);
-      return -1;
+      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_PASSWD_LENGTH,
+                          ER(ER_PASSWD_LENGTH),
+                          SCRAMBLED_PASSWORD_CHAR_LENGTH);
+      return TRUE;
     }
     combo->password.str= passwd_buff;
   }
@@ -6239,10 +6243,17 @@ int sp_grant_privileges(THD *thd, const char *sp_db, const char *sp_name,
     DBUG_RETURN(TRUE);
 
   thd->lex->ssl_type= SSL_TYPE_NOT_SPECIFIED;
+  thd->lex->ssl_cipher= thd->lex->x509_subject= thd->lex->x509_issuer= 0;
   bzero((char*) &thd->lex->mqh, sizeof(thd->lex->mqh));
 
+  /*
+    Only care about whether the operation failed or succeeded
+    as all errors will be handled later.
+  */
+  thd->push_internal_handler(&error_handler);
   result= mysql_routine_grant(thd, tables, is_proc, user_list,
-  				DEFAULT_CREATE_PROC_ACLS, 0, 1);
+                              DEFAULT_CREATE_PROC_ACLS, FALSE, FALSE);
+  thd->pop_internal_handler();
   DBUG_RETURN(result);
 }
 
diff --git a/sql/sql_acl.h b/sql/sql_acl.h
index 9ae17a4bf02..a8090fba2e7 100644
--- a/sql/sql_acl.h
+++ b/sql/sql_acl.h
@@ -233,7 +233,7 @@ int mysql_table_grant(THD *thd, TABLE_LIST *table, List <LEX_USER> &user_list,
                        bool revoke);
 bool mysql_routine_grant(THD *thd, TABLE_LIST *table, bool is_proc,
 			 List <LEX_USER> &user_list, ulong rights,
-			 bool revoke, bool no_error);
+			 bool revoke, bool write_to_binlog);
 my_bool grant_init();
 void grant_free(void);
 my_bool grant_reload(THD *thd);
@@ -264,7 +264,7 @@ void fill_effective_table_privileges(THD *thd, GRANT_INFO *grant,
                                      const char *db, const char *table);
 bool sp_revoke_privileges(THD *thd, const char *sp_db, const char *sp_name,
                           bool is_proc);
-int sp_grant_privileges(THD *thd, const char *sp_db, const char *sp_name,
+bool sp_grant_privileges(THD *thd, const char *sp_db, const char *sp_name,
                          bool is_proc);
 bool check_routine_level_acl(THD *thd, const char *db, const char *name,
                              bool is_proc);
diff --git a/sql/sql_analyse.cc b/sql/sql_analyse.cc
index 9ca6e0a0a2b..d273b3319ee 100644
--- a/sql/sql_analyse.cc
+++ b/sql/sql_analyse.cc
@@ -246,7 +246,7 @@ bool test_if_number(NUM_INFO *info, const char *str, uint str_len)
       }
       DBUG_RETURN(0);
     }
-    for (str++; *(end - 1) == '0'; end--);  // jump over zeros at the end
+    for (str++; *(end - 1) == '0'; end--) ; // jump over zeros at the end
     if (str == end)		     // number was something like '123.000'
     {
       char *endpos= (char*) str;
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index 0dc29f7e3c2..88e1620b152 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -473,13 +473,14 @@ static TABLE_SHARE
 
     @todo Rework alternative ways to deal with ER_NO_SUCH TABLE.
   */
-  if (share || thd->is_error() && thd->main_da.sql_errno() != ER_NO_SUCH_TABLE)
+  if (share || (thd->is_error() && thd->main_da.sql_errno() != ER_NO_SUCH_TABLE))
 
     DBUG_RETURN(share);
 
   /* Table didn't exist. Check if some engine can provide it */
-  if ((tmp= ha_create_table_from_engine(thd, table_list->db,
-                                        table_list->table_name)) < 0)
+  tmp= ha_create_table_from_engine(thd, table_list->db,
+                                   table_list->table_name);
+  if (tmp < 0)
   {
     /*
       No such table in any engine.
@@ -1431,11 +1432,10 @@ static inline uint  tmpkeyval(THD *thd, TABLE *table)
 void close_temporary_tables(THD *thd)
 {
   TABLE *table;
-  TABLE *next;
+  TABLE *next= NULL;
   TABLE *prev_table;
   /* Assume thd->options has OPTION_QUOTE_SHOW_CREATE */
   bool was_quote_show= TRUE;
-  LINT_INIT(next);
 
   if (!thd->temporary_tables)
     return;
@@ -1541,7 +1541,7 @@ void close_temporary_tables(THD *thd)
       thd->variables.character_set_client= system_charset_info;
       Query_log_event qinfo(thd, s_query.ptr(),
                             s_query.length() - 1 /* to remove trailing ',' */,
-                            0, FALSE, THD::NOT_KILLED);
+                            0, FALSE, 0);
       thd->variables.character_set_client= cs_save;
       mysql_bin_log.write(&qinfo);
       thd->variables.pseudo_thread_id= save_pseudo_thread_id;
@@ -2432,7 +2432,7 @@ bool lock_table_name_if_not_cached(THD *thd, const char *db,
 
 bool check_if_table_exists(THD *thd, TABLE_LIST *table, bool *exists)
 {
-  char path[FN_REFLEN];
+  char path[FN_REFLEN + 1];
   int rc;
   DBUG_ENTER("check_if_table_exists");
 
@@ -2617,8 +2617,8 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root,
             distance >  0 - we have lock mode higher then we require
             distance == 0 - we have lock mode exactly which we need
           */
-          if (best_distance < 0 && distance > best_distance ||
-              distance >= 0 && distance < best_distance)
+          if ((best_distance < 0 && distance > best_distance) ||
+              (distance >= 0 && distance < best_distance))
           {
             best_distance= distance;
             best_table= table;
@@ -2649,7 +2649,7 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root,
       real fix will be made after definition cache will be made)
     */
     {
-      char path[FN_REFLEN];
+      char path[FN_REFLEN + 1];
       enum legacy_db_type not_used;
       build_table_filename(path, sizeof(path) - 1,
                            table_list->db, table_list->table_name, reg_ext, 0);
@@ -2963,6 +2963,7 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root,
   table->insert_values= 0;
   table->fulltext_searched= 0;
   table->file->ft_handler= 0;
+  table->reginfo.impossible_range= 0;
   /* Catch wrong handling of the auto_increment_field_not_null. */
   DBUG_ASSERT(!table->auto_increment_field_not_null);
   table->auto_increment_field_not_null= FALSE;
@@ -4015,9 +4016,10 @@ retry:
         /* this DELETE FROM is needed even with row-based binlogging */
         end = strxmov(strmov(query, "DELETE FROM `"),
                       share->db.str,"`.`",share->table_name.str,"`", NullS);
+        int errcode= query_error_code(thd, TRUE);
         thd->binlog_query(THD::STMT_QUERY_TYPE,
                           query, (ulong)(end-query),
-                          FALSE, FALSE, THD::NOT_KILLED);
+                          FALSE, FALSE, errcode);
         my_free(query, MYF(0));
       }
       else
@@ -5585,6 +5587,13 @@ static void update_field_dependencies(THD *thd, Field *field, TABLE *table)
       other_bitmap=   table->read_set;
     }
 
+    /* 
+       The test-and-set mechanism in the bitmap is not reliable during
+       multi-UPDATE statements under MARK_COLUMNS_READ mode
+       (thd->mark_used_columns == MARK_COLUMNS_READ), as this bitmap contains
+       only those columns that are used in the SET clause. I.e they are being
+       set here. See multi_update::prepare()
+    */
     if (bitmap_fast_test_and_set(current_bitmap, field->field_index))
     {
       if (thd->mark_used_columns == MARK_COLUMNS_WRITE)
@@ -6332,7 +6341,7 @@ find_field_in_tables(THD *thd, Item_ident *item,
       (report_error == REPORT_ALL_ERRORS ||
        report_error == REPORT_EXCEPT_NON_UNIQUE))
   {
-    char buff[NAME_LEN*2+1];
+    char buff[NAME_LEN*2 + 2];
     if (db && db[0])
     {
       strxnmov(buff,sizeof(buff)-1,db,".",table_name,NullS);
@@ -6405,8 +6414,7 @@ find_item_in_list(Item *find, List<Item> &items, uint *counter,
     (and not an item that happens to have a name).
   */
   bool is_ref_by_name= 0;
-  uint unaliased_counter;
-  LINT_INIT(unaliased_counter);                 // Dependent on found_unaliased
+  uint unaliased_counter= 0;
 
   *resolution= NOT_RESOLVED;
 
@@ -7431,7 +7439,7 @@ bool setup_fields(THD *thd, Item **ref_pointer_array,
   thd->lex->current_select->cur_pos_in_select_list= 0;
   while ((item= it++))
   {
-    if (!item->fixed && item->fix_fields(thd, it.ref()) ||
+    if ((!item->fixed && item->fix_fields(thd, it.ref())) ||
 	(item= *(it.ref()))->check_cols(1))
     {
       thd->lex->current_select->is_item_list_lookup= save_is_item_list_lookup;
@@ -7745,8 +7753,8 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
 
     DBUG_ASSERT(tables->is_leaf_for_name_resolution());
 
-    if (table_name && my_strcasecmp(table_alias_charset, table_name,
-                                    tables->alias) ||
+    if ((table_name && my_strcasecmp(table_alias_charset, table_name,
+                                    tables->alias)) ||
         (db_name && strcmp(tables->db,db_name)))
       continue;
 
@@ -7777,8 +7785,8 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
        information_schema table, or a nested table reference. See the comment
        for TABLE_LIST.
     */
-    if (!(table && !tables->view && (table->grant.privilege & SELECT_ACL) ||
-          tables->view && (tables->grant.privilege & SELECT_ACL)) &&
+    if (!((table && !tables->view && (table->grant.privilege & SELECT_ACL)) ||
+          (tables->view && (tables->grant.privilege & SELECT_ACL))) &&
         !any_privileges)
     {
       field_iterator.set(tables);
@@ -7832,7 +7840,7 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
       */
       if (any_privileges)
       {
-        DBUG_ASSERT(tables->field_translation == NULL && table ||
+        DBUG_ASSERT((tables->field_translation == NULL && table) ||
                     tables->is_natural_join);
         DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
         Item_field *fld= (Item_field*) item;
@@ -7971,7 +7979,7 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
   if (*conds)
   {
     thd->where="where clause";
-    if (!(*conds)->fixed && (*conds)->fix_fields(thd, conds) ||
+    if ((!(*conds)->fixed && (*conds)->fix_fields(thd, conds)) ||
 	(*conds)->check_cols(1))
       goto err_no_arena;
   }
@@ -7991,8 +7999,8 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
       {
         /* Make a join an a expression */
         thd->where="on clause";
-        if (!embedded->on_expr->fixed &&
-            embedded->on_expr->fix_fields(thd, &embedded->on_expr) ||
+        if ((!embedded->on_expr->fixed &&
+            embedded->on_expr->fix_fields(thd, &embedded->on_expr)) ||
 	    embedded->on_expr->check_cols(1))
 	  goto err_no_arena;
         select_lex->cond_count++;
@@ -8147,8 +8155,8 @@ fill_record_n_invoke_before_triggers(THD *thd, List<Item> &fields,
                                      enum trg_event_type event)
 {
   return (fill_record(thd, fields, values, ignore_errors) ||
-          triggers && triggers->process_triggers(thd, event,
-                                                 TRG_ACTION_BEFORE, TRUE));
+          (triggers && triggers->process_triggers(thd, event,
+                                                 TRG_ACTION_BEFORE, TRUE)));
 }
 
 
@@ -8242,8 +8250,8 @@ fill_record_n_invoke_before_triggers(THD *thd, Field **ptr,
                                      enum trg_event_type event)
 {
   return (fill_record(thd, ptr, values, ignore_errors) ||
-          triggers && triggers->process_triggers(thd, event,
-                                                 TRG_ACTION_BEFORE, TRUE));
+          (triggers && triggers->process_triggers(thd, event,
+                                                 TRG_ACTION_BEFORE, TRUE)));
 }
 
 
diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc
index 7c104ccc455..52de41ef1b2 100644
--- a/sql/sql_cache.cc
+++ b/sql/sql_cache.cc
@@ -353,11 +353,6 @@ TODO list:
 #define RW_UNLOCK(M) {DBUG_PRINT("lock", ("rwlock unlock 0x%lx",(ulong)(M))); \
   if (!rw_unlock(M)) DBUG_PRINT("lock", ("rwlock unlock ok")); \
   else DBUG_PRINT("lock", ("rwlock unlock FAILED %d", errno)); }
-#define STRUCT_LOCK(M) {DBUG_PRINT("lock", ("%d struct lock...",__LINE__)); \
-  pthread_mutex_lock(M);DBUG_PRINT("lock", ("struct lock OK"));}
-#define STRUCT_UNLOCK(M) { \
-  DBUG_PRINT("lock", ("%d struct unlock...",__LINE__)); \
-  pthread_mutex_unlock(M);DBUG_PRINT("lock", ("struct unlock OK"));}
 #define BLOCK_LOCK_WR(B) {DBUG_PRINT("lock", ("%d LOCK_WR 0x%lx",\
   __LINE__,(ulong)(B))); \
   B->query()->lock_writing();}
@@ -404,8 +399,6 @@ static void debug_wait_for_kill(const char *info)
 #define RW_WLOCK(M) rw_wrlock(M)
 #define RW_RLOCK(M) rw_rdlock(M)
 #define RW_UNLOCK(M) rw_unlock(M)
-#define STRUCT_LOCK(M) pthread_mutex_lock(M)
-#define STRUCT_UNLOCK(M) pthread_mutex_unlock(M)
 #define BLOCK_LOCK_WR(B) B->query()->lock_writing()
 #define BLOCK_LOCK_RD(B) B->query()->lock_reading()
 #define BLOCK_UNLOCK_WR(B) B->query()->unlock_writing()
@@ -420,6 +413,140 @@ TYPELIB query_cache_type_typelib=
 };
 
 
+/**
+  Serialize access to the query cache.
+  If the lock cannot be granted the thread hangs in a conditional wait which
+  is signalled on each unlock.
+
+  The lock attempt will also fail without wait if lock_and_suspend() is in
+  effect by another thread. This enables a quick path in execution to skip waits
+  when the outcome is known.
+
+  @return
+   @retval FALSE An exclusive lock was taken
+   @retval TRUE The locking attempt failed
+*/
+
+bool Query_cache::try_lock(void)
+{
+  bool interrupt= FALSE;
+  DBUG_ENTER("Query_cache::try_lock");
+
+  pthread_mutex_lock(&structure_guard_mutex);
+  while (1)
+  {
+    if (m_cache_lock_status == Query_cache::UNLOCKED)
+    {
+      m_cache_lock_status= Query_cache::LOCKED;
+#ifndef DBUG_OFF
+      THD *thd= current_thd;
+      if (thd)
+        m_cache_lock_thread_id= thd->thread_id;
+#endif
+      break;
+    }
+    else if (m_cache_lock_status == Query_cache::LOCKED_NO_WAIT)
+    {
+      /*
+        If query cache is protected by a LOCKED_NO_WAIT lock this thread
+        should avoid using the query cache as it is being evicted.
+      */
+      interrupt= TRUE;
+      break;
+    }
+    else
+    {
+      DBUG_ASSERT(m_cache_lock_status == Query_cache::LOCKED);
+      pthread_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
+    }
+  }
+  pthread_mutex_unlock(&structure_guard_mutex);
+
+  DBUG_RETURN(interrupt);
+}
+
+
+/**
+  Serialize access to the query cache.
+  If the lock cannot be granted the thread hangs in a conditional wait which
+  is signalled on each unlock.
+
+  This method also suspends the query cache so that other threads attempting to
+  lock the cache with try_lock() will fail directly without waiting.
+
+  It is used by all methods which flushes or destroys the whole cache.
+ */
+
+void Query_cache::lock_and_suspend(void)
+{
+  DBUG_ENTER("Query_cache::lock_and_suspend");
+
+  pthread_mutex_lock(&structure_guard_mutex);
+  while (m_cache_lock_status != Query_cache::UNLOCKED)
+    pthread_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
+  m_cache_lock_status= Query_cache::LOCKED_NO_WAIT;
+#ifndef DBUG_OFF
+  THD *thd= current_thd;
+  if (thd)
+    m_cache_lock_thread_id= thd->thread_id;
+#endif
+  /* Wake up everybody, a whole cache flush is starting! */
+  pthread_cond_broadcast(&COND_cache_status_changed);
+  pthread_mutex_unlock(&structure_guard_mutex);
+
+  DBUG_VOID_RETURN;
+}
+
+/**
+  Serialize access to the query cache.
+  If the lock cannot be granted the thread hangs in a conditional wait which
+  is signalled on each unlock.
+
+  It is used by all methods which invalidates one or more tables.
+ */
+
+void Query_cache::lock(void)
+{
+  DBUG_ENTER("Query_cache::lock");
+
+  pthread_mutex_lock(&structure_guard_mutex);
+  while (m_cache_lock_status != Query_cache::UNLOCKED)
+    pthread_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
+  m_cache_lock_status= Query_cache::LOCKED;
+#ifndef DBUG_OFF
+  THD *thd= current_thd;
+  if (thd)
+    m_cache_lock_thread_id= thd->thread_id;
+#endif
+  pthread_mutex_unlock(&structure_guard_mutex);
+
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Set the query cache to UNLOCKED and signal waiting threads.
+*/
+
+void Query_cache::unlock(void)
+{
+  DBUG_ENTER("Query_cache::unlock");
+  pthread_mutex_lock(&structure_guard_mutex);
+#ifndef DBUG_OFF
+  THD *thd= current_thd;
+  if (thd)
+    DBUG_ASSERT(m_cache_lock_thread_id == thd->thread_id);
+#endif
+  DBUG_ASSERT(m_cache_lock_status == Query_cache::LOCKED ||
+              m_cache_lock_status == Query_cache::LOCKED_NO_WAIT);
+  m_cache_lock_status= Query_cache::UNLOCKED;
+  DBUG_PRINT("Query_cache",("Sending signal"));
+  pthread_cond_signal(&COND_cache_status_changed);
+  pthread_mutex_unlock(&structure_guard_mutex);
+  DBUG_VOID_RETURN;
+}
+
+
 /**
   Helper function for determine if a SELECT statement has a SQL_NO_CACHE
   directive.
@@ -714,14 +841,8 @@ void query_cache_insert(NET *net, const char *packet, ulong length)
   DBUG_EXECUTE_IF("wait_in_query_cache_insert",
                   debug_wait_for_kill("wait_in_query_cache_insert"); );
 
-  STRUCT_LOCK(&query_cache.structure_guard_mutex);
-  bool interrupt;
-  query_cache.wait_while_table_flush_is_in_progress(&interrupt);
-  if (interrupt)
-  {
-    STRUCT_UNLOCK(&query_cache.structure_guard_mutex);
+  if (query_cache.try_lock())
     DBUG_VOID_RETURN;
-  }
 
   Query_cache_block *query_block= (Query_cache_block*)net->query_cache_query;
   if (!query_block)
@@ -730,7 +851,7 @@ void query_cache_insert(NET *net, const char *packet, ulong length)
       We lost the writer and the currently processed query has been
       invalidated; there is nothing left to do.
     */
-    STRUCT_UNLOCK(&query_cache.structure_guard_mutex);
+    query_cache.unlock();
     DBUG_VOID_RETURN;
   }
 
@@ -756,7 +877,7 @@ void query_cache_insert(NET *net, const char *packet, ulong length)
     query_cache.free_query(query_block);
     query_cache.refused++;
     // append_result_data no success => we need unlock
-    STRUCT_UNLOCK(&query_cache.structure_guard_mutex);
+    query_cache.unlock();
     DBUG_VOID_RETURN;
   }
 
@@ -778,14 +899,8 @@ void query_cache_abort(NET *net)
   if (net->query_cache_query == 0)
     DBUG_VOID_RETURN;
 
-  STRUCT_LOCK(&query_cache.structure_guard_mutex);
-  bool interrupt;
-  query_cache.wait_while_table_flush_is_in_progress(&interrupt);
-  if (interrupt)
-  {
-    STRUCT_UNLOCK(&query_cache.structure_guard_mutex);
+  if (query_cache.try_lock())
     DBUG_VOID_RETURN;
-  }
 
   /*
     While we were waiting another thread might have changed the status
@@ -804,8 +919,7 @@ void query_cache_abort(NET *net)
     DBUG_EXECUTE("check_querycache",query_cache.check_integrity(1););
   }
 
-  STRUCT_UNLOCK(&query_cache.structure_guard_mutex);
-
+  query_cache.unlock();
   DBUG_VOID_RETURN;
 }
 
@@ -833,15 +947,8 @@ void query_cache_end_of_result(THD *thd)
                      emb_count_querycache_size(thd));
 #endif
 
-  STRUCT_LOCK(&query_cache.structure_guard_mutex);
-
-  bool interrupt;
-  query_cache.wait_while_table_flush_is_in_progress(&interrupt);
-  if (interrupt)
-  {
-    STRUCT_UNLOCK(&query_cache.structure_guard_mutex);
+  if (query_cache.try_lock())
     DBUG_VOID_RETURN;
-  }
 
   query_block= ((Query_cache_block*) thd->net.query_cache_query);
   if (query_block)
@@ -870,10 +977,9 @@ void query_cache_end_of_result(THD *thd)
       */
       DBUG_ASSERT(0);
       query_cache.free_query(query_block);
-      STRUCT_UNLOCK(&query_cache.structure_guard_mutex);
+      query_cache.unlock();
       DBUG_VOID_RETURN;
     }
-
     last_result_block= header->result()->prev;
     allign_size= ALIGN_SIZE(last_result_block->used);
     len= max(query_cache.min_allocation_unit, allign_size);
@@ -886,13 +992,11 @@ void query_cache_end_of_result(THD *thd)
     /* Drop the writer. */
     header->writer(0);
     thd->net.query_cache_query= 0;
-
     BLOCK_UNLOCK_WR(query_block);
     DBUG_EXECUTE("check_querycache",query_cache.check_integrity(1););
 
   }
-
-  STRUCT_UNLOCK(&query_cache.structure_guard_mutex);
+  query_cache.unlock();
   DBUG_VOID_RETURN;
 }
 
@@ -952,11 +1056,7 @@ ulong Query_cache::resize(ulong query_cache_size_arg)
 			query_cache_size_arg));
   DBUG_ASSERT(initialized);
 
-  STRUCT_LOCK(&structure_guard_mutex);
-  while (is_flushing())
-    pthread_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
-  m_cache_status= Query_cache::FLUSH_IN_PROGRESS;
-  STRUCT_UNLOCK(&structure_guard_mutex);
+  lock_and_suspend();
 
   /*
     Wait for all readers and writers to exit. When the list of all queries
@@ -988,13 +1088,10 @@ ulong Query_cache::resize(ulong query_cache_size_arg)
   query_cache_size= query_cache_size_arg;
   new_query_cache_size= init_cache();
 
-  STRUCT_LOCK(&structure_guard_mutex);
-  m_cache_status= Query_cache::NO_FLUSH_IN_PROGRESS;
-  pthread_cond_signal(&COND_cache_status_changed);
   if (new_query_cache_size)
     DBUG_EXECUTE("check_querycache",check_integrity(1););
-  STRUCT_UNLOCK(&structure_guard_mutex);
 
+  unlock();
   DBUG_RETURN(new_query_cache_size);
 }
 
@@ -1091,15 +1188,16 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
     */
     ha_release_temporary_latches(thd);
 
-    STRUCT_LOCK(&structure_guard_mutex);
-    if (query_cache_size == 0 || is_flushing())
+    /*
+      A table- or a full flush operation can potentially take a long time to
+      finish. We choose not to wait for them and skip caching statements
+      instead.
+    */
+    if (try_lock())
+      DBUG_VOID_RETURN;
+    if (query_cache_size == 0)
     {
-      /*
-        A table- or a full flush operation can potentially take a long time to 
-        finish. We choose not to wait for them and skip caching statements
-        instead.
-      */
-      STRUCT_UNLOCK(&structure_guard_mutex);
+      unlock();
       DBUG_VOID_RETURN;
     }
     DUMP(this);
@@ -1107,7 +1205,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
     if (ask_handler_allowance(thd, tables_used))
     {
       refused++;
-      STRUCT_UNLOCK(&structure_guard_mutex);
+      unlock();
       DBUG_VOID_RETURN;
     }
 
@@ -1155,7 +1253,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
 	  DBUG_PRINT("qcache", ("insertion in query hash"));
 	  header->unlock_n_destroy();
 	  free_memory_block(query_block);
-	  STRUCT_UNLOCK(&structure_guard_mutex);
+          unlock();
 	  goto end;
 	}
 	if (!register_all_tables(query_block, tables_used, local_tables))
@@ -1165,7 +1263,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
 	  hash_delete(&queries, (uchar *) query_block);
 	  header->unlock_n_destroy();
 	  free_memory_block(query_block);
-	  STRUCT_UNLOCK(&structure_guard_mutex);
+          unlock();
 	  goto end;
 	}
 	double_linked_list_simple_include(query_block, &queries_blocks);
@@ -1175,7 +1273,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
 	header->writer(net);
 	header->tables_type(tables_type);
 
-	STRUCT_UNLOCK(&structure_guard_mutex);
+        unlock();
 
 	// init_n_lock make query block locked
 	BLOCK_UNLOCK_WR(query_block);
@@ -1184,7 +1282,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
       {
 	// We have not enough memory to store query => do nothing
 	refused++;
-	STRUCT_UNLOCK(&structure_guard_mutex);
+        unlock();
 	DBUG_PRINT("warning", ("Can't allocate query"));
       }
     }
@@ -1192,7 +1290,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
     {
       // Another thread is processing the same query => do nothing
       refused++;
-      STRUCT_UNLOCK(&structure_guard_mutex);
+      unlock();
       DBUG_PRINT("qcache", ("Another thread process same query"));
     }
   }
@@ -1291,18 +1389,17 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length)
     }
   }
 
-  STRUCT_LOCK(&structure_guard_mutex);
+  /*
+    Try to obtain an exclusive lock on the query cache. If the cache is
+    disabled or if a full cache flush is in progress, the attempt to
+    get the lock is aborted.
+  */
+  if (try_lock())
+    goto err;
 
   if (query_cache_size == 0)
     goto err_unlock;
 
-  if (is_flushing())
-  {
-    /* Return; Query cache is temporarily disabled while we flush. */
-    DBUG_PRINT("qcache",("query cache disabled"));
-    goto err_unlock;
-  }
-
   /*
     Check that we haven't forgot to reset the query cache variables;
     make sure there are no attached query cache writer to this thread.
@@ -1436,7 +1533,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
         DBUG_PRINT("qcache",
                    ("Temporary table detected: '%s.%s'",
                     table_list.db, table_list.alias));
-        STRUCT_UNLOCK(&structure_guard_mutex);
+        unlock();
         /*
           We should not store result of this query because it contain
           temporary tables => assign following variable to make check
@@ -1457,7 +1554,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
       DBUG_PRINT("qcache",
 		 ("probably no SELECT access to %s.%s =>  return to normal processing",
 		  table_list.db, table_list.alias));
-      STRUCT_UNLOCK(&structure_guard_mutex);
+      unlock();
       thd->lex->safe_to_cache_query=0;		// Don't try to cache this
       BLOCK_UNLOCK_RD(query_block);
       DBUG_RETURN(-1);				// Privilege error
@@ -1500,7 +1597,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
   }
   move_to_query_list_end(query_block);
   hits++;
-  STRUCT_UNLOCK(&structure_guard_mutex);
+  unlock();
 
   /*
     Send cached result to client
@@ -1540,7 +1637,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
   DBUG_RETURN(1);				// Result sent to client
 
 err_unlock:
-  STRUCT_UNLOCK(&structure_guard_mutex);
+  unlock();
 err:
   MYSQL_QUERY_CACHE_MISS(thd->query);
   DBUG_RETURN(0);				// Query was not cached
@@ -1661,47 +1758,6 @@ void Query_cache::invalidate(THD *thd, const char *key, uint32  key_length,
 }
 
 
-/**
-  Synchronize the thread with any flushing operations.
-
-  This helper function is called whenever a thread needs to operate on the
-  query cache structure (example: during invalidation). If a table flush is in
-  progress this function will wait for it to stop. If a full flush is in
-  progress, the function will set the interrupt parameter to indicate that the
-  current operation is redundant and should be interrupted.
-
-  @param[out] interrupt This out-parameter will be set to TRUE if the calling
-    function is redundant and should be interrupted.
-
-  @return If the interrupt-parameter is TRUE then m_cache_status is set to
-    NO_FLUSH_IN_PROGRESS. If the interrupt-parameter is FALSE then
-    m_cache_status is set to FLUSH_IN_PROGRESS.
-    The structure_guard_mutex will in any case be locked.
-*/
-
-void Query_cache::wait_while_table_flush_is_in_progress(bool *interrupt)
-{
-  while (is_flushing())
-  {
-    /*
-      If there already is a full flush in progress query cache isn't enabled
-      and additional flushes are redundant; just return instead.
-    */
-    if (m_cache_status == Query_cache::FLUSH_IN_PROGRESS)
-    {
-      *interrupt= TRUE;
-      return;
-    }
-    /*
-      If a table flush is in progress; wait on cache status to change.
-    */
-    if (m_cache_status == Query_cache::TABLE_FLUSH_IN_PROGRESS)
-      pthread_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
-  }
-  *interrupt= FALSE;
-}
-
-
 /**
    Remove all cached queries that uses the given database.
 */
@@ -1711,14 +1767,11 @@ void Query_cache::invalidate(char *db)
   bool restart= FALSE;
   DBUG_ENTER("Query_cache::invalidate (db)");
 
-  STRUCT_LOCK(&structure_guard_mutex);
-  bool interrupt;
-  wait_while_table_flush_is_in_progress(&interrupt);
-  if (interrupt)
-  {
-    STRUCT_UNLOCK(&structure_guard_mutex);
-    return;
-  }
+  /*
+    Lock the query cache and queue all invalidation attempts to avoid
+    the risk of a race between invalidation, cache inserts and flushes.
+  */
+  lock();
 
   THD *thd= current_thd;
 
@@ -1774,7 +1827,7 @@ void Query_cache::invalidate(char *db)
       } while (restart);
     } // end if( tables_blocks )
   }
-  STRUCT_UNLOCK(&structure_guard_mutex);
+  unlock();
 
   DBUG_VOID_RETURN;
 }
@@ -1798,7 +1851,10 @@ void Query_cache::invalidate_by_MyISAM_filename(const char *filename)
 void Query_cache::flush()
 {
   DBUG_ENTER("Query_cache::flush");
-  STRUCT_LOCK(&structure_guard_mutex);
+  DBUG_EXECUTE_IF("wait_in_query_cache_flush1",
+                  debug_wait_for_kill("wait_in_query_cache_flush1"););
+
+  lock_and_suspend();
   if (query_cache_size > 0)
   {
     DUMP(this);
@@ -1807,7 +1863,7 @@ void Query_cache::flush()
   }
 
   DBUG_EXECUTE("check_querycache",query_cache.check_integrity(1););
-  STRUCT_UNLOCK(&structure_guard_mutex);
+  unlock();
   DBUG_VOID_RETURN;
 }
 
@@ -1826,18 +1882,16 @@ void Query_cache::pack(ulong join_limit, uint iteration_limit)
 {
   DBUG_ENTER("Query_cache::pack");
 
-  bool interrupt;
-  STRUCT_LOCK(&structure_guard_mutex);
-  wait_while_table_flush_is_in_progress(&interrupt);
-  if (interrupt)
-  {
-    STRUCT_UNLOCK(&structure_guard_mutex);
+  /*
+    If the entire qc is being invalidated we can bail out early
+    instead of waiting for the lock.
+  */
+  if (try_lock())
     DBUG_VOID_RETURN;
-  }
 
   if (query_cache_size == 0)
   {
-    STRUCT_UNLOCK(&structure_guard_mutex);
+    unlock();
     DBUG_VOID_RETURN;
   }
 
@@ -1847,7 +1901,7 @@ void Query_cache::pack(ulong join_limit, uint iteration_limit)
     pack_cache();
   } while ((++i < iteration_limit) && join_results(join_limit));
 
-  STRUCT_UNLOCK(&structure_guard_mutex);
+  unlock();
   DBUG_VOID_RETURN;
 }
 
@@ -1862,9 +1916,9 @@ void Query_cache::destroy()
   else
   {
     /* Underlying code expects the lock. */
-    STRUCT_LOCK(&structure_guard_mutex);
+    lock_and_suspend();
     free_cache();
-    STRUCT_UNLOCK(&structure_guard_mutex);
+    unlock();
 
     pthread_cond_destroy(&COND_cache_status_changed);
     pthread_mutex_destroy(&structure_guard_mutex);
@@ -1883,7 +1937,7 @@ void Query_cache::init()
   DBUG_ENTER("Query_cache::init");
   pthread_mutex_init(&structure_guard_mutex,MY_MUTEX_INIT_FAST);
   pthread_cond_init(&COND_cache_status_changed, NULL);
-  m_cache_status= Query_cache::NO_FLUSH_IN_PROGRESS;
+  m_cache_lock_status= Query_cache::UNLOCKED;
   initialized = 1;
   DBUG_VOID_RETURN;
 }
@@ -2123,23 +2177,9 @@ void Query_cache::free_cache()
 
 void Query_cache::flush_cache()
 {
-  /*
-    If there is flush in progress, wait for it to finish, and then do
-    our flush.  This is necessary because something could be added to
-    the cache before we acquire the lock again, and some code (like
-    Query_cache::free_cache()) depends on the fact that after the
-    flush the cache is empty.
-  */
-  while (is_flushing())
-    pthread_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
-
-  /*
-    Setting 'FLUSH_IN_PROGRESS' will prevent other threads from using
-    the cache while we are in the middle of the flush, and we release
-    the lock so that other threads won't block.
-  */
-  m_cache_status= Query_cache::FLUSH_IN_PROGRESS;
-  STRUCT_UNLOCK(&structure_guard_mutex);
+  
+  DBUG_EXECUTE_IF("wait_in_query_cache_flush2",
+                  debug_wait_for_kill("wait_in_query_cache_flush2"););
 
   my_hash_reset(&queries);
   while (queries_blocks != 0)
@@ -2147,10 +2187,6 @@ void Query_cache::flush_cache()
     BLOCK_LOCK_WR(queries_blocks);
     free_query_internal(queries_blocks);
   }
-
-  STRUCT_LOCK(&structure_guard_mutex);
-  m_cache_status= Query_cache::NO_FLUSH_IN_PROGRESS;
-  pthread_cond_signal(&COND_cache_status_changed);
 }
 
 /*
@@ -2330,10 +2366,6 @@ Query_cache::write_block_data(ulong data_len, uchar* data,
 }
 
 
-/*
-  On success STRUCT_UNLOCK(&query_cache.structure_guard_mutex) will be done.
-*/
-
 my_bool
 Query_cache::append_result_data(Query_cache_block **current_block,
 				ulong data_len, uchar* data,
@@ -2353,10 +2385,6 @@ Query_cache::append_result_data(Query_cache_block **current_block,
   if (*current_block == 0)
   {
     DBUG_PRINT("qcache", ("allocated first result data block %lu", data_len));
-    /*
-      STRUCT_UNLOCK(&structure_guard_mutex) Will be done by
-      write_result_data if success;
-    */
     DBUG_RETURN(write_result_data(current_block, data_len, data, query_block,
 				  Query_cache_block::RES_BEG));
   }
@@ -2387,10 +2415,6 @@ Query_cache::append_result_data(Query_cache_block **current_block,
     DBUG_PRINT("qcache", ("allocate new block for %lu bytes",
 			data_len-last_block_free_space));
     Query_cache_block *new_block = 0;
-    /*
-      On success STRUCT_UNLOCK(&structure_guard_mutex) will be done
-      by the next call
-    */
     success = write_result_data(&new_block, data_len-last_block_free_space,
 				(uchar*)(((uchar*)data)+last_block_free_space),
 				query_block,
@@ -2405,7 +2429,7 @@ Query_cache::append_result_data(Query_cache_block **current_block,
   else
   {
     // It is success (nobody can prevent us write data)
-    STRUCT_UNLOCK(&structure_guard_mutex);
+    unlock();
   }
 
   // Now finally write data to the last block
@@ -2443,7 +2467,7 @@ my_bool Query_cache::write_result_data(Query_cache_block **result_block,
   if (success)
   {
     // It is success (nobody can prevent us write data)
-    STRUCT_UNLOCK(&structure_guard_mutex);
+    unlock();
     uint headers_len = (ALIGN_SIZE(sizeof(Query_cache_block)) +
 			ALIGN_SIZE(sizeof(Query_cache_result)));
 #ifndef EMBEDDED_LIBRARY
@@ -2601,6 +2625,18 @@ void Query_cache::invalidate_table(THD *thd, TABLE *table)
 
 void Query_cache::invalidate_table(THD *thd, uchar * key, uint32  key_length)
 {
+#ifdef TO_BE_REMOVED
+/*
+  This ifdef'd piece comes from Summit, it's a manual backport (2008-10-15) of
+  http://lists.mysql.com/commits/56418.
+  But that was an early, non-final patch: after that backport was made, the
+  author of the patch decided to abandon it, and his final patch (put into 6.0)
+  was different.
+  Then 5.1's code was changed for some other reasons, so now we have a
+  conflict between the old patch backported to Summit and the latest 5.1.
+  The backport cannot stay, it has to be removed and then rewritten if
+  desired.
+*/
   bool interrupt;
   
   if (m_query_cache_is_disabled)
@@ -2619,28 +2655,35 @@ void Query_cache::invalidate_table(THD *thd, uchar * key, uint32  key_length)
     STRUCT_UNLOCK(&structure_guard_mutex);
     return;
   }
+||||||| BASE-REVISION
+  bool interrupt;
+  STRUCT_LOCK(&structure_guard_mutex);
+  wait_while_table_flush_is_in_progress(&interrupt);
+  if (interrupt)
+  {
+    STRUCT_UNLOCK(&structure_guard_mutex);
+    return;
+  }
+=======
+/* current 5.1 code: */
+#endif
+  DBUG_EXECUTE_IF("wait_in_query_cache_invalidate1",
+                   debug_wait_for_kill("wait_in_query_cache_invalidate1"); );
 
   /*
-    Setting 'TABLE_FLUSH_IN_PROGRESS' will temporarily disable the cache
-    so that structural changes to cache won't block the entire server.
-    However, threads requesting to change the query cache will still have
-    to wait for the flush to finish.
+    Lock the query cache and queue all invalidation attempts to avoid
+    the risk of a race between invalidation, cache inserts and flushes.
   */
-  m_cache_status= Query_cache::TABLE_FLUSH_IN_PROGRESS;
-  STRUCT_UNLOCK(&structure_guard_mutex);
+  lock();
+
+  DBUG_EXECUTE_IF("wait_in_query_cache_invalidate2",
+                  debug_wait_for_kill("wait_in_query_cache_invalidate2"); );
+
 
   if (query_cache_size > 0)
     invalidate_table_internal(thd, key, key_length);
 
-  STRUCT_LOCK(&structure_guard_mutex);
-  m_cache_status= Query_cache::NO_FLUSH_IN_PROGRESS;
-
-  /*
-    net_real_write might be waiting on a change on the m_cache_status
-    variable.
-  */
-  pthread_cond_signal(&COND_cache_status_changed);
-  STRUCT_UNLOCK(&structure_guard_mutex);
+  unlock();
 }
 
 
@@ -2649,7 +2692,7 @@ void Query_cache::invalidate_table(THD *thd, uchar * key, uint32  key_length)
   The caller must ensure that no other thread is trying to work with
   the query cache when this function is executed.
 
-  @pre structure_guard_mutex is acquired or TABLE_FLUSH_IN_PROGRESS is set.
+  @pre structure_guard_mutex is acquired or LOCKED is set.
 */
 
 void
@@ -2667,7 +2710,7 @@ Query_cache::invalidate_table_internal(THD *thd, uchar *key, uint32 key_length)
 /**
   Invalidate a linked list of query cache blocks.
 
-  Each block tries to aquire a block level lock before
+  Each block tries to acquire a block level lock before
   free_query is a called. This function will in turn affect
   related table- and result-blocks.
 
@@ -4191,10 +4234,7 @@ my_bool Query_cache::check_integrity(bool locked)
   DBUG_ENTER("check_integrity");
 
   if (!locked)
-    STRUCT_LOCK(&structure_guard_mutex);
-
-  while (is_flushing())
-    pthread_cond_wait(&COND_cache_status_changed,&structure_guard_mutex);
+    lock_and_suspend();
 
   if (hash_check(&queries))
   {
@@ -4443,7 +4483,7 @@ my_bool Query_cache::check_integrity(bool locked)
   }
   DBUG_ASSERT(result == 0);
   if (!locked)
-    STRUCT_UNLOCK(&structure_guard_mutex);
+    unlock();
   DBUG_RETURN(result);
 }
 
diff --git a/sql/sql_cache.h b/sql/sql_cache.h
index 15e97238335..a8636381956 100644
--- a/sql/sql_cache.h
+++ b/sql/sql_cache.h
@@ -272,12 +272,12 @@ public:
 
 
 private:
+#ifndef DBUG_OFF
+  my_thread_id m_cache_lock_thread_id;
+#endif
   pthread_cond_t COND_cache_status_changed;
-
-  enum Cache_status { NO_FLUSH_IN_PROGRESS, FLUSH_IN_PROGRESS,
-                      TABLE_FLUSH_IN_PROGRESS };
-
-  Cache_status m_cache_status;
+  enum Cache_lock_status { UNLOCKED, LOCKED_NO_WAIT, LOCKED };
+  Cache_lock_status m_cache_lock_status;
 
   bool m_query_cache_is_disabled;
   
@@ -382,8 +382,6 @@ protected:
 	      Query_cache_block *pprev);
   my_bool join_results(ulong join_limit);
 
-  void wait_while_table_flush_is_in_progress(bool *interrupt);
-
   /*
     Following function control structure_guard_mutex
     by themself or don't need structure_guard_mutex
@@ -480,12 +478,6 @@ protected:
   friend void query_cache_abort(NET *net);
 
   bool is_disabled(void) { return m_query_cache_is_disabled; }
-  
-  bool is_flushing(void) 
-  { 
-    return (m_cache_status != Query_cache::NO_FLUSH_IN_PROGRESS);
-  }
-
   /*
     The following functions are only used when debugging
     We don't protect these with ifndef DBUG_OFF to not have to recompile
@@ -503,6 +495,11 @@ protected:
 			Query_cache_block_table * point,
 			const char *name);
   my_bool in_blocks(Query_cache_block * point);
+
+  bool try_lock(void);
+  void lock(void);
+  void lock_and_suspend(void);
+  void unlock(void);
 };
 
 extern Query_cache query_cache;
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index b73822f5a48..5933b233901 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -27,6 +27,7 @@
 
 #include "mysql_priv.h"
 #include "rpl_rli.h"
+#include "rpl_filter.h"
 #include "rpl_record.h"
 #include "slave.h"
 #include <my_bitmap.h>
@@ -546,6 +547,7 @@ THD::THD()
    first_successful_insert_id_in_prev_stmt_for_binlog(0),
    first_successful_insert_id_in_cur_stmt(0),
    stmt_depends_on_first_successful_insert_id_in_prev_stmt(FALSE),
+   examined_row_count(0),
    global_read_lock(0),
    is_fatal_error(0),
    transaction_rollback_request(0),
@@ -590,7 +592,7 @@ THD::THD()
   // Must be reset to handle error with THD's created for init of mysqld
   lex->current_select= 0;
   start_time=(time_t) 0;
-  start_utime= 0L;
+  start_utime= prior_thr_create_utime= 0L;
   utime_after_lock= 0L;
   current_linfo =  0;
   slave_thread = 0;
@@ -674,31 +676,40 @@ THD::THD()
 
 void THD::push_internal_handler(Internal_error_handler *handler)
 {
-  /*
-    TODO: The current implementation is limited to 1 handler at a time only.
-    THD and sp_rcontext need to be modified to use a common handler stack.
-  */
-  DBUG_ASSERT(m_internal_handler == NULL);
-  m_internal_handler= handler;
+  if (m_internal_handler)
+  {
+    handler->m_prev_internal_handler= m_internal_handler;
+    m_internal_handler= handler;
+  }
+  else
+  {
+    m_internal_handler= handler;
+  }
 }
 
 
 bool THD::handle_error(uint sql_errno, const char *message,
                        MYSQL_ERROR::enum_warning_level level)
 {
-  if (m_internal_handler)
+  if (!m_internal_handler)
+    return FALSE;
+
+  for (Internal_error_handler *error_handler= m_internal_handler;
+       error_handler;
+       error_handler= m_internal_handler->m_prev_internal_handler)
   {
-    return m_internal_handler->handle_error(sql_errno, message, level, this);
+    if (error_handler->handle_error(sql_errno, message, level, this))
+    return TRUE;
   }
 
-  return FALSE;                                 // 'FALSE', as per coding style
+  return FALSE;
 }
 
 
 void THD::pop_internal_handler()
 {
   DBUG_ASSERT(m_internal_handler != NULL);
-  m_internal_handler= NULL;
+  m_internal_handler= m_internal_handler->m_prev_internal_handler;
 }
 
 extern "C"
@@ -746,6 +757,12 @@ void thd_get_xid(const MYSQL_THD thd, MYSQL_XID *xid)
   *xid = *(MYSQL_XID *) &thd->transaction.xid_state.xid;
 }
 
+#ifdef _WIN32
+extern "C"   THD *_current_thd_noinline(void)
+{
+  return my_pthread_getspecific_ptr(THD*,THR_THD);
+}
+#endif
 /*
   Init common variables that has to be reset on start and on change_user
 */
@@ -2599,7 +2616,7 @@ bool select_dumpvar::send_data(List<Item> &items)
     {
       Item_func_set_user_var *suv= new Item_func_set_user_var(mv->s, item);
       suv->fix_fields(thd, 0);
-      suv->check(0);
+      suv->save_item_result(item);
       suv->update();
     }
   }
@@ -3649,7 +3666,7 @@ show_query_type(THD::enum_binlog_query_type qtype)
 */
 int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
                       ulong query_len, bool is_trans, bool suppress_use,
-                      THD::killed_state killed_status_arg)
+                      int errcode)
 {
   DBUG_ENTER("THD::binlog_query");
   DBUG_PRINT("enter", ("qtype: %s  query: '%s'",
@@ -3674,7 +3691,8 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
     we should print a warning.
   */
   if (sql_log_bin_toplevel && lex->is_stmt_unsafe() &&
-      variables.binlog_format == BINLOG_FORMAT_STMT)
+      variables.binlog_format == BINLOG_FORMAT_STMT && 
+      binlog_filter->db_ok(this->db))
   {
    /*
      A warning can be elevated a error when STRICT sql mode.
@@ -3716,7 +3734,7 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
      */
     {
       Query_log_event qinfo(this, query_arg, query_len, is_trans, suppress_use,
-                            killed_status_arg);
+                            errcode);
       qinfo.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
       /*
         Binlog table maps will be irrelevant after a Query_log_event
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 158f4bc27d6..e7e56cf9a4b 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -91,7 +91,7 @@ enum enum_mark_columns
 
 extern char internal_table_name[2];
 extern char empty_c_string[1];
-extern const char **errmesg;
+extern MYSQL_PLUGIN_IMPORT const char **errmesg;
 
 #define TC_LOG_PAGE_SIZE   8192
 #define TC_LOG_MIN_SIZE    (3*TC_LOG_PAGE_SIZE)
@@ -1036,7 +1036,10 @@ show_system_thread(enum_thread_type thread)
 class Internal_error_handler
 {
 protected:
-  Internal_error_handler() {}
+  Internal_error_handler() :
+    m_prev_internal_handler(NULL)
+  {}
+
   virtual ~Internal_error_handler() {}
 
 public:
@@ -1069,6 +1072,28 @@ public:
                             const char *message,
                             MYSQL_ERROR::enum_warning_level level,
                             THD *thd) = 0;
+private:
+  Internal_error_handler *m_prev_internal_handler;
+  friend class THD;
+};
+
+
+/**
+  Implements the trivial error handler which cancels all error states
+  and prevents an SQLSTATE to be set.
+*/
+
+class Dummy_error_handler : public Internal_error_handler
+{
+public:
+  bool handle_error(uint sql_errno,
+                    const char *message,
+                    MYSQL_ERROR::enum_warning_level level,
+                    THD *thd)
+  {
+    /* Ignore error */
+    return TRUE;
+  }
 };
 
 
@@ -1345,7 +1370,8 @@ public:
   /* remote (peer) port */
   uint16 peer_port;
   time_t     start_time, user_time;
-  ulonglong  connect_utime, thr_create_utime; // track down slow pthread_create
+  // track down slow pthread_create
+  ulonglong  prior_thr_create_utime, thr_create_utime;
   ulonglong  start_utime, utime_after_lock;
   
   thr_lock_type update_lock_default;
@@ -1439,6 +1465,14 @@ public:
     {
       changed_tables= 0;
       savepoints= 0;
+      /*
+        If rm_error is raised, it means that this piece of a distributed
+        transaction has failed and must be rolled back. But the user must
+        rollback it explicitly, so don't start a new distributed XA until
+        then.
+      */
+      if (!xid_state.rm_error)
+        xid_state.xid.null();
 #ifdef USING_TRANSACTIONS
       free_root(&mem_root,MYF(MY_KEEP_PREALLOC));
 #endif
@@ -1889,7 +1923,7 @@ public:
   int binlog_query(enum_binlog_query_type qtype,
                    char const *query, ulong query_len,
                    bool is_trans, bool suppress_use,
-                   THD::killed_state killed_err_arg= THD::KILLED_NO_VALUE);
+                   int errcode);
 #endif
 
   /*
@@ -2210,6 +2244,9 @@ public:
   thd_scheduler scheduler;
 
 public:
+  inline Internal_error_handler *get_internal_handler()
+  { return m_internal_handler; }
+
   /**
     Add an internal error handler to the thread execution context.
     @param handler the exception handler to add
diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc
index 06528a27da5..3952567c329 100644
--- a/sql/sql_connect.cc
+++ b/sql/sql_connect.cc
@@ -1075,8 +1075,8 @@ static void prepare_new_connection_state(THD* thd)
 pthread_handler_t handle_one_connection(void *arg)
 {
   THD *thd= (THD*) arg;
-  ulong launch_time= (ulong) ((thd->thr_create_utime= my_micro_time()) -
-                              thd->connect_utime);
+
+  thd->thr_create_utime= my_micro_time();
 
   if (thread_scheduler.init_new_connection_thread())
   {
@@ -1085,8 +1085,20 @@ pthread_handler_t handle_one_connection(void *arg)
     thread_scheduler.end_thread(thd,0);
     return 0;
   }
-  if (launch_time >= slow_launch_time*1000000L)
-    statistic_increment(slow_launch_threads,&LOCK_status);
+
+  /*
+    If a thread was created to handle this connection:
+    increment slow_launch_threads counter if it took more than
+    slow_launch_time seconds to create the thread.
+  */
+  if (thd->prior_thr_create_utime)
+  {
+    ulong launch_time= (ulong) (thd->thr_create_utime -
+                                thd->prior_thr_create_utime);
+    if (launch_time >= slow_launch_time*1000000L)
+      statistic_increment(slow_launch_threads, &LOCK_status);
+    thd->prior_thr_create_utime= 0;
+  }
 
   /*
     handle_one_connection() is normally the only way a thread would
diff --git a/sql/sql_db.cc b/sql/sql_db.cc
index 5a266c3fac9..98d17fdd318 100644
--- a/sql/sql_db.cc
+++ b/sql/sql_db.cc
@@ -181,8 +181,7 @@ uchar* dboptions_get_key(my_dbopt_t *opt, size_t *length,
 static inline void write_to_binlog(THD *thd, char *query, uint q_len,
                                    char *db, uint db_len)
 {
-  Query_log_event qinfo(thd, query, q_len, 0, 0, THD::NOT_KILLED);
-  qinfo.error_code= 0;
+  Query_log_event qinfo(thd, query, q_len, 0, 0, 0);
   qinfo.db= db;
   qinfo.db_len= db_len;
   mysql_bin_log.write(&qinfo);
@@ -538,13 +537,13 @@ err1:
 bool load_db_opt_by_name(THD *thd, const char *db_name,
                          HA_CREATE_INFO *db_create_info)
 {
-  char db_opt_path[FN_REFLEN];
+  char db_opt_path[FN_REFLEN + 1];
 
   /*
     Pass an empty file name, and the database options file name as extension
     to avoid table name to file name encoding.
   */
-  (void) build_table_filename(db_opt_path, sizeof(db_opt_path),
+  (void) build_table_filename(db_opt_path, sizeof(db_opt_path) - 1,
                               db_name, "", MY_DB_OPT_FILE, 0);
 
   return load_db_opt(thd, db_opt_path, db_create_info);
@@ -646,7 +645,7 @@ int mysql_create_db(THD *thd, char *db, HA_CREATE_INFO *create_info,
   VOID(pthread_mutex_lock(&LOCK_mysql_create_db));
 
   /* Check directory */
-  path_len= build_table_filename(path, sizeof(path), db, "", "", 0);
+  path_len= build_table_filename(path, sizeof(path) - 1, db, "", "", 0);
   path[path_len-1]= 0;                    // Remove last '/' from path
 
   if (my_stat(path,&stat_info,MYF(0)))
@@ -723,8 +722,9 @@ int mysql_create_db(THD *thd, char *db, HA_CREATE_INFO *create_info,
 
     if (mysql_bin_log.is_open())
     {
+      int errcode= query_error_code(thd, TRUE);
       Query_log_event qinfo(thd, query, query_length, 0, 
-			    /* suppress_use */ TRUE, THD::NOT_KILLED);
+			    /* suppress_use */ TRUE, errcode);
 
       /*
 	Write should use the database being created as the "current
@@ -791,7 +791,7 @@ bool mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create_info)
      We pass MY_DB_OPT_FILE as "extension" to avoid
      "table name to file name" encoding.
   */
-  build_table_filename(path, sizeof(path), db, "", MY_DB_OPT_FILE, 0);
+  build_table_filename(path, sizeof(path) - 1, db, "", MY_DB_OPT_FILE, 0);
   if ((error=write_db_opt(thd, path, create_info)))
     goto exit;
 
@@ -811,8 +811,9 @@ bool mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create_info)
 
   if (mysql_bin_log.is_open())
   {
+    int errcode= query_error_code(thd, TRUE);
     Query_log_event qinfo(thd, thd->query, thd->query_length, 0,
-			  /* suppress_use */ TRUE, THD::NOT_KILLED);
+			  /* suppress_use */ TRUE, errcode);
 
     /*
       Write should use the database being created as the "current
@@ -883,7 +884,7 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
 
   VOID(pthread_mutex_lock(&LOCK_mysql_create_db));
 
-  length= build_table_filename(path, sizeof(path), db, "", "", 0);
+  length= build_table_filename(path, sizeof(path) - 1, db, "", "", 0);
   strmov(path+length, MY_DB_OPT_FILE);		// Append db option file name
   del_dbopt(path);				// Remove dboption hash entry
   path[length]= '\0';				// Remove file name
@@ -958,8 +959,9 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
     }
     if (mysql_bin_log.is_open())
     {
+      int errcode= query_error_code(thd, TRUE);
       Query_log_event qinfo(thd, query, query_length, 0, 
-			    /* suppress_use */ TRUE, THD::NOT_KILLED);
+			    /* suppress_use */ TRUE, errcode);
       /*
         Write should use the database being created as the "current
         database" and not the threads current database, which is the
@@ -1839,7 +1841,7 @@ bool mysql_upgrade_db(THD *thd, LEX_STRING *old_db)
     for (uint idx=0 ; idx < nfiles && !thd->killed ; idx++)
     {
       FILEINFO *file= dirp->dir_entry + idx;
-      char *extension, tname[FN_REFLEN];
+      char *extension, tname[FN_REFLEN + 1];
       LEX_STRING table_str;
       DBUG_PRINT("info",("Examining: %s", file->name));
 
@@ -1928,7 +1930,7 @@ bool mysql_upgrade_db(THD *thd, LEX_STRING *old_db)
     for (uint idx=0 ; idx < nfiles ; idx++)
     {
       FILEINFO *file= dirp->dir_entry + idx;
-      char oldname[FN_REFLEN], newname[FN_REFLEN];
+      char oldname[FN_REFLEN + 1], newname[FN_REFLEN + 1];
       DBUG_PRINT("info",("Examining: %s", file->name));
 
       /* skiping . and .. and MY_DB_OPT_FILE */
@@ -1958,8 +1960,9 @@ bool mysql_upgrade_db(THD *thd, LEX_STRING *old_db)
   /* Step8: logging */
   if (mysql_bin_log.is_open())
   {
+    int errcode= query_error_code(thd, TRUE);
     Query_log_event qinfo(thd, thd->query, thd->query_length,
-                          0, TRUE, THD::NOT_KILLED);
+                          0, TRUE, errcode);
     thd->clear_error();
     mysql_bin_log.write(&qinfo);
   }
@@ -1997,10 +2000,10 @@ exit:
 
 bool check_db_dir_existence(const char *db_name)
 {
-  char db_dir_path[FN_REFLEN];
+  char db_dir_path[FN_REFLEN + 1];
   uint db_dir_path_len;
 
-  db_dir_path_len= build_table_filename(db_dir_path, sizeof(db_dir_path),
+  db_dir_path_len= build_table_filename(db_dir_path, sizeof(db_dir_path) - 1,
                                         db_name, "", "", 0);
 
   if (db_dir_path_len && db_dir_path[db_dir_path_len - 1] == FN_LIBCHAR)
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 1b42e522491..677098d275a 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -389,8 +389,12 @@ cleanup:
         FALSE :
         transactional_table;
 
+      int errcode= 0;
       if (error < 0)
         thd->clear_error();
+      else
+        errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
+      
       /*
         [binlog]: If 'handler::delete_all_rows()' was called and the
         storage engine does not inject the rows itself, we replicate
@@ -402,9 +406,9 @@ cleanup:
       */
       int log_result= thd->binlog_query(query_type,
                                         thd->query, thd->query_length,
-                                        is_trans, FALSE, killed_status);
+                                        is_trans, FALSE, errcode);
 
-      if (log_result && transactional_table)
+      if (log_result)
       {
 	error=1;
       }
@@ -582,6 +586,11 @@ int mysql_multi_delete_prepare(THD *thd)
       }
     }
   }
+  /*
+    Reset the exclude flag to false so it doesn't interfare
+    with further calls to unique_table
+  */
+  lex->select_lex.exclude_from_table_unique_test= FALSE;
   DBUG_RETURN(FALSE);
 }
 
@@ -617,11 +626,24 @@ multi_delete::initialize_tables(JOIN *join)
     DBUG_RETURN(1);
 
   table_map tables_to_delete_from=0;
+  delete_while_scanning= 1;
   for (walk= delete_tables; walk; walk= walk->next_local)
+  {
     tables_to_delete_from|= walk->table->map;
+    if (delete_while_scanning &&
+        unique_table(thd, walk, join->tables_list, false))
+    {
+      /*
+        If the table we are going to delete from appears
+        in join, we need to defer delete. So the delete
+        doesn't interfers with the scaning of results.
+      */
+      delete_while_scanning= 0;
+    }
+  }
+
 
   walk= delete_tables;
-  delete_while_scanning= 1;
   for (JOIN_TAB *tab=join->join_tab, *end=join->join_tab+join->tables;
        tab < end;
        tab++)
@@ -784,7 +806,7 @@ void multi_delete::abort()
 
   /* the error was handled or nothing deleted and no side effects return */
   if (error_handled ||
-      !thd->transaction.stmt.modified_non_trans_table && !deleted)
+      (!thd->transaction.stmt.modified_non_trans_table && !deleted))
     DBUG_VOID_RETURN;
 
   /* Something already deleted so we have to invalidate cache */
@@ -818,9 +840,10 @@ void multi_delete::abort()
     */
     if (mysql_bin_log.is_open())
     {
+      int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
       thd->binlog_query(THD::ROW_QUERY_TYPE,
                         thd->query, thd->query_length,
-                        transactional_tables, FALSE);
+                        transactional_tables, FALSE, errcode);
     }
     thd->transaction.all.modified_non_trans_table= true;
   }
@@ -961,11 +984,14 @@ bool multi_delete::send_eof()
   {
     if (mysql_bin_log.is_open())
     {
+      int errcode= 0;
       if (local_error == 0)
         thd->clear_error();
+      else
+        errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
       if (thd->binlog_query(THD::ROW_QUERY_TYPE,
                             thd->query, thd->query_length,
-                            transactional_tables, FALSE, killed_status) &&
+                            transactional_tables, FALSE, errcode) &&
           !normal_tables)
       {
 	local_error=1;  // Log write failed: roll back the SQL statement
@@ -1025,7 +1051,7 @@ static bool mysql_truncate_by_delete(THD *thd, TABLE_LIST *table_list)
 bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok)
 {
   HA_CREATE_INFO create_info;
-  char path[FN_REFLEN];
+  char path[FN_REFLEN + 1];
   TABLE *table;
   bool error;
   uint path_length;
@@ -1062,7 +1088,7 @@ bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok)
     goto end;
   }
 
-  path_length= build_table_filename(path, sizeof(path), table_list->db,
+  path_length= build_table_filename(path, sizeof(path) - 1, table_list->db,
                                     table_list->table_name, reg_ext, 0);
 
   if (!dont_send_ok)
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index 41be98621a6..37adf5c403a 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -179,6 +179,7 @@ exit:
     {
       if (thd->is_error() &&
           (thd->main_da.sql_errno() == ER_BAD_FIELD_ERROR ||
+          thd->main_da.sql_errno() == ER_FUNC_INEXISTENT_NAME_COLLISION ||
           thd->main_da.sql_errno() == ER_SP_DOES_NOT_EXIST))
       {
         thd->clear_error();
diff --git a/sql/sql_help.cc b/sql/sql_help.cc
index f51ad318568..2818aa5082c 100644
--- a/sql/sql_help.cc
+++ b/sql/sql_help.cc
@@ -526,7 +526,7 @@ int send_variant_2_list(MEM_ROOT *mem_root, Protocol *protocol,
   String **end= pointers + names->elements;
 
   List_iterator<String> it(*names);
-  for (pos= pointers; pos!=end; (*pos++= it++));
+  for (pos= pointers; pos!=end; (*pos++= it++)) ;
 
   my_qsort(pointers,names->elements,sizeof(String*),string_ptr_cmp);
 
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index 9eef9d224e6..77b83b0e6fe 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -301,9 +301,7 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list,
                                List<Item> &update_fields, table_map *map)
 {
   TABLE *table= insert_table_list->table;
-  my_bool timestamp_mark;
-
-  LINT_INIT(timestamp_mark);
+  my_bool timestamp_mark= 0;
 
   if (table->timestamp_field)
   {
@@ -393,7 +391,7 @@ void upgrade_lock_type(THD *thd, thr_lock_type *lock_type,
                        bool is_multi_insert)
 {
   if (duplic == DUP_UPDATE ||
-      duplic == DUP_REPLACE && *lock_type == TL_WRITE_CONCURRENT_INSERT)
+      (duplic == DUP_REPLACE && *lock_type == TL_WRITE_CONCURRENT_INSERT))
   {
     *lock_type= TL_WRITE_DEFAULT;
     return;
@@ -858,11 +856,13 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
       */
       query_cache_invalidate3(thd, table_list, 1);
     }
-    if (changed && error <= 0 || thd->transaction.stmt.modified_non_trans_table
-	|| was_insert_delayed)
+    if ((changed && error <= 0) ||
+        thd->transaction.stmt.modified_non_trans_table ||
+        was_insert_delayed)
     {
       if (mysql_bin_log.is_open())
       {
+        int errcode= 0;
 	if (error <= 0)
         {
 	  /*
@@ -877,6 +877,9 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
 	  /* todo: consider removing */
 	  thd->clear_error();
 	}
+        else
+          errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
+        
 	/* bug#22725:
 
 	A query which per-row-loop can not be interrupted with
@@ -893,8 +896,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
 	if (thd->binlog_query(THD::ROW_QUERY_TYPE,
 			      thd->query, thd->query_length,
 			      transactional_table, FALSE,
-			      (error>0) ? thd->killed : THD::NOT_KILLED) &&
-	    transactional_table)
+			      errcode))
         {
 	  error=1;
 	}
@@ -1113,6 +1115,33 @@ static bool mysql_prepare_insert_check_table(THD *thd, TABLE_LIST *table_list,
 }
 
 
+/*
+  Get extra info for tables we insert into
+
+  @param table     table(TABLE object) we insert into,
+                   might be NULL in case of view
+  @param           table(TABLE_LIST object) or view we insert into
+*/
+
+static void prepare_for_positional_update(TABLE *table, TABLE_LIST *tables)
+{
+  if (table)
+  {
+    if(table->reginfo.lock_type != TL_WRITE_DELAYED)
+      table->prepare_for_position();
+    return;
+  }
+
+  DBUG_ASSERT(tables->view);
+  List_iterator<TABLE_LIST> it(*tables->view_tables);
+  TABLE_LIST *tbl;
+  while ((tbl= it++))
+    prepare_for_positional_update(tbl->table, tbl);
+
+  return;
+}
+
+
 /*
   Prepare items in INSERT statement
 
@@ -1262,9 +1291,8 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list,
     Only call prepare_for_posistion() if we are not performing a DELAYED
     operation. It will instead be executed by delayed insert thread.
   */
-  if ((duplic == DUP_UPDATE || duplic == DUP_REPLACE) &&
-      (table->reginfo.lock_type != TL_WRITE_DELAYED))
-    table->prepare_for_position();
+  if (duplic == DUP_UPDATE || duplic == DUP_REPLACE)
+    prepare_for_positional_update(table, table_list);
   DBUG_RETURN(FALSE);
 }
 
@@ -2667,6 +2695,12 @@ bool Delayed_insert::handle_inserts(void)
         thd.variables.time_zone = row->time_zone;
       }
 
+      /* if the delayed insert was killed, the killed status is
+         ignored while binlogging */
+      int errcode= 0;
+      if (thd.killed == THD::NOT_KILLED)
+        errcode= query_error_code(&thd, TRUE);
+      
       /*
         If the query has several rows to insert, only the first row will come
         here. In row-based binlogging, this means that the first row will be
@@ -2677,7 +2711,7 @@ bool Delayed_insert::handle_inserts(void)
       */
       thd.binlog_query(THD::ROW_QUERY_TYPE,
                        row->query.str, row->query.length,
-                       FALSE, FALSE);
+                       FALSE, FALSE, errcode);
 
       thd.time_zone_used = backup_time_zone_used;
       thd.variables.time_zone = backup_time_zone;
@@ -3090,7 +3124,10 @@ bool select_insert::send_data(List<Item> &values)
   store_values(values);
   thd->count_cuted_fields= CHECK_FIELD_IGNORE;
   if (thd->is_error())
+  {
+    table->auto_increment_field_not_null= FALSE;
     DBUG_RETURN(1);
+  }
   if (table_list)                               // Not CREATE ... SELECT
   {
     switch (table_list->view_check_option(thd, info.ignore)) {
@@ -3104,6 +3141,9 @@ bool select_insert::send_data(List<Item> &values)
   // Release latches in case bulk insert takes a long time
   ha_release_temporary_latches(thd);
   
+  // Release latches in case bulk insert takes a long time
+  ha_release_temporary_latches(thd);
+
   error= write_record(thd, table, &info);
   table->auto_increment_field_not_null= FALSE;
   
@@ -3176,7 +3216,8 @@ bool select_insert::send_eof()
   table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
   table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
 
-  if (changed= (info.copied || info.deleted || info.updated))
+  changed= (info.copied || info.deleted || info.updated);
+  if (changed)
   {
     /*
       We must invalidate the table in the query cache before binlog writing
@@ -3197,11 +3238,14 @@ bool select_insert::send_eof()
   */
   if (mysql_bin_log.is_open())
   {
+    int errcode= 0;
     if (!error)
       thd->clear_error();
+    else
+      errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
     thd->binlog_query(THD::ROW_QUERY_TYPE,
                       thd->query, thd->query_length,
-                      trans_table, FALSE, killed_status);
+                      trans_table, FALSE, errcode);
   }
   table->file->ha_release_auto_increment();
 
@@ -3268,8 +3312,11 @@ void select_insert::abort() {
     if (thd->transaction.stmt.modified_non_trans_table)
     {
         if (mysql_bin_log.is_open())
+        {
+          int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
           thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query, thd->query_length,
-                            transactional_table, FALSE);
+                            transactional_table, FALSE, errcode);
+        }
         if (!thd->current_stmt_binlog_row_based && !can_rollback_data())
           thd->transaction.all.modified_non_trans_table= TRUE;
 	if (changed)
@@ -3661,10 +3708,14 @@ select_create::binlog_show_create_table(TABLE **tables, uint count)
   DBUG_ASSERT(result == 0); /* store_create_info() always return 0 */
 
   if (mysql_bin_log.is_open())
+  {
+    int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
     thd->binlog_query(THD::STMT_QUERY_TYPE,
                       query.ptr(), query.length(),
                       /* is_trans */ TRUE,
-                      /* suppress_use */ FALSE);
+                      /* suppress_use */ FALSE,
+                      errcode);
+  }
 }
 
 void select_create::store_values(List<Item> &values)
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index b96ac91679b..444a8ae0736 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -433,6 +433,16 @@ bool is_keyword(const char *name, uint len)
   return get_hash_symbol(name,len,0)!=0;
 }
 
+/**
+  Check if name is a sql function
+
+    @param name      checked name
+
+    @return is this a native function or not
+    @retval 0         name is a function
+    @retval 1         name isn't a function
+*/
+
 bool is_lex_native_function(const LEX_STRING *name)
 {
   DBUG_ASSERT(name != NULL);
@@ -770,7 +780,7 @@ bool consume_comment(Lex_input_stream *lip, int remaining_recursions_permitted)
 
 int MYSQLlex(void *arg, void *yythd)
 {
-  reg1	uchar c;
+  reg1	uchar c= 0;
   bool comment_closed;
   int	tokval, result_state;
   uint length;
@@ -788,7 +798,6 @@ int MYSQLlex(void *arg, void *yythd)
   lip->start_token();
   state=lip->next_state;
   lip->next_state=MY_LEX_OPERATOR_OR_IDENT;
-  LINT_INIT(c);
   for (;;)
   {
     switch (state) {
@@ -917,7 +926,7 @@ int MYSQLlex(void *arg, void *yythd)
       else
 #endif
       {
-        for (result_state= c; ident_map[c= lip->yyGet()]; result_state|= c);
+        for (result_state= c; ident_map[c= lip->yyGet()]; result_state|= c) ;
         /* If there were non-ASCII characters, mark that we must convert */
         result_state= result_state & 0x80 ? IDENT_QUOTED : IDENT;
       }
@@ -929,7 +938,7 @@ int MYSQLlex(void *arg, void *yythd)
           If we find a space then this can't be an identifier. We notice this
           below by checking start != lex->ptr.
         */
-        for (; state_map[c] == MY_LEX_SKIP ; c= lip->yyGet());
+        for (; state_map[c] == MY_LEX_SKIP ; c= lip->yyGet()) ;
       }
       if (start == lip->get_ptr() && c == '.' && ident_map[lip->yyPeek()])
 	lip->next_state=MY_LEX_IDENT_SEP;
@@ -1002,7 +1011,7 @@ int MYSQLlex(void *arg, void *yythd)
         }
         else if (c == 'b')
         {
-          while ((c= lip->yyGet()) == '0' || c == '1');
+          while ((c= lip->yyGet()) == '0' || c == '1') ;
           if ((lip->yyLength() >= 3) && !ident_map[c])
           {
             /* Skip '0b' */
@@ -1061,7 +1070,7 @@ int MYSQLlex(void *arg, void *yythd)
       else
 #endif
       {
-        for (result_state=0; ident_map[c= lip->yyGet()]; result_state|= c);
+        for (result_state=0; ident_map[c= lip->yyGet()]; result_state|= c) ;
         /* If there were non-ASCII characters, mark that we must convert */
         result_state= result_state & 0x80 ? IDENT_QUOTED : IDENT;
       }
@@ -1161,7 +1170,7 @@ int MYSQLlex(void *arg, void *yythd)
 
     case MY_LEX_BIN_NUMBER:           // Found b'bin-string'
       lip->yySkip();                  // Accept opening '
-      while ((c= lip->yyGet()) == '0' || c == '1');
+      while ((c= lip->yyGet()) == '0' || c == '1') ;
       if (c != '\'')
         return(ABORT_SYM);            // Illegal hex constant
       lip->yySkip();                  // Accept closing '
@@ -1436,7 +1445,7 @@ int MYSQLlex(void *arg, void *yythd)
 	[(global | local | session) .]variable_name
       */
 
-      for (result_state= 0; ident_map[c= lip->yyGet()]; result_state|= c);
+      for (result_state= 0; ident_map[c= lip->yyGet()]; result_state|= c) ;
       /* If there were non-ASCII characters, mark that we must convert */
       result_state= result_state & 0x80 ? IDENT_QUOTED : IDENT;
 
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 62106a2500b..22b7d2e359c 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -1976,4 +1976,6 @@ extern bool is_lex_native_function(const LEX_STRING *name);
   @} (End of group Semantic_Analysis)
 */
 
+int my_missing_function_error(const LEX_STRING &token, const char *name);
+
 #endif /* MYSQL_SERVER */
diff --git a/sql/sql_list.h b/sql/sql_list.h
index 0d267111dad..22df77afeb3 100644
--- a/sql/sql_list.h
+++ b/sql/sql_list.h
@@ -86,7 +86,7 @@ struct list_node :public Sql_alloc
 };
 
 
-extern list_node end_of_list;
+extern MYSQL_PLUGIN_IMPORT list_node end_of_list;
 
 class base_list :public Sql_alloc
 {
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index d4f499b8d44..1758a6df5f9 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -86,7 +86,7 @@ static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
 static bool write_execute_load_query_log_event(THD *thd,
 					       bool duplicates, bool ignore,
 					       bool transactional_table,
-                                               THD::killed_state killed_status);
+                                               int errcode);
 #endif /* EMBEDDED_LIBRARY */
 
 /*
@@ -483,10 +483,12 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
 	/* If the file was not empty, wrote_create_file is true */
 	if (lf_info.wrote_create_file)
 	{
+          int errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
+          
 	  if (thd->transaction.stmt.modified_non_trans_table)
 	    write_execute_load_query_log_event(thd, handle_duplicates,
 					       ignore, transactional_table,
-                                               killed_status);
+                                               errcode);
 	  else
 	  {
 	    Delete_file_log_event d(thd, db, transactional_table);
@@ -528,8 +530,9 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
       read_info.end_io_cache();
       if (lf_info.wrote_create_file)
       {
+        int errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
         write_execute_load_query_log_event(thd, handle_duplicates, ignore,
-                                           transactional_table,killed_status);
+                                           transactional_table, errcode);
       }
     }
   }
@@ -553,7 +556,7 @@ err:
 static bool write_execute_load_query_log_event(THD *thd,
 					       bool duplicates, bool ignore,
 					       bool transactional_table,
-                                               THD::killed_state killed_err_arg)
+                                               int errcode)
 {
   Execute_load_query_log_event
     e(thd, thd->query, thd->query_length,
@@ -561,7 +564,7 @@ static bool write_execute_load_query_log_event(THD *thd,
       (uint) ((char*)thd->lex->fname_end - (char*)thd->query),
       (duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE :
       (ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR),
-      transactional_table, FALSE, killed_err_arg);
+      transactional_table, FALSE, errcode);
   e.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
   return mysql_bin_log.write(&e);
 }
@@ -747,9 +750,9 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
 
       real_item= item->real_item();
 
-      if (!read_info.enclosed &&
+      if ((!read_info.enclosed &&
 	  (enclosed_length && length == 4 &&
-           !memcmp(pos, STRING_WITH_LEN("NULL"))) ||
+           !memcmp(pos, STRING_WITH_LEN("NULL")))) ||
 	  (length == 1 && read_info.found_null))
       {
 
@@ -1148,8 +1151,8 @@ int READ_INFO::read_field()
 	}
 	// End of enclosed field if followed by field_term or line_term
 	if (chr == my_b_EOF ||
-	    chr == line_term_char && terminator(line_term_ptr,
-						line_term_length))
+	    (chr == line_term_char && terminator(line_term_ptr,
+						line_term_length)))
 	{					// Maybe unexpected linefeed
 	  enclosed=1;
 	  found_end_of_line=1;
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 87497b35bd3..0faed66cd4e 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -196,11 +196,8 @@ bool begin_trans(THD *thd)
     error= -1;
   else
   {
-    LEX *lex= thd->lex;
     thd->options|= OPTION_BEGIN;
     thd->server_status|= SERVER_STATUS_IN_TRANS;
-    if (lex->start_transaction_opt & MYSQL_START_TRANS_OPT_WITH_CONS_SNAPSHOT)
-      error= ha_start_consistent_snapshot(thd);
   }
   return error;
 }
@@ -1173,12 +1170,12 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
   }
   case COM_STMT_EXECUTE:
   {
-    mysql_stmt_execute(thd, packet, packet_length);
+    mysqld_stmt_execute(thd, packet, packet_length);
     break;
   }
   case COM_STMT_FETCH:
   {
-    mysql_stmt_fetch(thd, packet, packet_length);
+    mysqld_stmt_fetch(thd, packet, packet_length);
     break;
   }
   case COM_STMT_SEND_LONG_DATA:
@@ -1188,17 +1185,17 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
   }
   case COM_STMT_PREPARE:
   {
-    mysql_stmt_prepare(thd, packet, packet_length);
+    mysqld_stmt_prepare(thd, packet, packet_length);
     break;
   }
   case COM_STMT_CLOSE:
   {
-    mysql_stmt_close(thd, packet);
+    mysqld_stmt_close(thd, packet);
     break;
   }
   case COM_STMT_RESET:
   {
-    mysql_stmt_reset(thd, packet);
+    mysqld_stmt_reset(thd, packet);
     break;
   }
   case COM_QUERY:
@@ -1372,7 +1369,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
       if (check_access(thd, CREATE_ACL, db.str , 0, 1, 0,
                        is_schema_db(db.str)))
 	break;
-      general_log_print(thd, command, packet);
+      general_log_print(thd, command, "%.*s", db.length, db.str);
       bzero(&create_info, sizeof(create_info));
       mysql_create_db(thd, (lower_case_table_names == 2 ? alias.str : db.str),
                       &create_info, 0);
@@ -1397,7 +1394,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
                    ER(ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0));
 	break;
       }
-      general_log_write(thd, command, db.str, db.length);
+      general_log_write(thd, command, "%.*s", db.length, db.str);
       mysql_rm_db(thd, db.str, 0, 0);
       break;
     }
@@ -1582,14 +1579,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
     break;
   }
 
-  /* If commit fails, we should be able to reset the OK status. */
-  thd->main_da.can_overwrite_status= TRUE;
-  ha_autocommit_or_rollback(thd, thd->is_error());
-  thd->main_da.can_overwrite_status= FALSE;
-
-  thd->transaction.stmt.reset();
-
-
   /* report error issued during command execution */
   if (thd->killed_errno())
   {
@@ -1602,6 +1591,13 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
     thd->mysys_var->abort= 0;
   }
 
+  /* If commit fails, we should be able to reset the OK status. */
+  thd->main_da.can_overwrite_status= TRUE;
+  ha_autocommit_or_rollback(thd, thd->is_error());
+  thd->main_da.can_overwrite_status= FALSE;
+
+  thd->transaction.stmt.reset();
+
   net_end_statement(thd);
   query_cache_end_of_result(thd);
 
@@ -3946,7 +3942,9 @@ end_with_restore_list:
         res= mysql_routine_grant(thd, all_tables,
                                  lex->type == TYPE_ENUM_PROCEDURE, 
                                  lex->users_list, grants,
-                                 lex->sql_command == SQLCOM_REVOKE, 0);
+                                 lex->sql_command == SQLCOM_REVOKE, TRUE);
+        if (!res)
+          my_ok(thd);
       }
       else
       {
@@ -4090,6 +4088,11 @@ end_with_restore_list:
     }
     if (begin_trans(thd))
       goto error;
+    if (lex->start_transaction_opt & MYSQL_START_TRANS_OPT_WITH_CONS_SNAPSHOT)
+    {
+      if (ha_start_consistent_snapshot(thd))
+        goto error;
+    }
     my_ok(thd);
     break;
   case SQLCOM_COMMIT:
@@ -5230,7 +5233,7 @@ check_access(THD *thd, ulong want_access, const char *db, ulong *save_priv,
 
   if (schema_db)
   {
-    if (!(sctx->master_access & FILE_ACL) && (want_access & FILE_ACL) ||
+    if ((!(sctx->master_access & FILE_ACL) && (want_access & FILE_ACL)) ||
         (want_access & ~(SELECT_ACL | EXTRA_ACL | FILE_ACL)))
     {
       if (!no_errors)
@@ -5264,7 +5267,7 @@ check_access(THD *thd, ulong want_access, const char *db, ulong *save_priv,
     DBUG_RETURN(FALSE);
   }
   if (((want_access & ~sctx->master_access) & ~(DB_ACLS | EXTRA_ACL)) ||
-      ! db && dont_check_global_grants)
+      (! db && dont_check_global_grants))
   {						// We can never grant this
     DBUG_PRINT("error",("No possible access"));
     if (!no_errors)
@@ -5542,7 +5545,7 @@ bool check_some_access(THD *thd, ulong want_access, TABLE_LIST *table)
       if (!check_access(thd, access, table->db,
                         &table->grant.privilege, 0, 1,
                         test(table->schema_table)) &&
-          !check_grant(thd, access, table, 0, 1, 1))
+           !check_grant(thd, access, table, 0, 1, 1))
         DBUG_RETURN(0);
     }
   }
@@ -5798,7 +5801,7 @@ mysql_new_select(LEX *lex, bool move_down)
   /*
     Don't evaluate this subquery during statement prepare even if
     it's a constant one. The flag is switched off in the end of
-    mysql_stmt_prepare.
+    mysqld_stmt_prepare.
   */
   if (thd->stmt_arena->is_stmt_prepare())
     select_lex->uncacheable|= UNCACHEABLE_PREPARE;
@@ -7873,7 +7876,7 @@ bool parse_sql(THD *thd,
   /* Check that if MYSQLparse() failed, thd->is_error() is set. */
 
   DBUG_ASSERT(!mysql_parse_status ||
-              mysql_parse_status && thd->is_error());
+              (mysql_parse_status && thd->is_error()));
 
   /* Reset parser state. */
 
diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc
index 4a50650b6f4..284eaebbe87 100644
--- a/sql/sql_partition.cc
+++ b/sql/sql_partition.cc
@@ -918,6 +918,9 @@ bool fix_fields_part_func(THD *thd, Item* func_expr, TABLE *table,
     Set-up the TABLE_LIST object to be a list with a single table
     Set the object to zero to create NULL pointers and set alias
     and real name to table name and get database name from file name.
+    TODO: Consider generalizing or refactoring Lex::add_table_to_list() so
+    it can be used in all places where we create TABLE_LIST objects.
+    Also consider creating appropriate constructors for TABLE_LIST.
   */
 
   bzero((void*)&tables, sizeof(TABLE_LIST));
@@ -925,6 +928,13 @@ bool fix_fields_part_func(THD *thd, Item* func_expr, TABLE *table,
   tables.table= table;
   tables.next_local= 0;
   tables.next_name_resolution_table= 0;
+  /*
+    Cache the table in Item_fields. All the tables can be cached except
+    the trigger pseudo table.
+  */
+  tables.cacheable_table= TRUE;
+  context= thd->lex->current_context();
+  tables.select_lex= context->select_lex;
   strmov(db_name_string, table->s->normalized_path.str);
   dir_length= dirname_length(db_name_string);
   db_name_string[dir_length - 1]= 0;
@@ -932,7 +942,6 @@ bool fix_fields_part_func(THD *thd, Item* func_expr, TABLE *table,
   db_name= &db_name_string[home_dir_length];
   tables.db= db_name;
 
-  context= thd->lex->current_context();
   table->map= 1; //To ensure correct calculation of const item
   table->get_fields_in_item_tree= TRUE;
   save_table_list= context->table_list;
@@ -964,8 +973,9 @@ bool fix_fields_part_func(THD *thd, Item* func_expr, TABLE *table,
 
   save_use_only_table_context= thd->lex->use_only_table_context;
   thd->lex->use_only_table_context= TRUE;
+  thd->lex->current_select->cur_pos_in_select_list= UNDEF_POS;
   
-  error= func_expr->fix_fields(thd, (Item**)0);
+  error= func_expr->fix_fields(thd, (Item**)&func_expr);
 
   thd->lex->use_only_table_context= save_use_only_table_context;
 
@@ -5090,7 +5100,7 @@ static bool mysql_change_partitions(ALTER_PARTITION_PARAM_TYPE *lpt)
   handler *file= lpt->table->file;
   DBUG_ENTER("mysql_change_partitions");
 
-  build_table_filename(path, sizeof(path), lpt->db, lpt->table_name, "", 0);
+  build_table_filename(path, sizeof(path) - 1, lpt->db, lpt->table_name, "", 0);
   if ((error= file->ha_change_partitions(lpt->create_info, path, &lpt->copied,
                                          &lpt->deleted, lpt->pack_frm_data,
                                          lpt->pack_frm_len)))
@@ -5130,7 +5140,7 @@ static bool mysql_rename_partitions(ALTER_PARTITION_PARAM_TYPE *lpt)
   int error;
   DBUG_ENTER("mysql_rename_partitions");
 
-  build_table_filename(path, sizeof(path), lpt->db, lpt->table_name, "", 0);
+  build_table_filename(path, sizeof(path) - 1, lpt->db, lpt->table_name, "", 0);
   if ((error= lpt->table->file->ha_rename_partitions(path)))
   {
     if (error != 1)
@@ -5171,7 +5181,7 @@ static bool mysql_drop_partitions(ALTER_PARTITION_PARAM_TYPE *lpt)
   int error;
   DBUG_ENTER("mysql_drop_partitions");
 
-  build_table_filename(path, sizeof(path), lpt->db, lpt->table_name, "", 0);
+  build_table_filename(path, sizeof(path) - 1, lpt->db, lpt->table_name, "", 0);
   if ((error= lpt->table->file->ha_drop_partitions(path)))
   {
     lpt->table->file->print_error(error, MYF(0));
@@ -5516,10 +5526,10 @@ static bool write_log_drop_shadow_frm(ALTER_PARTITION_PARAM_TYPE *lpt)
   partition_info *part_info= lpt->part_info;
   DDL_LOG_MEMORY_ENTRY *log_entry;
   DDL_LOG_MEMORY_ENTRY *exec_log_entry= NULL;
-  char shadow_path[FN_REFLEN];
+  char shadow_path[FN_REFLEN + 1];
   DBUG_ENTER("write_log_drop_shadow_frm");
 
-  build_table_shadow_filename(shadow_path, sizeof(shadow_path), lpt);
+  build_table_shadow_filename(shadow_path, sizeof(shadow_path) - 1, lpt);
   pthread_mutex_lock(&LOCK_gdl);
   if (write_log_replace_delete_frm(lpt, 0UL, NULL,
                                   (const char*)shadow_path, FALSE))
@@ -5559,15 +5569,15 @@ static bool write_log_rename_frm(ALTER_PARTITION_PARAM_TYPE *lpt)
   partition_info *part_info= lpt->part_info;
   DDL_LOG_MEMORY_ENTRY *log_entry;
   DDL_LOG_MEMORY_ENTRY *exec_log_entry= part_info->exec_log_entry;
-  char path[FN_REFLEN];
-  char shadow_path[FN_REFLEN];
+  char path[FN_REFLEN + 1];
+  char shadow_path[FN_REFLEN + 1];
   DDL_LOG_MEMORY_ENTRY *old_first_log_entry= part_info->first_log_entry;
   DBUG_ENTER("write_log_rename_frm");
 
   part_info->first_log_entry= NULL;
-  build_table_filename(path, sizeof(path), lpt->db,
+  build_table_filename(path, sizeof(path) - 1, lpt->db,
                        lpt->table_name, "", 0);
-  build_table_shadow_filename(shadow_path, sizeof(shadow_path), lpt);
+  build_table_shadow_filename(shadow_path, sizeof(shadow_path) - 1, lpt);
   pthread_mutex_lock(&LOCK_gdl);
   if (write_log_replace_delete_frm(lpt, 0UL, shadow_path, path, TRUE))
     goto error;
@@ -5610,16 +5620,16 @@ static bool write_log_drop_partition(ALTER_PARTITION_PARAM_TYPE *lpt)
   partition_info *part_info= lpt->part_info;
   DDL_LOG_MEMORY_ENTRY *log_entry;
   DDL_LOG_MEMORY_ENTRY *exec_log_entry= part_info->exec_log_entry;
-  char tmp_path[FN_REFLEN];
-  char path[FN_REFLEN];
+  char tmp_path[FN_REFLEN + 1];
+  char path[FN_REFLEN + 1];
   uint next_entry= 0;
   DDL_LOG_MEMORY_ENTRY *old_first_log_entry= part_info->first_log_entry;
   DBUG_ENTER("write_log_drop_partition");
 
   part_info->first_log_entry= NULL;
-  build_table_filename(path, sizeof(path), lpt->db,
+  build_table_filename(path, sizeof(path) - 1, lpt->db,
                        lpt->table_name, "", 0);
-  build_table_filename(tmp_path, sizeof(tmp_path), lpt->db,
+  build_table_filename(tmp_path, sizeof(tmp_path) - 1, lpt->db,
                        lpt->table_name, "#", 0);
   pthread_mutex_lock(&LOCK_gdl);
   if (write_log_dropped_partitions(lpt, &next_entry, (const char*)path,
@@ -5669,14 +5679,14 @@ static bool write_log_add_change_partition(ALTER_PARTITION_PARAM_TYPE *lpt)
   partition_info *part_info= lpt->part_info;
   DDL_LOG_MEMORY_ENTRY *log_entry;
   DDL_LOG_MEMORY_ENTRY *exec_log_entry= NULL;
-  char tmp_path[FN_REFLEN];
-  char path[FN_REFLEN];
+  char tmp_path[FN_REFLEN + 1];
+  char path[FN_REFLEN + 1];
   uint next_entry= 0;
   DBUG_ENTER("write_log_add_change_partition");
 
-  build_table_filename(path, sizeof(path), lpt->db,
+  build_table_filename(path, sizeof(path) - 1, lpt->db,
                        lpt->table_name, "", 0);
-  build_table_filename(tmp_path, sizeof(tmp_path), lpt->db,
+  build_table_filename(tmp_path, sizeof(tmp_path) - 1, lpt->db,
                        lpt->table_name, "#", 0);
   pthread_mutex_lock(&LOCK_gdl);
   if (write_log_dropped_partitions(lpt, &next_entry, (const char*)path,
@@ -5723,16 +5733,16 @@ static bool write_log_final_change_partition(ALTER_PARTITION_PARAM_TYPE *lpt)
   partition_info *part_info= lpt->part_info;
   DDL_LOG_MEMORY_ENTRY *log_entry;
   DDL_LOG_MEMORY_ENTRY *exec_log_entry= part_info->exec_log_entry;
-  char path[FN_REFLEN];
-  char shadow_path[FN_REFLEN];
+  char path[FN_REFLEN + 1];
+  char shadow_path[FN_REFLEN + 1];
   DDL_LOG_MEMORY_ENTRY *old_first_log_entry= part_info->first_log_entry;
   uint next_entry= 0;
   DBUG_ENTER("write_log_final_change_partition");
 
   part_info->first_log_entry= NULL;
-  build_table_filename(path, sizeof(path), lpt->db,
+  build_table_filename(path, sizeof(path) - 1, lpt->db,
                        lpt->table_name, "", 0);
-  build_table_shadow_filename(shadow_path, sizeof(shadow_path), lpt);
+  build_table_shadow_filename(shadow_path, sizeof(shadow_path) - 1, lpt);
   pthread_mutex_lock(&LOCK_gdl);
   if (write_log_dropped_partitions(lpt, &next_entry, (const char*)path,
                       lpt->alter_info->flags & ALTER_REORGANIZE_PARTITION))
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index 1465b6d2d30..da168d36429 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -29,6 +29,18 @@
 
 extern struct st_mysql_plugin *mysqld_builtins[];
 
+/**
+  @note The order of the enumeration is critical.
+  @see construct_options
+*/
+static const char *global_plugin_typelib_names[]=
+  { "OFF", "ON", "FORCE", NULL };
+enum enum_plugin_load_policy {PLUGIN_OFF, PLUGIN_ON, PLUGIN_FORCE};
+static TYPELIB global_plugin_typelib=
+  { array_elements(global_plugin_typelib_names)-1,
+    "", global_plugin_typelib_names, NULL };
+
+
 char *opt_plugin_load= NULL;
 char *opt_plugin_dir_ptr;
 char opt_plugin_dir[FN_REFLEN];
@@ -192,7 +204,7 @@ static void plugin_load(MEM_ROOT *tmp_root, int *argc, char **argv);
 static bool plugin_load_list(MEM_ROOT *tmp_root, int *argc, char **argv,
                              const char *list);
 static int test_plugin_options(MEM_ROOT *, struct st_plugin_int *,
-                               int *, char **, my_bool);
+                               int *, char **);
 static bool register_builtin(struct st_mysql_plugin *, struct st_plugin_int *,
                              struct st_plugin_int **);
 static void unlock_variables(THD *thd, struct system_variables *vars);
@@ -751,7 +763,7 @@ static bool plugin_add(MEM_ROOT *tmp_root,
       tmp.name.length= name_len;
       tmp.ref_count= 0;
       tmp.state= PLUGIN_IS_UNINITIALIZED;
-      if (test_plugin_options(tmp_root, &tmp, argc, argv, true))
+      if (test_plugin_options(tmp_root, &tmp, argc, argv))
         tmp.state= PLUGIN_IS_DISABLED;
 
       if ((tmp_plugin_ptr= plugin_insert_or_reuse(&tmp)))
@@ -997,7 +1009,6 @@ static int plugin_initialize(struct st_plugin_int *plugin)
   DBUG_ENTER("plugin_initialize");
 
   safe_mutex_assert_owner(&LOCK_plugin);
-
   if (plugin_type_initialize[plugin->plugin->type])
   {
     if ((*plugin_type_initialize[plugin->plugin->type])(plugin))
@@ -1083,6 +1094,20 @@ uchar *get_bookmark_hash_key(const uchar *buff, size_t *length,
   return (uchar*) var->key;
 }
 
+static inline void convert_dash_to_underscore(char *str, int len)
+{
+  for (char *p= str; p <= str+len; p++)
+    if (*p == '-')
+      *p= '_';
+}
+
+static inline void convert_underscore_to_dash(char *str, int len)
+{
+  for (char *p= str; p <= str+len; p++)
+    if (*p == '_')
+      *p= '-';
+}
+
 
 /*
   The logic is that we first load and initialize all compiled in plugins.
@@ -1094,11 +1119,12 @@ uchar *get_bookmark_hash_key(const uchar *buff, size_t *length,
 int plugin_init(int *argc, char **argv, int flags)
 {
   uint i;
-  bool def_enabled, is_myisam;
+  bool is_myisam;
   struct st_mysql_plugin **builtins;
   struct st_mysql_plugin *plugin;
   struct st_plugin_int tmp, *plugin_ptr, **reap;
   MEM_ROOT tmp_root;
+  bool reaped_mandatory_plugin= FALSE;
   DBUG_ENTER("plugin_init");
 
   if (initialized)
@@ -1142,17 +1168,13 @@ int plugin_init(int *argc, char **argv, int flags)
           !my_strnncoll(&my_charset_latin1, (const uchar*) plugin->name,
                         6, (const uchar*) "InnoDB", 6))
         continue;
-      /* by default, ndbcluster and federated are disabled */
-      def_enabled=
-        my_strcasecmp(&my_charset_latin1, plugin->name, "NDBCLUSTER") != 0 &&
-        my_strcasecmp(&my_charset_latin1, plugin->name, "FEDERATED") != 0;
       bzero(&tmp, sizeof(tmp));
       tmp.plugin= plugin;
       tmp.name.str= (char *)plugin->name;
       tmp.name.length= strlen(plugin->name);
       tmp.state= 0;
       free_root(&tmp_root, MYF(MY_MARK_BLOCKS_FREE));
-      if (test_plugin_options(&tmp_root, &tmp, argc, argv, def_enabled))
+      if (test_plugin_options(&tmp_root, &tmp, argc, argv))
         tmp.state= PLUGIN_IS_DISABLED;
       else
         tmp.state= PLUGIN_IS_UNINITIALIZED;
@@ -1227,6 +1249,8 @@ int plugin_init(int *argc, char **argv, int flags)
   while ((plugin_ptr= *(--reap)))
   {
     pthread_mutex_unlock(&LOCK_plugin);
+    if (plugin_ptr->is_mandatory)
+      reaped_mandatory_plugin= TRUE;
     plugin_deinitialize(plugin_ptr, true);
     pthread_mutex_lock(&LOCK_plugin);
     plugin_del(plugin_ptr);
@@ -1234,6 +1258,8 @@ int plugin_init(int *argc, char **argv, int flags)
 
   pthread_mutex_unlock(&LOCK_plugin);
   my_afree(reap);
+  if (reaped_mandatory_plugin)
+    goto err;
 
 end:
   free_root(&tmp_root, MYF(0));
@@ -1299,7 +1325,7 @@ bool plugin_register_builtin(THD *thd, struct st_mysql_plugin *plugin)
   pthread_mutex_lock(&LOCK_plugin);
   rw_wrlock(&LOCK_system_variables_hash);
 
-  if (test_plugin_options(thd->mem_root, &tmp, &dummy_argc, NULL, true))
+  if (test_plugin_options(thd->mem_root, &tmp, &dummy_argc, NULL))
     goto end;
   tmp.state= PLUGIN_IS_UNINITIALIZED;
   if ((result= register_builtin(plugin, &tmp, &ptr)))
@@ -2889,59 +2915,78 @@ my_bool get_one_plugin_option(int optid __attribute__((unused)),
 }
 
 
+/**
+  Creates a set of my_option objects associated with a specified plugin-
+  handle.
+
+  @param mem_root Memory allocator to be used.
+  @param tmp A pointer to a plugin handle
+  @param[out] options A pointer to a pre-allocated static array
+
+  The set is stored in the pre-allocated static array supplied to the function.
+  The size of the array is calculated as (number_of_plugin_varaibles*2+3). The
+  reason is that each option can have a prefix '--plugin-' in addtion to the
+  shorter form '--&lt;plugin-name&gt;'. There is also space allocated for
+  terminating NULL pointers.
+
+  @return
+    @retval -1 An error occurred
+    @retval 0 Success
+*/
+
 static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp,
-                             my_option *options, my_bool **enabled,
-                             bool can_disable)
+                             my_option *options)
 {
   const char *plugin_name= tmp->plugin->name;
-  uint namelen= strlen(plugin_name), optnamelen;
-  uint buffer_length= namelen * 4 + (can_disable ? 75 : 10);
-  char *name= (char*) alloc_root(mem_root, buffer_length) + 1;
-  char *optname, *p;
+  const LEX_STRING plugin_dash = { C_STRING_WITH_LEN("plugin-") };
+  uint plugin_name_len= strlen(plugin_name);
+  uint optnamelen;
+  const int max_comment_len= 180;
+  char *comment= (char *) alloc_root(mem_root, max_comment_len + 1);
+  char *optname;
+
   int index= 0, offset= 0;
   st_mysql_sys_var *opt, **plugin_option;
   st_bookmark *v;
+
+  /** Used to circumvent the const attribute on my_option::name */
+  char *plugin_name_ptr, *plugin_name_with_prefix_ptr;
+
   DBUG_ENTER("construct_options");
-  DBUG_PRINT("plugin", ("plugin: '%s'  enabled: %d  can_disable: %d",
-                        plugin_name, **enabled, can_disable));
 
+  options[0].name= plugin_name_ptr= (char*) alloc_root(mem_root,
+                                                       plugin_name_len + 1);
+  strcpy(plugin_name_ptr, plugin_name);
+  my_casedn_str(&my_charset_latin1, plugin_name_ptr);
+  convert_underscore_to_dash(plugin_name_ptr, plugin_name_len);
   /* support --skip-plugin-foo syntax */
-  memcpy(name, plugin_name, namelen + 1);
-  my_casedn_str(&my_charset_latin1, name);
-  strxmov(name + namelen + 1, "plugin-", name, NullS);
-  /* Now we have namelen + 1 + 7 + namelen + 1 == namelen * 2 + 9. */
+  options[1].name= plugin_name_with_prefix_ptr= (char*) alloc_root(mem_root,
+                                                plugin_name_len +
+                                                plugin_dash.length + 1);
+  strxmov(plugin_name_with_prefix_ptr, plugin_dash.str, options[0].name, NullS);
 
-  for (p= name + namelen*2 + 8; p > name; p--)
-    if (*p == '_')
-      *p= '-';
+  options[0].id= options[1].id= 256; /* must be >255. dup id ok */
+  options[0].var_type= options[1].var_type= GET_ENUM;
+  options[0].arg_type= options[1].arg_type= OPT_ARG;
+  options[0].def_value= options[1].def_value= 1; /* ON */
+  options[0].typelib= options[1].typelib= &global_plugin_typelib;
 
-  if (can_disable)
-  {
-    strxmov(name + namelen*2 + 10, "Enable ", plugin_name, " plugin. "
-            "Disable with --skip-", name," (will save memory).", NullS);
-    /*
-      Now we have namelen * 2 + 10 (one char unused) + 7 + namelen + 9 +
-      20 + namelen + 20 + 1 == namelen * 4 + 67.
-    */
-
-    options[0].comment= name + namelen*2 + 10;
-  }
+  strxnmov(comment, max_comment_len, "Enable or disable ", plugin_name,
+          " plugin. Possible values are ON, OFF, FORCE (don't start "
+          "if the plugin fails to load).", NullS);
+  options[0].comment= comment;
 
   /*
-    NOTE: 'name' is one char above the allocated buffer!
-    NOTE: This code assumes that 'my_bool' and 'char' are of same size.
+    Allocate temporary space for the value of the tristate.
+    This option will have a limited lifetime and is not used beyond
+    server initialization.
+    GET_ENUM value is an integer.
   */
-  *((my_bool *)(name -1))= **enabled;
-  *enabled= (my_bool *)(name - 1);
+  options[0].value= options[1].value= (uchar **)alloc_root(mem_root,
+                                                          sizeof(int));
+  *((uint*) options[0].value)= *((uint*) options[1].value)=
+    (uint) options[0].def_value;
 
-
-  options[1].name= (options[0].name= name) + namelen + 1;
-  options[0].id= options[1].id= 256; /* must be >255. dup id ok */
-  options[0].var_type= options[1].var_type= GET_BOOL;
-  options[0].arg_type= options[1].arg_type= NO_ARG;
-  options[0].def_value= options[1].def_value= **enabled;
-  options[0].value= options[0].u_max_value=
-  options[1].value= options[1].u_max_value= (uchar**) (name - 1);
   options+= 2;
 
   /*
@@ -2955,7 +3000,7 @@ static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp,
     opt= *plugin_option;
     if (!(opt->flags & PLUGIN_VAR_THDLOCAL))
       continue;
-    if (!(register_var(name, opt->name, opt->flags)))
+    if (!(register_var(plugin_name_ptr, opt->name, opt->flags)))
       continue;
     switch (opt->flags & PLUGIN_VAR_TYPEMASK) {
     case PLUGIN_VAR_BOOL:
@@ -3020,7 +3065,7 @@ static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp,
       if (!opt->update)
       {
         opt->update= update_func_str;
-        if (!(opt->flags & PLUGIN_VAR_MEMALLOC | PLUGIN_VAR_READONLY))
+        if (!(opt->flags & (PLUGIN_VAR_MEMALLOC | PLUGIN_VAR_READONLY)))
         {
           opt->flags|= PLUGIN_VAR_READONLY;
           sql_print_warning("Server variable %s of plugin %s was forced "
@@ -3062,14 +3107,14 @@ static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp,
     if (!(opt->flags & PLUGIN_VAR_THDLOCAL))
     {
       optnamelen= strlen(opt->name);
-      optname= (char*) alloc_root(mem_root, namelen + optnamelen + 2);
-      strxmov(optname, name, "-", opt->name, NullS);
-      optnamelen= namelen + optnamelen + 1;
+      optname= (char*) alloc_root(mem_root, plugin_name_len + optnamelen + 2);
+      strxmov(optname, plugin_name_ptr, "-", opt->name, NullS);
+      optnamelen= plugin_name_len + optnamelen + 1;
     }
     else
     {
       /* this should not fail because register_var should create entry */
-      if (!(v= find_bookmark(name, opt->name, opt->flags)))
+      if (!(v= find_bookmark(plugin_name_ptr, opt->name, opt->flags)))
       {
         sql_print_error("Thread local variable '%s' not allocated "
                         "in plugin '%s'.", opt->name, plugin_name);
@@ -3085,10 +3130,7 @@ static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp,
                                    (optnamelen= v->name_len) + 1);
     }
 
-    /* convert '_' to '-' */
-    for (p= optname; *p; p++)
-      if (*p == '_')
-        *p= '-';
+    convert_underscore_to_dash(optname, optnamelen);
 
     options->name= optname;
     options->comment= opt->comment;
@@ -3103,10 +3145,13 @@ static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp,
     else
       options->value= options->u_max_value= *(uchar***) (opt + 1);
 
+    char *option_name_ptr;
     options[1]= options[0];
-    options[1].name= p= (char*) alloc_root(mem_root, optnamelen + 8);
-    options[1].comment= 0; // hidden
-    strxmov(p, "plugin-", optname, NullS);
+    options[1].name= option_name_ptr= (char*) alloc_root(mem_root,
+                                                        plugin_dash.length +
+                                                        optnamelen + 1);
+    options[1].comment= 0; /* Hidden from the help text */
+    strxmov(option_name_ptr, plugin_dash.str, optname, NullS);
 
     options+= 2;
   }
@@ -3120,55 +3165,57 @@ static my_option *construct_help_options(MEM_ROOT *mem_root,
 {
   st_mysql_sys_var **opt;
   my_option *opts;
-  my_bool dummy, can_disable;
-  my_bool *dummy2= &dummy;
   uint count= EXTRA_OPTIONS;
   DBUG_ENTER("construct_help_options");
 
-  for (opt= p->plugin->system_vars; opt && *opt; opt++, count+= 2);
+  for (opt= p->plugin->system_vars; opt && *opt; opt++, count+= 2)
+    ;
 
   if (!(opts= (my_option*) alloc_root(mem_root, sizeof(my_option) * count)))
     DBUG_RETURN(NULL);
 
   bzero(opts, sizeof(my_option) * count);
 
-  dummy= TRUE; /* plugin is enabled. */
-
-  can_disable=
-      my_strcasecmp(&my_charset_latin1, p->name.str, "MyISAM") &&
-      my_strcasecmp(&my_charset_latin1, p->name.str, "MEMORY");
-
-  if (construct_options(mem_root, p, opts, &dummy2, can_disable))
+  if (construct_options(mem_root, p, opts))
     DBUG_RETURN(NULL);
 
   DBUG_RETURN(opts);
 }
 
 
-/*
-  SYNOPSIS
-    test_plugin_options()
-    tmp_root                    temporary scratch space
-    plugin                      internal plugin structure
-    argc                        user supplied arguments
-    argv                        user supplied arguments
-    default_enabled             default plugin enable status
-  RETURNS:
-    0 SUCCESS - plugin should be enabled/loaded
-  NOTE:
-    Requires that a write-lock is held on LOCK_system_variables_hash
+/**
+  Create and register system variables supplied from the plugin and
+  assigns initial values from corresponding command line arguments.
+
+  @param tmp_root Temporary scratch space
+  @param[out] plugin Internal plugin structure
+  @param argc Number of command line arguments
+  @param argv Command line argument vector
+
+  The plugin will be updated with a policy on how to handle errors during
+  initialization.
+
+  @note Requires that a write-lock is held on LOCK_system_variables_hash
+
+  @return How initialization of the plugin should be handled.
+    @retval  0 Initialization should proceed.
+    @retval  1 Plugin is disabled.
+    @retval -1 An error has occurred.
 */
+
 static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp,
-                               int *argc, char **argv, my_bool default_enabled)
+                               int *argc, char **argv)
 {
   struct sys_var_chain chain= { NULL, NULL };
-  my_bool enabled_saved= default_enabled, can_disable;
-  my_bool *enabled= &default_enabled;
+  my_bool can_disable;
+  bool disable_plugin;
+  enum_plugin_load_policy plugin_load_policy= PLUGIN_ON;
+
   MEM_ROOT *mem_root= alloc_root_inited(&tmp->mem_root) ?
                       &tmp->mem_root : &plugin_mem_root;
   st_mysql_sys_var **opt;
   my_option *opts= NULL;
-  char *p, *varname;
+  char *varname;
   int error;
   st_mysql_sys_var *o;
   sys_var *v;
@@ -3177,13 +3224,17 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp,
   DBUG_ENTER("test_plugin_options");
   DBUG_ASSERT(tmp->plugin && tmp->name.str);
 
+  /*
+    The 'federated' and 'ndbcluster' storage engines are always disabled by
+    default.
+  */
+  if (!(my_strcasecmp(&my_charset_latin1, tmp->name.str, "federated") &&
+      my_strcasecmp(&my_charset_latin1, tmp->name.str, "ndbcluster")))
+    plugin_load_policy= PLUGIN_OFF;
+
   for (opt= tmp->plugin->system_vars; opt && *opt; opt++)
     count+= 2; /* --{plugin}-{optname} and --plugin-{plugin}-{optname} */
 
-  can_disable=
-      my_strcasecmp(&my_charset_latin1, tmp->name.str, "MyISAM") &&
-      my_strcasecmp(&my_charset_latin1, tmp->name.str, "MEMORY");
-
   if (count > EXTRA_OPTIONS || (*argc > 1))
   {
     if (!(opts= (my_option*) alloc_root(tmp_root, sizeof(my_option) * count)))
@@ -3193,12 +3244,18 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp,
     }
     bzero(opts, sizeof(my_option) * count);
 
-    if (construct_options(tmp_root, tmp, opts, &enabled, can_disable))
+    if (construct_options(tmp_root, tmp, opts))
     {
       sql_print_error("Bad options for plugin '%s'.", tmp->name.str);
       DBUG_RETURN(-1);
     }
 
+    /*
+      We adjust the default value to account for the hardcoded exceptions
+      we have set for the federated and ndbcluster storage engines.
+    */
+    opts[0].def_value= opts[1].def_value= (int)plugin_load_policy;
+
     error= handle_options(argc, &argv, opts, get_one_plugin_option);
     (*argc)++; /* add back one for the program name */
 
@@ -3208,64 +3265,79 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp,
                        tmp->name.str);
        goto err;
     }
+    /*
+     Set plugin loading policy from option value. First element in the option
+     list is always the <plugin name> option value.
+    */
+    plugin_load_policy= (enum_plugin_load_policy)*(uint*)opts[0].value;
   }
 
-  if (!*enabled && !can_disable)
+  disable_plugin= (plugin_load_policy == PLUGIN_OFF);
+  /*
+    The 'MyISAM' and 'Memory' storage engines currently can't be disabled.
+  */
+  can_disable=
+    my_strcasecmp(&my_charset_latin1, tmp->name.str, "MyISAM") &&
+    my_strcasecmp(&my_charset_latin1, tmp->name.str, "MEMORY");
+
+  tmp->is_mandatory= (plugin_load_policy == PLUGIN_FORCE) || !can_disable;
+
+  if (disable_plugin && !can_disable)
   {
     sql_print_warning("Plugin '%s' cannot be disabled", tmp->name.str);
-    *enabled= TRUE;
+    disable_plugin= FALSE;
+  }
+
+  /*
+    If the plugin is disabled it should not be initialized.
+  */
+  if (disable_plugin)
+  {
+    if (global_system_variables.log_warnings)
+      sql_print_information("Plugin '%s' is disabled.",
+                            tmp->name.str);
+    if (opts)
+      my_cleanup_options(opts);
+    DBUG_RETURN(1);
   }
 
   error= 1;
-
-  if (*enabled)
+  for (opt= tmp->plugin->system_vars; opt && *opt; opt++)
   {
-    for (opt= tmp->plugin->system_vars; opt && *opt; opt++)
+    if (((o= *opt)->flags & PLUGIN_VAR_NOSYSVAR))
+      continue;
+    if ((var= find_bookmark(tmp->name.str, o->name, o->flags)))
+      v= new (mem_root) sys_var_pluginvar(var->key + 1, o);
+    else
     {
-      if (((o= *opt)->flags & PLUGIN_VAR_NOSYSVAR))
-        continue;
-
-      if ((var= find_bookmark(tmp->name.str, o->name, o->flags)))
-        v= new (mem_root) sys_var_pluginvar(var->key + 1, o);
-      else
-      {
-        len= tmp->name.length + strlen(o->name) + 2;
-        varname= (char*) alloc_root(mem_root, len);
-        strxmov(varname, tmp->name.str, "-", o->name, NullS);
-        my_casedn_str(&my_charset_latin1, varname);
-
-        for (p= varname; *p; p++)
-          if (*p == '-')
-            *p= '_';
-
-        v= new (mem_root) sys_var_pluginvar(varname, o);
-      }
-      DBUG_ASSERT(v); /* check that an object was actually constructed */
-
-      /*
-        Add to the chain of variables.
-        Done like this for easier debugging so that the
-        pointer to v is not lost on optimized builds.
-      */
-      v->chain_sys_var(&chain);
+      len= tmp->name.length + strlen(o->name) + 2;
+      varname= (char*) alloc_root(mem_root, len);
+      strxmov(varname, tmp->name.str, "-", o->name, NullS);
+      my_casedn_str(&my_charset_latin1, varname);
+      convert_dash_to_underscore(varname, len-1);
+      v= new (mem_root) sys_var_pluginvar(varname, o);
     }
-    if (chain.first)
+    DBUG_ASSERT(v); /* check that an object was actually constructed */
+    /*
+      Add to the chain of variables.
+      Done like this for easier debugging so that the
+      pointer to v is not lost on optimized builds.
+    */
+    v->chain_sys_var(&chain);
+  } /* end for */
+  if (chain.first)
+  {
+    chain.last->next = NULL;
+    if (mysql_add_sys_var_chain(chain.first, NULL))
     {
-      chain.last->next = NULL;
-      if (mysql_add_sys_var_chain(chain.first, NULL))
-      {
-        sql_print_error("Plugin '%s' has conflicting system variables",
-                        tmp->name.str);
-        goto err;
-      }
-      tmp->system_vars= chain.first;
+      sql_print_error("Plugin '%s' has conflicting system variables",
+                      tmp->name.str);
+      goto err;
     }
-    DBUG_RETURN(0);
+    tmp->system_vars= chain.first;
   }
-
-  if (enabled_saved && global_system_variables.log_warnings)
-    sql_print_information("Plugin '%s' disabled by command line option",
-                          tmp->name.str);
+  DBUG_RETURN(0);
+  
 err:
   if (opts)
     my_cleanup_options(opts);
diff --git a/sql/sql_plugin.h b/sql/sql_plugin.h
index 8ae38d58845..004d0d5abb7 100644
--- a/sql/sql_plugin.h
+++ b/sql/sql_plugin.h
@@ -79,6 +79,7 @@ struct st_plugin_int
   void *data;                   /* plugin type specific, e.g. handlerton */
   MEM_ROOT mem_root;            /* memory for dynamic plugin structures */
   sys_var *system_vars;         /* server variables for this plugin */
+  bool is_mandatory;            /* If true then plugin must not fail to load */
 };
 
 
diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc
index 29d948472c7..8921f7c0d82 100644
--- a/sql/sql_prepare.cc
+++ b/sql/sql_prepare.cc
@@ -717,9 +717,9 @@ static void setup_one_conversion_function(THD *thd, Item_param *param,
     prepared statement, parameter markers are replaced with variable names.
     Example:
     @verbatim
-     mysql_stmt_prepare("UPDATE t1 SET a=a*1.25 WHERE a=?")
+     mysqld_stmt_prepare("UPDATE t1 SET a=a*1.25 WHERE a=?")
        --> general logs gets [Prepare] UPDATE t1 SET a*1.25 WHERE a=?"
-     mysql_stmt_execute(stmt);
+     mysqld_stmt_execute(stmt);
        --> general and binary logs get
                              [Execute] UPDATE t1 SET a*1.25 WHERE a=1"
     @endverbatim
@@ -1435,8 +1435,8 @@ static bool mysql_test_set_fields(Prepared_statement *stmt,
   THD *thd= stmt->thd;
   set_var_base *var;
 
-  if (tables && check_table_access(thd, SELECT_ACL, tables, UINT_MAX, FALSE) ||
-      open_normal_and_derived_tables(thd, tables, 0))
+  if ((tables && check_table_access(thd, SELECT_ACL, tables, UINT_MAX, FALSE)) 
+      || open_normal_and_derived_tables(thd, tables, 0))
     goto error;
 
   while ((var= it++))
@@ -1471,13 +1471,13 @@ static bool mysql_test_call_fields(Prepared_statement *stmt,
   THD *thd= stmt->thd;
   Item *item;
 
-  if (tables && check_table_access(thd, SELECT_ACL, tables, UINT_MAX, FALSE) ||
+  if ((tables && check_table_access(thd, SELECT_ACL, tables, UINT_MAX, FALSE)) ||
       open_normal_and_derived_tables(thd, tables, 0))
     goto err;
 
   while ((item= it++))
   {
-    if (!item->fixed && item->fix_fields(thd, it.ref()) ||
+    if ((!item->fixed && item->fix_fields(thd, it.ref())) ||
         item->check_cols(1))
       goto err;
   }
@@ -2057,11 +2057,11 @@ static bool init_param_array(Prepared_statement *stmt)
     to the client, otherwise an error message is set in THD.
 */
 
-void mysql_stmt_prepare(THD *thd, const char *packet, uint packet_length)
+void mysqld_stmt_prepare(THD *thd, const char *packet, uint packet_length)
 {
   Prepared_statement *stmt;
   bool error;
-  DBUG_ENTER("mysql_stmt_prepare");
+  DBUG_ENTER("mysqld_stmt_prepare");
 
   DBUG_PRINT("prep_query", ("%s", packet));
 
@@ -2228,9 +2228,8 @@ void mysql_sql_stmt_prepare(THD *thd)
   LEX_STRING *name= &lex->prepared_stmt_name;
   Prepared_statement *stmt;
   const char *query;
-  uint query_len;
+  uint query_len= 0;
   DBUG_ENTER("mysql_sql_stmt_prepare");
-  LINT_INIT(query_len);
   DBUG_ASSERT(thd->protocol == &thd->protocol_text);
 
   if ((stmt= (Prepared_statement*) thd->stmt_map.find_by_name(name)))
@@ -2424,7 +2423,7 @@ static void reset_stmt_params(Prepared_statement *stmt)
     client, otherwise an error message is set in THD.
 */
 
-void mysql_stmt_execute(THD *thd, char *packet_arg, uint packet_length)
+void mysqld_stmt_execute(THD *thd, char *packet_arg, uint packet_length)
 {
   uchar *packet= (uchar*)packet_arg; // GCC 4.0.1 workaround
   ulong stmt_id= uint4korr(packet);
@@ -2434,7 +2433,7 @@ void mysql_stmt_execute(THD *thd, char *packet_arg, uint packet_length)
   uchar *packet_end= packet + packet_length;
   Prepared_statement *stmt;
   bool open_cursor;
-  DBUG_ENTER("mysql_stmt_execute");
+  DBUG_ENTER("mysqld_stmt_execute");
 
   packet+= 9;                               /* stmt_id + 5 bytes of flags */
 
@@ -2445,7 +2444,7 @@ void mysql_stmt_execute(THD *thd, char *packet_arg, uint packet_length)
   {
     char llbuf[22];
     my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0), sizeof(llbuf),
-             llstr(stmt_id, llbuf), "mysql_stmt_execute");
+             llstr(stmt_id, llbuf), "mysqld_stmt_execute");
     DBUG_VOID_RETURN;
   }
 
@@ -2462,6 +2461,9 @@ void mysql_stmt_execute(THD *thd, char *packet_arg, uint packet_length)
 
   stmt->execute_loop(&expanded_query, open_cursor, packet, packet_end);
 
+  /* Close connection socket; for use with client testing (Bug#43560). */
+  DBUG_EXECUTE_IF("close_conn_after_stmt_execute", vio_close(thd->net.vio););
+
   DBUG_VOID_RETURN;
 
 }
@@ -2523,7 +2525,7 @@ void mysql_sql_stmt_execute(THD *thd)
   @param packet_length      Length of packet
 */
 
-void mysql_stmt_fetch(THD *thd, char *packet, uint packet_length)
+void mysqld_stmt_fetch(THD *thd, char *packet, uint packet_length)
 {
   /* assume there is always place for 8-16 bytes */
   ulong stmt_id= uint4korr(packet);
@@ -2531,7 +2533,7 @@ void mysql_stmt_fetch(THD *thd, char *packet, uint packet_length)
   Prepared_statement *stmt;
   Statement stmt_backup;
   Server_side_cursor *cursor;
-  DBUG_ENTER("mysql_stmt_fetch");
+  DBUG_ENTER("mysqld_stmt_fetch");
 
   /* First of all clear possible warnings from the previous command */
   mysql_reset_thd_for_next_command(thd);
@@ -2540,7 +2542,7 @@ void mysql_stmt_fetch(THD *thd, char *packet, uint packet_length)
   {
     char llbuf[22];
     my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0), sizeof(llbuf),
-             llstr(stmt_id, llbuf), "mysql_stmt_fetch");
+             llstr(stmt_id, llbuf), "mysqld_stmt_fetch");
     DBUG_VOID_RETURN;
   }
 
@@ -2581,9 +2583,9 @@ void mysql_stmt_fetch(THD *thd, char *packet, uint packet_length)
 
     This function resets statement to the state it was right after prepare.
     It can be used to:
-    - clear an error happened during mysql_stmt_send_long_data
+    - clear an error happened during mysqld_stmt_send_long_data
     - cancel long data stream for all placeholders without
-      having to call mysql_stmt_execute.
+      having to call mysqld_stmt_execute.
     - close an open cursor
     Sends 'OK' packet in case of success (statement was reset)
     or 'ERROR' packet (unrecoverable error/statement not found/etc).
@@ -2592,12 +2594,12 @@ void mysql_stmt_fetch(THD *thd, char *packet, uint packet_length)
   @param packet             Packet with stmt id
 */
 
-void mysql_stmt_reset(THD *thd, char *packet)
+void mysqld_stmt_reset(THD *thd, char *packet)
 {
   /* There is always space for 4 bytes in buffer */
   ulong stmt_id= uint4korr(packet);
   Prepared_statement *stmt;
-  DBUG_ENTER("mysql_stmt_reset");
+  DBUG_ENTER("mysqld_stmt_reset");
 
   /* First of all clear possible warnings from the previous command */
   mysql_reset_thd_for_next_command(thd);
@@ -2607,7 +2609,7 @@ void mysql_stmt_reset(THD *thd, char *packet)
   {
     char llbuf[22];
     my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0), sizeof(llbuf),
-             llstr(stmt_id, llbuf), "mysql_stmt_reset");
+             llstr(stmt_id, llbuf), "mysqld_stmt_reset");
     DBUG_VOID_RETURN;
   }
 
@@ -2615,7 +2617,7 @@ void mysql_stmt_reset(THD *thd, char *packet)
 
   /*
     Clear parameters from data which could be set by
-    mysql_stmt_send_long_data() call.
+    mysqld_stmt_send_long_data() call.
   */
   reset_stmt_params(stmt);
 
@@ -2636,12 +2638,12 @@ void mysql_stmt_reset(THD *thd, char *packet)
     we don't send any reply to this command.
 */
 
-void mysql_stmt_close(THD *thd, char *packet)
+void mysqld_stmt_close(THD *thd, char *packet)
 {
   /* There is always space for 4 bytes in packet buffer */
   ulong stmt_id= uint4korr(packet);
   Prepared_statement *stmt;
-  DBUG_ENTER("mysql_stmt_close");
+  DBUG_ENTER("mysqld_stmt_close");
 
   thd->main_da.disable_status();
 
@@ -2740,7 +2742,7 @@ void mysql_stmt_get_longdata(THD *thd, char *packet, ulong packet_length)
     stmt->state= Query_arena::ERROR;
     stmt->last_errno= ER_WRONG_ARGUMENTS;
     sprintf(stmt->last_error, ER(ER_WRONG_ARGUMENTS),
-            "mysql_stmt_send_long_data");
+            "mysqld_stmt_send_long_data");
     DBUG_VOID_RETURN;
   }
 #endif
@@ -2844,7 +2846,7 @@ void Prepared_statement::setup_set_params()
     Decide if we have to expand the query (because we must write it to logs or
     because we want to look it up in the query cache) or not.
   */
-  if (mysql_bin_log.is_open() && is_update_query(lex->sql_command) ||
+  if ((mysql_bin_log.is_open() && is_update_query(lex->sql_command)) ||
       opt_log || opt_slow_log ||
       query_cache_is_cacheable_query(lex))
   {
@@ -3164,7 +3166,7 @@ Prepared_statement::set_parameters(String *expanded_query,
   if (res)
   {
     my_error(ER_WRONG_ARGUMENTS, MYF(0),
-             is_sql_ps ? "EXECUTE" : "mysql_stmt_execute");
+             is_sql_ps ? "EXECUTE" : "mysqld_stmt_execute");
     reset_stmt_params(this);
   }
   return res;
@@ -3295,7 +3297,7 @@ Prepared_statement::reprepare()
                           &cur_db_changed))
     return TRUE;
 
-  error= (name.str && copy.set_name(&name) ||
+  error= ((name.str && copy.set_name(&name)) ||
           copy.prepare(query, query_length) ||
           validate_metadata(&copy));
 
@@ -3626,11 +3628,11 @@ error:
 }
 
 
-/** Common part of DEALLOCATE PREPARE and mysql_stmt_close. */
+/** Common part of DEALLOCATE PREPARE and mysqld_stmt_close. */
 
 void Prepared_statement::deallocate()
 {
-  /* We account deallocate in the same manner as mysql_stmt_close */
+  /* We account deallocate in the same manner as mysqld_stmt_close */
   status_var_increment(thd->status_var.com_stmt_close);
   /* Statement map calls delete stmt on erase */
   thd->stmt_map.erase(this);
diff --git a/sql/sql_profile.h b/sql/sql_profile.h
index b5537487d26..245959e0953 100644
--- a/sql/sql_profile.h
+++ b/sql/sql_profile.h
@@ -16,25 +16,12 @@
 #ifndef _SQL_PROFILE_H
 #define _SQL_PROFILE_H
 
-#if __STDC_VERSION__ < 199901L
-#  if __GNUC__ >= 2
-#    define __func__ __FUNCTION__
-#  else
-#    define __func__ _unknown_func_
-extern const char * const _unknown_func_;
-#  endif
-#elif defined(_MSC_VER)
-#  if _MSC_VER < 1300
-#     define __func__ _unknown_func_
-extern const char * const _unknown_func_;
-#  else
-#    define __func__ __FUNCTION__
-#  endif
-#elif defined(__BORLANDC__)
-#  define __func__ __FUNC__
+#ifndef __func__
+#ifdef __FUNCTION__
+#define __func__ __FUNCTION__
 #else
-#  define __func__ _unknown_func_
-extern const char * const _unknown_func_;
+#define __func__ "unknown function"
+#endif
 #endif
 
 extern ST_FIELD_INFO query_profile_statistics_info[];
diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc
index d4331b12cd4..0e0b8eb60b9 100644
--- a/sql/sql_rename.cc
+++ b/sql/sql_rename.cc
@@ -244,7 +244,7 @@ do_rename(THD *thd, TABLE_LIST *ren_table, char *new_db, char *new_table_name,
           char *new_table_alias, bool skip_error)
 {
   int rc= 1;
-  char name[FN_REFLEN];
+  char name[FN_REFLEN + 1];
   const char *new_alias, *old_alias;
   frm_type_enum frm_type;
   enum legacy_db_type table_type;
@@ -261,14 +261,16 @@ do_rename(THD *thd, TABLE_LIST *ren_table, char *new_db, char *new_table_name,
     old_alias= ren_table->table_name;
     new_alias= new_table_name;
   }
-  build_table_filename(name, sizeof(name),
+  DBUG_ASSERT(new_alias);
+
+  build_table_filename(name, sizeof(name) - 1,
                        new_db, new_alias, reg_ext, 0);
   if (!access(name,F_OK))
   {
     my_error(ER_TABLE_EXISTS_ERROR, MYF(0), new_alias);
     DBUG_RETURN(1);			// This can't be skipped
   }
-  build_table_filename(name, sizeof(name),
+  build_table_filename(name, sizeof(name) - 1,
                        ren_table->db, old_alias, reg_ext, 0);
 
   frm_type= mysql_frm_type(thd, name, &table_type);
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 06c6c022780..476c8aaaefb 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -147,7 +147,7 @@ static int send_file(THD *thd)
   if (errmsg)
   {
     sql_print_error("Failed in send_file() %s", errmsg);
-    DBUG_PRINT("error", (errmsg));
+    DBUG_PRINT("error", ("%s", errmsg));
   }
   DBUG_RETURN(error);
 }
@@ -1043,6 +1043,7 @@ int reset_slave(THD *thd, Master_info* mi)
      Reset errors (the idea is that we forget about the
      old master).
   */
+  mi->clear_error();
   mi->rli.clear_error();
   mi->rli.clear_until_condition();
 
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index b6cf3848eee..0f2b3183c0c 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1353,7 +1353,7 @@ JOIN::optimize()
         join_tab[const_tables].type != JT_ALL &&
         join_tab[const_tables].type != JT_FT &&
         join_tab[const_tables].type != JT_REF_OR_NULL &&
-        (order && simple_order || group_list && simple_group))
+        ((order && simple_order) || (group_list && simple_group)))
     {
       if (add_ref_to_table_cond(thd,&join_tab[const_tables])) {
         DBUG_RETURN(1);
@@ -1871,9 +1871,9 @@ JOIN::exec()
       like SEC_TO_TIME(SUM(...)).
     */
 
-    if (curr_join->group_list && (!test_if_subpart(curr_join->group_list,
+    if ((curr_join->group_list && (!test_if_subpart(curr_join->group_list,
 						   curr_join->order) || 
-				  curr_join->select_distinct) ||
+				  curr_join->select_distinct)) ||
 	(curr_join->select_distinct &&
 	 curr_join->tmp_table_param.using_indirect_summary_function))
     {					/* Must copy to another table */
@@ -2251,6 +2251,14 @@ JOIN::destroy()
   cond_equal= 0;
 
   cleanup(1);
+ /* Cleanup items referencing temporary table columns */
+  if (!tmp_all_fields3.is_empty())
+  {
+    List_iterator_fast<Item> it(tmp_all_fields3);
+    Item *item;
+    while ((item= it++))
+      item->cleanup();
+  }
   if (exec_tmp_table1)
     free_tmp_table(thd, exec_tmp_table1);
   if (exec_tmp_table2)
@@ -2339,9 +2347,10 @@ mysql_select(THD *thd, Item ***rref_pointer_array,
       }
       else
       {
-        if (err= join->prepare(rref_pointer_array, tables, wild_num,
-                               conds, og_num, order, group, having, proc_param,
-                               select_lex, unit))
+        err= join->prepare(rref_pointer_array, tables, wild_num,
+                           conds, og_num, order, group, having, proc_param,
+                           select_lex, unit);
+        if (err)
 	{
 	  goto err;
 	}
@@ -2356,9 +2365,10 @@ mysql_select(THD *thd, Item ***rref_pointer_array,
 	DBUG_RETURN(TRUE);
     thd_proc_info(thd, "init");
     thd->used_tables=0;                         // Updated by setup_fields
-    if (err= join->prepare(rref_pointer_array, tables, wild_num,
-                           conds, og_num, order, group, having, proc_param,
-                           select_lex, unit))
+    err= join->prepare(rref_pointer_array, tables, wild_num,
+                       conds, og_num, order, group, having, proc_param,
+                       select_lex, unit);
+    if (err)
     {
       goto err;
     }
@@ -2425,7 +2435,6 @@ static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
   if (select)
   {
     select->head=table;
-    table->reginfo.impossible_range=0;
     if ((error= select->test_quick_select(thd, *(key_map *)keys,(table_map) 0,
                                           limit, 0)) == 1)
       DBUG_RETURN(select->quick->records);
@@ -3836,7 +3845,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
 	if (use->key == prev->key && use->table == prev->table)
 	{
 	  if (prev->keypart+1 < use->keypart ||
-	      prev->keypart == use->keypart && found_eq_constant)
+	      (prev->keypart == use->keypart && found_eq_constant))
 	    continue;				/* remove */
 	}
 	else if (use->keypart != 0)		// First found must be 0
@@ -5141,8 +5150,8 @@ best_extension_by_limited_search(JOIN      *join,
       {
         if (best_record_count > current_record_count ||
             best_read_time > current_read_time ||
-            idx == join->const_tables &&  // 's' is the first table in the QEP
-            s->table == join->sort_by_table)
+            (idx == join->const_tables &&  // 's' is the first table in the QEP
+            s->table == join->sort_by_table))
         {
           if (best_record_count >= current_record_count &&
               best_read_time >= current_read_time &&
@@ -5268,7 +5277,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
       double current_read_time=read_time+best;
       if (best_record_count > current_record_count ||
 	  best_read_time > current_read_time ||
-	  idx == join->const_tables && s->table == join->sort_by_table)
+	  (idx == join->const_tables && s->table == join->sort_by_table))
       {
 	if (best_record_count >= current_record_count &&
 	    best_read_time >= current_read_time &&
@@ -6215,8 +6224,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 	    the index if we are using limit and this is the first table
 	  */
 
-	  if (cond &&
-              (!tab->keys.is_subset(tab->const_keys) && i > 0) ||
+	  if ((cond &&
+              !tab->keys.is_subset(tab->const_keys) && i > 0) ||
 	      (!tab->const_keys.is_clear_all() && i == join->const_tables &&
 	       join->unit->select_limit_cnt <
 	       join->best_positions[i].records_read &&
@@ -7087,15 +7096,17 @@ return_zero_rows(JOIN *join, select_result *result,TABLE_LIST *tables,
   if (!(result->send_fields(fields,
                               Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)))
   {
+    bool send_error= FALSE;
     if (send_row)
     {
       List_iterator_fast<Item> it(fields);
       Item *item;
       while ((item= it++))
 	item->no_rows_in_result();
-      result->send_data(fields);
+      send_error= result->send_data(fields);
     }
-    result->send_eof();				// Should be safe
+    if (!send_error)
+      result->send_eof();				// Should be safe
   }
   /* Update results for FOUND_ROWS */
   join->thd->limit_found_rows= join->thd->examined_row_count= 0;
@@ -7347,7 +7358,7 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
         left_item_equal->merge(right_item_equal);
         /* Remove the merged multiple equality from the list */
         List_iterator<Item_equal> li(cond_equal->current_level);
-        while ((li++) != right_item_equal);
+        while ((li++) != right_item_equal) ;
         li.remove();
       }
     }
@@ -9383,13 +9394,17 @@ static Field *create_tmp_field_from_item(THD *thd, Item *item, TABLE *table,
         +1: for decimal point
       */
 
-      overflow= my_decimal_precision_to_length(intg + dec, dec,
-                                               item->unsigned_flag) - len;
+      const int required_length=
+        my_decimal_precision_to_length(intg + dec, dec,
+                                                     item->unsigned_flag);
+
+      overflow= required_length - len;
 
       if (overflow > 0)
         dec= max(0, dec - overflow);            // too long, discard fract
       else
-        len -= item->decimals - dec;            // corrected value fits
+        /* Corrected value fits. */
+        len= required_length;
     }
 
     new_field= new Field_new_decimal(len, maybe_null, item->name,
@@ -10025,9 +10040,9 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
     reclength=1;				// Dummy select
   /* Use packed rows if there is blobs or a lot of space to gain */
   if (blob_count ||
-      string_total_length >= STRING_TOTAL_LENGTH_TO_PACK_ROWS &&
+      (string_total_length >= STRING_TOTAL_LENGTH_TO_PACK_ROWS &&
       (reclength / string_total_length <= RATIO_TO_PACK_ROWS ||
-       string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS))
+       string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS)))
     use_packed_rows= 1;
 
   share->reclength= reclength;
@@ -10828,9 +10843,8 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
 {
   int rc= 0;
   enum_nested_loop_state error= NESTED_LOOP_OK;
-  JOIN_TAB *join_tab;
+  JOIN_TAB *join_tab= NULL;
   DBUG_ENTER("do_select");
-  LINT_INIT(join_tab);
   
   join->procedure=procedure;
   join->tmp_table= table;			/* Save for easy recursion */
@@ -12651,7 +12665,10 @@ static int test_if_order_by_key(ORDER *order, TABLE *table, uint idx,
          one row).  The sorting doesn't matter.
         */
         if (key_part == key_part_end && reverse == 0)
+        {
+          *used_key_parts= 0;
           DBUG_RETURN(1);
+        }
       }
       else
         DBUG_RETURN(0);
@@ -13066,9 +13083,9 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
     */
     uint nr;
     key_map keys;
-    uint best_key_parts;
-    int best_key_direction;
-    ha_rows best_records;
+    uint best_key_parts= 0;
+    int best_key_direction= 0;
+    ha_rows best_records= 0;
     double read_time;
     int best_key= -1;
     bool is_best_covering= FALSE;
@@ -13078,9 +13095,6 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
     ha_rows table_records= table->file->stats.records;
     bool group= join->group && order == join->group_list;
     ha_rows ref_key_quick_rows= HA_POS_ERROR;
-    LINT_INIT(best_key_parts);
-    LINT_INIT(best_key_direction);
-    LINT_INIT(best_records); 
 
     /*
       If not used with LIMIT, only use keys if the whole query can be
@@ -13121,12 +13135,20 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
     for (nr=0; nr < table->s->keys ; nr++)
     {
       int direction;
+
       if (keys.is_set(nr) &&
           (direction= test_if_order_by_key(order, table, nr, &used_key_parts)))
       {
+        /*
+          At this point we are sure that ref_key is a non-ordering
+          key (where "ordering key" is a key that will return rows
+          in the order required by ORDER BY).
+        */
+        DBUG_ASSERT (ref_key != (int) nr);
+
         bool is_covering= table->covering_keys.is_set(nr) ||
-                          nr == table->s->primary_key &&
-	                  table->file->primary_key_is_clustered();
+                          (nr == table->s->primary_key &&
+                          table->file->primary_key_is_clustered());
 	
         /* 
           Don't use an index scan with ORDER BY without limit.
@@ -13139,7 +13161,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
 	*/ 
         if (is_covering ||
             select_limit != HA_POS_ERROR || 
-            ref_key < 0 && (group || table->force_index))
+            (ref_key < 0 && (group || table->force_index)))
         { 
           double rec_per_key;
           double index_scan_time;
@@ -13148,7 +13170,8 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
             select_limit= table_records;
           if (group)
           {
-            rec_per_key= keyinfo->rec_per_key[used_key_parts-1];
+            rec_per_key= used_key_parts ? keyinfo->rec_per_key[used_key_parts-1]
+                                        : 1;
             set_if_bigger(rec_per_key, 1);
             /*
               With a grouping query each group containing on average
@@ -13203,13 +13226,13 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
 	  */
           index_scan_time= select_limit/rec_per_key *
 	                   min(rec_per_key, table->file->scan_time());
-          if (is_covering || 
-              ref_key < 0 && (group || table->force_index) ||
+          if ((ref_key < 0 && is_covering) || 
+              (ref_key < 0 && (group || table->force_index)) ||
               index_scan_time < read_time)
           {
             ha_rows quick_records= table_records;
-            if (is_best_covering && !is_covering ||
-                is_covering && ref_key_quick_rows < select_limit)
+            if ((is_best_covering && !is_covering) ||
+                (is_covering && ref_key_quick_rows < select_limit))
               continue;
             if (table->quick_keys.is_set(nr))
               quick_records= table->quick_rows[nr];
@@ -13417,8 +13440,8 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
   */
   if ((order != join->group_list || 
        !(join->select_options & SELECT_BIG_RESULT) ||
-       select && select->quick &&
-       select->quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) &&
+       (select && select->quick &&
+        select->quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)) &&
       test_if_skip_sort_order(tab,order,select_limit,0, 
                               is_order_by ?  &table->keys_in_use_for_order_by :
                               &table->keys_in_use_for_group_by))
@@ -13479,9 +13502,24 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
   tab->records= table->sort.found_records;	// For SQL_CALC_ROWS
   if (select)
   {
+    /*
+      We need to preserve tablesort's output resultset here, because
+      QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT (called by
+      SQL_SELECT::cleanup()) may free it assuming it's the result of the quick
+      select operation that we no longer need. Note that all the other parts of
+      this data structure are cleaned up when
+      QUICK_INDEX_MERGE_SELECT::get_next encounters end of data, so the next
+      SQL_SELECT::cleanup() call changes sort.io_cache alone.
+    */
+    IO_CACHE *tablesort_result_cache;
+
+    tablesort_result_cache= table->sort.io_cache;
+    table->sort.io_cache= NULL;
+
     select->cleanup();				// filesort did select
     tab->select= 0;
     table->quick_keys.clear_all();  // as far as we cleanup select->quick
+    table->sort.io_cache= tablesort_result_cache;
   }
   tab->select_cond=0;
   tab->last_inner= 0;
@@ -13855,7 +13893,7 @@ SORT_FIELD *make_unireg_sortorder(ORDER *order, uint *length,
       pos->field= ((Item_sum*) item)->get_tmp_table_field();
     else if (item->type() == Item::COPY_STR_ITEM)
     {						// Blob patch
-      pos->item= ((Item_copy_string*) item)->item;
+      pos->item= ((Item_copy*) item)->get_item();
     }
     else
       pos->item= *order->item;
@@ -14244,8 +14282,8 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
     /* Lookup the current GROUP field in the FROM clause. */
     order_item_type= order_item->type();
     from_field= (Field*) not_found_field;
-    if (is_group_field &&
-        order_item_type == Item::FIELD_ITEM ||
+    if ((is_group_field &&
+        order_item_type == Item::FIELD_ITEM) ||
         order_item_type == Item::REF_ITEM)
     {
       from_field= find_field_in_tables(thd, (Item_ident*) order_item, tables,
@@ -14680,7 +14718,7 @@ get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
   if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
     DBUG_RETURN(0);
 
-  for (; !(map & tables->table->map); tables= tables->next_leaf);
+  for (; !(map & tables->table->map); tables= tables->next_leaf) ;
   if (map != tables->table->map)
     DBUG_RETURN(0);				// More than one table
   DBUG_PRINT("exit",("sort by table: %d",tables->table->tablenr));
@@ -14926,7 +14964,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
       pos= item;
       if (item->field->flags & BLOB_FLAG)
       {
-	if (!(pos= new Item_copy_string(pos)))
+	if (!(pos= Item_copy::create(pos)))
 	  goto err;
        /*
          Item_copy_string::copy for function can call 
@@ -14980,7 +15018,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
 	 on how the value is to be used: In some cases this may be an
 	 argument in a group function, like: IF(ISNULL(col),0,COUNT(*))
       */
-      if (!(pos=new Item_copy_string(pos)))
+      if (!(pos= Item_copy::create(pos)))
 	goto err;
       if (i < border)                           // HAVING, ORDER and GROUP BY
       {
@@ -15033,8 +15071,8 @@ copy_fields(TMP_TABLE_PARAM *param)
     (*ptr->do_copy)(ptr);
 
   List_iterator_fast<Item> it(param->copy_funcs);
-  Item_copy_string *item;
-  while ((item = (Item_copy_string*) it++))
+  Item_copy *item;
+  while ((item = (Item_copy*) it++))
     item->copy();
 }
 
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index d08b3a248c4..d07e951bfd1 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -601,7 +601,7 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list)
   if (open_normal_and_derived_tables(thd, table_list, 0))
   {
     if (!table_list->view ||
-        thd->is_error() && thd->main_da.sql_errno() != ER_VIEW_INVALID)
+        (thd->is_error() && thd->main_da.sql_errno() != ER_VIEW_INVALID))
       DBUG_RETURN(TRUE);
 
     /*
@@ -2819,8 +2819,8 @@ make_table_name_list(THD *thd, List<LEX_STRING> *table_names, LEX *lex,
                      LOOKUP_FIELD_VALUES *lookup_field_vals,
                      bool with_i_schema, LEX_STRING *db_name)
 {
-  char path[FN_REFLEN];
-  build_table_filename(path, sizeof(path), db_name->str, "", "", 0);
+  char path[FN_REFLEN + 1];
+  build_table_filename(path, sizeof(path) - 1, db_name->str, "", "", 0);
   if (!lookup_field_vals->wild_table_value &&
       lookup_field_vals->table_value.str)
   {
@@ -2982,8 +2982,8 @@ static int fill_schema_table_names(THD *thd, TABLE *table,
   else
   {
     enum legacy_db_type not_used;
-    char path[FN_REFLEN];
-    (void) build_table_filename(path, sizeof(path), db_name->str, 
+    char path[FN_REFLEN + 1];
+    (void) build_table_filename(path, sizeof(path) - 1, db_name->str, 
                                 table_name->str, reg_ext, 0);
     switch (mysql_frm_type(thd, path, &not_used)) {
     case FRMTYPE_ERROR:
@@ -3238,10 +3238,10 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
       if lookup value is empty string then
       it's impossible table name or db name
     */
-    if (lookup_field_vals.db_value.str &&
-        !lookup_field_vals.db_value.str[0] ||
-        lookup_field_vals.table_value.str &&
-        !lookup_field_vals.table_value.str[0])
+    if ((lookup_field_vals.db_value.str &&
+         !lookup_field_vals.db_value.str[0]) ||
+        (lookup_field_vals.table_value.str &&
+         !lookup_field_vals.table_value.str[0]))
     {
       error= 0;
       goto err;
@@ -3470,7 +3470,7 @@ int fill_schema_schemata(THD *thd, TABLE_LIST *tables, COND *cond)
     MY_STAT stat_info;
     if (!lookup_field_vals.db_value.str[0])
       DBUG_RETURN(0);
-    path_len= build_table_filename(path, sizeof(path),
+    path_len= build_table_filename(path, sizeof(path) - 1,
                                    lookup_field_vals.db_value.str, "", "", 0);
     path[path_len-1]= 0;
     if (!my_stat(path,&stat_info,MYF(0)))
@@ -4116,10 +4116,10 @@ bool store_schema_proc(THD *thd, TABLE *table, TABLE *proc_table,
                                                 TYPE_ENUM_PROCEDURE))
     return 0;
 
-  if (lex->sql_command == SQLCOM_SHOW_STATUS_PROC &&
-      proc_table->field[2]->val_int() == TYPE_ENUM_PROCEDURE ||
-      lex->sql_command == SQLCOM_SHOW_STATUS_FUNC &&
-      proc_table->field[2]->val_int() == TYPE_ENUM_FUNCTION ||
+  if ((lex->sql_command == SQLCOM_SHOW_STATUS_PROC &&
+      proc_table->field[2]->val_int() == TYPE_ENUM_PROCEDURE) ||
+      (lex->sql_command == SQLCOM_SHOW_STATUS_FUNC &&
+      proc_table->field[2]->val_int() == TYPE_ENUM_FUNCTION) ||
       (sql_command_flags[lex->sql_command] & CF_STATUS_COMMAND) == 0)
   {
     restore_record(table, s->default_values);
@@ -7071,6 +7071,12 @@ bool show_create_trigger(THD *thd, const sp_name *trg_name)
   if (!lst)
     return TRUE;
 
+  if (check_table_access(thd, TRIGGER_ACL, lst, 1, TRUE))
+  {
+    my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), "TRIGGER");
+    return TRUE;
+  }
+
   /*
     Open the table by name in order to load Table_triggers_list object.
 
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 1dd7b55d136..7759985ba85 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -322,12 +322,24 @@ bool String::set_or_copy_aligned(const char *str,uint32 arg_length,
   return copy_aligned(str, arg_length, offset, cs);
 }
 
-	/* Copy with charset conversion */
+
+/**
+   Copies the character data into this String, with optional character set
+   conversion.
+
+   @return
+   FALSE ok
+   TRUE  Could not allocate result buffer
+
+*/
 
 bool String::copy(const char *str, uint32 arg_length,
 		  CHARSET_INFO *from_cs, CHARSET_INFO *to_cs, uint *errors)
 {
   uint32 offset;
+
+  DBUG_ASSERT(!str || str != Ptr);
+  
   if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
   {
     *errors= 0;
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 29d43155778..e752421223a 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -68,6 +68,234 @@ static void wait_for_kill_signal(THD *thd)
 #endif
 
 
+/**
+  @brief Helper function for explain_filename
+*/
+static char* add_identifier(char *to_p, const char * end_p,
+                           const char* name, uint name_len, int errcode)
+{
+  uint res;
+  uint errors;
+  const char *conv_name;
+  char tmp_name[FN_REFLEN];
+  char conv_string[FN_REFLEN];
+
+  DBUG_ENTER("add_identifier");
+  if (!name[name_len])
+    conv_name= name;
+  else
+  {
+    strnmov(tmp_name, name, name_len);
+    tmp_name[name_len]= 0;
+    conv_name= tmp_name;
+  }
+  res= strconvert(&my_charset_filename, conv_name, system_charset_info,
+                  conv_string, FN_REFLEN, &errors);
+  if (!res || errors)
+    conv_name= name;
+  else
+  {
+    DBUG_PRINT("info", ("conv '%s' -> '%s'", conv_name, conv_string));
+    conv_name= conv_string;
+  }
+
+  if (errcode)
+    to_p+= my_snprintf(to_p, end_p - to_p, ER(errcode), conv_name);
+  else
+    to_p+= my_snprintf(to_p, end_p - to_p, "`%s`", conv_name);
+  return to_p;
+}
+
+
+/**
+  @brief Explain a path name by split it to database, table etc.
+  
+  @details Break down the path name to its logic parts
+  (database, table, partition, subpartition).
+  filename_to_tablename cannot be used on partitions, due to the #P# part.
+  There can be up to 6 '#', #P# for partition, #SP# for subpartition
+  and #TMP# or #REN# for temporary or renamed partitions.
+  This should be used when something should be presented to a user in a
+  diagnostic, error etc. when it would be useful to know what a particular
+  file [and directory] means. Such as SHOW ENGINE STATUS, error messages etc.
+
+   @param      from         Path name in my_charset_filename
+                            Null terminated in my_charset_filename, normalized
+                            to use '/' as directory separation character.
+   @param      to           Explained name in system_charset_info
+   @param      to_length    Size of to buffer
+   @param      explain_mode Requested output format.
+                            EXPLAIN_ALL_VERBOSE ->
+                            [Database `db`, ]Table `tbl`[,[ Temporary| Renamed]
+                            Partition `p` [, Subpartition `sp`]]
+                            EXPLAIN_PARTITIONS_VERBOSE -> `db`.`tbl`
+                            [[ Temporary| Renamed] Partition `p`
+                            [, Subpartition `sp`]]
+                            EXPLAIN_PARTITIONS_AS_COMMENT -> `db`.`tbl` |*
+                            [,[ Temporary| Renamed] Partition `p`
+                            [, Subpartition `sp`]] *|
+                            (| is really a /, and it is all in one line)
+
+   @retval     Length of returned string
+*/
+
+uint explain_filename(const char *from,
+                      char *to,
+                      uint to_length,
+                      enum_explain_filename_mode explain_mode)
+{
+  uint res= 0;
+  char *to_p= to;
+  char *end_p= to_p + to_length;
+  const char *db_name= NULL;
+  int  db_name_len= 0;
+  const char *table_name;
+  int  table_name_len= 0;
+  const char *part_name= NULL;
+  int  part_name_len= 0;
+  const char *subpart_name= NULL;
+  int  subpart_name_len= 0;
+  enum enum_file_name_type {NORMAL, TEMP, RENAMED} name_type= NORMAL;
+  const char *tmp_p;
+  DBUG_ENTER("explain_filename");
+  DBUG_PRINT("enter", ("from '%s'", from));
+  tmp_p= from;
+  table_name= from;
+  /*
+    If '/' then take last directory part as database.
+    '/' is the directory separator, not FN_LIB_CHAR
+  */
+  while ((tmp_p= strchr(tmp_p, '/')))
+  {
+    db_name= table_name;
+    /* calculate the length */
+    db_name_len= tmp_p - db_name;
+    tmp_p++;
+    table_name= tmp_p;
+  }
+  tmp_p= table_name;
+  while (!res && (tmp_p= strchr(tmp_p, '#')))
+  {
+    tmp_p++;
+    switch (tmp_p[0]) {
+    case 'P':
+    case 'p':
+      if (tmp_p[1] == '#')
+        part_name= tmp_p + 2;
+      else
+        res= 1;
+      tmp_p+= 2;
+      break;
+    case 'S':
+    case 's':
+      if ((tmp_p[1] == 'P' || tmp_p[1] == 'p') && tmp_p[2] == '#')
+      {
+        part_name_len= tmp_p - part_name - 1;
+        subpart_name= tmp_p + 3;
+      }
+      else
+        res= 2;
+      tmp_p+= 3;
+      break;
+    case 'T':
+    case 't':
+      if ((tmp_p[1] == 'M' || tmp_p[1] == 'm') &&
+          (tmp_p[2] == 'P' || tmp_p[2] == 'p') &&
+          tmp_p[3] == '#' && !tmp_p[4])
+        name_type= TEMP;
+      else
+        res= 3;
+      tmp_p+= 4;
+      break;
+    case 'R':
+    case 'r':
+      if ((tmp_p[1] == 'E' || tmp_p[1] == 'e') &&
+          (tmp_p[2] == 'N' || tmp_p[2] == 'n') &&
+          tmp_p[3] == '#' && !tmp_p[4])
+        name_type= RENAMED;
+      else
+        res= 4;
+      tmp_p+= 4;
+      break;
+    default:
+      res= 5;
+    }
+  }
+  if (res)
+  {
+    /* Better to give something back if we fail parsing, than nothing at all */
+    DBUG_PRINT("info", ("Error in explain_filename: %u", res));
+    sql_print_warning("Invalid (old?) table or database name '%s'", from);
+    DBUG_RETURN(my_snprintf(to, to_length,
+                            "<result %u when explaining filename '%s'>",
+                            res, from));
+  }
+  if (part_name)
+  {
+    table_name_len= part_name - table_name - 3;
+    if (subpart_name)
+      subpart_name_len= strlen(subpart_name);
+    else
+      part_name_len= strlen(part_name);
+    if (name_type != NORMAL)
+    {
+      if (subpart_name)
+        subpart_name_len-= 5;
+      else
+        part_name_len-= 5;
+    }
+  }
+  if (db_name)
+  {
+    if (explain_mode == EXPLAIN_ALL_VERBOSE)
+    {
+      to_p= add_identifier(to_p, end_p, db_name, db_name_len,
+                           ER_DATABASE_NAME);
+      to_p= strnmov(to_p, ", ", end_p - to_p);
+    }
+    else
+    {
+      to_p= add_identifier(to_p, end_p, db_name, db_name_len, 0);
+      to_p= strnmov(to_p, ".", end_p - to_p);
+    }
+  }
+  if (explain_mode == EXPLAIN_ALL_VERBOSE)
+    to_p= add_identifier(to_p, end_p, table_name, table_name_len,
+                         ER_TABLE_NAME);
+  else
+    to_p= add_identifier(to_p, end_p, table_name, table_name_len, 0);
+  if (part_name)
+  {
+    if (explain_mode == EXPLAIN_PARTITIONS_AS_COMMENT)
+      to_p= strnmov(to_p, " /* ", end_p - to_p);
+    else if (explain_mode == EXPLAIN_PARTITIONS_VERBOSE)
+      to_p= strnmov(to_p, " ", end_p - to_p);
+    else
+      to_p= strnmov(to_p, ", ", end_p - to_p);
+    if (name_type != NORMAL)
+    {
+      if (name_type == TEMP)
+        to_p= strnmov(to_p, ER(ER_TEMPORARY_NAME), end_p - to_p);
+      else
+        to_p= strnmov(to_p, ER(ER_RENAMED_NAME), end_p - to_p);
+      to_p= strnmov(to_p, " ", end_p - to_p);
+    }
+    to_p= add_identifier(to_p, end_p, part_name, part_name_len,
+                         ER_PARTITION_NAME);
+    if (subpart_name)
+    {
+      to_p= strnmov(to_p, ", ", end_p - to_p);
+      to_p= add_identifier(to_p, end_p, subpart_name, subpart_name_len,
+                           ER_SUBPARTITION_NAME);
+    }
+    if (explain_mode == EXPLAIN_PARTITIONS_AS_COMMENT)
+      to_p= strnmov(to_p, " */", end_p - to_p);
+  }
+  DBUG_PRINT("exit", ("to '%s'", to));
+  DBUG_RETURN(to_p - to);
+}
+
+
 /*
   Translate a file name to a table name (WL #1324).
 
@@ -1287,7 +1515,7 @@ bool mysql_write_frm(ALTER_PARTITION_PARAM_TYPE *lpt, uint flags)
   /*
     Build shadow frm file name
   */
-  build_table_shadow_filename(shadow_path, sizeof(shadow_path), lpt);
+  build_table_shadow_filename(shadow_path, sizeof(shadow_path) - 1, lpt);
   strxmov(shadow_frm_name, shadow_path, reg_ext, NullS);
   if (flags & WFRM_WRITE_SHADOW)
   {
@@ -1362,7 +1590,7 @@ bool mysql_write_frm(ALTER_PARTITION_PARAM_TYPE *lpt, uint flags)
     /*
       Build frm file name
     */
-    build_table_filename(path, sizeof(path), lpt->db,
+    build_table_filename(path, sizeof(path) - 1, lpt->db,
                          lpt->table_name, "", 0);
     strxmov(frm_name, path, reg_ext, NullS);
     /*
@@ -1460,10 +1688,13 @@ void write_bin_log(THD *thd, bool clear_error,
 {
   if (mysql_bin_log.is_open())
   {
+    int errcode= 0;
     if (clear_error)
       thd->clear_error();
+    else
+      errcode= query_error_code(thd, TRUE);
     thd->binlog_query(THD::STMT_QUERY_TYPE,
-                      query, query_length, FALSE, FALSE, THD::NOT_KILLED);
+                      query, query_length, FALSE, FALSE, errcode);
   }
 }
 
@@ -1561,7 +1792,7 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists,
 			 bool dont_log_query)
 {
   TABLE_LIST *table;
-  char path[FN_REFLEN], *alias;
+  char path[FN_REFLEN + 1], *alias;
   uint path_length;
   String wrong_tables;
   int error= 0;
@@ -1691,13 +1922,14 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists,
       }
       alias= (lower_case_table_names == 2) ? table->alias : table->table_name;
       /* remove .frm file and engine files */
-      path_length= build_table_filename(path, sizeof(path), db, alias, reg_ext,
+      path_length= build_table_filename(path, sizeof(path) - 1, db, alias,
+                                        reg_ext,
                                         table->internal_tmp_table ?
                                         FN_IS_TMP : 0);
     }
     if (drop_temporary ||
-        (table_type == NULL &&        
-         (access(path, F_OK) &&
+        ((table_type == NULL &&        
+         access(path, F_OK) &&
           ha_create_table_from_engine(thd, db, alias)) ||
          (!drop_view &&
           mysql_frm_type(thd, path, &frm_db_type) != FRMTYPE_TABLE)))
@@ -1779,7 +2011,7 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists,
     if (!dont_log_query)
     {
       if (!thd->current_stmt_binlog_row_based ||
-          non_temp_tables_count > 0 && !tmp_table_deleted)
+          (non_temp_tables_count > 0 && !tmp_table_deleted))
       {
         /*
           In this case, we are either using statement-based
@@ -1847,11 +2079,11 @@ err_with_placeholders:
 bool quick_rm_table(handlerton *base,const char *db,
                     const char *table_name, uint flags)
 {
-  char path[FN_REFLEN];
+  char path[FN_REFLEN + 1];
   bool error= 0;
   DBUG_ENTER("quick_rm_table");
 
-  uint path_length= build_table_filename(path, sizeof(path),
+  uint path_length= build_table_filename(path, sizeof(path) - 1,
                                          db, table_name, reg_ext, flags);
   if (my_delete(path,MYF(0)))
     error= 1; /* purecov: inspected */
@@ -2488,8 +2720,8 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
     }
     /* Don't pack rows in old tables if the user has requested this */
     if ((sql_field->flags & BLOB_FLAG) ||
-	sql_field->sql_type == MYSQL_TYPE_VARCHAR &&
-	create_info->row_type != ROW_TYPE_FIXED)
+	(sql_field->sql_type == MYSQL_TYPE_VARCHAR &&
+	create_info->row_type != ROW_TYPE_FIXED))
       (*db_options)|= HA_OPTION_PACK_RECORD;
     it2.rewind();
   }
@@ -2958,7 +3190,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
 	    sql_field->sql_type == MYSQL_TYPE_VARCHAR ||
 	    sql_field->pack_flag & FIELDFLAG_BLOB)))
       {
-	if (column_nr == 0 && (sql_field->pack_flag & FIELDFLAG_BLOB) ||
+	if ((column_nr == 0 && (sql_field->pack_flag & FIELDFLAG_BLOB)) ||
             sql_field->sql_type == MYSQL_TYPE_VARCHAR)
 	  key_info->flags|= HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY;
 	else
@@ -3126,7 +3358,7 @@ static bool prepare_blob_field(THD *thd, Create_field *sql_field)
     }
     sql_field->sql_type= MYSQL_TYPE_BLOB;
     sql_field->flags|= BLOB_FLAG;
-    sprintf(warn_buff, ER(ER_AUTO_CONVERT), sql_field->field_name,
+    my_snprintf(warn_buff, sizeof(warn_buff), ER(ER_AUTO_CONVERT), sql_field->field_name,
             (sql_field->charset == &my_charset_bin) ? "VARBINARY" : "VARCHAR",
             (sql_field->charset == &my_charset_bin) ? "BLOB" : "TEXT");
     push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_AUTO_CONVERT,
@@ -3240,7 +3472,7 @@ bool mysql_create_table_no_lock(THD *thd,
                                 bool internal_tmp_table,
                                 uint select_field_count)
 {
-  char		path[FN_REFLEN];
+  char		path[FN_REFLEN + 1];
   uint          path_length;
   const char	*alias;
   uint		db_options, key_count;
@@ -3448,7 +3680,7 @@ bool mysql_create_table_no_lock(THD *thd,
   }
   else  
   {
-    path_length= build_table_filename(path, sizeof(path), db, alias, reg_ext,
+    path_length= build_table_filename(path, sizeof(path) - 1, db, alias, reg_ext,
                                       internal_tmp_table ? FN_IS_TMP : 0);
   }
 
@@ -3762,7 +3994,8 @@ mysql_rename_table(handlerton *base, const char *old_db,
                    const char *new_name, uint flags)
 {
   THD *thd= current_thd;
-  char from[FN_REFLEN], to[FN_REFLEN], lc_from[FN_REFLEN], lc_to[FN_REFLEN];
+  char from[FN_REFLEN + 1], to[FN_REFLEN + 1],
+    lc_from[FN_REFLEN + 1], lc_to[FN_REFLEN + 1];
   char *from_base= from, *to_base= to;
   char tmp_name[NAME_LEN+1];
   handler *file;
@@ -3774,9 +4007,9 @@ mysql_rename_table(handlerton *base, const char *old_db,
   file= (base == NULL ? 0 :
          get_new_handler((TABLE_SHARE*) 0, thd->mem_root, base));
 
-  build_table_filename(from, sizeof(from), old_db, old_name, "",
+  build_table_filename(from, sizeof(from) - 1, old_db, old_name, "",
                        flags & FN_FROM_IS_TMP);
-  build_table_filename(to, sizeof(to), new_db, new_name, "",
+  build_table_filename(to, sizeof(to) - 1, new_db, new_name, "",
                        flags & FN_TO_IS_TMP);
 
   /*
@@ -3789,13 +4022,13 @@ mysql_rename_table(handlerton *base, const char *old_db,
   {
     strmov(tmp_name, old_name);
     my_casedn_str(files_charset_info, tmp_name);
-    build_table_filename(lc_from, sizeof(lc_from), old_db, tmp_name, "",
+    build_table_filename(lc_from, sizeof(lc_from) - 1, old_db, tmp_name, "",
                          flags & FN_FROM_IS_TMP);
     from_base= lc_from;
 
     strmov(tmp_name, new_name);
     my_casedn_str(files_charset_info, tmp_name);
-    build_table_filename(lc_to, sizeof(lc_to), new_db, tmp_name, "",
+    build_table_filename(lc_to, sizeof(lc_to) - 1, new_db, tmp_name, "",
                          flags & FN_TO_IS_TMP);
     to_base= lc_to;
   }
@@ -3926,16 +4159,16 @@ static int prepare_for_restore(THD* thd, TABLE_LIST* table,
   else
   {
     char* backup_dir= thd->lex->backup_dir;
-    char src_path[FN_REFLEN], dst_path[FN_REFLEN], uname[FN_REFLEN];
+    char src_path[FN_REFLEN], dst_path[FN_REFLEN + 1], uname[FN_REFLEN];
     char* table_name= table->table_name;
     char* db= table->db;
 
-    VOID(tablename_to_filename(table->table_name, uname, sizeof(uname)));
+    VOID(tablename_to_filename(table->table_name, uname, sizeof(uname) - 1));
 
     if (fn_format_relative_to_data_home(src_path, uname, backup_dir, reg_ext))
       DBUG_RETURN(-1); // protect buffer overflow
 
-    build_table_filename(dst_path, sizeof(dst_path),
+    build_table_filename(dst_path, sizeof(dst_path) - 1,
                          db, table_name, reg_ext, 0);
 
     if (lock_and_wait_for_table_name(thd,table))
@@ -4547,7 +4780,7 @@ send_result_message:
           const char *err_msg= thd->main_da.message();
           if (!thd->vio_ok())
           {
-            sql_print_error(err_msg);
+            sql_print_error("%s", err_msg);
           }
           else
           {
@@ -4857,7 +5090,7 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, TABLE_LIST* src_table,
                              HA_CREATE_INFO *create_info)
 {
   TABLE *name_lock= 0;
-  char src_path[FN_REFLEN], dst_path[FN_REFLEN];
+  char src_path[FN_REFLEN], dst_path[FN_REFLEN + 1];
   uint dst_path_length;
   char *db= table->db;
   char *table_name= table->table_name;
@@ -4867,7 +5100,7 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, TABLE_LIST* src_table,
 #ifdef WITH_PARTITION_STORAGE_ENGINE
   char tmp_path[FN_REFLEN];
 #endif
-  char ts_name[FN_LEN];
+  char ts_name[FN_LEN + 1];
   DBUG_ENTER("mysql_create_like_table");
 
 
@@ -4916,7 +5149,7 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, TABLE_LIST* src_table,
       goto err;
     if (!name_lock)
       goto table_exists;
-    dst_path_length= build_table_filename(dst_path, sizeof(dst_path),
+    dst_path_length= build_table_filename(dst_path, sizeof(dst_path) - 1,
                                           db, table_name, reg_ext, 0);
     if (!access(dst_path, F_OK))
       goto table_exists;
@@ -5314,7 +5547,7 @@ compare_tables(TABLE *table,
       create_info->used_fields & HA_CREATE_USED_ENGINE ||
       create_info->used_fields & HA_CREATE_USED_CHARSET ||
       create_info->used_fields & HA_CREATE_USED_DEFAULT_CHARSET ||
-      create_info->used_fields & HA_CREATE_USED_ROW_FORMAT ||
+      (table->s->row_type != create_info->row_type) ||
       create_info->used_fields & HA_CREATE_USED_PACK_KEYS ||
       create_info->used_fields & HA_CREATE_USED_MAX_ROWS ||
       (alter_info->flags & (ALTER_RECREATE | ALTER_FOREIGN_KEY)) ||
@@ -5358,8 +5591,8 @@ compare_tables(TABLE *table,
     /* Don't pack rows in old tables if the user has requested this. */
     if (create_info->row_type == ROW_TYPE_DYNAMIC ||
 	(tmp_new_field->flags & BLOB_FLAG) ||
-	tmp_new_field->sql_type == MYSQL_TYPE_VARCHAR &&
-	create_info->row_type != ROW_TYPE_FIXED)
+	(tmp_new_field->sql_type == MYSQL_TYPE_VARCHAR &&
+	create_info->row_type != ROW_TYPE_FIXED))
       create_info->table_options|= HA_OPTION_PACK_RECORD;
 
     /* Check if field was renamed */
@@ -5657,7 +5890,7 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
 
   if (!create_info->tablespace && create_info->storage_media != HA_SM_MEMORY)
   {
-    char *tablespace= static_cast<char *>(thd->alloc(FN_LEN));
+    char *tablespace= static_cast<char *>(thd->alloc(FN_LEN + 1));
     /*
        Regular alter table of disk stored table (no tablespace/storage change)
        Copy tablespace name
@@ -6024,10 +6257,10 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
 {
   TABLE *table, *new_table= 0, *name_lock= 0;
   int error= 0;
-  char tmp_name[80],old_name[32],new_name_buff[FN_REFLEN];
+  char tmp_name[80],old_name[32],new_name_buff[FN_REFLEN + 1];
   char new_alias_buff[FN_REFLEN], *table_name, *db, *new_alias, *alias;
   char index_file[FN_REFLEN], data_file[FN_REFLEN];
-  char path[FN_REFLEN];
+  char path[FN_REFLEN + 1];
   char reg_path[FN_REFLEN+1];
   ha_rows copied,deleted;
   handlerton *old_db_type, *new_db_type, *save_old_db_type;
@@ -6040,21 +6273,15 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
 #endif
   bool need_lock_for_indexes= TRUE;
   KEY  *key_info_buffer;
-  uint index_drop_count;
-  uint *index_drop_buffer;
-  uint index_add_count;
-  uint *index_add_buffer;
-  uint candidate_key_count;
+  uint index_drop_count= 0;
+  uint *index_drop_buffer= NULL;
+  uint index_add_count= 0;
+  uint *index_add_buffer= NULL;
+  uint candidate_key_count= 0;
   bool committed= 0;
   bool no_pk;
   DBUG_ENTER("mysql_alter_table");
 
-  LINT_INIT(index_add_count);
-  LINT_INIT(index_drop_count);
-  LINT_INIT(index_add_buffer);
-  LINT_INIT(index_drop_buffer);
-  LINT_INIT(candidate_key_count);
-
   /*
     Check if we attempt to alter mysql.slow_log or
     mysql.general_log table and return an error if
@@ -6108,8 +6335,8 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
   db=table_list->db;
   if (!new_db || !my_strcasecmp(table_alias_charset, new_db, db))
     new_db= db;
-  build_table_filename(reg_path, sizeof(reg_path), db, table_name, reg_ext, 0);
-  build_table_filename(path, sizeof(path), db, table_name, "", 0);
+  build_table_filename(reg_path, sizeof(reg_path) - 1, db, table_name, reg_ext, 0);
+  build_table_filename(path, sizeof(path) - 1, db, table_name, "", 0);
 
   mysql_ha_rm_tables(thd, table_list, FALSE);
 
@@ -6139,6 +6366,20 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
   /* Sic: there is a race here */
   if (frm_type == FRMTYPE_VIEW && !(alter_info->flags & ~ALTER_RENAME))
   {
+    /*
+      The following branch handles "ALTER VIEW v1 /no arguments/;"
+      This feature is not documented one. 
+      However, before "OPTIMIZE TABLE t1;" was implemented, 
+      ALTER TABLE with no alter_specifications was used to force-rebuild
+      the table. That's why this grammar is allowed. That's why we ignore
+      it for views. So just do nothing in such a case.
+    */
+    if (!new_name)
+    {
+      my_ok(thd);
+      DBUG_RETURN(FALSE);
+    }
+
     /*
       Avoid problems with a rename on a table that we have locked or
       if the user is trying to to do this in a transcation context
@@ -6166,7 +6407,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
       {
         thd->clear_error();
         Query_log_event qinfo(thd, thd->query, thd->query_length,
-                              0, FALSE, THD::NOT_KILLED);
+                              0, FALSE, 0);
         mysql_bin_log.write(&qinfo);
       }
       my_ok(thd);
@@ -6242,7 +6483,7 @@ view_err:
 	  DBUG_RETURN(TRUE);
         }
 
-        build_table_filename(new_name_buff, sizeof(new_name_buff),
+        build_table_filename(new_name_buff, sizeof(new_name_buff) - 1,
                              new_db, new_name_buff, reg_ext, 0);
         if (!access(new_name_buff, F_OK))
 	{
@@ -6293,7 +6534,10 @@ view_err:
   }
 
   if (create_info->row_type == ROW_TYPE_NOT_USED)
+  {
     create_info->row_type= table->s->row_type;
+    create_info->used_fields |= HA_CREATE_USED_ROW_FORMAT;
+  }
 
   DBUG_PRINT("info", ("old type: %s  new type: %s",
              ha_resolve_storage_engine_name(old_db_type),
@@ -6740,9 +6984,9 @@ view_err:
     }
     else
     {
-      char path[FN_REFLEN];
+      char path[FN_REFLEN + 1];
       /* table is a normal table: Create temporary table in same directory */
-      build_table_filename(path, sizeof(path), new_db, tmp_name, "",
+      build_table_filename(path, sizeof(path) - 1, new_db, tmp_name, "",
                            FN_IS_TMP);
       /* Open our intermediate table */
       new_table=open_temporary_table(thd, path, new_db, tmp_name,0);
@@ -6972,12 +7216,12 @@ view_err:
   }
   else if (mysql_rename_table(new_db_type, new_db, tmp_name, new_db,
                               new_alias, FN_FROM_IS_TMP) ||
-           (new_name != table_name || new_db != db) && // we also do rename
+           ((new_name != table_name || new_db != db) && // we also do rename
            (need_copy_table != ALTER_TABLE_METADATA_ONLY ||
             mysql_rename_table(save_old_db_type, db, table_name, new_db,
                                new_alias, NO_FRM_RENAME)) &&
            Table_triggers_list::change_table_name(thd, db, table_name,
-                                                  new_db, new_alias))
+                                                  new_db, new_alias)))
   {
     /* Try to get everything back. */
     error=1;
@@ -7070,7 +7314,7 @@ view_err:
     */
     char path[FN_REFLEN];
     TABLE *t_table;
-    build_table_filename(path, sizeof(path), new_db, table_name, "", 0);
+    build_table_filename(path + 1, sizeof(path) - 1, new_db, table_name, "", 0);
     t_table= open_temporary_table(thd, path, new_db, tmp_name, 0);
     if (t_table)
     {
diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc
index 8cab8fff2f3..c055268ecca 100644
--- a/sql/sql_trigger.cc
+++ b/sql/sql_trigger.cc
@@ -344,7 +344,7 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create)
     need second part of condition below, since check_access() function also
     checks that db is specified.
   */
-  if (!thd->lex->spname->m_db.length || create && !tables->db_length)
+  if (!thd->lex->spname->m_db.length || (create && !tables->db_length))
   {
     my_error(ER_NO_DB_ERROR, MYF(0));
     DBUG_RETURN(TRUE);
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index fd3036e3d80..cbf94ad7181 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -653,10 +653,22 @@ bool st_select_lex_unit::cleanup()
       join->tables= 0;
     }
     error|= fake_select_lex->cleanup();
-    if (fake_select_lex->order_list.elements)
+    /*
+      There are two cases when we should clean order items:
+      1. UNION with SELECTs which all enclosed into braces
+        in this case global_parameters == fake_select_lex
+      2. UNION where last SELECT is not enclosed into braces
+        in this case global_parameters == 'last select'
+      So we should use global_parameters->order_list for
+      proper order list clean up.
+      Note: global_parameters and fake_select_lex are always
+            initialized for UNION
+    */
+    DBUG_ASSERT(global_parameters);
+    if (global_parameters->order_list.elements)
     {
       ORDER *ord;
-      for (ord= (ORDER*)fake_select_lex->order_list.first; ord; ord= ord->next)
+      for (ord= (ORDER*)global_parameters->order_list.first; ord; ord= ord->next)
         (*ord->item)->cleanup();
     }
   }
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 1edd6952a34..d7c9fc83270 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -797,12 +797,15 @@ int mysql_update(THD *thd,
   {
     if (mysql_bin_log.is_open())
     {
+      int errcode= 0;
       if (error < 0)
         thd->clear_error();
+      else
+        errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
+
       if (thd->binlog_query(THD::ROW_QUERY_TYPE,
                             thd->query, thd->query_length,
-                            transactional_table, FALSE, killed_status) &&
-          transactional_table)
+                            transactional_table, FALSE, errcode))
       {
         error=1;				// Rollback update
       }
@@ -820,7 +823,7 @@ int mysql_update(THD *thd,
   if (error < 0)
   {
     char buff[STRING_BUFFER_USUAL_SIZE];
-    sprintf(buff, ER(ER_UPDATE_INFO), (ulong) found, (ulong) updated,
+    my_snprintf(buff, sizeof(buff), ER(ER_UPDATE_INFO), (ulong) found, (ulong) updated,
 	    (ulong) thd->cuted_fields);
     thd->row_count_func=
       (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
@@ -1035,7 +1038,6 @@ reopen_tables:
         DBUG_RETURN(TRUE);
       }
 
-      table->mark_columns_needed_for_update();
       DBUG_PRINT("info",("setting table `%s` for update", tl->alias));
       /*
         If table will be updated we should not downgrade lock for it and
@@ -1281,13 +1283,41 @@ int multi_update::prepare(List<Item> &not_used_values,
     DBUG_RETURN(1);
   }
 
+  /*
+    We gather the set of columns read during evaluation of SET expression in
+    TABLE::tmp_set by pointing TABLE::read_set to it and then restore it after
+    setup_fields().
+  */
+  for (table_ref= leaves; table_ref; table_ref= table_ref->next_leaf)
+  {
+    TABLE *table= table_ref->table;
+    if (tables_to_update & table->map)
+    {
+      DBUG_ASSERT(table->read_set == &table->def_read_set);
+      table->read_set= &table->tmp_set;
+      bitmap_clear_all(table->read_set);
+    }
+  }
+
   /*
     We have to check values after setup_tables to get covering_keys right in
     reference tables
   */
 
-  if (setup_fields(thd, 0, *values, MARK_COLUMNS_READ, 0, 0))
-    DBUG_RETURN(1);
+  int error= setup_fields(thd, 0, *values, MARK_COLUMNS_READ, 0, 0);
+
+  for (table_ref= leaves; table_ref; table_ref= table_ref->next_leaf)
+  {
+    TABLE *table= table_ref->table;
+    if (tables_to_update & table->map)
+    {
+      table->read_set= &table->def_read_set;
+      bitmap_union(table->read_set, &table->tmp_set);
+    }
+  }
+  
+  if (error)
+    DBUG_RETURN(1);    
 
   /*
     Save tables beeing updated in update_tables
@@ -1382,6 +1412,8 @@ int multi_update::prepare(List<Item> &not_used_values,
     a row in this table will never be read twice. This is true under
     the following conditions:
 
+    - No column is both written to and read in SET expressions.
+
     - We are doing a table scan and the data is in a separate file (MyISAM) or
       if we don't update a clustered key.
 
@@ -1396,6 +1428,9 @@ int multi_update::prepare(List<Item> &not_used_values,
   WARNING
     This code is a bit dependent of how make_join_readinfo() works.
 
+    The field table->tmp_set is used for keeping track of which fields are
+    read during evaluation of the SET expression. See multi_update::prepare.
+
   RETURN
     0		Not safe to update
     1		Safe to update
@@ -1416,6 +1451,8 @@ static bool safe_update_on_fly(THD *thd, JOIN_TAB *join_tab,
   case JT_REF_OR_NULL:
     return !is_key_used(table, join_tab->ref.key, table->write_set);
   case JT_ALL:
+    if (bitmap_is_overlapping(&table->tmp_set, table->write_set))
+      return FALSE;
     /* If range search on index */
     if (join_tab->quick)
       return !join_tab->quick->is_keys_used(table->write_set);
@@ -1471,17 +1508,18 @@ multi_update::initialize_tables(JOIN *join)
     ORDER     group;
     TMP_TABLE_PARAM *tmp_param;
 
-    table->mark_columns_needed_for_update();
     if (ignore)
       table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
     if (table == main_table)			// First table in join
     {
       if (safe_update_on_fly(thd, join->join_tab, table_ref, all_tables))
       {
-	table_to_update= main_table;		// Update table on the fly
+        table->mark_columns_needed_for_update();
+	table_to_update= table;			// Update table on the fly
 	continue;
       }
     }
+    table->mark_columns_needed_for_update();
     table->prepare_for_position();
 
     /*
@@ -1782,7 +1820,7 @@ void multi_update::abort()
 {
   /* the error was handled or nothing deleted and no side effects return */
   if (error_handled ||
-      !thd->transaction.stmt.modified_non_trans_table && !updated)
+      (!thd->transaction.stmt.modified_non_trans_table && !updated))
     return;
 
   /* Something already updated so we have to invalidate cache */
@@ -1819,9 +1857,10 @@ void multi_update::abort()
         got caught and if happens later the killed error is written
         into repl event.
       */
+      int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
       thd->binlog_query(THD::ROW_QUERY_TYPE,
                         thd->query, thd->query_length,
-                        transactional_tables, FALSE);
+                        transactional_tables, FALSE, errcode);
     }
     thd->transaction.all.modified_non_trans_table= TRUE;
   }
@@ -2047,12 +2086,14 @@ bool multi_update::send_eof()
   {
     if (mysql_bin_log.is_open())
     {
+      int errcode= 0;
       if (local_error == 0)
         thd->clear_error();
+      else
+        errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
       if (thd->binlog_query(THD::ROW_QUERY_TYPE,
                             thd->query, thd->query_length,
-                            transactional_tables, FALSE, killed_status) &&
-          trans_safe)
+                            transactional_tables, FALSE, errcode))
       {
 	local_error= 1;				// Rollback update
       }
@@ -2073,8 +2114,8 @@ bool multi_update::send_eof()
 
   id= thd->arg_of_last_insert_id_function ?
     thd->first_successful_insert_id_in_prev_stmt : 0;
-  sprintf(buff, ER(ER_UPDATE_INFO), (ulong) found, (ulong) updated,
-	  (ulong) thd->cuted_fields);
+  my_snprintf(buff, sizeof(buff), ER(ER_UPDATE_INFO),
+              (ulong) found, (ulong) updated, (ulong) thd->cuted_fields);
   thd->row_count_func=
     (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
   ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
diff --git a/sql/sql_view.cc b/sql/sql_view.cc
index 4f207f78688..2a4c5c950fe 100644
--- a/sql/sql_view.cc
+++ b/sql/sql_view.cc
@@ -661,8 +661,9 @@ bool mysql_create_view(THD *thd, TABLE_LIST *views,
     buff.append(STRING_WITH_LEN(" AS "));
     buff.append(views->source.str, views->source.length);
 
+    int errcode= query_error_code(thd, TRUE);
     thd->binlog_query(THD::STMT_QUERY_TYPE,
-                      buff.ptr(), buff.length(), FALSE, FALSE, THD::NOT_KILLED);
+                      buff.ptr(), buff.length(), FALSE, FALSE, errcode);
   }
 
   VOID(pthread_mutex_unlock(&LOCK_open));
@@ -800,7 +801,7 @@ static int mysql_register_view(THD *thd, TABLE_LIST *view,
 
   char md5[MD5_BUFF_LENGTH];
   bool can_be_merged;
-  char dir_buff[FN_REFLEN], path_buff[FN_REFLEN];
+  char dir_buff[FN_REFLEN + 1], path_buff[FN_REFLEN + 1];
   LEX_STRING dir, file, path;
   int error= 0;
   DBUG_ENTER("mysql_register_view");
@@ -877,11 +878,11 @@ static int mysql_register_view(THD *thd, TABLE_LIST *view,
   }
 loop_out:
   /* print file name */
-  dir.length= build_table_filename(dir_buff, sizeof(dir_buff),
+  dir.length= build_table_filename(dir_buff, sizeof(dir_buff) - 1,
                                    view->db, "", "", 0);
   dir.str= dir_buff;
 
-  path.length= build_table_filename(path_buff, sizeof(path_buff),
+  path.length= build_table_filename(path_buff, sizeof(path_buff) - 1,
                                     view->db, view->table_name, reg_ext, 0);
   path.str= path_buff;
 
@@ -1568,7 +1569,7 @@ err:
 
 bool mysql_drop_view(THD *thd, TABLE_LIST *views, enum_drop_mode drop_mode)
 {
-  char path[FN_REFLEN];
+  char path[FN_REFLEN + 1];
   TABLE_LIST *view;
   String non_existant_views;
   char *wrong_object_db= NULL, *wrong_object_name= NULL;
@@ -1583,7 +1584,7 @@ bool mysql_drop_view(THD *thd, TABLE_LIST *views, enum_drop_mode drop_mode)
   {
     TABLE_SHARE *share;
     frm_type_enum type= FRMTYPE_ERROR;
-    build_table_filename(path, sizeof(path),
+    build_table_filename(path, sizeof(path) - 1,
                          view->db, view->table_name, reg_ext, 0);
 
     if (access(path, F_OK) || 
@@ -1928,7 +1929,7 @@ mysql_rename_view(THD *thd,
 {
   LEX_STRING pathstr;
   File_parser *parser;
-  char path_buff[FN_REFLEN];
+  char path_buff[FN_REFLEN + 1];
   bool error= TRUE;
   DBUG_ENTER("mysql_rename_view");
 
@@ -1941,7 +1942,7 @@ mysql_rename_view(THD *thd,
        is_equal(&view_type, parser->type()))
   {
     TABLE_LIST view_def;
-    char dir_buff[FN_REFLEN];
+    char dir_buff[FN_REFLEN + 1];
     LEX_STRING dir, file;
 
     /*
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 0b158ff7574..7d6a7ade540 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -5925,7 +5925,7 @@ alter_list_item:
               MYSQL_YYABORT;
             }
             if (check_table_name($3->table.str,$3->table.length) ||
-                $3->db.str && check_db_name(&$3->db))
+                ($3->db.str && check_db_name(&$3->db)))
             {
               my_error(ER_WRONG_TABLE_NAME, MYF(0), $3->table.str);
               MYSQL_YYABORT;
@@ -6079,8 +6079,8 @@ slave_until:
         | UNTIL_SYM slave_until_opts
           {
             LEX *lex=Lex;
-            if ((lex->mi.log_file_name || lex->mi.pos) &&
-                (lex->mi.relay_log_name || lex->mi.relay_log_pos) ||
+            if (((lex->mi.log_file_name || lex->mi.pos) &&
+                (lex->mi.relay_log_name || lex->mi.relay_log_pos)) ||
                 !((lex->mi.log_file_name && lex->mi.pos) ||
                   (lex->mi.relay_log_name && lex->mi.relay_log_pos)))
             {
@@ -8493,6 +8493,7 @@ table_factor:
                 MYSQL_YYABORT;
               sel->add_joined_table($$);
               lex->pop_context();
+              lex->nest_level--;
             }
             else if ($4 || $6)
             {
@@ -8501,7 +8502,11 @@ table_factor:
               MYSQL_YYABORT;
             }
             else
+            {
+              /* nested join: FROM (t1 JOIN t2 ...),
+                 nest_level is the same as in the outer query */
               $$= $3;
+            }
           }
         ;
 
@@ -12118,15 +12123,16 @@ text_or_password:
         | PASSWORD '(' TEXT_STRING ')'
           {
             $$= $3.length ? YYTHD->variables.old_passwords ?
-              Item_func_old_password::alloc(YYTHD, $3.str) :
-              Item_func_password::alloc(YYTHD, $3.str) :
+              Item_func_old_password::alloc(YYTHD, $3.str, $3.length) :
+              Item_func_password::alloc(YYTHD, $3.str, $3.length) :
               $3.str;
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
         | OLD_PASSWORD '(' TEXT_STRING ')'
           {
-            $$= $3.length ? Item_func_old_password::alloc(YYTHD, $3.str) :
+            $$= $3.length ? Item_func_old_password::alloc(YYTHD, $3.str,
+                                                          $3.length) :
               $3.str;
             if ($$ == NULL)
               MYSQL_YYABORT;
@@ -12588,7 +12594,7 @@ grant_user:
                   (char *) YYTHD->alloc(SCRAMBLED_PASSWORD_CHAR_LENGTH_323+1);
                 if (buff == NULL)
                   MYSQL_YYABORT;
-                make_scrambled_password_323(buff, $4.str);
+                my_make_scrambled_password_323(buff, $4.str, $4.length);
                 $1->password.str= buff;
                 $1->password.length= SCRAMBLED_PASSWORD_CHAR_LENGTH_323;
               }
@@ -12598,7 +12604,7 @@ grant_user:
                   (char *) YYTHD->alloc(SCRAMBLED_PASSWORD_CHAR_LENGTH+1);
                 if (buff == NULL)
                   MYSQL_YYABORT;
-                make_scrambled_password(buff, $4.str);
+                my_make_scrambled_password(buff, $4.str, $4.length);
                 $1->password.str= buff;
                 $1->password.length= SCRAMBLED_PASSWORD_CHAR_LENGTH;
               }
diff --git a/sql/structs.h b/sql/structs.h
index 0a20eee0e9a..a58c18f97c5 100644
--- a/sql/structs.h
+++ b/sql/structs.h
@@ -107,6 +107,10 @@ typedef struct st_reginfo {		/* Extra info about reg */
   struct st_join_table *join_tab;	/* Used by SELECT() */
   enum thr_lock_type lock_type;		/* How database is used */
   bool not_exists_optimize;
+  /*
+    TRUE <=> range optimizer found that there is no rows satisfying
+    table conditions.
+  */
   bool impossible_range;
 } REGINFO;
 
diff --git a/sql/table.cc b/sql/table.cc
index d24ee4c6a27..60a27e136b1 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -464,34 +464,35 @@ inline bool is_system_table_name(const char *name, uint length)
   CHARSET_INFO *ci= system_charset_info;
 
   return (
-          /* mysql.proc table */
-          length == 4 &&
-          my_tolower(ci, name[0]) == 'p' && 
-          my_tolower(ci, name[1]) == 'r' &&
-          my_tolower(ci, name[2]) == 'o' &&
-          my_tolower(ci, name[3]) == 'c' ||
+           /* mysql.proc table */
+           (length == 4 &&
+             my_tolower(ci, name[0]) == 'p' && 
+             my_tolower(ci, name[1]) == 'r' &&
+             my_tolower(ci, name[2]) == 'o' &&
+             my_tolower(ci, name[3]) == 'c') ||
 
-          length > 4 &&
-          (
-           /* one of mysql.help* tables */
-           my_tolower(ci, name[0]) == 'h' &&
-           my_tolower(ci, name[1]) == 'e' &&
-           my_tolower(ci, name[2]) == 'l' &&
-           my_tolower(ci, name[3]) == 'p' ||
+           (length > 4 &&
+             (
+               /* one of mysql.help* tables */
+               (my_tolower(ci, name[0]) == 'h' &&
+                 my_tolower(ci, name[1]) == 'e' &&
+                 my_tolower(ci, name[2]) == 'l' &&
+                 my_tolower(ci, name[3]) == 'p') ||
 
-           /* one of mysql.time_zone* tables */
-           my_tolower(ci, name[0]) == 't' &&
-           my_tolower(ci, name[1]) == 'i' &&
-           my_tolower(ci, name[2]) == 'm' &&
-           my_tolower(ci, name[3]) == 'e' ||
+               /* one of mysql.time_zone* tables */
+               (my_tolower(ci, name[0]) == 't' &&
+                 my_tolower(ci, name[1]) == 'i' &&
+                 my_tolower(ci, name[2]) == 'm' &&
+                 my_tolower(ci, name[3]) == 'e') ||
 
-           /* mysql.event table */
-           my_tolower(ci, name[0]) == 'e' &&
-           my_tolower(ci, name[1]) == 'v' &&
-           my_tolower(ci, name[2]) == 'e' &&
-           my_tolower(ci, name[3]) == 'n' &&
-           my_tolower(ci, name[4]) == 't'
-          )
+               /* mysql.event table */
+               (my_tolower(ci, name[0]) == 'e' &&
+                 my_tolower(ci, name[1]) == 'v' &&
+                 my_tolower(ci, name[2]) == 'e' &&
+                 my_tolower(ci, name[3]) == 'n' &&
+                 my_tolower(ci, name[4]) == 't')
+             )
+           )
          );
 }
 
@@ -779,7 +780,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   strpos=disk_buff+6;
 
   if (!(rec_per_key= (ulong*) alloc_root(&share->mem_root,
-					 sizeof(ulong*)*key_parts)))
+                                         sizeof(ulong)*key_parts)))
     goto err;
 
   for (i=0 ; i < keys ; i++, keyinfo++)
@@ -3314,8 +3315,8 @@ bool TABLE_LIST::prep_check_option(THD *thd, uint8 check_opt_type)
   {
     const char *save_where= thd->where;
     thd->where= "check option";
-    if (!check_option->fixed &&
-        check_option->fix_fields(thd, &check_option) ||
+    if ((!check_option->fixed &&
+        check_option->fix_fields(thd, &check_option)) ||
         check_option->check_cols(1))
     {
       DBUG_RETURN(TRUE);
@@ -3341,6 +3342,7 @@ void TABLE_LIST::hide_view_error(THD *thd)
 
   if (thd->main_da.sql_errno() == ER_BAD_FIELD_ERROR ||
       thd->main_da.sql_errno() == ER_SP_DOES_NOT_EXIST ||
+      thd->main_da.sql_errno() == ER_FUNC_INEXISTENT_NAME_COLLISION ||
       thd->main_da.sql_errno() == ER_PROCACCESS_DENIED_ERROR ||
       thd->main_da.sql_errno() == ER_COLUMNACCESS_DENIED_ERROR ||
       thd->main_da.sql_errno() == ER_TABLEACCESS_DENIED_ERROR ||
@@ -4030,7 +4032,7 @@ void Field_iterator_table_ref::set_field_iterator()
     /* Necesary, but insufficient conditions. */
     DBUG_ASSERT(table_ref->is_natural_join ||
                 table_ref->nested_join ||
-                table_ref->join_columns &&
+                (table_ref->join_columns &&
                 /* This is a merge view. */
                 ((table_ref->field_translation &&
                   table_ref->join_columns->elements ==
@@ -4039,7 +4041,7 @@ void Field_iterator_table_ref::set_field_iterator()
                  /* This is stored table or a tmptable view. */
                  (!table_ref->field_translation &&
                   table_ref->join_columns->elements ==
-                  table_ref->table->s->fields)));
+                  table_ref->table->s->fields))));
     field_it= &natural_join_it;
     DBUG_PRINT("info",("field_it for '%s' is Field_iterator_natural_join",
                        table_ref->alias));
diff --git a/sql/thr_malloc.cc b/sql/thr_malloc.cc
index 6bf43b51df0..0764fe8be33 100644
--- a/sql/thr_malloc.cc
+++ b/sql/thr_malloc.cc
@@ -21,7 +21,7 @@
 extern "C" {
   void sql_alloc_error_handler(void)
   {
-    sql_print_error(ER(ER_OUT_OF_RESOURCES));
+    sql_print_error("%s", ER(ER_OUT_OF_RESOURCES));
 
     THD *thd= current_thd;
     if (thd)
diff --git a/sql/time.cc b/sql/time.cc
index a6619cf4cee..962b65e454c 100644
--- a/sql/time.cc
+++ b/sql/time.cc
@@ -111,8 +111,8 @@ uint calc_week(MYSQL_TIME *l_time, uint week_behaviour, uint *year)
   if (l_time->month == 1 && l_time->day <= 7-weekday)
   {
     if (!week_year && 
-	(first_weekday && weekday != 0 ||
-	 !first_weekday && weekday >= 4))
+	((first_weekday && weekday != 0) ||
+	 (!first_weekday && weekday >= 4)))
       return 0;
     week_year= 1;
     (*year)--;
@@ -129,8 +129,8 @@ uint calc_week(MYSQL_TIME *l_time, uint week_behaviour, uint *year)
   if (week_year && days >= 52*7)
   {
     weekday= (weekday + calc_days_in_year(*year)) % 7;
-    if (!first_weekday && weekday < 4 ||
-	first_weekday && weekday == 0)
+    if ((!first_weekday && weekday < 4) ||
+	(first_weekday && weekday == 0))
     {
       (*year)++;
       return 1;
diff --git a/sql/tztime.cc b/sql/tztime.cc
index 2a94e179600..c7a4ad049ec 100644
--- a/sql/tztime.cc
+++ b/sql/tztime.cc
@@ -447,8 +447,8 @@ prepare_tz_info(TIME_ZONE_INFO *sp, MEM_ROOT *storage)
     }
 
     if (end_t == MY_TIME_T_MAX ||
-        (cur_off_and_corr > 0) &&
-        (end_t >= MY_TIME_T_MAX - cur_off_and_corr))
+        ((cur_off_and_corr > 0) &&
+        (end_t >= MY_TIME_T_MAX - cur_off_and_corr)))
       /* end of t space */
       break;
 
diff --git a/sql/uniques.cc b/sql/uniques.cc
index 858bedb04cd..7b6b628f924 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -603,9 +603,9 @@ bool Unique::get(TABLE *table)
   outfile=table->sort.io_cache=(IO_CACHE*) my_malloc(sizeof(IO_CACHE),
                                 MYF(MY_ZEROFILL));
 
-  if (!outfile || ! my_b_inited(outfile) &&
+  if (!outfile || (! my_b_inited(outfile) &&
       open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
-		       MYF(MY_WME)))
+		       MYF(MY_WME))))
     return 1;
   reinit_io_cache(outfile,WRITE_CACHE,0L,0,0);
 
diff --git a/sql/unireg.cc b/sql/unireg.cc
index 51293184ad8..68a352e4a44 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -37,8 +37,7 @@ static bool pack_header(uchar *forminfo,enum legacy_db_type table_type,
 			List<Create_field> &create_fields,
 			uint info_length, uint screens, uint table_options,
 			ulong data_offset, handler *file);
-static uint get_interval_id(uint *int_count,List<Create_field> &create_fields,
-			    Create_field *last_field);
+static uint get_interval_id(uint *,List<Create_field> &, Create_field *);
 static bool pack_fields(File file, List<Create_field> &create_fields,
                         ulong data_offset);
 static bool make_empty_rec(THD *thd, int file, enum legacy_db_type table_type,
diff --git a/storage/Makefile.am b/storage/Makefile.am
index 4f19be3a361..8aa1e4f7dc6 100644
--- a/storage/Makefile.am
+++ b/storage/Makefile.am
@@ -18,7 +18,7 @@
 AUTOMAKE_OPTIONS =	foreign
 
 # These are built from source in the Docs directory
-EXTRA_DIST =		
+EXTRA_DIST = mysql_storage_engine.cmake	
 SUBDIRS = @mysql_se_dirs@
 DIST_SUBDIRS = @mysql_se_distdirs@
 
diff --git a/storage/archive/CMakeLists.txt b/storage/archive/CMakeLists.txt
index 1c53ad15c07..ce4d92d3f99 100644
--- a/storage/archive/CMakeLists.txt
+++ b/storage/archive/CMakeLists.txt
@@ -13,17 +13,6 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
-SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
-                    ${CMAKE_SOURCE_DIR}/sql
-                    ${CMAKE_SOURCE_DIR}/regex
-                    ${CMAKE_SOURCE_DIR}/extra/yassl/include)
-
+INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
 SET(ARCHIVE_SOURCES  azio.c ha_archive.cc ha_archive.h)
-
-IF(NOT SOURCE_SUBLIBS)
-  ADD_LIBRARY(archive ${ARCHIVE_SOURCES})
-  ADD_DEPENDENCIES(archive GenError)
-ENDIF(NOT SOURCE_SUBLIBS)
+MYSQL_STORAGE_ENGINE(ARCHIVE)
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc
index 46e9a99c446..7f7f3534a22 100644
--- a/storage/archive/ha_archive.cc
+++ b/storage/archive/ha_archive.cc
@@ -1497,8 +1497,8 @@ int ha_archive::info(uint flag)
 
     stats.mean_rec_length= table->s->reclength + buffer.alloced_length();
     stats.data_file_length= file_stat.st_size;
-    stats.create_time= file_stat.st_ctime;
-    stats.update_time= file_stat.st_mtime;
+    stats.create_time= (ulong) file_stat.st_ctime;
+    stats.update_time= (ulong) file_stat.st_mtime;
     stats.max_data_file_length= share->rows_recorded * stats.mean_rec_length;
   }
   stats.delete_length= 0;
diff --git a/storage/blackhole/CMakeLists.txt b/storage/blackhole/CMakeLists.txt
index b11330db255..b762228d7fd 100644
--- a/storage/blackhole/CMakeLists.txt
+++ b/storage/blackhole/CMakeLists.txt
@@ -16,13 +16,7 @@
 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
 SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
 
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql
-                    ${CMAKE_SOURCE_DIR}/regex
-                    ${CMAKE_SOURCE_DIR}/extra/yassl/include)
-
+INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
 SET(BLACKHOLE_SOURCES  ha_blackhole.cc ha_blackhole.h)
 
-IF(NOT SOURCE_SUBLIBS)
-  ADD_LIBRARY(blackhole ${BLACKHOLE_SOURCES})
-  ADD_DEPENDENCIES(blackhole GenError)
-ENDIF(NOT SOURCE_SUBLIBS)
+MYSQL_STORAGE_ENGINE(BLACKHOLE)
diff --git a/storage/csv/CMakeLists.txt b/storage/csv/CMakeLists.txt
index 528b9928c76..eb21a9b048c 100644
--- a/storage/csv/CMakeLists.txt
+++ b/storage/csv/CMakeLists.txt
@@ -16,13 +16,6 @@
 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
 SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
 
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql
-                    ${CMAKE_SOURCE_DIR}/regex
-                    ${CMAKE_SOURCE_DIR}/extra/yassl/include)
-
+INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
 SET(CSV_SOURCES  ha_tina.cc ha_tina.h transparent_file.cc transparent_file.h)
-
-IF(NOT SOURCE_SUBLIBS)
-  ADD_LIBRARY(csv ${CSV_SOURCES})
-  ADD_DEPENDENCIES(csv GenError)
-ENDIF(NOT SOURCE_SUBLIBS)
+MYSQL_STORAGE_ENGINE(CSV)
\ No newline at end of file
diff --git a/storage/example/CMakeLists.txt b/storage/example/CMakeLists.txt
index 0af60e1df83..a328da107bd 100644
--- a/storage/example/CMakeLists.txt
+++ b/storage/example/CMakeLists.txt
@@ -15,14 +15,6 @@
 
 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
 SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql
-                    ${CMAKE_SOURCE_DIR}/regex
-                    ${CMAKE_SOURCE_DIR}/extra/yassl/include)
-
+INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
 SET(EXAMPLE_SOURCES ha_example.cc)
-
-IF(NOT SOURCE_SUBLIBS)
-  ADD_LIBRARY(example ${EXAMPLE_SOURCES})
-  ADD_DEPENDENCIES(example GenError)
-ENDIF(NOT SOURCE_SUBLIBS)
+MYSQL_STORAGE_ENGINE(EXAMPLE)
diff --git a/storage/federated/CMakeLists.txt b/storage/federated/CMakeLists.txt
index b96f68a3c37..fa54d36481a 100644
--- a/storage/federated/CMakeLists.txt
+++ b/storage/federated/CMakeLists.txt
@@ -13,16 +13,6 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
-SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql
-                    ${CMAKE_SOURCE_DIR}/regex
-                    ${CMAKE_SOURCE_DIR}/extra/yassl/include)
-
+INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
 SET(FEDERATED_SOURCES  ha_federated.cc)
-
-IF(NOT SOURCE_SUBLIBS)
-  ADD_LIBRARY(federated ${FEDERATED_SOURCES})
-  ADD_DEPENDENCIES(federated GenError)
-ENDIF(NOT SOURCE_SUBLIBS)
+MYSQL_STORAGE_ENGINE(FEDERATED)
diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc
index e22e342c65e..a3fa42d7d05 100644
--- a/storage/federated/ha_federated.cc
+++ b/storage/federated/ha_federated.cc
@@ -2856,34 +2856,32 @@ int ha_federated::info(uint flag)
     if (!(row= mysql_fetch_row(result)))
       goto error;
 
-    if (flag & (HA_STATUS_VARIABLE | HA_STATUS_CONST))
-    {
-      /*
-        deleted is set in ha_federated::info
-      */
-      /*
-        need to figure out what this means as far as federated is concerned,
-        since we don't have a "file"
+    /*
+      deleted is set in ha_federated::info
+    */
+    /*
+      need to figure out what this means as far as federated is concerned,
+      since we don't have a "file"
 
-        data_file_length = ?
-        index_file_length = ?
-        delete_length = ?
-      */
-      if (row[4] != NULL)
-        stats.records=   (ha_rows) my_strtoll10(row[4], (char**) 0,
+      data_file_length = ?
+      index_file_length = ?
+      delete_length = ?
+    */
+    if (row[4] != NULL)
+      stats.records=   (ha_rows) my_strtoll10(row[4], (char**) 0,
                                                        &error);
-      if (row[5] != NULL)
-        stats.mean_rec_length= (ulong) my_strtoll10(row[5], (char**) 0, &error);
+    if (row[5] != NULL)
+      stats.mean_rec_length= (ulong) my_strtoll10(row[5], (char**) 0, &error);
 
-      stats.data_file_length= stats.records * stats.mean_rec_length;
+    stats.data_file_length= stats.records * stats.mean_rec_length;
 
-      if (row[12] != NULL)
-        stats.update_time=     (time_t) my_strtoll10(row[12], (char**) 0,
+    if (row[12] != NULL)
+      stats.update_time=     (ulong) my_strtoll10(row[12], (char**) 0,
                                                       &error);
-      if (row[13] != NULL)
-        stats.check_time=      (time_t) my_strtoll10(row[13], (char**) 0,
+    if (row[13] != NULL)
+      stats.check_time=      (ulong) my_strtoll10(row[13], (char**) 0,
                                                       &error);
-    }
+
     /*
       size of IO operations (This is based on a good guess, no high science
       involved)
diff --git a/storage/heap/CMakeLists.txt b/storage/heap/CMakeLists.txt
index f8f0aa91464..c2d2cd1290f 100755
--- a/storage/heap/CMakeLists.txt
+++ b/storage/heap/CMakeLists.txt
@@ -16,18 +16,10 @@
 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
 SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
 
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
-                    ${CMAKE_SOURCE_DIR}/sql
-                    ${CMAKE_SOURCE_DIR}/regex
-                    ${CMAKE_SOURCE_DIR}/extra/yassl/include)
-
+INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
 SET(HEAP_SOURCES  _check.c _rectest.c hp_block.c hp_clear.c hp_close.c hp_create.c
 				ha_heap.cc
 				hp_delete.c hp_extra.c hp_hash.c hp_info.c hp_open.c hp_panic.c
 				hp_rename.c hp_rfirst.c hp_rkey.c hp_rlast.c hp_rnext.c hp_rprev.c
 				hp_rrnd.c hp_rsame.c hp_scan.c hp_static.c hp_update.c hp_write.c)
-
-IF(NOT SOURCE_SUBLIBS)
-  ADD_LIBRARY(heap ${HEAP_SOURCES})
-  ADD_DEPENDENCIES(heap GenError)
-ENDIF(NOT SOURCE_SUBLIBS)
+MYSQL_STORAGE_ENGINE(HEAP)
diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc
index 1f74ad0f941..99c1ca5d2e9 100644
--- a/storage/heap/ha_heap.cc
+++ b/storage/heap/ha_heap.cc
@@ -440,6 +440,14 @@ int ha_heap::delete_all_rows()
   return 0;
 }
 
+
+int ha_heap::reset_auto_increment(ulonglong value)
+{
+  file->s->auto_increment= value;
+  return 0;
+}
+
+
 int ha_heap::external_lock(THD *thd, int lock_type)
 {
   return 0;					// No external locking
diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h
index 5c5ad43658e..22722129f4c 100644
--- a/storage/heap/ha_heap.h
+++ b/storage/heap/ha_heap.h
@@ -98,6 +98,7 @@ public:
   int reset();
   int external_lock(THD *thd, int lock_type);
   int delete_all_rows(void);
+  int reset_auto_increment(ulonglong value);
   int disable_indexes(uint mode);
   int enable_indexes(uint mode);
   int indexes_are_disabled(void);
diff --git a/storage/ibmdb2i/db2i_charsetSupport.cc b/storage/ibmdb2i/db2i_charsetSupport.cc
index 2609d42887e..83bf1b9448b 100644
--- a/storage/ibmdb2i/db2i_charsetSupport.cc
+++ b/storage/ibmdb2i/db2i_charsetSupport.cc
@@ -129,8 +129,8 @@ struct IconvMap
 {
   struct HashKey
   {
-    uint16 direction; // This is a uint16 instead of a uchar to avoid garbage data in the key from compiler padding
-    uint16 db2CCSID;
+    uint32 direction; // These are uint32s to avoid garbage data in the key from compiler padding
+    uint32 db2CCSID;
     const CHARSET_INFO* myCharset;
   } hashKey;
   iconv_t iconvDesc;
@@ -245,11 +245,16 @@ static int32 getNewTextDesc(const int32 inType,
   else if ((inType == Qlg_TypeAS400CCSID) && (outType == Qlg_TypeAix41))
   {
     // Override non-standard charsets
-    if (unlikely(strcmp("1148", in) == 0))
+    if (strcmp("1148", in) == 0)
     {
       strcpy(out, "IBM-1148");
       DBUG_RETURN(0);
     }
+    else if (unlikely(strcmp("1153", in) == 0))
+    {
+      strcpy(out, "IBM-1153");
+      DBUG_RETURN(0);
+    }
   }
 
   char argBuf[sizeof(ArgList)+15];
@@ -268,8 +273,15 @@ static int32 getNewTextDesc(const int32 inType,
       RESULT_INT32);
   if (unlikely(arguments->base.result.s_int32.r_int32 < 0))
   {
-     getErrTxt(DB2I_ERR_ILECALL,"QlgCvtTextDescToDesc",arguments->base.result.s_int32.r_int32);
-     DBUG_RETURN(DB2I_ERR_ILECALL);
+    if (arguments->base.result.s_int32.r_int32 == Qlg_InDescriptorNotFound)
+    {
+      DBUG_RETURN(DB2I_ERR_UNSUPP_CHARSET);
+    }
+    else
+    {
+      getErrTxt(DB2I_ERR_ILECALL,"QlgCvtTextDescToDesc",arguments->base.result.s_int32.r_int32);
+      DBUG_RETURN(DB2I_ERR_ILECALL);
+    }
   }
   
   // Store the conversion information into a cache entry
@@ -372,6 +384,11 @@ static int32 convertTextDesc(const int32 inType, const int32 outType, const char
         strcpy(outDesc,"IBM-1256");
         DBUG_RETURN(0);
       }
+      else if (strcmp("macce", inDescOverride) == 0)
+      {
+        strcpy(outDesc,"IBM-1282");
+        DBUG_RETURN(0);
+      }
     }
     else if (outType == Qlg_TypeAS400CCSID)
     {
@@ -428,8 +445,13 @@ int32 convertIANAToDb2Ccsid(const char* parmIANADesc, uint16* db2Ccsid)
   int aixEncodingScheme;
   int db2EncodingScheme;
   rc = convertTextDesc(Qlg_TypeIANA, Qlg_TypeAS400CCSID, parmIANADesc, aixCcsidString);
-  if (rc != 0)
+  if (unlikely(rc))
+  {
+    if (rc == DB2I_ERR_UNSUPP_CHARSET)
+      getErrTxt(DB2I_ERR_UNSUPP_CHARSET, parmIANADesc);
+    
     return rc;
+  }
   aixCcsid = atoi(aixCcsidString);
   rc = getEncodingScheme(aixCcsid, aixEncodingScheme);     
   if (rc != 0) 
@@ -571,6 +593,11 @@ int32 getAssociatedCCSID(const uint16 inCcsid, const int inEncodingScheme, uint1
     *outCcsid = 1148;
     DBUG_RETURN(0);
   }
+  else if ((inCcsid == 1250) && (inEncodingScheme == 0x1100))
+  {
+    *outCcsid = 1153;
+    DBUG_RETURN(0);
+  }
 
   if (!ptrInited)
   {  
@@ -646,32 +673,38 @@ static int32 openNewConversion(enum_conversionDirection direction,
      there equivalent iconv descriptions.
   */
   rc = convertTextDesc(Qlg_TypeIANA, Qlg_TypeAix41, mysqlCSName, mysqlAix41Desc);
-  if (rc)
+  if (unlikely(rc))
+  {
+    if (rc == DB2I_ERR_UNSUPP_CHARSET)
+      getErrTxt(DB2I_ERR_UNSUPP_CHARSET, mysqlCSName);
+    
     DBUG_RETURN(rc);
+  }
   CHARSET_INFO *cs= &my_charset_bin;
   (uint)(cs->cset->long10_to_str)(cs,db2CcsidString,sizeof(db2CcsidString), 10, db2CCSID);  
   rc = convertTextDesc(Qlg_TypeAS400CCSID, Qlg_TypeAix41, db2CcsidString, db2Aix41Desc);
-  if (rc)
-      DBUG_RETURN(rc);
+  if (unlikely(rc))
+  {
+    if (rc == DB2I_ERR_UNSUPP_CHARSET)
+      getErrTxt(DB2I_ERR_UNSUPP_CHARSET, mysqlCSName);
+    
+    DBUG_RETURN(rc);
+  }
   
   /* Call iconv to open the conversion. */
   if (direction == toDB2)
   {
     newConversion = iconv_open(db2Aix41Desc, mysqlAix41Desc);
-    if (newConversion == (iconv_t) -1)
-    {
-       getErrTxt(DB2I_ERR_ICONV_OPEN, mysqlAix41Desc, db2Aix41Desc,  errno);
-       DBUG_RETURN(DB2I_ERR_ICONV_OPEN);
-    }
   }
   else
   {
     newConversion = iconv_open(mysqlAix41Desc, db2Aix41Desc);
-    if (newConversion == (iconv_t) -1)
-    {
-       getErrTxt(DB2I_ERR_ICONV_OPEN, db2Aix41Desc, mysqlAix41Desc, errno);
-       DBUG_RETURN(DB2I_ERR_ICONV_OPEN);
-    }
+  }
+
+  if (unlikely(newConversion == (iconv_t) -1))
+  {
+    getErrTxt(DB2I_ERR_UNSUPP_CHARSET, mysqlCSName);
+    DBUG_RETURN(DB2I_ERR_UNSUPP_CHARSET);
   }
  
   /* Insert the new conversion into the cache. */
diff --git a/storage/ibmdb2i/db2i_collationSupport.cc b/storage/ibmdb2i/db2i_collationSupport.cc
index a41f211a689..65a17fd2452 100644
--- a/storage/ibmdb2i/db2i_collationSupport.cc
+++ b/storage/ibmdb2i/db2i_collationSupport.cc
@@ -44,7 +44,7 @@ OF SUCH DAMAGE.
   between corresponding array slots but is incomplete without case-sensitivity
   markers dynamically added to the mySqlSortSequence names.
 */
-#define MAX_COLLATION 89
+#define MAX_COLLATION 87
 static const char* mySQLCollation[MAX_COLLATION] = 
 {
     {"ascii_general"},
@@ -52,7 +52,6 @@ static const char* mySQLCollation[MAX_COLLATION] =
     {"big5_chinese"},
     {"big5"},
     {"cp1250_croatian"},
-    {"cp1250_czech"},
     {"cp1250_general"},
     {"cp1250_polish"},
     {"cp1250"},
@@ -84,7 +83,6 @@ static const char* mySQLCollation[MAX_COLLATION] =
     {"latin1_swedish"},
     {"latin1"},
     {"latin2_croatian"},
-    {"latin2_czech"},
     {"latin2_general"},
     {"latin2_hungarian"},
     {"latin2"},
@@ -146,7 +144,6 @@ static const char* mySqlSortSequence[MAX_COLLATION] =
     {"QACHT04B0"},  
     {"QBCHT04B0"},
     {"QALA20481"},  
-    {"QBLA20481"},
     {"QCLA20481"},
     {"QDLA20481"},
     {"QELA20481"},
@@ -178,7 +175,6 @@ static const char* mySqlSortSequence[MAX_COLLATION] =
     {"QELA1047C"},
     {"QFLA1047C"},
     {"QCLA20366"},  
-    {"QDLA20366"},
     {"QELA20366"},
     {"QFLA20366"},
     {"QGLA20366"},
@@ -190,8 +186,8 @@ static const char* mySqlSortSequence[MAX_COLLATION] =
     {"QDJPN04B0"},
     {"QATHA0346"},  
     {"QBTHA0346"},  
-    {"ACS"},        
-    {"ADA"},
+    {"ACS_CZ"},        
+    {"ADA_DK"},
     {"AEO"},
     {"AET"},
     {"QAUCS04B0"},  
@@ -211,8 +207,8 @@ static const char* mySqlSortSequence[MAX_COLLATION] =
     {"*HEX"},
     {"QEJPN04B0"},  
     {"QFJPN04B0"},
-    {"ACS"},        
-    {"ADA"},
+    {"ACS_CZ"},        
+    {"ADA_DK"},
     {"AEO"},
     {"AET"},
     {"QAUCS04B0"},
diff --git a/storage/ibmdb2i/db2i_conversion.cc b/storage/ibmdb2i/db2i_conversion.cc
index f746be6ab50..9a85eb01c9b 100644
--- a/storage/ibmdb2i/db2i_conversion.cc
+++ b/storage/ibmdb2i/db2i_conversion.cc
@@ -137,7 +137,9 @@ int ha_ibmdb2i::convertFieldChars(enum_conversionDirection direction,
                                   char* output, 
                                   size_t ilen, 
                                   size_t olen, 
-                                  size_t* outDataLen)
+                                  size_t* outDataLen,
+                                  bool tacitErrors,
+                                  size_t* substChars)
 {
   DBUG_PRINT("ha_ibmdb2i::convertFieldChars",("Direction: %d; length = %d", direction, ilen));
   
@@ -151,32 +153,32 @@ int ha_ibmdb2i::convertFieldChars(enum_conversionDirection direction,
   
   if (unlikely(conversion == (iconv_t)(-1)))
   {
-    return (DB2I_ERR_ICONV_OPEN);
+    return (DB2I_ERR_UNSUPP_CHARSET);
   }
 
   size_t initOLen= olen;
   size_t substitutedChars = 0;
   int rc = iconv(conversion, (char**)&input, &ilen, &output, &olen, &substitutedChars );
+  if (outDataLen) *outDataLen = initOLen - olen;
+  if (substChars) *substChars = substitutedChars;
   if (unlikely(rc < 0))
   {
     int er = errno;
     if (er == EILSEQ)
     {
-      getErrTxt(DB2I_ERR_ILL_CHAR, table->field[fieldID]->field_name);
+      if (!tacitErrors) getErrTxt(DB2I_ERR_ILL_CHAR, table->field[fieldID]->field_name);
       return (DB2I_ERR_ILL_CHAR);
     }
     else
     {
-      getErrTxt(DB2I_ERR_ICONV,er);
+      if (!tacitErrors) getErrTxt(DB2I_ERR_ICONV,er);
       return (DB2I_ERR_ICONV);
     }
   }
-  if (unlikely(substitutedChars))
+  if (unlikely(substitutedChars) && (!tacitErrors))
   {
     warning(ha_thd(), DB2I_ERR_SUB_CHARS, table->field[fieldID]->field_name);
   }
-    
-  if (outDataLen) *outDataLen = initOLen - olen;
 
   return (0);
 }
@@ -555,12 +557,12 @@ int ha_ibmdb2i::getFieldTypeMapping(Field* field,
                 return 1;
               if (fieldCharSet->mbmaxlen > 1)
               {
-                if (strncmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")) == 0 ) // UCS2
+                if (memcmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")-1) == 0 ) // UCS2
                 {
                   sprintf(stringBuildBuffer, "GRAPHIC(%d)", max(fieldLength / fieldCharSet->mbmaxlen, 1)); // Number of characters
                   db2Ccsid = 13488;
                 }
-                else if (strncmp(fieldCharSet->name, "utf8_", sizeof("utf8_")) == 0 &&
+                else if (memcmp(fieldCharSet->name, "utf8_", sizeof("utf8_")-1) == 0 &&
                          strcmp(fieldCharSet->name, "utf8_general_ci") != 0) 
                 {
                   sprintf(stringBuildBuffer, "CHAR(%d)", max(fieldLength, 1)); // Number of bytes
@@ -584,12 +586,12 @@ int ha_ibmdb2i::getFieldTypeMapping(Field* field,
               {
                 if (fieldCharSet->mbmaxlen > 1)
                 {
-                  if (strncmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")) == 0 ) // UCS2
+                  if (memcmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")-1) == 0 ) // UCS2
                   {
                     sprintf(stringBuildBuffer, "VARGRAPHIC(%d)", max(fieldLength / fieldCharSet->mbmaxlen, 1)); // Number of characters
                     db2Ccsid = 13488;
                   }
-                  else if (strncmp(fieldCharSet->name, "utf8_", sizeof("utf8_")) == 0 &&
+                  else if (memcmp(fieldCharSet->name, "utf8_", sizeof("utf8_")-1) == 0 &&
                            strcmp(fieldCharSet->name, "utf8_general_ci") != 0) 
                   {
                     sprintf(stringBuildBuffer, "VARCHAR(%d)", max(fieldLength, 1)); // Number of bytes
@@ -611,12 +613,12 @@ int ha_ibmdb2i::getFieldTypeMapping(Field* field,
               {
                 if (fieldCharSet->mbmaxlen > 1)
                 {
-                  if (strncmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")) == 0 ) // UCS2
+                  if (memcmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")-1) == 0 ) // UCS2
                   {
                     sprintf(stringBuildBuffer, "LONG VARGRAPHIC ");
                     db2Ccsid = 13488;
                   }
-                  else if (strncmp(fieldCharSet->name, "utf8_", sizeof("utf8_")) == 0 &&
+                  else if (memcmp(fieldCharSet->name, "utf8_", sizeof("utf8_")-1) == 0 &&
                            strcmp(fieldCharSet->name, "utf8_general_ci") != 0) 
                   {
                     sprintf(stringBuildBuffer, "LONG VARCHAR ");
@@ -639,12 +641,12 @@ int ha_ibmdb2i::getFieldTypeMapping(Field* field,
 
                 if (fieldCharSet->mbmaxlen > 1)
                 {
-                  if (strncmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")) == 0 ) // UCS2
+                  if (memcmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")-1) == 0 ) // UCS2
                   {
                     sprintf(stringBuildBuffer, "DBCLOB(%d)", max(fieldLength / fieldCharSet->mbmaxlen, 1)); // Number of characters
                     db2Ccsid = 13488;
                   }
-                  else if (strncmp(fieldCharSet->name, "utf8_", sizeof("utf8_")) == 0 &&
+                  else if (memcmp(fieldCharSet->name, "utf8_", sizeof("utf8_")-1) == 0 &&
                            strcmp(fieldCharSet->name, "utf8_general_ci") != 0) 
                   {
                     sprintf(stringBuildBuffer, "CLOB(%d)", max(fieldLength, 1)); // Number of bytes
@@ -670,6 +672,17 @@ int ha_ibmdb2i::getFieldTypeMapping(Field* field,
               if (rtnCode)
                 return rtnCode;
             }
+            
+            if (db2Ccsid != 1208 &&
+                db2Ccsid != 13488)
+            {
+              // Check whether there is a character conversion available.
+              iconv_t temp;
+              int32 rc = getConversion(toDB2, fieldCharSet, db2Ccsid, temp);
+              if (unlikely(rc))
+                return rc;
+            }
+            
             sprintf(stringBuildBuffer, " CCSID %d ", db2Ccsid);
             mapping.append(stringBuildBuffer);
           }
@@ -1078,7 +1091,7 @@ int32 ha_ibmdb2i::convertMySQLtoDB2(Field* field, const DB2Field& db2Field, char
                   if (bytesToStore)
                     memcpy(db2Buf, dataToStore, bytesToStore);
                   if (bytesToPad)
-                    wmemset((wchar_t*)(db2Buf + bytesToStore), 0x0020, bytesToPad/2);
+                    memset16((db2Buf + bytesToStore), 0x0020, bytesToPad/2);
                 }
                 else
                 {
@@ -1101,7 +1114,7 @@ int32 ha_ibmdb2i::convertMySQLtoDB2(Field* field, const DB2Field& db2Field, char
                     bytesToStore = db2BytesToStore;
                   }
                   if (db2BytesToStore < maxDb2BytesToStore) // If need to pad
-                    wmemset((wchar_t*)(db2Buf + db2BytesToStore), 0x0020, (maxDb2BytesToStore - db2BytesToStore)/2);
+                    memset16((db2Buf + db2BytesToStore), 0x0020, (maxDb2BytesToStore - db2BytesToStore)/2);
                 }
 
                 if (db2FieldType == QMY_VARGRAPHIC)
diff --git a/storage/ibmdb2i/db2i_errors.cc b/storage/ibmdb2i/db2i_errors.cc
index 43dd539447f..dd50e40e61b 100644
--- a/storage/ibmdb2i/db2i_errors.cc
+++ b/storage/ibmdb2i/db2i_errors.cc
@@ -52,7 +52,7 @@ static const char* engineErrors[MAX_MSGSTRING] =
   {"Error opening codeset conversion from %.64s to %.64s (errno = %d)"},      
   {"Invalid %-.10s name '%-.128s'"},                                          
   {"Unsupported move from '%-.128s' to '%-.128s' on RENAME TABLE statement"}, 
-  {"Unsupported schema '%-.128s' specified on RENAME TABLE statement"},       
+  {"The %-.64s character set is not supported."},       
   {"Auto_increment is not allowed for a partitioned table"},                  
   {"Character set conversion error due to unknown encoding scheme %d"},       
   {""}, 
diff --git a/storage/ibmdb2i/db2i_errors.h b/storage/ibmdb2i/db2i_errors.h
index 0f6fbef33f6..b6dd314ef50 100644
--- a/storage/ibmdb2i/db2i_errors.h
+++ b/storage/ibmdb2i/db2i_errors.h
@@ -54,7 +54,7 @@ enum DB2I_errors
   DB2I_ERR_ICONV_OPEN,
   DB2I_ERR_INVALID_NAME,
   DB2I_ERR_RENAME_MOVE,
-  DB2I_ERR_RENAME_QTEMP,
+  DB2I_ERR_UNSUPP_CHARSET,
   DB2I_ERR_PART_AUTOINC,
   DB2I_ERR_UNKNOWN_ENCODING,
   DB2I_ERR_RESERVED,
diff --git a/storage/ibmdb2i/db2i_misc.h b/storage/ibmdb2i/db2i_misc.h
index 9e20f01208b..f0b527aaad0 100644
--- a/storage/ibmdb2i/db2i_misc.h
+++ b/storage/ibmdb2i/db2i_misc.h
@@ -109,5 +109,21 @@ bool isOrdinaryIdentifier(const char* s)
   }
   return true;
 }
+
+/**
+  Fill memory with a 16-bit word.
   
+  @param p  Pointer to space to fill.
+  @param v  Value to fill
+  @param l  Length of space (in 16-bit words)
+*/
+void memset16(void* p, uint16 v, size_t l)
+{
+  uint16* p2=(uint16*)p;
+  while (l--)
+  {
+    *(p2++) = v;
+  }
+}
+
 #endif
diff --git a/storage/ibmdb2i/db2i_myconv.h b/storage/ibmdb2i/db2i_myconv.h
index a9e87474505..98032748148 100644
--- a/storage/ibmdb2i/db2i_myconv.h
+++ b/storage/ibmdb2i/db2i_myconv.h
@@ -220,6 +220,7 @@ INTERN  size_t	        myconv_dmap(myconv_t    cd,
       } else {
         *pOut=dmapS2S[*pIn];
         if (*pOut == 0x00) {
+          errno=EILSEQ;  /* 116 */
           *outBytesLeft-=(*inBytesLeft-inLen);
           *inBytesLeft=inLen;
           *outBuf=pOut;
diff --git a/storage/ibmdb2i/db2i_rir.cc b/storage/ibmdb2i/db2i_rir.cc
index a80a181c9ac..091c4d98383 100644
--- a/storage/ibmdb2i/db2i_rir.cc
+++ b/storage/ibmdb2i/db2i_rir.cc
@@ -51,7 +51,6 @@ static inline int getKeyCntFromMap(key_part_map keypart_map)
   return (cnt); 
 }
 
-
 /**
   @brief
   Given a starting key and an ending key, estimate the number of rows that
@@ -270,81 +269,163 @@ ha_rows ha_ibmdb2i::records_in_range(uint inx,
         DB2Field& db2Field = db2Table->db2Field(field->field_index);
         litDefPtr->DataType = db2Field.getType();
         /*
-           Convert the literal to DB2 format.  
-                                                                                   */
-        rc = convertMySQLtoDB2(field,
-                 db2Field,
-                 literalPtr,
-                 (uchar*)minPtr+((curKey.key_part[partsInUse].null_bit)? 1 : 0));
+           Convert the literal to DB2 format
+                                                                                               */
+        if ((field->type() != MYSQL_TYPE_BIT) &&           // Don't do conversion on BIT data
+            (field->charset() != &my_charset_bin) &&       // Don't do conversion on BINARY data
+            (litDefPtr->DataType == QMY_CHAR ||
+             litDefPtr->DataType == QMY_VARCHAR ||
+             litDefPtr->DataType == QMY_GRAPHIC ||
+             litDefPtr->DataType == QMY_VARGRAPHIC))
+        {
+          // Most of the code is required by the considerable wrangling needed
+          // to prepare partial keys for use by DB2
+          // 1. UTF8 (CCSID 1208) data can be copied across unmodified if it is
+          //    utf8_bin. Otherwise, we need to convert the min and max
+          //    characters into the min and max characters employed
+          //    by the DB2 sort sequence. This is complicated by the fact that
+          //    the character widths are not always equal.
+          //  2. Likewise, UCS2 (CCSID 13488) data can be copied across unmodified
+          //     if it is ucs2_bin or ucs2_general_ci. Otherwise, we need to
+          //     convert the min and max characters into the min and max characters
+          //     employed by the DB2 sort sequence.
+          //  3. All other data will use standard iconv conversions. If an
+          //     unconvertible character is encountered, we assume it is the min
+          //     char and fill the remainder of the DB2 key with 0s. This may not
+          //     always be accurate, but it is probably sufficient for range
+          //     estimations.
+          const char* keyData = minPtr+((curKey.key_part[partsInUse].null_bit)? 1 : 0);
+          char* db2Data = literalPtr;
+          uint16 outLen = db2Field.getByteLengthInRecord();
+          uint16 inLen;
+          if (litDefPtr->DataType == QMY_VARCHAR ||
+              litDefPtr->DataType == QMY_VARGRAPHIC)
+          {
+            inLen = *(uint8*)keyData + ((*(uint8*)(keyData+1)) << 8);
+            keyData += 2;
+            outLen -= sizeof(uint16);
+            db2Data += sizeof(uint16);
+          }
+          else
+          {
+            inLen = field->max_display_length();
+          }
+          
+          size_t convertedBytes = 0;
+          if (db2Field.getCCSID() == 1208)
+          {
+            DBUG_ASSERT(inLen <= outLen);
+            if (strcmp(field->charset()->name, "utf8_bin"))
+            {
+              const char* end = keyData+inLen;
+              const char* curKey = keyData;
+              char* curDB2 = db2Data;
+              uint32 min = field->charset()->min_sort_char;
+              while ((curKey < end) && (curDB2 < db2Data+outLen-3))
+              {
+                my_wc_t temp;
+                int len = field->charset()->cset->mb_wc(field->charset(),
+                                                        &temp, 
+                                                        (const uchar*)curKey, 
+                                                        (const uchar*)end);
+                if (temp != min)
+                {
+                  DBUG_ASSERT(len <= 3);
+                  switch (len)
+                  {
+                    case 3: *(curDB2+2) = *(curKey+2);
+                    case 2: *(curDB2+1) = *(curKey+1);
+                    case 1: *(curDB2) = *(curKey);
+                  }                      
+                  curDB2 += len;
+                }
+                else
+                {
+                  *(curDB2++) = 0xEF;
+                  *(curDB2++) = 0xBF;
+                  *(curDB2++) = 0xBF;
+                }
+                curKey += len;
+              }
+              convertedBytes = curDB2 - db2Data;
+            }
+            else
+            {
+              memcpy(db2Data, keyData, inLen);
+              convertedBytes = inLen;
+            }
+            rc = 0;
+          }
+          else if (db2Field.getCCSID() == 13488)
+          {
+            DBUG_ASSERT(inLen <= outLen);
+            if (strcmp(field->charset()->name, "ucs2_bin") &&
+                strcmp(field->charset()->name, "ucs2_general_ci"))
+            {
+              const char* end = keyData+inLen;
+              const uint16* curKey = (uint16*)keyData;
+              uint16* curDB2 = (uint16*)db2Data;
+              uint16 min = field->charset()->min_sort_char;
+              while (curKey < (uint16*)end)
+              {
+                if (*curKey != min)
+                  *curDB2 = *curKey;
+                else
+                  *curDB2 = 0xFFFF;
+                ++curKey;
+                ++curDB2;
+              }
+            }
+            else
+            {
+              memcpy(db2Data, keyData, inLen);
+            }
+            convertedBytes = inLen;
+            rc = 0;
+          }
+          else
+          {
+            rc = convertFieldChars(toDB2, 
+                                   field->field_index, 
+                                   keyData,
+                                   db2Data,
+                                   inLen,
+                                   outLen,
+                                   &convertedBytes,
+                                   true);
+
+            if (rc == DB2I_ERR_ILL_CHAR)
+            {
+              // If an illegal character is encountered, we fill the remainder
+              // of the key with 0x00. This was implemented as a corollary to
+              // Bug#45012, though it should probably remain even after that
+              // bug is fixed.
+              memset(db2Data+convertedBytes, 0x00, outLen-convertedBytes);
+              convertedBytes = outLen;
+              rc = 0;
+            }
+          }
+          
+          if (!rc &&
+              (litDefPtr->DataType == QMY_VARGRAPHIC ||
+               litDefPtr->DataType == QMY_VARCHAR))
+          {
+            *(uint16*)(db2Data-sizeof(uint16)) = 
+                convertedBytes / (litDefPtr->DataType == QMY_VARGRAPHIC ? 2 : 1);
+          }
+
+        }
+        else // Non-character fields
+        {
+          rc = convertMySQLtoDB2(field,
+                                 db2Field,
+                                 literalPtr,
+                                 (uchar*)minPtr+((curKey.key_part[partsInUse].null_bit)? 1 : 0));
+        }
+
         if (rc != 0) break;
         litDefPtr->Offset = (uint32_t)(literalPtr - literalsPtr);
         litDefPtr->Length = db2Field.getByteLengthInRecord();
-        tempLen = litDefPtr->Length;
-        /*
-           Do additional conversion of a character or graphic value.
-                                                                                 */
-        CHARSET_INFO* fieldCharSet = field->charset();                                    
-        if ((field->type() != MYSQL_TYPE_BIT) &&           // Don't do conversion on BIT data
-            (field->charset() != &my_charset_bin) &&       // Don't do conversion on BINARY data
-            (litDefPtr->DataType == QMY_CHAR || litDefPtr->DataType == QMY_VARCHAR ||
-             litDefPtr->DataType == QMY_GRAPHIC || litDefPtr->DataType == QMY_VARGRAPHIC))
-        {
-           if (litDefPtr->DataType == QMY_VARCHAR ||
-               litDefPtr->DataType == QMY_VARGRAPHIC) 
-             tempPtr = literalPtr + sizeof(uint16);
-           else
-             tempPtr = literalPtr;
-           /* The following code checks to determine if MySQL is passing a
-              partial key. DB2 will accept a partial field value, but only
-              in the last field position of the key composite (and only if
-              there is no ICU sort sequence on the index).                  */
-           tempMinPtr = (char*)minPtr+((curKey.key_part[partsInUse].null_bit)? 1 : 0);
-           if (field->type() == MYSQL_TYPE_VARCHAR)
-           {
-             /* MySQL always stores key lengths as 2 bytes, little-endian. */
-               tempLen = *(uint8*)tempMinPtr + ((*(uint8*)(tempMinPtr+1)) << 8);
-               tempMinPtr = (char*)((char*)tempMinPtr + 2);
-           }
-           else
-             tempLen = field->field_length;                         
-
-           /* Determine if we are dealing with a partial key and if so, find the end of the partial key. */
-           if (litDefPtr->DataType == QMY_CHAR || litDefPtr->DataType == QMY_VARCHAR )
-           { /* Char or varchar.  If UTF8, no conversion is done to DB2 graphic.) */
-               endOfMinPtr = (char*)memchr(tempMinPtr,field->charset()->min_sort_char,tempLen);
-               if (endOfMinPtr)
-                 endOfLiteralPtr = tempPtr + ((uint32_t)(endOfMinPtr - tempMinPtr));
-           }
-           else
-           {
-              if (strncmp(fieldCharSet->csname, "utf8", sizeof("utf8")) == 0)
-              {  /* The MySQL charset is UTF8 but we are converting to graphic on DB2. Divide number of UTF8 bytes
-                    by 3 to get the number of characters, then multiple by 2 for double-byte graphic.*/
-               endOfMinPtr = (char*)memchr(tempMinPtr,field->charset()->min_sort_char,tempLen);
-               if (endOfMinPtr)
-                 endOfLiteralPtr = tempPtr + (((uint32_t)((endOfMinPtr - tempMinPtr)) / 3) * 2);
-              } 
-              else
-              { /* The DB2 data type is graphic or vargraphic, and we are not converting from UTF8 to graphic. */  
-                endOfMinPtr = (char*)wmemchr((wchar_t*)tempMinPtr,field->charset()->min_sort_char,tempLen/2);
-                if (endOfMinPtr)
-                  endOfLiteralPtr = tempPtr + (endOfMinPtr - tempMinPtr);
-              }
-           }
-           /* Enforce here that a partial is only allowed on the last field position 
-              of the key composite                                                    */
-           if (endOfLiteralPtr)
-           {
-             if ((partsInUse + 1) < minKeyCnt)   
-             {
-               rc = HA_POS_ERROR;
-               break;
-             }
-             endByte = endOfLiteralPtr - tempPtr;
-             /* We're making an assumption that if MySQL gives us a partial key, 
-                 the length of the partial is the same for both the min_key and max_key.      */
-           }  
-        }
         literalPtr = literalPtr + litDefPtr->Length;  // Bump pointer for next literal
       }
       /* If there is a max_key value for this field, and if the max_key value is 
@@ -389,28 +470,168 @@ ha_rows ha_ibmdb2i::records_in_range(uint inx,
         /*
            Convert the literal to DB2 format
                                                                                                */
-          rc = convertMySQLtoDB2(field,
-                 db2Field,
-                 literalPtr,
-                 (uchar*)maxPtr+((curKey.key_part[partsInUse].null_bit)? 1 : 0));
+        if ((field->type() != MYSQL_TYPE_BIT) &&           // Don't do conversion on BIT data
+            (field->charset() != &my_charset_bin) &&       // Don't do conversion on BINARY data
+            (litDefPtr->DataType == QMY_CHAR ||
+             litDefPtr->DataType == QMY_VARCHAR ||
+             litDefPtr->DataType == QMY_GRAPHIC ||
+             litDefPtr->DataType == QMY_VARGRAPHIC))
+          {
+            // We need to handle char fields in a special way in order to account
+            // for partial keys. Refer to the note above for a description of the
+            // basic design.
+            char* keyData = maxPtr+((curKey.key_part[partsInUse].null_bit)? 1 : 0);
+            char* db2Data = literalPtr;
+            uint16 outLen = db2Field.getByteLengthInRecord();
+            uint16 inLen;
+            if (litDefPtr->DataType == QMY_VARCHAR ||
+                litDefPtr->DataType == QMY_VARGRAPHIC)
+            {
+              inLen = *(uint8*)keyData + ((*(uint8*)(keyData+1)) << 8);
+              keyData += 2;
+              outLen -= sizeof(uint16);
+              db2Data += sizeof(uint16);
+            }
+            else
+            {
+              inLen = field->max_display_length();
+            }
+            
+            size_t convertedBytes;
+            if (db2Field.getCCSID() == 1208)
+            {
+              if (strcmp(field->charset()->name, "utf8_bin"))
+              {
+                const char* end = keyData+inLen;
+                const char* curKey = keyData;
+                char* curDB2 = db2Data;
+                uint32 max = field->charset()->max_sort_char;
+                while (curKey < end && (curDB2 < db2Data+outLen-3))
+                {
+                  my_wc_t temp;
+                  int len = field->charset()->cset->mb_wc(field->charset(), &temp, (const uchar*)curKey, (const uchar*)end);
+                  if (temp != max)
+                  {
+                    DBUG_ASSERT(len <= 3);
+                    switch (len)
+                    {
+                      case 3: *(curDB2+2) = *(curKey+2);
+                      case 2: *(curDB2+1) = *(curKey+1);
+                      case 1: *(curDB2) = *(curKey);
+                    }                      
+                    curDB2 += len;
+                  }
+                  else
+                  {
+                    *(curDB2++) = 0xE4;
+                    *(curDB2++) = 0xB6;
+                    *(curDB2++) = 0xBF;
+                  }
+                  curKey += len;
+                }
+                convertedBytes = curDB2 - db2Data;
+              }
+              else
+              {
+                DBUG_ASSERT(inLen <= outLen);
+                memcpy(db2Data, keyData, inLen);
+                convertedBytes = inLen;
+              }
+              rc = 0;
+            }
+            else if (db2Field.getCCSID() == 13488)
+            {
+              if (strcmp(field->charset()->name, "ucs2_bin") &&
+                  strcmp(field->charset()->name, "ucs2_general_ci"))
+              {
+                char* end = keyData+inLen;
+                uint16* curKey = (uint16*)keyData;
+                uint16* curDB2 = (uint16*)db2Data;
+                uint16 max = field->charset()->max_sort_char;
+                while (curKey < (uint16*)end)
+                {
+                  if (*curKey != max)
+                    *curDB2 = *curKey;
+                  else
+                    *curDB2 = 0x4DBF;
+                  ++curKey;
+                  ++curDB2;
+                }
+              }
+              else
+              {
+                memcpy(db2Data, keyData, outLen);
+              }
+              rc = 0;
+            }
+            else
+            {
+              size_t substituteChars = 0;
+              rc = convertFieldChars(toDB2, 
+                                     field->field_index, 
+                                     keyData,
+                                     db2Data,
+                                     inLen,
+                                     outLen,
+                                     &convertedBytes,
+                                     true,
+                                     &substituteChars);
+
+              if (rc == DB2I_ERR_ILL_CHAR)
+              {
+                // If an illegal character is encountered, we fill the remainder
+                // of the key with 0xFF. This was implemented to work around
+                // Bug#45012, though it should probably remain even after that
+                // bug is fixed.
+                memset(db2Data+convertedBytes, 0xFF, outLen-convertedBytes);
+                rc = 0;
+              }
+              else if ((substituteChars &&
+                        (litDefPtr->DataType == QMY_VARCHAR ||
+                         litDefPtr->DataType == QMY_CHAR)) ||
+                       strcmp(field->charset()->name, "cp1251_bulgarian_ci") == 0)
+              {
+                // When iconv translates the max_sort_char with a substitute 
+                // character, we have no way to know whether this affects
+                // the sort order of the key. Therefore, to be safe, when
+                // we know that substitute characters have been used in a
+                // single-byte string, we traverse the translated key
+                // in reverse, replacing substitue characters with 0xFF, which
+                // always sorts with the greatest weight in DB2 sort sequences.
+                // cp1251_bulgarian_ci is also handled this way because the
+                // max_sort_char is a control character which does not sort
+                // equivalently in DB2.
+                DBUG_ASSERT(inLen == outLen);
+                char* tmpKey = keyData + inLen - 1;
+                char* tmpDB2 = db2Data + outLen - 1;
+                while (*tmpKey == field->charset()->max_sort_char &&
+                       *tmpDB2 != 0xFF)
+                {
+                  *tmpDB2 = 0xFF;
+                  --tmpKey;
+                  --tmpDB2;
+                }                  
+              }
+            }
+            
+            if (!rc &&
+                (litDefPtr->DataType == QMY_VARGRAPHIC ||
+                 litDefPtr->DataType == QMY_VARCHAR))
+            {
+              *(uint16*)(db2Data-sizeof(uint16)) = 
+                  outLen / (litDefPtr->DataType == QMY_VARGRAPHIC ? 2 : 1);
+            }
+          }
+          else
+          {
+            rc = convertMySQLtoDB2(field,
+                                   db2Field,
+                                   literalPtr,
+                                   (uchar*)maxPtr+((curKey.key_part[partsInUse].null_bit)? 1 : 0));
+          }
           if (rc != 0) break;
           litDefPtr->Offset = (uint32_t)(literalPtr - literalsPtr);
           litDefPtr->Length = db2Field.getByteLengthInRecord();
-          tempLen = litDefPtr->Length;
-          /*
-             Now convert a character or graphic value.
-                                                                                 */
-          if ((field->type() != MYSQL_TYPE_BIT) &&     
-             (litDefPtr->DataType == QMY_CHAR || litDefPtr->DataType == QMY_VARCHAR ||
-              litDefPtr->DataType == QMY_GRAPHIC || litDefPtr->DataType == QMY_VARGRAPHIC))
-          {
-             if (litDefPtr->DataType == QMY_VARCHAR || litDefPtr->DataType == QMY_VARGRAPHIC)
-             {
-                tempPtr = literalPtr + sizeof(uint16);
-             }
-             else
-               tempPtr = literalPtr;
-          }
           literalPtr = literalPtr + litDefPtr->Length;   // Bump pointer for next literal
         }
         boundsPtr->HiBound.Position = literalCnt;
diff --git a/storage/ibmdb2i/ha_ibmdb2i.cc b/storage/ibmdb2i/ha_ibmdb2i.cc
index 46c84de4aee..0fc2d1e83dc 100644
--- a/storage/ibmdb2i/ha_ibmdb2i.cc
+++ b/storage/ibmdb2i/ha_ibmdb2i.cc
@@ -2230,34 +2230,19 @@ int ha_ibmdb2i::create(const char *name, TABLE *table_arg,
     } 
   }
   
-  bool primaryHasStringField = false;
-
+  String fieldDefinition(128);
+  
   if (table_arg->s->primary_key != MAX_KEY && !isTemporary)
   {
-    KEY& curKey = table_arg->key_info[table_arg->s->primary_key];
-    query.append(STRING_WITH_LEN(", PRIMARY KEY( "));
-    for (int j = 0; j < curKey.key_parts; ++j)
-    {
-      if (j != 0)
-      {
-        query.append( STRING_WITH_LEN(" , ") );
-      }
-      Field* field = curKey.key_part[j].field;
-      convertMySQLNameToDB2Name(field->field_name, colName, sizeof(colName));
-      query.append(colName);
-      enum_field_types type = field->real_type();
-      if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_BLOB ||
-          type == MYSQL_TYPE_STRING)
-      {
-        rc = updateAssociatedSortSequence(field->charset(),
-                                          &fileSortSequenceType,
-                                          fileSortSequence,
-                                          fileSortSequenceLibrary);
-        if (rc) DBUG_RETURN (rc);
-        primaryHasStringField = true;
-      }
-    }
-    query.append(STRING_WITH_LEN(" ) "));
+    query.append(STRING_WITH_LEN(", PRIMARY KEY "));
+    rc = buildIndexFieldList(fieldDefinition, 
+                             table_arg->key_info[table_arg->s->primary_key],
+                             true,
+                             &fileSortSequenceType,
+                             fileSortSequence,
+                             fileSortSequenceLibrary);
+    if (rc) DBUG_RETURN(rc);
+    query.append(fieldDefinition);
   }
 
   rc = buildDB2ConstraintString(thd->lex, 
@@ -2273,11 +2258,29 @@ int ha_ibmdb2i::create(const char *name, TABLE *table_arg,
   
   if (isTemporary)
     query.append(STRING_WITH_LEN(" ON COMMIT PRESERVE ROWS "));
-    
+  
+  if (create_info->alias)
+    generateAndAppendRCDFMT(create_info->alias, query);
+  else if (((TABLE_LIST*)(thd->lex->select_lex.table_list.first))->table_name)  
+    generateAndAppendRCDFMT((char*)((TABLE_LIST*)(thd->lex->select_lex.table_list.first))->table_name, query);
+        
   DBUG_PRINT("ha_ibmdb2i::create", ("Sent to DB2: %s",query.c_ptr()));
   SqlStatementStream sqlStream(query.length());
   sqlStream.addStatement(query,fileSortSequence,fileSortSequenceLibrary);
   
+  if (table_arg->s->primary_key != MAX_KEY && 
+      !isTemporary &&
+      (THDVAR(thd, create_index_option)==1) &&
+      (fileSortSequenceType != 'B') &&
+      (fileSortSequenceType != ' '))
+  {
+    rc = generateShadowIndex(sqlStream, 
+                             table_arg->key_info[table_arg->s->primary_key], 
+                             libName, 
+                             fileName, 
+                             fieldDefinition);
+    if (rc) DBUG_RETURN(rc);
+  }
   for (uint i = 0; i < table_arg->s->keys; ++i)
   {
     if (i != table_arg->s->primary_key || isTemporary)
@@ -3007,52 +3010,27 @@ int32 ha_ibmdb2i::buildCreateIndexStatement(SqlStatementStream& sqlStream,
   }
   
   String fieldDefinition(128);
-  fieldDefinition.length(0);
-  fieldDefinition.append(STRING_WITH_LEN(" ( "));
-  for (int j = 0; j < key.key_parts; ++j)
-  {
-    char colName[MAX_DB2_COLNAME_LENGTH+1];
-    if (j != 0)
-    {
-      fieldDefinition.append(STRING_WITH_LEN(" , "));
-    }
-    Field* field = key.key_part[j].field;
-    convertMySQLNameToDB2Name(field->field_name, colName, sizeof(colName));
-    fieldDefinition.append(colName);
-    rc = updateAssociatedSortSequence(field->charset(),
-                                      &fileSortSequenceType,
-                                      fileSortSequence,
-                                      fileSortSequenceLibrary);
-    if (rc) DBUG_RETURN (rc);
-  }
-  fieldDefinition.append(STRING_WITH_LEN(" ) "));
+  rc = buildIndexFieldList(fieldDefinition,
+                           key,
+                           isPrimary,
+                           &fileSortSequenceType, 
+                           fileSortSequence,
+                           fileSortSequenceLibrary);
   
+  if (rc) DBUG_RETURN(rc);
+   
   query.append(fieldDefinition);
   
   if ((THDVAR(ha_thd(), create_index_option)==1) &&
-      (fileSortSequenceType != 'B'))
+      (fileSortSequenceType != 'B') &&
+      (fileSortSequenceType != ' '))
   {
-    String shadowQuery(256);
-    shadowQuery.length(0);
-    
-    shadowQuery.append(STRING_WITH_LEN("CREATE INDEX "));
-
-    shadowQuery.append(db2LibName);
-    shadowQuery.append('.');
-    if (db2i_table::appendQualifiedIndexFileName(key.name, db2FileName, shadowQuery, db2i_table::ASCII_SQL, typeHex))
-    {
-      getErrTxt(DB2I_ERR_INVALID_NAME,"index","*generated*");
-      DBUG_RETURN(DB2I_ERR_INVALID_NAME );
-    }
-
-    shadowQuery.append(STRING_WITH_LEN(" ON "));
-
-    shadowQuery.append(db2LibName);
-    shadowQuery.append('.');
-    shadowQuery.append(db2FileName);
-    shadowQuery.append(fieldDefinition);
-    DBUG_PRINT("ha_ibmdb2i::buildCreateIndexStatement", ("Sent to DB2: %s",shadowQuery.c_ptr_safe()));
-    sqlStream.addStatement(shadowQuery,"*HEX","QSYS");
+    rc = generateShadowIndex(sqlStream, 
+                             key, 
+                             db2LibName, 
+                             db2FileName, 
+                             fieldDefinition);
+    if (rc) DBUG_RETURN(rc);
   }
     
   DBUG_PRINT("ha_ibmdb2i::buildCreateIndexStatement", ("Sent to DB2: %s",query.c_ptr_safe()));
@@ -3061,7 +3039,97 @@ int32 ha_ibmdb2i::buildCreateIndexStatement(SqlStatementStream& sqlStream,
   DBUG_RETURN(0);
 }
 
+/**
+  Generate the SQL syntax for the list of fields to be assigned to the 
+  specified key. The corresponding sort sequence is also calculated.
+      
+  @param[out] appendHere  The string to receive the generated SQL
+  @param key  The key to evaluate
+  @param isPrimary  True if this is being generated on behalf of the primary key
+  @param[out] fileSortSequenceType  The type of the associated sort sequence
+  @param[out] fileSortSequence  The name of the associated sort sequence
+  @param[out] fileSortSequenceLibrary  The library of the associated sort sequence
+  
+  @return  0 if successful; error value otherwise
+*/
+int32 ha_ibmdb2i::buildIndexFieldList(String& appendHere,
+                                      const KEY& key,
+                                      bool isPrimary,
+                                      char* fileSortSequenceType, 
+                                      char* fileSortSequence, 
+                                      char* fileSortSequenceLibrary)
+{
+  DBUG_ENTER("ha_ibmdb2i::buildIndexFieldList");
+  appendHere.append(STRING_WITH_LEN(" ( "));
+  for (int j = 0; j < key.key_parts; ++j)
+  {
+    char colName[MAX_DB2_COLNAME_LENGTH+1];
+    if (j != 0)
+    {
+      appendHere.append(STRING_WITH_LEN(" , "));
+    }
+    
+    KEY_PART_INFO& kpi = key.key_part[j];
+    Field* field = kpi.field;
+    
+    convertMySQLNameToDB2Name(field->field_name, 
+                              colName, 
+                              sizeof(colName));
+    appendHere.append(colName);
+    
+    int32 rc;
+    rc = updateAssociatedSortSequence(field->charset(),
+                                      fileSortSequenceType,
+                                      fileSortSequence,
+                                      fileSortSequenceLibrary);
+    if (rc) DBUG_RETURN (rc);
+  }
+    
+  appendHere.append(STRING_WITH_LEN(" ) "));
+  
+  DBUG_RETURN(0);
+}
 
+
+/**
+  Generate an SQL statement that defines a *HEX sorted index to implement 
+  the ibmdb2i_create_index.
+      
+  @param[out] stream  The stream to append the generated statement to
+  @param key  The key to evaluate
+  @param[out] libName  The library containg the table
+  @param[out] fileName  The DB2-compatible name of the table 
+  @param[out] fieldDefinition  The list of the fields in the index, in SQL syntax
+  
+  @return  0 if successful; error value otherwise
+*/
+int32 ha_ibmdb2i::generateShadowIndex(SqlStatementStream& stream, 
+                                      const KEY& key,
+                                      const char* libName,
+                                      const char* fileName,
+                                      const String& fieldDefinition)
+{
+  String shadowQuery(256);
+  shadowQuery.length(0);
+  shadowQuery.append(STRING_WITH_LEN("CREATE INDEX "));
+  shadowQuery.append(libName);
+  shadowQuery.append('.');
+  if (db2i_table::appendQualifiedIndexFileName(key.name, fileName, shadowQuery, db2i_table::ASCII_SQL, typeHex))
+  {
+    getErrTxt(DB2I_ERR_INVALID_NAME,"index","*generated*");
+    return DB2I_ERR_INVALID_NAME;
+  }
+  shadowQuery.append(STRING_WITH_LEN(" ON "));
+  shadowQuery.append(libName);
+  shadowQuery.append('.');
+  shadowQuery.append(fileName);
+  shadowQuery.append(fieldDefinition);
+  DBUG_PRINT("ha_ibmdb2i::generateShadowIndex", ("Sent to DB2: %s",shadowQuery.c_ptr_safe()));
+  stream.addStatement(shadowQuery,"*HEX","QSYS");
+  return 0;
+}
+  
+  
 void ha_ibmdb2i::doInitialRead(char orientation,
                                 uint32 rowsToBuffer,
                                 ILEMemHandle key,
diff --git a/storage/ibmdb2i/ha_ibmdb2i.h b/storage/ibmdb2i/ha_ibmdb2i.h
index e90f152919c..b2a43232f2d 100644
--- a/storage/ibmdb2i/ha_ibmdb2i.h
+++ b/storage/ibmdb2i/ha_ibmdb2i.h
@@ -383,7 +383,15 @@ private:
   int32 prepareWriteBufferForLobs();
   uint32 adjustLobBuffersForRead();
   bool lobFieldsRequested();
-  int convertFieldChars(enum_conversionDirection direction, uint16 fieldID, const char* input, char* output, size_t ilen, size_t olen, size_t* outDataLen);
+  int convertFieldChars(enum_conversionDirection direction, 
+                        uint16 fieldID, 
+                        const char* input, 
+                        char* output, 
+                        size_t ilen, 
+                        size_t olen, 
+                        size_t* outDataLen,
+                        bool tacitErrors=FALSE,
+                        size_t* substChars=NULL);
 
   /**
     Fast integer log2 function
@@ -522,6 +530,13 @@ private:
                                  bool isPrimary,
                                  const char* db2LibName,    
                                  const char* db2FileName);
+  
+  int32 buildIndexFieldList(String& appendHere,
+                            const KEY& key,
+                            bool isPrimary,
+                            char* fileSortSequenceType, 
+                            char* fileSortSequence, 
+                            char* fileSortSequenceLibrary);
 
   // Specify NULL for data when using the data pointed to by field
   int32 convertMySQLtoDB2(Field* field, const DB2Field& db2Field, char* db2Buf, const uchar* data = NULL); 
@@ -746,5 +761,62 @@ private:
       free_root(&conversionBufferMemroot, MYF(0));
     }    
   }
- 
+  
+  
+/**
+  Generate a valid RCDFMT name based on the name of the table.
+  
+  The RCDFMT name is devised by munging the name of the table,
+  uppercasing all ascii alpha-numeric characters and replacing all other
+  characters with underscores until up to ten characters have been generated.
+    
+  @param tableName  The name of the table, as given on the MySQL
+                    CREATE TABLE statement
+  @param[out] query  The string to receive the generated RCDFMT name
+*/
+  static void generateAndAppendRCDFMT(const char* tableName, String& query)
+  {
+    char rcdfmt[11];
+    
+    // The RCDFMT name must begin with an alpha character.
+    // We enforce this by skipping to the first alpha character in the table
+    // name. If no alpha character exists, we use 'X' for the RCDFMT name;
+    
+    while (*tableName &&
+           (!my_isascii(*tableName) ||
+            !my_isalpha(system_charset_info, *tableName)))
+    {
+      tableName += my_mbcharlen(system_charset_info, *tableName);
+    }
+    
+    if (unlikely(!(*tableName)))
+    { 
+      rcdfmt[0]= 'X';
+      rcdfmt[1]= 0;
+    }
+    else
+    {
+      int r= 0;
+      while ((r < sizeof(rcdfmt)-1) && *tableName)
+      {
+        if (my_isascii(*tableName) &&
+            my_isalnum(system_charset_info, *tableName))
+          rcdfmt[r] = my_toupper(system_charset_info, *tableName);
+        else
+          rcdfmt[r] = '_';
+        
+        ++r;
+        tableName += my_mbcharlen(system_charset_info, *tableName);
+      }
+      rcdfmt[r]= 0;
+    }
+    query.append(STRING_WITH_LEN(" RCDFMT "));
+    query.append(rcdfmt);
+  }
+  
+  int32 generateShadowIndex(SqlStatementStream& stream, 
+                           const KEY& key,
+                           const char* libName,
+                           const char* fileName,
+                           const String& fieldDefinition);
 };
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
deleted file mode 100755
index 249a600834d..00000000000
--- a/storage/innobase/CMakeLists.txt
+++ /dev/null
@@ -1,92 +0,0 @@
-# Copyright (C) 2006 MySQL AB
-# 
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-# 
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-ADD_DEFINITIONS(-DMYSQL_SERVER -D_WIN32 -D_LIB)
-
-# Bug 19424 - InnoDB: Possibly a memory overrun of the buffer being freed (64-bit Visual C)
-# Removing Win64 compiler optimizations for all innodb/mem/* files.
-IF(CMAKE_GENERATOR MATCHES "Visual Studio" AND CMAKE_SIZEOF_VOID_P MATCHES 8)
-  SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/storage/innobase/mem/mem0mem.c
-                              ${CMAKE_SOURCE_DIR}/storage/innobase/mem/mem0pool.c
-                              PROPERTIES COMPILE_FLAGS -Od)
-ENDIF(CMAKE_GENERATOR MATCHES "Visual Studio" AND CMAKE_SIZEOF_VOID_P MATCHES 8)
-
-IF (WIN32)
-  IF (NOT WITHOUT_ATOMICS)
-# Check if this Windows version supports atomic instructions
-    IF (CMAKE_SIZEOF_VOID_P MATCHES 8)
-# Check for 64 bit atomics
-      TRY_RUN(RUN_RES COMPILE_RES ${CMAKE_BINARY_DIR}
-              ${CMAKE_SOURCE_DIR}/storage/innobase/win_atomics64_test.c)
-      IF (COMPILE_RES AND NOT RUN_RES)
-        MESSAGE("Adding support for Win64 atomics")
-        ADD_DEFINITIONS(-DWIN_ATOMICS64)
-      ENDIF (COMPILE_RES AND NOT RUN_RES)
-    ELSE (CMAKE_SIZEOF_VOID_P MATCHES 8)
-# Check for 32 bit atomics
-      TRY_RUN(RUN_RES COMPILE_RES ${CMAKE_BINARY_DIR}
-              ${CMAKE_SOURCE_DIR}/storage/innobase/win_atomics32_test.c)
-      IF (COMPILE_RES AND NOT RUN_RES)
-        MESSAGE("Adding support for Win32 atomics")
-        ADD_DEFINITIONS(-DWIN_ATOMICS32)
-      ENDIF (COMPILE_RES AND NOT RUN_RES)
-    ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8)
-  ENDIF (NOT WITHOUT_ATOMICS)
-ENDIF (WIN32)
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
-                    ${CMAKE_SOURCE_DIR}/storage/innobase/include
-                    ${CMAKE_SOURCE_DIR}/storage/innobase/handler
-                    ${CMAKE_SOURCE_DIR}/sql
-                    ${CMAKE_SOURCE_DIR}/regex
-                    ${CMAKE_SOURCE_DIR}/extra/yassl/include)
-
-SET(INNOBASE_SOURCES  btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c 
-					 buf/buf0buf.c buf/buf0flu.c buf/buf0lru.c buf/buf0rea.c 
-					 data/data0data.c data/data0type.c 
-					 dict/dict0boot.c dict/dict0crea.c dict/dict0dict.c dict/dict0load.c dict/dict0mem.c 
-					 dyn/dyn0dyn.c 
-					 eval/eval0eval.c eval/eval0proc.c 
-					 fil/fil0fil.c 
-					 fsp/fsp0fsp.c
-					 fut/fut0fut.c fut/fut0lst.c 
-					 ha/ha0ha.c ha/hash0hash.c 
-					 ibuf/ibuf0ibuf.c 
-					 pars/lexyy.c pars/pars0grm.c pars/pars0opt.c pars/pars0pars.c pars/pars0sym.c 
-					 lock/lock0lock.c
-					 log/log0log.c log/log0recv.c 
-					 mach/mach0data.c 
-					 mem/mem0mem.c mem/mem0pool.c 
-					 mtr/mtr0log.c mtr/mtr0mtr.c 
-					 os/os0file.c os/os0proc.c os/os0sync.c os/os0thread.c 
-					 page/page0cur.c page/page0page.c 
-					 que/que0que.c 
-					 handler/ha_innodb.cc
-					 read/read0read.c 
-					 rem/rem0cmp.c rem/rem0rec.c
-					 row/row0ins.c row/row0mysql.c row/row0purge.c row/row0row.c row/row0sel.c row/row0uins.c 
-					 row/row0umod.c row/row0undo.c row/row0upd.c row/row0vers.c 
-					 srv/srv0que.c srv/srv0srv.c srv/srv0start.c 
-					 sync/sync0arr.c sync/sync0rw.c sync/sync0sync.c 
-					 thr/thr0loc.c 
-					 trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c 
-					 usr/usr0sess.c 
-					 ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0list.c ut/ut0wqueue.c)
-
-IF(NOT SOURCE_SUBLIBS)
-  ADD_LIBRARY(innobase ${INNOBASE_SOURCES})
-  ADD_DEPENDENCIES(innobase GenError)
-ENDIF(NOT SOURCE_SUBLIBS)
diff --git a/storage/innobase/Makefile.am b/storage/innobase/Makefile.am
deleted file mode 100644
index 180d2ca0b87..00000000000
--- a/storage/innobase/Makefile.am
+++ /dev/null
@@ -1,175 +0,0 @@
-# Copyright (C) 2001, 2004, 2006 MySQL AB & Innobase Oy
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-# 
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-# Process this file with automake to create Makefile.in
-
-MYSQLDATAdir=		$(localstatedir)
-MYSQLSHAREdir=		$(pkgdatadir)
-MYSQLBASEdir=		$(prefix)
-MYSQLLIBdir=		$(pkglibdir)
-pkgplugindir=		$(pkglibdir)/plugin
-INCLUDES=		-I$(top_srcdir)/include -I$(top_builddir)/include \
-			-I$(top_srcdir)/regex \
-			-I$(top_srcdir)/storage/innobase/include \
-			-I$(top_srcdir)/sql \
-			-I$(srcdir)
-
-DEFS=			@DEFS@
-
-
-noinst_HEADERS=		include/btr0btr.h include/btr0btr.ic		\
-			include/btr0cur.h include/btr0cur.ic		\
-			include/btr0pcur.h include/btr0pcur.ic		\
-			include/btr0sea.h include/btr0sea.ic		\
-			include/btr0types.h include/buf0buf.h		\
-			include/buf0buf.ic include/buf0flu.h		\
-			include/buf0flu.ic include/buf0lru.h		\
-			include/buf0lru.ic include/buf0rea.h		\
-			include/buf0types.h include/data0data.h		\
-			include/data0data.ic include/data0type.h	\
-			include/data0type.ic include/data0types.h	\
-			include/db0err.h include/dict0boot.h		\
-			include/dict0boot.ic include/dict0crea.h	\
-			include/dict0crea.ic include/dict0dict.h	\
-			include/dict0dict.ic include/dict0load.h	\
-			include/dict0load.ic include/dict0mem.h		\
-			include/dict0mem.ic include/dict0types.h	\
-			include/dyn0dyn.h include/dyn0dyn.ic		\
-			include/eval0eval.h include/eval0eval.ic	\
-			include/eval0proc.h include/eval0proc.ic	\
-			include/fil0fil.h include/fsp0fsp.h		\
-			include/fsp0fsp.ic include/fut0fut.h		\
-			include/fut0fut.ic include/fut0lst.h		\
-			include/fut0lst.ic include/ha0ha.h		\
-			include/ha0ha.ic include/hash0hash.h		\
-			include/hash0hash.ic include/ibuf0ibuf.h	\
-			include/ibuf0ibuf.ic include/ibuf0types.h	\
-			include/lock0iter.h				\
-			include/lock0lock.h include/lock0lock.ic	\
-			include/lock0priv.h include/lock0priv.ic	\
-			include/lock0types.h include/log0log.h		\
-			include/log0log.ic include/log0recv.h		\
-			include/log0recv.ic include/mach0data.h		\
-			include/mach0data.ic include/mem0dbg.h		\
-			include/mem0dbg.ic mem/mem0dbg.c		\
-			include/mem0mem.h include/mem0mem.ic		\
-			include/mem0pool.h include/mem0pool.ic		\
-			include/mtr0log.h include/mtr0log.ic		\
-			include/mtr0mtr.h include/mtr0mtr.ic		\
-			include/mtr0types.h include/os0file.h		\
-			include/os0proc.h include/os0proc.ic		\
-			include/os0sync.h include/os0sync.ic		\
-			include/os0thread.h include/os0thread.ic	\
-			include/page0cur.h include/page0cur.ic		\
-			include/page0page.h include/page0page.ic	\
-			include/page0types.h include/pars0grm.h		\
-			include/pars0opt.h include/pars0opt.ic		\
-			include/pars0pars.h include/pars0pars.ic	\
-			include/pars0sym.h include/pars0sym.ic		\
-			include/pars0types.h include/que0que.h		\
-			include/que0que.ic include/que0types.h		\
-			include/read0read.h include/read0read.ic	\
-			include/read0types.h include/rem0cmp.h		\
-			include/rem0cmp.ic include/rem0rec.h		\
-			include/rem0rec.ic include/rem0types.h		\
-			include/row0ins.h include/row0ins.ic		\
-			include/row0mysql.h include/row0mysql.ic	\
-			include/row0purge.h include/row0purge.ic	\
-			include/row0row.h include/row0row.ic		\
-			include/row0sel.h include/row0sel.ic		\
-			include/row0types.h include/row0uins.h		\
-			include/row0uins.ic include/row0umod.h		\
-			include/row0umod.ic include/row0undo.h		\
-			include/row0undo.ic include/row0upd.h		\
-			include/row0upd.ic include/row0vers.h		\
-			include/row0vers.ic include/srv0que.h		\
-			include/srv0srv.h include/srv0srv.ic		\
-			include/srv0start.h include/sync0arr.h		\
-			include/sync0arr.ic include/sync0rw.h		\
-			include/sync0rw.ic include/sync0sync.h		\
-			include/sync0sync.ic include/sync0types.h	\
-			include/thr0loc.h include/thr0loc.ic		\
-			include/trx0purge.h include/trx0purge.ic	\
-			include/trx0rec.h include/trx0rec.ic		\
-			include/trx0roll.h include/trx0roll.ic		\
-			include/trx0rseg.h include/trx0rseg.ic		\
-			include/trx0sys.h include/trx0sys.ic		\
-			include/trx0trx.h include/trx0trx.ic		\
-			include/trx0types.h include/trx0undo.h		\
-			include/trx0undo.ic include/trx0xa.h		\
-			include/univ.i include/usr0sess.h		\
-			include/usr0sess.ic include/usr0types.h		\
-			include/ut0byte.h include/ut0byte.ic		\
-			include/ut0dbg.h include/ut0lst.h		\
-			include/ut0mem.h include/ut0mem.ic		\
-			include/ut0rnd.h include/ut0rnd.ic		\
-			include/ut0sort.h include/ut0ut.h		\
-			include/ut0ut.ic include/ut0vec.h		\
-			include/ut0vec.ic include/ut0list.h		\
-			include/ut0list.ic include/ut0wqueue.h		\
-			include/ha_prototypes.h handler/ha_innodb.h
-
-EXTRA_LIBRARIES=	libinnobase.a
-noinst_LIBRARIES=	@plugin_innobase_static_target@
-libinnobase_a_SOURCES=	btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c	\
-			btr/btr0sea.c buf/buf0buf.c buf/buf0flu.c	\
-			buf/buf0lru.c buf/buf0rea.c data/data0data.c	\
-			data/data0type.c dict/dict0boot.c		\
-			dict/dict0crea.c dict/dict0dict.c		\
-			dict/dict0load.c dict/dict0mem.c dyn/dyn0dyn.c	\
-			eval/eval0eval.c eval/eval0proc.c		\
-			fil/fil0fil.c fsp/fsp0fsp.c fut/fut0fut.c	\
-			fut/fut0lst.c ha/ha0ha.c ha/hash0hash.c		\
-			ibuf/ibuf0ibuf.c lock/lock0iter.c		\
-			lock/lock0lock.c				\
-			log/log0log.c log/log0recv.c mach/mach0data.c	\
-			mem/mem0mem.c mem/mem0pool.c mtr/mtr0log.c	\
-			mtr/mtr0mtr.c os/os0file.c os/os0proc.c		\
-			os/os0sync.c os/os0thread.c page/page0cur.c	\
-			page/page0page.c pars/lexyy.c pars/pars0grm.c	\
-			pars/pars0opt.c pars/pars0pars.c		\
-			pars/pars0sym.c que/que0que.c read/read0read.c	\
-			rem/rem0cmp.c rem/rem0rec.c row/row0ins.c	\
-			row/row0mysql.c row/row0purge.c row/row0row.c	\
-			row/row0sel.c row/row0uins.c row/row0umod.c	\
-			row/row0undo.c row/row0upd.c row/row0vers.c	\
-			srv/srv0que.c srv/srv0srv.c srv/srv0start.c	\
-			sync/sync0arr.c sync/sync0rw.c			\
-			sync/sync0sync.c thr/thr0loc.c trx/trx0purge.c	\
-			trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c	\
-			trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c	\
-			usr/usr0sess.c ut/ut0byte.c ut/ut0dbg.c		\
-			ut/ut0list.c ut/ut0mem.c ut/ut0rnd.c		\
-			ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c		\
-			handler/ha_innodb.cc
-
-libinnobase_a_CXXFLAGS=	$(AM_CFLAGS)
-libinnobase_a_CFLAGS=	$(AM_CFLAGS)
-
-EXTRA_LTLIBRARIES=	ha_innodb.la
-pkgplugin_LTLIBRARIES=	@plugin_innobase_shared_target@
-
-ha_innodb_la_LDFLAGS=	-module -rpath $(pkgplugindir)
-ha_innodb_la_CXXFLAGS=	$(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
-ha_innodb_la_CFLAGS=	$(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
-ha_innodb_la_SOURCES=	$(libinnobase_a_SOURCES)
-
-EXTRA_DIST=		CMakeLists.txt plug.in \
-			pars/make_bison.sh pars/make_flex.sh \
-			pars/pars0grm.y pars/pars0lex.l \
-			win_atomics32_test.c win_atomics64_test.c
-
-# Don't update the files from bitkeeper
-%::SCCS/s.%
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
deleted file mode 100644
index 6e8b43aeb8d..00000000000
--- a/storage/innobase/btr/btr0btr.c
+++ /dev/null
@@ -1,3077 +0,0 @@
-/******************************************************
-The B-tree
-
-(c) 1994-1996 Innobase Oy
-
-Created 6/2/1994 Heikki Tuuri
-*******************************************************/
-
-#include "btr0btr.h"
-
-#ifdef UNIV_NONINL
-#include "btr0btr.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "page0page.h"
-#include "btr0cur.h"
-#include "btr0sea.h"
-#include "btr0pcur.h"
-#include "rem0cmp.h"
-#include "lock0lock.h"
-#include "ibuf0ibuf.h"
-#include "trx0trx.h"
-
-/*
-Latching strategy of the InnoDB B-tree
---------------------------------------
-A tree latch protects all non-leaf nodes of the tree. Each node of a tree
-also has a latch of its own.
-
-A B-tree operation normally first acquires an S-latch on the tree. It
-searches down the tree and releases the tree latch when it has the
-leaf node latch. To save CPU time we do not acquire any latch on
-non-leaf nodes of the tree during a search, those pages are only bufferfixed.
-
-If an operation needs to restructure the tree, it acquires an X-latch on
-the tree before searching to a leaf node. If it needs, for example, to
-split a leaf,
-(1) InnoDB decides the split point in the leaf,
-(2) allocates a new page,
-(3) inserts the appropriate node pointer to the first non-leaf level,
-(4) releases the tree X-latch,
-(5) and then moves records from the leaf to the new allocated page.
-
-Node pointers
--------------
-Leaf pages of a B-tree contain the index records stored in the
-tree. On levels n > 0 we store 'node pointers' to pages on level
-n - 1. For each page there is exactly one node pointer stored:
-thus the our tree is an ordinary B-tree, not a B-link tree.
-
-A node pointer contains a prefix P of an index record. The prefix
-is long enough so that it determines an index record uniquely.
-The file page number of the child page is added as the last
-field. To the child page we can store node pointers or index records
-which are >= P in the alphabetical order, but < P1 if there is
-a next node pointer on the level, and P1 is its prefix.
-
-If a node pointer with a prefix P points to a non-leaf child,
-then the leftmost record in the child must have the same
-prefix P. If it points to a leaf node, the child is not required
-to contain any record with a prefix equal to P. The leaf case
-is decided this way to allow arbitrary deletions in a leaf node
-without touching upper levels of the tree.
-
-We have predefined a special minimum record which we
-define as the smallest record in any alphabetical order.
-A minimum record is denoted by setting a bit in the record
-header. A minimum record acts as the prefix of a node pointer
-which points to a leftmost node on any level of the tree.
-
-File page allocation
---------------------
-In the root node of a B-tree there are two file segment headers.
-The leaf pages of a tree are allocated from one file segment, to
-make them consecutive on disk if possible. From the other file segment
-we allocate pages for the non-leaf levels of the tree.
-*/
-
-/****************************************************************
-Returns the upper level node pointer to a page. It is assumed that
-mtr holds an x-latch on the tree. */
-static
-rec_t*
-btr_page_get_father_node_ptr(
-/*=========================*/
-				/* out: pointer to node pointer record */
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: page: must contain at least one
-				user record */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
-Empties an index page. */
-static
-void
-btr_page_empty(
-/*===========*/
-	page_t*	page,	/* in: page to be emptied */
-	mtr_t*	mtr);	/* in: mtr */
-/*****************************************************************
-Returns TRUE if the insert fits on the appropriate half-page
-with the chosen split_rec. */
-static
-ibool
-btr_page_insert_fits(
-/*=================*/
-					/* out: TRUE if fits */
-	btr_cur_t*	cursor,		/* in: cursor at which insert
-					should be made */
-	rec_t*		split_rec,	/* in: suggestion for first record
-					on upper half-page, or NULL if
-					tuple should be first */
-	const ulint*	offsets,	/* in: rec_get_offsets(
-					split_rec, cursor->index) */
-	dtuple_t*	tuple,		/* in: tuple to insert */
-	mem_heap_t*	heap);		/* in: temporary memory heap */
-
-/******************************************************************
-Gets the root node of a tree and x-latches it. */
-
-page_t*
-btr_root_get(
-/*=========*/
-				/* out: root page, x-latched */
-	dict_index_t*	index,	/* in: index tree */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ulint	space;
-	ulint	root_page_no;
-	page_t*	root;
-
-	space = dict_index_get_space(index);
-	root_page_no = dict_index_get_page(index);
-
-	root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
-	ut_a((ibool)!!page_is_comp(root) == dict_table_is_comp(index->table));
-
-	return(root);
-}
-
-/*****************************************************************
-Gets pointer to the previous user record in the tree. It is assumed that
-the caller has appropriate latches on the page and its neighbor. */
-
-rec_t*
-btr_get_prev_user_rec(
-/*==================*/
-			/* out: previous user record, NULL if there is none */
-	rec_t*	rec,	/* in: record on leaf level */
-	mtr_t*	mtr)	/* in: mtr holding a latch on the page, and if
-			needed, also to the previous page */
-{
-	page_t*	page;
-	page_t*	prev_page;
-	ulint	prev_page_no;
-	ulint	space;
-
-	if (!page_rec_is_infimum(rec)) {
-
-		rec_t*	prev_rec = page_rec_get_prev(rec);
-
-		if (!page_rec_is_infimum(prev_rec)) {
-
-			return(prev_rec);
-		}
-	}
-
-	page = buf_frame_align(rec);
-	prev_page_no = btr_page_get_prev(page, mtr);
-	space = buf_frame_get_space_id(page);
-
-	if (prev_page_no != FIL_NULL) {
-
-		prev_page = buf_page_get_with_no_latch(space, prev_page_no,
-						       mtr);
-		/* The caller must already have a latch to the brother */
-		ut_ad((mtr_memo_contains(mtr, buf_block_align(prev_page),
-					 MTR_MEMO_PAGE_S_FIX))
-		      || (mtr_memo_contains(mtr, buf_block_align(prev_page),
-					    MTR_MEMO_PAGE_X_FIX)));
-		ut_a(page_is_comp(prev_page) == page_is_comp(page));
-#ifdef UNIV_BTR_DEBUG
-		ut_a(btr_page_get_next(prev_page, mtr)
-		     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
-		return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
-	}
-
-	return(NULL);
-}
-
-/*****************************************************************
-Gets pointer to the next user record in the tree. It is assumed that the
-caller has appropriate latches on the page and its neighbor. */
-
-rec_t*
-btr_get_next_user_rec(
-/*==================*/
-			/* out: next user record, NULL if there is none */
-	rec_t*	rec,	/* in: record on leaf level */
-	mtr_t*	mtr)	/* in: mtr holding a latch on the page, and if
-			needed, also to the next page */
-{
-	page_t*	page;
-	page_t*	next_page;
-	ulint	next_page_no;
-	ulint	space;
-
-	if (!page_rec_is_supremum(rec)) {
-
-		rec_t*	next_rec = page_rec_get_next(rec);
-
-		if (!page_rec_is_supremum(next_rec)) {
-
-			return(next_rec);
-		}
-	}
-
-	page = buf_frame_align(rec);
-	next_page_no = btr_page_get_next(page, mtr);
-	space = buf_frame_get_space_id(page);
-
-	if (next_page_no != FIL_NULL) {
-
-		next_page = buf_page_get_with_no_latch(space, next_page_no,
-						       mtr);
-		/* The caller must already have a latch to the brother */
-		ut_ad((mtr_memo_contains(mtr, buf_block_align(next_page),
-					 MTR_MEMO_PAGE_S_FIX))
-		      || (mtr_memo_contains(mtr, buf_block_align(next_page),
-					    MTR_MEMO_PAGE_X_FIX)));
-#ifdef UNIV_BTR_DEBUG
-		ut_a(btr_page_get_prev(next_page, mtr)
-		     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
-		ut_a(page_is_comp(next_page) == page_is_comp(page));
-		return(page_rec_get_next(page_get_infimum_rec(next_page)));
-	}
-
-	return(NULL);
-}
-
-/******************************************************************
-Creates a new index page (not the root, and also not
-used in page reorganization). */
-static
-void
-btr_page_create(
-/*============*/
-	page_t*		page,	/* in: page to be created */
-	dict_index_t*	index,	/* in: index */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	page_create(page, mtr, dict_table_is_comp(index->table));
-	buf_block_align(page)->check_index_page_at_flush = TRUE;
-
-	btr_page_set_index_id(page, index->id, mtr);
-}
-
-/******************************************************************
-Allocates a new file page to be used in an ibuf tree. Takes the page from
-the free list of the tree, which must contain pages! */
-static
-page_t*
-btr_page_alloc_for_ibuf(
-/*====================*/
-				/* out: new allocated page, x-latched */
-	dict_index_t*	index,	/* in: index tree */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	fil_addr_t	node_addr;
-	page_t*		root;
-	page_t*		new_page;
-
-	root = btr_root_get(index, mtr);
-
-	node_addr = flst_get_first(root + PAGE_HEADER
-				   + PAGE_BTR_IBUF_FREE_LIST, mtr);
-	ut_a(node_addr.page != FIL_NULL);
-
-	new_page = buf_page_get(dict_index_get_space(index), node_addr.page,
-				RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
-
-	flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
-		    new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
-		    mtr);
-	ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
-			    mtr));
-
-	return(new_page);
-}
-
-/******************************************************************
-Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents! */
-
-page_t*
-btr_page_alloc(
-/*===========*/
-					/* out: new allocated page, x-latched;
-					NULL if out of space */
-	dict_index_t*	index,		/* in: index */
-	ulint		hint_page_no,	/* in: hint of a good page */
-	byte		file_direction,	/* in: direction where a possible
-					page split is made */
-	ulint		level,		/* in: level where the page is placed
-					in the tree */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	fseg_header_t*	seg_header;
-	page_t*		root;
-	page_t*		new_page;
-	ulint		new_page_no;
-
-	if (index->type & DICT_IBUF) {
-
-		return(btr_page_alloc_for_ibuf(index, mtr));
-	}
-
-	root = btr_root_get(index, mtr);
-
-	if (level == 0) {
-		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-	} else {
-		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-	}
-
-	/* Parameter TRUE below states that the caller has made the
-	reservation for free extents, and thus we know that a page can
-	be allocated: */
-
-	new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
-						   file_direction, TRUE, mtr);
-	if (new_page_no == FIL_NULL) {
-
-		return(NULL);
-	}
-
-	new_page = buf_page_get(dict_index_get_space(index), new_page_no,
-				RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
-
-	return(new_page);
-}
-
-/******************************************************************
-Gets the number of pages in a B-tree. */
-
-ulint
-btr_get_size(
-/*=========*/
-				/* out: number of pages */
-	dict_index_t*	index,	/* in: index */
-	ulint		flag)	/* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
-{
-	fseg_header_t*	seg_header;
-	page_t*		root;
-	ulint		n;
-	ulint		dummy;
-	mtr_t		mtr;
-
-	mtr_start(&mtr);
-
-	mtr_s_lock(dict_index_get_lock(index), &mtr);
-
-	root = btr_root_get(index, &mtr);
-
-	if (flag == BTR_N_LEAF_PAGES) {
-		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-
-		fseg_n_reserved_pages(seg_header, &n, &mtr);
-
-	} else if (flag == BTR_TOTAL_SIZE) {
-		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-
-		n = fseg_n_reserved_pages(seg_header, &dummy, &mtr);
-
-		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-
-		n += fseg_n_reserved_pages(seg_header, &dummy, &mtr);
-	} else {
-		ut_error;
-	}
-
-	mtr_commit(&mtr);
-
-	return(n);
-}
-
-/******************************************************************
-Frees a page used in an ibuf tree. Puts the page to the free list of the
-ibuf tree. */
-static
-void
-btr_page_free_for_ibuf(
-/*===================*/
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: page to be freed, x-latched */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_t*		root;
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	root = btr_root_get(index, mtr);
-
-	flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
-		       page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
-
-	ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
-			    mtr));
-}
-
-/******************************************************************
-Frees a file page used in an index tree. Can be used also to (BLOB)
-external storage pages, because the page level 0 can be given as an
-argument. */
-
-void
-btr_page_free_low(
-/*==============*/
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: page to be freed, x-latched */
-	ulint		level,	/* in: page level */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	fseg_header_t*	seg_header;
-	page_t*		root;
-	ulint		space;
-	ulint		page_no;
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	/* The page gets invalid for optimistic searches: increment the frame
-	modify clock */
-
-	buf_frame_modify_clock_inc(page);
-
-	if (index->type & DICT_IBUF) {
-
-		btr_page_free_for_ibuf(index, page, mtr);
-
-		return;
-	}
-
-	root = btr_root_get(index, mtr);
-
-	if (level == 0) {
-		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-	} else {
-		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-	}
-
-	space = buf_frame_get_space_id(page);
-	page_no = buf_frame_get_page_no(page);
-
-	fseg_free_page(seg_header, space, page_no, mtr);
-}
-
-/******************************************************************
-Frees a file page used in an index tree. NOTE: cannot free field external
-storage pages because the page must contain info on its level. */
-
-void
-btr_page_free(
-/*==========*/
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: page to be freed, x-latched */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ulint		level;
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	level = btr_page_get_level(page, mtr);
-
-	btr_page_free_low(index, page, level, mtr);
-}
-
-/******************************************************************
-Sets the child node file address in a node pointer. */
-UNIV_INLINE
-void
-btr_node_ptr_set_child_page_no(
-/*===========================*/
-	rec_t*		rec,	/* in: node pointer record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		page_no,/* in: child node address */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	byte*	field;
-	ulint	len;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr));
-	ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
-
-	/* The child address is in the last field */
-	field = rec_get_nth_field(rec, offsets,
-				  rec_offs_n_fields(offsets) - 1, &len);
-
-	ut_ad(len == 4);
-
-	mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
-}
-
-/****************************************************************
-Returns the child page of a node pointer and x-latches it. */
-static
-page_t*
-btr_node_ptr_get_child(
-/*===================*/
-				/* out: child page, x-latched */
-	rec_t*		node_ptr,/* in: node pointer */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ulint	page_no;
-	ulint	space;
-	page_t*	page;
-
-	ut_ad(rec_offs_validate(node_ptr, NULL, offsets));
-	space = buf_frame_get_space_id(node_ptr);
-	page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
-
-	page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
-
-	return(page);
-}
-
-/****************************************************************
-Returns the upper level node pointer to a page. It is assumed that mtr holds
-an x-latch on the tree. */
-static
-rec_t*
-btr_page_get_father_for_rec(
-/*========================*/
-				/* out: pointer to node pointer record,
-				its page x-latched */
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: page: must contain at least one
-				user record */
-	rec_t*		user_rec,/* in: user_record on page */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	mem_heap_t*	heap;
-	dtuple_t*	tuple;
-	btr_cur_t	cursor;
-	rec_t*		node_ptr;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets	= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_a(page_rec_is_user_rec(user_rec));
-
-	ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page));
-
-	heap = mem_heap_create(100);
-
-	tuple = dict_index_build_node_ptr(index, user_rec, 0, heap,
-					  btr_page_get_level(page, mtr));
-
-	btr_cur_search_to_nth_level(index,
-				    btr_page_get_level(page, mtr) + 1,
-				    tuple, PAGE_CUR_LE,
-				    BTR_CONT_MODIFY_TREE, &cursor, 0, mtr);
-
-	node_ptr = btr_cur_get_rec(&cursor);
-	offsets = rec_get_offsets(node_ptr, index, offsets,
-				  ULINT_UNDEFINED, &heap);
-
-	if (UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr, offsets)
-			  != buf_frame_get_page_no(page))) {
-		rec_t*	print_rec;
-		fputs("InnoDB: Dump of the child page:\n", stderr);
-		buf_page_print(buf_frame_align(page));
-		fputs("InnoDB: Dump of the parent page:\n", stderr);
-		buf_page_print(buf_frame_align(node_ptr));
-
-		fputs("InnoDB: Corruption of an index tree: table ", stderr);
-		ut_print_name(stderr, NULL, TRUE, index->table_name);
-		fputs(", index ", stderr);
-		ut_print_name(stderr, NULL, FALSE, index->name);
-		fprintf(stderr, ",\n"
-			"InnoDB: father ptr page no %lu, child page no %lu\n",
-			(ulong)
-			btr_node_ptr_get_child_page_no(node_ptr, offsets),
-			(ulong) buf_frame_get_page_no(page));
-		print_rec = page_rec_get_next(page_get_infimum_rec(page));
-		offsets = rec_get_offsets(print_rec, index,
-					  offsets, ULINT_UNDEFINED, &heap);
-		page_rec_print(print_rec, offsets);
-		offsets = rec_get_offsets(node_ptr, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		page_rec_print(node_ptr, offsets);
-
-		fputs("InnoDB: You should dump + drop + reimport the table"
-		      " to fix the\n"
-		      "InnoDB: corruption. If the crash happens at "
-		      "the database startup, see\n"
-		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-		      "forcing-recovery.html about\n"
-		      "InnoDB: forcing recovery. "
-		      "Then dump + drop + reimport.\n", stderr);
-	}
-
-	ut_a(btr_node_ptr_get_child_page_no(node_ptr, offsets)
-	     == buf_frame_get_page_no(page));
-	mem_heap_free(heap);
-
-	return(node_ptr);
-}
-
-/****************************************************************
-Returns the upper level node pointer to a page. It is assumed that
-mtr holds an x-latch on the tree. */
-static
-rec_t*
-btr_page_get_father_node_ptr(
-/*=========================*/
-				/* out: pointer to node pointer record */
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: page: must contain at least one
-				user record */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	return(btr_page_get_father_for_rec(
-		       index, page,
-		       page_rec_get_next(page_get_infimum_rec(page)), mtr));
-}
-
-/****************************************************************
-Creates the root node for a new index tree. */
-
-ulint
-btr_create(
-/*=======*/
-			/* out: page number of the created root, FIL_NULL if
-			did not succeed */
-	ulint	type,	/* in: type of the index */
-	ulint	space,	/* in: space where created */
-	dulint	index_id,/* in: index id */
-	ulint	comp,	/* in: nonzero=compact page format */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
-{
-	ulint		page_no;
-	buf_frame_t*	ibuf_hdr_frame;
-	buf_frame_t*	frame;
-	page_t*		page;
-
-	/* Create the two new segments (one, in the case of an ibuf tree) for
-	the index tree; the segment headers are put on the allocated root page
-	(for an ibuf tree, not in the root, but on a separate ibuf header
-	page) */
-
-	if (type & DICT_IBUF) {
-		/* Allocate first the ibuf header page */
-		ibuf_hdr_frame = fseg_create(
-			space, 0, IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(ibuf_hdr_frame, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
-		ut_ad(buf_frame_get_page_no(ibuf_hdr_frame)
-		      == IBUF_HEADER_PAGE_NO);
-		/* Allocate then the next page to the segment: it will be the
-		tree root page */
-
-		page_no = fseg_alloc_free_page(ibuf_hdr_frame + IBUF_HEADER
-					       + IBUF_TREE_SEG_HEADER,
-					       IBUF_TREE_ROOT_PAGE_NO,
-					       FSP_UP, mtr);
-		ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
-
-		frame = buf_page_get(space, page_no, RW_X_LATCH, mtr);
-	} else {
-		frame = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP,
-				    mtr);
-	}
-
-	if (frame == NULL) {
-
-		return(FIL_NULL);
-	}
-
-	page_no = buf_frame_get_page_no(frame);
-
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
-
-	if (type & DICT_IBUF) {
-		/* It is an insert buffer tree: initialize the free list */
-
-		ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
-
-		flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
-	} else {
-		/* It is a non-ibuf tree: create a file segment for leaf
-		pages */
-		fseg_create(space, page_no, PAGE_HEADER + PAGE_BTR_SEG_LEAF,
-			    mtr);
-		/* The fseg create acquires a second latch on the page,
-		therefore we must declare it: */
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
-	}
-
-	/* Create a new index page on the the allocated segment page */
-	page = page_create(frame, mtr, comp);
-	buf_block_align(page)->check_index_page_at_flush = TRUE;
-
-	/* Set the index id of the page */
-	btr_page_set_index_id(page, index_id, mtr);
-
-	/* Set the level of the new index page */
-	btr_page_set_level(page, 0, mtr);
-
-	/* Set the next node and previous node fields */
-	btr_page_set_next(page, FIL_NULL, mtr);
-	btr_page_set_prev(page, FIL_NULL, mtr);
-
-	/* We reset the free bits for the page to allow creation of several
-	trees in the same mtr, otherwise the latch on a bitmap page would
-	prevent it because of the latching order */
-
-	ibuf_reset_free_bits_with_type(type, page);
-
-	/* In the following assertion we test that two records of maximum
-	allowed size fit on the root page: this fact is needed to ensure
-	correctness of split algorithms */
-
-	ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE);
-
-	return(page_no);
-}
-
-/****************************************************************
-Frees a B-tree except the root page, which MUST be freed after this
-by calling btr_free_root. */
-
-void
-btr_free_but_not_root(
-/*==================*/
-	ulint	space,		/* in: space where created */
-	ulint	root_page_no)	/* in: root page number */
-{
-	ibool	finished;
-	page_t*	root;
-	mtr_t	mtr;
-
-leaf_loop:
-	mtr_start(&mtr);
-
-	root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
-
-	/* NOTE: page hash indexes are dropped when a page is freed inside
-	fsp0fsp. */
-
-	finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF,
-				  &mtr);
-	mtr_commit(&mtr);
-
-	if (!finished) {
-
-		goto leaf_loop;
-	}
-top_loop:
-	mtr_start(&mtr);
-
-	root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
-
-	finished = fseg_free_step_not_header(
-		root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
-	mtr_commit(&mtr);
-
-	if (!finished) {
-
-		goto top_loop;
-	}
-}
-
-/****************************************************************
-Frees the B-tree root page. Other tree MUST already have been freed. */
-
-void
-btr_free_root(
-/*==========*/
-	ulint	space,		/* in: space where created */
-	ulint	root_page_no,	/* in: root page number */
-	mtr_t*	mtr)		/* in: a mini-transaction which has already
-				been started */
-{
-	ibool	finished;
-	page_t*	root;
-
-	root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
-
-	btr_search_drop_page_hash_index(root);
-top_loop:
-	finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
-	if (!finished) {
-
-		goto top_loop;
-	}
-}
-
-/*****************************************************************
-Reorganizes an index page. */
-static
-void
-btr_page_reorganize_low(
-/*====================*/
-	ibool		recovery,/* in: TRUE if called in recovery:
-				locks should not be updated, i.e.,
-				there cannot exist locks on the
-				page, and a hash index should not be
-				dropped: it cannot exist */
-	page_t*		page,	/* in: page to be reorganized */
-	dict_index_t*	index,	/* in: record descriptor */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_t*	new_page;
-	ulint	log_mode;
-	ulint	data_size1;
-	ulint	data_size2;
-	ulint	max_ins_size1;
-	ulint	max_ins_size2;
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-	data_size1 = page_get_data_size(page);
-	max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
-
-	/* Write the log record */
-	mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
-				  ? MLOG_COMP_PAGE_REORGANIZE
-				  : MLOG_PAGE_REORGANIZE, 0);
-
-	/* Turn logging off */
-	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-
-	new_page = buf_frame_alloc();
-
-	/* Copy the old page to temporary space */
-	buf_frame_copy(new_page, page);
-
-	if (!recovery) {
-		btr_search_drop_page_hash_index(page);
-	}
-
-	/* Recreate the page: note that global data on page (possible
-	segment headers, next page-field, etc.) is preserved intact */
-
-	page_create(page, mtr, page_is_comp(page));
-	buf_block_align(page)->check_index_page_at_flush = TRUE;
-
-	/* Copy the records from the temporary space to the recreated page;
-	do not copy the lock bits yet */
-
-	page_copy_rec_list_end_no_locks(page, new_page,
-					page_get_infimum_rec(new_page),
-					index, mtr);
-	/* Copy max trx id to recreated page */
-	page_set_max_trx_id(page, page_get_max_trx_id(new_page));
-
-	if (!recovery) {
-		/* Update the record lock bitmaps */
-		lock_move_reorganize_page(page, new_page);
-	}
-
-	data_size2 = page_get_data_size(page);
-	max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
-
-	if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
-		buf_page_print(page);
-		buf_page_print(new_page);
-		fprintf(stderr,
-			"InnoDB: Error: page old data size %lu"
-			" new data size %lu\n"
-			"InnoDB: Error: page old max ins size %lu"
-			" new max ins size %lu\n"
-			"InnoDB: Submit a detailed bug report"
-			" to http://bugs.mysql.com\n",
-			(unsigned long) data_size1, (unsigned long) data_size2,
-			(unsigned long) max_ins_size1,
-			(unsigned long) max_ins_size2);
-	}
-
-	buf_frame_free(new_page);
-
-	/* Restore logging mode */
-	mtr_set_log_mode(mtr, log_mode);
-}
-
-/*****************************************************************
-Reorganizes an index page. */
-
-void
-btr_page_reorganize(
-/*================*/
-	page_t*		page,	/* in: page to be reorganized */
-	dict_index_t*	index,	/* in: record descriptor */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	btr_page_reorganize_low(FALSE, page, index, mtr);
-}
-
-/***************************************************************
-Parses a redo log record of reorganizing a page. */
-
-byte*
-btr_parse_page_reorganize(
-/*======================*/
-				/* out: end of log record or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr __attribute__((unused)),
-				/* in: buffer end */
-	dict_index_t*	index,	/* in: record descriptor */
-	page_t*		page,	/* in: page or NULL */
-	mtr_t*		mtr)	/* in: mtr or NULL */
-{
-	ut_ad(ptr && end_ptr);
-
-	/* The record is empty, except for the record initial part */
-
-	if (page) {
-		btr_page_reorganize_low(TRUE, page, index, mtr);
-	}
-
-	return(ptr);
-}
-
-/*****************************************************************
-Empties an index page. */
-static
-void
-btr_page_empty(
-/*===========*/
-	page_t*	page,	/* in: page to be emptied */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	btr_search_drop_page_hash_index(page);
-
-	/* Recreate the page: note that global data on page (possible
-	segment headers, next page-field, etc.) is preserved intact */
-
-	page_create(page, mtr, page_is_comp(page));
-	buf_block_align(page)->check_index_page_at_flush = TRUE;
-}
-
-/*****************************************************************
-Makes tree one level higher by splitting the root, and inserts
-the tuple. It is assumed that mtr contains an x-latch on the tree.
-NOTE that the operation of this function must always succeed,
-we cannot reverse it: therefore enough free disk space must be
-guaranteed to be available before this function is called. */
-
-rec_t*
-btr_root_raise_and_insert(
-/*======================*/
-				/* out: inserted record */
-	btr_cur_t*	cursor,	/* in: cursor at which to insert: must be
-				on the root page; when the function returns,
-				the cursor is positioned on the predecessor
-				of the inserted record */
-	dtuple_t*	tuple,	/* in: tuple to insert */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	dict_index_t*	index;
-	page_t*		root;
-	page_t*		new_page;
-	ulint		new_page_no;
-	rec_t*		rec;
-	mem_heap_t*	heap;
-	dtuple_t*	node_ptr;
-	ulint		level;
-	rec_t*		node_ptr_rec;
-	page_cur_t*	page_cursor;
-
-	root = btr_cur_get_page(cursor);
-	index = btr_cur_get_index(cursor);
-
-	ut_ad(dict_index_get_page(index) == buf_frame_get_page_no(root));
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(root),
-				MTR_MEMO_PAGE_X_FIX));
-	btr_search_drop_page_hash_index(root);
-
-	/* Allocate a new page to the tree. Root splitting is done by first
-	moving the root records to the new page, emptying the root, putting
-	a node pointer to the new page, and then splitting the new page. */
-
-	new_page = btr_page_alloc(index, 0, FSP_NO_DIR,
-				  btr_page_get_level(root, mtr), mtr);
-
-	btr_page_create(new_page, index, mtr);
-
-	level = btr_page_get_level(root, mtr);
-
-	/* Set the levels of the new index page and root page */
-	btr_page_set_level(new_page, level, mtr);
-	btr_page_set_level(root, level + 1, mtr);
-
-	/* Set the next node and previous node fields of new page */
-	btr_page_set_next(new_page, FIL_NULL, mtr);
-	btr_page_set_prev(new_page, FIL_NULL, mtr);
-
-	/* Move the records from root to the new page */
-
-	page_move_rec_list_end(new_page, root, page_get_infimum_rec(root),
-			       index, mtr);
-	/* If this is a pessimistic insert which is actually done to
-	perform a pessimistic update then we have stored the lock
-	information of the record to be inserted on the infimum of the
-	root page: we cannot discard the lock structs on the root page */
-
-	lock_update_root_raise(new_page, root);
-
-	/* Create a memory heap where the node pointer is stored */
-	heap = mem_heap_create(100);
-
-	rec = page_rec_get_next(page_get_infimum_rec(new_page));
-	new_page_no = buf_frame_get_page_no(new_page);
-
-	/* Build the node pointer (= node key and page address) for the
-	child */
-
-	node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
-					     level);
-	/* Reorganize the root to get free space */
-	btr_page_reorganize(root, index, mtr);
-
-	page_cursor = btr_cur_get_page_cur(cursor);
-
-	/* Insert node pointer to the root */
-
-	page_cur_set_before_first(root, page_cursor);
-
-	node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
-					     index, mtr);
-
-	ut_ad(node_ptr_rec);
-
-	/* The node pointer must be marked as the predefined minimum record,
-	as there is no lower alphabetical limit to records in the leftmost
-	node of a level: */
-
-	btr_set_min_rec_mark(node_ptr_rec, page_is_comp(root), mtr);
-
-	/* Free the memory heap */
-	mem_heap_free(heap);
-
-	/* We play safe and reset the free bits for the new page */
-
-#if 0
-	fprintf(stderr, "Root raise new page no %lu\n",
-		buf_frame_get_page_no(new_page));
-#endif
-
-	ibuf_reset_free_bits(index, new_page);
-	/* Reposition the cursor to the child node */
-	page_cur_search(new_page, index, tuple,
-			PAGE_CUR_LE, page_cursor);
-
-	/* Split the child and insert tuple */
-	return(btr_page_split_and_insert(cursor, tuple, mtr));
-}
-
-/*****************************************************************
-Decides if the page should be split at the convergence point of inserts
-converging to the left. */
-
-ibool
-btr_page_get_split_rec_to_left(
-/*===========================*/
-				/* out: TRUE if split recommended */
-	btr_cur_t*	cursor,	/* in: cursor at which to insert */
-	rec_t**		split_rec) /* out: if split recommended,
-				the first record on upper half page,
-				or NULL if tuple to be inserted should
-				be first */
-{
-	page_t*	page;
-	rec_t*	insert_point;
-	rec_t*	infimum;
-
-	page = btr_cur_get_page(cursor);
-	insert_point = btr_cur_get_rec(cursor);
-
-	if (page_header_get_ptr(page, PAGE_LAST_INSERT)
-	    == page_rec_get_next(insert_point)) {
-
-		infimum = page_get_infimum_rec(page);
-
-		/* If the convergence is in the middle of a page, include also
-		the record immediately before the new insert to the upper
-		page. Otherwise, we could repeatedly move from page to page
-		lots of records smaller than the convergence point. */
-
-		if (infimum != insert_point
-		    && page_rec_get_next(infimum) != insert_point) {
-
-			*split_rec = insert_point;
-		} else {
-			*split_rec = page_rec_get_next(insert_point);
-		}
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/*****************************************************************
-Decides if the page should be split at the convergence point of inserts
-converging to the right. */
-
-ibool
-btr_page_get_split_rec_to_right(
-/*============================*/
-				/* out: TRUE if split recommended */
-	btr_cur_t*	cursor,	/* in: cursor at which to insert */
-	rec_t**		split_rec) /* out: if split recommended,
-				the first record on upper half page,
-				or NULL if tuple to be inserted should
-				be first */
-{
-	page_t*	page;
-	rec_t*	insert_point;
-
-	page = btr_cur_get_page(cursor);
-	insert_point = btr_cur_get_rec(cursor);
-
-	/* We use eager heuristics: if the new insert would be right after
-	the previous insert on the same page, we assume that there is a
-	pattern of sequential inserts here. */
-
-	if (UNIV_LIKELY(page_header_get_ptr(page, PAGE_LAST_INSERT)
-			== insert_point)) {
-
-		rec_t*	next_rec;
-
-		next_rec = page_rec_get_next(insert_point);
-
-		if (page_rec_is_supremum(next_rec)) {
-split_at_new:
-			/* Split at the new record to insert */
-			*split_rec = NULL;
-		} else {
-			rec_t*	next_next_rec = page_rec_get_next(next_rec);
-			if (page_rec_is_supremum(next_next_rec)) {
-
-				goto split_at_new;
-			}
-
-			/* If there are >= 2 user records up from the insert
-			point, split all but 1 off. We want to keep one because
-			then sequential inserts can use the adaptive hash
-			index, as they can do the necessary checks of the right
-			search position just by looking at the records on this
-			page. */
-
-			*split_rec = next_next_rec;
-		}
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/*****************************************************************
-Calculates a split record such that the tuple will certainly fit on
-its half-page when the split is performed. We assume in this function
-only that the cursor page has at least one user record. */
-static
-rec_t*
-btr_page_get_sure_split_rec(
-/*========================*/
-					/* out: split record, or NULL if
-					tuple will be the first record on
-					upper half-page */
-	btr_cur_t*	cursor,		/* in: cursor at which insert
-					should be made */
-	dtuple_t*	tuple)		/* in: tuple to insert */
-{
-	page_t*	page;
-	ulint	insert_size;
-	ulint	free_space;
-	ulint	total_data;
-	ulint	total_n_recs;
-	ulint	total_space;
-	ulint	incl_data;
-	rec_t*	ins_rec;
-	rec_t*	rec;
-	rec_t*	next_rec;
-	ulint	n;
-	mem_heap_t* heap;
-	ulint*	offsets;
-
-	page = btr_cur_get_page(cursor);
-
-	insert_size = rec_get_converted_size(cursor->index, tuple);
-	free_space  = page_get_free_space_of_empty(page_is_comp(page));
-
-	/* free_space is now the free space of a created new page */
-
-	total_data   = page_get_data_size(page) + insert_size;
-	total_n_recs = page_get_n_recs(page) + 1;
-	ut_ad(total_n_recs >= 2);
-	total_space  = total_data + page_dir_calc_reserved_space(total_n_recs);
-
-	n = 0;
-	incl_data = 0;
-	ins_rec = btr_cur_get_rec(cursor);
-	rec = page_get_infimum_rec(page);
-
-	heap = NULL;
-	offsets = NULL;
-
-	/* We start to include records to the left half, and when the
-	space reserved by them exceeds half of total_space, then if
-	the included records fit on the left page, they will be put there
-	if something was left over also for the right page,
-	otherwise the last included record will be the first on the right
-	half page */
-
-	for (;;) {
-		/* Decide the next record to include */
-		if (rec == ins_rec) {
-			rec = NULL;	/* NULL denotes that tuple is
-					now included */
-		} else if (rec == NULL) {
-			rec = page_rec_get_next(ins_rec);
-		} else {
-			rec = page_rec_get_next(rec);
-		}
-
-		if (rec == NULL) {
-			/* Include tuple */
-			incl_data += insert_size;
-		} else {
-			offsets = rec_get_offsets(rec, cursor->index,
-						  offsets, ULINT_UNDEFINED,
-						  &heap);
-			incl_data += rec_offs_size(offsets);
-		}
-
-		n++;
-
-		if (incl_data + page_dir_calc_reserved_space(n)
-		    >= total_space / 2) {
-
-			if (incl_data + page_dir_calc_reserved_space(n)
-			    <= free_space) {
-				/* The next record will be the first on
-				the right half page if it is not the
-				supremum record of page */
-
-				if (rec == ins_rec) {
-					rec = NULL;
-
-					goto func_exit;
-				} else if (rec == NULL) {
-					next_rec = page_rec_get_next(ins_rec);
-				} else {
-					next_rec = page_rec_get_next(rec);
-				}
-				ut_ad(next_rec);
-				if (!page_rec_is_supremum(next_rec)) {
-					rec = next_rec;
-				}
-			}
-
-func_exit:
-			if (UNIV_LIKELY_NULL(heap)) {
-				mem_heap_free(heap);
-			}
-			return(rec);
-		}
-	}
-}
-
-/*****************************************************************
-Returns TRUE if the insert fits on the appropriate half-page with the
-chosen split_rec. */
-static
-ibool
-btr_page_insert_fits(
-/*=================*/
-					/* out: TRUE if fits */
-	btr_cur_t*	cursor,		/* in: cursor at which insert
-					should be made */
-	rec_t*		split_rec,	/* in: suggestion for first record
-					on upper half-page, or NULL if
-					tuple to be inserted should be first */
-	const ulint*	offsets,	/* in: rec_get_offsets(
-					split_rec, cursor->index) */
-	dtuple_t*	tuple,		/* in: tuple to insert */
-	mem_heap_t*	heap)		/* in: temporary memory heap */
-{
-	page_t*	page;
-	ulint	insert_size;
-	ulint	free_space;
-	ulint	total_data;
-	ulint	total_n_recs;
-	rec_t*	rec;
-	rec_t*	end_rec;
-	ulint*	offs;
-
-	page = btr_cur_get_page(cursor);
-
-	ut_ad(!split_rec == !offsets);
-	ut_ad(!offsets
-	      || !page_is_comp(page) == !rec_offs_comp(offsets));
-	ut_ad(!offsets
-	      || rec_offs_validate(split_rec, cursor->index, offsets));
-
-	insert_size = rec_get_converted_size(cursor->index, tuple);
-	free_space  = page_get_free_space_of_empty(page_is_comp(page));
-
-	/* free_space is now the free space of a created new page */
-
-	total_data   = page_get_data_size(page) + insert_size;
-	total_n_recs = page_get_n_recs(page) + 1;
-
-	/* We determine which records (from rec to end_rec, not including
-	end_rec) will end up on the other half page from tuple when it is
-	inserted. */
-
-	if (split_rec == NULL) {
-		rec = page_rec_get_next(page_get_infimum_rec(page));
-		end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
-
-	} else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {
-
-		rec = page_rec_get_next(page_get_infimum_rec(page));
-		end_rec = split_rec;
-	} else {
-		rec = split_rec;
-		end_rec = page_get_supremum_rec(page);
-	}
-
-	if (total_data + page_dir_calc_reserved_space(total_n_recs)
-	    <= free_space) {
-
-		/* Ok, there will be enough available space on the
-		half page where the tuple is inserted */
-
-		return(TRUE);
-	}
-
-	offs = NULL;
-
-	while (rec != end_rec) {
-		/* In this loop we calculate the amount of reserved
-		space after rec is removed from page. */
-
-		offs = rec_get_offsets(rec, cursor->index, offs,
-				       ULINT_UNDEFINED, &heap);
-
-		total_data -= rec_offs_size(offs);
-		total_n_recs--;
-
-		if (total_data + page_dir_calc_reserved_space(total_n_recs)
-		    <= free_space) {
-
-			/* Ok, there will be enough available space on the
-			half page where the tuple is inserted */
-
-			return(TRUE);
-		}
-
-		rec = page_rec_get_next(rec);
-	}
-
-	return(FALSE);
-}
-
-/***********************************************************
-Inserts a data tuple to a tree on a non-leaf level. It is assumed
-that mtr holds an x-latch on the tree. */
-
-void
-btr_insert_on_non_leaf_level(
-/*=========================*/
-	dict_index_t*	index,	/* in: index */
-	ulint		level,	/* in: level, must be > 0 */
-	dtuple_t*	tuple,	/* in: the record to be inserted */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	big_rec_t*	dummy_big_rec;
-	btr_cur_t	cursor;
-	ulint		err;
-	rec_t*		rec;
-
-	ut_ad(level > 0);
-
-	btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
-				    BTR_CONT_MODIFY_TREE,
-				    &cursor, 0, mtr);
-
-	err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
-					 | BTR_KEEP_SYS_FLAG
-					 | BTR_NO_UNDO_LOG_FLAG,
-					 &cursor, tuple, &rec,
-					 &dummy_big_rec, NULL, mtr);
-	ut_a(err == DB_SUCCESS);
-}
-
-/******************************************************************
-Attaches the halves of an index page on the appropriate level in an
-index tree. */
-static
-void
-btr_attach_half_pages(
-/*==================*/
-	dict_index_t*	index,		/* in: the index tree */
-	page_t*		page,		/* in: page to be split */
-	rec_t*		split_rec,	/* in: first record on upper
-					half page */
-	page_t*		new_page,	/* in: the new half page */
-	ulint		direction,	/* in: FSP_UP or FSP_DOWN */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	ulint		space;
-	rec_t*		node_ptr;
-	page_t*		prev_page;
-	page_t*		next_page;
-	ulint		prev_page_no;
-	ulint		next_page_no;
-	ulint		level;
-	page_t*		lower_page;
-	page_t*		upper_page;
-	ulint		lower_page_no;
-	ulint		upper_page_no;
-	dtuple_t*	node_ptr_upper;
-	mem_heap_t*	heap;
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_a(page_is_comp(page) == page_is_comp(new_page));
-
-	/* Create a memory heap where the data tuple is stored */
-	heap = mem_heap_create(1024);
-
-	/* Based on split direction, decide upper and lower pages */
-	if (direction == FSP_DOWN) {
-
-		lower_page_no = buf_frame_get_page_no(new_page);
-		upper_page_no = buf_frame_get_page_no(page);
-		lower_page = new_page;
-		upper_page = page;
-
-		/* Look up the index for the node pointer to page */
-		node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
-
-		/* Replace the address of the old child node (= page) with the
-		address of the new lower half */
-
-		btr_node_ptr_set_child_page_no(node_ptr,
-					       rec_get_offsets(
-						       node_ptr, index,
-						       NULL, ULINT_UNDEFINED,
-						       &heap),
-					       lower_page_no, mtr);
-		mem_heap_empty(heap);
-	} else {
-		lower_page_no = buf_frame_get_page_no(page);
-		upper_page_no = buf_frame_get_page_no(new_page);
-		lower_page = page;
-		upper_page = new_page;
-	}
-
-	/* Get the level of the split pages */
-	level = btr_page_get_level(page, mtr);
-
-	/* Build the node pointer (= node key and page address) for the upper
-	half */
-
-	node_ptr_upper = dict_index_build_node_ptr(index, split_rec,
-						   upper_page_no, heap, level);
-
-	/* Insert it next to the pointer to the lower half. Note that this
-	may generate recursion leading to a split on the higher level. */
-
-	btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);
-
-	/* Free the memory heap */
-	mem_heap_free(heap);
-
-	/* Get the previous and next pages of page */
-
-	prev_page_no = btr_page_get_prev(page, mtr);
-	next_page_no = btr_page_get_next(page, mtr);
-	space = buf_frame_get_space_id(page);
-
-	/* Update page links of the level */
-
-	if (prev_page_no != FIL_NULL) {
-
-		prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
-		ut_a(page_is_comp(prev_page) == page_is_comp(page));
-#ifdef UNIV_BTR_DEBUG
-		ut_a(btr_page_get_next(prev_page, mtr)
-		     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
-		btr_page_set_next(prev_page, lower_page_no, mtr);
-	}
-
-	if (next_page_no != FIL_NULL) {
-
-		next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
-		ut_a(page_is_comp(next_page) == page_is_comp(page));
-
-		btr_page_set_prev(next_page, upper_page_no, mtr);
-	}
-
-	btr_page_set_prev(lower_page, prev_page_no, mtr);
-	btr_page_set_next(lower_page, upper_page_no, mtr);
-	btr_page_set_level(lower_page, level, mtr);
-
-	btr_page_set_prev(upper_page, lower_page_no, mtr);
-	btr_page_set_next(upper_page, next_page_no, mtr);
-	btr_page_set_level(upper_page, level, mtr);
-}
-
-/*****************************************************************
-Splits an index page to halves and inserts the tuple. It is assumed
-that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
-is released within this function! NOTE that the operation of this
-function must always succeed, we cannot reverse it: therefore
-enough free disk space must be guaranteed to be available before
-this function is called. */
-
-rec_t*
-btr_page_split_and_insert(
-/*======================*/
-				/* out: inserted record; NOTE: the tree
-				x-latch is released! NOTE: 2 free disk
-				pages must be available! */
-	btr_cur_t*	cursor,	/* in: cursor at which to insert; when the
-				function returns, the cursor is positioned
-				on the predecessor of the inserted record */
-	dtuple_t*	tuple,	/* in: tuple to insert */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_t*		page;
-	ulint		page_no;
-	byte		direction;
-	ulint		hint_page_no;
-	page_t*		new_page;
-	rec_t*		split_rec;
-	page_t*		left_page;
-	page_t*		right_page;
-	page_t*		insert_page;
-	page_cur_t*	page_cursor;
-	rec_t*		first_rec;
-	byte*		buf = 0; /* remove warning */
-	rec_t*		move_limit;
-	ibool		insert_will_fit;
-	ulint		n_iterations = 0;
-	rec_t*		rec;
-	mem_heap_t*	heap;
-	ulint		n_uniq;
-	ulint*		offsets;
-
-	heap = mem_heap_create(1024);
-	n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
-func_start:
-	mem_heap_empty(heap);
-	offsets = NULL;
-
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
-				MTR_MEMO_X_LOCK));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	page = btr_cur_get_page(cursor);
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(page_get_n_recs(page) >= 2);
-
-	page_no = buf_frame_get_page_no(page);
-
-	/* 1. Decide the split record; split_rec == NULL means that the
-	tuple to be inserted should be the first record on the upper
-	half-page */
-
-	if (n_iterations > 0) {
-		direction = FSP_UP;
-		hint_page_no = page_no + 1;
-		split_rec = btr_page_get_sure_split_rec(cursor, tuple);
-
-	} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
-		direction = FSP_UP;
-		hint_page_no = page_no + 1;
-
-	} else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
-		direction = FSP_DOWN;
-		hint_page_no = page_no - 1;
-	} else {
-		direction = FSP_UP;
-		hint_page_no = page_no + 1;
-		split_rec = page_get_middle_rec(page);
-	}
-
-	/* 2. Allocate a new page to the index */
-	new_page = btr_page_alloc(cursor->index, hint_page_no, direction,
-				  btr_page_get_level(page, mtr), mtr);
-	btr_page_create(new_page, cursor->index, mtr);
-
-	/* 3. Calculate the first record on the upper half-page, and the
-	first record (move_limit) on original page which ends up on the
-	upper half */
-
-	if (split_rec != NULL) {
-		first_rec = split_rec;
-		move_limit = split_rec;
-	} else {
-		buf = mem_alloc(rec_get_converted_size(cursor->index, tuple));
-
-		first_rec = rec_convert_dtuple_to_rec(buf,
-						      cursor->index, tuple);
-		move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
-	}
-
-	/* 4. Do first the modifications in the tree structure */
-
-	btr_attach_half_pages(cursor->index, page, first_rec,
-			      new_page, direction, mtr);
-
-	if (split_rec == NULL) {
-		mem_free(buf);
-	}
-
-	/* If the split is made on the leaf level and the insert will fit
-	on the appropriate half-page, we may release the tree x-latch.
-	We can then move the records after releasing the tree latch,
-	thus reducing the tree latch contention. */
-
-	if (split_rec) {
-		offsets = rec_get_offsets(split_rec, cursor->index, offsets,
-					  n_uniq, &heap);
-
-		insert_will_fit = btr_page_insert_fits(cursor,
-						       split_rec, offsets,
-						       tuple, heap);
-	} else {
-		insert_will_fit = btr_page_insert_fits(cursor,
-						       NULL, NULL,
-						       tuple, heap);
-	}
-
-	if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) {
-
-		mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
-				 MTR_MEMO_X_LOCK);
-	}
-
-	/* 5. Move then the records to the new page */
-	if (direction == FSP_DOWN) {
-		/*		fputs("Split left\n", stderr); */
-
-		page_move_rec_list_start(new_page, page, move_limit,
-					 cursor->index, mtr);
-		left_page = new_page;
-		right_page = page;
-
-		lock_update_split_left(right_page, left_page);
-	} else {
-		/*		fputs("Split right\n", stderr); */
-
-		page_move_rec_list_end(new_page, page, move_limit,
-				       cursor->index, mtr);
-		left_page = page;
-		right_page = new_page;
-
-		lock_update_split_right(right_page, left_page);
-	}
-
-	/* 6. The split and the tree modification is now completed. Decide the
-	page where the tuple should be inserted */
-
-	if (split_rec == NULL) {
-		insert_page = right_page;
-
-	} else {
-		offsets = rec_get_offsets(first_rec, cursor->index,
-					  offsets, n_uniq, &heap);
-
-		if (cmp_dtuple_rec(tuple, first_rec, offsets) >= 0) {
-
-			insert_page = right_page;
-		} else {
-			insert_page = left_page;
-		}
-	}
-
-	/* 7. Reposition the cursor for insert and try insertion */
-	page_cursor = btr_cur_get_page_cur(cursor);
-
-	page_cur_search(insert_page, cursor->index, tuple,
-			PAGE_CUR_LE, page_cursor);
-
-	rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
-
-	if (rec != NULL) {
-		/* Insert fit on the page: update the free bits for the
-		left and right pages in the same mtr */
-
-		ibuf_update_free_bits_for_two_pages_low(cursor->index,
-							left_page,
-							right_page, mtr);
-		/* fprintf(stderr, "Split and insert done %lu %lu\n",
-		buf_frame_get_page_no(left_page),
-		buf_frame_get_page_no(right_page)); */
-		mem_heap_free(heap);
-		return(rec);
-	}
-
-	/* 8. If insert did not fit, try page reorganization */
-
-	btr_page_reorganize(insert_page, cursor->index, mtr);
-
-	page_cur_search(insert_page, cursor->index, tuple,
-			PAGE_CUR_LE, page_cursor);
-	rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
-
-	if (rec == NULL) {
-		/* The insert did not fit on the page: loop back to the
-		start of the function for a new split */
-
-		/* We play safe and reset the free bits for new_page */
-		ibuf_reset_free_bits(cursor->index, new_page);
-
-		/* fprintf(stderr, "Split second round %lu\n",
-		buf_frame_get_page_no(page)); */
-		n_iterations++;
-		ut_ad(n_iterations < 2);
-		ut_ad(!insert_will_fit);
-
-		goto func_start;
-	}
-
-	/* Insert fit on the page: update the free bits for the
-	left and right pages in the same mtr */
-
-	ibuf_update_free_bits_for_two_pages_low(cursor->index, left_page,
-						right_page, mtr);
-#if 0
-	fprintf(stderr, "Split and insert done %lu %lu\n",
-		buf_frame_get_page_no(left_page),
-		buf_frame_get_page_no(right_page));
-#endif
-
-	ut_ad(page_validate(left_page, cursor->index));
-	ut_ad(page_validate(right_page, cursor->index));
-
-	mem_heap_free(heap);
-	return(rec);
-}
-
-/*****************************************************************
-Removes a page from the level list of pages. */
-static
-void
-btr_level_list_remove(
-/*==================*/
-	page_t*		page,	/* in: page to remove */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ulint	space;
-	ulint	prev_page_no;
-	page_t*	prev_page;
-	ulint	next_page_no;
-	page_t*	next_page;
-
-	ut_ad(page && mtr);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	/* Get the previous and next page numbers of page */
-
-	prev_page_no = btr_page_get_prev(page, mtr);
-	next_page_no = btr_page_get_next(page, mtr);
-	space = buf_frame_get_space_id(page);
-
-	/* Update page links of the level */
-
-	if (prev_page_no != FIL_NULL) {
-
-		prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
-		ut_a(page_is_comp(prev_page) == page_is_comp(page));
-#ifdef UNIV_BTR_DEBUG
-		ut_a(btr_page_get_next(prev_page, mtr)
-		     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
-		btr_page_set_next(prev_page, next_page_no, mtr);
-	}
-
-	if (next_page_no != FIL_NULL) {
-
-		next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
-		ut_a(page_is_comp(next_page) == page_is_comp(page));
-#ifdef UNIV_BTR_DEBUG
-		ut_a(btr_page_get_prev(next_page, mtr)
-		     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
-		btr_page_set_prev(next_page, prev_page_no, mtr);
-	}
-}
-
-/********************************************************************
-Writes the redo log record for setting an index record as the predefined
-minimum record. */
-UNIV_INLINE
-void
-btr_set_min_rec_mark_log(
-/*=====================*/
-	rec_t*	rec,	/* in: record */
-	ulint	comp,	/* nonzero=compact record format */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	mlog_write_initial_log_record(
-		rec, comp ? MLOG_COMP_REC_MIN_MARK : MLOG_REC_MIN_MARK, mtr);
-
-	/* Write rec offset as a 2-byte ulint */
-	mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES);
-}
-
-/********************************************************************
-Parses the redo log record for setting an index record as the predefined
-minimum record. */
-
-byte*
-btr_parse_set_min_rec_mark(
-/*=======================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	ulint	comp,	/* in: nonzero=compact page format */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr)	/* in: mtr or NULL */
-{
-	rec_t*	rec;
-
-	if (end_ptr < ptr + 2) {
-
-		return(NULL);
-	}
-
-	if (page) {
-		ut_a(!page_is_comp(page) == !comp);
-
-		rec = page + mach_read_from_2(ptr);
-
-		btr_set_min_rec_mark(rec, comp, mtr);
-	}
-
-	return(ptr + 2);
-}
-
-/********************************************************************
-Sets a record as the predefined minimum record. */
-
-void
-btr_set_min_rec_mark(
-/*=================*/
-	rec_t*	rec,	/* in: record */
-	ulint	comp,	/* in: nonzero=compact page format */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	ulint	info_bits;
-
-	info_bits = rec_get_info_bits(rec, comp);
-
-	rec_set_info_bits(rec, comp, info_bits | REC_INFO_MIN_REC_FLAG);
-
-	btr_set_min_rec_mark_log(rec, comp, mtr);
-}
-
-/*****************************************************************
-Deletes on the upper level the node pointer to a page. */
-
-void
-btr_node_ptr_delete(
-/*================*/
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: page whose node pointer is deleted */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	rec_t*		node_ptr;
-	btr_cur_t	cursor;
-	ibool		compressed;
-	ulint		err;
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	/* Delete node pointer on father page */
-
-	node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
-
-	btr_cur_position(index, node_ptr, &cursor);
-	compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, FALSE,
-						mtr);
-	ut_a(err == DB_SUCCESS);
-
-	if (!compressed) {
-		btr_cur_compress_if_useful(&cursor, mtr);
-	}
-}
-
-/*****************************************************************
-If page is the only on its level, this function moves its records to the
-father page, thus reducing the tree height. */
-static
-void
-btr_lift_page_up(
-/*=============*/
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: page which is the only on its level;
-				must not be empty: use
-				btr_discard_only_page_on_level if the last
-				record from the page should be removed */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_t*		father_page;
-	page_t*		iter_page;
-	page_t*		pages[BTR_MAX_LEVELS];
-	ulint		page_level;
-	ulint		root_page_no;
-	ulint		ancestors;
-	ulint		i;
-
-	ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
-	ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	father_page = buf_frame_align(
-		btr_page_get_father_node_ptr(index, page, mtr));
-
-	page_level = btr_page_get_level(page, mtr);
-	root_page_no = dict_index_get_page(index);
-
-	ancestors = 1;
-	pages[0] = father_page;
-
-	/* Store all ancestor pages so we can reset their levels later on.
-	We have to do all the searches on the tree now because later on,
-	after we've replaced the first level, the tree is in an inconsistent
-	state and can not be searched. */
-	iter_page = father_page;
-	for (;;) {
-		if (buf_block_get_page_no(buf_block_align(iter_page))
-		    == root_page_no) {
-
-			break;
-		}
-
-		ut_a(ancestors < BTR_MAX_LEVELS);
-
-		iter_page = buf_frame_align(
-			btr_page_get_father_node_ptr(index, iter_page, mtr));
-
-		pages[ancestors++] = iter_page;
-	}
-
-	btr_search_drop_page_hash_index(page);
-
-	/* Make the father empty */
-	btr_page_empty(father_page, mtr);
-
-	/* Move records to the father */
-	page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page),
-			       index, mtr);
-	lock_update_copy_and_discard(father_page, page);
-
-	/* Go upward to root page, decreasing levels by one. */
-	for (i = 0; i < ancestors; i++) {
-		iter_page = pages[i];
-
-		ut_ad(btr_page_get_level(iter_page, mtr) == (page_level + 1));
-
-		btr_page_set_level(iter_page, page_level, mtr);
-		page_level++;
-	}
-
-	/* Free the file page */
-	btr_page_free(index, page, mtr);
-
-	/* We play safe and reset the free bits for the father */
-	ibuf_reset_free_bits(index, father_page);
-	ut_ad(page_validate(father_page, index));
-	ut_ad(btr_check_node_ptr(index, father_page, mtr));
-}
-
-/*****************************************************************
-Tries to merge the page first to the left immediate brother if such a
-brother exists, and the node pointers to the current page and to the brother
-reside on the same page. If the left brother does not satisfy these
-conditions, looks at the right brother. If the page is the only one on that
-level lifts the records of the page to the father page, thus reducing the
-tree height. It is assumed that mtr holds an x-latch on the tree and on the
-page. If cursor is on the leaf level, mtr must also hold x-latches to the
-brothers, if they exist. NOTE: it is assumed that the caller has reserved
-enough free extents so that the compression will always succeed if done! */
-
-void
-btr_compress(
-/*=========*/
-	btr_cur_t*	cursor,	/* in: cursor on the page to merge or lift;
-				the page must not be empty: in record delete
-				use btr_discard_page if the page would become
-				empty */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	dict_index_t*	index;
-	ulint		space;
-	ulint		left_page_no;
-	ulint		right_page_no;
-	page_t*		merge_page;
-	page_t*		father_page;
-	ibool		is_left;
-	page_t*		page;
-	rec_t*		orig_pred;
-	rec_t*		orig_succ;
-	rec_t*		node_ptr;
-	ulint		data_size;
-	ulint		n_recs;
-	ulint		max_ins_size;
-	ulint		max_ins_size_reorg;
-	ulint		level;
-	ulint		comp;
-
-	page = btr_cur_get_page(cursor);
-	index = btr_cur_get_index(cursor);
-	comp = page_is_comp(page);
-	ut_a((ibool)!!comp == dict_table_is_comp(index->table));
-
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	level = btr_page_get_level(page, mtr);
-	space = dict_index_get_space(index);
-
-	left_page_no = btr_page_get_prev(page, mtr);
-	right_page_no = btr_page_get_next(page, mtr);
-
-#if 0
-	fprintf(stderr, "Merge left page %lu right %lu \n",
-		left_page_no, right_page_no);
-#endif
-
-	node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
-	ut_ad(!comp || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
-	father_page = buf_frame_align(node_ptr);
-	ut_a(comp == page_is_comp(father_page));
-
-	/* Decide the page to which we try to merge and which will inherit
-	the locks */
-
-	is_left = left_page_no != FIL_NULL;
-
-	if (is_left) {
-
-		merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
-					  mtr);
-#ifdef UNIV_BTR_DEBUG
-		ut_a(btr_page_get_next(merge_page, mtr)
-		     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-	} else if (right_page_no != FIL_NULL) {
-
-		merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
-					  mtr);
-#ifdef UNIV_BTR_DEBUG
-		ut_a(btr_page_get_prev(merge_page, mtr)
-		     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-	} else {
-		/* The page is the only one on the level, lift the records
-		to the father */
-		btr_lift_page_up(index, page, mtr);
-
-		return;
-	}
-
-	n_recs = page_get_n_recs(page);
-	data_size = page_get_data_size(page);
-	ut_a(page_is_comp(merge_page) == comp);
-
-	max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
-		merge_page, n_recs);
-	if (data_size > max_ins_size_reorg) {
-
-		/* No space for merge */
-
-		return;
-	}
-
-	ut_ad(page_validate(merge_page, index));
-
-	max_ins_size = page_get_max_insert_size(merge_page, n_recs);
-
-	if (data_size > max_ins_size) {
-
-		/* We have to reorganize merge_page */
-
-		btr_page_reorganize(merge_page, index, mtr);
-
-		max_ins_size = page_get_max_insert_size(merge_page, n_recs);
-
-		ut_ad(page_validate(merge_page, index));
-		ut_ad(page_get_max_insert_size(merge_page, n_recs)
-		      == max_ins_size_reorg);
-	}
-
-	if (data_size > max_ins_size) {
-
-		/* Add fault tolerance, though this should never happen */
-
-		return;
-	}
-
-	btr_search_drop_page_hash_index(page);
-
-	/* Remove the page from the level list */
-	btr_level_list_remove(page, mtr);
-
-	if (is_left) {
-		btr_node_ptr_delete(index, page, mtr);
-	} else {
-		mem_heap_t*	heap		= NULL;
-		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-		/* Replace the address of the old child node (= page) with the
-		address of the merge page to the right */
-
-		btr_node_ptr_set_child_page_no(node_ptr,
-					       rec_get_offsets(
-						       node_ptr, index,
-						       offsets_,
-						       ULINT_UNDEFINED,
-						       &heap),
-					       right_page_no, mtr);
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-		btr_node_ptr_delete(index, merge_page, mtr);
-	}
-
-	/* Move records to the merge page */
-	if (is_left) {
-		orig_pred = page_rec_get_prev(
-			page_get_supremum_rec(merge_page));
-		page_copy_rec_list_start(merge_page, page,
-					 page_get_supremum_rec(page),
-					 index, mtr);
-
-		lock_update_merge_left(merge_page, orig_pred, page);
-	} else {
-		orig_succ = page_rec_get_next(
-			page_get_infimum_rec(merge_page));
-		page_copy_rec_list_end(merge_page, page,
-				       page_get_infimum_rec(page),
-				       index, mtr);
-
-		lock_update_merge_right(orig_succ, page);
-	}
-
-	/* We have added new records to merge_page: update its free bits */
-	ibuf_update_free_bits_if_full(index, merge_page,
-				      UNIV_PAGE_SIZE, ULINT_UNDEFINED);
-
-	ut_ad(page_validate(merge_page, index));
-
-	/* Free the file page */
-	btr_page_free(index, page, mtr);
-
-	ut_ad(btr_check_node_ptr(index, merge_page, mtr));
-}
-
-/*****************************************************************
-Discards a page that is the only page on its level. */
-static
-void
-btr_discard_only_page_on_level(
-/*===========================*/
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: page which is the only on its level */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	rec_t*	node_ptr;
-	page_t*	father_page;
-	ulint	page_level;
-
-	ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
-	ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	btr_search_drop_page_hash_index(page);
-
-	node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
-	father_page = buf_frame_align(node_ptr);
-
-	page_level = btr_page_get_level(page, mtr);
-
-	lock_update_discard(page_get_supremum_rec(father_page), page);
-
-	btr_page_set_level(father_page, page_level, mtr);
-
-	/* Free the file page */
-	btr_page_free(index, page, mtr);
-
-	if (buf_frame_get_page_no(father_page) == dict_index_get_page(index)) {
-		/* The father is the root page */
-
-		btr_page_empty(father_page, mtr);
-
-		/* We play safe and reset the free bits for the father */
-		ibuf_reset_free_bits(index, father_page);
-	} else {
-		ut_ad(page_get_n_recs(father_page) == 1);
-
-		btr_discard_only_page_on_level(index, father_page, mtr);
-	}
-}
-
-/*****************************************************************
-Discards a page from a B-tree. This is used to remove the last record from
-a B-tree page: the whole page must be removed at the same time. This cannot
-be used for the root page, which is allowed to be empty. */
-
-void
-btr_discard_page(
-/*=============*/
-	btr_cur_t*	cursor,	/* in: cursor on the page to discard: not on
-				the root page */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	dict_index_t*	index;
-	ulint		space;
-	ulint		left_page_no;
-	ulint		right_page_no;
-	page_t*		merge_page;
-	page_t*		page;
-	rec_t*		node_ptr;
-
-	page = btr_cur_get_page(cursor);
-	index = btr_cur_get_index(cursor);
-
-	ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page));
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	space = dict_index_get_space(index);
-
-	/* Decide the page which will inherit the locks */
-
-	left_page_no = btr_page_get_prev(page, mtr);
-	right_page_no = btr_page_get_next(page, mtr);
-
-	if (left_page_no != FIL_NULL) {
-		merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
-					  mtr);
-#ifdef UNIV_BTR_DEBUG
-		ut_a(btr_page_get_next(merge_page, mtr)
-		     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-	} else if (right_page_no != FIL_NULL) {
-		merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
-					  mtr);
-#ifdef UNIV_BTR_DEBUG
-		ut_a(btr_page_get_prev(merge_page, mtr)
-		     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-	} else {
-		btr_discard_only_page_on_level(index, page, mtr);
-
-		return;
-	}
-
-	ut_a(page_is_comp(merge_page) == page_is_comp(page));
-	btr_search_drop_page_hash_index(page);
-
-	if (left_page_no == FIL_NULL && btr_page_get_level(page, mtr) > 0) {
-
-		/* We have to mark the leftmost node pointer on the right
-		side page as the predefined minimum record */
-
-		node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
-
-		ut_ad(page_rec_is_user_rec(node_ptr));
-
-		btr_set_min_rec_mark(node_ptr, page_is_comp(merge_page), mtr);
-	}
-
-	btr_node_ptr_delete(index, page, mtr);
-
-	/* Remove the page from the level list */
-	btr_level_list_remove(page, mtr);
-
-	if (left_page_no != FIL_NULL) {
-		lock_update_discard(page_get_supremum_rec(merge_page), page);
-	} else {
-		lock_update_discard(page_rec_get_next(
-					    page_get_infimum_rec(merge_page)),
-				    page);
-	}
-
-	/* Free the file page */
-	btr_page_free(index, page, mtr);
-
-	ut_ad(btr_check_node_ptr(index, merge_page, mtr));
-}
-
-#ifdef UNIV_BTR_PRINT
-/*****************************************************************
-Prints size info of a B-tree. */
-
-void
-btr_print_size(
-/*===========*/
-	dict_index_t*	index)	/* in: index tree */
-{
-	page_t*		root;
-	fseg_header_t*	seg;
-	mtr_t		mtr;
-
-	if (index->type & DICT_IBUF) {
-		fputs("Sorry, cannot print info of an ibuf tree:"
-		      " use ibuf functions\n", stderr);
-
-		return;
-	}
-
-	mtr_start(&mtr);
-
-	root = btr_root_get(index, &mtr);
-
-	seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-
-	fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
-	fseg_print(seg, &mtr);
-
-	if (!(index->type & DICT_UNIVERSAL)) {
-
-		seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-
-		fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
-		fseg_print(seg, &mtr);
-	}
-
-	mtr_commit(&mtr);
-}
-
-/****************************************************************
-Prints recursively index tree pages. */
-static
-void
-btr_print_recursive(
-/*================*/
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: index page */
-	ulint		width,	/* in: print this many entries from start
-				and end */
-	mem_heap_t**	heap,	/* in/out: heap for rec_get_offsets() */
-	ulint**		offsets,/* in/out: buffer for rec_get_offsets() */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_cur_t	cursor;
-	ulint		n_recs;
-	ulint		i	= 0;
-	mtr_t		mtr2;
-	rec_t*		node_ptr;
-	page_t*		child;
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
-		(ulong) btr_page_get_level(page, mtr),
-		(ulong) buf_frame_get_page_no(page));
-
-	page_print(page, index, width, width);
-
-	n_recs = page_get_n_recs(page);
-
-	page_cur_set_before_first(page, &cursor);
-	page_cur_move_to_next(&cursor);
-
-	while (!page_cur_is_after_last(&cursor)) {
-
-		if (0 == btr_page_get_level(page, mtr)) {
-
-			/* If this is the leaf level, do nothing */
-
-		} else if ((i <= width) || (i >= n_recs - width)) {
-
-			mtr_start(&mtr2);
-
-			node_ptr = page_cur_get_rec(&cursor);
-
-			*offsets = rec_get_offsets(node_ptr, index, *offsets,
-						   ULINT_UNDEFINED, heap);
-			child = btr_node_ptr_get_child(node_ptr,
-						       *offsets, &mtr2);
-			btr_print_recursive(index, child, width,
-					    heap, offsets, &mtr2);
-			mtr_commit(&mtr2);
-		}
-
-		page_cur_move_to_next(&cursor);
-		i++;
-	}
-}
-
-/******************************************************************
-Prints directories and other info of all nodes in the tree. */
-
-void
-btr_print_index(
-/*============*/
-	dict_index_t*	index,	/* in: index */
-	ulint		width)	/* in: print this many entries from start
-				and end */
-{
-	mtr_t		mtr;
-	page_t*		root;
-	mem_heap_t*	heap	= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets	= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	fputs("--------------------------\n"
-	      "INDEX TREE PRINT\n", stderr);
-
-	mtr_start(&mtr);
-
-	root = btr_root_get(index, &mtr);
-
-	btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	mtr_commit(&mtr);
-
-	btr_validate_index(index, NULL);
-}
-#endif /* UNIV_BTR_PRINT */
-
-#ifdef UNIV_DEBUG
-/****************************************************************
-Checks that the node pointer to a page is appropriate. */
-
-ibool
-btr_check_node_ptr(
-/*===============*/
-				/* out: TRUE */
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: index page */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	mem_heap_t*	heap;
-	rec_t*		node_ptr;
-	dtuple_t*	node_ptr_tuple;
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	if (dict_index_get_page(index) == buf_frame_get_page_no(page)) {
-
-		return(TRUE);
-	}
-
-	node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
-
-	if (btr_page_get_level(page, mtr) == 0) {
-
-		return(TRUE);
-	}
-
-	heap = mem_heap_create(256);
-
-	node_ptr_tuple = dict_index_build_node_ptr(
-		index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
-		btr_page_get_level(page, mtr));
-
-	ut_a(!cmp_dtuple_rec(node_ptr_tuple, node_ptr,
-			     rec_get_offsets(node_ptr, index,
-					     NULL, ULINT_UNDEFINED, &heap)));
-
-	mem_heap_free(heap);
-
-	return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/****************************************************************
-Display identification information for a record. */
-static
-void
-btr_index_rec_validate_report(
-/*==========================*/
-	page_t*		page,	/* in: index page */
-	rec_t*		rec,	/* in: index record */
-	dict_index_t*	index)	/* in: index */
-{
-	fputs("InnoDB: Record in ", stderr);
-	dict_index_name_print(stderr, NULL, index);
-	fprintf(stderr, ", page %lu, at offset %lu\n",
-		buf_frame_get_page_no(page), (ulint)(rec - page));
-}
-
-/****************************************************************
-Checks the size and number of fields in a record based on the definition of
-the index. */
-
-ibool
-btr_index_rec_validate(
-/*===================*/
-					/* out: TRUE if ok */
-	rec_t*		rec,		/* in: index record */
-	dict_index_t*	index,		/* in: index */
-	ibool		dump_on_error)	/* in: TRUE if the function
-					should print hex dump of record
-					and page on error */
-{
-	ulint		len;
-	ulint		n;
-	ulint		i;
-	page_t*		page;
-	mem_heap_t*	heap	= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets	= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	page = buf_frame_align(rec);
-
-	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
-		/* The insert buffer index tree can contain records from any
-		other index: we cannot check the number of fields or
-		their length */
-
-		return(TRUE);
-	}
-
-	if (UNIV_UNLIKELY((ibool)!!page_is_comp(page)
-			  != dict_table_is_comp(index->table))) {
-		btr_index_rec_validate_report(page, rec, index);
-		fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n",
-			(ulong) !!page_is_comp(page),
-			(ulong) dict_table_is_comp(index->table));
-
-		return(FALSE);
-	}
-
-	n = dict_index_get_n_fields(index);
-
-	if (!page_is_comp(page)
-	    && UNIV_UNLIKELY(rec_get_n_fields_old(rec) != n)) {
-		btr_index_rec_validate_report(page, rec, index);
-		fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n",
-			(ulong) rec_get_n_fields_old(rec), (ulong) n);
-
-		if (dump_on_error) {
-			buf_page_print(page);
-
-			fputs("InnoDB: corrupt record ", stderr);
-			rec_print_old(stderr, rec);
-			putc('\n', stderr);
-		}
-		return(FALSE);
-	}
-
-	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
-	for (i = 0; i < n; i++) {
-		ulint	fixed_size = dict_col_get_fixed_size(
-			dict_index_get_nth_col(index, i));
-
-		rec_get_nth_field(rec, offsets, i, &len);
-
-		/* Note that if fixed_size != 0, it equals the
-		length of a fixed-size column in the clustered index.
-		A prefix index of the column is of fixed, but different
-		length.  When fixed_size == 0, prefix_len is the maximum
-		length of the prefix index column. */
-
-		if ((dict_index_get_nth_field(index, i)->prefix_len == 0
-		     && len != UNIV_SQL_NULL && fixed_size
-		     && len != fixed_size)
-		    || (dict_index_get_nth_field(index, i)->prefix_len > 0
-			&& len != UNIV_SQL_NULL
-			&& len
-			> dict_index_get_nth_field(index, i)->prefix_len)) {
-
-			btr_index_rec_validate_report(page, rec, index);
-			fprintf(stderr,
-				"InnoDB: field %lu len is %lu,"
-				" should be %lu\n",
-				(ulong) i, (ulong) len, (ulong) fixed_size);
-
-			if (dump_on_error) {
-				buf_page_print(page);
-
-				fputs("InnoDB: corrupt record ", stderr);
-				rec_print_new(stderr, rec, offsets);
-				putc('\n', stderr);
-			}
-			if (UNIV_LIKELY_NULL(heap)) {
-				mem_heap_free(heap);
-			}
-			return(FALSE);
-		}
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-	return(TRUE);
-}
-
-/****************************************************************
-Checks the size and number of fields in records based on the definition of
-the index. */
-static
-ibool
-btr_index_page_validate(
-/*====================*/
-				/* out: TRUE if ok */
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index)	/* in: index */
-{
-	page_cur_t	cur;
-	ibool		ret	= TRUE;
-
-	page_cur_set_before_first(page, &cur);
-	page_cur_move_to_next(&cur);
-
-	for (;;) {
-		if (page_cur_is_after_last(&cur)) {
-
-			break;
-		}
-
-		if (!btr_index_rec_validate(cur.rec, index, TRUE)) {
-
-			return(FALSE);
-		}
-
-		page_cur_move_to_next(&cur);
-	}
-
-	return(ret);
-}
-
-/****************************************************************
-Report an error on one page of an index tree. */
-static
-void
-btr_validate_report1(
-/*=================*/
-				/* out: TRUE if ok */
-	dict_index_t*	index,	/* in: index */
-	ulint		level,	/* in: B-tree level */
-	page_t*		page)	/* in: index page */
-{
-	fprintf(stderr, "InnoDB: Error in page %lu of ",
-		buf_frame_get_page_no(page));
-	dict_index_name_print(stderr, NULL, index);
-	if (level) {
-		fprintf(stderr, ", index tree level %lu", level);
-	}
-	putc('\n', stderr);
-}
-
-/****************************************************************
-Report an error on two pages of an index tree. */
-static
-void
-btr_validate_report2(
-/*=================*/
-				/* out: TRUE if ok */
-	dict_index_t*	index,	/* in: index */
-	ulint		level,	/* in: B-tree level */
-	page_t*		page1,	/* in: first index page */
-	page_t*		page2)	/* in: second index page */
-{
-	fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
-		buf_frame_get_page_no(page1),
-		buf_frame_get_page_no(page2));
-	dict_index_name_print(stderr, NULL, index);
-	if (level) {
-		fprintf(stderr, ", index tree level %lu", level);
-	}
-	putc('\n', stderr);
-}
-
-/****************************************************************
-Validates index tree level. */
-static
-ibool
-btr_validate_level(
-/*===============*/
-				/* out: TRUE if ok */
-	dict_index_t*	index,	/* in: index tree */
-	trx_t*		trx,	/* in: transaction or NULL */
-	ulint		level)	/* in: level number */
-{
-	ulint		space;
-	page_t*		page;
-	page_t*		right_page = 0; /* remove warning */
-	page_t*		father_page;
-	page_t*		right_father_page;
-	rec_t*		node_ptr;
-	rec_t*		right_node_ptr;
-	rec_t*		rec;
-	ulint		right_page_no;
-	ulint		left_page_no;
-	page_cur_t	cursor;
-	dtuple_t*	node_ptr_tuple;
-	ibool		ret	= TRUE;
-	mtr_t		mtr;
-	mem_heap_t*	heap	= mem_heap_create(256);
-	ulint*		offsets	= NULL;
-	ulint*		offsets2= NULL;
-
-	mtr_start(&mtr);
-
-	mtr_x_lock(dict_index_get_lock(index), &mtr);
-
-	page = btr_root_get(index, &mtr);
-
-	space = buf_frame_get_space_id(page);
-
-	while (level != btr_page_get_level(page, &mtr)) {
-
-		ut_a(btr_page_get_level(page, &mtr) > 0);
-
-		page_cur_set_before_first(page, &cursor);
-		page_cur_move_to_next(&cursor);
-
-		node_ptr = page_cur_get_rec(&cursor);
-		offsets = rec_get_offsets(node_ptr, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		page = btr_node_ptr_get_child(node_ptr, offsets, &mtr);
-	}
-
-	/* Now we are on the desired level. Loop through the pages on that
-	level. */
-loop:
-	if (trx_is_interrupted(trx)) {
-		mtr_commit(&mtr);
-		mem_heap_free(heap);
-		return(ret);
-	}
-	mem_heap_empty(heap);
-	offsets = offsets2 = NULL;
-	mtr_x_lock(dict_index_get_lock(index), &mtr);
-
-	/* Check ordering etc. of records */
-
-	if (!page_validate(page, index)) {
-		btr_validate_report1(index, level, page);
-
-		ret = FALSE;
-	} else if (level == 0) {
-		/* We are on level 0. Check that the records have the right
-		number of fields, and field lengths are right. */
-
-		if (!btr_index_page_validate(page, index)) {
-
-			ret = FALSE;
-		}
-	}
-
-	ut_a(btr_page_get_level(page, &mtr) == level);
-
-	right_page_no = btr_page_get_next(page, &mtr);
-	left_page_no = btr_page_get_prev(page, &mtr);
-
-	ut_a((page_get_n_recs(page) > 0)
-	     || ((level == 0)
-		 && (buf_frame_get_page_no(page)
-		     == dict_index_get_page(index))));
-
-	if (right_page_no != FIL_NULL) {
-		rec_t*	right_rec;
-		right_page = btr_page_get(space, right_page_no, RW_X_LATCH,
-					  &mtr);
-		if (UNIV_UNLIKELY(btr_page_get_prev(right_page, &mtr)
-				  != buf_frame_get_page_no(page))) {
-			btr_validate_report2(index, level, page, right_page);
-			fputs("InnoDB: broken FIL_PAGE_NEXT"
-			      " or FIL_PAGE_PREV links\n", stderr);
-			buf_page_print(page);
-			buf_page_print(right_page);
-
-			ret = FALSE;
-		}
-
-		if (UNIV_UNLIKELY(page_is_comp(right_page)
-				  != page_is_comp(page))) {
-			btr_validate_report2(index, level, page, right_page);
-			fputs("InnoDB: 'compact' flag mismatch\n", stderr);
-			buf_page_print(page);
-			buf_page_print(right_page);
-
-			ret = FALSE;
-
-			goto node_ptr_fails;
-		}
-
-		rec = page_rec_get_prev(page_get_supremum_rec(page));
-		right_rec = page_rec_get_next(page_get_infimum_rec(
-						      right_page));
-		offsets = rec_get_offsets(rec, index,
-					  offsets, ULINT_UNDEFINED, &heap);
-		offsets2 = rec_get_offsets(right_rec, index,
-					   offsets2, ULINT_UNDEFINED, &heap);
-		if (UNIV_UNLIKELY(cmp_rec_rec(rec, right_rec,
-					      offsets, offsets2,
-					      index) >= 0)) {
-
-			btr_validate_report2(index, level, page, right_page);
-
-			fputs("InnoDB: records in wrong order"
-			      " on adjacent pages\n", stderr);
-
-			buf_page_print(page);
-			buf_page_print(right_page);
-
-			fputs("InnoDB: record ", stderr);
-			rec = page_rec_get_prev(page_get_supremum_rec(page));
-			rec_print(stderr, rec, index);
-			putc('\n', stderr);
-			fputs("InnoDB: record ", stderr);
-			rec = page_rec_get_next(
-				page_get_infimum_rec(right_page));
-			rec_print(stderr, rec, index);
-			putc('\n', stderr);
-
-			ret = FALSE;
-		}
-	}
-
-	if (level > 0 && left_page_no == FIL_NULL) {
-		ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
-			     page_rec_get_next(page_get_infimum_rec(page)),
-			     page_is_comp(page)));
-	}
-
-	if (buf_frame_get_page_no(page) != dict_index_get_page(index)) {
-
-		/* Check father node pointers */
-
-		node_ptr = btr_page_get_father_node_ptr(index, page, &mtr);
-		father_page = buf_frame_align(node_ptr);
-		offsets	= rec_get_offsets(node_ptr, index,
-					  offsets, ULINT_UNDEFINED, &heap);
-
-		if (btr_node_ptr_get_child_page_no(node_ptr, offsets)
-		    != buf_frame_get_page_no(page)
-		    || node_ptr != btr_page_get_father_for_rec(
-			    index, page,
-			    page_rec_get_prev(page_get_supremum_rec(page)),
-			    &mtr)) {
-			btr_validate_report1(index, level, page);
-
-			fputs("InnoDB: node pointer to the page is wrong\n",
-			      stderr);
-
-			buf_page_print(father_page);
-			buf_page_print(page);
-
-			fputs("InnoDB: node ptr ", stderr);
-			rec_print_new(stderr, node_ptr, offsets);
-
-			fprintf(stderr, "\n"
-				"InnoDB: node ptr child page n:o %lu\n",
-				(unsigned long) btr_node_ptr_get_child_page_no
-				(node_ptr, offsets));
-
-			fputs("InnoDB: record on page ", stderr);
-			rec = btr_page_get_father_for_rec(
-				index, page,
-				page_rec_get_prev(page_get_supremum_rec(page)),
-				&mtr);
-			rec_print(stderr, rec, index);
-			putc('\n', stderr);
-			ret = FALSE;
-
-			goto node_ptr_fails;
-		}
-
-		if (btr_page_get_level(page, &mtr) > 0) {
-			offsets	= rec_get_offsets(node_ptr, index,
-						  offsets, ULINT_UNDEFINED,
-						  &heap);
-
-			node_ptr_tuple = dict_index_build_node_ptr(
-				index,
-				page_rec_get_next(page_get_infimum_rec(page)),
-				0, heap, btr_page_get_level(page, &mtr));
-
-			if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
-					   offsets)) {
-				rec_t*	first_rec	= page_rec_get_next(
-					page_get_infimum_rec(page));
-
-				btr_validate_report1(index, level, page);
-
-				buf_page_print(father_page);
-				buf_page_print(page);
-
-				fputs("InnoDB: Error: node ptrs differ"
-				      " on levels > 0\n"
-				      "InnoDB: node ptr ", stderr);
-				rec_print_new(stderr, node_ptr, offsets);
-				fputs("InnoDB: first rec ", stderr);
-				rec_print(stderr, first_rec, index);
-				putc('\n', stderr);
-				ret = FALSE;
-
-				goto node_ptr_fails;
-			}
-		}
-
-		if (left_page_no == FIL_NULL) {
-			ut_a(node_ptr == page_rec_get_next(
-				     page_get_infimum_rec(father_page)));
-			ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
-		}
-
-		if (right_page_no == FIL_NULL) {
-			ut_a(node_ptr == page_rec_get_prev(
-				     page_get_supremum_rec(father_page)));
-			ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
-		} else {
-			right_node_ptr = btr_page_get_father_node_ptr(
-				index, right_page, &mtr);
-			if (page_rec_get_next(node_ptr)
-			    != page_get_supremum_rec(father_page)) {
-
-				if (right_node_ptr
-				    != page_rec_get_next(node_ptr)) {
-					ret = FALSE;
-					fputs("InnoDB: node pointer to"
-					      " the right page is wrong\n",
-					      stderr);
-
-					btr_validate_report1(index, level,
-							     page);
-
-					buf_page_print(father_page);
-					buf_page_print(page);
-					buf_page_print(right_page);
-				}
-			} else {
-				right_father_page = buf_frame_align(
-					right_node_ptr);
-
-				if (right_node_ptr != page_rec_get_next(
-					    page_get_infimum_rec(
-						    right_father_page))) {
-					ret = FALSE;
-					fputs("InnoDB: node pointer 2 to"
-					      " the right page is wrong\n",
-					      stderr);
-
-					btr_validate_report1(index, level,
-							     page);
-
-					buf_page_print(father_page);
-					buf_page_print(right_father_page);
-					buf_page_print(page);
-					buf_page_print(right_page);
-				}
-
-				if (buf_frame_get_page_no(right_father_page)
-				    != btr_page_get_next(father_page, &mtr)) {
-
-					ret = FALSE;
-					fputs("InnoDB: node pointer 3 to"
-					      " the right page is wrong\n",
-					      stderr);
-
-					btr_validate_report1(index, level,
-							     page);
-
-					buf_page_print(father_page);
-					buf_page_print(right_father_page);
-					buf_page_print(page);
-					buf_page_print(right_page);
-				}
-			}
-		}
-	}
-
-node_ptr_fails:
-	/* Commit the mini-transaction to release the latch on 'page'.
-	Re-acquire the latch on right_page, which will become 'page'
-	on the next loop.  The page has already been checked. */
-	mtr_commit(&mtr);
-
-	if (right_page_no != FIL_NULL) {
-		mtr_start(&mtr);
-
-		page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr);
-
-		goto loop;
-	}
-
-	mem_heap_free(heap);
-	return(ret);
-}
-
-/******************************************************************
-Checks the consistency of an index tree. */
-
-ibool
-btr_validate_index(
-/*===============*/
-				/* out: TRUE if ok */
-	dict_index_t*	index,	/* in: index */
-	trx_t*		trx)	/* in: transaction or NULL */
-{
-	mtr_t	mtr;
-	page_t*	root;
-	ulint	i;
-	ulint	n;
-
-	mtr_start(&mtr);
-	mtr_x_lock(dict_index_get_lock(index), &mtr);
-
-	root = btr_root_get(index, &mtr);
-	n = btr_page_get_level(root, &mtr);
-
-	for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
-		if (!btr_validate_level(index, trx, n - i)) {
-
-			mtr_commit(&mtr);
-
-			return(FALSE);
-		}
-	}
-
-	mtr_commit(&mtr);
-
-	return(TRUE);
-}
diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
deleted file mode 100644
index a2f62255dd6..00000000000
--- a/storage/innobase/btr/btr0cur.c
+++ /dev/null
@@ -1,3848 +0,0 @@
-/******************************************************
-The index tree cursor
-
-All changes that row operations make to a B-tree or the records
-there must go through this module! Undo log records are written here
-of every modify or insert of a clustered index record.
-
-			NOTE!!!
-To make sure we do not run out of disk space during a pessimistic
-insert or update, we have to reserve 2 x the height of the index tree
-many pages in the tablespace before we start the operation, because
-if leaf splitting has been started, it is difficult to undo, except
-by crashing the database and doing a roll-forward.
-
-(c) 1994-2001 Innobase Oy
-
-Created 10/16/1994 Heikki Tuuri
-*******************************************************/
-
-#include "btr0cur.h"
-
-#ifdef UNIV_NONINL
-#include "btr0cur.ic"
-#endif
-
-#include "page0page.h"
-#include "rem0rec.h"
-#include "rem0cmp.h"
-#include "btr0btr.h"
-#include "btr0sea.h"
-#include "row0upd.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0row.h"
-#include "srv0srv.h"
-#include "ibuf0ibuf.h"
-#include "lock0lock.h"
-
-#ifdef UNIV_DEBUG
-/* If the following is set to TRUE, this module prints a lot of
-trace information of individual record operations */
-ibool	btr_cur_print_record_ops = FALSE;
-#endif /* UNIV_DEBUG */
-
-ulint	btr_cur_n_non_sea	= 0;
-ulint	btr_cur_n_sea		= 0;
-ulint	btr_cur_n_non_sea_old	= 0;
-ulint	btr_cur_n_sea_old	= 0;
-
-/* In the optimistic insert, if the insert does not fit, but this much space
-can be released by page reorganize, then it is reorganized */
-
-#define BTR_CUR_PAGE_REORGANIZE_LIMIT	(UNIV_PAGE_SIZE / 32)
-
-/* When estimating number of different key values in an index, sample
-this many index pages */
-#define BTR_KEY_VAL_ESTIMATE_N_PAGES	8
-
-/* The structure of a BLOB part header */
-/*--------------------------------------*/
-#define BTR_BLOB_HDR_PART_LEN		0	/* BLOB part len on this
-						page */
-#define BTR_BLOB_HDR_NEXT_PAGE_NO	4	/* next BLOB part page no,
-						FIL_NULL if none */
-/*--------------------------------------*/
-#define BTR_BLOB_HDR_SIZE		8
-
-/***********************************************************************
-Marks all extern fields in a record as owned by the record. This function
-should be called if the delete mark of a record is removed: a not delete
-marked record always owns all its extern fields. */
-static
-void
-btr_cur_unmark_extern_fields(
-/*=========================*/
-	rec_t*		rec,	/* in: record in a clustered index */
-	mtr_t*		mtr,	/* in: mtr */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/***********************************************************************
-Adds path information to the cursor for the current page, for which
-the binary search has been performed. */
-static
-void
-btr_cur_add_path_info(
-/*==================*/
-	btr_cur_t*	cursor,		/* in: cursor positioned on a page */
-	ulint		height,		/* in: height of the page in tree;
-					0 means leaf node */
-	ulint		root_height);	/* in: root node height in tree */
-/***************************************************************
-Frees the externally stored fields for a record, if the field is mentioned
-in the update vector. */
-static
-void
-btr_rec_free_updated_extern_fields(
-/*===============================*/
-	dict_index_t*	index,	/* in: index of rec; the index tree MUST be
-				X-latched */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	upd_t*		update,	/* in: update vector */
-	ibool		do_not_free_inherited,/* in: TRUE if called in a
-				rollback and we do not want to free
-				inherited fields */
-	mtr_t*		mtr);	/* in: mini-transaction handle which contains
-				an X-latch to record page and to the tree */
-/***************************************************************
-Gets the externally stored size of a record, in units of a database page. */
-static
-ulint
-btr_rec_get_externally_stored_len(
-/*==============================*/
-				/* out: externally stored part,
-				in units of a database page */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-
-/*==================== B-TREE SEARCH =========================*/
-
-/************************************************************************
-Latches the leaf page or pages requested. */
-static
-void
-btr_cur_latch_leaves(
-/*=================*/
-	page_t*		page,		/* in: leaf page where the search
-					converged */
-	ulint		space,		/* in: space id */
-	ulint		page_no,	/* in: page number of the leaf */
-	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
-	btr_cur_t*	cursor,		/* in: cursor */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	ulint	left_page_no;
-	ulint	right_page_no;
-	page_t*	get_page;
-
-	ut_ad(page && mtr);
-
-	if (latch_mode == BTR_SEARCH_LEAF) {
-
-		get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
-		ut_a(page_is_comp(get_page) == page_is_comp(page));
-		buf_block_align(get_page)->check_index_page_at_flush = TRUE;
-
-	} else if (latch_mode == BTR_MODIFY_LEAF) {
-
-		get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
-		ut_a(page_is_comp(get_page) == page_is_comp(page));
-		buf_block_align(get_page)->check_index_page_at_flush = TRUE;
-
-	} else if (latch_mode == BTR_MODIFY_TREE) {
-
-		/* x-latch also brothers from left to right */
-		left_page_no = btr_page_get_prev(page, mtr);
-
-		if (left_page_no != FIL_NULL) {
-			get_page = btr_page_get(space, left_page_no,
-						RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
-			ut_a(btr_page_get_next(get_page, mtr)
-			     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-			ut_a(page_is_comp(get_page) == page_is_comp(page));
-			buf_block_align(get_page)->check_index_page_at_flush
-				= TRUE;
-		}
-
-		get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
-		ut_a(page_is_comp(get_page) == page_is_comp(page));
-		buf_block_align(get_page)->check_index_page_at_flush = TRUE;
-
-		right_page_no = btr_page_get_next(page, mtr);
-
-		if (right_page_no != FIL_NULL) {
-			get_page = btr_page_get(space, right_page_no,
-						RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
-			ut_a(btr_page_get_prev(get_page, mtr)
-			     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-			buf_block_align(get_page)->check_index_page_at_flush
-				= TRUE;
-		}
-
-	} else if (latch_mode == BTR_SEARCH_PREV) {
-
-		/* s-latch also left brother */
-		left_page_no = btr_page_get_prev(page, mtr);
-
-		if (left_page_no != FIL_NULL) {
-			cursor->left_page = btr_page_get(space, left_page_no,
-							 RW_S_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
-			ut_a(btr_page_get_next(cursor->left_page, mtr)
-			     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-			ut_a(page_is_comp(cursor->left_page)
-			     == page_is_comp(page));
-			buf_block_align(cursor->left_page)
-				->check_index_page_at_flush = TRUE;
-		}
-
-		get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
-		ut_a(page_is_comp(get_page) == page_is_comp(page));
-		buf_block_align(get_page)->check_index_page_at_flush = TRUE;
-
-	} else if (latch_mode == BTR_MODIFY_PREV) {
-
-		/* x-latch also left brother */
-		left_page_no = btr_page_get_prev(page, mtr);
-
-		if (left_page_no != FIL_NULL) {
-			cursor->left_page = btr_page_get(space, left_page_no,
-							 RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
-			ut_a(btr_page_get_next(cursor->left_page, mtr)
-			     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-			ut_a(page_is_comp(cursor->left_page)
-			     == page_is_comp(page));
-			buf_block_align(cursor->left_page)
-				->check_index_page_at_flush = TRUE;
-		}
-
-		get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
-		ut_a(page_is_comp(get_page) == page_is_comp(page));
-		buf_block_align(get_page)->check_index_page_at_flush = TRUE;
-	} else {
-		ut_error;
-	}
-}
-
-/************************************************************************
-Searches an index tree and positions a tree cursor on a given level.
-NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
-to node pointer page number fields on the upper levels of the tree!
-Note that if mode is PAGE_CUR_LE, which is used in inserts, then
-cursor->up_match and cursor->low_match both will have sensible values.
-If mode is PAGE_CUR_GE, then up_match will a have a sensible value.
-
-If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the
-search tuple should be performed in the B-tree. InnoDB does an insert
-immediately after the cursor. Thus, the cursor may end up on a user record,
-or on a page infimum record. */
-
-void
-btr_cur_search_to_nth_level(
-/*========================*/
-	dict_index_t*	index,	/* in: index */
-	ulint		level,	/* in: the tree level of search */
-	dtuple_t*	tuple,	/* in: data tuple; NOTE: n_fields_cmp in
-				tuple must be set so that it cannot get
-				compared to the node ptr page number field! */
-	ulint		mode,	/* in: PAGE_CUR_L, ...;
-				Inserts should always be made using
-				PAGE_CUR_LE to search the position! */
-	ulint		latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
-				BTR_INSERT and BTR_ESTIMATE;
-				cursor->left_page is used to store a pointer
-				to the left neighbor page, in the cases
-				BTR_SEARCH_PREV and BTR_MODIFY_PREV;
-				NOTE that if has_search_latch
-				is != 0, we maybe do not have a latch set
-				on the cursor page, we assume
-				the caller uses his search latch
-				to protect the record! */
-	btr_cur_t*	cursor, /* in/out: tree cursor; the cursor page is
-				s- or x-latched, but see also above! */
-	ulint		has_search_latch,/* in: info on the latch mode the
-				caller currently has on btr_search_latch:
-				RW_S_LATCH, or 0 */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_cur_t*	page_cursor;
-	page_t*		page;
-	page_t*		guess;
-	rec_t*		node_ptr;
-	ulint		page_no;
-	ulint		space;
-	ulint		up_match;
-	ulint		up_bytes;
-	ulint		low_match;
-	ulint		low_bytes;
-	ulint		height;
-	ulint		savepoint;
-	ulint		rw_latch;
-	ulint		page_mode;
-	ulint		insert_planned;
-	ulint		buf_mode;
-	ulint		estimate;
-	ulint		ignore_sec_unique;
-	ulint		root_height = 0; /* remove warning */
-#ifdef BTR_CUR_ADAPT
-	btr_search_t*	info;
-#endif
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-	/* Currently, PAGE_CUR_LE is the only search mode used for searches
-	ending to upper levels */
-
-	ut_ad(level == 0 || mode == PAGE_CUR_LE);
-	ut_ad(dict_index_check_search_tuple(index, tuple));
-	ut_ad(!(index->type & DICT_IBUF) || ibuf_inside());
-	ut_ad(dtuple_check_typed(tuple));
-
-#ifdef UNIV_DEBUG
-	cursor->up_match = ULINT_UNDEFINED;
-	cursor->low_match = ULINT_UNDEFINED;
-#endif
-	insert_planned = latch_mode & BTR_INSERT;
-	estimate = latch_mode & BTR_ESTIMATE;
-	ignore_sec_unique = latch_mode & BTR_IGNORE_SEC_UNIQUE;
-	latch_mode = latch_mode & ~(BTR_INSERT | BTR_ESTIMATE
-				    | BTR_IGNORE_SEC_UNIQUE);
-
-	ut_ad(!insert_planned || (mode == PAGE_CUR_LE));
-
-	cursor->flag = BTR_CUR_BINARY;
-	cursor->index = index;
-
-#ifndef BTR_CUR_ADAPT
-	guess = NULL;
-#else
-	info = btr_search_get_info(index);
-
-	guess = info->root_guess;
-
-#ifdef BTR_CUR_HASH_ADAPT
-
-#ifdef UNIV_SEARCH_PERF_STAT
-	info->n_searches++;
-#endif
-	if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
-	    && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
-	    && !estimate
-#ifdef PAGE_CUR_LE_OR_EXTENDS
-	    && mode != PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-	    && srv_use_adaptive_hash_indexes
-	    && btr_search_guess_on_hash(index, info, tuple, mode,
-					latch_mode, cursor,
-					has_search_latch, mtr)) {
-
-		/* Search using the hash index succeeded */
-
-		ut_ad(cursor->up_match != ULINT_UNDEFINED
-		      || mode != PAGE_CUR_GE);
-		ut_ad(cursor->up_match != ULINT_UNDEFINED
-		      || mode != PAGE_CUR_LE);
-		ut_ad(cursor->low_match != ULINT_UNDEFINED
-		      || mode != PAGE_CUR_LE);
-		btr_cur_n_sea++;
-
-		return;
-	}
-#endif
-#endif
-	btr_cur_n_non_sea++;
-
-	/* If the hash search did not succeed, do binary search down the
-	tree */
-
-	if (has_search_latch) {
-		/* Release possible search latch to obey latching order */
-		rw_lock_s_unlock(&btr_search_latch);
-	}
-
-	/* Store the position of the tree latch we push to mtr so that we
-	know how to release it when we have latched leaf node(s) */
-
-	savepoint = mtr_set_savepoint(mtr);
-
-	if (latch_mode == BTR_MODIFY_TREE) {
-		mtr_x_lock(dict_index_get_lock(index), mtr);
-
-	} else if (latch_mode == BTR_CONT_MODIFY_TREE) {
-		/* Do nothing */
-		ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-					MTR_MEMO_X_LOCK));
-	} else {
-		mtr_s_lock(dict_index_get_lock(index), mtr);
-	}
-
-	page_cursor = btr_cur_get_page_cur(cursor);
-
-	space = dict_index_get_space(index);
-	page_no = dict_index_get_page(index);
-
-	up_match = 0;
-	up_bytes = 0;
-	low_match = 0;
-	low_bytes = 0;
-
-	height = ULINT_UNDEFINED;
-	rw_latch = RW_NO_LATCH;
-	buf_mode = BUF_GET;
-
-	/* We use these modified search modes on non-leaf levels of the
-	B-tree. These let us end up in the right B-tree leaf. In that leaf
-	we use the original search mode. */
-
-	switch (mode) {
-	case PAGE_CUR_GE:
-		page_mode = PAGE_CUR_L;
-		break;
-	case PAGE_CUR_G:
-		page_mode = PAGE_CUR_LE;
-		break;
-	default:
-#ifdef PAGE_CUR_LE_OR_EXTENDS
-		ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
-		      || mode == PAGE_CUR_LE_OR_EXTENDS);
-#else /* PAGE_CUR_LE_OR_EXTENDS */
-		ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE);
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-		page_mode = mode;
-		break;
-	}
-
-	/* Loop and search until we arrive at the desired level */
-
-	for (;;) {
-		if ((height == 0) && (latch_mode <= BTR_MODIFY_LEAF)) {
-
-			rw_latch = latch_mode;
-
-			if (insert_planned
-			    && ibuf_should_try(index, ignore_sec_unique)) {
-
-				/* Try insert to the insert buffer if the
-				page is not in the buffer pool */
-
-				buf_mode = BUF_GET_IF_IN_POOL;
-			}
-		}
-retry_page_get:
-		page = buf_page_get_gen(space, page_no, rw_latch, guess,
-					buf_mode,
-					__FILE__, __LINE__,
-					mtr);
-		if (page == NULL) {
-			/* This must be a search to perform an insert;
-			try insert to the insert buffer */
-
-			ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
-			ut_ad(insert_planned);
-			ut_ad(cursor->thr);
-
-			if (ibuf_should_try(index, ignore_sec_unique)
-			    && ibuf_insert(tuple, index, space, page_no,
-					   cursor->thr)) {
-				/* Insertion to the insert buffer succeeded */
-				cursor->flag = BTR_CUR_INSERT_TO_IBUF;
-				if (UNIV_LIKELY_NULL(heap)) {
-					mem_heap_free(heap);
-				}
-				goto func_exit;
-			}
-
-			/* Insert to the insert buffer did not succeed:
-			retry page get */
-
-			buf_mode = BUF_GET;
-
-			goto retry_page_get;
-		}
-
-		buf_block_align(page)->check_index_page_at_flush = TRUE;
-
-#ifdef UNIV_SYNC_DEBUG
-		if (rw_latch != RW_NO_LATCH) {
-			buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-		}
-#endif
-		ut_ad(0 == ut_dulint_cmp(index->id,
-					 btr_page_get_index_id(page)));
-
-		if (height == ULINT_UNDEFINED) {
-			/* We are in the root node */
-
-			height = btr_page_get_level(page, mtr);
-			root_height = height;
-			cursor->tree_height = root_height + 1;
-#ifdef BTR_CUR_ADAPT
-			if (page != guess) {
-				info->root_guess = page;
-			}
-#endif
-		}
-
-		if (height == 0) {
-			if (rw_latch == RW_NO_LATCH) {
-
-				btr_cur_latch_leaves(page, space,
-						     page_no, latch_mode,
-						     cursor, mtr);
-			}
-
-			if ((latch_mode != BTR_MODIFY_TREE)
-			    && (latch_mode != BTR_CONT_MODIFY_TREE)) {
-
-				/* Release the tree s-latch */
-
-				mtr_release_s_latch_at_savepoint(
-					mtr, savepoint,
-					dict_index_get_lock(index));
-			}
-
-			page_mode = mode;
-		}
-
-		page_cur_search_with_match(page, index, tuple, page_mode,
-					   &up_match, &up_bytes,
-					   &low_match, &low_bytes,
-					   page_cursor);
-		if (estimate) {
-			btr_cur_add_path_info(cursor, height, root_height);
-		}
-
-		/* If this is the desired level, leave the loop */
-
-		ut_ad(height == btr_page_get_level(
-			      page_cur_get_page(page_cursor), mtr));
-
-		if (level == height) {
-
-			if (level > 0) {
-				/* x-latch the page */
-				page = btr_page_get(space,
-						    page_no, RW_X_LATCH, mtr);
-				ut_a((ibool)!!page_is_comp(page)
-				     == dict_table_is_comp(index->table));
-			}
-
-			break;
-		}
-
-		ut_ad(height > 0);
-
-		height--;
-		guess = NULL;
-
-		node_ptr = page_cur_get_rec(page_cursor);
-		offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		/* Go to the child node */
-		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	if (level == 0) {
-		cursor->low_match = low_match;
-		cursor->low_bytes = low_bytes;
-		cursor->up_match = up_match;
-		cursor->up_bytes = up_bytes;
-
-#ifdef BTR_CUR_ADAPT
-		if (srv_use_adaptive_hash_indexes) {
-
-			btr_search_info_update(index, cursor);
-		}
-#endif
-		ut_ad(cursor->up_match != ULINT_UNDEFINED
-		      || mode != PAGE_CUR_GE);
-		ut_ad(cursor->up_match != ULINT_UNDEFINED
-		      || mode != PAGE_CUR_LE);
-		ut_ad(cursor->low_match != ULINT_UNDEFINED
-		      || mode != PAGE_CUR_LE);
-	}
-
-func_exit:
-	if (has_search_latch) {
-
-		rw_lock_s_lock(&btr_search_latch);
-	}
-}
-
-/*********************************************************************
-Opens a cursor at either end of an index. */
-
-void
-btr_cur_open_at_index_side(
-/*=======================*/
-	ibool		from_left,	/* in: TRUE if open to the low end,
-					FALSE if to the high end */
-	dict_index_t*	index,		/* in: index */
-	ulint		latch_mode,	/* in: latch mode */
-	btr_cur_t*	cursor,		/* in: cursor */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	page_cur_t*	page_cursor;
-	page_t*		page;
-	ulint		page_no;
-	ulint		space;
-	ulint		height;
-	ulint		root_height = 0; /* remove warning */
-	rec_t*		node_ptr;
-	ulint		estimate;
-	ulint		savepoint;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	estimate = latch_mode & BTR_ESTIMATE;
-	latch_mode = latch_mode & ~BTR_ESTIMATE;
-
-	/* Store the position of the tree latch we push to mtr so that we
-	know how to release it when we have latched the leaf node */
-
-	savepoint = mtr_set_savepoint(mtr);
-
-	if (latch_mode == BTR_MODIFY_TREE) {
-		mtr_x_lock(dict_index_get_lock(index), mtr);
-	} else {
-		mtr_s_lock(dict_index_get_lock(index), mtr);
-	}
-
-	page_cursor = btr_cur_get_page_cur(cursor);
-	cursor->index = index;
-
-	space = dict_index_get_space(index);
-	page_no = dict_index_get_page(index);
-
-	height = ULINT_UNDEFINED;
-
-	for (;;) {
-		page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
-					BUF_GET,
-					__FILE__, __LINE__,
-					mtr);
-		ut_ad(0 == ut_dulint_cmp(index->id,
-					 btr_page_get_index_id(page)));
-
-		buf_block_align(page)->check_index_page_at_flush = TRUE;
-
-		if (height == ULINT_UNDEFINED) {
-			/* We are in the root node */
-
-			height = btr_page_get_level(page, mtr);
-			root_height = height;
-		}
-
-		if (height == 0) {
-			btr_cur_latch_leaves(page, space, page_no,
-					     latch_mode, cursor, mtr);
-
-			/* In versions <= 3.23.52 we had forgotten to
-			release the tree latch here. If in an index scan
-			we had to scan far to find a record visible to the
-			current transaction, that could starve others
-			waiting for the tree latch. */
-
-			if ((latch_mode != BTR_MODIFY_TREE)
-			    && (latch_mode != BTR_CONT_MODIFY_TREE)) {
-
-				/* Release the tree s-latch */
-
-				mtr_release_s_latch_at_savepoint(
-					mtr, savepoint,
-					dict_index_get_lock(index));
-			}
-		}
-
-		if (from_left) {
-			page_cur_set_before_first(page, page_cursor);
-		} else {
-			page_cur_set_after_last(page, page_cursor);
-		}
-
-		if (height == 0) {
-			if (estimate) {
-				btr_cur_add_path_info(cursor, height,
-						      root_height);
-			}
-
-			break;
-		}
-
-		ut_ad(height > 0);
-
-		if (from_left) {
-			page_cur_move_to_next(page_cursor);
-		} else {
-			page_cur_move_to_prev(page_cursor);
-		}
-
-		if (estimate) {
-			btr_cur_add_path_info(cursor, height, root_height);
-		}
-
-		height--;
-
-		node_ptr = page_cur_get_rec(page_cursor);
-		offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		/* Go to the child node */
-		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-}
-
-/**************************************************************************
-Positions a cursor at a randomly chosen position within a B-tree. */
-
-void
-btr_cur_open_at_rnd_pos(
-/*====================*/
-	dict_index_t*	index,		/* in: index */
-	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
-	btr_cur_t*	cursor,		/* in/out: B-tree cursor */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	page_cur_t*	page_cursor;
-	page_t*		page;
-	ulint		page_no;
-	ulint		space;
-	ulint		height;
-	rec_t*		node_ptr;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	if (latch_mode == BTR_MODIFY_TREE) {
-		mtr_x_lock(dict_index_get_lock(index), mtr);
-	} else {
-		mtr_s_lock(dict_index_get_lock(index), mtr);
-	}
-
-	page_cursor = btr_cur_get_page_cur(cursor);
-	cursor->index = index;
-
-	space = dict_index_get_space(index);
-	page_no = dict_index_get_page(index);
-
-	height = ULINT_UNDEFINED;
-
-	for (;;) {
-		page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
-					BUF_GET,
-					__FILE__, __LINE__,
-					mtr);
-		ut_ad(0 == ut_dulint_cmp(index->id,
-					 btr_page_get_index_id(page)));
-
-		if (height == ULINT_UNDEFINED) {
-			/* We are in the root node */
-
-			height = btr_page_get_level(page, mtr);
-		}
-
-		if (height == 0) {
-			btr_cur_latch_leaves(page, space, page_no,
-					     latch_mode, cursor, mtr);
-		}
-
-		page_cur_open_on_rnd_user_rec(page, page_cursor);
-
-		if (height == 0) {
-
-			break;
-		}
-
-		ut_ad(height > 0);
-
-		height--;
-
-		node_ptr = page_cur_get_rec(page_cursor);
-		offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		/* Go to the child node */
-		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-}
-
-/*==================== B-TREE INSERT =========================*/
-
-/*****************************************************************
-Inserts a record if there is enough space, or if enough space can
-be freed by reorganizing. Differs from _optimistic_insert because
-no heuristics is applied to whether it pays to use CPU time for
-reorganizing the page or not. */
-static
-rec_t*
-btr_cur_insert_if_possible(
-/*=======================*/
-				/* out: pointer to inserted record if succeed,
-				else NULL */
-	btr_cur_t*	cursor,	/* in: cursor on page after which to insert;
-				cursor stays valid */
-	dtuple_t*	tuple,	/* in: tuple to insert; the size info need not
-				have been stored to tuple */
-	ibool*		reorg,	/* out: TRUE if reorganization occurred */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_cur_t*	page_cursor;
-	page_t*		page;
-	rec_t*		rec;
-
-	ut_ad(dtuple_check_typed(tuple));
-
-	*reorg = FALSE;
-
-	page = btr_cur_get_page(cursor);
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	page_cursor = btr_cur_get_page_cur(cursor);
-
-	/* Now, try the insert */
-	rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
-
-	if (!rec) {
-		/* If record did not fit, reorganize */
-
-		btr_page_reorganize(page, cursor->index, mtr);
-
-		*reorg = TRUE;
-
-		page_cur_search(page, cursor->index, tuple,
-				PAGE_CUR_LE, page_cursor);
-
-		rec = page_cur_tuple_insert(page_cursor, tuple,
-					    cursor->index, mtr);
-	}
-
-	return(rec);
-}
-
-/*****************************************************************
-For an insert, checks the locks and does the undo logging if desired. */
-UNIV_INLINE
-ulint
-btr_cur_ins_lock_and_undo(
-/*======================*/
-				/* out: DB_SUCCESS, DB_WAIT_LOCK,
-				DB_FAIL, or error number */
-	ulint		flags,	/* in: undo logging and locking flags: if
-				not zero, the parameters index and thr
-				should be specified */
-	btr_cur_t*	cursor,	/* in: cursor on page after which to insert */
-	dtuple_t*	entry,	/* in: entry to insert */
-	que_thr_t*	thr,	/* in: query thread or NULL */
-	ibool*		inherit)/* out: TRUE if the inserted new record maybe
-				should inherit LOCK_GAP type locks from the
-				successor record */
-{
-	dict_index_t*	index;
-	ulint		err;
-	rec_t*		rec;
-	dulint		roll_ptr;
-
-	/* Check if we have to wait for a lock: enqueue an explicit lock
-	request if yes */
-
-	rec = btr_cur_get_rec(cursor);
-	index = cursor->index;
-
-	err = lock_rec_insert_check_and_lock(flags, rec, index, thr, inherit);
-
-	if (err != DB_SUCCESS) {
-
-		return(err);
-	}
-
-	if ((index->type & DICT_CLUSTERED) && !(index->type & DICT_IBUF)) {
-
-		err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP,
-						    thr, index, entry,
-						    NULL, 0, NULL,
-						    &roll_ptr);
-		if (err != DB_SUCCESS) {
-
-			return(err);
-		}
-
-		/* Now we can fill in the roll ptr field in entry */
-
-		if (!(flags & BTR_KEEP_SYS_FLAG)) {
-
-			row_upd_index_entry_sys_field(entry, index,
-						      DATA_ROLL_PTR, roll_ptr);
-		}
-	}
-
-	return(DB_SUCCESS);
-}
-
-#ifdef UNIV_DEBUG
-/*****************************************************************
-Report information about a transaction. */
-static
-void
-btr_cur_trx_report(
-/*===============*/
-	trx_t*			trx,	/* in: transaction */
-	const dict_index_t*	index,	/* in: index */
-	const char*		op)	/* in: operation */
-{
-	fprintf(stderr, "Trx with id %lu %lu going to ",
-		ut_dulint_get_high(trx->id),
-		ut_dulint_get_low(trx->id));
-	fputs(op, stderr);
-	dict_index_name_print(stderr, trx, index);
-	putc('\n', stderr);
-}
-#endif /* UNIV_DEBUG */
-
-/*****************************************************************
-Tries to perform an insert to a page in an index tree, next to cursor.
-It is assumed that mtr holds an x-latch on the page. The operation does
-not succeed if there is too little space on the page. If there is just
-one record on the page, the insert will always succeed; this is to
-prevent trying to split a page with just one record. */
-
-ulint
-btr_cur_optimistic_insert(
-/*======================*/
-				/* out: DB_SUCCESS, DB_WAIT_LOCK,
-				DB_FAIL, or error number */
-	ulint		flags,	/* in: undo logging and locking flags: if not
-				zero, the parameters index and thr should be
-				specified */
-	btr_cur_t*	cursor,	/* in: cursor on page after which to insert;
-				cursor stays valid */
-	dtuple_t*	entry,	/* in: entry to insert */
-	rec_t**		rec,	/* out: pointer to inserted record if
-				succeed */
-	big_rec_t**	big_rec,/* out: big rec vector whose fields have to
-				be stored externally by the caller, or
-				NULL */
-	que_thr_t*	thr,	/* in: query thread or NULL */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	big_rec_t*	big_rec_vec	= NULL;
-	dict_index_t*	index;
-	page_cur_t*	page_cursor;
-	page_t*		page;
-	ulint		max_size;
-	rec_t*		dummy_rec;
-	ulint		level;
-	ibool		reorg;
-	ibool		inherit;
-	ulint		rec_size;
-	ulint		type;
-	ulint		err;
-
-	*big_rec = NULL;
-
-	page = btr_cur_get_page(cursor);
-	index = cursor->index;
-
-	if (!dtuple_check_typed_no_assert(entry)) {
-		fputs("InnoDB: Error in a tuple to insert into ", stderr);
-		dict_index_name_print(stderr, thr_get_trx(thr), index);
-	}
-#ifdef UNIV_DEBUG
-	if (btr_cur_print_record_ops && thr) {
-		btr_cur_trx_report(thr_get_trx(thr), index, "insert into ");
-		dtuple_print(stderr, entry);
-	}
-#endif /* UNIV_DEBUG */
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	max_size = page_get_max_insert_size_after_reorganize(page, 1);
-	level = btr_page_get_level(page, mtr);
-
-calculate_sizes_again:
-	/* Calculate the record size when entry is converted to a record */
-	rec_size = rec_get_converted_size(index, entry);
-
-	if (rec_size
-	    >= ut_min(page_get_free_space_of_empty(page_is_comp(page)) / 2,
-		      REC_MAX_DATA_SIZE)) {
-
-		/* The record is so big that we have to store some fields
-		externally on separate database pages */
-
-		big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0);
-
-		if (big_rec_vec == NULL) {
-
-			return(DB_TOO_BIG_RECORD);
-		}
-
-		goto calculate_sizes_again;
-	}
-
-	/* If there have been many consecutive inserts, and we are on the leaf
-	level, check if we have to split the page to reserve enough free space
-	for future updates of records. */
-
-	type = index->type;
-
-	if ((type & DICT_CLUSTERED)
-	    && (dict_index_get_space_reserve() + rec_size > max_size)
-	    && (page_get_n_recs(page) >= 2)
-	    && (0 == level)
-	    && (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
-		|| btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
-
-		if (big_rec_vec) {
-			dtuple_convert_back_big_rec(index, entry, big_rec_vec);
-		}
-
-		return(DB_FAIL);
-	}
-
-	if (!(((max_size >= rec_size)
-	       && (max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT))
-	      || (page_get_max_insert_size(page, 1) >= rec_size)
-	      || (page_get_n_recs(page) <= 1))) {
-
-		if (big_rec_vec) {
-			dtuple_convert_back_big_rec(index, entry, big_rec_vec);
-		}
-		return(DB_FAIL);
-	}
-
-	/* Check locks and write to the undo log, if specified */
-	err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &inherit);
-
-	if (err != DB_SUCCESS) {
-
-		if (big_rec_vec) {
-			dtuple_convert_back_big_rec(index, entry, big_rec_vec);
-		}
-		return(err);
-	}
-
-	page_cursor = btr_cur_get_page_cur(cursor);
-
-	reorg = FALSE;
-
-	/* Now, try the insert */
-
-	*rec = page_cur_insert_rec_low(page_cursor, entry, index,
-				       NULL, NULL, mtr);
-	if (UNIV_UNLIKELY(!(*rec))) {
-		/* If the record did not fit, reorganize */
-		btr_page_reorganize(page, index, mtr);
-
-		ut_ad(page_get_max_insert_size(page, 1) == max_size);
-
-		reorg = TRUE;
-
-		page_cur_search(page, index, entry, PAGE_CUR_LE, page_cursor);
-
-		*rec = page_cur_tuple_insert(page_cursor, entry, index, mtr);
-
-		if (UNIV_UNLIKELY(!*rec)) {
-			fputs("InnoDB: Error: cannot insert tuple ", stderr);
-			dtuple_print(stderr, entry);
-			fputs(" into ", stderr);
-			dict_index_name_print(stderr, thr_get_trx(thr), index);
-			fprintf(stderr, "\nInnoDB: max insert size %lu\n",
-				(ulong) max_size);
-			ut_error;
-		}
-	}
-
-#ifdef BTR_CUR_HASH_ADAPT
-	if (!reorg && (0 == level) && (cursor->flag == BTR_CUR_HASH)) {
-		btr_search_update_hash_node_on_insert(cursor);
-	} else {
-		btr_search_update_hash_on_insert(cursor);
-	}
-#endif
-
-	if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) {
-
-		lock_update_insert(*rec);
-	}
-
-#if 0
-	fprintf(stderr, "Insert into page %lu, max ins size %lu,"
-		" rec %lu ind type %lu\n",
-		buf_frame_get_page_no(page), max_size,
-		rec_size + PAGE_DIR_SLOT_SIZE, type);
-#endif
-	if (!(type & DICT_CLUSTERED)) {
-		/* We have added a record to page: update its free bits */
-		ibuf_update_free_bits_if_full(cursor->index, page, max_size,
-					      rec_size + PAGE_DIR_SLOT_SIZE);
-	}
-
-	*big_rec = big_rec_vec;
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************
-Performs an insert on a page of an index tree. It is assumed that mtr
-holds an x-latch on the tree and on the cursor page. If the insert is
-made on the leaf level, to avoid deadlocks, mtr must also own x-latches
-to brothers of page, if those brothers exist. */
-
-ulint
-btr_cur_pessimistic_insert(
-/*=======================*/
-				/* out: DB_SUCCESS or error number */
-	ulint		flags,	/* in: undo logging and locking flags: if not
-				zero, the parameter thr should be
-				specified; if no undo logging is specified,
-				then the caller must have reserved enough
-				free extents in the file space so that the
-				insertion will certainly succeed */
-	btr_cur_t*	cursor,	/* in: cursor after which to insert;
-				cursor stays valid */
-	dtuple_t*	entry,	/* in: entry to insert */
-	rec_t**		rec,	/* out: pointer to inserted record if
-				succeed */
-	big_rec_t**	big_rec,/* out: big rec vector whose fields have to
-				be stored externally by the caller, or
-				NULL */
-	que_thr_t*	thr,	/* in: query thread or NULL */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	dict_index_t*	index		= cursor->index;
-	big_rec_t*	big_rec_vec	= NULL;
-	page_t*		page;
-	ulint		err;
-	ibool		dummy_inh;
-	ibool		success;
-	ulint		n_extents	= 0;
-	ulint		n_reserved;
-
-	ut_ad(dtuple_check_typed(entry));
-
-	*big_rec = NULL;
-
-	page = btr_cur_get_page(cursor);
-
-	ut_ad(mtr_memo_contains(mtr,
-				dict_index_get_lock(btr_cur_get_index(cursor)),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-
-	/* Try first an optimistic insert; reset the cursor flag: we do not
-	assume anything of how it was positioned */
-
-	cursor->flag = BTR_CUR_BINARY;
-
-	err = btr_cur_optimistic_insert(flags, cursor, entry, rec, big_rec,
-					thr, mtr);
-	if (err != DB_FAIL) {
-
-		return(err);
-	}
-
-	/* Retry with a pessimistic insert. Check locks and write to undo log,
-	if specified */
-
-	err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &dummy_inh);
-
-	if (err != DB_SUCCESS) {
-
-		return(err);
-	}
-
-	if (!(flags & BTR_NO_UNDO_LOG_FLAG)) {
-		/* First reserve enough free space for the file segments
-		of the index tree, so that the insert will not fail because
-		of lack of space */
-
-		n_extents = cursor->tree_height / 16 + 3;
-
-		success = fsp_reserve_free_extents(&n_reserved, index->space,
-						   n_extents, FSP_NORMAL, mtr);
-		if (!success) {
-			err = DB_OUT_OF_FILE_SPACE;
-
-			return(err);
-		}
-	}
-
-	if (rec_get_converted_size(index, entry)
-	    >= ut_min(page_get_free_space_of_empty(page_is_comp(page)) / 2,
-		      REC_MAX_DATA_SIZE)) {
-
-		/* The record is so big that we have to store some fields
-		externally on separate database pages */
-
-		big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0);
-
-		if (big_rec_vec == NULL) {
-
-			if (n_extents > 0) {
-				fil_space_release_free_extents(index->space,
-							       n_reserved);
-			}
-			return(DB_TOO_BIG_RECORD);
-		}
-	}
-
-	if (dict_index_get_page(index) == buf_frame_get_page_no(page)) {
-
-		/* The page is the root page */
-		*rec = btr_root_raise_and_insert(cursor, entry, mtr);
-	} else {
-		*rec = btr_page_split_and_insert(cursor, entry, mtr);
-	}
-
-	btr_cur_position(index, page_rec_get_prev(*rec), cursor);
-
-#ifdef BTR_CUR_ADAPT
-	btr_search_update_hash_on_insert(cursor);
-#endif
-	if (!(flags & BTR_NO_LOCKING_FLAG)) {
-
-		lock_update_insert(*rec);
-	}
-
-	err = DB_SUCCESS;
-
-	if (n_extents > 0) {
-		fil_space_release_free_extents(index->space, n_reserved);
-	}
-
-	*big_rec = big_rec_vec;
-
-	return(err);
-}
-
-/*==================== B-TREE UPDATE =========================*/
-
-/*****************************************************************
-For an update, checks the locks and does the undo logging. */
-UNIV_INLINE
-ulint
-btr_cur_upd_lock_and_undo(
-/*======================*/
-				/* out: DB_SUCCESS, DB_WAIT_LOCK, or error
-				number */
-	ulint		flags,	/* in: undo logging and locking flags */
-	btr_cur_t*	cursor,	/* in: cursor on record to update */
-	upd_t*		update,	/* in: update vector */
-	ulint		cmpl_info,/* in: compiler info on secondary index
-				updates */
-	que_thr_t*	thr,	/* in: query thread */
-	dulint*		roll_ptr)/* out: roll pointer */
-{
-	dict_index_t*	index;
-	rec_t*		rec;
-	ulint		err;
-
-	ut_ad(cursor && update && thr && roll_ptr);
-
-	rec = btr_cur_get_rec(cursor);
-	index = cursor->index;
-
-	if (!(index->type & DICT_CLUSTERED)) {
-		/* We do undo logging only when we update a clustered index
-		record */
-		return(lock_sec_rec_modify_check_and_lock(flags, rec, index,
-							  thr));
-	}
-
-	/* Check if we have to wait for a lock: enqueue an explicit lock
-	request if yes */
-
-	err = DB_SUCCESS;
-
-	if (!(flags & BTR_NO_LOCKING_FLAG)) {
-		mem_heap_t*	heap		= NULL;
-		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-		err = lock_clust_rec_modify_check_and_lock(
-			flags, rec, index,
-			rec_get_offsets(rec, index, offsets_,
-					ULINT_UNDEFINED, &heap), thr);
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-		if (err != DB_SUCCESS) {
-
-			return(err);
-		}
-	}
-
-	/* Append the info about the update in the undo log */
-
-	err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
-					    index, NULL, update,
-					    cmpl_info, rec, roll_ptr);
-	return(err);
-}
-
-/***************************************************************
-Writes a redo log record of updating a record in-place. */
-UNIV_INLINE
-void
-btr_cur_update_in_place_log(
-/*========================*/
-	ulint		flags,		/* in: flags */
-	rec_t*		rec,		/* in: record */
-	dict_index_t*	index,		/* in: index where cursor positioned */
-	upd_t*		update,		/* in: update vector */
-	trx_t*		trx,		/* in: transaction */
-	dulint		roll_ptr,	/* in: roll ptr */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	byte*	log_ptr;
-	page_t*	page	= page_align(rec);
-	ut_ad(flags < 256);
-	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
-	log_ptr = mlog_open_and_write_index(mtr, rec, index, page_is_comp(page)
-					    ? MLOG_COMP_REC_UPDATE_IN_PLACE
-					    : MLOG_REC_UPDATE_IN_PLACE,
-					    1 + DATA_ROLL_PTR_LEN + 14 + 2
-					    + MLOG_BUF_MARGIN);
-
-	if (!log_ptr) {
-		/* Logging in mtr is switched off during crash recovery */
-		return;
-	}
-
-	/* The code below assumes index is a clustered index: change index to
-	the clustered index if we are updating a secondary index record (or we
-	could as well skip writing the sys col values to the log in this case
-	because they are not needed for a secondary index record update) */
-
-	index = dict_table_get_first_index(index->table);
-
-	mach_write_to_1(log_ptr, flags);
-	log_ptr++;
-
-	log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
-						mtr);
-	mach_write_to_2(log_ptr, page_offset(rec));
-	log_ptr += 2;
-
-	row_upd_index_write_log(update, log_ptr, mtr);
-}
-
-/***************************************************************
-Parses a redo log record of updating a record in-place. */
-
-byte*
-btr_cur_parse_update_in_place(
-/*==========================*/
-				/* out: end of log record or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	page_t*		page,	/* in: page or NULL */
-	dict_index_t*	index)	/* in: index corresponding to page */
-{
-	ulint	flags;
-	rec_t*	rec;
-	upd_t*	update;
-	ulint	pos;
-	dulint	trx_id;
-	dulint	roll_ptr;
-	ulint	rec_offset;
-	mem_heap_t* heap;
-	ulint*	offsets;
-
-	if (end_ptr < ptr + 1) {
-
-		return(NULL);
-	}
-
-	flags = mach_read_from_1(ptr);
-	ptr++;
-
-	ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);
-
-	if (ptr == NULL) {
-
-		return(NULL);
-	}
-
-	if (end_ptr < ptr + 2) {
-
-		return(NULL);
-	}
-
-	rec_offset = mach_read_from_2(ptr);
-	ptr += 2;
-
-	ut_a(rec_offset <= UNIV_PAGE_SIZE);
-
-	heap = mem_heap_create(256);
-
-	ptr = row_upd_index_parse(ptr, end_ptr, heap, &update);
-
-	if (!ptr || !page) {
-
-		goto func_exit;
-	}
-
-	ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
-	rec = page + rec_offset;
-
-	/* We do not need to reserve btr_search_latch, as the page is only
-	being recovered, and there cannot be a hash index to it. */
-
-	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
-	if (!(flags & BTR_KEEP_SYS_FLAG)) {
-		row_upd_rec_sys_fields_in_recovery(rec, offsets,
-						   pos, trx_id, roll_ptr);
-	}
-
-	row_upd_rec_in_place(rec, offsets, update);
-
-func_exit:
-	mem_heap_free(heap);
-
-	return(ptr);
-}
-
-/*****************************************************************
-Updates a record when the update causes no size changes in its fields.
-We assume here that the ordering fields of the record do not change. */
-
-ulint
-btr_cur_update_in_place(
-/*====================*/
-				/* out: DB_SUCCESS or error number */
-	ulint		flags,	/* in: undo logging and locking flags */
-	btr_cur_t*	cursor,	/* in: cursor on the record to update;
-				cursor stays valid and positioned on the
-				same record */
-	upd_t*		update,	/* in: update vector */
-	ulint		cmpl_info,/* in: compiler info on secondary index
-				updates */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	dict_index_t*	index;
-	buf_block_t*	block;
-	ulint		err;
-	rec_t*		rec;
-	dulint		roll_ptr	= ut_dulint_zero;
-	trx_t*		trx;
-	ulint		was_delete_marked;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	rec = btr_cur_get_rec(cursor);
-	index = cursor->index;
-	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-	trx = thr_get_trx(thr);
-	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-#ifdef UNIV_DEBUG
-	if (btr_cur_print_record_ops && thr) {
-		btr_cur_trx_report(trx, index, "update ");
-		rec_print_new(stderr, rec, offsets);
-	}
-#endif /* UNIV_DEBUG */
-
-	/* Do lock checking and undo logging */
-	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
-					thr, &roll_ptr);
-	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
-
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-		return(err);
-	}
-
-	block = buf_block_align(rec);
-	ut_ad(!!page_is_comp(buf_block_get_frame(block))
-	      == dict_table_is_comp(index->table));
-
-	if (block->is_hashed) {
-		/* The function row_upd_changes_ord_field_binary works only
-		if the update vector was built for a clustered index, we must
-		NOT call it if index is secondary */
-
-		if (!(index->type & DICT_CLUSTERED)
-		    || row_upd_changes_ord_field_binary(NULL, index, update)) {
-
-			/* Remove possible hash index pointer to this record */
-			btr_search_update_hash_on_delete(cursor);
-		}
-
-		rw_lock_x_lock(&btr_search_latch);
-	}
-
-	if (!(flags & BTR_KEEP_SYS_FLAG)) {
-		row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr);
-	}
-
-	was_delete_marked = rec_get_deleted_flag(
-		rec, page_is_comp(buf_block_get_frame(block)));
-
-	row_upd_rec_in_place(rec, offsets, update);
-
-	if (block->is_hashed) {
-		rw_lock_x_unlock(&btr_search_latch);
-	}
-
-	btr_cur_update_in_place_log(flags, rec, index, update, trx, roll_ptr,
-				    mtr);
-	if (was_delete_marked
-	    && !rec_get_deleted_flag(rec, page_is_comp(
-					     buf_block_get_frame(block)))) {
-		/* The new updated record owns its possible externally
-		stored fields */
-
-		btr_cur_unmark_extern_fields(rec, mtr, offsets);
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************
-Tries to update a record on a page in an index tree. It is assumed that mtr
-holds an x-latch on the page. The operation does not succeed if there is too
-little space on the page or if the update would result in too empty a page,
-so that tree compression is recommended. We assume here that the ordering
-fields of the record do not change. */
-
-ulint
-btr_cur_optimistic_update(
-/*======================*/
-				/* out: DB_SUCCESS, or DB_OVERFLOW if the
-				updated record does not fit, DB_UNDERFLOW
-				if the page would become too empty */
-	ulint		flags,	/* in: undo logging and locking flags */
-	btr_cur_t*	cursor,	/* in: cursor on the record to update;
-				cursor stays valid and positioned on the
-				same record */
-	upd_t*		update,	/* in: update vector; this must also
-				contain trx id and roll ptr fields */
-	ulint		cmpl_info,/* in: compiler info on secondary index
-				updates */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	dict_index_t*	index;
-	page_cur_t*	page_cursor;
-	ulint		err;
-	page_t*		page;
-	rec_t*		rec;
-	ulint		max_size;
-	ulint		new_rec_size;
-	ulint		old_rec_size;
-	dtuple_t*	new_entry;
-	dulint		roll_ptr;
-	trx_t*		trx;
-	mem_heap_t*	heap;
-	ibool		reorganized	= FALSE;
-	ulint		i;
-	ulint*		offsets;
-
-	page = btr_cur_get_page(cursor);
-	rec = btr_cur_get_rec(cursor);
-	index = cursor->index;
-	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-
-	heap = mem_heap_create(1024);
-	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
-#ifdef UNIV_DEBUG
-	if (btr_cur_print_record_ops && thr) {
-		btr_cur_trx_report(thr_get_trx(thr), index, "update ");
-		rec_print_new(stderr, rec, offsets);
-	}
-#endif /* UNIV_DEBUG */
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	if (!row_upd_changes_field_size_or_external(index, offsets, update)) {
-
-		/* The simplest and the most common case: the update does not
-		change the size of any field and none of the updated fields is
-		externally stored in rec or update */
-		mem_heap_free(heap);
-		return(btr_cur_update_in_place(flags, cursor, update,
-					       cmpl_info, thr, mtr));
-	}
-
-	for (i = 0; i < upd_get_n_fields(update); i++) {
-		if (upd_get_nth_field(update, i)->extern_storage) {
-
-			/* Externally stored fields are treated in pessimistic
-			update */
-
-			mem_heap_free(heap);
-			return(DB_OVERFLOW);
-		}
-	}
-
-	if (rec_offs_any_extern(offsets)) {
-		/* Externally stored fields are treated in pessimistic
-		update */
-
-		mem_heap_free(heap);
-		return(DB_OVERFLOW);
-	}
-
-	page_cursor = btr_cur_get_page_cur(cursor);
-
-	new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
-
-	row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
-						     FALSE, NULL);
-	old_rec_size = rec_offs_size(offsets);
-	new_rec_size = rec_get_converted_size(index, new_entry);
-
-	if (UNIV_UNLIKELY(new_rec_size
-			  >= (page_get_free_space_of_empty(page_is_comp(page))
-			      / 2))) {
-
-		mem_heap_free(heap);
-
-		return(DB_OVERFLOW);
-	}
-
-	max_size = old_rec_size
-		+ page_get_max_insert_size_after_reorganize(page, 1);
-
-	if (UNIV_UNLIKELY(page_get_data_size(page)
-			  - old_rec_size + new_rec_size
-			  < BTR_CUR_PAGE_COMPRESS_LIMIT)) {
-
-		/* The page would become too empty */
-
-		mem_heap_free(heap);
-
-		return(DB_UNDERFLOW);
-	}
-
-	if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
-	       && (max_size >= new_rec_size))
-	      || (page_get_n_recs(page) <= 1))) {
-
-		/* There was not enough space, or it did not pay to
-		reorganize: for simplicity, we decide what to do assuming a
-		reorganization is needed, though it might not be necessary */
-
-		mem_heap_free(heap);
-
-		return(DB_OVERFLOW);
-	}
-
-	/* Do lock checking and undo logging */
-	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, thr,
-					&roll_ptr);
-	if (err != DB_SUCCESS) {
-
-		mem_heap_free(heap);
-
-		return(err);
-	}
-
-	/* Ok, we may do the replacement. Store on the page infimum the
-	explicit locks on rec, before deleting rec (see the comment in
-	.._pessimistic_update). */
-
-	lock_rec_store_on_page_infimum(page, rec);
-
-	btr_search_update_hash_on_delete(cursor);
-
-	page_cur_delete_rec(page_cursor, index, offsets, mtr);
-
-	page_cur_move_to_prev(page_cursor);
-
-	trx = thr_get_trx(thr);
-
-	if (!(flags & BTR_KEEP_SYS_FLAG)) {
-		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
-					      roll_ptr);
-		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
-					      trx->id);
-	}
-
-	rec = btr_cur_insert_if_possible(cursor, new_entry, &reorganized, mtr);
-
-	ut_a(rec); /* <- We calculated above the insert would fit */
-
-	if (!rec_get_deleted_flag(rec, page_is_comp(page))) {
-		/* The new inserted record owns its possible externally
-		stored fields */
-
-		offsets = rec_get_offsets(rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		btr_cur_unmark_extern_fields(rec, mtr, offsets);
-	}
-
-	/* Restore the old explicit lock state on the record */
-
-	lock_rec_restore_from_page_infimum(rec, page);
-
-	page_cur_move_to_next(page_cursor);
-
-	mem_heap_free(heap);
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************
-If, in a split, a new supremum record was created as the predecessor of the
-updated record, the supremum record must inherit exactly the locks on the
-updated record. In the split it may have inherited locks from the successor
-of the updated record, which is not correct. This function restores the
-right locks for the new supremum. */
-static
-void
-btr_cur_pess_upd_restore_supremum(
-/*==============================*/
-	rec_t*	rec,	/* in: updated record */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	page_t*	page;
-	page_t*	prev_page;
-	ulint	space;
-	ulint	prev_page_no;
-
-	page = buf_frame_align(rec);
-
-	if (page_rec_get_next(page_get_infimum_rec(page)) != rec) {
-		/* Updated record is not the first user record on its page */
-
-		return;
-	}
-
-	space = buf_frame_get_space_id(page);
-	prev_page_no = btr_page_get_prev(page, mtr);
-
-	ut_ad(prev_page_no != FIL_NULL);
-	prev_page = buf_page_get_with_no_latch(space, prev_page_no, mtr);
-#ifdef UNIV_BTR_DEBUG
-	ut_a(btr_page_get_next(prev_page, mtr)
-	     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
-	/* We must already have an x-latch to prev_page! */
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(prev_page),
-				MTR_MEMO_PAGE_X_FIX));
-
-	lock_rec_reset_and_inherit_gap_locks(page_get_supremum_rec(prev_page),
-					     rec);
-}
-
-/*****************************************************************
-Performs an update of a record on a page of a tree. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. If the
-update is made on the leaf level, to avoid deadlocks, mtr must also
-own x-latches to brothers of page, if those brothers exist. We assume
-here that the ordering fields of the record do not change. */
-
-ulint
-btr_cur_pessimistic_update(
-/*=======================*/
-				/* out: DB_SUCCESS or error code */
-	ulint		flags,	/* in: undo logging, locking, and rollback
-				flags */
-	btr_cur_t*	cursor,	/* in: cursor on the record to update */
-	big_rec_t**	big_rec,/* out: big rec vector whose fields have to
-				be stored externally by the caller, or NULL */
-	upd_t*		update,	/* in: update vector; this is allowed also
-				contain trx id and roll ptr fields, but
-				the values in update vector have no effect */
-	ulint		cmpl_info,/* in: compiler info on secondary index
-				updates */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	big_rec_t*	big_rec_vec	= NULL;
-	big_rec_t*	dummy_big_rec;
-	dict_index_t*	index;
-	page_t*		page;
-	rec_t*		rec;
-	page_cur_t*	page_cursor;
-	dtuple_t*	new_entry;
-	mem_heap_t*	heap;
-	ulint		err;
-	ulint		optim_err;
-	ibool		dummy_reorganized;
-	dulint		roll_ptr;
-	trx_t*		trx;
-	ibool		was_first;
-	ibool		success;
-	ulint		n_extents	= 0;
-	ulint		n_reserved;
-	ulint*		ext_vect;
-	ulint		n_ext_vect;
-	ulint		reserve_flag;
-	ulint*		offsets		= NULL;
-
-	*big_rec = NULL;
-
-	page = btr_cur_get_page(cursor);
-	rec = btr_cur_get_rec(cursor);
-	index = cursor->index;
-
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-
-	optim_err = btr_cur_optimistic_update(flags, cursor, update,
-					      cmpl_info, thr, mtr);
-
-	if (optim_err != DB_UNDERFLOW && optim_err != DB_OVERFLOW) {
-
-		return(optim_err);
-	}
-
-	/* Do lock checking and undo logging */
-	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
-					thr, &roll_ptr);
-	if (err != DB_SUCCESS) {
-
-		return(err);
-	}
-
-	if (optim_err == DB_OVERFLOW) {
-		/* First reserve enough free space for the file segments
-		of the index tree, so that the update will not fail because
-		of lack of space */
-
-		n_extents = cursor->tree_height / 16 + 3;
-
-		if (flags & BTR_NO_UNDO_LOG_FLAG) {
-			reserve_flag = FSP_CLEANING;
-		} else {
-			reserve_flag = FSP_NORMAL;
-		}
-
-		success = fsp_reserve_free_extents(&n_reserved, index->space,
-						   n_extents,
-						   reserve_flag, mtr);
-		if (!success) {
-			err = DB_OUT_OF_FILE_SPACE;
-
-			return(err);
-		}
-	}
-
-	heap = mem_heap_create(1024);
-	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
-	trx = thr_get_trx(thr);
-
-	new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
-
-	row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
-						     FALSE, heap);
-	if (!(flags & BTR_KEEP_SYS_FLAG)) {
-		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
-					      roll_ptr);
-		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
-					      trx->id);
-	}
-
-	if (flags & BTR_NO_UNDO_LOG_FLAG) {
-		/* We are in a transaction rollback undoing a row
-		update: we must free possible externally stored fields
-		which got new values in the update, if they are not
-		inherited values. They can be inherited if we have
-		updated the primary key to another value, and then
-		update it back again. */
-
-		ut_a(big_rec_vec == NULL);
-
-		btr_rec_free_updated_extern_fields(index, rec, offsets,
-						   update, TRUE, mtr);
-	}
-
-	/* We have to set appropriate extern storage bits in the new
-	record to be inserted: we have to remember which fields were such */
-
-	ext_vect = mem_heap_alloc(heap, sizeof(ulint)
-				  * dict_index_get_n_fields(index));
-	ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
-	offsets = rec_get_offsets(rec, index, offsets,
-				  ULINT_UNDEFINED, &heap);
-	n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets, update);
-
-	if (UNIV_UNLIKELY(rec_get_converted_size(index, new_entry)
-			  >= ut_min(page_get_free_space_of_empty(
-					    page_is_comp(page)) / 2,
-				    REC_MAX_DATA_SIZE))) {
-
-		big_rec_vec = dtuple_convert_big_rec(index, new_entry,
-						     ext_vect, n_ext_vect);
-		if (big_rec_vec == NULL) {
-
-			err = DB_TOO_BIG_RECORD;
-			goto return_after_reservations;
-		}
-	}
-
-	page_cursor = btr_cur_get_page_cur(cursor);
-
-	/* Store state of explicit locks on rec on the page infimum record,
-	before deleting rec. The page infimum acts as a dummy carrier of the
-	locks, taking care also of lock releases, before we can move the locks
-	back on the actual record. There is a special case: if we are
-	inserting on the root page and the insert causes a call of
-	btr_root_raise_and_insert. Therefore we cannot in the lock system
-	delete the lock structs set on the root page even if the root
-	page carries just node pointers. */
-
-	lock_rec_store_on_page_infimum(buf_frame_align(rec), rec);
-
-	btr_search_update_hash_on_delete(cursor);
-
-	page_cur_delete_rec(page_cursor, index, offsets, mtr);
-
-	page_cur_move_to_prev(page_cursor);
-
-	rec = btr_cur_insert_if_possible(cursor, new_entry,
-					 &dummy_reorganized, mtr);
-	ut_a(rec || optim_err != DB_UNDERFLOW);
-
-	if (rec) {
-		lock_rec_restore_from_page_infimum(rec, page);
-		rec_set_field_extern_bits(rec, index,
-					  ext_vect, n_ext_vect, mtr);
-
-		offsets = rec_get_offsets(rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-
-		if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
-			/* The new inserted record owns its possible externally
-			stored fields */
-			btr_cur_unmark_extern_fields(rec, mtr, offsets);
-		}
-
-		btr_cur_compress_if_useful(cursor, mtr);
-
-		err = DB_SUCCESS;
-		goto return_after_reservations;
-	}
-
-	if (page_cur_is_before_first(page_cursor)) {
-		/* The record to be updated was positioned as the first user
-		record on its page */
-
-		was_first = TRUE;
-	} else {
-		was_first = FALSE;
-	}
-
-	/* The first parameter means that no lock checking and undo logging
-	is made in the insert */
-
-	err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG
-					 | BTR_NO_LOCKING_FLAG
-					 | BTR_KEEP_SYS_FLAG,
-					 cursor, new_entry, &rec,
-					 &dummy_big_rec, NULL, mtr);
-	ut_a(rec);
-	ut_a(err == DB_SUCCESS);
-	ut_a(dummy_big_rec == NULL);
-
-	rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr);
-	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
-	if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
-		/* The new inserted record owns its possible externally
-		stored fields */
-
-		btr_cur_unmark_extern_fields(rec, mtr, offsets);
-	}
-
-	lock_rec_restore_from_page_infimum(rec, page);
-
-	/* If necessary, restore also the correct lock state for a new,
-	preceding supremum record created in a page split. While the old
-	record was nonexistent, the supremum might have inherited its locks
-	from a wrong record. */
-
-	if (!was_first) {
-		btr_cur_pess_upd_restore_supremum(rec, mtr);
-	}
-
-return_after_reservations:
-	mem_heap_free(heap);
-
-	if (n_extents > 0) {
-		fil_space_release_free_extents(index->space, n_reserved);
-	}
-
-	*big_rec = big_rec_vec;
-
-	return(err);
-}
-
-/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
-
-/********************************************************************
-Writes the redo log record for delete marking or unmarking of an index
-record. */
-UNIV_INLINE
-void
-btr_cur_del_mark_set_clust_rec_log(
-/*===============================*/
-	ulint		flags,	/* in: flags */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: index of the record */
-	ibool		val,	/* in: value to set */
-	trx_t*		trx,	/* in: deleting transaction */
-	dulint		roll_ptr,/* in: roll ptr to the undo log record */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	byte*	log_ptr;
-	ut_ad(flags < 256);
-	ut_ad(val <= 1);
-
-	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-
-	log_ptr = mlog_open_and_write_index(mtr, rec, index,
-					    page_rec_is_comp(rec)
-					    ? MLOG_COMP_REC_CLUST_DELETE_MARK
-					    : MLOG_REC_CLUST_DELETE_MARK,
-					    1 + 1 + DATA_ROLL_PTR_LEN
-					    + 14 + 2);
-
-	if (!log_ptr) {
-		/* Logging in mtr is switched off during crash recovery */
-		return;
-	}
-
-	mach_write_to_1(log_ptr, flags);
-	log_ptr++;
-	mach_write_to_1(log_ptr, val);
-	log_ptr++;
-
-	log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
-						mtr);
-	mach_write_to_2(log_ptr, page_offset(rec));
-	log_ptr += 2;
-
-	mlog_close(mtr, log_ptr);
-}
-
-/********************************************************************
-Parses the redo log record for delete marking or unmarking of a clustered
-index record. */
-
-byte*
-btr_cur_parse_del_mark_set_clust_rec(
-/*=================================*/
-				/* out: end of log record or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	dict_index_t*	index,	/* in: index corresponding to page */
-	page_t*		page)	/* in: page or NULL */
-{
-	ulint	flags;
-	ulint	val;
-	ulint	pos;
-	dulint	trx_id;
-	dulint	roll_ptr;
-	ulint	offset;
-	rec_t*	rec;
-
-	ut_ad(!page
-	      || !!page_is_comp(page) == dict_table_is_comp(index->table));
-
-	if (end_ptr < ptr + 2) {
-
-		return(NULL);
-	}
-
-	flags = mach_read_from_1(ptr);
-	ptr++;
-	val = mach_read_from_1(ptr);
-	ptr++;
-
-	ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);
-
-	if (ptr == NULL) {
-
-		return(NULL);
-	}
-
-	if (end_ptr < ptr + 2) {
-
-		return(NULL);
-	}
-
-	offset = mach_read_from_2(ptr);
-	ptr += 2;
-
-	ut_a(offset <= UNIV_PAGE_SIZE);
-
-	if (page) {
-		rec = page + offset;
-
-		if (!(flags & BTR_KEEP_SYS_FLAG)) {
-			mem_heap_t*	heap		= NULL;
-			ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-			*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-			row_upd_rec_sys_fields_in_recovery(
-				rec, rec_get_offsets(rec, index, offsets_,
-						     ULINT_UNDEFINED, &heap),
-				pos, trx_id, roll_ptr);
-			if (UNIV_LIKELY_NULL(heap)) {
-				mem_heap_free(heap);
-			}
-		}
-
-		/* We do not need to reserve btr_search_latch, as the page
-		is only being recovered, and there cannot be a hash index to
-		it. */
-
-		rec_set_deleted_flag(rec, page_is_comp(page), val);
-	}
-
-	return(ptr);
-}
-
-/***************************************************************
-Marks a clustered index record deleted. Writes an undo log record to
-undo log on this delete marking. Writes in the trx id field the id
-of the deleting transaction, and in the roll ptr field pointer to the
-undo log record created. */
-
-ulint
-btr_cur_del_mark_set_clust_rec(
-/*===========================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
-				number */
-	ulint		flags,	/* in: undo logging and locking flags */
-	btr_cur_t*	cursor,	/* in: cursor */
-	ibool		val,	/* in: value to set */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	dict_index_t*	index;
-	buf_block_t*	block;
-	dulint		roll_ptr;
-	ulint		err;
-	rec_t*		rec;
-	trx_t*		trx;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	rec = btr_cur_get_rec(cursor);
-	index = cursor->index;
-	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
-#ifdef UNIV_DEBUG
-	if (btr_cur_print_record_ops && thr) {
-		btr_cur_trx_report(thr_get_trx(thr), index, "del mark ");
-		rec_print_new(stderr, rec, offsets);
-	}
-#endif /* UNIV_DEBUG */
-
-	ut_ad(index->type & DICT_CLUSTERED);
-	ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
-
-	err = lock_clust_rec_modify_check_and_lock(flags,
-						   rec, index, offsets, thr);
-
-	if (err != DB_SUCCESS) {
-
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-		return(err);
-	}
-
-	err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
-					    index, NULL, NULL, 0, rec,
-					    &roll_ptr);
-	if (err != DB_SUCCESS) {
-
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-		return(err);
-	}
-
-	block = buf_block_align(rec);
-
-	if (block->is_hashed) {
-		rw_lock_x_lock(&btr_search_latch);
-	}
-
-	rec_set_deleted_flag(rec, rec_offs_comp(offsets), val);
-
-	trx = thr_get_trx(thr);
-
-	if (!(flags & BTR_KEEP_SYS_FLAG)) {
-		row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr);
-	}
-
-	if (block->is_hashed) {
-		rw_lock_x_unlock(&btr_search_latch);
-	}
-
-	btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
-					   roll_ptr, mtr);
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-	return(DB_SUCCESS);
-}
-
-/********************************************************************
-Writes the redo log record for a delete mark setting of a secondary
-index record. */
-UNIV_INLINE
-void
-btr_cur_del_mark_set_sec_rec_log(
-/*=============================*/
-	rec_t*		rec,	/* in: record */
-	ibool		val,	/* in: value to set */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	byte*	log_ptr;
-	ut_ad(val <= 1);
-
-	log_ptr = mlog_open(mtr, 11 + 1 + 2);
-
-	if (!log_ptr) {
-		/* Logging in mtr is switched off during crash recovery:
-		in that case mlog_open returns NULL */
-		return;
-	}
-
-	log_ptr = mlog_write_initial_log_record_fast(
-		rec, MLOG_REC_SEC_DELETE_MARK, log_ptr, mtr);
-	mach_write_to_1(log_ptr, val);
-	log_ptr++;
-
-	mach_write_to_2(log_ptr, page_offset(rec));
-	log_ptr += 2;
-
-	mlog_close(mtr, log_ptr);
-}
-
-/********************************************************************
-Parses the redo log record for delete marking or unmarking of a secondary
-index record. */
-
-byte*
-btr_cur_parse_del_mark_set_sec_rec(
-/*===============================*/
-				/* out: end of log record or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	page_t*		page)	/* in: page or NULL */
-{
-	ulint	val;
-	ulint	offset;
-	rec_t*	rec;
-
-	if (end_ptr < ptr + 3) {
-
-		return(NULL);
-	}
-
-	val = mach_read_from_1(ptr);
-	ptr++;
-
-	offset = mach_read_from_2(ptr);
-	ptr += 2;
-
-	ut_a(offset <= UNIV_PAGE_SIZE);
-
-	if (page) {
-		rec = page + offset;
-
-		/* We do not need to reserve btr_search_latch, as the page
-		is only being recovered, and there cannot be a hash index to
-		it. */
-
-		rec_set_deleted_flag(rec, page_is_comp(page), val);
-	}
-
-	return(ptr);
-}
-
-/***************************************************************
-Sets a secondary index record delete mark to TRUE or FALSE. */
-
-ulint
-btr_cur_del_mark_set_sec_rec(
-/*=========================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
-				number */
-	ulint		flags,	/* in: locking flag */
-	btr_cur_t*	cursor,	/* in: cursor */
-	ibool		val,	/* in: value to set */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	buf_block_t*	block;
-	rec_t*		rec;
-	ulint		err;
-
-	rec = btr_cur_get_rec(cursor);
-
-#ifdef UNIV_DEBUG
-	if (btr_cur_print_record_ops && thr) {
-		btr_cur_trx_report(thr_get_trx(thr), cursor->index,
-				   "del mark ");
-		rec_print(stderr, rec, cursor->index);
-	}
-#endif /* UNIV_DEBUG */
-
-	err = lock_sec_rec_modify_check_and_lock(flags, rec, cursor->index,
-						 thr);
-	if (err != DB_SUCCESS) {
-
-		return(err);
-	}
-
-	block = buf_block_align(rec);
-	ut_ad(!!page_is_comp(buf_block_get_frame(block))
-	      == dict_table_is_comp(cursor->index->table));
-
-	if (block->is_hashed) {
-		rw_lock_x_lock(&btr_search_latch);
-	}
-
-	rec_set_deleted_flag(rec, page_is_comp(buf_block_get_frame(block)),
-			     val);
-
-	if (block->is_hashed) {
-		rw_lock_x_unlock(&btr_search_latch);
-	}
-
-	btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
-
-	return(DB_SUCCESS);
-}
-
-/***************************************************************
-Sets a secondary index record delete mark to FALSE. This function is only
-used by the insert buffer insert merge mechanism. */
-
-void
-btr_cur_del_unmark_for_ibuf(
-/*========================*/
-	rec_t*		rec,	/* in: record to delete unmark */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	/* We do not need to reserve btr_search_latch, as the page has just
-	been read to the buffer pool and there cannot be a hash index to it. */
-
-	rec_set_deleted_flag(rec, page_is_comp(buf_frame_align(rec)), FALSE);
-
-	btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
-}
-
-/*==================== B-TREE RECORD REMOVE =========================*/
-
-/*****************************************************************
-Tries to compress a page of the tree on the leaf level. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. To avoid
-deadlocks, mtr must also own x-latches to brothers of page, if those
-brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done! */
-
-void
-btr_cur_compress(
-/*=============*/
-	btr_cur_t*	cursor,	/* in: cursor on the page to compress;
-				cursor does not stay valid */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ut_ad(mtr_memo_contains(mtr,
-				dict_index_get_lock(btr_cur_get_index(cursor)),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(btr_page_get_level(btr_cur_get_page(cursor), mtr) == 0);
-
-	btr_compress(cursor, mtr);
-}
-
-/*****************************************************************
-Tries to compress a page of the tree if it seems useful. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. To avoid
-deadlocks, mtr must also own x-latches to brothers of page, if those
-brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done! */
-
-ibool
-btr_cur_compress_if_useful(
-/*=======================*/
-				/* out: TRUE if compression occurred */
-	btr_cur_t*	cursor,	/* in: cursor on the page to compress;
-				cursor does not stay valid if compression
-				occurs */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ut_ad(mtr_memo_contains(mtr,
-				dict_index_get_lock(btr_cur_get_index(cursor)),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
-				MTR_MEMO_PAGE_X_FIX));
-
-	if (btr_cur_compress_recommendation(cursor, mtr)) {
-
-		btr_compress(cursor, mtr);
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/***********************************************************
-Removes the record on which the tree cursor is positioned on a leaf page.
-It is assumed that the mtr has an x-latch on the page where the cursor is
-positioned, but no latch on the whole tree. */
-
-ibool
-btr_cur_optimistic_delete(
-/*======================*/
-				/* out: TRUE if success, i.e., the page
-				did not become too empty */
-	btr_cur_t*	cursor,	/* in: cursor on leaf page, on the record to
-				delete; cursor stays valid: if deletion
-				succeeds, on function exit it points to the
-				successor of the deleted record */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_t*		page;
-	ulint		max_ins_size;
-	rec_t*		rec;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	ibool		no_compress_needed;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
-				MTR_MEMO_PAGE_X_FIX));
-	/* This is intended only for leaf page deletions */
-
-	page = btr_cur_get_page(cursor);
-
-	ut_ad(btr_page_get_level(page, mtr) == 0);
-
-	rec = btr_cur_get_rec(cursor);
-	offsets = rec_get_offsets(rec, cursor->index, offsets,
-				  ULINT_UNDEFINED, &heap);
-
-	no_compress_needed = !rec_offs_any_extern(offsets)
-		&& btr_cur_can_delete_without_compress(
-			cursor, rec_offs_size(offsets), mtr);
-
-	if (no_compress_needed) {
-
-		lock_update_delete(rec);
-
-		btr_search_update_hash_on_delete(cursor);
-
-		max_ins_size = page_get_max_insert_size_after_reorganize(
-			page, 1);
-		page_cur_delete_rec(btr_cur_get_page_cur(cursor),
-				    cursor->index, offsets, mtr);
-
-		ibuf_update_free_bits_low(cursor->index, page, max_ins_size,
-					  mtr);
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	return(no_compress_needed);
-}
-
-/*****************************************************************
-Removes the record on which the tree cursor is positioned. Tries
-to compress the page if its fillfactor drops below a threshold
-or if it is the only page on the level. It is assumed that mtr holds
-an x-latch on the tree and on the cursor page. To avoid deadlocks,
-mtr must also own x-latches to brothers of page, if those brothers
-exist. */
-
-ibool
-btr_cur_pessimistic_delete(
-/*=======================*/
-				/* out: TRUE if compression occurred */
-	ulint*		err,	/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
-				the latter may occur because we may have
-				to update node pointers on upper levels,
-				and in the case of variable length keys
-				these may actually grow in size */
-	ibool		has_reserved_extents, /* in: TRUE if the
-				caller has already reserved enough free
-				extents so that he knows that the operation
-				will succeed */
-	btr_cur_t*	cursor,	/* in: cursor on the record to delete;
-				if compression does not occur, the cursor
-				stays valid: it points to successor of
-				deleted record on function exit */
-	ibool		in_rollback,/* in: TRUE if called in rollback */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_t*		page;
-	dict_index_t*	index;
-	rec_t*		rec;
-	dtuple_t*	node_ptr;
-	ulint		n_extents	= 0;
-	ulint		n_reserved;
-	ibool		success;
-	ibool		ret		= FALSE;
-	ulint		level;
-	mem_heap_t*	heap;
-	ulint*		offsets;
-
-	page = btr_cur_get_page(cursor);
-	index = btr_cur_get_index(cursor);
-
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-	if (!has_reserved_extents) {
-		/* First reserve enough free space for the file segments
-		of the index tree, so that the node pointer updates will
-		not fail because of lack of space */
-
-		n_extents = cursor->tree_height / 32 + 1;
-
-		success = fsp_reserve_free_extents(&n_reserved,
-						   index->space,
-						   n_extents,
-						   FSP_CLEANING, mtr);
-		if (!success) {
-			*err = DB_OUT_OF_FILE_SPACE;
-
-			return(FALSE);
-		}
-	}
-
-	heap = mem_heap_create(1024);
-	rec = btr_cur_get_rec(cursor);
-
-	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
-	/* Free externally stored fields if the record is neither
-	a node pointer nor in two-byte format.
-	This avoids an unnecessary loop. */
-	if (page_is_comp(page)
-	    ? !rec_get_node_ptr_flag(rec)
-	    : !rec_get_1byte_offs_flag(rec)) {
-		btr_rec_free_externally_stored_fields(index,
-						      rec, offsets,
-						      in_rollback, mtr);
-	}
-
-	if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
-	    && UNIV_UNLIKELY(dict_index_get_page(btr_cur_get_index(cursor))
-			     != buf_frame_get_page_no(page))) {
-
-		/* If there is only one record, drop the whole page in
-		btr_discard_page, if this is not the root page */
-
-		btr_discard_page(cursor, mtr);
-
-		*err = DB_SUCCESS;
-		ret = TRUE;
-
-		goto return_after_reservations;
-	}
-
-	lock_update_delete(rec);
-	level = btr_page_get_level(page, mtr);
-
-	if (level > 0
-	    && UNIV_UNLIKELY(rec == page_rec_get_next(
-				     page_get_infimum_rec(page)))) {
-
-		rec_t*	next_rec = page_rec_get_next(rec);
-
-		if (btr_page_get_prev(page, mtr) == FIL_NULL) {
-
-			/* If we delete the leftmost node pointer on a
-			non-leaf level, we must mark the new leftmost node
-			pointer as the predefined minimum record */
-
-			btr_set_min_rec_mark(next_rec, page_is_comp(page),
-					     mtr);
-		} else {
-			/* Otherwise, if we delete the leftmost node pointer
-			on a page, we have to change the father node pointer
-			so that it is equal to the new leftmost node pointer
-			on the page */
-
-			btr_node_ptr_delete(index, page, mtr);
-
-			node_ptr = dict_index_build_node_ptr(
-				index, next_rec, buf_frame_get_page_no(page),
-				heap, level);
-
-			btr_insert_on_non_leaf_level(index,
-						     level + 1, node_ptr, mtr);
-		}
-	}
-
-	btr_search_update_hash_on_delete(cursor);
-
-	page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
-
-	ut_ad(btr_check_node_ptr(index, page, mtr));
-
-	*err = DB_SUCCESS;
-
-return_after_reservations:
-	mem_heap_free(heap);
-
-	if (ret == FALSE) {
-		ret = btr_cur_compress_if_useful(cursor, mtr);
-	}
-
-	if (n_extents > 0) {
-		fil_space_release_free_extents(index->space, n_reserved);
-	}
-
-	return(ret);
-}
-
-/***********************************************************************
-Adds path information to the cursor for the current page, for which
-the binary search has been performed. */
-static
-void
-btr_cur_add_path_info(
-/*==================*/
-	btr_cur_t*	cursor,		/* in: cursor positioned on a page */
-	ulint		height,		/* in: height of the page in tree;
-					0 means leaf node */
-	ulint		root_height)	/* in: root node height in tree */
-{
-	btr_path_t*	slot;
-	rec_t*		rec;
-
-	ut_a(cursor->path_arr);
-
-	if (root_height >= BTR_PATH_ARRAY_N_SLOTS - 1) {
-		/* Do nothing; return empty path */
-
-		slot = cursor->path_arr;
-		slot->nth_rec = ULINT_UNDEFINED;
-
-		return;
-	}
-
-	if (height == 0) {
-		/* Mark end of slots for path */
-		slot = cursor->path_arr + root_height + 1;
-		slot->nth_rec = ULINT_UNDEFINED;
-	}
-
-	rec = btr_cur_get_rec(cursor);
-
-	slot = cursor->path_arr + (root_height - height);
-
-	slot->nth_rec = page_rec_get_n_recs_before(rec);
-	slot->n_recs = page_get_n_recs(buf_frame_align(rec));
-}
-
-/***********************************************************************
-Estimates the number of rows in a given index range. */
-
-ib_longlong
-btr_estimate_n_rows_in_range(
-/*=========================*/
-				/* out: estimated number of rows */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	tuple1,	/* in: range start, may also be empty tuple */
-	ulint		mode1,	/* in: search mode for range start */
-	dtuple_t*	tuple2,	/* in: range end, may also be empty tuple */
-	ulint		mode2)	/* in: search mode for range end */
-{
-	btr_path_t	path1[BTR_PATH_ARRAY_N_SLOTS];
-	btr_path_t	path2[BTR_PATH_ARRAY_N_SLOTS];
-	btr_cur_t	cursor;
-	btr_path_t*	slot1;
-	btr_path_t*	slot2;
-	ibool		diverged;
-	ibool		diverged_lot;
-	ulint		divergence_level;
-	ib_longlong	n_rows;
-	ulint		i;
-	mtr_t		mtr;
-
-	mtr_start(&mtr);
-
-	cursor.path_arr = path1;
-
-	if (dtuple_get_n_fields(tuple1) > 0) {
-
-		btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
-					    BTR_SEARCH_LEAF | BTR_ESTIMATE,
-					    &cursor, 0, &mtr);
-	} else {
-		btr_cur_open_at_index_side(TRUE, index,
-					   BTR_SEARCH_LEAF | BTR_ESTIMATE,
-					   &cursor, &mtr);
-	}
-
-	mtr_commit(&mtr);
-
-	mtr_start(&mtr);
-
-	cursor.path_arr = path2;
-
-	if (dtuple_get_n_fields(tuple2) > 0) {
-
-		btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
-					    BTR_SEARCH_LEAF | BTR_ESTIMATE,
-					    &cursor, 0, &mtr);
-	} else {
-		btr_cur_open_at_index_side(FALSE, index,
-					   BTR_SEARCH_LEAF | BTR_ESTIMATE,
-					   &cursor, &mtr);
-	}
-
-	mtr_commit(&mtr);
-
-	/* We have the path information for the range in path1 and path2 */
-
-	n_rows = 1;
-	diverged = FALSE;	    /* This becomes true when the path is not
-				    the same any more */
-	diverged_lot = FALSE;	    /* This becomes true when the paths are
-				    not the same or adjacent any more */
-	divergence_level = 1000000; /* This is the level where paths diverged
-				    a lot */
-	for (i = 0; ; i++) {
-		ut_ad(i < BTR_PATH_ARRAY_N_SLOTS);
-
-		slot1 = path1 + i;
-		slot2 = path2 + i;
-
-		if (slot1->nth_rec == ULINT_UNDEFINED
-		    || slot2->nth_rec == ULINT_UNDEFINED) {
-
-			if (i > divergence_level + 1) {
-				/* In trees whose height is > 1 our algorithm
-				tends to underestimate: multiply the estimate
-				by 2: */
-
-				n_rows = n_rows * 2;
-			}
-
-			/* Do not estimate the number of rows in the range
-			to over 1 / 2 of the estimated rows in the whole
-			table */
-
-			if (n_rows > index->table->stat_n_rows / 2) {
-				n_rows = index->table->stat_n_rows / 2;
-
-				/* If there are just 0 or 1 rows in the table,
-				then we estimate all rows are in the range */
-
-				if (n_rows == 0) {
-					n_rows = index->table->stat_n_rows;
-				}
-			}
-
-			return(n_rows);
-		}
-
-		if (!diverged && slot1->nth_rec != slot2->nth_rec) {
-
-			diverged = TRUE;
-
-			if (slot1->nth_rec < slot2->nth_rec) {
-				n_rows = slot2->nth_rec - slot1->nth_rec;
-
-				if (n_rows > 1) {
-					diverged_lot = TRUE;
-					divergence_level = i;
-				}
-			} else {
-				/* Maybe the tree has changed between
-				searches */
-
-				return(10);
-			}
-
-		} else if (diverged && !diverged_lot) {
-
-			if (slot1->nth_rec < slot1->n_recs
-			    || slot2->nth_rec > 1) {
-
-				diverged_lot = TRUE;
-				divergence_level = i;
-
-				n_rows = 0;
-
-				if (slot1->nth_rec < slot1->n_recs) {
-					n_rows += slot1->n_recs
-						- slot1->nth_rec;
-				}
-
-				if (slot2->nth_rec > 1) {
-					n_rows += slot2->nth_rec - 1;
-				}
-			}
-		} else if (diverged_lot) {
-
-			n_rows = (n_rows * (slot1->n_recs + slot2->n_recs))
-				/ 2;
-		}
-	}
-}
-
-/***********************************************************************
-Estimates the number of different key values in a given index, for
-each n-column prefix of the index where n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals. */
-
-void
-btr_estimate_number_of_different_key_vals(
-/*======================================*/
-	dict_index_t*	index)	/* in: index */
-{
-	btr_cur_t	cursor;
-	page_t*		page;
-	rec_t*		rec;
-	ulint		n_cols;
-	ulint		matched_fields;
-	ulint		matched_bytes;
-	ib_longlong*	n_diff;
-	ulint		not_empty_flag	= 0;
-	ulint		total_external_size = 0;
-	ulint		i;
-	ulint		j;
-	ulint		add_on;
-	mtr_t		mtr;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_rec_[REC_OFFS_NORMAL_SIZE];
-	ulint		offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets_rec	= offsets_rec_;
-	ulint*		offsets_next_rec= offsets_next_rec_;
-	*offsets_rec_ = (sizeof offsets_rec_) / sizeof *offsets_rec_;
-	*offsets_next_rec_
-		= (sizeof offsets_next_rec_) / sizeof *offsets_next_rec_;
-
-	n_cols = dict_index_get_n_unique(index);
-
-	n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong));
-
-	memset(n_diff, 0, (n_cols + 1) * sizeof(ib_longlong));
-
-	/* We sample some pages in the index to get an estimate */
-
-	for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES; i++) {
-		rec_t*	supremum;
-		mtr_start(&mtr);
-
-		btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
-
-		/* Count the number of different key values for each prefix of
-		the key on this index page. If the prefix does not determine
-		the index record uniquely in te B-tree, then we subtract one
-		because otherwise our algorithm would give a wrong estimate
-		for an index where there is just one key value. */
-
-		page = btr_cur_get_page(&cursor);
-
-		supremum = page_get_supremum_rec(page);
-		rec = page_rec_get_next(page_get_infimum_rec(page));
-
-		if (rec != supremum) {
-			not_empty_flag = 1;
-			offsets_rec = rec_get_offsets(rec, index, offsets_rec,
-						      ULINT_UNDEFINED, &heap);
-		}
-
-		while (rec != supremum) {
-			rec_t*	next_rec = page_rec_get_next(rec);
-			if (next_rec == supremum) {
-				break;
-			}
-
-			matched_fields = 0;
-			matched_bytes = 0;
-			offsets_next_rec = rec_get_offsets(next_rec, index,
-							   offsets_next_rec,
-							   n_cols, &heap);
-
-			cmp_rec_rec_with_match(rec, next_rec,
-					       offsets_rec, offsets_next_rec,
-					       index, &matched_fields,
-					       &matched_bytes);
-
-			for (j = matched_fields + 1; j <= n_cols; j++) {
-				/* We add one if this index record has
-				a different prefix from the previous */
-
-				n_diff[j]++;
-			}
-
-			total_external_size
-				+= btr_rec_get_externally_stored_len(
-					rec, offsets_rec);
-
-			rec = next_rec;
-			/* Initialize offsets_rec for the next round
-			and assign the old offsets_rec buffer to
-			offsets_next_rec. */
-			{
-				ulint*	offsets_tmp = offsets_rec;
-				offsets_rec = offsets_next_rec;
-				offsets_next_rec = offsets_tmp;
-			}
-		}
-
-
-		if (n_cols == dict_index_get_n_unique_in_tree(index)) {
-
-			/* If there is more than one leaf page in the tree,
-			we add one because we know that the first record
-			on the page certainly had a different prefix than the
-			last record on the previous index page in the
-			alphabetical order. Before this fix, if there was
-			just one big record on each clustered index page, the
-			algorithm grossly underestimated the number of rows
-			in the table. */
-
-			if (btr_page_get_prev(page, &mtr) != FIL_NULL
-			    || btr_page_get_next(page, &mtr) != FIL_NULL) {
-
-				n_diff[n_cols]++;
-			}
-		}
-
-		offsets_rec = rec_get_offsets(rec, index, offsets_rec,
-					      ULINT_UNDEFINED, &heap);
-		total_external_size += btr_rec_get_externally_stored_len(
-			rec, offsets_rec);
-		mtr_commit(&mtr);
-	}
-
-	/* If we saw k borders between different key values on
-	BTR_KEY_VAL_ESTIMATE_N_PAGES leaf pages, we can estimate how many
-	there will be in index->stat_n_leaf_pages */
-
-	/* We must take into account that our sample actually represents
-	also the pages used for external storage of fields (those pages are
-	included in index->stat_n_leaf_pages) */
-
-	for (j = 0; j <= n_cols; j++) {
-		index->stat_n_diff_key_vals[j]
-			= ((n_diff[j]
-			    * (ib_longlong)index->stat_n_leaf_pages
-			    + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1
-			    + total_external_size
-			    + not_empty_flag)
-			   / (BTR_KEY_VAL_ESTIMATE_N_PAGES
-			      + total_external_size));
-
-		/* If the tree is small, smaller than
-		10 * BTR_KEY_VAL_ESTIMATE_N_PAGES + total_external_size, then
-		the above estimate is ok. For bigger trees it is common that we
-		do not see any borders between key values in the few pages
-		we pick. But still there may be BTR_KEY_VAL_ESTIMATE_N_PAGES
-		different key values, or even more. Let us try to approximate
-		that: */
-
-		add_on = index->stat_n_leaf_pages
-			/ (10 * (BTR_KEY_VAL_ESTIMATE_N_PAGES
-				 + total_external_size));
-
-		if (add_on > BTR_KEY_VAL_ESTIMATE_N_PAGES) {
-			add_on = BTR_KEY_VAL_ESTIMATE_N_PAGES;
-		}
-
-		index->stat_n_diff_key_vals[j] += add_on;
-	}
-
-	mem_free(n_diff);
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-}
-
-/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
-
-/***************************************************************
-Gets the externally stored size of a record, in units of a database page. */
-static
-ulint
-btr_rec_get_externally_stored_len(
-/*==============================*/
-				/* out: externally stored part,
-				in units of a database page */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	ulint	n_fields;
-	byte*	data;
-	ulint	local_len;
-	ulint	extern_len;
-	ulint	total_extern_len = 0;
-	ulint	i;
-
-	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
-	n_fields = rec_offs_n_fields(offsets);
-
-	for (i = 0; i < n_fields; i++) {
-		if (rec_offs_nth_extern(offsets, i)) {
-
-			data = rec_get_nth_field(rec, offsets, i, &local_len);
-
-			local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
-			extern_len = mach_read_from_4(data + local_len
-						      + BTR_EXTERN_LEN + 4);
-
-			total_extern_len += ut_calc_align(extern_len,
-							  UNIV_PAGE_SIZE);
-		}
-	}
-
-	return(total_extern_len / UNIV_PAGE_SIZE);
-}
-
-/***********************************************************************
-Sets the ownership bit of an externally stored field in a record. */
-static
-void
-btr_cur_set_ownership_of_extern_field(
-/*==================================*/
-	rec_t*		rec,	/* in: clustered index record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		i,	/* in: field number */
-	ibool		val,	/* in: value to set */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	byte*	data;
-	ulint	local_len;
-	ulint	byte_val;
-
-	data = rec_get_nth_field(rec, offsets, i, &local_len);
-
-	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
-	byte_val = mach_read_from_1(data + local_len + BTR_EXTERN_LEN);
-
-	if (val) {
-		byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
-	} else {
-		byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
-	}
-
-	mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
-			 MLOG_1BYTE, mtr);
-}
-
-/***********************************************************************
-Marks not updated extern fields as not-owned by this record. The ownership
-is transferred to the updated record which is inserted elsewhere in the
-index tree. In purge only the owner of externally stored field is allowed
-to free the field. */
-
-void
-btr_cur_mark_extern_inherited_fields(
-/*=================================*/
-	rec_t*		rec,	/* in: record in a clustered index */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	upd_t*		update,	/* in: update vector */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ibool	is_updated;
-	ulint	n;
-	ulint	j;
-	ulint	i;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
-	n = rec_offs_n_fields(offsets);
-
-	for (i = 0; i < n; i++) {
-		if (rec_offs_nth_extern(offsets, i)) {
-
-			/* Check it is not in updated fields */
-			is_updated = FALSE;
-
-			if (update) {
-				for (j = 0; j < upd_get_n_fields(update);
-				     j++) {
-					if (upd_get_nth_field(update, j)
-					    ->field_no == i) {
-						is_updated = TRUE;
-					}
-				}
-			}
-
-			if (!is_updated) {
-				btr_cur_set_ownership_of_extern_field(
-					rec, offsets, i, FALSE, mtr);
-			}
-		}
-	}
-}
-
-/***********************************************************************
-The complement of the previous function: in an update entry may inherit
-some externally stored fields from a record. We must mark them as inherited
-in entry, so that they are not freed in a rollback. */
-
-void
-btr_cur_mark_dtuple_inherited_extern(
-/*=================================*/
-	dtuple_t*	entry,		/* in: updated entry to be inserted to
-					clustered index */
-	ulint*		ext_vec,	/* in: array of extern fields in the
-					original record */
-	ulint		n_ext_vec,	/* in: number of elements in ext_vec */
-	upd_t*		update)		/* in: update vector */
-{
-	dfield_t* dfield;
-	ulint	byte_val;
-	byte*	data;
-	ulint	len;
-	ibool	is_updated;
-	ulint	j;
-	ulint	i;
-
-	if (ext_vec == NULL) {
-
-		return;
-	}
-
-	for (i = 0; i < n_ext_vec; i++) {
-
-		/* Check ext_vec[i] is in updated fields */
-		is_updated = FALSE;
-
-		for (j = 0; j < upd_get_n_fields(update); j++) {
-			if (upd_get_nth_field(update, j)->field_no
-			    == ext_vec[i]) {
-				is_updated = TRUE;
-			}
-		}
-
-		if (!is_updated) {
-			dfield = dtuple_get_nth_field(entry, ext_vec[i]);
-
-			data = (byte*) dfield_get_data(dfield);
-			len = dfield_get_len(dfield);
-
-			len -= BTR_EXTERN_FIELD_REF_SIZE;
-
-			byte_val = mach_read_from_1(data + len
-						    + BTR_EXTERN_LEN);
-
-			byte_val = byte_val | BTR_EXTERN_INHERITED_FLAG;
-
-			mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val);
-		}
-	}
-}
-
-/***********************************************************************
-Marks all extern fields in a record as owned by the record. This function
-should be called if the delete mark of a record is removed: a not delete
-marked record always owns all its extern fields. */
-static
-void
-btr_cur_unmark_extern_fields(
-/*=========================*/
-	rec_t*		rec,	/* in: record in a clustered index */
-	mtr_t*		mtr,	/* in: mtr */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	ulint	n;
-	ulint	i;
-
-	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
-	n = rec_offs_n_fields(offsets);
-
-	for (i = 0; i < n; i++) {
-		if (rec_offs_nth_extern(offsets, i)) {
-
-			btr_cur_set_ownership_of_extern_field(rec, offsets, i,
-							      TRUE, mtr);
-		}
-	}
-}
-
-/***********************************************************************
-Marks all extern fields in a dtuple as owned by the record. */
-
-void
-btr_cur_unmark_dtuple_extern_fields(
-/*================================*/
-	dtuple_t*	entry,		/* in: clustered index entry */
-	ulint*		ext_vec,	/* in: array of numbers of fields
-					which have been stored externally */
-	ulint		n_ext_vec)	/* in: number of elements in ext_vec */
-{
-	dfield_t* dfield;
-	ulint	byte_val;
-	byte*	data;
-	ulint	len;
-	ulint	i;
-
-	for (i = 0; i < n_ext_vec; i++) {
-		dfield = dtuple_get_nth_field(entry, ext_vec[i]);
-
-		data = (byte*) dfield_get_data(dfield);
-		len = dfield_get_len(dfield);
-
-		len -= BTR_EXTERN_FIELD_REF_SIZE;
-
-		byte_val = mach_read_from_1(data + len + BTR_EXTERN_LEN);
-
-		byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
-
-		mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val);
-	}
-}
-
-/***********************************************************************
-Stores the positions of the fields marked as extern storage in the update
-vector, and also those fields who are marked as extern storage in rec
-and not mentioned in updated fields. We use this function to remember
-which fields we must mark as extern storage in a record inserted for an
-update. */
-
-ulint
-btr_push_update_extern_fields(
-/*==========================*/
-				/* out: number of values stored in ext_vect */
-	ulint*		ext_vect,/* in: array of ulints, must be preallocated
-				to have space for all fields in rec */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	upd_t*		update)	/* in: update vector or NULL */
-{
-	ulint	n_pushed	= 0;
-	ibool	is_updated;
-	ulint	n;
-	ulint	j;
-	ulint	i;
-
-	if (update) {
-		n = upd_get_n_fields(update);
-
-		for (i = 0; i < n; i++) {
-
-			if (upd_get_nth_field(update, i)->extern_storage) {
-
-				ext_vect[n_pushed] = upd_get_nth_field(
-					update, i)->field_no;
-
-				n_pushed++;
-			}
-		}
-	}
-
-	n = rec_offs_n_fields(offsets);
-
-	for (i = 0; i < n; i++) {
-		if (rec_offs_nth_extern(offsets, i)) {
-
-			/* Check it is not in updated fields */
-			is_updated = FALSE;
-
-			if (update) {
-				for (j = 0; j < upd_get_n_fields(update);
-				     j++) {
-					if (upd_get_nth_field(update, j)
-					    ->field_no == i) {
-						is_updated = TRUE;
-					}
-				}
-			}
-
-			if (!is_updated) {
-				ext_vect[n_pushed] = i;
-				n_pushed++;
-			}
-		}
-	}
-
-	return(n_pushed);
-}
-
-/***********************************************************************
-Returns the length of a BLOB part stored on the header page. */
-static
-ulint
-btr_blob_get_part_len(
-/*==================*/
-				/* out: part length */
-	byte*	blob_header)	/* in: blob header */
-{
-	return(mach_read_from_4(blob_header + BTR_BLOB_HDR_PART_LEN));
-}
-
-/***********************************************************************
-Returns the page number where the next BLOB part is stored. */
-static
-ulint
-btr_blob_get_next_page_no(
-/*======================*/
-				/* out: page number or FIL_NULL if
-				no more pages */
-	byte*	blob_header)	/* in: blob header */
-{
-	return(mach_read_from_4(blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO));
-}
-
-/***********************************************************************
-Stores the fields in big_rec_vec to the tablespace and puts pointers to
-them in rec. The fields are stored on pages allocated from leaf node
-file segment of the index tree. */
-
-ulint
-btr_store_big_rec_extern_fields(
-/*============================*/
-					/* out: DB_SUCCESS or error */
-	dict_index_t*	index,		/* in: index of rec; the index tree
-					MUST be X-latched */
-	rec_t*		rec,		/* in: record */
-	const ulint*	offsets,	/* in: rec_get_offsets(rec, index);
-					the "external storage" flags in offsets
-					will not correspond to rec when
-					this function returns */
-	big_rec_t*	big_rec_vec,	/* in: vector containing fields
-					to be stored externally */
-	mtr_t*		local_mtr __attribute__((unused))) /* in: mtr
-					containing the latch to rec and to the
-					tree */
-{
-	byte*	data;
-	ulint	local_len;
-	ulint	extern_len;
-	ulint	store_len;
-	ulint	page_no;
-	page_t*	page;
-	ulint	space_id;
-	page_t*	prev_page;
-	page_t*	rec_page;
-	ulint	prev_page_no;
-	ulint	hint_page_no;
-	ulint	i;
-	mtr_t	mtr;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_a(index->type & DICT_CLUSTERED);
-
-	space_id = buf_frame_get_space_id(rec);
-
-	/* We have to create a file segment to the tablespace
-	for each field and put the pointer to the field in rec */
-
-	for (i = 0; i < big_rec_vec->n_fields; i++) {
-
-		data = rec_get_nth_field(rec, offsets,
-					 big_rec_vec->fields[i].field_no,
-					 &local_len);
-		ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-		local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-		extern_len = big_rec_vec->fields[i].len;
-
-		ut_a(extern_len > 0);
-
-		prev_page_no = FIL_NULL;
-
-		while (extern_len > 0) {
-			mtr_start(&mtr);
-
-			if (prev_page_no == FIL_NULL) {
-				hint_page_no = buf_frame_get_page_no(rec) + 1;
-			} else {
-				hint_page_no = prev_page_no + 1;
-			}
-
-			page = btr_page_alloc(index, hint_page_no,
-					      FSP_NO_DIR, 0, &mtr);
-			if (page == NULL) {
-
-				mtr_commit(&mtr);
-
-				return(DB_OUT_OF_FILE_SPACE);
-			}
-
-			mlog_write_ulint(page + FIL_PAGE_TYPE,
-					 FIL_PAGE_TYPE_BLOB,
-					 MLOG_2BYTES, &mtr);
-
-			page_no = buf_frame_get_page_no(page);
-
-			if (prev_page_no != FIL_NULL) {
-				prev_page = buf_page_get(space_id,
-							 prev_page_no,
-							 RW_X_LATCH, &mtr);
-
-#ifdef UNIV_SYNC_DEBUG
-				buf_page_dbg_add_level(prev_page,
-						       SYNC_EXTERN_STORAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
-				mlog_write_ulint(prev_page + FIL_PAGE_DATA
-						 + BTR_BLOB_HDR_NEXT_PAGE_NO,
-						 page_no, MLOG_4BYTES, &mtr);
-			}
-
-			if (extern_len > (UNIV_PAGE_SIZE - FIL_PAGE_DATA
-					  - BTR_BLOB_HDR_SIZE
-					  - FIL_PAGE_DATA_END)) {
-				store_len = UNIV_PAGE_SIZE - FIL_PAGE_DATA
-					- BTR_BLOB_HDR_SIZE
-					- FIL_PAGE_DATA_END;
-			} else {
-				store_len = extern_len;
-			}
-
-			mlog_write_string(page + FIL_PAGE_DATA
-					  + BTR_BLOB_HDR_SIZE,
-					  big_rec_vec->fields[i].data
-					  + big_rec_vec->fields[i].len
-					  - extern_len,
-					  store_len, &mtr);
-			mlog_write_ulint(page + FIL_PAGE_DATA
-					 + BTR_BLOB_HDR_PART_LEN,
-					 store_len, MLOG_4BYTES, &mtr);
-			mlog_write_ulint(page + FIL_PAGE_DATA
-					 + BTR_BLOB_HDR_NEXT_PAGE_NO,
-					 FIL_NULL, MLOG_4BYTES, &mtr);
-
-			extern_len -= store_len;
-
-			rec_page = buf_page_get(space_id,
-						buf_frame_get_page_no(data),
-						RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
-			buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-			mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0,
-					 MLOG_4BYTES, &mtr);
-			mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
-					 big_rec_vec->fields[i].len
-					 - extern_len,
-					 MLOG_4BYTES, &mtr);
-
-			if (prev_page_no == FIL_NULL) {
-				mlog_write_ulint(data + local_len
-						 + BTR_EXTERN_SPACE_ID,
-						 space_id,
-						 MLOG_4BYTES, &mtr);
-
-				mlog_write_ulint(data + local_len
-						 + BTR_EXTERN_PAGE_NO,
-						 page_no,
-						 MLOG_4BYTES, &mtr);
-
-				mlog_write_ulint(data + local_len
-						 + BTR_EXTERN_OFFSET,
-						 FIL_PAGE_DATA,
-						 MLOG_4BYTES, &mtr);
-
-				/* Set the bit denoting that this field
-				in rec is stored externally */
-
-				rec_set_nth_field_extern_bit(
-					rec, index,
-					big_rec_vec->fields[i].field_no,
-					TRUE, &mtr);
-			}
-
-			prev_page_no = page_no;
-
-			mtr_commit(&mtr);
-		}
-	}
-
-	return(DB_SUCCESS);
-}
-
-/***********************************************************************
-Frees the space in an externally stored field to the file space
-management if the field in data is owned the externally stored field,
-in a rollback we may have the additional condition that the field must
-not be inherited. */
-
-void
-btr_free_externally_stored_field(
-/*=============================*/
-	dict_index_t*	index,		/* in: index of the data, the index
-					tree MUST be X-latched; if the tree
-					height is 1, then also the root page
-					must be X-latched! (this is relevant
-					in the case this function is called
-					from purge where 'data' is located on
-					an undo log page, not an index
-					page) */
-	byte*		data,		/* in: internally stored data
-					+ reference to the externally
-					stored part */
-	ulint		local_len,	/* in: length of data */
-	ibool		do_not_free_inherited,/* in: TRUE if called in a
-					rollback and we do not want to free
-					inherited fields */
-	mtr_t*		local_mtr __attribute__((unused))) /* in: mtr
-					containing the latch to data an an
-					X-latch to the index tree */
-{
-	page_t*	page;
-	page_t*	rec_page;
-	ulint	space_id;
-	ulint	page_no;
-	ulint	offset;
-	ulint	extern_len;
-	ulint	next_page_no;
-	ulint	part_len;
-	mtr_t	mtr;
-
-	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-	ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(local_mtr, buf_block_align(data),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
-	for (;;) {
-		mtr_start(&mtr);
-
-		rec_page = buf_page_get(buf_frame_get_space_id(data),
-					buf_frame_get_page_no(data),
-					RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-		space_id = mach_read_from_4(data + local_len
-					    + BTR_EXTERN_SPACE_ID);
-
-		page_no = mach_read_from_4(data + local_len
-					   + BTR_EXTERN_PAGE_NO);
-
-		offset = mach_read_from_4(data + local_len
-					  + BTR_EXTERN_OFFSET);
-		extern_len = mach_read_from_4(data + local_len
-					      + BTR_EXTERN_LEN + 4);
-
-		/* If extern len is 0, then there is no external storage data
-		at all */
-
-		if (extern_len == 0) {
-
-			mtr_commit(&mtr);
-
-			return;
-		}
-
-		if (mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
-		    & BTR_EXTERN_OWNER_FLAG) {
-			/* This field does not own the externally
-			stored field: do not free! */
-
-			mtr_commit(&mtr);
-
-			return;
-		}
-
-		if (do_not_free_inherited
-		    && mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
-		    & BTR_EXTERN_INHERITED_FLAG) {
-			/* Rollback and inherited field: do not free! */
-
-			mtr_commit(&mtr);
-
-			return;
-		}
-
-		page = buf_page_get(space_id, page_no, RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
-#endif /* UNIV_SYNC_DEBUG */
-		next_page_no = mach_read_from_4(page + FIL_PAGE_DATA
-						+ BTR_BLOB_HDR_NEXT_PAGE_NO);
-
-		part_len = btr_blob_get_part_len(page + FIL_PAGE_DATA);
-
-		ut_a(extern_len >= part_len);
-
-		/* We must supply the page level (= 0) as an argument
-		because we did not store it on the page (we save the space
-		overhead from an index page header. */
-
-		btr_page_free_low(index, page, 0, &mtr);
-
-		mlog_write_ulint(data + local_len + BTR_EXTERN_PAGE_NO,
-				 next_page_no,
-				 MLOG_4BYTES, &mtr);
-		mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
-				 extern_len - part_len,
-				 MLOG_4BYTES, &mtr);
-		if (next_page_no == FIL_NULL) {
-			ut_a(extern_len - part_len == 0);
-		}
-
-		if (extern_len - part_len == 0) {
-			ut_a(next_page_no == FIL_NULL);
-		}
-
-		mtr_commit(&mtr);
-	}
-}
-
-/***************************************************************
-Frees the externally stored fields for a record. */
-
-void
-btr_rec_free_externally_stored_fields(
-/*==================================*/
-	dict_index_t*	index,	/* in: index of the data, the index
-				tree MUST be X-latched */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	ibool		do_not_free_inherited,/* in: TRUE if called in a
-				rollback and we do not want to free
-				inherited fields */
-	mtr_t*		mtr)	/* in: mini-transaction handle which contains
-				an X-latch to record page and to the index
-				tree */
-{
-	ulint	n_fields;
-	byte*	data;
-	ulint	len;
-	ulint	i;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
-				MTR_MEMO_PAGE_X_FIX));
-	/* Free possible externally stored fields in the record */
-
-	ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));
-	n_fields = rec_offs_n_fields(offsets);
-
-	for (i = 0; i < n_fields; i++) {
-		if (rec_offs_nth_extern(offsets, i)) {
-
-			data = rec_get_nth_field(rec, offsets, i, &len);
-			btr_free_externally_stored_field(index, data, len,
-							 do_not_free_inherited,
-							 mtr);
-		}
-	}
-}
-
-/***************************************************************
-Frees the externally stored fields for a record, if the field is mentioned
-in the update vector. */
-static
-void
-btr_rec_free_updated_extern_fields(
-/*===============================*/
-	dict_index_t*	index,	/* in: index of rec; the index tree MUST be
-				X-latched */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	upd_t*		update,	/* in: update vector */
-	ibool		do_not_free_inherited,/* in: TRUE if called in a
-				rollback and we do not want to free
-				inherited fields */
-	mtr_t*		mtr)	/* in: mini-transaction handle which contains
-				an X-latch to record page and to the tree */
-{
-	upd_field_t*	ufield;
-	ulint		n_fields;
-	byte*		data;
-	ulint		len;
-	ulint		i;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
-				MTR_MEMO_PAGE_X_FIX));
-
-	/* Free possible externally stored fields in the record */
-
-	n_fields = upd_get_n_fields(update);
-
-	for (i = 0; i < n_fields; i++) {
-		ufield = upd_get_nth_field(update, i);
-
-		if (rec_offs_nth_extern(offsets, ufield->field_no)) {
-
-			data = rec_get_nth_field(rec, offsets,
-						 ufield->field_no, &len);
-			btr_free_externally_stored_field(index, data, len,
-							 do_not_free_inherited,
-							 mtr);
-		}
-	}
-}
-
-/***********************************************************************
-Copies an externally stored field of a record to mem heap. Parameter
-data contains a pointer to 'internally' stored part of the field:
-possibly some data, and the reference to the externally stored part in
-the last 20 bytes of data. */
-
-byte*
-btr_copy_externally_stored_field(
-/*=============================*/
-				/* out: the whole field copied to heap */
-	ulint*		len,	/* out: length of the whole field */
-	byte*		data,	/* in: 'internally' stored part of the
-				field containing also the reference to
-				the external part */
-	ulint		local_len,/* in: length of data */
-	mem_heap_t*	heap)	/* in: mem heap */
-{
-	page_t*	page;
-	ulint	space_id;
-	ulint	page_no;
-	ulint	offset;
-	ulint	extern_len;
-	byte*	blob_header;
-	ulint	part_len;
-	byte*	buf;
-	ulint	copied_len;
-	mtr_t	mtr;
-
-	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
-	space_id = mach_read_from_4(data + local_len + BTR_EXTERN_SPACE_ID);
-
-	page_no = mach_read_from_4(data + local_len + BTR_EXTERN_PAGE_NO);
-
-	offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);
-
-	/* Currently a BLOB cannot be bigger that 4 GB; we
-	leave the 4 upper bytes in the length field unused */
-
-	extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);
-
-	buf = mem_heap_alloc(heap, local_len + extern_len);
-
-	ut_memcpy(buf, data, local_len);
-	copied_len = local_len;
-
-	if (extern_len == 0) {
-		*len = copied_len;
-
-		return(buf);
-	}
-
-	for (;;) {
-		mtr_start(&mtr);
-
-		page = buf_page_get(space_id, page_no, RW_S_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
-#endif /* UNIV_SYNC_DEBUG */
-		blob_header = page + offset;
-
-		part_len = btr_blob_get_part_len(blob_header);
-
-		ut_memcpy(buf + copied_len, blob_header + BTR_BLOB_HDR_SIZE,
-			  part_len);
-		copied_len += part_len;
-
-		page_no = btr_blob_get_next_page_no(blob_header);
-
-		mtr_commit(&mtr);
-
-		if (page_no == FIL_NULL) {
-			ut_a(copied_len == local_len + extern_len);
-
-			*len = copied_len;
-
-			return(buf);
-		}
-
-		/* On other BLOB pages except the first the BLOB header
-		always is at the page data start: */
-
-		offset = FIL_PAGE_DATA;
-
-		ut_a(copied_len < local_len + extern_len);
-	}
-}
-
-/***********************************************************************
-Copies an externally stored field of a record to mem heap. */
-
-byte*
-btr_rec_copy_externally_stored_field(
-/*=================================*/
-				/* out: the field copied to heap */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		no,	/* in: field number */
-	ulint*		len,	/* out: length of the field */
-	mem_heap_t*	heap)	/* in: mem heap */
-{
-	ulint	local_len;
-	byte*	data;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_a(rec_offs_nth_extern(offsets, no));
-
-	/* An externally stored field can contain some initial
-	data from the field, and in the last 20 bytes it has the
-	space id, page number, and offset where the rest of the
-	field data is stored, and the data length in addition to
-	the data stored locally. We may need to store some data
-	locally to get the local record length above the 128 byte
-	limit so that field offsets are stored in two bytes, and
-	the extern bit is available in those two bytes. */
-
-	data = rec_get_nth_field(rec, offsets, no, &local_len);
-
-	return(btr_copy_externally_stored_field(len, data, local_len, heap));
-}
diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
deleted file mode 100644
index 5b4f0ee6ecb..00000000000
--- a/storage/innobase/buf/buf0buf.c
+++ /dev/null
@@ -1,2590 +0,0 @@
-/*   Innobase relational database engine; Copyright (C) 2001 Innobase Oy
-
-     This program is free software; you can redistribute it and/or modify
-     it under the terms of the GNU General Public License 2
-     as published by the Free Software Foundation in June 1991.
-
-     This program is distributed in the hope that it will be useful,
-     but WITHOUT ANY WARRANTY; without even the implied warranty of
-     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-     GNU General Public License for more details.
-
-     You should have received a copy of the GNU General Public License 2
-     along with this program (in file COPYING); if not, write to the Free
-     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-/******************************************************
-The database buffer buf_pool
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0buf.h"
-
-#ifdef UNIV_NONINL
-#include "buf0buf.ic"
-#endif
-
-#include "mem0mem.h"
-#include "btr0btr.h"
-#include "fil0fil.h"
-#include "lock0lock.h"
-#include "btr0sea.h"
-#include "ibuf0ibuf.h"
-#include "dict0dict.h"
-#include "log0recv.h"
-#include "log0log.h"
-#include "trx0undo.h"
-#include "srv0srv.h"
-
-/*
-		IMPLEMENTATION OF THE BUFFER POOL
-		=================================
-
-Performance improvement:
-------------------------
-Thread scheduling in NT may be so slow that the OS wait mechanism should
-not be used even in waiting for disk reads to complete.
-Rather, we should put waiting query threads to the queue of
-waiting jobs, and let the OS thread do something useful while the i/o
-is processed. In this way we could remove most OS thread switches in
-an i/o-intensive benchmark like TPC-C.
-
-A possibility is to put a user space thread library between the database
-and NT. User space thread libraries might be very fast.
-
-SQL Server 7.0 can be configured to use 'fibers' which are lightweight
-threads in NT. These should be studied.
-
-		Buffer frames and blocks
-		------------------------
-Following the terminology of Gray and Reuter, we call the memory
-blocks where file pages are loaded buffer frames. For each buffer
-frame there is a control block, or shortly, a block, in the buffer
-control array. The control info which does not need to be stored
-in the file along with the file page, resides in the control block.
-
-		Buffer pool struct
-		------------------
-The buffer buf_pool contains a single mutex which protects all the
-control data structures of the buf_pool. The content of a buffer frame is
-protected by a separate read-write lock in its control block, though.
-These locks can be locked and unlocked without owning the buf_pool mutex.
-The OS events in the buf_pool struct can be waited for without owning the
-buf_pool mutex.
-
-The buf_pool mutex is a hot-spot in main memory, causing a lot of
-memory bus traffic on multiprocessor systems when processors
-alternately access the mutex. On our Pentium, the mutex is accessed
-maybe every 10 microseconds. We gave up the solution to have mutexes
-for each control block, for instance, because it seemed to be
-complicated.
-
-A solution to reduce mutex contention of the buf_pool mutex is to
-create a separate mutex for the page hash table. On Pentium,
-accessing the hash table takes 2 microseconds, about half
-of the total buf_pool mutex hold time.
-
-		Control blocks
-		--------------
-
-The control block contains, for instance, the bufferfix count
-which is incremented when a thread wants a file page to be fixed
-in a buffer frame. The bufferfix operation does not lock the
-contents of the frame, however. For this purpose, the control
-block contains a read-write lock.
-
-The buffer frames have to be aligned so that the start memory
-address of a frame is divisible by the universal page size, which
-is a power of two.
-
-We intend to make the buffer buf_pool size on-line reconfigurable,
-that is, the buf_pool size can be changed without closing the database.
-Then the database administarator may adjust it to be bigger
-at night, for example. The control block array must
-contain enough control blocks for the maximum buffer buf_pool size
-which is used in the particular database.
-If the buf_pool size is cut, we exploit the virtual memory mechanism of
-the OS, and just refrain from using frames at high addresses. Then the OS
-can swap them to disk.
-
-The control blocks containing file pages are put to a hash table
-according to the file address of the page.
-We could speed up the access to an individual page by using
-"pointer swizzling": we could replace the page references on
-non-leaf index pages by direct pointers to the page, if it exists
-in the buf_pool. We could make a separate hash table where we could
-chain all the page references in non-leaf pages residing in the buf_pool,
-using the page reference as the hash key,
-and at the time of reading of a page update the pointers accordingly.
-Drawbacks of this solution are added complexity and,
-possibly, extra space required on non-leaf pages for memory pointers.
-A simpler solution is just to speed up the hash table mechanism
-in the database, using tables whose size is a power of 2.
-
-		Lists of blocks
-		---------------
-
-There are several lists of control blocks. The free list contains
-blocks which are currently not used.
-
-The LRU-list contains all the blocks holding a file page
-except those for which the bufferfix count is non-zero.
-The pages are in the LRU list roughly in the order of the last
-access to the page, so that the oldest pages are at the end of the
-list. We also keep a pointer to near the end of the LRU list,
-which we can use when we want to artificially age a page in the
-buf_pool. This is used if we know that some page is not needed
-again for some time: we insert the block right after the pointer,
-causing it to be replaced sooner than would noramlly be the case.
-Currently this aging mechanism is used for read-ahead mechanism
-of pages, and it can also be used when there is a scan of a full
-table which cannot fit in the memory. Putting the pages near the
-of the LRU list, we make sure that most of the buf_pool stays in the
-main memory, undisturbed.
-
-The chain of modified blocks contains the blocks
-holding file pages that have been modified in the memory
-but not written to disk yet. The block with the oldest modification
-which has not yet been written to disk is at the end of the chain.
-
-		Loading a file page
-		-------------------
-
-First, a victim block for replacement has to be found in the
-buf_pool. It is taken from the free list or searched for from the
-end of the LRU-list. An exclusive lock is reserved for the frame,
-the io_fix field is set in the block fixing the block in buf_pool,
-and the io-operation for loading the page is queued. The io-handler thread
-releases the X-lock on the frame and resets the io_fix field
-when the io operation completes.
-
-A thread may request the above operation using the function
-buf_page_get(). It may then continue to request a lock on the frame.
-The lock is granted when the io-handler releases the x-lock.
-
-		Read-ahead
-		----------
-
-The read-ahead mechanism is intended to be intelligent and
-isolated from the semantically higher levels of the database
-index management. From the higher level we only need the
-information if a file page has a natural successor or
-predecessor page. On the leaf level of a B-tree index,
-these are the next and previous pages in the natural
-order of the pages.
-
-Let us first explain the read-ahead mechanism when the leafs
-of a B-tree are scanned in an ascending or descending order.
-When a read page is the first time referenced in the buf_pool,
-the buffer manager checks if it is at the border of a so-called
-linear read-ahead area. The tablespace is divided into these
-areas of size 64 blocks, for example. So if the page is at the
-border of such an area, the read-ahead mechanism checks if
-all the other blocks in the area have been accessed in an
-ascending or descending order. If this is the case, the system
-looks at the natural successor or predecessor of the page,
-checks if that is at the border of another area, and in this case
-issues read-requests for all the pages in that area. Maybe
-we could relax the condition that all the pages in the area
-have to be accessed: if data is deleted from a table, there may
-appear holes of unused pages in the area.
-
-A different read-ahead mechanism is used when there appears
-to be a random access pattern to a file.
-If a new page is referenced in the buf_pool, and several pages
-of its random access area (for instance, 32 consecutive pages
-in a tablespace) have recently been referenced, we may predict
-that the whole area may be needed in the near future, and issue
-the read requests for the whole area.
-
-		AWE implementation
-		------------------
-
-By a 'block' we mean the buffer header of type buf_block_t. By a 'page'
-we mean the physical 16 kB memory area allocated from RAM for that block.
-By a 'frame' we mean a 16 kB area in the virtual address space of the
-process, in the frame_mem of buf_pool.
-
-We can map pages to the frames of the buffer pool.
-
-1) A buffer block allocated to use as a non-data page, e.g., to the lock
-table, is always mapped to a frame.
-2) A bufferfixed or io-fixed data page is always mapped to a frame.
-3) When we need to map a block to frame, we look from the list
-awe_LRU_free_mapped and try to unmap its last block, but note that
-bufferfixed or io-fixed pages cannot be unmapped.
-4) For every frame in the buffer pool there is always a block whose page is
-mapped to it. When we create the buffer pool, we map the first elements
-in the free list to the frames.
-5) When we have AWE enabled, we disable adaptive hash indexes.
-*/
-
-/* Value in microseconds */
-static const int WAIT_FOR_READ	= 20000;
-
-buf_pool_t*	buf_pool = NULL; /* The buffer buf_pool of the database */
-
-#ifdef UNIV_DEBUG
-ulint		buf_dbg_counter	= 0; /* This is used to insert validation
-					operations in excution in the
-					debug version */
-ibool		buf_debug_prints = FALSE; /* If this is set TRUE,
-					the program prints info whenever
-					read-ahead or flush occurs */
-#endif /* UNIV_DEBUG */
-/************************************************************************
-Calculates a page checksum which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value on
-32-bit and 64-bit architectures. */
-
-ulint
-buf_calc_page_new_checksum(
-/*=======================*/
-			/* out: checksum */
-	byte*	 page)	/* in: buffer page */
-{
-	ulint checksum;
-
-	/* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
-	..._ARCH_LOG_NO, are written outside the buffer pool to the first
-	pages of data files, we have to skip them in the page checksum
-	calculation.
-	We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
-	checksum is stored, and also the last 8 bytes of page because
-	there we store the old formula checksum. */
-
-	checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
-				  FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
-		+ ut_fold_binary(page + FIL_PAGE_DATA,
-				 UNIV_PAGE_SIZE - FIL_PAGE_DATA
-				 - FIL_PAGE_END_LSN_OLD_CHKSUM);
-	checksum = checksum & 0xFFFFFFFFUL;
-
-	return(checksum);
-}
-
-/************************************************************************
-In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
-looked at the first few bytes of the page. This calculates that old
-checksum.
-NOTE: we must first store the new formula checksum to
-FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
-because this takes that field as an input! */
-
-ulint
-buf_calc_page_old_checksum(
-/*=======================*/
-			/* out: checksum */
-	byte*	 page)	/* in: buffer page */
-{
-	ulint checksum;
-
-	checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
-
-	checksum = checksum & 0xFFFFFFFFUL;
-
-	return(checksum);
-}
-
-/************************************************************************
-Checks if a page is corrupt. */
-
-ibool
-buf_page_is_corrupted(
-/*==================*/
-				/* out: TRUE if corrupted */
-	byte*	read_buf)	/* in: a database page */
-{
-	ulint	checksum;
-	ulint	old_checksum;
-	ulint	checksum_field;
-	ulint	old_checksum_field;
-#ifndef UNIV_HOTBACKUP
-	dulint	current_lsn;
-#endif
-	if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
-	    != mach_read_from_4(read_buf + UNIV_PAGE_SIZE
-				- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
-
-		/* Stored log sequence numbers at the start and the end
-		of page do not match */
-
-		return(TRUE);
-	}
-
-#ifndef UNIV_HOTBACKUP
-	if (recv_lsn_checks_on && log_peek_lsn(&current_lsn)) {
-		if (ut_dulint_cmp(current_lsn,
-				  mach_read_from_8(read_buf + FIL_PAGE_LSN))
-		    < 0) {
-			ut_print_timestamp(stderr);
-
-			fprintf(stderr,
-				"  InnoDB: Error: page %lu log sequence number"
-				" %lu %lu\n"
-				"InnoDB: is in the future! Current system "
-				"log sequence number %lu %lu.\n"
-				"InnoDB: Your database may be corrupt or "
-				"you may have copied the InnoDB\n"
-				"InnoDB: tablespace but not the InnoDB "
-				"log files. See\n"
-				"InnoDB: http://dev.mysql.com/doc/refman/"
-				"5.1/en/forcing-recovery.html\n"
-				"InnoDB: for more information.\n",
-				(ulong) mach_read_from_4(read_buf
-							 + FIL_PAGE_OFFSET),
-				(ulong) ut_dulint_get_high
-				(mach_read_from_8(read_buf + FIL_PAGE_LSN)),
-				(ulong) ut_dulint_get_low
-				(mach_read_from_8(read_buf + FIL_PAGE_LSN)),
-				(ulong) ut_dulint_get_high(current_lsn),
-				(ulong) ut_dulint_get_low(current_lsn));
-		}
-	}
-#endif
-
-	/* If we use checksums validation, make additional check before
-	returning TRUE to ensure that the checksum is not equal to
-	BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
-	disabled. Otherwise, skip checksum calculation and return FALSE */
-
-	if (srv_use_checksums) {
-		old_checksum = buf_calc_page_old_checksum(read_buf);
-
-		old_checksum_field = mach_read_from_4(
-			read_buf + UNIV_PAGE_SIZE
-			- FIL_PAGE_END_LSN_OLD_CHKSUM);
-
-		/* There are 2 valid formulas for old_checksum_field:
-
-		1. Very old versions of InnoDB only stored 8 byte lsn to the
-		start and the end of the page.
-
-		2. Newer InnoDB versions store the old formula checksum
-		there. */
-
-		if (old_checksum_field != mach_read_from_4(read_buf
-							   + FIL_PAGE_LSN)
-		    && old_checksum_field != old_checksum
-		    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC) {
-
-			return(TRUE);
-		}
-
-		checksum = buf_calc_page_new_checksum(read_buf);
-		checksum_field = mach_read_from_4(read_buf
-						  + FIL_PAGE_SPACE_OR_CHKSUM);
-
-		/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
-		(always equal to 0), to FIL_PAGE_SPACE_SPACE_OR_CHKSUM */
-
-		if (checksum_field != 0 && checksum_field != checksum
-		    && checksum_field != BUF_NO_CHECKSUM_MAGIC) {
-
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-/************************************************************************
-Prints a page to stderr. */
-
-void
-buf_page_print(
-/*===========*/
-	byte*	read_buf)	/* in: a database page */
-{
-	dict_index_t*	index;
-	ulint		checksum;
-	ulint		old_checksum;
-
-	ut_print_timestamp(stderr);
-	fprintf(stderr, "  InnoDB: Page dump in ascii and hex (%lu bytes):\n",
-		(ulint)UNIV_PAGE_SIZE);
-	ut_print_buf(stderr, read_buf, UNIV_PAGE_SIZE);
-	fputs("InnoDB: End of page dump\n", stderr);
-
-	checksum = srv_use_checksums
-		? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
-	old_checksum = srv_use_checksums
-		? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
-
-	ut_print_timestamp(stderr);
-	fprintf(stderr,
-		"  InnoDB: Page checksum %lu, prior-to-4.0.14-form"
-		" checksum %lu\n"
-		"InnoDB: stored checksum %lu, prior-to-4.0.14-form"
-		" stored checksum %lu\n"
-		"InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
-		" at page end %lu\n"
-		"InnoDB: Page number (if stored to page already) %lu,\n"
-		"InnoDB: space id (if created with >= MySQL-4.1.1"
-		" and stored already) %lu\n",
-		(ulong) checksum, (ulong) old_checksum,
-		(ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
-		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
-					 - FIL_PAGE_END_LSN_OLD_CHKSUM),
-		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
-		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
-		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
-					 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
-		(ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
-		(ulong) mach_read_from_4(read_buf
-					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
-
-	if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
-	    == TRX_UNDO_INSERT) {
-		fprintf(stderr,
-			"InnoDB: Page may be an insert undo log page\n");
-	} else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
-				    + TRX_UNDO_PAGE_TYPE)
-		   == TRX_UNDO_UPDATE) {
-		fprintf(stderr,
-			"InnoDB: Page may be an update undo log page\n");
-	}
-
-	switch (fil_page_get_type(read_buf)) {
-	case FIL_PAGE_INDEX:
-		fprintf(stderr,
-			"InnoDB: Page may be an index page where"
-			" index id is %lu %lu\n",
-			(ulong) ut_dulint_get_high
-			(btr_page_get_index_id(read_buf)),
-			(ulong) ut_dulint_get_low
-			(btr_page_get_index_id(read_buf)));
-
-		/* If the code is in ibbackup, dict_sys may be uninitialized,
-		i.e., NULL */
-
-		if (dict_sys != NULL) {
-
-			index = dict_index_find_on_id_low(
-				btr_page_get_index_id(read_buf));
-			if (index) {
-				fputs("InnoDB: (", stderr);
-				dict_index_name_print(stderr, NULL, index);
-				fputs(")\n", stderr);
-			}
-		}
-		break;
-	case FIL_PAGE_INODE:
-		fputs("InnoDB: Page may be an 'inode' page\n", stderr);
-		break;
-	case FIL_PAGE_IBUF_FREE_LIST:
-		fputs("InnoDB: Page may be an insert buffer free list page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_TYPE_ALLOCATED:
-		fputs("InnoDB: Page may be a freshly allocated page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_IBUF_BITMAP:
-		fputs("InnoDB: Page may be an insert buffer bitmap page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_TYPE_SYS:
-		fputs("InnoDB: Page may be a system page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_TYPE_TRX_SYS:
-		fputs("InnoDB: Page may be a transaction system page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_TYPE_FSP_HDR:
-		fputs("InnoDB: Page may be a file space header page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_TYPE_XDES:
-		fputs("InnoDB: Page may be an extent descriptor page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_TYPE_BLOB:
-		fputs("InnoDB: Page may be a BLOB page\n",
-		      stderr);
-		break;
-	}
-}
-
-/************************************************************************
-Initializes a buffer control block when the buf_pool is created. */
-static
-void
-buf_block_init(
-/*===========*/
-	buf_block_t*	block,	/* in: pointer to control block */
-	byte*		frame)	/* in: pointer to buffer frame, or NULL if in
-				the case of AWE there is no frame */
-{
-	block->magic_n = 0;
-
-	block->state = BUF_BLOCK_NOT_USED;
-
-	block->frame = frame;
-
-	block->awe_info = NULL;
-
-	block->buf_fix_count = 0;
-	block->io_fix = 0;
-
-	block->modify_clock = ut_dulint_zero;
-
-	block->file_page_was_freed = FALSE;
-
-	block->check_index_page_at_flush = FALSE;
-	block->index = NULL;
-
-	block->in_free_list = FALSE;
-	block->in_LRU_list = FALSE;
-
-	block->n_pointers = 0;
-
-	mutex_create(&block->mutex, SYNC_BUF_BLOCK);
-
-	rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
-	ut_ad(rw_lock_validate(&(block->lock)));
-
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-}
-
-/************************************************************************
-Creates the buffer pool. */
-
-buf_pool_t*
-buf_pool_init(
-/*==========*/
-				/* out, own: buf_pool object, NULL if not
-				enough memory or error */
-	ulint	max_size,	/* in: maximum size of the buf_pool in
-				blocks */
-	ulint	curr_size,	/* in: current size to use, must be <=
-				max_size, currently must be equal to
-				max_size */
-	ulint	n_frames)	/* in: number of frames; if AWE is used,
-				this is the size of the address space window
-				where physical memory pages are mapped; if
-				AWE is not used then this must be the same
-				as max_size */
-{
-	byte*		frame;
-	ulint		i;
-	buf_block_t*	block;
-
-	ut_a(max_size == curr_size);
-	ut_a(srv_use_awe || n_frames == max_size);
-
-	if (n_frames > curr_size) {
-		fprintf(stderr,
-			"InnoDB: AWE: Error: you must specify in my.cnf"
-			" .._awe_mem_mb larger\n"
-			"InnoDB: than .._buffer_pool_size. Now the former"
-			" is %lu pages,\n"
-			"InnoDB: the latter %lu pages.\n",
-			(ulong) curr_size, (ulong) n_frames);
-
-		return(NULL);
-	}
-
-	buf_pool = mem_alloc(sizeof(buf_pool_t));
-
-	/* 1. Initialize general fields
-	---------------------------- */
-	mutex_create(&buf_pool->mutex, SYNC_BUF_POOL);
-
-	mutex_enter(&(buf_pool->mutex));
-
-	if (srv_use_awe) {
-		/*----------------------------------------*/
-		/* Allocate the virtual address space window, i.e., the
-		buffer pool frames */
-
-		buf_pool->frame_mem = os_awe_allocate_virtual_mem_window(
-			UNIV_PAGE_SIZE * (n_frames + 1));
-
-		/* Allocate the physical memory for AWE and the AWE info array
-		for buf_pool */
-
-		if ((curr_size % ((1024 * 1024) / UNIV_PAGE_SIZE)) != 0) {
-
-			fprintf(stderr,
-				"InnoDB: AWE: Error: physical memory must be"
-				" allocated in full megabytes.\n"
-				"InnoDB: Trying to allocate %lu"
-				" database pages.\n",
-				(ulong) curr_size);
-
-			return(NULL);
-		}
-
-		if (!os_awe_allocate_physical_mem(&(buf_pool->awe_info),
-						  curr_size
-						  / ((1024 * 1024)
-						     / UNIV_PAGE_SIZE))) {
-
-			return(NULL);
-		}
-		/*----------------------------------------*/
-	} else {
-		buf_pool->frame_mem = os_mem_alloc_large(
-			UNIV_PAGE_SIZE * (n_frames + 1), TRUE, FALSE);
-	}
-
-	if (buf_pool->frame_mem == NULL) {
-
-		return(NULL);
-	}
-
-	buf_pool->blocks = ut_malloc(sizeof(buf_block_t) * max_size);
-
-	if (buf_pool->blocks == NULL) {
-
-		return(NULL);
-	}
-
-	buf_pool->max_size = max_size;
-	buf_pool->curr_size = curr_size;
-
-	buf_pool->n_frames = n_frames;
-
-	/* Align pointer to the first frame */
-
-	frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE);
-
-	buf_pool->frame_zero = frame;
-	buf_pool->high_end = frame + UNIV_PAGE_SIZE * n_frames;
-
-	if (srv_use_awe) {
-		/*----------------------------------------*/
-		/* Map an initial part of the allocated physical memory to
-		the window */
-
-		os_awe_map_physical_mem_to_window(buf_pool->frame_zero,
-						  n_frames
-						  * (UNIV_PAGE_SIZE
-						     / OS_AWE_X86_PAGE_SIZE),
-						  buf_pool->awe_info);
-		/*----------------------------------------*/
-	}
-
-	buf_pool->blocks_of_frames = ut_malloc(sizeof(void*) * n_frames);
-
-	if (buf_pool->blocks_of_frames == NULL) {
-
-		return(NULL);
-	}
-
-	/* Init block structs and assign frames for them; in the case of
-	AWE there are less frames than blocks. Then we assign the frames
-	to the first blocks (we already mapped the memory above). We also
-	init the awe_info for every block. */
-
-	for (i = 0; i < max_size; i++) {
-
-		block = buf_pool_get_nth_block(buf_pool, i);
-
-		if (i < n_frames) {
-			frame = buf_pool->frame_zero + i * UNIV_PAGE_SIZE;
-			*(buf_pool->blocks_of_frames + i) = block;
-		} else {
-			frame = NULL;
-		}
-
-		buf_block_init(block, frame);
-
-		if (srv_use_awe) {
-			/*----------------------------------------*/
-			block->awe_info = buf_pool->awe_info
-				+ i * (UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE);
-			/*----------------------------------------*/
-		}
-	}
-
-	buf_pool->page_hash = hash_create(2 * max_size);
-
-	buf_pool->n_pend_reads = 0;
-
-	buf_pool->last_printout_time = time(NULL);
-
-	buf_pool->n_pages_read = 0;
-	buf_pool->n_pages_written = 0;
-	buf_pool->n_pages_created = 0;
-	buf_pool->n_pages_awe_remapped = 0;
-
-	buf_pool->n_page_gets = 0;
-	buf_pool->n_page_gets_old = 0;
-	buf_pool->n_pages_read_old = 0;
-	buf_pool->n_pages_written_old = 0;
-	buf_pool->n_pages_created_old = 0;
-	buf_pool->n_pages_awe_remapped_old = 0;
-
-	/* 2. Initialize flushing fields
-	---------------------------- */
-	UT_LIST_INIT(buf_pool->flush_list);
-
-	for (i = BUF_FLUSH_LRU; i <= BUF_FLUSH_LIST; i++) {
-		buf_pool->n_flush[i] = 0;
-		buf_pool->init_flush[i] = FALSE;
-		buf_pool->no_flush[i] = os_event_create(NULL);
-	}
-
-	buf_pool->LRU_flush_ended = 0;
-
-	buf_pool->ulint_clock = 1;
-	buf_pool->freed_page_clock = 0;
-
-	/* 3. Initialize LRU fields
-	---------------------------- */
-	UT_LIST_INIT(buf_pool->LRU);
-
-	buf_pool->LRU_old = NULL;
-
-	UT_LIST_INIT(buf_pool->awe_LRU_free_mapped);
-
-	/* Add control blocks to the free list */
-	UT_LIST_INIT(buf_pool->free);
-
-	for (i = 0; i < curr_size; i++) {
-
-		block = buf_pool_get_nth_block(buf_pool, i);
-
-		if (block->frame) {
-			/* Wipe contents of frame to eliminate a Purify
-			warning */
-
-#ifdef HAVE_purify
-			memset(block->frame, '\0', UNIV_PAGE_SIZE);
-#endif
-			if (srv_use_awe) {
-				/* Add to the list of blocks mapped to
-				frames */
-
-				UT_LIST_ADD_LAST(awe_LRU_free_mapped,
-						 buf_pool->awe_LRU_free_mapped,
-						 block);
-			}
-		}
-
-		UT_LIST_ADD_LAST(free, buf_pool->free, block);
-		block->in_free_list = TRUE;
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	if (srv_use_adaptive_hash_indexes) {
-		btr_search_sys_create(curr_size * UNIV_PAGE_SIZE
-				      / sizeof(void*) / 64);
-	} else {
-		/* Create only a small dummy system */
-		btr_search_sys_create(1000);
-	}
-
-	return(buf_pool);
-}
-
-/************************************************************************
-Maps the page of block to a frame, if not mapped yet. Unmaps some page
-from the end of the awe_LRU_free_mapped. */
-
-void
-buf_awe_map_page_to_frame(
-/*======================*/
-	buf_block_t*	block,		/* in: block whose page should be
-					mapped to a frame */
-	ibool		add_to_mapped_list) /* in: TRUE if we in the case
-					we need to map the page should also
-					add the block to the
-					awe_LRU_free_mapped list */
-{
-	buf_block_t*	bck;
-
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-	ut_ad(block);
-
-	if (block->frame) {
-
-		return;
-	}
-
-	/* Scan awe_LRU_free_mapped from the end and try to find a block
-	which is not bufferfixed or io-fixed */
-
-	bck = UT_LIST_GET_LAST(buf_pool->awe_LRU_free_mapped);
-
-	while (bck) {
-		ibool skip;
-
-		mutex_enter(&bck->mutex);
-
-		skip = (bck->state == BUF_BLOCK_FILE_PAGE
-			&& (bck->buf_fix_count != 0 || bck->io_fix != 0));
-
-		if (skip) {
-			mutex_exit(&bck->mutex);
-
-			/* We have to skip this */
-			bck = UT_LIST_GET_PREV(awe_LRU_free_mapped, bck);
-		} else {
-			/* We can map block to the frame of bck */
-
-			os_awe_map_physical_mem_to_window(
-				bck->frame,
-				UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE,
-				block->awe_info);
-
-			block->frame = bck->frame;
-
-			*(buf_pool->blocks_of_frames
-			  + (((ulint)(block->frame
-				      - buf_pool->frame_zero))
-			     >> UNIV_PAGE_SIZE_SHIFT))
-				= block;
-
-			bck->frame = NULL;
-			UT_LIST_REMOVE(awe_LRU_free_mapped,
-				       buf_pool->awe_LRU_free_mapped,
-				       bck);
-
-			if (add_to_mapped_list) {
-				UT_LIST_ADD_FIRST(
-					awe_LRU_free_mapped,
-					buf_pool->awe_LRU_free_mapped,
-					block);
-			}
-
-			buf_pool->n_pages_awe_remapped++;
-
-			mutex_exit(&bck->mutex);
-
-			return;
-		}
-	}
-
-	fprintf(stderr,
-		"InnoDB: AWE: Fatal error: cannot find a page to unmap\n"
-		"InnoDB: awe_LRU_free_mapped list length %lu\n",
-		(ulong) UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
-
-	ut_a(0);
-}
-
-/************************************************************************
-Allocates a buffer block. */
-UNIV_INLINE
-buf_block_t*
-buf_block_alloc(void)
-/*=================*/
-				/* out, own: the allocated block; also if AWE
-				is used it is guaranteed that the page is
-				mapped to a frame */
-{
-	buf_block_t*	block;
-
-	block = buf_LRU_get_free_block();
-
-	return(block);
-}
-
-/************************************************************************
-Moves to the block to the start of the LRU list if there is a danger
-that the block would drift out of the buffer pool. */
-UNIV_INLINE
-void
-buf_block_make_young(
-/*=================*/
-	buf_block_t*	block)	/* in: block to make younger */
-{
-	ut_ad(!mutex_own(&(buf_pool->mutex)));
-
-	/* Note that we read freed_page_clock's without holding any mutex:
-	this is allowed since the result is used only in heuristics */
-
-	if (buf_block_peek_if_too_old(block)) {
-
-		mutex_enter(&buf_pool->mutex);
-		/* There has been freeing activity in the LRU list:
-		best to move to the head of the LRU list */
-
-		buf_LRU_make_block_young(block);
-		mutex_exit(&buf_pool->mutex);
-	}
-}
-
-/************************************************************************
-Moves a page to the start of the buffer pool LRU list. This high-level
-function can be used to prevent an important page from from slipping out of
-the buffer pool. */
-
-void
-buf_page_make_young(
-/*================*/
-	buf_frame_t*	frame)	/* in: buffer frame of a file page */
-{
-	buf_block_t*	block;
-
-	mutex_enter(&(buf_pool->mutex));
-
-	block = buf_block_align(frame);
-
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-	buf_LRU_make_block_young(block);
-
-	mutex_exit(&(buf_pool->mutex));
-}
-
-/************************************************************************
-Frees a buffer block which does not contain a file page. */
-UNIV_INLINE
-void
-buf_block_free(
-/*===========*/
-	buf_block_t*	block)	/* in, own: block to be freed */
-{
-	mutex_enter(&(buf_pool->mutex));
-
-	mutex_enter(&block->mutex);
-
-	ut_a(block->state != BUF_BLOCK_FILE_PAGE);
-
-	buf_LRU_block_free_non_file_page(block);
-
-	mutex_exit(&block->mutex);
-
-	mutex_exit(&(buf_pool->mutex));
-}
-
-/*************************************************************************
-Allocates a buffer frame. */
-
-buf_frame_t*
-buf_frame_alloc(void)
-/*=================*/
-				/* out: buffer frame */
-{
-	return(buf_block_alloc()->frame);
-}
-
-/*************************************************************************
-Frees a buffer frame which does not contain a file page. */
-
-void
-buf_frame_free(
-/*===========*/
-	buf_frame_t*	frame)	/* in: buffer frame */
-{
-	buf_block_free(buf_block_align(frame));
-}
-
-/************************************************************************
-Returns the buffer control block if the page can be found in the buffer
-pool. NOTE that it is possible that the page is not yet read
-from disk, though. This is a very low-level function: use with care! */
-
-buf_block_t*
-buf_page_peek_block(
-/*================*/
-			/* out: control block if found from page hash table,
-			otherwise NULL; NOTE that the page is not necessarily
-			yet read from disk! */
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number */
-{
-	buf_block_t*	block;
-
-	mutex_enter_fast(&(buf_pool->mutex));
-
-	block = buf_page_hash_get(space, offset);
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(block);
-}
-
-/************************************************************************
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number */
-{
-	buf_block_t*	block;
-
-	mutex_enter_fast(&(buf_pool->mutex));
-
-	block = buf_page_hash_get(space, offset);
-
-	if (block) {
-		block->check_index_page_at_flush = FALSE;
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-}
-
-/************************************************************************
-Returns the current state of is_hashed of a page. FALSE if the page is
-not in the pool. NOTE that this operation does not fix the page in the
-pool if it is found there. */
-
-ibool
-buf_page_peek_if_search_hashed(
-/*===========================*/
-			/* out: TRUE if page hash index is built in search
-			system */
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number */
-{
-	buf_block_t*	block;
-	ibool		is_hashed;
-
-	mutex_enter_fast(&(buf_pool->mutex));
-
-	block = buf_page_hash_get(space, offset);
-
-	if (!block) {
-		is_hashed = FALSE;
-	} else {
-		is_hashed = block->is_hashed;
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(is_hashed);
-}
-
-/************************************************************************
-Returns TRUE if the page can be found in the buffer pool hash table. NOTE
-that it is possible that the page is not yet read from disk, though. */
-
-ibool
-buf_page_peek(
-/*==========*/
-			/* out: TRUE if found from page hash table,
-			NOTE that the page is not necessarily yet read
-			from disk! */
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number */
-{
-	if (buf_page_peek_block(space, offset)) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/************************************************************************
-Sets file_page_was_freed TRUE if the page is found in the buffer pool.
-This function should be called when we free a file page and want the
-debug version to check that it is not accessed any more unless
-reallocated. */
-
-buf_block_t*
-buf_page_set_file_page_was_freed(
-/*=============================*/
-			/* out: control block if found from page hash table,
-			otherwise NULL */
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number */
-{
-	buf_block_t*	block;
-
-	mutex_enter_fast(&(buf_pool->mutex));
-
-	block = buf_page_hash_get(space, offset);
-
-	if (block) {
-		block->file_page_was_freed = TRUE;
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(block);
-}
-
-/************************************************************************
-Sets file_page_was_freed FALSE if the page is found in the buffer pool.
-This function should be called when we free a file page and want the
-debug version to check that it is not accessed any more unless
-reallocated. */
-
-buf_block_t*
-buf_page_reset_file_page_was_freed(
-/*===============================*/
-			/* out: control block if found from page hash table,
-			otherwise NULL */
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number */
-{
-	buf_block_t*	block;
-
-	mutex_enter_fast(&(buf_pool->mutex));
-
-	block = buf_page_hash_get(space, offset);
-
-	if (block) {
-		block->file_page_was_freed = FALSE;
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(block);
-}
-
-/************************************************************************
-This is the general function used to get access to a database page. */
-
-buf_frame_t*
-buf_page_get_gen(
-/*=============*/
-				/* out: pointer to the frame or NULL */
-	ulint		space,	/* in: space id */
-	ulint		offset,	/* in: page number */
-	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
-	buf_frame_t*	guess,	/* in: guessed frame or NULL */
-	ulint		mode,	/* in: BUF_GET, BUF_GET_IF_IN_POOL,
-				BUF_GET_NO_LATCH, BUF_GET_NOWAIT */
-	const char*	file,	/* in: file name */
-	ulint		line,	/* in: line where called */
-	mtr_t*		mtr)	/* in: mini-transaction */
-{
-	buf_block_t*	block;
-	ibool		accessed;
-	ulint		fix_type;
-	ibool		success;
-	ibool		must_read;
-
-	ut_ad(mtr);
-	ut_ad((rw_latch == RW_S_LATCH)
-	      || (rw_latch == RW_X_LATCH)
-	      || (rw_latch == RW_NO_LATCH));
-	ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
-	ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
-	      || (mode == BUF_GET_NO_LATCH) || (mode == BUF_GET_NOWAIT));
-#ifndef UNIV_LOG_DEBUG
-	ut_ad(!ibuf_inside() || ibuf_page(space, offset));
-#endif
-	buf_pool->n_page_gets++;
-loop:
-	block = NULL;
-	mutex_enter_fast(&(buf_pool->mutex));
-
-	if (guess) {
-		block = buf_block_align(guess);
-
-		if ((offset != block->offset) || (space != block->space)
-		    || (block->state != BUF_BLOCK_FILE_PAGE)) {
-
-			block = NULL;
-		}
-	}
-
-	if (block == NULL) {
-		block = buf_page_hash_get(space, offset);
-	}
-
-	if (block == NULL) {
-		/* Page not in buf_pool: needs to be read from file */
-
-		mutex_exit(&(buf_pool->mutex));
-
-		if (mode == BUF_GET_IF_IN_POOL) {
-
-			return(NULL);
-		}
-
-		buf_read_page(space, offset);
-
-#ifdef UNIV_DEBUG
-		buf_dbg_counter++;
-
-		if (buf_dbg_counter % 37 == 0) {
-			ut_ad(buf_validate());
-		}
-#endif
-		goto loop;
-	}
-
-	mutex_enter(&block->mutex);
-
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-	must_read = FALSE;
-
-	if (block->io_fix == BUF_IO_READ) {
-
-		must_read = TRUE;
-
-		if (mode == BUF_GET_IF_IN_POOL) {
-			/* The page is only being read to buffer */
-			mutex_exit(&buf_pool->mutex);
-			mutex_exit(&block->mutex);
-
-			return(NULL);
-		}
-	}
-
-	/* If AWE is enabled and the page is not mapped to a frame, then
-	map it */
-
-	if (block->frame == NULL) {
-		ut_a(srv_use_awe);
-
-		/* We set second parameter TRUE because the block is in the
-		LRU list and we must put it to awe_LRU_free_mapped list once
-		mapped to a frame */
-
-		buf_awe_map_page_to_frame(block, TRUE);
-	}
-
-#ifdef UNIV_SYNC_DEBUG
-	buf_block_buf_fix_inc_debug(block, file, line);
-#else
-	buf_block_buf_fix_inc(block);
-#endif
-	mutex_exit(&buf_pool->mutex);
-
-	/* Check if this is the first access to the page */
-
-	accessed = block->accessed;
-
-	block->accessed = TRUE;
-
-	mutex_exit(&block->mutex);
-
-	buf_block_make_young(block);
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
-	ut_a(block->file_page_was_freed == FALSE);
-#endif
-
-#ifdef UNIV_DEBUG
-	buf_dbg_counter++;
-
-	if (buf_dbg_counter % 5771 == 0) {
-		ut_ad(buf_validate());
-	}
-#endif
-	ut_ad(block->buf_fix_count > 0);
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
-
-	if (mode == BUF_GET_NOWAIT) {
-		if (rw_latch == RW_S_LATCH) {
-			success = rw_lock_s_lock_nowait(&(block->lock),
-							file, line);
-			fix_type = MTR_MEMO_PAGE_S_FIX;
-		} else {
-			ut_ad(rw_latch == RW_X_LATCH);
-			success = rw_lock_x_lock_func_nowait(&(block->lock),
-							     file, line);
-			fix_type = MTR_MEMO_PAGE_X_FIX;
-		}
-
-		if (!success) {
-			mutex_enter(&block->mutex);
-
-			block->buf_fix_count--;
-
-			mutex_exit(&block->mutex);
-#ifdef UNIV_SYNC_DEBUG
-			rw_lock_s_unlock(&(block->debug_latch));
-#endif
-
-			return(NULL);
-		}
-	} else if (rw_latch == RW_NO_LATCH) {
-
-		if (must_read) {
-			/* Let us wait until the read operation
-			completes */
-
-			for (;;) {
-				mutex_enter(&block->mutex);
-
-				if (block->io_fix == BUF_IO_READ) {
-
-					mutex_exit(&block->mutex);
-
-					os_thread_sleep(WAIT_FOR_READ);
-				} else {
-
-					mutex_exit(&block->mutex);
-
-					break;
-				}
-			}
-		}
-
-		fix_type = MTR_MEMO_BUF_FIX;
-	} else if (rw_latch == RW_S_LATCH) {
-
-		rw_lock_s_lock_func(&(block->lock), 0, file, line);
-
-		fix_type = MTR_MEMO_PAGE_S_FIX;
-	} else {
-		rw_lock_x_lock_func(&(block->lock), 0, file, line);
-
-		fix_type = MTR_MEMO_PAGE_X_FIX;
-	}
-
-	mtr_memo_push(mtr, block, fix_type);
-
-	if (!accessed) {
-		/* In the case of a first access, try to apply linear
-		read-ahead */
-
-		buf_read_ahead_linear(space, offset);
-	}
-
-#ifdef UNIV_IBUF_DEBUG
-	ut_a(ibuf_count_get(block->space, block->offset) == 0);
-#endif
-	return(block->frame);
-}
-
-/************************************************************************
-This is the general function used to get optimistic access to a database
-page. */
-
-ibool
-buf_page_optimistic_get_func(
-/*=========================*/
-				/* out: TRUE if success */
-	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
-	buf_block_t*	block,	/* in: guessed buffer block */
-	buf_frame_t*	guess,	/* in: guessed frame; note that AWE may move
-				frames */
-	dulint		modify_clock,/* in: modify clock value if mode is
-				..._GUESS_ON_CLOCK */
-	const char*	file,	/* in: file name */
-	ulint		line,	/* in: line where called */
-	mtr_t*		mtr)	/* in: mini-transaction */
-{
-	ibool		accessed;
-	ibool		success;
-	ulint		fix_type;
-
-	ut_ad(mtr && block);
-	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
-	/* If AWE is used, block may have a different frame now, e.g., NULL */
-
-	mutex_enter(&block->mutex);
-
-	if (UNIV_UNLIKELY(block->state != BUF_BLOCK_FILE_PAGE)
-	    || UNIV_UNLIKELY(block->frame != guess)) {
-
-		mutex_exit(&block->mutex);
-
-		return(FALSE);
-	}
-
-#ifdef UNIV_SYNC_DEBUG
-	buf_block_buf_fix_inc_debug(block, file, line);
-#else
-	buf_block_buf_fix_inc(block);
-#endif
-	accessed = block->accessed;
-	block->accessed = TRUE;
-
-	mutex_exit(&block->mutex);
-
-	buf_block_make_young(block);
-
-	/* Check if this is the first access to the page */
-
-	ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset));
-
-	if (rw_latch == RW_S_LATCH) {
-		success = rw_lock_s_lock_nowait(&(block->lock),
-						file, line);
-		fix_type = MTR_MEMO_PAGE_S_FIX;
-	} else {
-		success = rw_lock_x_lock_func_nowait(&(block->lock),
-						     file, line);
-		fix_type = MTR_MEMO_PAGE_X_FIX;
-	}
-
-	if (UNIV_UNLIKELY(!success)) {
-		mutex_enter(&block->mutex);
-
-		block->buf_fix_count--;
-
-		mutex_exit(&block->mutex);
-
-#ifdef UNIV_SYNC_DEBUG
-		rw_lock_s_unlock(&(block->debug_latch));
-#endif
-		return(FALSE);
-	}
-
-	if (UNIV_UNLIKELY(!UT_DULINT_EQ(modify_clock, block->modify_clock))) {
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(block->frame, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-		if (rw_latch == RW_S_LATCH) {
-			rw_lock_s_unlock(&(block->lock));
-		} else {
-			rw_lock_x_unlock(&(block->lock));
-		}
-
-		mutex_enter(&block->mutex);
-
-		block->buf_fix_count--;
-
-		mutex_exit(&block->mutex);
-
-#ifdef UNIV_SYNC_DEBUG
-		rw_lock_s_unlock(&(block->debug_latch));
-#endif
-		return(FALSE);
-	}
-
-	mtr_memo_push(mtr, block, fix_type);
-
-#ifdef UNIV_DEBUG
-	buf_dbg_counter++;
-
-	if (buf_dbg_counter % 5771 == 0) {
-		ut_ad(buf_validate());
-	}
-#endif
-	ut_ad(block->buf_fix_count > 0);
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
-	ut_a(block->file_page_was_freed == FALSE);
-#endif
-	if (UNIV_UNLIKELY(!accessed)) {
-		/* In the case of a first access, try to apply linear
-		read-ahead */
-
-		buf_read_ahead_linear(buf_frame_get_space_id(guess),
-				      buf_frame_get_page_no(guess));
-	}
-
-#ifdef UNIV_IBUF_DEBUG
-	ut_a(ibuf_count_get(block->space, block->offset) == 0);
-#endif
-	buf_pool->n_page_gets++;
-
-	return(TRUE);
-}
-
-/************************************************************************
-This is used to get access to a known database page, when no waiting can be
-done. For example, if a search in an adaptive hash index leads us to this
-frame. */
-
-ibool
-buf_page_get_known_nowait(
-/*======================*/
-				/* out: TRUE if success */
-	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
-	buf_frame_t*	guess,	/* in: the known page frame */
-	ulint		mode,	/* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
-	const char*	file,	/* in: file name */
-	ulint		line,	/* in: line where called */
-	mtr_t*		mtr)	/* in: mini-transaction */
-{
-	buf_block_t*	block;
-	ibool		success;
-	ulint		fix_type;
-
-	ut_ad(mtr);
-	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
-	block = buf_block_align(guess);
-
-	mutex_enter(&block->mutex);
-
-	if (block->state == BUF_BLOCK_REMOVE_HASH) {
-		/* Another thread is just freeing the block from the LRU list
-		of the buffer pool: do not try to access this page; this
-		attempt to access the page can only come through the hash
-		index because when the buffer block state is ..._REMOVE_HASH,
-		we have already removed it from the page address hash table
-		of the buffer pool. */
-
-		mutex_exit(&block->mutex);
-
-		return(FALSE);
-	}
-
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-#ifdef UNIV_SYNC_DEBUG
-	buf_block_buf_fix_inc_debug(block, file, line);
-#else
-	buf_block_buf_fix_inc(block);
-#endif
-	mutex_exit(&block->mutex);
-
-	if (mode == BUF_MAKE_YOUNG) {
-		buf_block_make_young(block);
-	}
-
-	ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
-
-	if (rw_latch == RW_S_LATCH) {
-		success = rw_lock_s_lock_nowait(&(block->lock),
-						file, line);
-		fix_type = MTR_MEMO_PAGE_S_FIX;
-	} else {
-		success = rw_lock_x_lock_func_nowait(&(block->lock),
-						     file, line);
-		fix_type = MTR_MEMO_PAGE_X_FIX;
-	}
-
-	if (!success) {
-		mutex_enter(&block->mutex);
-
-		block->buf_fix_count--;
-
-		mutex_exit(&block->mutex);
-
-#ifdef UNIV_SYNC_DEBUG
-		rw_lock_s_unlock(&(block->debug_latch));
-#endif
-
-		return(FALSE);
-	}
-
-	mtr_memo_push(mtr, block, fix_type);
-
-#ifdef UNIV_DEBUG
-	buf_dbg_counter++;
-
-	if (buf_dbg_counter % 5771 == 0) {
-		ut_ad(buf_validate());
-	}
-#endif
-	ut_ad(block->buf_fix_count > 0);
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
-#ifdef UNIV_DEBUG_FILE_ACCESSES
-	ut_a(block->file_page_was_freed == FALSE);
-#endif
-
-#ifdef UNIV_IBUF_DEBUG
-	ut_a((mode == BUF_KEEP_OLD)
-	     || (ibuf_count_get(block->space, block->offset) == 0));
-#endif
-	buf_pool->n_page_gets++;
-
-	return(TRUE);
-}
-
-/************************************************************************
-Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
-
-void
-buf_page_init_for_backup_restore(
-/*=============================*/
-	ulint		space,	/* in: space id */
-	ulint		offset,	/* in: offset of the page within space
-				in units of a page */
-	buf_block_t*	block)	/* in: block to init */
-{
-	/* Set the state of the block */
-	block->magic_n		= BUF_BLOCK_MAGIC_N;
-
-	block->state		= BUF_BLOCK_FILE_PAGE;
-	block->space		= space;
-	block->offset		= offset;
-
-	block->lock_hash_val	= 0;
-
-	block->freed_page_clock = 0;
-
-	block->newest_modification = ut_dulint_zero;
-	block->oldest_modification = ut_dulint_zero;
-
-	block->accessed		= FALSE;
-	block->buf_fix_count	= 0;
-	block->io_fix		= 0;
-
-	block->n_hash_helps	= 0;
-	block->is_hashed	= FALSE;
-	block->n_fields		= 1;
-	block->n_bytes		= 0;
-	block->left_side	= TRUE;
-
-	block->file_page_was_freed = FALSE;
-}
-
-/************************************************************************
-Inits a page to the buffer buf_pool. */
-static
-void
-buf_page_init(
-/*==========*/
-	ulint		space,	/* in: space id */
-	ulint		offset,	/* in: offset of the page within space
-				in units of a page */
-	buf_block_t*	block)	/* in: block to init */
-{
-
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-	ut_ad(mutex_own(&(block->mutex)));
-	ut_a(block->state != BUF_BLOCK_FILE_PAGE);
-
-	/* Set the state of the block */
-	block->magic_n		= BUF_BLOCK_MAGIC_N;
-
-	block->state		= BUF_BLOCK_FILE_PAGE;
-	block->space		= space;
-	block->offset		= offset;
-
-	block->check_index_page_at_flush = FALSE;
-	block->index		= NULL;
-
-	block->lock_hash_val	= lock_rec_hash(space, offset);
-
-#ifdef UNIV_DEBUG_VALGRIND
-	if (!space) {
-		/* Silence valid Valgrind warnings about uninitialized
-		data being written to data files.  There are some unused
-		bytes on some pages that InnoDB does not initialize. */
-		UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
-	}
-#endif /* UNIV_DEBUG_VALGRIND */
-
-	/* Insert into the hash table of file pages */
-
-	if (buf_page_hash_get(space, offset)) {
-		fprintf(stderr,
-			"InnoDB: Error: page %lu %lu already found"
-			" in the hash table\n",
-			(ulong) space,
-			(ulong) offset);
-#ifdef UNIV_DEBUG
-		buf_print();
-		buf_LRU_print();
-		buf_validate();
-		buf_LRU_validate();
-#endif /* UNIV_DEBUG */
-		ut_a(0);
-	}
-
-	HASH_INSERT(buf_block_t, hash, buf_pool->page_hash,
-		    buf_page_address_fold(space, offset), block);
-
-	block->freed_page_clock = 0;
-
-	block->newest_modification = ut_dulint_zero;
-	block->oldest_modification = ut_dulint_zero;
-
-	block->accessed		= FALSE;
-	block->buf_fix_count	= 0;
-	block->io_fix		= 0;
-
-	block->n_hash_helps	= 0;
-	block->is_hashed	= FALSE;
-	block->n_fields		= 1;
-	block->n_bytes		= 0;
-	block->left_side	= TRUE;
-
-	block->file_page_was_freed = FALSE;
-}
-
-/************************************************************************
-Function which inits a page for read to the buffer buf_pool. If the page is
-(1) already in buf_pool, or
-(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
-(3) if the space is deleted or being deleted,
-then this function does nothing.
-Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
-on the buffer frame. The io-handler must take care that the flag is cleared
-and the lock released later. This is one of the functions which perform the
-state transition NOT_USED => FILE_PAGE to a block (the other is
-buf_page_create). */
-
-buf_block_t*
-buf_page_init_for_read(
-/*===================*/
-				/* out: pointer to the block or NULL */
-	ulint*		err,	/* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
-	ulint		mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ... */
-	ulint		space,	/* in: space id */
-	ib_longlong	tablespace_version,/* in: prevents reading from a wrong
-				version of the tablespace in case we have done
-				DISCARD + IMPORT */
-	ulint		offset)	/* in: page number */
-{
-	buf_block_t*	block;
-	mtr_t		mtr;
-
-	ut_ad(buf_pool);
-
-	*err = DB_SUCCESS;
-
-	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
-		/* It is a read-ahead within an ibuf routine */
-
-		ut_ad(!ibuf_bitmap_page(offset));
-		ut_ad(ibuf_inside());
-
-		mtr_start(&mtr);
-
-		if (!ibuf_page_low(space, offset, &mtr)) {
-
-			mtr_commit(&mtr);
-
-			return(NULL);
-		}
-	} else {
-		ut_ad(mode == BUF_READ_ANY_PAGE);
-	}
-
-	block = buf_block_alloc();
-
-	ut_a(block);
-
-	mutex_enter(&(buf_pool->mutex));
-	mutex_enter(&block->mutex);
-
-	if (fil_tablespace_deleted_or_being_deleted_in_mem(
-		    space, tablespace_version)) {
-		*err = DB_TABLESPACE_DELETED;
-	}
-
-	if (*err == DB_TABLESPACE_DELETED
-	    || NULL != buf_page_hash_get(space, offset)) {
-
-		/* The page belongs to a space which has been
-		deleted or is being deleted, or the page is
-		already in buf_pool, return */
-
-		mutex_exit(&block->mutex);
-		mutex_exit(&(buf_pool->mutex));
-
-		buf_block_free(block);
-
-		if (mode == BUF_READ_IBUF_PAGES_ONLY) {
-
-			mtr_commit(&mtr);
-		}
-
-		return(NULL);
-	}
-
-	ut_ad(block);
-
-	buf_page_init(space, offset, block);
-
-	/* The block must be put to the LRU list, to the old blocks */
-
-	buf_LRU_add_block(block, TRUE);		/* TRUE == to old blocks */
-
-	block->io_fix = BUF_IO_READ;
-
-	buf_pool->n_pend_reads++;
-
-	/* We set a pass-type x-lock on the frame because then the same
-	thread which called for the read operation (and is running now at
-	this point of code) can wait for the read to complete by waiting
-	for the x-lock on the frame; if the x-lock were recursive, the
-	same thread would illegally get the x-lock before the page read
-	is completed. The x-lock is cleared by the io-handler thread. */
-
-	rw_lock_x_lock_gen(&(block->lock), BUF_IO_READ);
-
-	mutex_exit(&block->mutex);
-	mutex_exit(&(buf_pool->mutex));
-
-	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
-
-		mtr_commit(&mtr);
-	}
-
-	return(block);
-}
-
-/************************************************************************
-Initializes a page to the buffer buf_pool. The page is usually not read
-from a file even if it cannot be found in the buffer buf_pool. This is one
-of the functions which perform to a block a state transition NOT_USED =>
-FILE_PAGE (the other is buf_page_init_for_read above). */
-
-buf_frame_t*
-buf_page_create(
-/*============*/
-			/* out: pointer to the frame, page bufferfixed */
-	ulint	space,	/* in: space id */
-	ulint	offset,	/* in: offset of the page within space in units of
-			a page */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
-{
-	buf_frame_t*	frame;
-	buf_block_t*	block;
-	buf_block_t*	free_block	= NULL;
-
-	ut_ad(mtr);
-
-	free_block = buf_LRU_get_free_block();
-
-	mutex_enter(&(buf_pool->mutex));
-
-	block = buf_page_hash_get(space, offset);
-
-	if (block != NULL) {
-#ifdef UNIV_IBUF_DEBUG
-		ut_a(ibuf_count_get(block->space, block->offset) == 0);
-#endif
-		block->file_page_was_freed = FALSE;
-
-		/* Page can be found in buf_pool */
-		mutex_exit(&(buf_pool->mutex));
-
-		buf_block_free(free_block);
-
-		frame = buf_page_get_with_no_latch(space, offset, mtr);
-
-		return(frame);
-	}
-
-	/* If we get here, the page was not in buf_pool: init it there */
-
-#ifdef UNIV_DEBUG
-	if (buf_debug_prints) {
-		fprintf(stderr, "Creating space %lu page %lu to buffer\n",
-			(ulong) space, (ulong) offset);
-	}
-#endif /* UNIV_DEBUG */
-
-	block = free_block;
-
-	mutex_enter(&block->mutex);
-
-	buf_page_init(space, offset, block);
-
-	/* The block must be put to the LRU list */
-	buf_LRU_add_block(block, FALSE);
-
-#ifdef UNIV_SYNC_DEBUG
-	buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__);
-#else
-	buf_block_buf_fix_inc(block);
-#endif
-	buf_pool->n_pages_created++;
-
-	mutex_exit(&(buf_pool->mutex));
-
-	mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
-
-	block->accessed = TRUE;
-
-	mutex_exit(&block->mutex);
-
-	/* Delete possible entries for the page from the insert buffer:
-	such can exist if the page belonged to an index which was dropped */
-
-	ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE);
-
-	/* Flush pages from the end of the LRU list if necessary */
-	buf_flush_free_margin();
-
-	frame = block->frame;
-
-	memset(frame + FIL_PAGE_PREV, 0xff, 4);
-	memset(frame + FIL_PAGE_NEXT, 0xff, 4);
-	mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
-
-	/* Reset to zero the file flush lsn field in the page; if the first
-	page of an ibdata file is 'created' in this function into the buffer
-	pool then we lose the original contents of the file flush lsn stamp.
-	Then InnoDB could in a crash recovery print a big, false, corruption
-	warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
-
-	memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
-
-#ifdef UNIV_DEBUG
-	buf_dbg_counter++;
-
-	if (buf_dbg_counter % 357 == 0) {
-		ut_ad(buf_validate());
-	}
-#endif
-#ifdef UNIV_IBUF_DEBUG
-	ut_a(ibuf_count_get(block->space, block->offset) == 0);
-#endif
-	return(frame);
-}
-
-/************************************************************************
-Completes an asynchronous read or write request of a file page to or from
-the buffer pool. */
-
-void
-buf_page_io_complete(
-/*=================*/
-	buf_block_t*	block)	/* in: pointer to the block in question */
-{
-	ulint		io_type;
-
-	ut_ad(block);
-
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-	/* We do not need protect block->io_fix here by block->mutex to read
-	it because this is the only function where we can change the value
-	from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
-	ensures that this is the only thread that handles the i/o for this
-	block. */
-
-	io_type = block->io_fix;
-
-	if (io_type == BUF_IO_READ) {
-		/* If this page is not uninitialized and not in the
-		doublewrite buffer, then the page number and space id
-		should be the same as in block. */
-		ulint	read_page_no = mach_read_from_4(
-			block->frame + FIL_PAGE_OFFSET);
-		ulint	read_space_id = mach_read_from_4(
-			block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-
-		if (!block->space
-		    && trx_doublewrite_page_inside(block->offset)) {
-
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				"  InnoDB: Error: reading page %lu\n"
-				"InnoDB: which is in the"
-				" doublewrite buffer!\n",
-				(ulong) block->offset);
-		} else if (!read_space_id && !read_page_no) {
-			/* This is likely an uninitialized page. */
-		} else if ((block->space && block->space != read_space_id)
-			   || block->offset != read_page_no) {
-			/* We did not compare space_id to read_space_id
-			if block->space == 0, because the field on the
-			page may contain garbage in MySQL < 4.1.1,
-			which only supported block->space == 0. */
-
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				"  InnoDB: Error: space id and page n:o"
-				" stored in the page\n"
-				"InnoDB: read in are %lu:%lu,"
-				" should be %lu:%lu!\n",
-				(ulong) read_space_id, (ulong) read_page_no,
-				(ulong) block->space, (ulong) block->offset);
-		}
-		/* From version 3.23.38 up we store the page checksum
-		to the 4 first bytes of the page end lsn field */
-
-		if (buf_page_is_corrupted(block->frame)) {
-			fprintf(stderr,
-				"InnoDB: Database page corruption on disk"
-				" or a failed\n"
-				"InnoDB: file read of page %lu.\n",
-				(ulong) block->offset);
-
-			fputs("InnoDB: You may have to recover"
-			      " from a backup.\n", stderr);
-
-			buf_page_print(block->frame);
-
-			fprintf(stderr,
-				"InnoDB: Database page corruption on disk"
-				" or a failed\n"
-				"InnoDB: file read of page %lu.\n",
-				(ulong) block->offset);
-			fputs("InnoDB: You may have to recover"
-			      " from a backup.\n", stderr);
-			fputs("InnoDB: It is also possible that"
-			      " your operating\n"
-			      "InnoDB: system has corrupted its"
-			      " own file cache\n"
-			      "InnoDB: and rebooting your computer"
-			      " removes the\n"
-			      "InnoDB: error.\n"
-			      "InnoDB: If the corrupt page is an index page\n"
-			      "InnoDB: you can also try to"
-			      " fix the corruption\n"
-			      "InnoDB: by dumping, dropping,"
-			      " and reimporting\n"
-			      "InnoDB: the corrupt table."
-			      " You can use CHECK\n"
-			      "InnoDB: TABLE to scan your"
-			      " table for corruption.\n"
-			      "InnoDB: See also"
-			      " http://dev.mysql.com/doc/refman/5.1/en/"
-			      "forcing-recovery.html\n"
-			      "InnoDB: about forcing recovery.\n", stderr);
-
-			if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
-				fputs("InnoDB: Ending processing because of"
-				      " a corrupt database page.\n",
-				      stderr);
-				exit(1);
-			}
-		}
-
-		if (recv_recovery_is_on()) {
-			recv_recover_page(FALSE, TRUE, block->frame,
-					  block->space, block->offset);
-		}
-
-		if (!recv_no_ibuf_operations) {
-			ibuf_merge_or_delete_for_page(
-				block->frame, block->space, block->offset,
-				TRUE);
-		}
-	}
-
-	mutex_enter(&(buf_pool->mutex));
-	mutex_enter(&block->mutex);
-
-#ifdef UNIV_IBUF_DEBUG
-	ut_a(ibuf_count_get(block->space, block->offset) == 0);
-#endif
-	/* Because this thread which does the unlocking is not the same that
-	did the locking, we use a pass value != 0 in unlock, which simply
-	removes the newest lock debug record, without checking the thread
-	id. */
-
-	block->io_fix = 0;
-
-	if (io_type == BUF_IO_READ) {
-		/* NOTE that the call to ibuf may have moved the ownership of
-		the x-latch to this OS thread: do not let this confuse you in
-		debugging! */
-
-		ut_ad(buf_pool->n_pend_reads > 0);
-		buf_pool->n_pend_reads--;
-		buf_pool->n_pages_read++;
-
-		rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ);
-
-#ifdef UNIV_DEBUG
-		if (buf_debug_prints) {
-			fputs("Has read ", stderr);
-		}
-#endif /* UNIV_DEBUG */
-	} else {
-		ut_ad(io_type == BUF_IO_WRITE);
-
-		/* Write means a flush operation: call the completion
-		routine in the flush system */
-
-		buf_flush_write_complete(block);
-
-		rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE);
-
-		buf_pool->n_pages_written++;
-
-#ifdef UNIV_DEBUG
-		if (buf_debug_prints) {
-			fputs("Has written ", stderr);
-		}
-#endif /* UNIV_DEBUG */
-	}
-
-	mutex_exit(&block->mutex);
-	mutex_exit(&(buf_pool->mutex));
-
-#ifdef UNIV_DEBUG
-	if (buf_debug_prints) {
-		fprintf(stderr, "page space %lu page no %lu\n",
-			(ulong) block->space, (ulong) block->offset);
-	}
-#endif /* UNIV_DEBUG */
-}
-
-/*************************************************************************
-Invalidates the file pages in the buffer pool when an archive recovery is
-completed. All the file pages buffered must be in a replaceable state when
-this function is called: not latched and not modified. */
-
-void
-buf_pool_invalidate(void)
-/*=====================*/
-{
-	ibool	freed;
-
-	ut_ad(buf_all_freed());
-
-	freed = TRUE;
-
-	while (freed) {
-		freed = buf_LRU_search_and_free_block(100);
-	}
-
-	mutex_enter(&(buf_pool->mutex));
-
-	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
-
-	mutex_exit(&(buf_pool->mutex));
-}
-
-#ifdef UNIV_DEBUG
-/*************************************************************************
-Validates the buffer buf_pool data structure. */
-
-ibool
-buf_validate(void)
-/*==============*/
-{
-	buf_block_t*	block;
-	ulint		i;
-	ulint		n_single_flush	= 0;
-	ulint		n_lru_flush	= 0;
-	ulint		n_list_flush	= 0;
-	ulint		n_lru		= 0;
-	ulint		n_flush		= 0;
-	ulint		n_free		= 0;
-	ulint		n_page		= 0;
-
-	ut_ad(buf_pool);
-
-	mutex_enter(&(buf_pool->mutex));
-
-	for (i = 0; i < buf_pool->curr_size; i++) {
-
-		block = buf_pool_get_nth_block(buf_pool, i);
-
-		mutex_enter(&block->mutex);
-
-		if (block->state == BUF_BLOCK_FILE_PAGE) {
-
-			ut_a(buf_page_hash_get(block->space,
-					       block->offset) == block);
-			n_page++;
-
-#ifdef UNIV_IBUF_DEBUG
-			ut_a((block->io_fix == BUF_IO_READ)
-			     || ibuf_count_get(block->space, block->offset)
-			     == 0);
-#endif
-			if (block->io_fix == BUF_IO_WRITE) {
-
-				if (block->flush_type == BUF_FLUSH_LRU) {
-					n_lru_flush++;
-					ut_a(rw_lock_is_locked(
-						     &block->lock,
-						     RW_LOCK_SHARED));
-				} else if (block->flush_type
-					   == BUF_FLUSH_LIST) {
-					n_list_flush++;
-				} else if (block->flush_type
-					   == BUF_FLUSH_SINGLE_PAGE) {
-					n_single_flush++;
-				} else {
-					ut_error;
-				}
-
-			} else if (block->io_fix == BUF_IO_READ) {
-
-				ut_a(rw_lock_is_locked(&(block->lock),
-						       RW_LOCK_EX));
-			}
-
-			n_lru++;
-
-			if (ut_dulint_cmp(block->oldest_modification,
-					  ut_dulint_zero) > 0) {
-				n_flush++;
-			}
-
-		} else if (block->state == BUF_BLOCK_NOT_USED) {
-			n_free++;
-		}
-
-		mutex_exit(&block->mutex);
-	}
-
-	if (n_lru + n_free > buf_pool->curr_size) {
-		fprintf(stderr, "n LRU %lu, n free %lu\n",
-			(ulong) n_lru, (ulong) n_free);
-		ut_error;
-	}
-
-	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
-	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
-		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
-			(ulong) UT_LIST_GET_LEN(buf_pool->free),
-			(ulong) n_free);
-		ut_error;
-	}
-	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
-
-	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
-	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
-	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
-
-	mutex_exit(&(buf_pool->mutex));
-
-	ut_a(buf_LRU_validate());
-	ut_a(buf_flush_validate());
-
-	return(TRUE);
-}
-
-/*************************************************************************
-Prints info of the buffer buf_pool data structure. */
-
-void
-buf_print(void)
-/*===========*/
-{
-	dulint*		index_ids;
-	ulint*		counts;
-	ulint		size;
-	ulint		i;
-	ulint		j;
-	dulint		id;
-	ulint		n_found;
-	buf_frame_t*	frame;
-	dict_index_t*	index;
-
-	ut_ad(buf_pool);
-
-	size = buf_pool->curr_size;
-
-	index_ids = mem_alloc(sizeof(dulint) * size);
-	counts = mem_alloc(sizeof(ulint) * size);
-
-	mutex_enter(&(buf_pool->mutex));
-
-	fprintf(stderr,
-		"buf_pool size %lu\n"
-		"database pages %lu\n"
-		"free pages %lu\n"
-		"modified database pages %lu\n"
-		"n pending reads %lu\n"
-		"n pending flush LRU %lu list %lu single page %lu\n"
-		"pages read %lu, created %lu, written %lu\n",
-		(ulong) size,
-		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
-		(ulong) UT_LIST_GET_LEN(buf_pool->free),
-		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
-		(ulong) buf_pool->n_pend_reads,
-		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
-		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
-		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
-		(ulong) buf_pool->n_pages_read, buf_pool->n_pages_created,
-		(ulong) buf_pool->n_pages_written);
-
-	/* Count the number of blocks belonging to each index in the buffer */
-
-	n_found = 0;
-
-	for (i = 0; i < size; i++) {
-		frame = buf_pool_get_nth_block(buf_pool, i)->frame;
-
-		if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
-
-			id = btr_page_get_index_id(frame);
-
-			/* Look for the id in the index_ids array */
-			j = 0;
-
-			while (j < n_found) {
-
-				if (ut_dulint_cmp(index_ids[j], id) == 0) {
-					(counts[j])++;
-
-					break;
-				}
-				j++;
-			}
-
-			if (j == n_found) {
-				n_found++;
-				index_ids[j] = id;
-				counts[j] = 1;
-			}
-		}
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	for (i = 0; i < n_found; i++) {
-		index = dict_index_get_if_in_cache(index_ids[i]);
-
-		fprintf(stderr,
-			"Block count for index %lu in buffer is about %lu",
-			(ulong) ut_dulint_get_low(index_ids[i]),
-			(ulong) counts[i]);
-
-		if (index) {
-			putc(' ', stderr);
-			dict_index_name_print(stderr, NULL, index);
-		}
-
-		putc('\n', stderr);
-	}
-
-	mem_free(index_ids);
-	mem_free(counts);
-
-	ut_a(buf_validate());
-}
-
-/*************************************************************************
-Returns the number of latched pages in the buffer pool. */
-
-ulint
-buf_get_latched_pages_number(void)
-{
-	buf_block_t*	block;
-	ulint		i;
-	ulint		fixed_pages_number = 0;
-
-	mutex_enter(&(buf_pool->mutex));
-
-	for (i = 0; i < buf_pool->curr_size; i++) {
-
-		block = buf_pool_get_nth_block(buf_pool, i);
-
-		if (block->magic_n == BUF_BLOCK_MAGIC_N) {
-			mutex_enter(&block->mutex);
-
-			if (block->buf_fix_count != 0 || block->io_fix != 0) {
-				fixed_pages_number++;
-			}
-
-			mutex_exit(&block->mutex);
-		}
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(fixed_pages_number);
-}
-#endif /* UNIV_DEBUG */
-
-/*************************************************************************
-Returns the number of pending buf pool ios. */
-
-ulint
-buf_get_n_pending_ios(void)
-/*=======================*/
-{
-	return(buf_pool->n_pend_reads
-	       + buf_pool->n_flush[BUF_FLUSH_LRU]
-	       + buf_pool->n_flush[BUF_FLUSH_LIST]
-	       + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
-}
-
-/*************************************************************************
-Returns the ratio in percents of modified pages in the buffer pool /
-database pages in the buffer pool. */
-
-ulint
-buf_get_modified_ratio_pct(void)
-/*============================*/
-{
-	ulint	ratio;
-
-	mutex_enter(&(buf_pool->mutex));
-
-	ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
-		/ (1 + UT_LIST_GET_LEN(buf_pool->LRU)
-		   + UT_LIST_GET_LEN(buf_pool->free));
-
-	/* 1 + is there to avoid division by zero */
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(ratio);
-}
-
-/*************************************************************************
-Prints info of the buffer i/o. */
-
-void
-buf_print_io(
-/*=========*/
-	FILE*	file)	/* in/out: buffer where to print */
-{
-	time_t	current_time;
-	double	time_elapsed;
-	ulint	size;
-
-	ut_ad(buf_pool);
-	size = buf_pool->curr_size;
-
-	mutex_enter(&(buf_pool->mutex));
-
-	if (srv_use_awe) {
-		fprintf(stderr,
-			"AWE: Buffer pool memory frames %lu\n",
-			(ulong) buf_pool->n_frames);
-
-		fprintf(stderr,
-			"AWE: Database pages and free buffers"
-			" mapped in frames %lu\n",
-			(ulong)
-			UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
-	}
-	fprintf(file,
-		"Buffer pool size   %lu\n"
-		"Free buffers       %lu\n"
-		"Database pages     %lu\n"
-		"Modified db pages  %lu\n"
-		"Pending reads %lu\n"
-		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
-		(ulong) size,
-		(ulong) UT_LIST_GET_LEN(buf_pool->free),
-		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
-		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
-		(ulong) buf_pool->n_pend_reads,
-		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
-		+ buf_pool->init_flush[BUF_FLUSH_LRU],
-		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
-		+ buf_pool->init_flush[BUF_FLUSH_LIST],
-		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
-
-	current_time = time(NULL);
-	time_elapsed = 0.001 + difftime(current_time,
-					buf_pool->last_printout_time);
-	buf_pool->last_printout_time = current_time;
-
-	fprintf(file,
-		"Pages read %lu, created %lu, written %lu\n"
-		"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
-		(ulong) buf_pool->n_pages_read,
-		(ulong) buf_pool->n_pages_created,
-		(ulong) buf_pool->n_pages_written,
-		(buf_pool->n_pages_read - buf_pool->n_pages_read_old)
-		/ time_elapsed,
-		(buf_pool->n_pages_created - buf_pool->n_pages_created_old)
-		/ time_elapsed,
-		(buf_pool->n_pages_written - buf_pool->n_pages_written_old)
-		/ time_elapsed);
-
-	if (srv_use_awe) {
-		fprintf(file, "AWE: %.2f page remaps/s\n",
-			(buf_pool->n_pages_awe_remapped
-			 - buf_pool->n_pages_awe_remapped_old)
-			/ time_elapsed);
-	}
-
-	if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
-		fprintf(file, "Buffer pool hit rate %lu / 1000\n",
-			(ulong)
-			(1000 - ((1000 * (buf_pool->n_pages_read
-					  - buf_pool->n_pages_read_old))
-				 / (buf_pool->n_page_gets
-				    - buf_pool->n_page_gets_old))));
-	} else {
-		fputs("No buffer pool page gets since the last printout\n",
-		      file);
-	}
-
-	buf_pool->n_page_gets_old = buf_pool->n_page_gets;
-	buf_pool->n_pages_read_old = buf_pool->n_pages_read;
-	buf_pool->n_pages_created_old = buf_pool->n_pages_created;
-	buf_pool->n_pages_written_old = buf_pool->n_pages_written;
-	buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
-
-	mutex_exit(&(buf_pool->mutex));
-}
-
-/**************************************************************************
-Refreshes the statistics used to print per-second averages. */
-
-void
-buf_refresh_io_stats(void)
-/*======================*/
-{
-	buf_pool->last_printout_time = time(NULL);
-	buf_pool->n_page_gets_old = buf_pool->n_page_gets;
-	buf_pool->n_pages_read_old = buf_pool->n_pages_read;
-	buf_pool->n_pages_created_old = buf_pool->n_pages_created;
-	buf_pool->n_pages_written_old = buf_pool->n_pages_written;
-	buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
-}
-
-/*************************************************************************
-Checks that all file pages in the buffer are in a replaceable state. */
-
-ibool
-buf_all_freed(void)
-/*===============*/
-{
-	buf_block_t*	block;
-	ulint		i;
-
-	ut_ad(buf_pool);
-
-	mutex_enter(&(buf_pool->mutex));
-
-	for (i = 0; i < buf_pool->curr_size; i++) {
-
-		block = buf_pool_get_nth_block(buf_pool, i);
-
-		mutex_enter(&block->mutex);
-
-		if (block->state == BUF_BLOCK_FILE_PAGE) {
-
-			if (!buf_flush_ready_for_replace(block)) {
-
-				fprintf(stderr,
-					"Page %lu %lu still fixed or dirty\n",
-					(ulong) block->space,
-					(ulong) block->offset);
-				ut_error;
-			}
-		}
-
-		mutex_exit(&block->mutex);
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(TRUE);
-}
-
-/*************************************************************************
-Checks that there currently are no pending i/o-operations for the buffer
-pool. */
-
-ibool
-buf_pool_check_no_pending_io(void)
-/*==============================*/
-				/* out: TRUE if there is no pending i/o */
-{
-	ibool	ret;
-
-	mutex_enter(&(buf_pool->mutex));
-
-	if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
-	    + buf_pool->n_flush[BUF_FLUSH_LIST]
-	    + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
-		ret = FALSE;
-	} else {
-		ret = TRUE;
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(ret);
-}
-
-/*************************************************************************
-Gets the current length of the free list of buffer blocks. */
-
-ulint
-buf_get_free_list_len(void)
-/*=======================*/
-{
-	ulint	len;
-
-	mutex_enter(&(buf_pool->mutex));
-
-	len = UT_LIST_GET_LEN(buf_pool->free);
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(len);
-}
diff --git a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
deleted file mode 100644
index 423c08c0569..00000000000
--- a/storage/innobase/buf/buf0flu.c
+++ /dev/null
@@ -1,1115 +0,0 @@
-/******************************************************
-The database buffer buf_pool flush algorithm
-
-(c) 1995-2001 Innobase Oy
-
-Created 11/11/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0flu.h"
-
-#ifdef UNIV_NONINL
-#include "buf0flu.ic"
-#include "trx0sys.h"
-#endif
-
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "page0page.h"
-#include "fil0fil.h"
-#include "buf0buf.h"
-#include "buf0lru.h"
-#include "buf0rea.h"
-#include "ibuf0ibuf.h"
-#include "log0log.h"
-#include "os0file.h"
-#include "trx0sys.h"
-#include "srv0srv.h"
-
-/* When flushed, dirty blocks are searched in neighborhoods of this size, and
-flushed along with the original page. */
-
-#define BUF_FLUSH_AREA		ut_min(BUF_READ_AHEAD_AREA,\
-		buf_pool->curr_size / 16)
-
-/**********************************************************************
-Validates the flush list. */
-static
-ibool
-buf_flush_validate_low(void);
-/*========================*/
-		/* out: TRUE if ok */
-
-/************************************************************************
-Inserts a modified block into the flush list. */
-
-void
-buf_flush_insert_into_flush_list(
-/*=============================*/
-	buf_block_t*	block)	/* in: block which is modified */
-{
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-	ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
-	      || (ut_dulint_cmp((UT_LIST_GET_FIRST(buf_pool->flush_list))
-				->oldest_modification,
-				block->oldest_modification) <= 0));
-
-	UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
-
-	ut_ad(buf_flush_validate_low());
-}
-
-/************************************************************************
-Inserts a modified block into the flush list in the right sorted position.
-This function is used by recovery, because there the modifications do not
-necessarily come in the order of lsn's. */
-
-void
-buf_flush_insert_sorted_into_flush_list(
-/*====================================*/
-	buf_block_t*	block)	/* in: block which is modified */
-{
-	buf_block_t*	prev_b;
-	buf_block_t*	b;
-
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-
-	prev_b = NULL;
-	b = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
-	while (b && (ut_dulint_cmp(b->oldest_modification,
-				   block->oldest_modification) > 0)) {
-		prev_b = b;
-		b = UT_LIST_GET_NEXT(flush_list, b);
-	}
-
-	if (prev_b == NULL) {
-		UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
-	} else {
-		UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list, prev_b,
-				     block);
-	}
-
-	ut_ad(buf_flush_validate_low());
-}
-
-/************************************************************************
-Returns TRUE if the file page block is immediately suitable for replacement,
-i.e., the transition FILE_PAGE => NOT_USED allowed. */
-
-ibool
-buf_flush_ready_for_replace(
-/*========================*/
-				/* out: TRUE if can replace immediately */
-	buf_block_t*	block)	/* in: buffer control block, must be in state
-				BUF_BLOCK_FILE_PAGE and in the LRU list */
-{
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-	ut_ad(mutex_own(&block->mutex));
-	if (block->state != BUF_BLOCK_FILE_PAGE) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"  InnoDB: Error: buffer block state %lu"
-			" in the LRU list!\n",
-			(ulong)block->state);
-		ut_print_buf(stderr, block, sizeof(buf_block_t));
-
-		return(FALSE);
-	}
-
-	if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
-	    || (block->buf_fix_count != 0)
-	    || (block->io_fix != 0)) {
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-
-/************************************************************************
-Returns TRUE if the block is modified and ready for flushing. */
-UNIV_INLINE
-ibool
-buf_flush_ready_for_flush(
-/*======================*/
-				/* out: TRUE if can flush immediately */
-	buf_block_t*	block,	/* in: buffer control block, must be in state
-				BUF_BLOCK_FILE_PAGE */
-	ulint		flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-{
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-	ut_ad(mutex_own(&(block->mutex)));
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-	if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
-	    && (block->io_fix == 0)) {
-		if (flush_type != BUF_FLUSH_LRU) {
-
-			return(TRUE);
-
-		} else if (block->buf_fix_count == 0) {
-
-			/* If we are flushing the LRU list, to avoid deadlocks
-			we require the block not to be bufferfixed, and hence
-			not latched. */
-
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-/************************************************************************
-Updates the flush system data structures when a write is completed. */
-
-void
-buf_flush_write_complete(
-/*=====================*/
-	buf_block_t*	block)	/* in: pointer to the block in question */
-{
-	ut_ad(block);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-	block->oldest_modification = ut_dulint_zero;
-
-	UT_LIST_REMOVE(flush_list, buf_pool->flush_list, block);
-
-	ut_d(UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list));
-
-	(buf_pool->n_flush[block->flush_type])--;
-
-	if (block->flush_type == BUF_FLUSH_LRU) {
-		/* Put the block to the end of the LRU list to wait to be
-		moved to the free list */
-
-		buf_LRU_make_block_old(block);
-
-		buf_pool->LRU_flush_ended++;
-	}
-
-	/* fprintf(stderr, "n pending flush %lu\n",
-	buf_pool->n_flush[block->flush_type]); */
-
-	if ((buf_pool->n_flush[block->flush_type] == 0)
-	    && (buf_pool->init_flush[block->flush_type] == FALSE)) {
-
-		/* The running flush batch has ended */
-
-		os_event_set(buf_pool->no_flush[block->flush_type]);
-	}
-}
-
-/************************************************************************
-Flushes possible buffered writes from the doublewrite memory buffer to disk,
-and also wakes up the aio thread if simulated aio is used. It is very
-important to call this function after a batch of writes has been posted,
-and also when we may have to wait for a page latch! Otherwise a deadlock
-of threads can occur. */
-static
-void
-buf_flush_buffered_writes(void)
-/*===========================*/
-{
-	buf_block_t*	block;
-	byte*		write_buf;
-	ulint		len;
-	ulint		len2;
-	ulint		i;
-
-	if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
-		os_aio_simulated_wake_handler_threads();
-
-		return;
-	}
-
-	mutex_enter(&(trx_doublewrite->mutex));
-
-	/* Write first to doublewrite buffer blocks. We use synchronous
-	aio and thus know that file write has been completed when the
-	control returns. */
-
-	if (trx_doublewrite->first_free == 0) {
-
-		mutex_exit(&(trx_doublewrite->mutex));
-
-		return;
-	}
-
-	for (i = 0; i < trx_doublewrite->first_free; i++) {
-
-		block = trx_doublewrite->buf_block_arr[i];
-		ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-		if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
-		    != mach_read_from_4(block->frame + UNIV_PAGE_SIZE
-					- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				"  InnoDB: ERROR: The page to be written"
-				" seems corrupt!\n"
-				"InnoDB: The lsn fields do not match!"
-				" Noticed in the buffer pool\n"
-				"InnoDB: before posting to the"
-				" doublewrite buffer.\n");
-		}
-
-		if (block->check_index_page_at_flush
-		    && !page_simple_validate(block->frame)) {
-
-			buf_page_print(block->frame);
-
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				"  InnoDB: Apparent corruption of an"
-				" index page n:o %lu in space %lu\n"
-				"InnoDB: to be written to data file."
-				" We intentionally crash server\n"
-				"InnoDB: to prevent corrupt data"
-				" from ending up in data\n"
-				"InnoDB: files.\n",
-				(ulong) block->offset, (ulong) block->space);
-
-			ut_error;
-		}
-	}
-
-	/* increment the doublewrite flushed pages counter */
-	srv_dblwr_pages_written+= trx_doublewrite->first_free;
-	srv_dblwr_writes++;
-
-	if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
-		len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
-	} else {
-		len = trx_doublewrite->first_free * UNIV_PAGE_SIZE;
-	}
-
-	fil_io(OS_FILE_WRITE,
-	       TRUE, TRX_SYS_SPACE,
-	       trx_doublewrite->block1, 0, len,
-	       (void*)trx_doublewrite->write_buf, NULL);
-
-	write_buf = trx_doublewrite->write_buf;
-
-	for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len; len2 += UNIV_PAGE_SIZE) {
-		if (mach_read_from_4(write_buf + len2 + FIL_PAGE_LSN + 4)
-		    != mach_read_from_4(write_buf + len2 + UNIV_PAGE_SIZE
-					- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				"  InnoDB: ERROR: The page to be written"
-				" seems corrupt!\n"
-				"InnoDB: The lsn fields do not match!"
-				" Noticed in the doublewrite block1.\n");
-		}
-	}
-
-	if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
-		len = (trx_doublewrite->first_free
-		       - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE;
-
-		fil_io(OS_FILE_WRITE,
-		       TRUE, TRX_SYS_SPACE,
-		       trx_doublewrite->block2, 0, len,
-		       (void*)(trx_doublewrite->write_buf
-			       + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
-			       * UNIV_PAGE_SIZE),
-		       NULL);
-
-		write_buf = trx_doublewrite->write_buf
-			+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
-		for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
-		     len2 += UNIV_PAGE_SIZE) {
-			if (mach_read_from_4(write_buf + len2
-					     + FIL_PAGE_LSN + 4)
-			    != mach_read_from_4(write_buf + len2
-						+ UNIV_PAGE_SIZE
-						- FIL_PAGE_END_LSN_OLD_CHKSUM
-						+ 4)) {
-				ut_print_timestamp(stderr);
-				fprintf(stderr,
-					"  InnoDB: ERROR: The page to be"
-					" written seems corrupt!\n"
-					"InnoDB: The lsn fields do not match!"
-					" Noticed in"
-					" the doublewrite block2.\n");
-			}
-		}
-	}
-
-	/* Now flush the doublewrite buffer data to disk */
-
-	fil_flush(TRX_SYS_SPACE);
-
-	/* We know that the writes have been flushed to disk now
-	and in recovery we will find them in the doublewrite buffer
-	blocks. Next do the writes to the intended positions. */
-
-	for (i = 0; i < trx_doublewrite->first_free; i++) {
-		block = trx_doublewrite->buf_block_arr[i];
-
-		if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
-		    != mach_read_from_4(block->frame + UNIV_PAGE_SIZE
-					- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				"  InnoDB: ERROR: The page to be written"
-				" seems corrupt!\n"
-				"InnoDB: The lsn fields do not match!"
-				" Noticed in the buffer pool\n"
-				"InnoDB: after posting and flushing"
-				" the doublewrite buffer.\n"
-				"InnoDB: Page buf fix count %lu,"
-				" io fix %lu, state %lu\n",
-				(ulong)block->buf_fix_count,
-				(ulong)block->io_fix,
-				(ulong)block->state);
-		}
-		ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
-		       FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
-		       (void*)block->frame, (void*)block);
-	}
-
-	/* Wake possible simulated aio thread to actually post the
-	writes to the operating system */
-
-	os_aio_simulated_wake_handler_threads();
-
-	/* Wait that all async writes to tablespaces have been posted to
-	the OS */
-
-	os_aio_wait_until_no_pending_writes();
-
-	/* Now we flush the data to disk (for example, with fsync) */
-
-	fil_flush_file_spaces(FIL_TABLESPACE);
-
-	/* We can now reuse the doublewrite memory buffer: */
-
-	trx_doublewrite->first_free = 0;
-
-	mutex_exit(&(trx_doublewrite->mutex));
-}
-
-/************************************************************************
-Posts a buffer page for writing. If the doublewrite memory buffer is
-full, calls buf_flush_buffered_writes and waits for for free space to
-appear. */
-static
-void
-buf_flush_post_to_doublewrite_buf(
-/*==============================*/
-	buf_block_t*	block)	/* in: buffer block to write */
-{
-try_again:
-	mutex_enter(&(trx_doublewrite->mutex));
-
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-	if (trx_doublewrite->first_free
-	    >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
-		mutex_exit(&(trx_doublewrite->mutex));
-
-		buf_flush_buffered_writes();
-
-		goto try_again;
-	}
-
-	ut_memcpy(trx_doublewrite->write_buf
-		  + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
-		  block->frame, UNIV_PAGE_SIZE);
-
-	trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = block;
-
-	trx_doublewrite->first_free++;
-
-	if (trx_doublewrite->first_free
-	    >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
-		mutex_exit(&(trx_doublewrite->mutex));
-
-		buf_flush_buffered_writes();
-
-		return;
-	}
-
-	mutex_exit(&(trx_doublewrite->mutex));
-}
-
-/************************************************************************
-Initializes a page for writing to the tablespace. */
-
-void
-buf_flush_init_for_writing(
-/*=======================*/
-	byte*	page,		/* in: page */
-	dulint	newest_lsn,	/* in: newest modification lsn to the page */
-	ulint	space,		/* in: space id */
-	ulint	page_no)	/* in: page number */
-{
-	/* Write the newest modification lsn to the page header and trailer */
-	mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
-
-	mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
-			newest_lsn);
-	/* Write the page number and the space id */
-
-	mach_write_to_4(page + FIL_PAGE_OFFSET, page_no);
-	mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space);
-
-	/* Store the new formula checksum */
-
-	mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
-			srv_use_checksums
-			? buf_calc_page_new_checksum(page)
-			: BUF_NO_CHECKSUM_MAGIC);
-
-	/* We overwrite the first 4 bytes of the end lsn field to store
-	the old formula checksum. Since it depends also on the field
-	FIL_PAGE_SPACE_OR_CHKSUM, it has to be calculated after storing the
-	new formula checksum. */
-
-	mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
-			srv_use_checksums
-			? buf_calc_page_old_checksum(page)
-			: BUF_NO_CHECKSUM_MAGIC);
-}
-
-/************************************************************************
-Does an asynchronous write of a buffer page. NOTE: in simulated aio and
-also when the doublewrite buffer is used, we must call
-buf_flush_buffered_writes after we have posted a batch of writes! */
-static
-void
-buf_flush_write_block_low(
-/*======================*/
-	buf_block_t*	block)	/* in: buffer block to write */
-{
-#ifdef UNIV_LOG_DEBUG
-	static ibool univ_log_debug_warned;
-#endif /* UNIV_LOG_DEBUG */
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-#ifdef UNIV_IBUF_DEBUG
-	ut_a(ibuf_count_get(block->space, block->offset) == 0);
-#endif
-	ut_ad(!ut_dulint_is_zero(block->newest_modification));
-
-#ifdef UNIV_LOG_DEBUG
-	if (!univ_log_debug_warned) {
-		univ_log_debug_warned = TRUE;
-		fputs("Warning: cannot force log to disk if"
-		      " UNIV_LOG_DEBUG is defined!\n"
-		      "Crash recovery will not work!\n",
-		      stderr);
-	}
-#else
-	/* Force the log to the disk before writing the modified block */
-	log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
-#endif
-	buf_flush_init_for_writing(block->frame, block->newest_modification,
-				   block->space, block->offset);
-	if (!srv_use_doublewrite_buf || !trx_doublewrite) {
-		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
-		       FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
-		       (void*)block->frame, (void*)block);
-	} else {
-		buf_flush_post_to_doublewrite_buf(block);
-	}
-}
-
-/************************************************************************
-Writes a page asynchronously from the buffer buf_pool to a file, if it can be
-found in the buf_pool and it is in a flushable state. NOTE: in simulated aio
-we must call os_aio_simulated_wake_handler_threads after we have posted a batch
-of writes! */
-static
-ulint
-buf_flush_try_page(
-/*===============*/
-				/* out: 1 if a page was flushed, 0 otherwise */
-	ulint	space,		/* in: space id */
-	ulint	offset,		/* in: page offset */
-	ulint	flush_type)	/* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST, or
-				BUF_FLUSH_SINGLE_PAGE */
-{
-	buf_block_t*	block;
-	ibool		locked;
-
-	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
-	      || flush_type == BUF_FLUSH_SINGLE_PAGE);
-
-	mutex_enter(&(buf_pool->mutex));
-
-	block = buf_page_hash_get(space, offset);
-
-	ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
-
-	if (!block) {
-		mutex_exit(&(buf_pool->mutex));
-		return(0);
-	}
-
-	mutex_enter(&block->mutex);
-
-	if (flush_type == BUF_FLUSH_LIST
-	    && buf_flush_ready_for_flush(block, flush_type)) {
-
-		block->io_fix = BUF_IO_WRITE;
-
-		/* If AWE is enabled and the page is not mapped to a frame,
-		then map it */
-
-		if (block->frame == NULL) {
-			ut_a(srv_use_awe);
-
-			/* We set second parameter TRUE because the block is
-			in the LRU list and we must put it to
-			awe_LRU_free_mapped list once mapped to a frame */
-
-			buf_awe_map_page_to_frame(block, TRUE);
-		}
-
-		block->flush_type = flush_type;
-
-		if (buf_pool->n_flush[flush_type] == 0) {
-
-			os_event_reset(buf_pool->no_flush[flush_type]);
-		}
-
-		(buf_pool->n_flush[flush_type])++;
-
-		locked = FALSE;
-
-		/* If the simulated aio thread is not running, we must
-		not wait for any latch, as we may end up in a deadlock:
-		if buf_fix_count == 0, then we know we need not wait */
-
-		if (block->buf_fix_count == 0) {
-			rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
-
-			locked = TRUE;
-		}
-
-		mutex_exit(&block->mutex);
-		mutex_exit(&(buf_pool->mutex));
-
-		if (!locked) {
-			buf_flush_buffered_writes();
-
-			rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
-		}
-
-#ifdef UNIV_DEBUG
-		if (buf_debug_prints) {
-			fprintf(stderr,
-				"Flushing page space %lu, page no %lu \n",
-				(ulong) block->space, (ulong) block->offset);
-		}
-#endif /* UNIV_DEBUG */
-
-		buf_flush_write_block_low(block);
-
-		return(1);
-
-	} else if (flush_type == BUF_FLUSH_LRU
-		   && buf_flush_ready_for_flush(block, flush_type)) {
-
-		/* VERY IMPORTANT:
-		Because any thread may call the LRU flush, even when owning
-		locks on pages, to avoid deadlocks, we must make sure that the
-		s-lock is acquired on the page without waiting: this is
-		accomplished because in the if-condition above we require
-		the page not to be bufferfixed (in function
-		..._ready_for_flush). */
-
-		block->io_fix = BUF_IO_WRITE;
-
-		/* If AWE is enabled and the page is not mapped to a frame,
-		then map it */
-
-		if (block->frame == NULL) {
-			ut_a(srv_use_awe);
-
-			/* We set second parameter TRUE because the block is
-			in the LRU list and we must put it to
-			awe_LRU_free_mapped list once mapped to a frame */
-
-			buf_awe_map_page_to_frame(block, TRUE);
-		}
-
-		block->flush_type = flush_type;
-
-		if (buf_pool->n_flush[flush_type] == 0) {
-
-			os_event_reset(buf_pool->no_flush[flush_type]);
-		}
-
-		(buf_pool->n_flush[flush_type])++;
-
-		rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
-
-		/* Note that the s-latch is acquired before releasing the
-		buf_pool mutex: this ensures that the latch is acquired
-		immediately. */
-
-		mutex_exit(&block->mutex);
-		mutex_exit(&(buf_pool->mutex));
-
-		buf_flush_write_block_low(block);
-
-		return(1);
-
-	} else if (flush_type == BUF_FLUSH_SINGLE_PAGE
-		   && buf_flush_ready_for_flush(block, flush_type)) {
-
-		block->io_fix = BUF_IO_WRITE;
-
-		/* If AWE is enabled and the page is not mapped to a frame,
-		then map it */
-
-		if (block->frame == NULL) {
-			ut_a(srv_use_awe);
-
-			/* We set second parameter TRUE because the block is
-			in the LRU list and we must put it to
-			awe_LRU_free_mapped list once mapped to a frame */
-
-			buf_awe_map_page_to_frame(block, TRUE);
-		}
-
-		block->flush_type = flush_type;
-
-		if (buf_pool->n_flush[block->flush_type] == 0) {
-
-			os_event_reset(buf_pool->no_flush[block->flush_type]);
-		}
-
-		(buf_pool->n_flush[flush_type])++;
-
-		mutex_exit(&block->mutex);
-		mutex_exit(&(buf_pool->mutex));
-
-		rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
-
-#ifdef UNIV_DEBUG
-		if (buf_debug_prints) {
-			fprintf(stderr,
-				"Flushing single page space %lu,"
-				" page no %lu \n",
-				(ulong) block->space,
-				(ulong) block->offset);
-		}
-#endif /* UNIV_DEBUG */
-
-		buf_flush_write_block_low(block);
-
-		return(1);
-	}
-
-	mutex_exit(&block->mutex);
-	mutex_exit(&(buf_pool->mutex));
-
-	return(0);
-}
-
-/***************************************************************
-Flushes to disk all flushable pages within the flush area. */
-static
-ulint
-buf_flush_try_neighbors(
-/*====================*/
-				/* out: number of pages flushed */
-	ulint	space,		/* in: space id */
-	ulint	offset,		/* in: page offset */
-	ulint	flush_type)	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-{
-	buf_block_t*	block;
-	ulint		low, high;
-	ulint		count		= 0;
-	ulint		i;
-
-	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-
-	low = (offset / BUF_FLUSH_AREA) * BUF_FLUSH_AREA;
-	high = (offset / BUF_FLUSH_AREA + 1) * BUF_FLUSH_AREA;
-
-	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
-		/* If there is little space, it is better not to flush any
-		block except from the end of the LRU list */
-
-		low = offset;
-		high = offset + 1;
-	}
-
-	/* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
-
-	if (high > fil_space_get_size(space)) {
-		high = fil_space_get_size(space);
-	}
-
-	mutex_enter(&(buf_pool->mutex));
-
-	for (i = low; i < high; i++) {
-
-		block = buf_page_hash_get(space, i);
-		ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
-
-		if (!block) {
-
-			continue;
-
-		} else if (flush_type == BUF_FLUSH_LRU && i != offset
-			   && !block->old) {
-
-			/* We avoid flushing 'non-old' blocks in an LRU flush,
-			because the flushed blocks are soon freed */
-
-			continue;
-		} else {
-
-			mutex_enter(&block->mutex);
-
-			if (buf_flush_ready_for_flush(block, flush_type)
-			    && (i == offset || block->buf_fix_count == 0)) {
-				/* We only try to flush those
-				neighbors != offset where the buf fix count is
-				zero, as we then know that we probably can
-				latch the page without a semaphore wait.
-				Semaphore waits are expensive because we must
-				flush the doublewrite buffer before we start
-				waiting. */
-
-				mutex_exit(&block->mutex);
-
-				mutex_exit(&(buf_pool->mutex));
-
-				/* Note: as we release the buf_pool mutex
-				above, in buf_flush_try_page we cannot be sure
-				the page is still in a flushable state:
-				therefore we check it again inside that
-				function. */
-
-				count += buf_flush_try_page(space, i,
-							    flush_type);
-
-				mutex_enter(&(buf_pool->mutex));
-			} else {
-				mutex_exit(&block->mutex);
-			}
-		}
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(count);
-}
-
-/***********************************************************************
-This utility flushes dirty blocks from the end of the LRU list or flush_list.
-NOTE 1: in the case of an LRU flush the calling thread may own latches to
-pages: to avoid deadlocks, this function must be written so that it cannot
-end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages! */
-
-ulint
-buf_flush_batch(
-/*============*/
-				/* out: number of blocks for which the write
-				request was queued; ULINT_UNDEFINED if there
-				was a flush of the same type already running */
-	ulint	flush_type,	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
-				BUF_FLUSH_LIST, then the caller must not own
-				any latches on pages */
-	ulint	min_n,		/* in: wished minimum mumber of blocks flushed
-				(it is not guaranteed that the actual number
-				is that big, though) */
-	dulint	lsn_limit)	/* in the case BUF_FLUSH_LIST all blocks whose
-				oldest_modification is smaller than this
-				should be flushed (if their number does not
-				exceed min_n), otherwise ignored */
-{
-	buf_block_t*	block;
-	ulint		page_count	= 0;
-	ulint		old_page_count;
-	ulint		space;
-	ulint		offset;
-	ibool		found;
-
-	ut_ad((flush_type == BUF_FLUSH_LRU)
-	      || (flush_type == BUF_FLUSH_LIST));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad((flush_type != BUF_FLUSH_LIST)
-	      || sync_thread_levels_empty_gen(TRUE));
-#endif /* UNIV_SYNC_DEBUG */
-	mutex_enter(&(buf_pool->mutex));
-
-	if ((buf_pool->n_flush[flush_type] > 0)
-	    || (buf_pool->init_flush[flush_type] == TRUE)) {
-
-		/* There is already a flush batch of the same type running */
-
-		mutex_exit(&(buf_pool->mutex));
-
-		return(ULINT_UNDEFINED);
-	}
-
-	(buf_pool->init_flush)[flush_type] = TRUE;
-
-	for (;;) {
-		/* If we have flushed enough, leave the loop */
-		if (page_count >= min_n) {
-
-			break;
-		}
-
-		/* Start from the end of the list looking for a suitable
-		block to be flushed. */
-
-		if (flush_type == BUF_FLUSH_LRU) {
-			block = UT_LIST_GET_LAST(buf_pool->LRU);
-		} else {
-			ut_ad(flush_type == BUF_FLUSH_LIST);
-
-			block = UT_LIST_GET_LAST(buf_pool->flush_list);
-			if (!block
-			    || (ut_dulint_cmp(block->oldest_modification,
-					      lsn_limit) >= 0)) {
-				/* We have flushed enough */
-
-				break;
-			}
-		}
-
-		found = FALSE;
-
-		/* Note that after finding a single flushable page, we try to
-		flush also all its neighbors, and after that start from the
-		END of the LRU list or flush list again: the list may change
-		during the flushing and we cannot safely preserve within this
-		function a pointer to a block in the list! */
-
-		while ((block != NULL) && !found) {
-			ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-			mutex_enter(&block->mutex);
-
-			if (buf_flush_ready_for_flush(block, flush_type)) {
-
-				found = TRUE;
-				space = block->space;
-				offset = block->offset;
-
-				mutex_exit(&block->mutex);
-				mutex_exit(&(buf_pool->mutex));
-
-				old_page_count = page_count;
-
-				/* Try to flush also all the neighbors */
-				page_count += buf_flush_try_neighbors(
-					space, offset, flush_type);
-				/* fprintf(stderr,
-				"Flush type %lu, page no %lu, neighb %lu\n",
-				flush_type, offset,
-				page_count - old_page_count); */
-
-				mutex_enter(&(buf_pool->mutex));
-
-			} else if (flush_type == BUF_FLUSH_LRU) {
-
-				mutex_exit(&block->mutex);
-
-				block = UT_LIST_GET_PREV(LRU, block);
-			} else {
-				ut_ad(flush_type == BUF_FLUSH_LIST);
-
-				mutex_exit(&block->mutex);
-
-				block = UT_LIST_GET_PREV(flush_list, block);
-			}
-		}
-
-		/* If we could not find anything to flush, leave the loop */
-
-		if (!found) {
-			break;
-		}
-	}
-
-	(buf_pool->init_flush)[flush_type] = FALSE;
-
-	if ((buf_pool->n_flush[flush_type] == 0)
-	    && (buf_pool->init_flush[flush_type] == FALSE)) {
-
-		/* The running flush batch has ended */
-
-		os_event_set(buf_pool->no_flush[flush_type]);
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	buf_flush_buffered_writes();
-
-#ifdef UNIV_DEBUG
-	if (buf_debug_prints && page_count > 0) {
-		ut_a(flush_type == BUF_FLUSH_LRU
-		     || flush_type == BUF_FLUSH_LIST);
-		fprintf(stderr, flush_type == BUF_FLUSH_LRU
-			? "Flushed %lu pages in LRU flush\n"
-			: "Flushed %lu pages in flush list flush\n",
-			(ulong) page_count);
-	}
-#endif /* UNIV_DEBUG */
-
-	srv_buf_pool_flushed += page_count;
-
-	return(page_count);
-}
-
-/**********************************************************************
-Waits until a flush batch of the given type ends */
-
-void
-buf_flush_wait_batch_end(
-/*=====================*/
-	ulint	type)	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-{
-	ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
-
-	os_event_wait(buf_pool->no_flush[type]);
-}
-
-/**********************************************************************
-Gives a recommendation of how many blocks should be flushed to establish
-a big enough margin of replaceable blocks near the end of the LRU list
-and in the free list. */
-static
-ulint
-buf_flush_LRU_recommendation(void)
-/*==============================*/
-			/* out: number of blocks which should be flushed
-			from the end of the LRU list */
-{
-	buf_block_t*	block;
-	ulint		n_replaceable;
-	ulint		distance	= 0;
-
-	mutex_enter(&(buf_pool->mutex));
-
-	n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
-
-	block = UT_LIST_GET_LAST(buf_pool->LRU);
-
-	while ((block != NULL)
-	       && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
-		   + BUF_FLUSH_EXTRA_MARGIN)
-	       && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
-
-		mutex_enter(&block->mutex);
-
-		if (buf_flush_ready_for_replace(block)) {
-			n_replaceable++;
-		}
-
-		mutex_exit(&block->mutex);
-
-		distance++;
-
-		block = UT_LIST_GET_PREV(LRU, block);
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
-
-		return(0);
-	}
-
-	return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN
-	       - n_replaceable);
-}
-
-/*************************************************************************
-Flushes pages from the end of the LRU list if there is too small a margin
-of replaceable pages there or in the free list. VERY IMPORTANT: this function
-is called also by threads which have locks on pages. To avoid deadlocks, we
-flush only pages such that the s-lock required for flushing can be acquired
-immediately, without waiting. */
-
-void
-buf_flush_free_margin(void)
-/*=======================*/
-{
-	ulint	n_to_flush;
-	ulint	n_flushed;
-
-	n_to_flush = buf_flush_LRU_recommendation();
-
-	if (n_to_flush > 0) {
-		n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush,
-					    ut_dulint_zero);
-		if (n_flushed == ULINT_UNDEFINED) {
-			/* There was an LRU type flush batch already running;
-			let us wait for it to end */
-
-			buf_flush_wait_batch_end(BUF_FLUSH_LRU);
-		}
-	}
-}
-
-/**********************************************************************
-Validates the flush list. */
-static
-ibool
-buf_flush_validate_low(void)
-/*========================*/
-		/* out: TRUE if ok */
-{
-	buf_block_t*	block;
-	dulint		om;
-
-	UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list);
-
-	block = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
-	while (block != NULL) {
-		om = block->oldest_modification;
-		ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-		ut_a(ut_dulint_cmp(om, ut_dulint_zero) > 0);
-
-		block = UT_LIST_GET_NEXT(flush_list, block);
-
-		if (block) {
-			ut_a(ut_dulint_cmp(om, block->oldest_modification)
-			     >= 0);
-		}
-	}
-
-	return(TRUE);
-}
-
-/**********************************************************************
-Validates the flush list. */
-
-ibool
-buf_flush_validate(void)
-/*====================*/
-		/* out: TRUE if ok */
-{
-	ibool	ret;
-
-	mutex_enter(&(buf_pool->mutex));
-
-	ret = buf_flush_validate_low();
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(ret);
-}
diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
deleted file mode 100644
index d3c787d1578..00000000000
--- a/storage/innobase/buf/buf0lru.c
+++ /dev/null
@@ -1,1237 +0,0 @@
-/******************************************************
-The database buffer replacement algorithm
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0lru.h"
-
-#ifdef UNIV_NONINL
-#include "buf0lru.ic"
-#include "srv0srv.h"	/* Needed to getsrv_print_innodb_monitor */
-#endif
-
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "ut0rnd.h"
-#include "sync0sync.h"
-#include "sync0rw.h"
-#include "hash0hash.h"
-#include "os0sync.h"
-#include "fil0fil.h"
-#include "btr0btr.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "buf0rea.h"
-#include "btr0sea.h"
-#include "os0file.h"
-#include "log0recv.h"
-
-/* The number of blocks from the LRU_old pointer onward, including the block
-pointed to, must be 3/8 of the whole LRU list length, except that the
-tolerance defined below is allowed. Note that the tolerance must be small
-enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the
-LRU_old pointer is not allowed to point to either end of the LRU list. */
-
-#define BUF_LRU_OLD_TOLERANCE	20
-
-/* The whole LRU list length is divided by this number to determine an
-initial segment in buf_LRU_get_recent_limit */
-
-#define BUF_LRU_INITIAL_RATIO	8
-
-/* When dropping the search hash index entries before deleting an ibd
-file, we build a local array of pages belonging to that tablespace
-in the buffer pool. Following is the size of that array. */
-#define BUF_LRU_DROP_SEARCH_HASH_SIZE	1024
-
-/* If we switch on the InnoDB monitor because there are too few available
-frames in the buffer pool, we set this to TRUE */
-ibool	buf_lru_switched_on_innodb_mon	= FALSE;
-
-/**********************************************************************
-Takes a block out of the LRU list and page hash table and sets the block
-state to BUF_BLOCK_REMOVE_HASH. */
-static
-void
-buf_LRU_block_remove_hashed_page(
-/*=============================*/
-	buf_block_t*	block);	/* in: block, must contain a file page and
-				be in a state where it can be freed; there
-				may or may not be a hash index to the page */
-/**********************************************************************
-Puts a file page whose has no hash index to the free list. */
-static
-void
-buf_LRU_block_free_hashed_page(
-/*===========================*/
-	buf_block_t*	block);	/* in: block, must contain a file page and
-				be in a state where it can be freed */
-
-/**********************************************************************
-Attempts to drop page hash index on a batch of pages belonging to a
-particular space id. */
-static
-void
-buf_LRU_drop_page_hash_batch(
-/*=========================*/
-	ulint		id,	/* in: space id */
-	const ulint*	arr,	/* in: array of page_no */
-	ulint		count)	/* in: number of entries in array */
-{
-	ulint	i;
-
-	ut_ad(arr != NULL);
-	ut_ad(count <= BUF_LRU_DROP_SEARCH_HASH_SIZE);
-
-	for (i = 0; i < count; ++i) {
-		btr_search_drop_page_hash_when_freed(id, arr[i]);
-	}
-}
-
-/**********************************************************************
-When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
-hash index entries belonging to that table. This function tries to
-do that in batch. Note that this is a 'best effort' attempt and does
-not guarantee that ALL hash entries will be removed. */
-static
-void
-buf_LRU_drop_page_hash_for_tablespace(
-/*==================================*/
-	ulint	id)	/* in: space id */
-{
-	buf_block_t*	block;
-	ulint*		page_arr;
-	ulint		num_entries;
-
-	page_arr = ut_malloc(sizeof(ulint)
-			     * BUF_LRU_DROP_SEARCH_HASH_SIZE);
-	mutex_enter(&buf_pool->mutex);
-
-scan_again:
-	num_entries = 0;
-	block = UT_LIST_GET_LAST(buf_pool->LRU);
-
-	while (block != NULL) {
-		buf_block_t*	prev_block;
-
-		mutex_enter(&block->mutex);
-		prev_block = UT_LIST_GET_PREV(LRU, block);
-
-		ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-		if (block->space != id
-		    || block->buf_fix_count > 0
-		    || block->io_fix != 0) {
-			/* We leave the fixed pages as is in this scan.
-			To be dealt with later in the final scan. */
-			mutex_exit(&block->mutex);
-			goto next_page;
-		}
-
-		ut_ad(block->space == id);
-		if (block->is_hashed) {
-
-			/* Store the offset(i.e.: page_no) in the array
-			so that we can drop hash index in a batch
-			later. */
-			page_arr[num_entries] = block->offset;
-			mutex_exit(&block->mutex);
-			ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
-			++num_entries;
-
-			if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
-				goto next_page;
-			}
-			/* Array full. We release the buf_pool->mutex to
-			obey the latching order. */
-			mutex_exit(&buf_pool->mutex);
-
-			buf_LRU_drop_page_hash_batch(id, page_arr,
-						     num_entries);
-			num_entries = 0;
-			mutex_enter(&buf_pool->mutex);
-		} else {
-			mutex_exit(&block->mutex);
-		}
-
-next_page:
-		/* Note that we may have released the buf_pool->mutex
-		above after reading the prev_block during processing
-		of a page_hash_batch (i.e.: when the array was full).
-		This means that prev_block can change in LRU list.
-		This is OK because this function is a 'best effort'
-		to drop as many search hash entries as possible and
-		it does not guarantee that ALL such entries will be
-		dropped. */
-		block = prev_block;
-
-		/* If, however, block has been removed from LRU list
-		to the free list then we should restart the scan.
-		block->state is protected by buf_pool->mutex. */
-		if (block && block->state != BUF_BLOCK_FILE_PAGE) {
-			ut_a(num_entries == 0);
-			goto scan_again;
-		}
-	}
-
-	mutex_exit(&buf_pool->mutex);
-
-	/* Drop any remaining batch of search hashed pages. */
-	buf_LRU_drop_page_hash_batch(id, page_arr, num_entries);
-	ut_free(page_arr);
-}
-
-/**********************************************************************
-Invalidates all pages belonging to a given tablespace when we are deleting
-the data file(s) of that tablespace. */
-
-void
-buf_LRU_invalidate_tablespace(
-/*==========================*/
-	ulint	id)	/* in: space id */
-{
-	buf_block_t*	block;
-	ulint		page_no;
-	ibool		all_freed;
-
-	/* Before we attempt to drop pages one by one we first
-	attempt to drop page hash index entries in batches to make
-	it more efficient. The batching attempt is a best effort
-	attempt and does not guarantee that all pages hash entries
-	will be dropped. We get rid of remaining page hash entries
-	one by one below. */
-	buf_LRU_drop_page_hash_for_tablespace(id);
-
-scan_again:
-	mutex_enter(&(buf_pool->mutex));
-
-	all_freed = TRUE;
-
-	block = UT_LIST_GET_LAST(buf_pool->LRU);
-
-	while (block != NULL) {
-		buf_block_t*	prev_block;
-
-		mutex_enter(&block->mutex);
-		prev_block = UT_LIST_GET_PREV(LRU, block);
-
-		ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-		if (block->space == id
-		    && (block->buf_fix_count > 0 || block->io_fix != 0)) {
-
-			/* We cannot remove this page during this scan yet;
-			maybe the system is currently reading it in, or
-			flushing the modifications to the file */
-
-			all_freed = FALSE;
-
-			goto next_page;
-		}
-
-		if (block->space == id) {
-#ifdef UNIV_DEBUG
-			if (buf_debug_prints) {
-				fprintf(stderr,
-					"Dropping space %lu page %lu\n",
-					(ulong) block->space,
-					(ulong) block->offset);
-			}
-#endif
-			if (block->is_hashed) {
-				page_no = block->offset;
-
-				mutex_exit(&block->mutex);
-
-				mutex_exit(&(buf_pool->mutex));
-
-				/* Note that the following call will acquire
-				an S-latch on the page */
-
-				btr_search_drop_page_hash_when_freed(id,
-								     page_no);
-				goto scan_again;
-			}
-
-			if (0 != ut_dulint_cmp(block->oldest_modification,
-					       ut_dulint_zero)) {
-
-				/* Remove from the flush list of modified
-				blocks */
-				block->oldest_modification = ut_dulint_zero;
-
-				UT_LIST_REMOVE(flush_list,
-					       buf_pool->flush_list, block);
-			}
-
-			/* Remove from the LRU list */
-			buf_LRU_block_remove_hashed_page(block);
-			buf_LRU_block_free_hashed_page(block);
-		}
-next_page:
-		mutex_exit(&block->mutex);
-		block = prev_block;
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	if (!all_freed) {
-		os_thread_sleep(20000);
-
-		goto scan_again;
-	}
-}
-
-/**********************************************************************
-Gets the minimum LRU_position field for the blocks in an initial segment
-(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
-guaranteed to be precise, because the ulint_clock may wrap around. */
-
-ulint
-buf_LRU_get_recent_limit(void)
-/*==========================*/
-			/* out: the limit; zero if could not determine it */
-{
-	buf_block_t*	block;
-	ulint		len;
-	ulint		limit;
-
-	mutex_enter(&(buf_pool->mutex));
-
-	len = UT_LIST_GET_LEN(buf_pool->LRU);
-
-	if (len < BUF_LRU_OLD_MIN_LEN) {
-		/* The LRU list is too short to do read-ahead */
-
-		mutex_exit(&(buf_pool->mutex));
-
-		return(0);
-	}
-
-	block = UT_LIST_GET_FIRST(buf_pool->LRU);
-
-	limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO;
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(limit);
-}
-
-/**********************************************************************
-Look for a replaceable block from the end of the LRU list and put it to
-the free list if found. */
-
-ibool
-buf_LRU_search_and_free_block(
-/*==========================*/
-				/* out: TRUE if freed */
-	ulint	n_iterations)	/* in: how many times this has been called
-				repeatedly without result: a high value means
-				that we should search farther; if value is
-				k < 10, then we only search k/10 * [number
-				of pages in the buffer pool] from the end
-				of the LRU list */
-{
-	buf_block_t*	block;
-	ulint		distance = 0;
-	ibool		freed;
-
-	mutex_enter(&(buf_pool->mutex));
-
-	freed = FALSE;
-	block = UT_LIST_GET_LAST(buf_pool->LRU);
-
-	while (block != NULL) {
-		ut_a(block->in_LRU_list);
-
-		mutex_enter(&block->mutex);
-
-		if (buf_flush_ready_for_replace(block)) {
-
-#ifdef UNIV_DEBUG
-			if (buf_debug_prints) {
-				fprintf(stderr,
-					"Putting space %lu page %lu"
-					" to free list\n",
-					(ulong) block->space,
-					(ulong) block->offset);
-			}
-#endif /* UNIV_DEBUG */
-
-			buf_LRU_block_remove_hashed_page(block);
-
-			mutex_exit(&(buf_pool->mutex));
-			mutex_exit(&block->mutex);
-
-			/* Remove possible adaptive hash index built on the
-			page; in the case of AWE the block may not have a
-			frame at all */
-
-			if (block->frame) {
-				/* The page was declared uninitialized
-				by buf_LRU_block_remove_hashed_page().
-				We need to flag the contents of the
-				page valid (which it still is) in
-				order to avoid bogus Valgrind
-				warnings. */
-				UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
-				btr_search_drop_page_hash_index(block->frame);
-				UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
-			}
-
-			ut_a(block->buf_fix_count == 0);
-
-			mutex_enter(&(buf_pool->mutex));
-			mutex_enter(&block->mutex);
-
-			buf_LRU_block_free_hashed_page(block);
-			freed = TRUE;
-			mutex_exit(&block->mutex);
-
-			break;
-		}
-
-		mutex_exit(&block->mutex);
-
-		block = UT_LIST_GET_PREV(LRU, block);
-		distance++;
-
-		if (!freed && n_iterations <= 10
-		    && distance > 100 + (n_iterations * buf_pool->curr_size)
-		    / 10) {
-			buf_pool->LRU_flush_ended = 0;
-
-			mutex_exit(&(buf_pool->mutex));
-
-			return(FALSE);
-		}
-	}
-	if (buf_pool->LRU_flush_ended > 0) {
-		buf_pool->LRU_flush_ended--;
-	}
-	if (!freed) {
-		buf_pool->LRU_flush_ended = 0;
-	}
-	mutex_exit(&(buf_pool->mutex));
-
-	return(freed);
-}
-
-/**********************************************************************
-Tries to remove LRU flushed blocks from the end of the LRU list and put them
-to the free list. This is beneficial for the efficiency of the insert buffer
-operation, as flushed pages from non-unique non-clustered indexes are here
-taken out of the buffer pool, and their inserts redirected to the insert
-buffer. Otherwise, the flushed blocks could get modified again before read
-operations need new buffer blocks, and the i/o work done in flushing would be
-wasted. */
-
-void
-buf_LRU_try_free_flushed_blocks(void)
-/*=================================*/
-{
-	mutex_enter(&(buf_pool->mutex));
-
-	while (buf_pool->LRU_flush_ended > 0) {
-
-		mutex_exit(&(buf_pool->mutex));
-
-		buf_LRU_search_and_free_block(1);
-
-		mutex_enter(&(buf_pool->mutex));
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-}
-
-/**********************************************************************
-Returns TRUE if less than 25 % of the buffer pool is available. This can be
-used in heuristics to prevent huge transactions eating up the whole buffer
-pool for their locks. */
-
-ibool
-buf_LRU_buf_pool_running_out(void)
-/*==============================*/
-				/* out: TRUE if less than 25 % of buffer pool
-				left */
-{
-	ibool	ret	= FALSE;
-
-	mutex_enter(&(buf_pool->mutex));
-
-	if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
-	    + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 4) {
-
-		ret = TRUE;
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(ret);
-}
-
-/**********************************************************************
-Returns a free block from buf_pool. The block is taken off the free list.
-If it is empty, blocks are moved from the end of the LRU list to the free
-list. */
-
-buf_block_t*
-buf_LRU_get_free_block(void)
-/*========================*/
-				/* out: the free control block; also if AWE is
-				used, it is guaranteed that the block has its
-				page mapped to a frame when we return */
-{
-	buf_block_t*	block		= NULL;
-	ibool		freed;
-	ulint		n_iterations	= 1;
-	ibool		mon_value_was	= FALSE;
-	ibool		started_monitor	= FALSE;
-loop:
-	mutex_enter(&(buf_pool->mutex));
-
-	if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
-	    + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) {
-		ut_print_timestamp(stderr);
-
-		fprintf(stderr,
-			"  InnoDB: ERROR: over 95 percent of the buffer pool"
-			" is occupied by\n"
-			"InnoDB: lock heaps or the adaptive hash index!"
-			" Check that your\n"
-			"InnoDB: transactions do not set too many row locks.\n"
-			"InnoDB: Your buffer pool size is %lu MB."
-			" Maybe you should make\n"
-			"InnoDB: the buffer pool bigger?\n"
-			"InnoDB: We intentionally generate a seg fault"
-			" to print a stack trace\n"
-			"InnoDB: on Linux!\n",
-			(ulong) (buf_pool->curr_size
-				 / (1024 * 1024 / UNIV_PAGE_SIZE)));
-
-		ut_error;
-
-	} else if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
-		   + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 3) {
-
-		if (!buf_lru_switched_on_innodb_mon) {
-
-	   		/* Over 67 % of the buffer pool is occupied by lock
-			heaps or the adaptive hash index. This may be a memory
-			leak! */
-
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				"  InnoDB: WARNING: over 67 percent of"
-				" the buffer pool is occupied by\n"
-				"InnoDB: lock heaps or the adaptive"
-				" hash index! Check that your\n"
-				"InnoDB: transactions do not set too many"
-				" row locks.\n"
-				"InnoDB: Your buffer pool size is %lu MB."
-				" Maybe you should make\n"
-				"InnoDB: the buffer pool bigger?\n"
-				"InnoDB: Starting the InnoDB Monitor to print"
-				" diagnostics, including\n"
-				"InnoDB: lock heap and hash index sizes.\n",
-				(ulong) (buf_pool->curr_size
-					 / (1024 * 1024 / UNIV_PAGE_SIZE)));
-
-			buf_lru_switched_on_innodb_mon = TRUE;
-			srv_print_innodb_monitor = TRUE;
-			os_event_set(srv_lock_timeout_thread_event);
-		}
-	} else if (buf_lru_switched_on_innodb_mon) {
-
-		/* Switch off the InnoDB Monitor; this is a simple way
-		to stop the monitor if the situation becomes less urgent,
-		but may also surprise users if the user also switched on the
-		monitor! */
-
-		buf_lru_switched_on_innodb_mon = FALSE;
-		srv_print_innodb_monitor = FALSE;
-	}
-
-	/* If there is a block in the free list, take it */
-	if (UT_LIST_GET_LEN(buf_pool->free) > 0) {
-
-		block = UT_LIST_GET_FIRST(buf_pool->free);
-		ut_a(block->in_free_list);
-		UT_LIST_REMOVE(free, buf_pool->free, block);
-		block->in_free_list = FALSE;
-		ut_a(block->state != BUF_BLOCK_FILE_PAGE);
-		ut_a(!block->in_LRU_list);
-
-		if (srv_use_awe) {
-			if (block->frame) {
-				/* Remove from the list of mapped pages */
-
-				UT_LIST_REMOVE(awe_LRU_free_mapped,
-					       buf_pool->awe_LRU_free_mapped,
-					       block);
-			} else {
-				/* We map the page to a frame; second param
-				FALSE below because we do not want it to be
-				added to the awe_LRU_free_mapped list */
-
-				buf_awe_map_page_to_frame(block, FALSE);
-			}
-		}
-
-		mutex_enter(&block->mutex);
-
-		block->state = BUF_BLOCK_READY_FOR_USE;
-		UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
-
-		mutex_exit(&block->mutex);
-
-		mutex_exit(&(buf_pool->mutex));
-
-		if (started_monitor) {
-			srv_print_innodb_monitor = mon_value_was;
-		}
-
-		return(block);
-	}
-
-	/* If no block was in the free list, search from the end of the LRU
-	list and try to free a block there */
-
-	mutex_exit(&(buf_pool->mutex));
-
-	freed = buf_LRU_search_and_free_block(n_iterations);
-
-	if (freed > 0) {
-		goto loop;
-	}
-
-	if (n_iterations > 30) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"InnoDB: Warning: difficult to find free blocks from\n"
-			"InnoDB: the buffer pool (%lu search iterations)!"
-			" Consider\n"
-			"InnoDB: increasing the buffer pool size.\n"
-			"InnoDB: It is also possible that"
-			" in your Unix version\n"
-			"InnoDB: fsync is very slow, or"
-			" completely frozen inside\n"
-			"InnoDB: the OS kernel. Then upgrading to"
-			" a newer version\n"
-			"InnoDB: of your operating system may help."
-			" Look at the\n"
-			"InnoDB: number of fsyncs in diagnostic info below.\n"
-			"InnoDB: Pending flushes (fsync) log: %lu;"
-			" buffer pool: %lu\n"
-			"InnoDB: %lu OS file reads, %lu OS file writes,"
-			" %lu OS fsyncs\n"
-			"InnoDB: Starting InnoDB Monitor to print further\n"
-			"InnoDB: diagnostics to the standard output.\n",
-			(ulong) n_iterations,
-			(ulong) fil_n_pending_log_flushes,
-			(ulong) fil_n_pending_tablespace_flushes,
-			(ulong) os_n_file_reads, (ulong) os_n_file_writes,
-			(ulong) os_n_fsyncs);
-
-		mon_value_was = srv_print_innodb_monitor;
-		started_monitor = TRUE;
-		srv_print_innodb_monitor = TRUE;
-		os_event_set(srv_lock_timeout_thread_event);
-	}
-
-	/* No free block was found: try to flush the LRU list */
-
-	buf_flush_free_margin();
-	++srv_buf_pool_wait_free;
-
-	os_aio_simulated_wake_handler_threads();
-
-	mutex_enter(&(buf_pool->mutex));
-
-	if (buf_pool->LRU_flush_ended > 0) {
-		/* We have written pages in an LRU flush. To make the insert
-		buffer more efficient, we try to move these pages to the free
-		list. */
-
-		mutex_exit(&(buf_pool->mutex));
-
-		buf_LRU_try_free_flushed_blocks();
-	} else {
-		mutex_exit(&(buf_pool->mutex));
-	}
-
-	if (n_iterations > 10) {
-
-		os_thread_sleep(500000);
-	}
-
-	n_iterations++;
-
-	goto loop;
-}
-
-/***********************************************************************
-Moves the LRU_old pointer so that the length of the old blocks list
-is inside the allowed limits. */
-UNIV_INLINE
-void
-buf_LRU_old_adjust_len(void)
-/*========================*/
-{
-	ulint	old_len;
-	ulint	new_len;
-
-	ut_a(buf_pool->LRU_old);
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-	ut_ad(3 * (BUF_LRU_OLD_MIN_LEN / 8) > BUF_LRU_OLD_TOLERANCE + 5);
-
-	for (;;) {
-		old_len = buf_pool->LRU_old_len;
-		new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
-
-		ut_a(buf_pool->LRU_old->in_LRU_list);
-
-		/* Update the LRU_old pointer if necessary */
-
-		if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) {
-
-			buf_pool->LRU_old = UT_LIST_GET_PREV(
-				LRU, buf_pool->LRU_old);
-			(buf_pool->LRU_old)->old = TRUE;
-			buf_pool->LRU_old_len++;
-
-		} else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
-
-			(buf_pool->LRU_old)->old = FALSE;
-			buf_pool->LRU_old = UT_LIST_GET_NEXT(
-				LRU, buf_pool->LRU_old);
-			buf_pool->LRU_old_len--;
-		} else {
-			ut_a(buf_pool->LRU_old); /* Check that we did not
-						 fall out of the LRU list */
-			return;
-		}
-	}
-}
-
-/***********************************************************************
-Initializes the old blocks pointer in the LRU list. This function should be
-called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */
-static
-void
-buf_LRU_old_init(void)
-/*==================*/
-{
-	buf_block_t*	block;
-
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
-
-	/* We first initialize all blocks in the LRU list as old and then use
-	the adjust function to move the LRU_old pointer to the right
-	position */
-
-	block = UT_LIST_GET_FIRST(buf_pool->LRU);
-
-	while (block != NULL) {
-		ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-		ut_a(block->in_LRU_list);
-		block->old = TRUE;
-		block = UT_LIST_GET_NEXT(LRU, block);
-	}
-
-	buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU);
-	buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU);
-
-	buf_LRU_old_adjust_len();
-}
-
-/**********************************************************************
-Removes a block from the LRU list. */
-UNIV_INLINE
-void
-buf_LRU_remove_block(
-/*=================*/
-	buf_block_t*	block)	/* in: control block */
-{
-	ut_ad(buf_pool);
-	ut_ad(block);
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-	ut_a(block->in_LRU_list);
-
-	/* If the LRU_old pointer is defined and points to just this block,
-	move it backward one step */
-
-	if (block == buf_pool->LRU_old) {
-
-		/* Below: the previous block is guaranteed to exist, because
-		the LRU_old pointer is only allowed to differ by the
-		tolerance value from strict 3/8 of the LRU list length. */
-
-		buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, block);
-		(buf_pool->LRU_old)->old = TRUE;
-
-		buf_pool->LRU_old_len++;
-		ut_a(buf_pool->LRU_old);
-	}
-
-	/* Remove the block from the LRU list */
-	UT_LIST_REMOVE(LRU, buf_pool->LRU, block);
-	block->in_LRU_list = FALSE;
-
-	if (srv_use_awe && block->frame) {
-		/* Remove from the list of mapped pages */
-
-		UT_LIST_REMOVE(awe_LRU_free_mapped,
-			       buf_pool->awe_LRU_free_mapped, block);
-	}
-
-	/* If the LRU list is so short that LRU_old not defined, return */
-	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
-
-		buf_pool->LRU_old = NULL;
-
-		return;
-	}
-
-	ut_ad(buf_pool->LRU_old);
-
-	/* Update the LRU_old_len field if necessary */
-	if (block->old) {
-
-		buf_pool->LRU_old_len--;
-	}
-
-	/* Adjust the length of the old block list if necessary */
-	buf_LRU_old_adjust_len();
-}
-
-/**********************************************************************
-Adds a block to the LRU list end. */
-UNIV_INLINE
-void
-buf_LRU_add_block_to_end_low(
-/*=========================*/
-	buf_block_t*	block)	/* in: control block */
-{
-	buf_block_t*	last_block;
-
-	ut_ad(buf_pool);
-	ut_ad(block);
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-	block->old = TRUE;
-
-	last_block = UT_LIST_GET_LAST(buf_pool->LRU);
-
-	if (last_block) {
-		block->LRU_position = last_block->LRU_position;
-	} else {
-		block->LRU_position = buf_pool_clock_tic();
-	}
-
-	ut_a(!block->in_LRU_list);
-	UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block);
-	block->in_LRU_list = TRUE;
-
-	if (srv_use_awe && block->frame) {
-		/* Add to the list of mapped pages */
-
-		UT_LIST_ADD_LAST(awe_LRU_free_mapped,
-				 buf_pool->awe_LRU_free_mapped, block);
-	}
-
-	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
-
-		buf_pool->LRU_old_len++;
-	}
-
-	if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
-
-		ut_ad(buf_pool->LRU_old);
-
-		/* Adjust the length of the old block list if necessary */
-
-		buf_LRU_old_adjust_len();
-
-	} else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
-
-		/* The LRU list is now long enough for LRU_old to become
-		defined: init it */
-
-		buf_LRU_old_init();
-	}
-}
-
-/**********************************************************************
-Adds a block to the LRU list. */
-UNIV_INLINE
-void
-buf_LRU_add_block_low(
-/*==================*/
-	buf_block_t*	block,	/* in: control block */
-	ibool		old)	/* in: TRUE if should be put to the old blocks
-				in the LRU list, else put to the start; if the
-				LRU list is very short, the block is added to
-				the start, regardless of this parameter */
-{
-	ulint	cl;
-
-	ut_ad(buf_pool);
-	ut_ad(block);
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-	ut_a(!block->in_LRU_list);
-
-	block->old = old;
-	cl = buf_pool_clock_tic();
-
-	if (srv_use_awe && block->frame) {
-		/* Add to the list of mapped pages; for simplicity we always
-		add to the start, even if the user would have set 'old'
-		TRUE */
-
-		UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
-				  buf_pool->awe_LRU_free_mapped, block);
-	}
-
-	if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
-
-		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block);
-
-		block->LRU_position = cl;
-		block->freed_page_clock = buf_pool->freed_page_clock;
-	} else {
-		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
-				     block);
-		buf_pool->LRU_old_len++;
-
-		/* We copy the LRU position field of the previous block
-		to the new block */
-
-		block->LRU_position = (buf_pool->LRU_old)->LRU_position;
-	}
-
-	block->in_LRU_list = TRUE;
-
-	if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
-
-		ut_ad(buf_pool->LRU_old);
-
-		/* Adjust the length of the old block list if necessary */
-
-		buf_LRU_old_adjust_len();
-
-	} else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
-
-		/* The LRU list is now long enough for LRU_old to become
-		defined: init it */
-
-		buf_LRU_old_init();
-	}
-}
-
-/**********************************************************************
-Adds a block to the LRU list. */
-
-void
-buf_LRU_add_block(
-/*==============*/
-	buf_block_t*	block,	/* in: control block */
-	ibool		old)	/* in: TRUE if should be put to the old
-				blocks in the LRU list, else put to the start;
-				if the LRU list is very short, the block is
-				added to the start, regardless of this
-				parameter */
-{
-	buf_LRU_add_block_low(block, old);
-}
-
-/**********************************************************************
-Moves a block to the start of the LRU list. */
-
-void
-buf_LRU_make_block_young(
-/*=====================*/
-	buf_block_t*	block)	/* in: control block */
-{
-	buf_LRU_remove_block(block);
-	buf_LRU_add_block_low(block, FALSE);
-}
-
-/**********************************************************************
-Moves a block to the end of the LRU list. */
-
-void
-buf_LRU_make_block_old(
-/*===================*/
-	buf_block_t*	block)	/* in: control block */
-{
-	buf_LRU_remove_block(block);
-	buf_LRU_add_block_to_end_low(block);
-}
-
-/**********************************************************************
-Puts a block back to the free list. */
-
-void
-buf_LRU_block_free_non_file_page(
-/*=============================*/
-	buf_block_t*	block)	/* in: block, must not contain a file page */
-{
-
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-	ut_ad(mutex_own(&block->mutex));
-	ut_ad(block);
-
-	ut_a((block->state == BUF_BLOCK_MEMORY)
-	     || (block->state == BUF_BLOCK_READY_FOR_USE));
-
-	ut_a(block->n_pointers == 0);
-	ut_a(!block->in_free_list);
-
-	block->state = BUF_BLOCK_NOT_USED;
-
-	UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
-#ifdef UNIV_DEBUG
-	/* Wipe contents of page to reveal possible stale pointers to it */
-	memset(block->frame, '\0', UNIV_PAGE_SIZE);
-#endif
-	UT_LIST_ADD_FIRST(free, buf_pool->free, block);
-	block->in_free_list = TRUE;
-
-	UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
-
-	if (srv_use_awe && block->frame) {
-		/* Add to the list of mapped pages */
-
-		UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
-				  buf_pool->awe_LRU_free_mapped, block);
-	}
-}
-
-/**********************************************************************
-Takes a block out of the LRU list and page hash table and sets the block
-state to BUF_BLOCK_REMOVE_HASH. */
-static
-void
-buf_LRU_block_remove_hashed_page(
-/*=============================*/
-	buf_block_t*	block)	/* in: block, must contain a file page and
-				be in a state where it can be freed; there
-				may or may not be a hash index to the page */
-{
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-	ut_ad(mutex_own(&block->mutex));
-	ut_ad(block);
-
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-	ut_a(block->io_fix == 0);
-	ut_a(block->buf_fix_count == 0);
-	ut_a(ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) == 0);
-
-	buf_LRU_remove_block(block);
-
-	buf_pool->freed_page_clock += 1;
-
-	/* Note that if AWE is enabled the block may not have a frame at all */
-
-	buf_block_modify_clock_inc(block);
-
-	if (block != buf_page_hash_get(block->space, block->offset)) {
-		fprintf(stderr,
-			"InnoDB: Error: page %lu %lu not found"
-			" in the hash table\n",
-			(ulong) block->space,
-			(ulong) block->offset);
-		if (buf_page_hash_get(block->space, block->offset)) {
-			fprintf(stderr,
-				"InnoDB: In hash table we find block"
-				" %p of %lu %lu which is not %p\n",
-				(void*) buf_page_hash_get
-				(block->space, block->offset),
-				(ulong) buf_page_hash_get
-				(block->space, block->offset)->space,
-				(ulong) buf_page_hash_get
-				(block->space, block->offset)->offset,
-				(void*) block);
-		}
-
-#ifdef UNIV_DEBUG
-		buf_print();
-		buf_LRU_print();
-		buf_validate();
-		buf_LRU_validate();
-#endif
-		ut_a(0);
-	}
-
-	HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
-		    buf_page_address_fold(block->space, block->offset),
-		    block);
-
-	UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
-	block->state = BUF_BLOCK_REMOVE_HASH;
-}
-
-/**********************************************************************
-Puts a file page whose has no hash index to the free list. */
-static
-void
-buf_LRU_block_free_hashed_page(
-/*===========================*/
-	buf_block_t*	block)	/* in: block, must contain a file page and
-				be in a state where it can be freed */
-{
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-	ut_ad(mutex_own(&block->mutex));
-
-	ut_a(block->state == BUF_BLOCK_REMOVE_HASH);
-
-	block->state = BUF_BLOCK_MEMORY;
-
-	buf_LRU_block_free_non_file_page(block);
-}
-
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Validates the LRU list. */
-
-ibool
-buf_LRU_validate(void)
-/*==================*/
-{
-	buf_block_t*	block;
-	ulint		old_len;
-	ulint		new_len;
-	ulint		LRU_pos;
-
-	ut_ad(buf_pool);
-	mutex_enter(&(buf_pool->mutex));
-
-	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
-
-		ut_a(buf_pool->LRU_old);
-		old_len = buf_pool->LRU_old_len;
-		new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
-		ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
-		ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
-	}
-
-	UT_LIST_VALIDATE(LRU, buf_block_t, buf_pool->LRU);
-
-	block = UT_LIST_GET_FIRST(buf_pool->LRU);
-
-	old_len = 0;
-
-	while (block != NULL) {
-
-		ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-		if (block->old) {
-			old_len++;
-		}
-
-		if (buf_pool->LRU_old && (old_len == 1)) {
-			ut_a(buf_pool->LRU_old == block);
-		}
-
-		LRU_pos	= block->LRU_position;
-
-		block = UT_LIST_GET_NEXT(LRU, block);
-
-		if (block) {
-			/* If the following assert fails, it may
-			not be an error: just the buf_pool clock
-			has wrapped around */
-			ut_a(LRU_pos >= block->LRU_position);
-		}
-	}
-
-	if (buf_pool->LRU_old) {
-		ut_a(buf_pool->LRU_old_len == old_len);
-	}
-
-	UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free);
-
-	block = UT_LIST_GET_FIRST(buf_pool->free);
-
-	while (block != NULL) {
-		ut_a(block->state == BUF_BLOCK_NOT_USED);
-
-		block = UT_LIST_GET_NEXT(free, block);
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-	return(TRUE);
-}
-
-/**************************************************************************
-Prints the LRU list. */
-
-void
-buf_LRU_print(void)
-/*===============*/
-{
-	buf_block_t*	block;
-	buf_frame_t*	frame;
-	ulint		len;
-
-	ut_ad(buf_pool);
-	mutex_enter(&(buf_pool->mutex));
-
-	fprintf(stderr, "Pool ulint clock %lu\n",
-		(ulong) buf_pool->ulint_clock);
-
-	block = UT_LIST_GET_FIRST(buf_pool->LRU);
-
-	len = 0;
-
-	while (block != NULL) {
-
-		fprintf(stderr, "BLOCK %lu ", (ulong) block->offset);
-
-		if (block->old) {
-			fputs("old ", stderr);
-		}
-
-		if (block->buf_fix_count) {
-			fprintf(stderr, "buffix count %lu ",
-				(ulong) block->buf_fix_count);
-		}
-
-		if (block->io_fix) {
-			fprintf(stderr, "io_fix %lu ", (ulong) block->io_fix);
-		}
-
-		if (ut_dulint_cmp(block->oldest_modification,
-				  ut_dulint_zero) > 0) {
-			fputs("modif. ", stderr);
-		}
-
-		frame = buf_block_get_frame(block);
-
-		fprintf(stderr, "LRU pos %lu type %lu index id %lu ",
-			(ulong) block->LRU_position,
-			(ulong) fil_page_get_type(frame),
-			(ulong) ut_dulint_get_low
-			(btr_page_get_index_id(frame)));
-
-		block = UT_LIST_GET_NEXT(LRU, block);
-		if (++len == 10) {
-			len = 0;
-			putc('\n', stderr);
-		}
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-}
-#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/data/data0data.c b/storage/innobase/data/data0data.c
deleted file mode 100644
index 0f03de4ca9d..00000000000
--- a/storage/innobase/data/data0data.c
+++ /dev/null
@@ -1,681 +0,0 @@
-/************************************************************************
-SQL data field and tuple
-
-(c) 1994-1996 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "data0data.h"
-
-#ifdef UNIV_NONINL
-#include "data0data.ic"
-#endif
-
-#include "rem0rec.h"
-#include "rem0cmp.h"
-#include "page0page.h"
-#include "dict0dict.h"
-#include "btr0cur.h"
-
-#include <ctype.h>
-
-#ifdef UNIV_DEBUG
-byte	data_error;	/* data pointers of tuple fields are initialized
-			to point here for error checking */
-
-ulint	data_dummy;	/* this is used to fool the compiler in
-			dtuple_validate */
-#endif /* UNIV_DEBUG */
-
-/* Some non-inlined functions used in the MySQL interface: */
-void
-dfield_set_data_noninline(
-	dfield_t*	field,	/* in: field */
-	void*		data,	/* in: data */
-	ulint		len)	/* in: length or UNIV_SQL_NULL */
-{
-	dfield_set_data(field, data, len);
-}
-void*
-dfield_get_data_noninline(
-	dfield_t* field)	/* in: field */
-{
-	return(dfield_get_data(field));
-}
-ulint
-dfield_get_len_noninline(
-	dfield_t* field)	/* in: field */
-{
-	return(dfield_get_len(field));
-}
-ulint
-dtuple_get_n_fields_noninline(
-	dtuple_t*	tuple)	/* in: tuple */
-{
-	return(dtuple_get_n_fields(tuple));
-}
-dfield_t*
-dtuple_get_nth_field_noninline(
-	dtuple_t*	tuple,	/* in: tuple */
-	ulint		n)	/* in: index of field */
-{
-	return(dtuple_get_nth_field(tuple, n));
-}
-
-/*************************************************************************
-Tests if dfield data length and content is equal to the given. */
-
-ibool
-dfield_data_is_binary_equal(
-/*========================*/
-				/* out: TRUE if equal */
-	dfield_t*	field,	/* in: field */
-	ulint		len,	/* in: data length or UNIV_SQL_NULL */
-	byte*		data)	/* in: data */
-{
-	if (len != field->len) {
-
-		return(FALSE);
-	}
-
-	if (len == UNIV_SQL_NULL) {
-
-		return(TRUE);
-	}
-
-	if (0 != ut_memcmp(field->data, data, len)) {
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-
-/****************************************************************
-Returns TRUE if lengths of two dtuples are equal and respective data fields
-in them are equal when compared with collation in char fields (not as binary
-strings). */
-
-ibool
-dtuple_datas_are_ordering_equal(
-/*============================*/
-				/* out: TRUE if length and fieds are equal
-				when compared with cmp_data_data:
-				NOTE: in character type fields some letters
-				are identified with others! (collation) */
-	dtuple_t*	tuple1,	/* in: tuple 1 */
-	dtuple_t*	tuple2)	/* in: tuple 2 */
-{
-	dfield_t*	field1;
-	dfield_t*	field2;
-	ulint		n_fields;
-	ulint		i;
-
-	ut_ad(tuple1 && tuple2);
-	ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
-	ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
-	ut_ad(dtuple_check_typed(tuple1));
-	ut_ad(dtuple_check_typed(tuple2));
-
-	n_fields = dtuple_get_n_fields(tuple1);
-
-	if (n_fields != dtuple_get_n_fields(tuple2)) {
-
-		return(FALSE);
-	}
-
-	for (i = 0; i < n_fields; i++) {
-
-		field1 = dtuple_get_nth_field(tuple1, i);
-		field2 = dtuple_get_nth_field(tuple2, i);
-
-		if (0 != cmp_dfield_dfield(field1, field2)) {
-
-			return(FALSE);
-		}
-	}
-
-	return(TRUE);
-}
-
-/*************************************************************************
-Creates a dtuple for use in MySQL. */
-
-dtuple_t*
-dtuple_create_for_mysql(
-/*====================*/
-				/* out, own created dtuple */
-	void**	heap,		/* out: created memory heap */
-	ulint	n_fields)	/* in: number of fields */
-{
-	*heap = (void*)mem_heap_create(500);
-
-	return(dtuple_create(*((mem_heap_t**)heap), n_fields));
-}
-
-/*************************************************************************
-Frees a dtuple used in MySQL. */
-
-void
-dtuple_free_for_mysql(
-/*==================*/
-	void*	heap) /* in: memory heap where tuple was created */
-{
-	mem_heap_free((mem_heap_t*)heap);
-}
-
-/*************************************************************************
-Sets number of fields used in a tuple. Normally this is set in
-dtuple_create, but if you want later to set it smaller, you can use this. */
-
-void
-dtuple_set_n_fields(
-/*================*/
-	dtuple_t*	tuple,		/* in: tuple */
-	ulint		n_fields)	/* in: number of fields */
-{
-	ut_ad(tuple);
-
-	tuple->n_fields = n_fields;
-	tuple->n_fields_cmp = n_fields;
-}
-
-/**************************************************************
-Checks that a data field is typed. */
-static
-ibool
-dfield_check_typed_no_assert(
-/*=========================*/
-				/* out: TRUE if ok */
-	dfield_t*	field)	/* in: data field */
-{
-	if (dfield_get_type(field)->mtype > DATA_MYSQL
-	    || dfield_get_type(field)->mtype < DATA_VARCHAR) {
-
-		fprintf(stderr,
-			"InnoDB: Error: data field type %lu, len %lu\n",
-			(ulong) dfield_get_type(field)->mtype,
-			(ulong) dfield_get_len(field));
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-
-/**************************************************************
-Checks that a data tuple is typed. */
-
-ibool
-dtuple_check_typed_no_assert(
-/*=========================*/
-				/* out: TRUE if ok */
-	dtuple_t*	tuple)	/* in: tuple */
-{
-	dfield_t*	field;
-	ulint		i;
-
-	if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) {
-		fprintf(stderr,
-			"InnoDB: Error: index entry has %lu fields\n",
-			(ulong) dtuple_get_n_fields(tuple));
-dump:
-		fputs("InnoDB: Tuple contents: ", stderr);
-		dtuple_print(stderr, tuple);
-		putc('\n', stderr);
-
-		return(FALSE);
-	}
-
-	for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
-
-		field = dtuple_get_nth_field(tuple, i);
-
-		if (!dfield_check_typed_no_assert(field)) {
-			goto dump;
-		}
-	}
-
-	return(TRUE);
-}
-
-/**************************************************************
-Checks that a data field is typed. Asserts an error if not. */
-
-ibool
-dfield_check_typed(
-/*===============*/
-				/* out: TRUE if ok */
-	dfield_t*	field)	/* in: data field */
-{
-	if (dfield_get_type(field)->mtype > DATA_MYSQL
-	    || dfield_get_type(field)->mtype < DATA_VARCHAR) {
-
-		fprintf(stderr,
-			"InnoDB: Error: data field type %lu, len %lu\n",
-			(ulong) dfield_get_type(field)->mtype,
-			(ulong) dfield_get_len(field));
-
-		ut_error;
-	}
-
-	return(TRUE);
-}
-
-/**************************************************************
-Checks that a data tuple is typed. Asserts an error if not. */
-
-ibool
-dtuple_check_typed(
-/*===============*/
-				/* out: TRUE if ok */
-	dtuple_t*	tuple)	/* in: tuple */
-{
-	dfield_t*	field;
-	ulint		i;
-
-	for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
-
-		field = dtuple_get_nth_field(tuple, i);
-
-		ut_a(dfield_check_typed(field));
-	}
-
-	return(TRUE);
-}
-
-#ifdef UNIV_DEBUG
-/**************************************************************
-Validates the consistency of a tuple which must be complete, i.e,
-all fields must have been set. */
-
-ibool
-dtuple_validate(
-/*============*/
-				/* out: TRUE if ok */
-	dtuple_t*	tuple)	/* in: tuple */
-{
-	dfield_t*	field;
-	byte*		data;
-	ulint		n_fields;
-	ulint		len;
-	ulint		i;
-	ulint		j;
-
-	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
-
-	n_fields = dtuple_get_n_fields(tuple);
-
-	/* We dereference all the data of each field to test
-	for memory traps */
-
-	for (i = 0; i < n_fields; i++) {
-
-		field = dtuple_get_nth_field(tuple, i);
-		len = dfield_get_len(field);
-
-		if (len != UNIV_SQL_NULL) {
-
-			data = field->data;
-
-			for (j = 0; j < len; j++) {
-
-				data_dummy  += *data; /* fool the compiler not
-						      to optimize out this
-						      code */
-				data++;
-			}
-		}
-	}
-
-	ut_a(dtuple_check_typed(tuple));
-
-	return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/*****************************************************************
-Pretty prints a dfield value according to its data type. */
-
-void
-dfield_print(
-/*=========*/
-	dfield_t*	dfield)	 /* in: dfield */
-{
-	byte*	data;
-	ulint	len;
-	ulint	mtype;
-	ulint	i;
-
-	len = dfield_get_len(dfield);
-	data = dfield_get_data(dfield);
-
-	if (len == UNIV_SQL_NULL) {
-		fputs("NULL", stderr);
-
-		return;
-	}
-
-	mtype = dtype_get_mtype(dfield_get_type(dfield));
-
-	if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {
-
-		for (i = 0; i < len; i++) {
-			int	c = *data++;
-			putc(isprint(c) ? c : ' ', stderr);
-		}
-	} else if (mtype == DATA_INT) {
-		ut_a(len == 4); /* only works for 32-bit integers */
-		fprintf(stderr, "%d", (int)mach_read_from_4(data));
-	} else {
-		ut_error;
-	}
-}
-
-/*****************************************************************
-Pretty prints a dfield value according to its data type. Also the hex string
-is printed if a string contains non-printable characters. */
-
-void
-dfield_print_also_hex(
-/*==================*/
-	dfield_t*	dfield)	 /* in: dfield */
-{
-	byte*	data;
-	ulint	len;
-	ulint	mtype;
-	ulint	i;
-	ibool	print_also_hex;
-
-	len = dfield_get_len(dfield);
-	data = dfield_get_data(dfield);
-
-	if (len == UNIV_SQL_NULL) {
-		fputs("NULL", stderr);
-
-		return;
-	}
-
-	mtype = dtype_get_mtype(dfield_get_type(dfield));
-
-	if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {
-
-		print_also_hex = FALSE;
-
-		for (i = 0; i < len; i++) {
-			int c = *data++;
-			if (!isprint(c)) {
-				print_also_hex = TRUE;
-				c = ' ';
-			}
-			putc(c, stderr);
-		}
-
-		if (!print_also_hex) {
-
-			return;
-		}
-
-		fputs(" Hex: ", stderr);
-
-		data = dfield_get_data(dfield);
-
-		for (i = 0; i < len; i++) {
-			fprintf(stderr, "%02lx", (ulint)*data);
-
-			data++;
-		}
-	} else if (mtype == DATA_INT) {
-		ut_a(len == 4); /* only works for 32-bit integers */
-		fprintf(stderr, "%d", (int)mach_read_from_4(data));
-	} else {
-		ut_error;
-	}
-}
-
-/*****************************************************************
-Print a dfield value using ut_print_buf. */
-static
-void
-dfield_print_raw(
-/*=============*/
-	FILE*		f,		/* in: output stream */
-	dfield_t*	dfield)		/* in: dfield */
-{
-	ulint	len	= dfield->len;
-	if (len != UNIV_SQL_NULL) {
-		ulint	print_len = ut_min(len, 1000);
-		ut_print_buf(f, dfield->data, print_len);
-		if (len != print_len) {
-			fprintf(f, "(total %lu bytes)", (ulong) len);
-		}
-	} else {
-		fputs(" SQL NULL", f);
-	}
-}
-
-/**************************************************************
-The following function prints the contents of a tuple. */
-
-void
-dtuple_print(
-/*=========*/
-	FILE*		f,	/* in: output stream */
-	dtuple_t*	tuple)	/* in: tuple */
-{
-	ulint		n_fields;
-	ulint		i;
-
-	n_fields = dtuple_get_n_fields(tuple);
-
-	fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields);
-
-	for (i = 0; i < n_fields; i++) {
-		fprintf(f, " %lu:", (ulong) i);
-
-		dfield_print_raw(f, dtuple_get_nth_field(tuple, i));
-
-		putc(';', f);
-	}
-
-	putc('\n', f);
-	ut_ad(dtuple_validate(tuple));
-}
-
-/******************************************************************
-Moves parts of long fields in entry to the big record vector so that
-the size of tuple drops below the maximum record size allowed in the
-database. Moves data only from those fields which are not necessary
-to determine uniquely the insertion place of the tuple in the index. */
-
-big_rec_t*
-dtuple_convert_big_rec(
-/*===================*/
-				/* out, own: created big record vector,
-				NULL if we are not able to shorten
-				the entry enough, i.e., if there are
-				too many short fields in entry */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry */
-	ulint*		ext_vec,/* in: array of externally stored fields,
-				or NULL: if a field already is externally
-				stored, then we cannot move it to the vector
-				this function returns */
-	ulint		n_ext_vec)/* in: number of elements is ext_vec */
-{
-	mem_heap_t*	heap;
-	big_rec_t*	vector;
-	dfield_t*	dfield;
-	ulint		size;
-	ulint		n_fields;
-	ulint		longest;
-	ulint		longest_i		= ULINT_MAX;
-	ibool		is_externally_stored;
-	ulint		i;
-	ulint		j;
-
-	ut_a(dtuple_check_typed_no_assert(entry));
-
-	size = rec_get_converted_size(index, entry);
-
-	if (UNIV_UNLIKELY(size > 1000000000)) {
-		fprintf(stderr,
-			"InnoDB: Warning: tuple size very big: %lu\n",
-			(ulong) size);
-		fputs("InnoDB: Tuple contents: ", stderr);
-		dtuple_print(stderr, entry);
-		putc('\n', stderr);
-	}
-
-	heap = mem_heap_create(size + dtuple_get_n_fields(entry)
-			       * sizeof(big_rec_field_t) + 1000);
-
-	vector = mem_heap_alloc(heap, sizeof(big_rec_t));
-
-	vector->heap = heap;
-	vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
-					* sizeof(big_rec_field_t));
-
-	/* Decide which fields to shorten: the algorithm is to look for
-	the longest field whose type is DATA_BLOB */
-
-	n_fields = 0;
-
-	while (rec_get_converted_size(index, entry)
-	       >= ut_min(page_get_free_space_of_empty(
-				 dict_table_is_comp(index->table)) / 2,
-			 REC_MAX_DATA_SIZE)) {
-
-		longest = 0;
-		for (i = dict_index_get_n_unique_in_tree(index);
-		     i < dtuple_get_n_fields(entry); i++) {
-
-			/* Skip over fields which already are externally
-			stored */
-
-			is_externally_stored = FALSE;
-
-			if (ext_vec) {
-				for (j = 0; j < n_ext_vec; j++) {
-					if (ext_vec[j] == i) {
-						is_externally_stored = TRUE;
-					}
-				}
-			}
-
-			if (!is_externally_stored) {
-
-				dfield = dtuple_get_nth_field(entry, i);
-
-				if (dfield->len != UNIV_SQL_NULL
-				    && dfield->len > longest) {
-
-					longest = dfield->len;
-
-					longest_i = i;
-				}
-			}
-		}
-
-		/* We do not store externally fields which are smaller than
-		DICT_MAX_INDEX_COL_LEN */
-
-#if DICT_MAX_INDEX_COL_LEN <= REC_1BYTE_OFFS_LIMIT
-# error "DICT_MAX_INDEX_COL_LEN <= REC_1BYTE_OFFS_LIMIT"
-#endif
-
-		if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10
-		    + DICT_MAX_INDEX_COL_LEN) {
-			/* Cannot shorten more */
-
-			mem_heap_free(heap);
-
-			return(NULL);
-		}
-
-		/* Move data from field longest_i to big rec vector;
-		we do not let data size of the remaining entry
-		drop below 128 which is the limit for the 2-byte
-		offset storage format in a physical record. This
-		we accomplish by storing 128 bytes of data in entry
-		itself, and only the remaining part to big rec vec.
-
-		We store the first bytes locally to the record. Then
-		we can calculate all ordering fields in all indexes
-		from locally stored data. */
-
-		dfield = dtuple_get_nth_field(entry, longest_i);
-		vector->fields[n_fields].field_no = longest_i;
-
-		ut_a(dfield->len > DICT_MAX_INDEX_COL_LEN);
-
-		vector->fields[n_fields].len = dfield->len
-			- DICT_MAX_INDEX_COL_LEN;
-
-		vector->fields[n_fields].data = mem_heap_alloc(
-			heap, vector->fields[n_fields].len);
-
-		/* Copy data (from the end of field) to big rec vector */
-
-		ut_memcpy(vector->fields[n_fields].data,
-			  ((byte*)dfield->data) + dfield->len
-			  - vector->fields[n_fields].len,
-			  vector->fields[n_fields].len);
-		dfield->len = dfield->len - vector->fields[n_fields].len
-			+ BTR_EXTERN_FIELD_REF_SIZE;
-
-		/* Set the extern field reference in dfield to zero */
-		memset(((byte*)dfield->data)
-		       + dfield->len - BTR_EXTERN_FIELD_REF_SIZE,
-		       0, BTR_EXTERN_FIELD_REF_SIZE);
-		n_fields++;
-	}
-
-	vector->n_fields = n_fields;
-	return(vector);
-}
-
-/******************************************************************
-Puts back to entry the data stored in vector. Note that to ensure the
-fields in entry can accommodate the data, vector must have been created
-from entry with dtuple_convert_big_rec. */
-
-void
-dtuple_convert_back_big_rec(
-/*========================*/
-	dict_index_t*	index __attribute__((unused)),	/* in: index */
-	dtuple_t*	entry,	/* in: entry whose data was put to vector */
-	big_rec_t*	vector)	/* in, own: big rec vector; it is
-				freed in this function */
-{
-	dfield_t*	dfield;
-	ulint		i;
-
-	for (i = 0; i < vector->n_fields; i++) {
-
-		dfield = dtuple_get_nth_field(entry,
-					      vector->fields[i].field_no);
-		/* Copy data from big rec vector */
-
-		ut_memcpy(((byte*)dfield->data)
-			  + dfield->len - BTR_EXTERN_FIELD_REF_SIZE,
-			  vector->fields[i].data,
-			  vector->fields[i].len);
-		dfield->len = dfield->len + vector->fields[i].len
-			- BTR_EXTERN_FIELD_REF_SIZE;
-	}
-
-	mem_heap_free(vector->heap);
-}
-
-/******************************************************************
-Frees the memory in a big rec vector. */
-
-void
-dtuple_big_rec_free(
-/*================*/
-	big_rec_t*	vector)	/* in, own: big rec vector; it is
-				freed in this function */
-{
-	mem_heap_free(vector->heap);
-}
diff --git a/storage/innobase/dyn/dyn0dyn.c b/storage/innobase/dyn/dyn0dyn.c
deleted file mode 100644
index bcf2fda2b08..00000000000
--- a/storage/innobase/dyn/dyn0dyn.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/******************************************************
-The dynamically allocated array
-
-(c) 1996 Innobase Oy
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dyn0dyn.h"
-#ifdef UNIV_NONINL
-#include "dyn0dyn.ic"
-#endif
-
-/****************************************************************
-Adds a new block to a dyn array. */
-
-dyn_block_t*
-dyn_array_add_block(
-/*================*/
-				/* out: created block */
-	dyn_array_t*	arr)	/* in: dyn array */
-{
-	mem_heap_t*	heap;
-	dyn_block_t*	block;
-
-	ut_ad(arr);
-	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
-	if (arr->heap == NULL) {
-		UT_LIST_INIT(arr->base);
-		UT_LIST_ADD_FIRST(list, arr->base, arr);
-
-		arr->heap = mem_heap_create(sizeof(dyn_block_t));
-	}
-
-	block = dyn_array_get_last_block(arr);
-	block->used = block->used | DYN_BLOCK_FULL_FLAG;
-
-	heap = arr->heap;
-
-	block = mem_heap_alloc(heap, sizeof(dyn_block_t));
-
-	block->used = 0;
-
-	UT_LIST_ADD_LAST(list, arr->base, block);
-
-	return(block);
-}
diff --git a/storage/innobase/fut/fut0fut.c b/storage/innobase/fut/fut0fut.c
deleted file mode 100644
index 7f7a8fa39e7..00000000000
--- a/storage/innobase/fut/fut0fut.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/**********************************************************************
-File-based utilities
-
-(c) 1995 Innobase Oy
-
-Created 12/13/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fut0fut.h"
-
-#ifdef UNIV_NONINL
-#include "fut0fut.ic"
-#endif
-
diff --git a/storage/innobase/ha/ha0ha.c b/storage/innobase/ha/ha0ha.c
deleted file mode 100644
index 077497493b4..00000000000
--- a/storage/innobase/ha/ha0ha.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/************************************************************************
-The hash table with external chains
-
-(c) 1994-1997 Innobase Oy
-
-Created 8/22/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "ha0ha.h"
-#ifdef UNIV_NONINL
-#include "ha0ha.ic"
-#endif
-
-#include "buf0buf.h"
-
-/*****************************************************************
-Creates a hash table with >= n array cells. The actual number of cells is
-chosen to be a prime number slightly bigger than n. */
-
-hash_table_t*
-ha_create_func(
-/*===========*/
-				/* out, own: created table */
-	ibool	in_btr_search,	/* in: TRUE if the hash table is used in
-				the btr_search module */
-	ulint	n,		/* in: number of array cells */
-#ifdef UNIV_SYNC_DEBUG
-	ulint	mutex_level,	/* in: level of the mutexes in the latching
-				order: this is used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
-	ulint	n_mutexes)	/* in: number of mutexes to protect the
-				hash table: must be a power of 2, or 0 */
-{
-	hash_table_t*	table;
-	ulint		i;
-
-	table = hash_create(n);
-
-	if (in_btr_search) {
-		table->adaptive = TRUE;
-	} else {
-		table->adaptive = FALSE;
-	}
-
-	/* Creating MEM_HEAP_BTR_SEARCH type heaps can potentially fail,
-	but in practise it never should in this case, hence the asserts. */
-
-	if (n_mutexes == 0) {
-		if (in_btr_search) {
-			table->heap = mem_heap_create_in_btr_search(4096);
-			ut_a(table->heap);
-		} else {
-			table->heap = mem_heap_create_in_buffer(4096);
-		}
-
-		return(table);
-	}
-
-	hash_create_mutexes(table, n_mutexes, mutex_level);
-
-	table->heaps = mem_alloc(n_mutexes * sizeof(void*));
-
-	for (i = 0; i < n_mutexes; i++) {
-		if (in_btr_search) {
-			table->heaps[i] = mem_heap_create_in_btr_search(4096);
-			ut_a(table->heaps[i]);
-		} else {
-			table->heaps[i] = mem_heap_create_in_buffer(4096);
-		}
-	}
-
-	return(table);
-}
-
-/*****************************************************************
-Inserts an entry into a hash table. If an entry with the same fold number
-is found, its node is updated to point to the new data, and no new node
-is inserted. */
-
-ibool
-ha_insert_for_fold(
-/*===============*/
-				/* out: TRUE if succeed, FALSE if no more
-				memory could be allocated */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold,	/* in: folded value of data; if a node with
-				the same fold value already exists, it is
-				updated to point to the same data, and no new
-				node is created! */
-	void*		data)	/* in: data, must not be NULL */
-{
-	hash_cell_t*	cell;
-	ha_node_t*	node;
-	ha_node_t*	prev_node;
-	buf_block_t*	prev_block;
-	ulint		hash;
-
-	ut_ad(table && data);
-	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
-	hash = hash_calc_hash(fold, table);
-
-	cell = hash_get_nth_cell(table, hash);
-
-	prev_node = cell->node;
-
-	while (prev_node != NULL) {
-		if (prev_node->fold == fold) {
-			if (table->adaptive) {
-				prev_block = buf_block_align(prev_node->data);
-				ut_a(prev_block->n_pointers > 0);
-				prev_block->n_pointers--;
-				buf_block_align(data)->n_pointers++;
-			}
-
-			prev_node->data = data;
-
-			return(TRUE);
-		}
-
-		prev_node = prev_node->next;
-	}
-
-	/* We have to allocate a new chain node */
-
-	node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t));
-
-	if (node == NULL) {
-		/* It was a btr search type memory heap and at the moment
-		no more memory could be allocated: return */
-
-		ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH);
-
-		return(FALSE);
-	}
-
-	ha_node_set_data(node, data);
-
-	if (table->adaptive) {
-		buf_block_align(data)->n_pointers++;
-	}
-
-	node->fold = fold;
-
-	node->next = NULL;
-
-	prev_node = cell->node;
-
-	if (prev_node == NULL) {
-
-		cell->node = node;
-
-		return(TRUE);
-	}
-
-	while (prev_node->next != NULL) {
-
-		prev_node = prev_node->next;
-	}
-
-	prev_node->next = node;
-
-	return(TRUE);
-}
-
-/***************************************************************
-Deletes a hash node. */
-
-void
-ha_delete_hash_node(
-/*================*/
-	hash_table_t*	table,		/* in: hash table */
-	ha_node_t*	del_node)	/* in: node to be deleted */
-{
-	if (table->adaptive) {
-		ut_a(buf_block_align(del_node->data)->n_pointers > 0);
-		buf_block_align(del_node->data)->n_pointers--;
-	}
-
-	HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
-}
-
-/*****************************************************************
-Deletes an entry from a hash table. */
-
-void
-ha_delete(
-/*======*/
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold,	/* in: folded value of data */
-	void*		data)	/* in: data, must not be NULL and must exist
-				in the hash table */
-{
-	ha_node_t*	node;
-
-	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
-	node = ha_search_with_data(table, fold, data);
-
-	ut_a(node);
-
-	ha_delete_hash_node(table, node);
-}
-
-/*************************************************************
-Looks for an element when we know the pointer to the data, and updates
-the pointer to data, if found. */
-
-void
-ha_search_and_update_if_found(
-/*==========================*/
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold,	/* in: folded value of the searched data */
-	void*		data,	/* in: pointer to the data */
-	void*		new_data)/* in: new pointer to the data */
-{
-	ha_node_t*	node;
-
-	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
-	node = ha_search_with_data(table, fold, data);
-
-	if (node) {
-		if (table->adaptive) {
-			ut_a(buf_block_align(node->data)->n_pointers > 0);
-			buf_block_align(node->data)->n_pointers--;
-			buf_block_align(new_data)->n_pointers++;
-		}
-
-		node->data = new_data;
-	}
-}
-
-/*********************************************************************
-Removes from the chain determined by fold all nodes whose data pointer
-points to the page given. */
-
-void
-ha_remove_all_nodes_to_page(
-/*========================*/
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold,	/* in: fold value */
-	page_t*		page)	/* in: buffer page */
-{
-	ha_node_t*	node;
-
-	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
-	node = ha_chain_get_first(table, fold);
-
-	while (node) {
-		if (buf_frame_align(ha_node_get_data(node)) == page) {
-
-			/* Remove the hash node */
-
-			ha_delete_hash_node(table, node);
-
-			/* Start again from the first node in the chain
-			because the deletion may compact the heap of
-			nodes and move other nodes! */
-
-			node = ha_chain_get_first(table, fold);
-		} else {
-			node = ha_chain_get_next(node);
-		}
-	}
-#ifdef UNIV_DEBUG
-	/* Check that all nodes really got deleted */
-
-	node = ha_chain_get_first(table, fold);
-
-	while (node) {
-		ut_a(buf_frame_align(ha_node_get_data(node)) != page);
-
-		node = ha_chain_get_next(node);
-	}
-#endif
-}
-
-/*****************************************************************
-Validates a given range of the cells in hash table. */
-
-ibool
-ha_validate(
-/*========*/
-					/* out: TRUE if ok */
-	hash_table_t*	table,		/* in: hash table */
-	ulint		start_index,	/* in: start index */
-	ulint		end_index)	/* in: end index */
-{
-	hash_cell_t*	cell;
-	ha_node_t*	node;
-	ibool		ok	= TRUE;
-	ulint		i;
-
-	ut_a(start_index <= end_index);
-	ut_a(start_index < hash_get_n_cells(table));
-	ut_a(end_index < hash_get_n_cells(table));
-
-	for (i = start_index; i <= end_index; i++) {
-
-		cell = hash_get_nth_cell(table, i);
-
-		node = cell->node;
-
-		while (node) {
-			if (hash_calc_hash(node->fold, table) != i) {
-				ut_print_timestamp(stderr);
-				fprintf(stderr,
-					"InnoDB: Error: hash table node"
-					" fold value %lu does not\n"
-					"InnoDB: match the cell number %lu.\n",
-					(ulong) node->fold, (ulong) i);
-
-				ok = FALSE;
-			}
-
-			node = node->next;
-		}
-	}
-
-	return(ok);
-}
-
-/*****************************************************************
-Prints info of a hash table. */
-
-void
-ha_print_info(
-/*==========*/
-	FILE*		file,	/* in: file where to print */
-	hash_table_t*	table)	/* in: hash table */
-{
-#ifdef UNIV_DEBUG
-/* Some of the code here is disabled for performance reasons in production
-builds, see http://bugs.mysql.com/36941 */
-#define PRINT_USED_CELLS
-#endif /* UNIV_DEBUG */
-
-#ifdef PRINT_USED_CELLS
-	hash_cell_t*	cell;
-	ulint		cells	= 0;
-	ulint		i;
-#endif /* PRINT_USED_CELLS */
-	ulint		n_bufs;
-
-#ifdef PRINT_USED_CELLS
-	for (i = 0; i < hash_get_n_cells(table); i++) {
-
-		cell = hash_get_nth_cell(table, i);
-
-		if (cell->node) {
-
-			cells++;
-		}
-	}
-#endif /* PRINT_USED_CELLS */
-
-	fprintf(file, "Hash table size %lu",
-		(ulong) hash_get_n_cells(table));
-
-#ifdef PRINT_USED_CELLS
-	fprintf(file, ", used cells %lu", (ulong) cells);
-#endif /* PRINT_USED_CELLS */
-
-	if (table->heaps == NULL && table->heap != NULL) {
-
-		/* This calculation is intended for the adaptive hash
-		index: how many buffer frames we have reserved? */
-
-		n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1;
-
-		if (table->heap->free_block) {
-			n_bufs++;
-		}
-
-		fprintf(file, ", node heap has %lu buffer(s)\n",
-			(ulong) n_bufs);
-	}
-}
diff --git a/storage/innobase/ha/hash0hash.c b/storage/innobase/ha/hash0hash.c
deleted file mode 100644
index 4807015eee5..00000000000
--- a/storage/innobase/ha/hash0hash.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/******************************************************
-The simple hash table utility
-
-(c) 1997 Innobase Oy
-
-Created 5/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "hash0hash.h"
-#ifdef UNIV_NONINL
-#include "hash0hash.ic"
-#endif
-
-#include "mem0mem.h"
-
-/****************************************************************
-Reserves the mutex for a fold value in a hash table. */
-
-void
-hash_mutex_enter(
-/*=============*/
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold)	/* in: fold */
-{
-	mutex_enter(hash_get_mutex(table, fold));
-}
-
-/****************************************************************
-Releases the mutex for a fold value in a hash table. */
-
-void
-hash_mutex_exit(
-/*============*/
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold)	/* in: fold */
-{
-	mutex_exit(hash_get_mutex(table, fold));
-}
-
-/****************************************************************
-Reserves all the mutexes of a hash table, in an ascending order. */
-
-void
-hash_mutex_enter_all(
-/*=================*/
-	hash_table_t*	table)	/* in: hash table */
-{
-	ulint	i;
-
-	for (i = 0; i < table->n_mutexes; i++) {
-
-		mutex_enter(table->mutexes + i);
-	}
-}
-
-/****************************************************************
-Releases all the mutexes of a hash table. */
-
-void
-hash_mutex_exit_all(
-/*================*/
-	hash_table_t*	table)	/* in: hash table */
-{
-	ulint	i;
-
-	for (i = 0; i < table->n_mutexes; i++) {
-
-		mutex_exit(table->mutexes + i);
-	}
-}
-
-/*****************************************************************
-Creates a hash table with >= n array cells. The actual number of cells is
-chosen to be a prime number slightly bigger than n. */
-
-hash_table_t*
-hash_create(
-/*========*/
-			/* out, own: created table */
-	ulint	n)	/* in: number of array cells */
-{
-	hash_cell_t*	array;
-	ulint		prime;
-	hash_table_t*	table;
-	ulint		i;
-	hash_cell_t*	cell;
-
-	prime = ut_find_prime(n);
-
-	table = mem_alloc(sizeof(hash_table_t));
-
-	array = ut_malloc(sizeof(hash_cell_t) * prime);
-
-	table->adaptive = FALSE;
-	table->array = array;
-	table->n_cells = prime;
-	table->n_mutexes = 0;
-	table->mutexes = NULL;
-	table->heaps = NULL;
-	table->heap = NULL;
-	table->magic_n = HASH_TABLE_MAGIC_N;
-
-	/* Initialize the cell array */
-
-	for (i = 0; i < prime; i++) {
-
-		cell = hash_get_nth_cell(table, i);
-		cell->node = NULL;
-	}
-
-	return(table);
-}
-
-/*****************************************************************
-Frees a hash table. */
-
-void
-hash_table_free(
-/*============*/
-	hash_table_t*	table)	/* in, own: hash table */
-{
-	ut_a(table->mutexes == NULL);
-
-	ut_free(table->array);
-	mem_free(table);
-}
-
-/*****************************************************************
-Creates a mutex array to protect a hash table. */
-
-void
-hash_create_mutexes_func(
-/*=====================*/
-	hash_table_t*	table,		/* in: hash table */
-#ifdef UNIV_SYNC_DEBUG
-	ulint		sync_level,	/* in: latching order level of the
-					mutexes: used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
-	ulint		n_mutexes)	/* in: number of mutexes, must be a
-					power of 2 */
-{
-	ulint	i;
-
-	ut_a(n_mutexes == ut_2_power_up(n_mutexes));
-
-	table->mutexes = mem_alloc(n_mutexes * sizeof(mutex_t));
-
-	for (i = 0; i < n_mutexes; i++) {
-		mutex_create(table->mutexes + i, sync_level);
-	}
-
-	table->n_mutexes = n_mutexes;
-}
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
deleted file mode 100644
index 1573de7e818..00000000000
--- a/storage/innobase/include/btr0btr.h
+++ /dev/null
@@ -1,451 +0,0 @@
-/******************************************************
-The B-tree
-
-(c) 1994-1996 Innobase Oy
-
-Created 6/2/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef btr0btr_h
-#define btr0btr_h
-
-#include "univ.i"
-
-#include "dict0dict.h"
-#include "data0data.h"
-#include "page0cur.h"
-#include "rem0rec.h"
-#include "mtr0mtr.h"
-#include "btr0types.h"
-
-/* Maximum record size which can be stored on a page, without using the
-special big record storage structure */
-
-#define	BTR_PAGE_MAX_REC_SIZE	(UNIV_PAGE_SIZE / 2 - 200)
-
-/* Maximum depth of a B-tree in InnoDB. Note that this isn't a maximum as
-such; none of the tree operations avoid producing trees bigger than this. It
-is instead a "max depth that other code must work with", useful for e.g.
-fixed-size arrays that must store some information about each level in a
-tree. In other words: if a B-tree with bigger depth than this is
-encountered, it is not acceptable for it to lead to mysterious memory
-corruption, but it is acceptable for the program to die with a clear assert
-failure. */
-#define BTR_MAX_LEVELS		100
-
-/* Latching modes for btr_cur_search_to_nth_level(). */
-#define BTR_SEARCH_LEAF		RW_S_LATCH
-#define BTR_MODIFY_LEAF		RW_X_LATCH
-#define BTR_NO_LATCHES		RW_NO_LATCH
-#define	BTR_MODIFY_TREE		33
-#define	BTR_CONT_MODIFY_TREE	34
-#define	BTR_SEARCH_PREV		35
-#define	BTR_MODIFY_PREV		36
-
-/* If this is ORed to the latch mode, it means that the search tuple will be
-inserted to the index, at the searched position */
-#define BTR_INSERT		512
-
-/* This flag ORed to latch mode says that we do the search in query
-optimization */
-#define BTR_ESTIMATE		1024
-
-/* This flag ORed to latch mode says that we can ignore possible
-UNIQUE definition on secondary indexes when we decide if we can use the
-insert buffer to speed up inserts */
-#define BTR_IGNORE_SEC_UNIQUE	2048
-
-/******************************************************************
-Gets the root node of a tree and x-latches it. */
-
-page_t*
-btr_root_get(
-/*=========*/
-				/* out: root page, x-latched */
-	dict_index_t*	index,	/* in: index tree */
-	mtr_t*		mtr);	/* in: mtr */
-/******************************************************************
-Gets a buffer page and declares its latching order level. */
-UNIV_INLINE
-page_t*
-btr_page_get(
-/*=========*/
-	ulint	space,		/* in: space id */
-	ulint	page_no,	/* in: page number */
-	ulint	mode,		/* in: latch mode */
-	mtr_t*	mtr);		/* in: mtr */
-/******************************************************************
-Gets the index id field of a page. */
-UNIV_INLINE
-dulint
-btr_page_get_index_id(
-/*==================*/
-				/* out: index id */
-	page_t*		page);	/* in: index page */
-/************************************************************
-Gets the node level field in an index page. */
-UNIV_INLINE
-ulint
-btr_page_get_level_low(
-/*===================*/
-			/* out: level, leaf level == 0 */
-	page_t*	page);	/* in: index page */
-/************************************************************
-Gets the node level field in an index page. */
-UNIV_INLINE
-ulint
-btr_page_get_level(
-/*===============*/
-			/* out: level, leaf level == 0 */
-	page_t*	page,	/* in: index page */
-	mtr_t*	mtr);	/* in: mini-transaction handle */
-/************************************************************
-Gets the next index page number. */
-UNIV_INLINE
-ulint
-btr_page_get_next(
-/*==============*/
-			/* out: next page number */
-	page_t*	page,	/* in: index page */
-	mtr_t*	mtr);	/* in: mini-transaction handle */
-/************************************************************
-Gets the previous index page number. */
-UNIV_INLINE
-ulint
-btr_page_get_prev(
-/*==============*/
-			/* out: prev page number */
-	page_t*	page,	/* in: index page */
-	mtr_t*	mtr);	/* in: mini-transaction handle */
-/*****************************************************************
-Gets pointer to the previous user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor. */
-
-rec_t*
-btr_get_prev_user_rec(
-/*==================*/
-			/* out: previous user record, NULL if there is none */
-	rec_t*	rec,	/* in: record on leaf level */
-	mtr_t*	mtr);	/* in: mtr holding a latch on the page, and if
-			needed, also to the previous page */
-/*****************************************************************
-Gets pointer to the next user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor. */
-
-rec_t*
-btr_get_next_user_rec(
-/*==================*/
-			/* out: next user record, NULL if there is none */
-	rec_t*	rec,	/* in: record on leaf level */
-	mtr_t*	mtr);	/* in: mtr holding a latch on the page, and if
-			needed, also to the next page */
-/******************************************************************
-Releases the latch on a leaf page and bufferunfixes it. */
-UNIV_INLINE
-void
-btr_leaf_page_release(
-/*==================*/
-	page_t*	page,		/* in: page */
-	ulint	latch_mode,	/* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
-	mtr_t*	mtr);		/* in: mtr */
-/******************************************************************
-Gets the child node file address in a node pointer. */
-UNIV_INLINE
-ulint
-btr_node_ptr_get_child_page_no(
-/*===========================*/
-				/* out: child node address */
-	rec_t*		rec,	/* in: node pointer record */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/****************************************************************
-Creates the root node for a new index tree. */
-
-ulint
-btr_create(
-/*=======*/
-			/* out: page number of the created root, FIL_NULL if
-			did not succeed */
-	ulint	type,	/* in: type of the index */
-	ulint	space,	/* in: space where created */
-	dulint	index_id,/* in: index id */
-	ulint	comp,	/* in: nonzero=compact page format */
-	mtr_t*	mtr);	/* in: mini-transaction handle */
-/****************************************************************
-Frees a B-tree except the root page, which MUST be freed after this
-by calling btr_free_root. */
-
-void
-btr_free_but_not_root(
-/*==================*/
-	ulint	space,		/* in: space where created */
-	ulint	root_page_no);	/* in: root page number */
-/****************************************************************
-Frees the B-tree root page. Other tree MUST already have been freed. */
-
-void
-btr_free_root(
-/*==========*/
-	ulint	space,		/* in: space where created */
-	ulint	root_page_no,	/* in: root page number */
-	mtr_t*	mtr);		/* in: a mini-transaction which has already
-				been started */
-/*****************************************************************
-Makes tree one level higher by splitting the root, and inserts
-the tuple. It is assumed that mtr contains an x-latch on the tree.
-NOTE that the operation of this function must always succeed,
-we cannot reverse it: therefore enough free disk space must be
-guaranteed to be available before this function is called. */
-
-rec_t*
-btr_root_raise_and_insert(
-/*======================*/
-				/* out: inserted record */
-	btr_cur_t*	cursor,	/* in: cursor at which to insert: must be
-				on the root page; when the function returns,
-				the cursor is positioned on the predecessor
-				of the inserted record */
-	dtuple_t*	tuple,	/* in: tuple to insert */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
-Reorganizes an index page. */
-
-void
-btr_page_reorganize(
-/*================*/
-	page_t*		page,	/* in: page to be reorganized */
-	dict_index_t*	index,	/* in: record descriptor */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
-Decides if the page should be split at the convergence point of
-inserts converging to left. */
-
-ibool
-btr_page_get_split_rec_to_left(
-/*===========================*/
-				/* out: TRUE if split recommended */
-	btr_cur_t*	cursor,	/* in: cursor at which to insert */
-	rec_t**		split_rec);/* out: if split recommended,
-				the first record on upper half page,
-				or NULL if tuple should be first */
-/*****************************************************************
-Decides if the page should be split at the convergence point of
-inserts converging to right. */
-
-ibool
-btr_page_get_split_rec_to_right(
-/*============================*/
-				/* out: TRUE if split recommended */
-	btr_cur_t*	cursor,	/* in: cursor at which to insert */
-	rec_t**		split_rec);/* out: if split recommended,
-				the first record on upper half page,
-				or NULL if tuple should be first */
-/*****************************************************************
-Splits an index page to halves and inserts the tuple. It is assumed
-that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
-is released within this function! NOTE that the operation of this
-function must always succeed, we cannot reverse it: therefore
-enough free disk space must be guaranteed to be available before
-this function is called. */
-
-rec_t*
-btr_page_split_and_insert(
-/*======================*/
-				/* out: inserted record; NOTE: the tree
-				x-latch is released! NOTE: 2 free disk
-				pages must be available! */
-	btr_cur_t*	cursor,	/* in: cursor at which to insert; when the
-				function returns, the cursor is positioned
-				on the predecessor of the inserted record */
-	dtuple_t*	tuple,	/* in: tuple to insert */
-	mtr_t*		mtr);	/* in: mtr */
-/***********************************************************
-Inserts a data tuple to a tree on a non-leaf level. It is assumed
-that mtr holds an x-latch on the tree. */
-
-void
-btr_insert_on_non_leaf_level(
-/*=========================*/
-	dict_index_t*	index,	/* in: index */
-	ulint		level,	/* in: level, must be > 0 */
-	dtuple_t*	tuple,	/* in: the record to be inserted */
-	mtr_t*		mtr);	/* in: mtr */
-/********************************************************************
-Sets a record as the predefined minimum record. */
-
-void
-btr_set_min_rec_mark(
-/*=================*/
-	rec_t*	rec,	/* in: record */
-	ulint	comp,	/* in: nonzero=compact page format */
-	mtr_t*	mtr);	/* in: mtr */
-/*****************************************************************
-Deletes on the upper level the node pointer to a page. */
-
-void
-btr_node_ptr_delete(
-/*================*/
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: page whose node pointer is deleted */
-	mtr_t*		mtr);	/* in: mtr */
-#ifdef UNIV_DEBUG
-/****************************************************************
-Checks that the node pointer to a page is appropriate. */
-
-ibool
-btr_check_node_ptr(
-/*===============*/
-				/* out: TRUE */
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: index page */
-	mtr_t*		mtr);	/* in: mtr */
-#endif /* UNIV_DEBUG */
-/*****************************************************************
-Tries to merge the page first to the left immediate brother if such a
-brother exists, and the node pointers to the current page and to the
-brother reside on the same page. If the left brother does not satisfy these
-conditions, looks at the right brother. If the page is the only one on that
-level lifts the records of the page to the father page, thus reducing the
-tree height. It is assumed that mtr holds an x-latch on the tree and on the
-page. If cursor is on the leaf level, mtr must also hold x-latches to
-the brothers, if they exist. NOTE: it is assumed that the caller has reserved
-enough free extents so that the compression will always succeed if done! */
-void
-btr_compress(
-/*=========*/
-	btr_cur_t*	cursor,	/* in: cursor on the page to merge or lift;
-				the page must not be empty: in record delete
-				use btr_discard_page if the page would become
-				empty */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
-Discards a page from a B-tree. This is used to remove the last record from
-a B-tree page: the whole page must be removed at the same time. This cannot
-be used for the root page, which is allowed to be empty. */
-
-void
-btr_discard_page(
-/*=============*/
-	btr_cur_t*	cursor,	/* in: cursor on the page to discard: not on
-				the root page */
-	mtr_t*		mtr);	/* in: mtr */
-/********************************************************************
-Parses the redo log record for setting an index record as the predefined
-minimum record. */
-
-byte*
-btr_parse_set_min_rec_mark(
-/*=======================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	ulint	comp,	/* in: nonzero=compact page format */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr);	/* in: mtr or NULL */
-/***************************************************************
-Parses a redo log record of reorganizing a page. */
-
-byte*
-btr_parse_page_reorganize(
-/*======================*/
-				/* out: end of log record or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	dict_index_t*	index,	/* in: record descriptor */
-	page_t*		page,	/* in: page or NULL */
-	mtr_t*		mtr);	/* in: mtr or NULL */
-/******************************************************************
-Gets the number of pages in a B-tree. */
-
-ulint
-btr_get_size(
-/*=========*/
-				/* out: number of pages */
-	dict_index_t*	index,	/* in: index */
-	ulint		flag);	/* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
-/******************************************************************
-Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents! */
-
-page_t*
-btr_page_alloc(
-/*===========*/
-					/* out: new allocated page, x-latched;
-					NULL if out of space */
-	dict_index_t*	index,		/* in: index tree */
-	ulint		hint_page_no,	/* in: hint of a good page */
-	byte		file_direction,	/* in: direction where a possible
-					page split is made */
-	ulint		level,		/* in: level where the page is placed
-					in the tree */
-	mtr_t*		mtr);		/* in: mtr */
-/******************************************************************
-Frees a file page used in an index tree. NOTE: cannot free field external
-storage pages because the page must contain info on its level. */
-
-void
-btr_page_free(
-/*==========*/
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: page to be freed, x-latched */
-	mtr_t*		mtr);	/* in: mtr */
-/******************************************************************
-Frees a file page used in an index tree. Can be used also to BLOB
-external storage pages, because the page level 0 can be given as an
-argument. */
-
-void
-btr_page_free_low(
-/*==============*/
-	dict_index_t*	index,	/* in: index tree */
-	page_t*		page,	/* in: page to be freed, x-latched */
-	ulint		level,	/* in: page level */
-	mtr_t*		mtr);	/* in: mtr */
-#ifdef UNIV_BTR_PRINT
-/*****************************************************************
-Prints size info of a B-tree. */
-
-void
-btr_print_size(
-/*===========*/
-	dict_index_t*	index);	/* in: index tree */
-/******************************************************************
-Prints directories and other info of all nodes in the index. */
-
-void
-btr_print_index(
-/*============*/
-	dict_index_t*	index,	/* in: index */
-	ulint		width);	/* in: print this many entries from start
-				and end */
-#endif /* UNIV_BTR_PRINT */
-/****************************************************************
-Checks the size and number of fields in a record based on the definition of
-the index. */
-
-ibool
-btr_index_rec_validate(
-/*===================*/
-					/* out: TRUE if ok */
-	rec_t*		rec,		/* in: index record */
-	dict_index_t*	index,		/* in: index */
-	ibool		dump_on_error);	/* in: TRUE if the function
-					should print hex dump of record
-					and page on error */
-/******************************************************************
-Checks the consistency of an index tree. */
-
-ibool
-btr_validate_index(
-/*===============*/
-				/* out: TRUE if ok */
-	dict_index_t*	index,	/* in: index */
-	trx_t*		trx);	/* in: transaction or NULL */
-
-#define BTR_N_LEAF_PAGES	1
-#define BTR_TOTAL_SIZE		2
-
-#ifndef UNIV_NONINL
-#include "btr0btr.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
deleted file mode 100644
index 4a88f58b318..00000000000
--- a/storage/innobase/include/btr0btr.ic
+++ /dev/null
@@ -1,234 +0,0 @@
-/******************************************************
-The B-tree
-
-(c) 1994-1996 Innobase Oy
-
-Created 6/2/1994 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-
-#define BTR_MAX_NODE_LEVEL	50	/* used in debug checking */
-
-/******************************************************************
-Gets a buffer page and declares its latching order level. */
-UNIV_INLINE
-page_t*
-btr_page_get(
-/*=========*/
-	ulint	space,		/* in: space id */
-	ulint	page_no,	/* in: page number */
-	ulint	mode,		/* in: latch mode */
-	mtr_t*	mtr)		/* in: mtr */
-{
-	page_t*	page;
-
-	page = buf_page_get(space, page_no, mode, mtr);
-#ifdef UNIV_SYNC_DEBUG
-	if (mode != RW_NO_LATCH) {
-
-		buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-	}
-#endif
-	return(page);
-}
-
-/******************************************************************
-Sets the index id field of a page. */
-UNIV_INLINE
-void
-btr_page_set_index_id(
-/*==================*/
-	page_t*		page,	/* in: page to be created */
-	dulint		id,	/* in: index id */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	mlog_write_dulint(page + PAGE_HEADER + PAGE_INDEX_ID, id, mtr);
-}
-
-/******************************************************************
-Gets the index id field of a page. */
-UNIV_INLINE
-dulint
-btr_page_get_index_id(
-/*==================*/
-				/* out: index id */
-	page_t*		page)	/* in: index page */
-{
-	return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID));
-}
-
-/************************************************************
-Gets the node level field in an index page. */
-UNIV_INLINE
-ulint
-btr_page_get_level_low(
-/*===================*/
-			/* out: level, leaf level == 0 */
-	page_t*	page)	/* in: index page */
-{
-	ulint	level;
-
-	ut_ad(page);
-
-	level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
-
-	ut_ad(level <= BTR_MAX_NODE_LEVEL);
-
-	return(level);
-}
-
-/************************************************************
-Gets the node level field in an index page. */
-UNIV_INLINE
-ulint
-btr_page_get_level(
-/*===============*/
-			/* out: level, leaf level == 0 */
-	page_t*	page,	/* in: index page */
-	mtr_t*	mtr __attribute__((unused))) /* in: mini-transaction handle */
-{
-	ut_ad(page && mtr);
-
-	return(btr_page_get_level_low(page));
-}
-
-/************************************************************
-Sets the node level field in an index page. */
-UNIV_INLINE
-void
-btr_page_set_level(
-/*===============*/
-	page_t*	page,	/* in: index page */
-	ulint	level,	/* in: level, leaf level == 0 */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
-{
-	ut_ad(page && mtr);
-	ut_ad(level <= BTR_MAX_NODE_LEVEL);
-
-	mlog_write_ulint(page + PAGE_HEADER + PAGE_LEVEL, level,
-			 MLOG_2BYTES, mtr);
-}
-
-/************************************************************
-Gets the next index page number. */
-UNIV_INLINE
-ulint
-btr_page_get_next(
-/*==============*/
-			/* out: next page number */
-	page_t*	page,	/* in: index page */
-	mtr_t*	mtr __attribute__((unused))) /* in: mini-transaction handle */
-{
-	ut_ad(page && mtr);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX)
-	      || mtr_memo_contains(mtr, buf_block_align(page),
-				   MTR_MEMO_PAGE_S_FIX));
-
-	return(mach_read_from_4(page + FIL_PAGE_NEXT));
-}
-
-/************************************************************
-Sets the next index page field. */
-UNIV_INLINE
-void
-btr_page_set_next(
-/*==============*/
-	page_t*	page,	/* in: index page */
-	ulint	next,	/* in: next page number */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
-{
-	ut_ad(page && mtr);
-
-	mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr);
-}
-
-/************************************************************
-Gets the previous index page number. */
-UNIV_INLINE
-ulint
-btr_page_get_prev(
-/*==============*/
-			/* out: prev page number */
-	page_t*	page,	/* in: index page */
-	mtr_t*	mtr __attribute__((unused))) /* in: mini-transaction handle */
-{
-	ut_ad(page && mtr);
-
-	return(mach_read_from_4(page + FIL_PAGE_PREV));
-}
-
-/************************************************************
-Sets the previous index page field. */
-UNIV_INLINE
-void
-btr_page_set_prev(
-/*==============*/
-	page_t*	page,	/* in: index page */
-	ulint	prev,	/* in: previous page number */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
-{
-	ut_ad(page && mtr);
-
-	mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr);
-}
-
-/******************************************************************
-Gets the child node file address in a node pointer. */
-UNIV_INLINE
-ulint
-btr_node_ptr_get_child_page_no(
-/*===========================*/
-				/* out: child node address */
-	rec_t*		rec,	/* in: node pointer record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	byte*	field;
-	ulint	len;
-	ulint	page_no;
-
-	ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
-
-	/* The child address is in the last field */
-	field = rec_get_nth_field(rec, offsets,
-				  rec_offs_n_fields(offsets) - 1, &len);
-
-	ut_ad(len == 4);
-
-	page_no = mach_read_from_4(field);
-
-	if (UNIV_UNLIKELY(page_no == 0)) {
-		fprintf(stderr,
-			"InnoDB: a nonsensical page number 0"
-			" in a node ptr record at offset %lu\n",
-			(ulong) page_offset(rec));
-		buf_page_print(buf_frame_align(rec));
-	}
-
-	return(page_no);
-}
-
-/******************************************************************
-Releases the latches on a leaf page and bufferunfixes it. */
-UNIV_INLINE
-void
-btr_leaf_page_release(
-/*==================*/
-	page_t*	page,		/* in: page */
-	ulint	latch_mode,	/* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
-	mtr_t*	mtr)		/* in: mtr */
-{
-	ut_ad(!mtr_memo_contains(mtr, buf_block_align(page),
-				 MTR_MEMO_MODIFY));
-	if (latch_mode == BTR_SEARCH_LEAF) {
-		mtr_memo_release(mtr, buf_block_align(page),
-				 MTR_MEMO_PAGE_S_FIX);
-	} else {
-		ut_ad(latch_mode == BTR_MODIFY_LEAF);
-		mtr_memo_release(mtr, buf_block_align(page),
-				 MTR_MEMO_PAGE_X_FIX);
-	}
-}
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
deleted file mode 100644
index 213dcb7f568..00000000000
--- a/storage/innobase/include/btr0cur.h
+++ /dev/null
@@ -1,706 +0,0 @@
-/******************************************************
-The index tree cursor
-
-(c) 1994-1996 Innobase Oy
-
-Created 10/16/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef btr0cur_h
-#define btr0cur_h
-
-#include "univ.i"
-#include "dict0dict.h"
-#include "data0data.h"
-#include "page0cur.h"
-#include "btr0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "ha0ha.h"
-
-/* Mode flags for btr_cur operations; these can be ORed */
-#define BTR_NO_UNDO_LOG_FLAG	1	/* do no undo logging */
-#define BTR_NO_LOCKING_FLAG	2	/* do no record lock checking */
-#define BTR_KEEP_SYS_FLAG	4	/* sys fields will be found from the
-					update vector or inserted entry */
-
-#define BTR_CUR_ADAPT
-#define BTR_CUR_HASH_ADAPT
-
-/*************************************************************
-Returns the page cursor component of a tree cursor. */
-UNIV_INLINE
-page_cur_t*
-btr_cur_get_page_cur(
-/*=================*/
-				/* out: pointer to page cursor component */
-	btr_cur_t*	cursor);/* in: tree cursor */
-/*************************************************************
-Returns the record pointer of a tree cursor. */
-UNIV_INLINE
-rec_t*
-btr_cur_get_rec(
-/*============*/
-				/* out: pointer to record */
-	btr_cur_t*	cursor);/* in: tree cursor */
-/*************************************************************
-Invalidates a tree cursor by setting record pointer to NULL. */
-UNIV_INLINE
-void
-btr_cur_invalidate(
-/*===============*/
-	btr_cur_t*	cursor);/* in: tree cursor */
-/*************************************************************
-Returns the page of a tree cursor. */
-UNIV_INLINE
-page_t*
-btr_cur_get_page(
-/*=============*/
-				/* out: pointer to page */
-	btr_cur_t*	cursor);/* in: tree cursor */
-/*************************************************************
-Returns the index of a cursor. */
-UNIV_INLINE
-dict_index_t*
-btr_cur_get_index(
-/*==============*/
-				/* out: index */
-	btr_cur_t*	cursor);/* in: B-tree cursor */
-/*************************************************************
-Positions a tree cursor at a given record. */
-UNIV_INLINE
-void
-btr_cur_position(
-/*=============*/
-	dict_index_t*	index,	/* in: index */
-	rec_t*		rec,	/* in: record in tree */
-	btr_cur_t*	cursor);/* in: cursor */
-/************************************************************************
-Searches an index tree and positions a tree cursor on a given level.
-NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
-to node pointer page number fields on the upper levels of the tree!
-Note that if mode is PAGE_CUR_LE, which is used in inserts, then
-cursor->up_match and cursor->low_match both will have sensible values.
-If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */
-
-void
-btr_cur_search_to_nth_level(
-/*========================*/
-	dict_index_t*	index,	/* in: index */
-	ulint		level,	/* in: the tree level of search */
-	dtuple_t*	tuple,	/* in: data tuple; NOTE: n_fields_cmp in
-				tuple must be set so that it cannot get
-				compared to the node ptr page number field! */
-	ulint		mode,	/* in: PAGE_CUR_L, ...;
-				NOTE that if the search is made using a unique
-				prefix of a record, mode should be PAGE_CUR_LE,
-				not PAGE_CUR_GE, as the latter may end up on
-				the previous page of the record! Inserts
-				should always be made using PAGE_CUR_LE to
-				search the position! */
-	ulint		latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
-				BTR_INSERT and BTR_ESTIMATE;
-				cursor->left_page is used to store a pointer
-				to the left neighbor page, in the cases
-				BTR_SEARCH_PREV and BTR_MODIFY_PREV;
-				NOTE that if has_search_latch
-				is != 0, we maybe do not have a latch set
-				on the cursor page, we assume
-				the caller uses his search latch
-				to protect the record! */
-	btr_cur_t*	cursor, /* in/out: tree cursor; the cursor page is
-				s- or x-latched, but see also above! */
-	ulint		has_search_latch,/* in: latch mode the caller
-				currently has on btr_search_latch:
-				RW_S_LATCH, or 0 */
-	mtr_t*		mtr);	/* in: mtr */
-/*********************************************************************
-Opens a cursor at either end of an index. */
-
-void
-btr_cur_open_at_index_side(
-/*=======================*/
-	ibool		from_left,	/* in: TRUE if open to the low end,
-					FALSE if to the high end */
-	dict_index_t*	index,		/* in: index */
-	ulint		latch_mode,	/* in: latch mode */
-	btr_cur_t*	cursor,		/* in: cursor */
-	mtr_t*		mtr);		/* in: mtr */
-/**************************************************************************
-Positions a cursor at a randomly chosen position within a B-tree. */
-
-void
-btr_cur_open_at_rnd_pos(
-/*====================*/
-	dict_index_t*	index,		/* in: index */
-	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
-	btr_cur_t*	cursor,		/* in/out: B-tree cursor */
-	mtr_t*		mtr);		/* in: mtr */
-/*****************************************************************
-Tries to perform an insert to a page in an index tree, next to cursor.
-It is assumed that mtr holds an x-latch on the page. The operation does
-not succeed if there is too little space on the page. If there is just
-one record on the page, the insert will always succeed; this is to
-prevent trying to split a page with just one record. */
-
-ulint
-btr_cur_optimistic_insert(
-/*======================*/
-				/* out: DB_SUCCESS, DB_WAIT_LOCK,
-				DB_FAIL, or error number */
-	ulint		flags,	/* in: undo logging and locking flags: if not
-				zero, the parameters index and thr should be
-				specified */
-	btr_cur_t*	cursor,	/* in: cursor on page after which to insert;
-				cursor stays valid */
-	dtuple_t*	entry,	/* in: entry to insert */
-	rec_t**		rec,	/* out: pointer to inserted record if
-				succeed */
-	big_rec_t**	big_rec,/* out: big rec vector whose fields have to
-				be stored externally by the caller, or
-				NULL */
-	que_thr_t*	thr,	/* in: query thread or NULL */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
-Performs an insert on a page of an index tree. It is assumed that mtr
-holds an x-latch on the tree and on the cursor page. If the insert is
-made on the leaf level, to avoid deadlocks, mtr must also own x-latches
-to brothers of page, if those brothers exist. */
-
-ulint
-btr_cur_pessimistic_insert(
-/*=======================*/
-				/* out: DB_SUCCESS or error number */
-	ulint		flags,	/* in: undo logging and locking flags: if not
-				zero, the parameter thr should be
-				specified; if no undo logging is specified,
-				then the caller must have reserved enough
-				free extents in the file space so that the
-				insertion will certainly succeed */
-	btr_cur_t*	cursor,	/* in: cursor after which to insert;
-				cursor stays valid */
-	dtuple_t*	entry,	/* in: entry to insert */
-	rec_t**		rec,	/* out: pointer to inserted record if
-				succeed */
-	big_rec_t**	big_rec,/* out: big rec vector whose fields have to
-				be stored externally by the caller, or
-				NULL */
-	que_thr_t*	thr,	/* in: query thread or NULL */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
-Updates a record when the update causes no size changes in its fields. */
-
-ulint
-btr_cur_update_in_place(
-/*====================*/
-				/* out: DB_SUCCESS or error number */
-	ulint		flags,	/* in: undo logging and locking flags */
-	btr_cur_t*	cursor,	/* in: cursor on the record to update;
-				cursor stays valid and positioned on the
-				same record */
-	upd_t*		update,	/* in: update vector */
-	ulint		cmpl_info,/* in: compiler info on secondary index
-				updates */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
-Tries to update a record on a page in an index tree. It is assumed that mtr
-holds an x-latch on the page. The operation does not succeed if there is too
-little space on the page or if the update would result in too empty a page,
-so that tree compression is recommended. */
-
-ulint
-btr_cur_optimistic_update(
-/*======================*/
-				/* out: DB_SUCCESS, or DB_OVERFLOW if the
-				updated record does not fit, DB_UNDERFLOW
-				if the page would become too empty */
-	ulint		flags,	/* in: undo logging and locking flags */
-	btr_cur_t*	cursor,	/* in: cursor on the record to update;
-				cursor stays valid and positioned on the
-				same record */
-	upd_t*		update,	/* in: update vector; this must also
-				contain trx id and roll ptr fields */
-	ulint		cmpl_info,/* in: compiler info on secondary index
-				updates */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
-Performs an update of a record on a page of a tree. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. If the
-update is made on the leaf level, to avoid deadlocks, mtr must also
-own x-latches to brothers of page, if those brothers exist. */
-
-ulint
-btr_cur_pessimistic_update(
-/*=======================*/
-				/* out: DB_SUCCESS or error code */
-	ulint		flags,	/* in: undo logging, locking, and rollback
-				flags */
-	btr_cur_t*	cursor,	/* in: cursor on the record to update */
-	big_rec_t**	big_rec,/* out: big rec vector whose fields have to
-				be stored externally by the caller, or NULL */
-	upd_t*		update,	/* in: update vector; this is allowed also
-				contain trx id and roll ptr fields, but
-				the values in update vector have no effect */
-	ulint		cmpl_info,/* in: compiler info on secondary index
-				updates */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr);	/* in: mtr */
-/***************************************************************
-Marks a clustered index record deleted. Writes an undo log record to
-undo log on this delete marking. Writes in the trx id field the id
-of the deleting transaction, and in the roll ptr field pointer to the
-undo log record created. */
-
-ulint
-btr_cur_del_mark_set_clust_rec(
-/*===========================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
-				number */
-	ulint		flags,	/* in: undo logging and locking flags */
-	btr_cur_t*	cursor,	/* in: cursor */
-	ibool		val,	/* in: value to set */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr);	/* in: mtr */
-/***************************************************************
-Sets a secondary index record delete mark to TRUE or FALSE. */
-
-ulint
-btr_cur_del_mark_set_sec_rec(
-/*=========================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
-				number */
-	ulint		flags,	/* in: locking flag */
-	btr_cur_t*	cursor,	/* in: cursor */
-	ibool		val,	/* in: value to set */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr);	/* in: mtr */
-/***************************************************************
-Sets a secondary index record delete mark to FALSE. This function is
-only used by the insert buffer insert merge mechanism. */
-
-void
-btr_cur_del_unmark_for_ibuf(
-/*========================*/
-	rec_t*		rec,	/* in: record to delete unmark */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
-Tries to compress a page of the tree on the leaf level. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. To avoid
-deadlocks, mtr must also own x-latches to brothers of page, if those
-brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done! */
-
-void
-btr_cur_compress(
-/*=============*/
-	btr_cur_t*	cursor,	/* in: cursor on the page to compress;
-				cursor does not stay valid */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
-Tries to compress a page of the tree if it seems useful. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. To avoid
-deadlocks, mtr must also own x-latches to brothers of page, if those
-brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done! */
-
-ibool
-btr_cur_compress_if_useful(
-/*=======================*/
-				/* out: TRUE if compression occurred */
-	btr_cur_t*	cursor,	/* in: cursor on the page to compress;
-				cursor does not stay valid if compression
-				occurs */
-	mtr_t*		mtr);	/* in: mtr */
-/***********************************************************
-Removes the record on which the tree cursor is positioned. It is assumed
-that the mtr has an x-latch on the page where the cursor is positioned,
-but no latch on the whole tree. */
-
-ibool
-btr_cur_optimistic_delete(
-/*======================*/
-				/* out: TRUE if success, i.e., the page
-				did not become too empty */
-	btr_cur_t*	cursor,	/* in: cursor on the record to delete;
-				cursor stays valid: if deletion succeeds,
-				on function exit it points to the successor
-				of the deleted record */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
-Removes the record on which the tree cursor is positioned. Tries
-to compress the page if its fillfactor drops below a threshold
-or if it is the only page on the level. It is assumed that mtr holds
-an x-latch on the tree and on the cursor page. To avoid deadlocks,
-mtr must also own x-latches to brothers of page, if those brothers
-exist. */
-
-ibool
-btr_cur_pessimistic_delete(
-/*=======================*/
-				/* out: TRUE if compression occurred */
-	ulint*		err,	/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
-				the latter may occur because we may have
-				to update node pointers on upper levels,
-				and in the case of variable length keys
-				these may actually grow in size */
-	ibool		has_reserved_extents, /* in: TRUE if the
-				caller has already reserved enough free
-				extents so that he knows that the operation
-				will succeed */
-	btr_cur_t*	cursor,	/* in: cursor on the record to delete;
-				if compression does not occur, the cursor
-				stays valid: it points to successor of
-				deleted record on function exit */
-	ibool		in_rollback,/* in: TRUE if called in rollback */
-	mtr_t*		mtr);	/* in: mtr */
-/***************************************************************
-Parses a redo log record of updating a record in-place. */
-
-byte*
-btr_cur_parse_update_in_place(
-/*==========================*/
-				/* out: end of log record or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	page_t*		page,	/* in: page or NULL */
-	dict_index_t*	index);	/* in: index corresponding to page */
-/********************************************************************
-Parses the redo log record for delete marking or unmarking of a clustered
-index record. */
-
-byte*
-btr_cur_parse_del_mark_set_clust_rec(
-/*=================================*/
-				/* out: end of log record or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	dict_index_t*	index,	/* in: index corresponding to page */
-	page_t*		page);	/* in: page or NULL */
-/********************************************************************
-Parses the redo log record for delete marking or unmarking of a secondary
-index record. */
-
-byte*
-btr_cur_parse_del_mark_set_sec_rec(
-/*===============================*/
-				/* out: end of log record or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	page_t*		page);	/* in: page or NULL */
-/***********************************************************************
-Estimates the number of rows in a given index range. */
-
-ib_longlong
-btr_estimate_n_rows_in_range(
-/*=========================*/
-				/* out: estimated number of rows */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	tuple1,	/* in: range start, may also be empty tuple */
-	ulint		mode1,	/* in: search mode for range start */
-	dtuple_t*	tuple2,	/* in: range end, may also be empty tuple */
-	ulint		mode2);	/* in: search mode for range end */
-/***********************************************************************
-Estimates the number of different key values in a given index, for
-each n-column prefix of the index where n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals. */
-
-void
-btr_estimate_number_of_different_key_vals(
-/*======================================*/
-	dict_index_t*	index);	/* in: index */
-/***********************************************************************
-Marks not updated extern fields as not-owned by this record. The ownership
-is transferred to the updated record which is inserted elsewhere in the
-index tree. In purge only the owner of externally stored field is allowed
-to free the field. */
-
-void
-btr_cur_mark_extern_inherited_fields(
-/*=================================*/
-	rec_t*		rec,	/* in: record in a clustered index */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	upd_t*		update,	/* in: update vector */
-	mtr_t*		mtr);	/* in: mtr */
-/***********************************************************************
-The complement of the previous function: in an update entry may inherit
-some externally stored fields from a record. We must mark them as inherited
-in entry, so that they are not freed in a rollback. */
-
-void
-btr_cur_mark_dtuple_inherited_extern(
-/*=================================*/
-	dtuple_t*	entry,		/* in: updated entry to be inserted to
-					clustered index */
-	ulint*		ext_vec,	/* in: array of extern fields in the
-					original record */
-	ulint		n_ext_vec,	/* in: number of elements in ext_vec */
-	upd_t*		update);	/* in: update vector */
-/***********************************************************************
-Marks all extern fields in a dtuple as owned by the record. */
-
-void
-btr_cur_unmark_dtuple_extern_fields(
-/*================================*/
-	dtuple_t*	entry,		/* in: clustered index entry */
-	ulint*		ext_vec,	/* in: array of numbers of fields
-					which have been stored externally */
-	ulint		n_ext_vec);	/* in: number of elements in ext_vec */
-/***********************************************************************
-Stores the fields in big_rec_vec to the tablespace and puts pointers to
-them in rec. The fields are stored on pages allocated from leaf node
-file segment of the index tree. */
-
-ulint
-btr_store_big_rec_extern_fields(
-/*============================*/
-					/* out: DB_SUCCESS or error */
-	dict_index_t*	index,		/* in: index of rec; the index tree
-					MUST be X-latched */
-	rec_t*		rec,		/* in: record */
-	const ulint*	offsets,	/* in: rec_get_offsets(rec, index);
-					the "external storage" flags in offsets
-					will not correspond to rec when
-					this function returns */
-	big_rec_t*	big_rec_vec,	/* in: vector containing fields
-					to be stored externally */
-	mtr_t*		local_mtr);	/* in: mtr containing the latch to
-					rec and to the tree */
-/***********************************************************************
-Frees the space in an externally stored field to the file space
-management if the field in data is owned the externally stored field,
-in a rollback we may have the additional condition that the field must
-not be inherited. */
-
-void
-btr_free_externally_stored_field(
-/*=============================*/
-	dict_index_t*	index,		/* in: index of the data, the index
-					tree MUST be X-latched; if the tree
-					height is 1, then also the root page
-					must be X-latched! (this is relevant
-					in the case this function is called
-					from purge where 'data' is located on
-					an undo log page, not an index
-					page) */
-	byte*		data,		/* in: internally stored data
-					+ reference to the externally
-					stored part */
-	ulint		local_len,	/* in: length of data */
-	ibool		do_not_free_inherited,/* in: TRUE if called in a
-					rollback and we do not want to free
-					inherited fields */
-	mtr_t*		local_mtr);	/* in: mtr containing the latch to
-					data an an X-latch to the index
-					tree */
-/***************************************************************
-Frees the externally stored fields for a record. */
-
-void
-btr_rec_free_externally_stored_fields(
-/*==================================*/
-	dict_index_t*	index,	/* in: index of the data, the index
-				tree MUST be X-latched */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	ibool		do_not_free_inherited,/* in: TRUE if called in a
-				rollback and we do not want to free
-				inherited fields */
-	mtr_t*		mtr);	/* in: mini-transaction handle which contains
-				an X-latch to record page and to the index
-				tree */
-/***********************************************************************
-Copies an externally stored field of a record to mem heap. */
-
-byte*
-btr_rec_copy_externally_stored_field(
-/*=================================*/
-				/* out: the field copied to heap */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		no,	/* in: field number */
-	ulint*		len,	/* out: length of the field */
-	mem_heap_t*	heap);	/* in: mem heap */
-/***********************************************************************
-Copies an externally stored field of a record to mem heap. Parameter
-data contains a pointer to 'internally' stored part of the field:
-possibly some data, and the reference to the externally stored part in
-the last 20 bytes of data. */
-
-byte*
-btr_copy_externally_stored_field(
-/*=============================*/
-				/* out: the whole field copied to heap */
-	ulint*		len,	/* out: length of the whole field */
-	byte*		data,	/* in: 'internally' stored part of the
-				field containing also the reference to
-				the external part */
-	ulint		local_len,/* in: length of data */
-	mem_heap_t*	heap);	/* in: mem heap */
-/***********************************************************************
-Stores the positions of the fields marked as extern storage in the update
-vector, and also those fields who are marked as extern storage in rec
-and not mentioned in updated fields. We use this function to remember
-which fields we must mark as extern storage in a record inserted for an
-update. */
-
-ulint
-btr_push_update_extern_fields(
-/*==========================*/
-				/* out: number of values stored in ext_vect */
-	ulint*		ext_vect,/* in: array of ulints, must be preallocated
-				to have space for all fields in rec */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	upd_t*		update);/* in: update vector or NULL */
-
-
-/*######################################################################*/
-
-/* In the pessimistic delete, if the page data size drops below this
-limit, merging it to a neighbor is tried */
-
-#define BTR_CUR_PAGE_COMPRESS_LIMIT	(UNIV_PAGE_SIZE / 2)
-
-/* A slot in the path array. We store here info on a search path down the
-tree. Each slot contains data on a single level of the tree. */
-
-typedef struct btr_path_struct	btr_path_t;
-struct btr_path_struct{
-	ulint	nth_rec;	/* index of the record
-				where the page cursor stopped on
-				this level (index in alphabetical
-				order); value ULINT_UNDEFINED
-				denotes array end */
-	ulint	n_recs;		/* number of records on the page */
-};
-
-#define BTR_PATH_ARRAY_N_SLOTS	250	/* size of path array (in slots) */
-
-/* The tree cursor: the definition appears here only for the compiler
-to know struct size! */
-
-struct btr_cur_struct {
-	dict_index_t*	index;		/* index where positioned */
-	page_cur_t	page_cur;	/* page cursor */
-	page_t*		left_page;	/* this field is used to store
-					a pointer to the left neighbor
-					page, in the cases
-					BTR_SEARCH_PREV and
-					BTR_MODIFY_PREV */
-	/*------------------------------*/
-	que_thr_t*	thr;		/* this field is only used when
-					btr_cur_search_... is called for an
-					index entry insertion: the calling
-					query thread is passed here to be
-					used in the insert buffer */
-	/*------------------------------*/
-	/* The following fields are used in btr_cur_search... to pass
-	information: */
-	ulint		flag;		/* BTR_CUR_HASH, BTR_CUR_HASH_FAIL,
-					BTR_CUR_BINARY, or
-					BTR_CUR_INSERT_TO_IBUF */
-	ulint		tree_height;	/* Tree height if the search is done
-					for a pessimistic insert or update
-					operation */
-	ulint		up_match;	/* If the search mode was PAGE_CUR_LE,
-					the number of matched fields to the
-					the first user record to the right of
-					the cursor record after
-					btr_cur_search_...;
-					for the mode PAGE_CUR_GE, the matched
-					fields to the first user record AT THE
-					CURSOR or to the right of it;
-					NOTE that the up_match and low_match
-					values may exceed the correct values
-					for comparison to the adjacent user
-					record if that record is on a
-					different leaf page! (See the note in
-					row_ins_duplicate_key.) */
-	ulint		up_bytes;	/* number of matched bytes to the
-					right at the time cursor positioned;
-					only used internally in searches: not
-					defined after the search */
-	ulint		low_match;	/* if search mode was PAGE_CUR_LE,
-					the number of matched fields to the
-					first user record AT THE CURSOR or
-					to the left of it after
-					btr_cur_search_...;
-					NOT defined for PAGE_CUR_GE or any
-					other search modes; see also the NOTE
-					in up_match! */
-	ulint		low_bytes;	/* number of matched bytes to the
-					right at the time cursor positioned;
-					only used internally in searches: not
-					defined after the search */
-	ulint		n_fields;	/* prefix length used in a hash
-					search if hash_node != NULL */
-	ulint		n_bytes;	/* hash prefix bytes if hash_node !=
-					NULL */
-	ulint		fold;		/* fold value used in the search if
-					flag is BTR_CUR_HASH */
-	/*------------------------------*/
-	btr_path_t*	path_arr;	/* in estimating the number of
-					rows in range, we store in this array
-					information of the path through
-					the tree */
-};
-
-/* Values for the flag documenting the used search method */
-#define BTR_CUR_HASH		1	/* successful shortcut using the hash
-					index */
-#define BTR_CUR_HASH_FAIL	2	/* failure using hash, success using
-					binary search: the misleading hash
-					reference is stored in the field
-					hash_node, and might be necessary to
-					update */
-#define BTR_CUR_BINARY		3	/* success using the binary search */
-#define BTR_CUR_INSERT_TO_IBUF	4	/* performed the intended insert to
-					the insert buffer */
-
-/* If pessimistic delete fails because of lack of file space,
-there is still a good change of success a little later: try this many times,
-and sleep this many microseconds in between */
-#define BTR_CUR_RETRY_DELETE_N_TIMES	100
-#define BTR_CUR_RETRY_SLEEP_TIME	50000
-
-/* The reference in a field for which data is stored on a different page.
-The reference is at the end of the 'locally' stored part of the field.
-'Locally' means storage in the index record.
-We store locally a long enough prefix of each column so that we can determine
-the ordering parts of each index record without looking into the externally
-stored part. */
-
-/*--------------------------------------*/
-#define BTR_EXTERN_SPACE_ID		0	/* space id where stored */
-#define BTR_EXTERN_PAGE_NO		4	/* page no where stored */
-#define BTR_EXTERN_OFFSET		8	/* offset of BLOB header
-						on that page */
-#define BTR_EXTERN_LEN			12	/* 8 bytes containing the
-						length of the externally
-						stored part of the BLOB.
-						The 2 highest bits are
-						reserved to the flags below. */
-/*--------------------------------------*/
-#define BTR_EXTERN_FIELD_REF_SIZE	20
-
-/* The highest bit of BTR_EXTERN_LEN (i.e., the highest bit of the byte
-at lowest address) is set to 1 if this field does not 'own' the externally
-stored field; only the owner field is allowed to free the field in purge!
-If the 2nd highest bit is 1 then it means that the externally stored field
-was inherited from an earlier version of the row. In rollback we are not
-allowed to free an inherited external field. */
-
-#define BTR_EXTERN_OWNER_FLAG		128
-#define BTR_EXTERN_INHERITED_FLAG	64
-
-extern ulint	btr_cur_n_non_sea;
-extern ulint	btr_cur_n_sea;
-extern ulint	btr_cur_n_non_sea_old;
-extern ulint	btr_cur_n_sea_old;
-
-#ifndef UNIV_NONINL
-#include "btr0cur.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic
deleted file mode 100644
index bd2c46eb734..00000000000
--- a/storage/innobase/include/btr0cur.ic
+++ /dev/null
@@ -1,154 +0,0 @@
-/******************************************************
-The index tree cursor
-
-(c) 1994-1996 Innobase Oy
-
-Created 10/16/1994 Heikki Tuuri
-*******************************************************/
-
-#include "btr0btr.h"
-
-/*************************************************************
-Returns the page cursor component of a tree cursor. */
-UNIV_INLINE
-page_cur_t*
-btr_cur_get_page_cur(
-/*=================*/
-				/* out: pointer to page cursor component */
-	btr_cur_t*	cursor)	/* in: tree cursor */
-{
-	return(&(cursor->page_cur));
-}
-
-/*************************************************************
-Returns the record pointer of a tree cursor. */
-UNIV_INLINE
-rec_t*
-btr_cur_get_rec(
-/*============*/
-				/* out: pointer to record */
-	btr_cur_t*	cursor)	/* in: tree cursor */
-{
-	return(page_cur_get_rec(&(cursor->page_cur)));
-}
-
-/*************************************************************
-Invalidates a tree cursor by setting record pointer to NULL. */
-UNIV_INLINE
-void
-btr_cur_invalidate(
-/*===============*/
-	btr_cur_t*	cursor)	/* in: tree cursor */
-{
-	page_cur_invalidate(&(cursor->page_cur));
-}
-
-/*************************************************************
-Returns the page of a tree cursor. */
-UNIV_INLINE
-page_t*
-btr_cur_get_page(
-/*=============*/
-				/* out: pointer to page */
-	btr_cur_t*	cursor)	/* in: tree cursor */
-{
-	return(buf_frame_align(page_cur_get_rec(&(cursor->page_cur))));
-}
-
-/*************************************************************
-Returns the index of a cursor. */
-UNIV_INLINE
-dict_index_t*
-btr_cur_get_index(
-/*==============*/
-				/* out: index */
-	btr_cur_t*	cursor)	/* in: B-tree cursor */
-{
-	return(cursor->index);
-}
-
-/*************************************************************
-Positions a tree cursor at a given record. */
-UNIV_INLINE
-void
-btr_cur_position(
-/*=============*/
-	dict_index_t*	index,	/* in: index */
-	rec_t*		rec,	/* in: record in tree */
-	btr_cur_t*	cursor)	/* in: cursor */
-{
-	page_cur_position(rec, btr_cur_get_page_cur(cursor));
-
-	cursor->index = index;
-}
-
-/*************************************************************************
-Checks if compressing an index page where a btr cursor is placed makes
-sense. */
-UNIV_INLINE
-ibool
-btr_cur_compress_recommendation(
-/*============================*/
-				/* out: TRUE if compression is recommended */
-	btr_cur_t*	cursor,	/* in: btr cursor */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_t*		page;
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
-				MTR_MEMO_PAGE_X_FIX));
-
-	page = btr_cur_get_page(cursor);
-
-	if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
-	    || ((btr_page_get_next(page, mtr) == FIL_NULL)
-		&& (btr_page_get_prev(page, mtr) == FIL_NULL))) {
-
-		/* The page fillfactor has dropped below a predefined
-		minimum value OR the level in the B-tree contains just
-		one page: we recommend compression if this is not the
-		root page. */
-
-		return(dict_index_get_page(cursor->index)
-		       != buf_frame_get_page_no(page));
-	}
-
-	return(FALSE);
-}
-
-/*************************************************************************
-Checks if the record on which the cursor is placed can be deleted without
-making tree compression necessary (or, recommended). */
-UNIV_INLINE
-ibool
-btr_cur_can_delete_without_compress(
-/*================================*/
-				/* out: TRUE if can be deleted without
-				recommended compression */
-	btr_cur_t*	cursor,	/* in: btr cursor */
-	ulint		rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_t*		page;
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
-				MTR_MEMO_PAGE_X_FIX));
-
-	page = btr_cur_get_page(cursor);
-
-	if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT)
-	    || ((btr_page_get_next(page, mtr) == FIL_NULL)
-		&& (btr_page_get_prev(page, mtr) == FIL_NULL))
-	    || (page_get_n_recs(page) < 2)) {
-
-		/* The page fillfactor will drop below a predefined
-		minimum value, OR the level in the B-tree contains just
-		one page, OR the page will become empty: we recommend
-		compression if this is not the root page. */
-
-		return(dict_index_get_page(cursor->index)
-		       == buf_frame_get_page_no(page));
-	}
-
-	return(TRUE);
-}
diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic
deleted file mode 100644
index f4e33027c25..00000000000
--- a/storage/innobase/include/btr0sea.ic
+++ /dev/null
@@ -1,67 +0,0 @@
-/************************************************************************
-The index tree adaptive search
-
-(c) 1996 Innobase Oy
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#include "dict0mem.h"
-#include "btr0cur.h"
-#include "buf0buf.h"
-
-/*************************************************************************
-Updates the search info. */
-
-void
-btr_search_info_update_slow(
-/*========================*/
-	btr_search_t*	info,	/* in/out: search info */
-	btr_cur_t*	cursor);/* in: cursor which was just positioned */
-
-/************************************************************************
-Returns search info for an index. */
-UNIV_INLINE
-btr_search_t*
-btr_search_get_info(
-/*================*/
-				/* out: search info; search mutex reserved */
-	dict_index_t*	index)	/* in: index */
-{
-	ut_ad(index);
-
-	return(index->search_info);
-}
-
-/*************************************************************************
-Updates the search info. */
-UNIV_INLINE
-void
-btr_search_info_update(
-/*===================*/
-	dict_index_t*	index,	/* in: index of the cursor */
-	btr_cur_t*	cursor)	/* in: cursor which was just positioned */
-{
-	btr_search_t*	info;
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	info = btr_search_get_info(index);
-
-	info->hash_analysis++;
-
-	if (info->hash_analysis < BTR_SEARCH_HASH_ANALYSIS) {
-
-		/* Do nothing */
-
-		return;
-
-	}
-
-	ut_ad(cursor->flag != BTR_CUR_HASH);
-
-	btr_search_info_update_slow(info, cursor);
-}
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
deleted file mode 100644
index 8fa0bf0602d..00000000000
--- a/storage/innobase/include/btr0types.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/************************************************************************
-The index tree general types
-
-(c) 1996 Innobase Oy
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#ifndef btr0types_h
-#define btr0types_h
-
-#include "univ.i"
-
-#include "rem0types.h"
-#include "page0types.h"
-
-typedef struct btr_pcur_struct		btr_pcur_t;
-typedef struct btr_cur_struct		btr_cur_t;
-typedef struct btr_search_struct	btr_search_t;
-
-#endif
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
deleted file mode 100644
index 3e8972d9182..00000000000
--- a/storage/innobase/include/buf0buf.h
+++ /dev/null
@@ -1,1074 +0,0 @@
-/*   Innobase relational database engine; Copyright (C) 2001 Innobase Oy
-
-     This program is free software; you can redistribute it and/or modify
-     it under the terms of the GNU General Public License 2
-     as published by the Free Software Foundation in June 1991.
-
-     This program is distributed in the hope that it will be useful,
-     but WITHOUT ANY WARRANTY; without even the implied warranty of
-     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-     GNU General Public License for more details.
-
-     You should have received a copy of the GNU General Public License 2
-     along with this program (in file COPYING); if not, write to the Free
-     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-/******************************************************
-The database buffer pool high-level routines
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0buf_h
-#define buf0buf_h
-
-#include "univ.i"
-#include "fil0fil.h"
-#include "mtr0types.h"
-#include "buf0types.h"
-#include "sync0rw.h"
-#include "hash0hash.h"
-#include "ut0byte.h"
-#include "os0proc.h"
-
-/* Flags for flush types */
-#define BUF_FLUSH_LRU		1
-#define BUF_FLUSH_SINGLE_PAGE	2
-#define BUF_FLUSH_LIST		3	/* An array in the pool struct
-					has size BUF_FLUSH_LIST + 1: if you
-					add more flush types, put them in
-					the middle! */
-/* Modes for buf_page_get_gen */
-#define BUF_GET			10	/* get always */
-#define	BUF_GET_IF_IN_POOL	11	/* get if in pool */
-#define	BUF_GET_NOWAIT		12	/* get if can set the latch without
-					waiting */
-#define BUF_GET_NO_LATCH	14	/* get and bufferfix, but set no latch;
-					we have separated this case, because
-					it is error-prone programming not to
-					set a latch, and it should be used
-					with care */
-/* Modes for buf_page_get_known_nowait */
-#define BUF_MAKE_YOUNG	51
-#define BUF_KEEP_OLD	52
-/* Magic value to use instead of checksums when they are disabled */
-#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
-
-extern buf_pool_t*	buf_pool;	/* The buffer pool of the database */
-#ifdef UNIV_DEBUG
-extern ibool		buf_debug_prints;/* If this is set TRUE, the program
-					prints info whenever read or flush
-					occurs */
-#endif /* UNIV_DEBUG */
-extern ulint srv_buf_pool_write_requests; /* variable to count write request
-					  issued */
-
-/************************************************************************
-Creates the buffer pool. */
-
-buf_pool_t*
-buf_pool_init(
-/*==========*/
-				/* out, own: buf_pool object, NULL if not
-				enough memory or error */
-	ulint	max_size,	/* in: maximum size of the buf_pool in
-				blocks */
-	ulint	curr_size,	/* in: current size to use, must be <=
-				max_size, currently must be equal to
-				max_size */
-	ulint	n_frames);	/* in: number of frames; if AWE is used,
-				this is the size of the address space window
-				where physical memory pages are mapped; if
-				AWE is not used then this must be the same
-				as max_size */
-/*************************************************************************
-Gets the current size of buffer buf_pool in bytes. In the case of AWE, the
-size of AWE window (= the frames). */
-UNIV_INLINE
-ulint
-buf_pool_get_curr_size(void);
-/*========================*/
-			/* out: size in bytes */
-/*************************************************************************
-Gets the maximum size of buffer pool in bytes. In the case of AWE, the
-size of AWE window (= the frames). */
-UNIV_INLINE
-ulint
-buf_pool_get_max_size(void);
-/*=======================*/
-			/* out: size in bytes */
-/************************************************************************
-Gets the smallest oldest_modification lsn for any page in the pool. Returns
-ut_dulint_zero if all modified pages have been flushed to disk. */
-UNIV_INLINE
-dulint
-buf_pool_get_oldest_modification(void);
-/*==================================*/
-				/* out: oldest modification in pool,
-				ut_dulint_zero if none */
-/*************************************************************************
-Allocates a buffer frame. */
-
-buf_frame_t*
-buf_frame_alloc(void);
-/*==================*/
-				/* out: buffer frame */
-/*************************************************************************
-Frees a buffer frame which does not contain a file page. */
-
-void
-buf_frame_free(
-/*===========*/
-	buf_frame_t*	frame);	/* in: buffer frame */
-/*************************************************************************
-Copies contents of a buffer frame to a given buffer. */
-UNIV_INLINE
-byte*
-buf_frame_copy(
-/*===========*/
-				/* out: buf */
-	byte*		buf,	/* in: buffer to copy to */
-	buf_frame_t*	frame);	/* in: buffer frame */
-/******************************************************************
-NOTE! The following macros should be used instead of buf_page_get_gen,
-to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
-in LA! */
-#define buf_page_get(SP, OF, LA, MTR)	 buf_page_get_gen(\
-				SP, OF, LA, NULL,\
-				BUF_GET, __FILE__, __LINE__, MTR)
-/******************************************************************
-Use these macros to bufferfix a page with no latching. Remember not to
-read the contents of the page unless you know it is safe. Do not modify
-the contents of the page! We have separated this case, because it is
-error-prone programming not to set a latch, and it should be used
-with care. */
-#define buf_page_get_with_no_latch(SP, OF, MTR)	   buf_page_get_gen(\
-				SP, OF, RW_NO_LATCH, NULL,\
-				BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
-/******************************************************************
-NOTE! The following macros should be used instead of buf_page_get_gen, to
-improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
-#define buf_page_get_nowait(SP, OF, LA, MTR)	buf_page_get_gen(\
-				SP, OF, LA, NULL,\
-				BUF_GET_NOWAIT, __FILE__, __LINE__, MTR)
-/******************************************************************
-NOTE! The following macros should be used instead of
-buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
-RW_X_LATCH are allowed as LA! */
-#define buf_page_optimistic_get(LA, BL, G, MC, MTR)			     \
-	buf_page_optimistic_get_func(LA, BL, G, MC, __FILE__, __LINE__, MTR)
-/************************************************************************
-This is the general function used to get optimistic access to a database
-page. */
-
-ibool
-buf_page_optimistic_get_func(
-/*=========================*/
-				/* out: TRUE if success */
-	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
-	buf_block_t*	block,	/* in: guessed block */
-	buf_frame_t*	guess,	/* in: guessed frame; note that AWE may move
-				frames */
-	dulint		modify_clock,/* in: modify clock value if mode is
-				..._GUESS_ON_CLOCK */
-	const char*	file,	/* in: file name */
-	ulint		line,	/* in: line where called */
-	mtr_t*		mtr);	/* in: mini-transaction */
-/************************************************************************
-Tries to get the page, but if file io is required, releases all latches
-in mtr down to the given savepoint. If io is required, this function
-retrieves the page to buffer buf_pool, but does not bufferfix it or latch
-it. */
-UNIV_INLINE
-buf_frame_t*
-buf_page_get_release_on_io(
-/*=======================*/
-				/* out: pointer to the frame, or NULL
-				if not in buffer buf_pool */
-	ulint	space,		/* in: space id */
-	ulint	offset,		/* in: offset of the page within space
-				in units of a page */
-	buf_frame_t* guess,	/* in: guessed frame or NULL */
-	ulint	rw_latch,	/* in: RW_X_LATCH, RW_S_LATCH,
-				or RW_NO_LATCH */
-	ulint	savepoint,	/* in: mtr savepoint */
-	mtr_t*	mtr);		/* in: mtr */
-/************************************************************************
-This is used to get access to a known database page, when no waiting can be
-done. */
-
-ibool
-buf_page_get_known_nowait(
-/*======================*/
-				/* out: TRUE if success */
-	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
-	buf_frame_t*	guess,	/* in: the known page frame */
-	ulint		mode,	/* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
-	const char*	file,	/* in: file name */
-	ulint		line,	/* in: line where called */
-	mtr_t*		mtr);	/* in: mini-transaction */
-/************************************************************************
-This is the general function used to get access to a database page. */
-
-buf_frame_t*
-buf_page_get_gen(
-/*=============*/
-				/* out: pointer to the frame or NULL */
-	ulint		space,	/* in: space id */
-	ulint		offset,	/* in: page number */
-	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
-	buf_frame_t*	guess,	/* in: guessed frame or NULL */
-	ulint		mode,	/* in: BUF_GET, BUF_GET_IF_IN_POOL,
-				BUF_GET_NO_LATCH */
-	const char*	file,	/* in: file name */
-	ulint		line,	/* in: line where called */
-	mtr_t*		mtr);	/* in: mini-transaction */
-/************************************************************************
-Initializes a page to the buffer buf_pool. The page is usually not read
-from a file even if it cannot be found in the buffer buf_pool. This is one
-of the functions which perform to a block a state transition NOT_USED =>
-FILE_PAGE (the other is buf_page_init_for_read above). */
-
-buf_frame_t*
-buf_page_create(
-/*============*/
-			/* out: pointer to the frame, page bufferfixed */
-	ulint	space,	/* in: space id */
-	ulint	offset,	/* in: offset of the page within space in units of
-			a page */
-	mtr_t*	mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
-
-void
-buf_page_init_for_backup_restore(
-/*=============================*/
-	ulint		space,	/* in: space id */
-	ulint		offset,	/* in: offset of the page within space
-				in units of a page */
-	buf_block_t*	block);	/* in: block to init */
-/************************************************************************
-Decrements the bufferfix count of a buffer control block and releases
-a latch, if specified. */
-UNIV_INLINE
-void
-buf_page_release(
-/*=============*/
-	buf_block_t*	block,		/* in: buffer block */
-	ulint		rw_latch,	/* in: RW_S_LATCH, RW_X_LATCH,
-					RW_NO_LATCH */
-	mtr_t*		mtr);		/* in: mtr */
-/************************************************************************
-Moves a page to the start of the buffer pool LRU list. This high-level
-function can be used to prevent an important page from from slipping out of
-the buffer pool. */
-
-void
-buf_page_make_young(
-/*================*/
-	buf_frame_t*	frame);	/* in: buffer frame of a file page */
-/************************************************************************
-Returns TRUE if the page can be found in the buffer pool hash table. NOTE
-that it is possible that the page is not yet read from disk, though. */
-
-ibool
-buf_page_peek(
-/*==========*/
-			/* out: TRUE if found from page hash table,
-			NOTE that the page is not necessarily yet read
-			from disk! */
-	ulint	space,	/* in: space id */
-	ulint	offset);/* in: page number */
-/************************************************************************
-Returns the buffer control block if the page can be found in the buffer
-pool. NOTE that it is possible that the page is not yet read
-from disk, though. This is a very low-level function: use with care! */
-
-buf_block_t*
-buf_page_peek_block(
-/*================*/
-			/* out: control block if found from page hash table,
-			otherwise NULL; NOTE that the page is not necessarily
-			yet read from disk! */
-	ulint	space,	/* in: space id */
-	ulint	offset);/* in: page number */
-/************************************************************************
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
-	ulint	space,	/* in: space id */
-	ulint	offset);/* in: page number */
-/************************************************************************
-Sets file_page_was_freed TRUE if the page is found in the buffer pool.
-This function should be called when we free a file page and want the
-debug version to check that it is not accessed any more unless
-reallocated. */
-
-buf_block_t*
-buf_page_set_file_page_was_freed(
-/*=============================*/
-			/* out: control block if found from page hash table,
-			otherwise NULL */
-	ulint	space,	/* in: space id */
-	ulint	offset);	/* in: page number */
-/************************************************************************
-Sets file_page_was_freed FALSE if the page is found in the buffer pool.
-This function should be called when we free a file page and want the
-debug version to check that it is not accessed any more unless
-reallocated. */
-
-buf_block_t*
-buf_page_reset_file_page_was_freed(
-/*===============================*/
-			/* out: control block if found from page hash table,
-			otherwise NULL */
-	ulint	space,	/* in: space id */
-	ulint	offset);	/* in: page number */
-/************************************************************************
-Recommends a move of a block to the start of the LRU list if there is danger
-of dropping from the buffer pool. NOTE: does not reserve the buffer pool
-mutex. */
-UNIV_INLINE
-ibool
-buf_block_peek_if_too_old(
-/*======================*/
-				/* out: TRUE if should be made younger */
-	buf_block_t*	block);	/* in: block to make younger */
-/************************************************************************
-Returns the current state of is_hashed of a page. FALSE if the page is
-not in the pool. NOTE that this operation does not fix the page in the
-pool if it is found there. */
-
-ibool
-buf_page_peek_if_search_hashed(
-/*===========================*/
-			/* out: TRUE if page hash index is built in search
-			system */
-	ulint	space,	/* in: space id */
-	ulint	offset);/* in: page number */
-/************************************************************************
-Gets the youngest modification log sequence number for a frame.
-Returns zero if not file page or no modification occurred yet. */
-UNIV_INLINE
-dulint
-buf_frame_get_newest_modification(
-/*==============================*/
-				/* out: newest modification to page */
-	buf_frame_t*	frame);	/* in: pointer to a frame */
-/************************************************************************
-Increments the modify clock of a frame by 1. The caller must (1) own the
-pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
-on the block. */
-UNIV_INLINE
-dulint
-buf_frame_modify_clock_inc(
-/*=======================*/
-				/* out: new value */
-	buf_frame_t*	frame);	/* in: pointer to a frame */
-/************************************************************************
-Increments the modify clock of a frame by 1. The caller must (1) own the
-buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
-on the block. */
-UNIV_INLINE
-dulint
-buf_block_modify_clock_inc(
-/*=======================*/
-				/* out: new value */
-	buf_block_t*	block);	/* in: block */
-/************************************************************************
-Returns the value of the modify clock. The caller must have an s-lock
-or x-lock on the block. */
-UNIV_INLINE
-dulint
-buf_block_get_modify_clock(
-/*=======================*/
-				/* out: value */
-	buf_block_t*	block);	/* in: block */
-/************************************************************************
-Calculates a page checksum which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value
-on 32-bit and 64-bit architectures. */
-
-ulint
-buf_calc_page_new_checksum(
-/*=======================*/
-			/* out: checksum */
-	byte*	page);	/* in: buffer page */
-/************************************************************************
-In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
-looked at the first few bytes of the page. This calculates that old
-checksum.
-NOTE: we must first store the new formula checksum to
-FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
-because this takes that field as an input! */
-
-ulint
-buf_calc_page_old_checksum(
-/*=======================*/
-			/* out: checksum */
-	byte*	 page);	/* in: buffer page */
-/************************************************************************
-Checks if a page is corrupt. */
-
-ibool
-buf_page_is_corrupted(
-/*==================*/
-				/* out: TRUE if corrupted */
-	byte*	read_buf);	/* in: a database page */
-/**************************************************************************
-Gets the page number of a pointer pointing within a buffer frame containing
-a file page. */
-UNIV_INLINE
-ulint
-buf_frame_get_page_no(
-/*==================*/
-			/* out: page number */
-	byte*	ptr);	/* in: pointer to within a buffer frame */
-/**************************************************************************
-Gets the space id of a pointer pointing within a buffer frame containing a
-file page. */
-UNIV_INLINE
-ulint
-buf_frame_get_space_id(
-/*===================*/
-			/* out: space id */
-	byte*	ptr);	/* in: pointer to within a buffer frame */
-/**************************************************************************
-Gets the space id, page offset, and byte offset within page of a
-pointer pointing to a buffer frame containing a file page. */
-UNIV_INLINE
-void
-buf_ptr_get_fsp_addr(
-/*=================*/
-	byte*		ptr,	/* in: pointer to a buffer frame */
-	ulint*		space,	/* out: space id */
-	fil_addr_t*	addr);	/* out: page offset and byte offset */
-/**************************************************************************
-Gets the hash value of the page the pointer is pointing to. This can be used
-in searches in the lock hash table. */
-UNIV_INLINE
-ulint
-buf_frame_get_lock_hash_val(
-/*========================*/
-			/* out: lock hash value */
-	byte*	ptr);	/* in: pointer to within a buffer frame */
-/**************************************************************************
-Gets the mutex number protecting the page record lock hash chain in the lock
-table. */
-UNIV_INLINE
-mutex_t*
-buf_frame_get_mutex(
-/*================*/
-			/* out: mutex */
-	byte*	ptr);	/* in: pointer to within a buffer frame */
-/***********************************************************************
-Gets the frame the pointer is pointing to. */
-UNIV_INLINE
-buf_frame_t*
-buf_frame_align(
-/*============*/
-			/* out: pointer to frame */
-	byte*	ptr);	/* in: pointer to a frame */
-/***********************************************************************
-Checks if a pointer points to the block array of the buffer pool (blocks, not
-the frames). */
-UNIV_INLINE
-ibool
-buf_pool_is_block(
-/*==============*/
-			/* out: TRUE if pointer to block */
-	void*	ptr);	/* in: pointer to memory */
-#ifdef UNIV_DEBUG
-/*************************************************************************
-Validates the buffer pool data structure. */
-
-ibool
-buf_validate(void);
-/*==============*/
-/*************************************************************************
-Prints info of the buffer pool data structure. */
-
-void
-buf_print(void);
-/*============*/
-
-/*************************************************************************
-Returns the number of latched pages in the buffer pool. */
-
-ulint
-buf_get_latched_pages_number(void);
-/*==============================*/
-#endif /* UNIV_DEBUG */
-
-/************************************************************************
-Prints a page to stderr. */
-
-void
-buf_page_print(
-/*===========*/
-	byte*	read_buf);	/* in: a database page */
-
-/*************************************************************************
-Returns the number of pending buf pool ios. */
-
-ulint
-buf_get_n_pending_ios(void);
-/*=======================*/
-/*************************************************************************
-Prints info of the buffer i/o. */
-
-void
-buf_print_io(
-/*=========*/
-	FILE*	file);	/* in: file where to print */
-/*************************************************************************
-Returns the ratio in percents of modified pages in the buffer pool /
-database pages in the buffer pool. */
-
-ulint
-buf_get_modified_ratio_pct(void);
-/*============================*/
-/**************************************************************************
-Refreshes the statistics used to print per-second averages. */
-
-void
-buf_refresh_io_stats(void);
-/*======================*/
-/*************************************************************************
-Checks that all file pages in the buffer are in a replaceable state. */
-
-ibool
-buf_all_freed(void);
-/*===============*/
-/*************************************************************************
-Checks that there currently are no pending i/o-operations for the buffer
-pool. */
-
-ibool
-buf_pool_check_no_pending_io(void);
-/*==============================*/
-				/* out: TRUE if there is no pending i/o */
-/*************************************************************************
-Invalidates the file pages in the buffer pool when an archive recovery is
-completed. All the file pages buffered must be in a replaceable state when
-this function is called: not latched and not modified. */
-
-void
-buf_pool_invalidate(void);
-/*=====================*/
-
-/*========================================================================
---------------------------- LOWER LEVEL ROUTINES -------------------------
-=========================================================================*/
-
-/************************************************************************
-Maps the page of block to a frame, if not mapped yet. Unmaps some page
-from the end of the awe_LRU_free_mapped. */
-
-void
-buf_awe_map_page_to_frame(
-/*======================*/
-	buf_block_t*	block,		/* in: block whose page should be
-					mapped to a frame */
-	ibool		add_to_mapped_list);/* in: TRUE if we in the case
-					we need to map the page should also
-					add the block to the
-					awe_LRU_free_mapped list */
-#ifdef UNIV_SYNC_DEBUG
-/*************************************************************************
-Adds latch level info for the rw-lock protecting the buffer frame. This
-should be called in the debug version after a successful latching of a
-page if we know the latching order level of the acquired latch. */
-UNIV_INLINE
-void
-buf_page_dbg_add_level(
-/*===================*/
-	buf_frame_t*	frame,	/* in: buffer page where we have acquired
-				a latch */
-	ulint		level);	/* in: latching order level */
-#endif /* UNIV_SYNC_DEBUG */
-/*************************************************************************
-Gets a pointer to the memory frame of a block. */
-UNIV_INLINE
-buf_frame_t*
-buf_block_get_frame(
-/*================*/
-				/* out: pointer to the frame */
-	buf_block_t*	block);	/* in: pointer to the control block */
-/*************************************************************************
-Gets the space id of a block. */
-UNIV_INLINE
-ulint
-buf_block_get_space(
-/*================*/
-				/* out: space id */
-	buf_block_t*	block);	/* in: pointer to the control block */
-/*************************************************************************
-Gets the page number of a block. */
-UNIV_INLINE
-ulint
-buf_block_get_page_no(
-/*==================*/
-				/* out: page number */
-	buf_block_t*	block);	/* in: pointer to the control block */
-/***********************************************************************
-Gets the block to whose frame the pointer is pointing to. */
-UNIV_INLINE
-buf_block_t*
-buf_block_align(
-/*============*/
-			/* out: pointer to block */
-	byte*	ptr);	/* in: pointer to a frame */
-/************************************************************************
-This function is used to get info if there is an io operation
-going on on a buffer page. */
-UNIV_INLINE
-ibool
-buf_page_io_query(
-/*==============*/
-				/* out: TRUE if io going on */
-	buf_block_t*	block);	/* in: pool block, must be bufferfixed */
-/***********************************************************************
-Accessor function for block array. */
-UNIV_INLINE
-buf_block_t*
-buf_pool_get_nth_block(
-/*===================*/
-				/* out: pointer to block */
-	buf_pool_t*	pool,	/* in: pool */
-	ulint		i);	/* in: index of the block */
-/************************************************************************
-Function which inits a page for read to the buffer buf_pool. If the page is
-(1) already in buf_pool, or
-(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
-(3) if the space is deleted or being deleted,
-then this function does nothing.
-Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
-on the buffer frame. The io-handler must take care that the flag is cleared
-and the lock released later. This is one of the functions which perform the
-state transition NOT_USED => FILE_PAGE to a block (the other is
-buf_page_create). */
-
-buf_block_t*
-buf_page_init_for_read(
-/*===================*/
-				/* out: pointer to the block or NULL */
-	ulint*		err,	/* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
-	ulint		mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ... */
-	ulint		space,	/* in: space id */
-	ib_longlong	tablespace_version,/* in: prevents reading from a wrong
-				version of the tablespace in case we have done
-				DISCARD + IMPORT */
-	ulint		offset);/* in: page number */
-/************************************************************************
-Completes an asynchronous read or write request of a file page to or from
-the buffer pool. */
-
-void
-buf_page_io_complete(
-/*=================*/
-	buf_block_t*	block);	/* in: pointer to the block in question */
-/************************************************************************
-Calculates a folded value of a file page address to use in the page hash
-table. */
-UNIV_INLINE
-ulint
-buf_page_address_fold(
-/*==================*/
-			/* out: the folded value */
-	ulint	space,	/* in: space id */
-	ulint	offset);/* in: offset of the page within space */
-/**********************************************************************
-Returns the control block of a file page, NULL if not found. */
-UNIV_INLINE
-buf_block_t*
-buf_page_hash_get(
-/*==============*/
-			/* out: block, NULL if not found */
-	ulint	space,	/* in: space id */
-	ulint	offset);/* in: offset of the page within space */
-/***********************************************************************
-Increments the pool clock by one and returns its new value. Remember that
-in the 32 bit version the clock wraps around at 4 billion! */
-UNIV_INLINE
-ulint
-buf_pool_clock_tic(void);
-/*====================*/
-			/* out: new clock value */
-/*************************************************************************
-Gets the current length of the free list of buffer blocks. */
-
-ulint
-buf_get_free_list_len(void);
-/*=======================*/
-
-
-
-/* The buffer control block structure */
-
-struct buf_block_struct{
-
-	/* 1. General fields */
-
-	ulint		magic_n;	/* magic number to check */
-	ulint		state;		/* state of the control block:
-					BUF_BLOCK_NOT_USED, ...; changing
-					this is only allowed when a thread
-					has BOTH the buffer pool mutex AND
-					block->mutex locked */
-	byte*		frame;		/* pointer to buffer frame which
-					is of size UNIV_PAGE_SIZE, and
-					aligned to an address divisible by
-					UNIV_PAGE_SIZE; if AWE is used, this
-					will be NULL for the pages which are
-					currently not mapped into the virtual
-					address space window of the buffer
-					pool */
-	os_awe_t*	awe_info;	/* if AWE is used, then an array of
-					awe page infos for
-					UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE
-					(normally = 4) physical memory
-					pages; otherwise NULL */
-	ulint		space;		/* space id of the page */
-	ulint		offset;		/* page number within the space */
-	ulint		lock_hash_val;	/* hashed value of the page address
-					in the record lock hash table */
-	mutex_t		mutex;		/* mutex protecting this block:
-					state (also protected by the buffer
-					pool mutex), io_fix, buf_fix_count,
-					and accessed; we introduce this new
-					mutex in InnoDB-5.1 to relieve
-					contention on the buffer pool mutex */
-	rw_lock_t	lock;		/* read-write lock of the buffer
-					frame */
-	buf_block_t*	hash;		/* node used in chaining to the page
-					hash table */
-	ibool		check_index_page_at_flush;
-					/* TRUE if we know that this is
-					an index page, and want the database
-					to check its consistency before flush;
-					note that there may be pages in the
-					buffer pool which are index pages,
-					but this flag is not set because
-					we do not keep track of all pages */
-	/* 2. Page flushing fields */
-
-	UT_LIST_NODE_T(buf_block_t) flush_list;
-					/* node of the modified, not yet
-					flushed blocks list */
-	dulint		newest_modification;
-					/* log sequence number of the youngest
-					modification to this block, zero if
-					not modified */
-	dulint		oldest_modification;
-					/* log sequence number of the START of
-					the log entry written of the oldest
-					modification to this block which has
-					not yet been flushed on disk; zero if
-					all modifications are on disk */
-	ulint		flush_type;	/* if this block is currently being
-					flushed to disk, this tells the
-					flush_type: BUF_FLUSH_LRU or
-					BUF_FLUSH_LIST */
-
-	/* 3. LRU replacement algorithm fields */
-
-	UT_LIST_NODE_T(buf_block_t) free;
-					/* node of the free block list */
-	ibool		in_free_list;	/* TRUE if in the free list; used in
-					debugging */
-	UT_LIST_NODE_T(buf_block_t) LRU;
-					/* node of the LRU list */
-	UT_LIST_NODE_T(buf_block_t) awe_LRU_free_mapped;
-					/* in the AWE version node in the
-					list of free and LRU blocks which are
-					mapped to a frame */
-	ibool		in_LRU_list;	/* TRUE of the page is in the LRU list;
-					used in debugging */
-	ulint		LRU_position;	/* value which monotonically
-					decreases (or may stay constant if
-					the block is in the old blocks) toward
-					the end of the LRU list, if the pool
-					ulint_clock has not wrapped around:
-					NOTE that this value can only be used
-					in heuristic algorithms, because of
-					the possibility of a wrap-around! */
-	ulint		freed_page_clock;/* the value of freed_page_clock
-					of the buffer pool when this block was
-					the last time put to the head of the
-					LRU list; a thread is allowed to
-					read this for heuristic purposes
-					without holding any mutex or latch */
-	ibool		old;		/* TRUE if the block is in the old
-					blocks in the LRU list */
-	ibool		accessed;	/* TRUE if the page has been accessed
-					while in the buffer pool: read-ahead
-					may read in pages which have not been
-					accessed yet; this is protected by
-					block->mutex; a thread is allowed to
-					read this for heuristic purposes
-					without holding any mutex or latch */
-	ulint		buf_fix_count;	/* count of how manyfold this block
-					is currently bufferfixed; this is
-					protected by block->mutex */
-	ulint		io_fix;		/* if a read is pending to the frame,
-					io_fix is BUF_IO_READ, in the case
-					of a write BUF_IO_WRITE, otherwise 0;
-					this is protected by block->mutex */
-	/* 4. Optimistic search field */
-
-	dulint		modify_clock;	/* this clock is incremented every
-					time a pointer to a record on the
-					page may become obsolete; this is
-					used in the optimistic cursor
-					positioning: if the modify clock has
-					not changed, we know that the pointer
-					is still valid; this field may be
-					changed if the thread (1) owns the
-					pool mutex and the page is not
-					bufferfixed, or (2) the thread has an
-					x-latch on the block */
-
-	/* 5. Hash search fields: NOTE that the first 4 fields are NOT
-	protected by any semaphore! */
-
-	ulint		n_hash_helps;	/* counter which controls building
-					of a new hash index for the page */
-	ulint		n_fields;	/* recommended prefix length for hash
-					search: number of full fields */
-	ulint		n_bytes;	/* recommended prefix: number of bytes
-					in an incomplete field */
-	ibool		left_side;	/* TRUE or FALSE, depending on
-					whether the leftmost record of several
-					records with the same prefix should be
-					indexed in the hash index */
-
-	/* These 6 fields may only be modified when we have
-	an x-latch on btr_search_latch AND
-	a) we are holding an s-latch or x-latch on block->lock or
-	b) we know that block->buf_fix_count == 0.
-
-	An exception to this is when we init or create a page
-	in the buffer pool in buf0buf.c. */
-
-	ibool		is_hashed;	/* TRUE if hash index has already been
-					built on this page; note that it does
-					not guarantee that the index is
-					complete, though: there may have been
-					hash collisions, record deletions,
-					etc. */
-	ulint		n_pointers;	/* used in debugging: the number of
-					pointers in the adaptive hash index
-					pointing to this frame */
-	ulint		curr_n_fields;	/* prefix length for hash indexing:
-					number of full fields */
-	ulint		curr_n_bytes;	/* number of bytes in hash indexing */
-	ibool		curr_left_side;	/* TRUE or FALSE in hash indexing */
-	dict_index_t*	index;		/* Index for which the adaptive
-					hash index has been created. */
-	/* 6. Debug fields */
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_t	debug_latch;	/* in the debug version, each thread
-					which bufferfixes the block acquires
-					an s-latch here; so we can use the
-					debug utilities in sync0rw */
-#endif
-	ibool		file_page_was_freed;
-					/* this is set to TRUE when fsp
-					frees a page in buffer pool */
-};
-
-#define BUF_BLOCK_MAGIC_N	41526563
-
-/* The buffer pool structure. NOTE! The definition appears here only for
-other modules of this directory (buf) to see it. Do not use from outside! */
-
-struct buf_pool_struct{
-
-	/* 1. General fields */
-
-	mutex_t		mutex;		/* mutex protecting the buffer pool
-					struct and control blocks, except the
-					read-write lock in them */
-	byte*		frame_mem;	/* pointer to the memory area which
-					was allocated for the frames; in AWE
-					this is the virtual address space
-					window where we map pages stored
-					in physical memory */
-	byte*		frame_zero;	/* pointer to the first buffer frame:
-					this may differ from frame_mem, because
-					this is aligned by the frame size */
-	byte*		high_end;	/* pointer to the end of the buffer
-					frames */
-	ulint		n_frames;	/* number of frames */
-	buf_block_t*	blocks;		/* array of buffer control blocks */
-	buf_block_t**	blocks_of_frames;/* inverse mapping which can be used
-					to retrieve the buffer control block
-					of a frame; this is an array which
-					lists the blocks of frames in the
-					order frame_zero,
-					frame_zero + UNIV_PAGE_SIZE, ...
-					a control block is always assigned
-					for each frame, even if the frame does
-					not contain any data; note that in AWE
-					there are more control blocks than
-					buffer frames */
-	os_awe_t*	awe_info;	/* if AWE is used, AWE info for the
-					physical 4 kB memory pages associated
-					with buffer frames */
-	ulint		max_size;	/* number of control blocks ==
-					maximum pool size in pages */
-	ulint		curr_size;	/* current pool size in pages;
-					currently always the same as
-					max_size */
-	hash_table_t*	page_hash;	/* hash table of the file pages */
-
-	ulint		n_pend_reads;	/* number of pending read operations */
-
-	time_t		last_printout_time; /* when buf_print was last time
-					called */
-	ulint		n_pages_read;	/* number read operations */
-	ulint		n_pages_written;/* number write operations */
-	ulint		n_pages_created;/* number of pages created in the pool
-					with no read */
-	ulint		n_page_gets;	/* number of page gets performed;
-					also successful searches through
-					the adaptive hash index are
-					counted as page gets; this field
-					is NOT protected by the buffer
-					pool mutex */
-	ulint		n_pages_awe_remapped; /* if AWE is enabled, the
-					number of remaps of blocks to
-					buffer frames */
-	ulint		n_page_gets_old;/* n_page_gets when buf_print was
-					last time called: used to calculate
-					hit rate */
-	ulint		n_pages_read_old;/* n_pages_read when buf_print was
-					last time called */
-	ulint		n_pages_written_old;/* number write operations */
-	ulint		n_pages_created_old;/* number of pages created in
-					the pool with no read */
-	ulint		n_pages_awe_remapped_old;
-	/* 2. Page flushing algorithm fields */
-
-	UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
-					/* base node of the modified block
-					list */
-	ibool		init_flush[BUF_FLUSH_LIST + 1];
-					/* this is TRUE when a flush of the
-					given type is being initialized */
-	ulint		n_flush[BUF_FLUSH_LIST + 1];
-					/* this is the number of pending
-					writes in the given flush type */
-	os_event_t	no_flush[BUF_FLUSH_LIST + 1];
-					/* this is in the set state when there
-					is no flush batch of the given type
-					running */
-	ulint		ulint_clock;	/* a sequence number used to count
-					time. NOTE! This counter wraps
-					around at 4 billion (if ulint ==
-					32 bits)! */
-	ulint		freed_page_clock;/* a sequence number used to count the
-					number of buffer blocks removed from
-					the end of the LRU list; NOTE that
-					this counter may wrap around at 4
-					billion! A thread is allowed to
-					read this for heuristic purposes
-					without holding any mutex or latch */
-	ulint		LRU_flush_ended;/* when an LRU flush ends for a page,
-					this is incremented by one; this is
-					set to zero when a buffer block is
-					allocated */
-
-	/* 3. LRU replacement algorithm fields */
-
-	UT_LIST_BASE_NODE_T(buf_block_t) free;
-					/* base node of the free block list;
-					in the case of AWE, at the start are
-					always free blocks for which the
-					physical memory is mapped to a frame */
-	UT_LIST_BASE_NODE_T(buf_block_t) LRU;
-					/* base node of the LRU list */
-	buf_block_t*	LRU_old;	/* pointer to the about 3/8 oldest
-					blocks in the LRU list; NULL if LRU
-					length less than BUF_LRU_OLD_MIN_LEN */
-	ulint		LRU_old_len;	/* length of the LRU list from
-					the block to which LRU_old points
-					onward, including that block;
-					see buf0lru.c for the restrictions
-					on this value; not defined if
-					LRU_old == NULL */
-	UT_LIST_BASE_NODE_T(buf_block_t) awe_LRU_free_mapped;
-					/* list of those blocks which are
-					in the LRU list or the free list, and
-					where the page is mapped to a frame;
-					thus, frames allocated, e.g., to the
-					locki table, are not in this list */
-};
-
-/* States of a control block */
-#define	BUF_BLOCK_NOT_USED	211	/* is in the free list */
-#define BUF_BLOCK_READY_FOR_USE	212	/* when buf_get_free_block returns
-					a block, it is in this state */
-#define	BUF_BLOCK_FILE_PAGE	213	/* contains a buffered file page */
-#define	BUF_BLOCK_MEMORY	214	/* contains some main memory object */
-#define BUF_BLOCK_REMOVE_HASH	215	/* hash index should be removed
-					before putting to the free list */
-
-/* Io_fix states of a control block; these must be != 0 */
-#define BUF_IO_READ		561
-#define BUF_IO_WRITE		562
-
-/************************************************************************
-Let us list the consistency conditions for different control block states.
-
-NOT_USED:	is in free list, not in LRU list, not in flush list, nor
-		page hash table
-READY_FOR_USE:	is not in free list, LRU list, or flush list, nor page
-		hash table
-MEMORY:		is not in free list, LRU list, or flush list, nor page
-		hash table
-FILE_PAGE:	space and offset are defined, is in page hash table
-		if io_fix == BUF_IO_WRITE,
-			pool: no_flush[block->flush_type] is in reset state,
-			pool: n_flush[block->flush_type] > 0
-
-		(1) if buf_fix_count == 0, then
-			is in LRU list, not in free list
-			is in flush list,
-				if and only if oldest_modification > 0
-			is x-locked,
-				if and only if io_fix == BUF_IO_READ
-			is s-locked,
-				if and only if io_fix == BUF_IO_WRITE
-
-		(2) if buf_fix_count > 0, then
-			is not in LRU list, not in free list
-			is in flush list,
-				if and only if oldest_modification > 0
-			if io_fix == BUF_IO_READ,
-				is x-locked
-			if io_fix == BUF_IO_WRITE,
-				is s-locked
-
-State transitions:
-
-NOT_USED => READY_FOR_USE
-READY_FOR_USE => MEMORY
-READY_FOR_USE => FILE_PAGE
-MEMORY => NOT_USED
-FILE_PAGE => NOT_USED	NOTE: This transition is allowed if and only if
-				(1) buf_fix_count == 0,
-				(2) oldest_modification == 0, and
-				(3) io_fix == 0.
-*/
-
-#ifndef UNIV_NONINL
-#include "buf0buf.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
deleted file mode 100644
index 4e96e13b8dc..00000000000
--- a/storage/innobase/include/buf0buf.ic
+++ /dev/null
@@ -1,665 +0,0 @@
-/******************************************************
-The database buffer buf_pool
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0flu.h"
-#include "buf0lru.h"
-#include "buf0rea.h"
-#include "mtr0mtr.h"
-
-#ifdef UNIV_DEBUG
-extern ulint		buf_dbg_counter; /* This is used to insert validation
-					operations in execution in the
-					debug version */
-#endif /* UNIV_DEBUG */
-/************************************************************************
-Recommends a move of a block to the start of the LRU list if there is danger
-of dropping from the buffer pool. NOTE: does not reserve the buffer pool
-mutex. */
-UNIV_INLINE
-ibool
-buf_block_peek_if_too_old(
-/*======================*/
-				/* out: TRUE if should be made younger */
-	buf_block_t*	block)	/* in: block to make younger */
-{
-	return(buf_pool->freed_page_clock >= block->freed_page_clock
-	       + 1 + (buf_pool->curr_size / 4));
-}
-
-/*************************************************************************
-Gets the current size of buffer buf_pool in bytes. In the case of AWE, the
-size of AWE window (= the frames). */
-UNIV_INLINE
-ulint
-buf_pool_get_curr_size(void)
-/*========================*/
-			/* out: size in bytes */
-{
-	return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
-}
-
-/*************************************************************************
-Gets the maximum size of buffer buf_pool in bytes. In the case of AWE, the
-size of AWE window (= the frames). */
-UNIV_INLINE
-ulint
-buf_pool_get_max_size(void)
-/*=======================*/
-			/* out: size in bytes */
-{
-	return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
-}
-
-/***********************************************************************
-Accessor function for block array. */
-UNIV_INLINE
-buf_block_t*
-buf_pool_get_nth_block(
-/*===================*/
-				/* out: pointer to block */
-	buf_pool_t*	buf_pool,/* in: buf_pool */
-	ulint		i)	/* in: index of the block */
-{
-	ut_ad(buf_pool);
-	ut_ad(i < buf_pool->max_size);
-
-	return(i + buf_pool->blocks);
-}
-
-/***********************************************************************
-Checks if a pointer points to the block array of the buffer pool (blocks, not
-the frames). */
-UNIV_INLINE
-ibool
-buf_pool_is_block(
-/*==============*/
-			/* out: TRUE if pointer to block */
-	void*	ptr)	/* in: pointer to memory */
-{
-	if ((buf_pool->blocks <= (buf_block_t*)ptr)
-	    && ((buf_block_t*)ptr < buf_pool->blocks
-		+ buf_pool->max_size)) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/************************************************************************
-Gets the smallest oldest_modification lsn for any page in the pool. Returns
-ut_dulint_zero if all modified pages have been flushed to disk. */
-UNIV_INLINE
-dulint
-buf_pool_get_oldest_modification(void)
-/*==================================*/
-				/* out: oldest modification in pool,
-				ut_dulint_zero if none */
-{
-	buf_block_t*	block;
-	dulint		lsn;
-
-	mutex_enter(&(buf_pool->mutex));
-
-	block = UT_LIST_GET_LAST(buf_pool->flush_list);
-
-	if (block == NULL) {
-		lsn = ut_dulint_zero;
-	} else {
-		lsn = block->oldest_modification;
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(lsn);
-}
-
-/***********************************************************************
-Increments the buf_pool clock by one and returns its new value. Remember
-that in the 32 bit version the clock wraps around at 4 billion! */
-UNIV_INLINE
-ulint
-buf_pool_clock_tic(void)
-/*====================*/
-			/* out: new clock value */
-{
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-
-	buf_pool->ulint_clock++;
-
-	return(buf_pool->ulint_clock);
-}
-
-/*************************************************************************
-Gets a pointer to the memory frame of a block. */
-UNIV_INLINE
-buf_frame_t*
-buf_block_get_frame(
-/*================*/
-				/* out: pointer to the frame */
-	buf_block_t*	block)	/* in: pointer to the control block */
-{
-	ut_ad(block);
-	ut_ad(block >= buf_pool->blocks);
-	ut_ad(block < buf_pool->blocks + buf_pool->max_size);
-	ut_ad(block->state != BUF_BLOCK_NOT_USED);
-	ut_ad((block->state != BUF_BLOCK_FILE_PAGE)
-	      || (block->buf_fix_count > 0));
-
-	return(block->frame);
-}
-
-/*************************************************************************
-Gets the space id of a block. */
-UNIV_INLINE
-ulint
-buf_block_get_space(
-/*================*/
-				/* out: space id */
-	buf_block_t*	block)	/* in: pointer to the control block */
-{
-	ut_ad(block);
-	ut_ad(block >= buf_pool->blocks);
-	ut_ad(block < buf_pool->blocks + buf_pool->max_size);
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-	ut_ad(block->buf_fix_count > 0);
-
-	return(block->space);
-}
-
-/*************************************************************************
-Gets the page number of a block. */
-UNIV_INLINE
-ulint
-buf_block_get_page_no(
-/*==================*/
-				/* out: page number */
-	buf_block_t*	block)	/* in: pointer to the control block */
-{
-	ut_ad(block);
-	ut_ad(block >= buf_pool->blocks);
-	ut_ad(block < buf_pool->blocks + buf_pool->max_size);
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-	ut_ad(block->buf_fix_count > 0);
-
-	return(block->offset);
-}
-
-/***********************************************************************
-Gets the block to whose frame the pointer is pointing to. */
-UNIV_INLINE
-buf_block_t*
-buf_block_align(
-/*============*/
-			/* out: pointer to block */
-	byte*	ptr)	/* in: pointer to a frame */
-{
-	buf_block_t*	block;
-	buf_frame_t*	frame_zero;
-
-	ut_ad(ptr);
-
-	frame_zero = buf_pool->frame_zero;
-
-	if (UNIV_UNLIKELY((ulint)ptr < (ulint)frame_zero)
-	    || UNIV_UNLIKELY((ulint)ptr > (ulint)(buf_pool->high_end))) {
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"InnoDB: Error: trying to access a stray pointer %p\n"
-			"InnoDB: buf pool start is at %p, end at %p\n"
-			"InnoDB: Probable reason is database corruption"
-			" or memory\n"
-			"InnoDB: corruption. If this happens in an"
-			" InnoDB database recovery, see\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"forcing-recovery.html\n"
-			"InnoDB: how to force recovery.\n",
-			ptr, frame_zero,
-			buf_pool->high_end);
-		ut_error;
-	}
-
-	block = *(buf_pool->blocks_of_frames + (((ulint)(ptr - frame_zero))
-						>> UNIV_PAGE_SIZE_SHIFT));
-	return(block);
-}
-
-/***********************************************************************
-Gets the frame the pointer is pointing to. */
-UNIV_INLINE
-buf_frame_t*
-buf_frame_align(
-/*============*/
-			/* out: pointer to frame */
-	byte*	ptr)	/* in: pointer to a frame */
-{
-	buf_frame_t*	frame;
-
-	ut_ad(ptr);
-
-	frame = ut_align_down(ptr, UNIV_PAGE_SIZE);
-
-	if (UNIV_UNLIKELY((ulint)frame < (ulint)(buf_pool->frame_zero))
-	    || UNIV_UNLIKELY((ulint)frame >= (ulint)(buf_pool->high_end))) {
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"InnoDB: Error: trying to access a stray pointer %p\n"
-			"InnoDB: buf pool start is at %p, end at %p\n"
-			"InnoDB: Probable reason is database corruption"
-			" or memory\n"
-			"InnoDB: corruption. If this happens in an"
-			" InnoDB database recovery, see\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"forcing-recovery.html\n"
-			"InnoDB: how to force recovery.\n",
-			ptr, buf_pool->frame_zero,
-			buf_pool->high_end);
-		ut_error;
-	}
-
-	return(frame);
-}
-
-/**************************************************************************
-Gets the page number of a pointer pointing within a buffer frame containing
-a file page. */
-UNIV_INLINE
-ulint
-buf_frame_get_page_no(
-/*==================*/
-			/* out: page number */
-	byte*	ptr)	/* in: pointer to within a buffer frame */
-{
-	return(buf_block_get_page_no(buf_block_align(ptr)));
-}
-
-/**************************************************************************
-Gets the space id of a pointer pointing within a buffer frame containing a
-file page. */
-UNIV_INLINE
-ulint
-buf_frame_get_space_id(
-/*===================*/
-			/* out: space id */
-	byte*	ptr)	/* in: pointer to within a buffer frame */
-{
-	return(buf_block_get_space(buf_block_align(ptr)));
-}
-
-/**************************************************************************
-Gets the space id, page offset, and byte offset within page of a
-pointer pointing to a buffer frame containing a file page. */
-UNIV_INLINE
-void
-buf_ptr_get_fsp_addr(
-/*=================*/
-	byte*		ptr,	/* in: pointer to a buffer frame */
-	ulint*		space,	/* out: space id */
-	fil_addr_t*	addr)	/* out: page offset and byte offset */
-{
-	buf_block_t*	block;
-
-	block = buf_block_align(ptr);
-
-	*space = buf_block_get_space(block);
-	addr->page = buf_block_get_page_no(block);
-	addr->boffset = ptr - buf_frame_align(ptr);
-}
-
-/**************************************************************************
-Gets the hash value of the page the pointer is pointing to. This can be used
-in searches in the lock hash table. */
-UNIV_INLINE
-ulint
-buf_frame_get_lock_hash_val(
-/*========================*/
-			/* out: lock hash value */
-	byte*	ptr)	/* in: pointer to within a buffer frame */
-{
-	buf_block_t*	block;
-
-	block = buf_block_align(ptr);
-
-	return(block->lock_hash_val);
-}
-
-/**************************************************************************
-Gets the mutex number protecting the page record lock hash chain in the lock
-table. */
-UNIV_INLINE
-mutex_t*
-buf_frame_get_mutex(
-/*================*/
-			/* out: mutex */
-	byte*	ptr)	/* in: pointer to within a buffer frame */
-{
-	buf_block_t*	block;
-
-	block = buf_block_align(ptr);
-
-	return(&block->mutex);
-}
-
-/*************************************************************************
-Copies contents of a buffer frame to a given buffer. */
-UNIV_INLINE
-byte*
-buf_frame_copy(
-/*===========*/
-				/* out: buf */
-	byte*		buf,	/* in: buffer to copy to */
-	buf_frame_t*	frame)	/* in: buffer frame */
-{
-	ut_ad(buf && frame);
-
-	ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
-
-	return(buf);
-}
-
-/************************************************************************
-Calculates a folded value of a file page address to use in the page hash
-table. */
-UNIV_INLINE
-ulint
-buf_page_address_fold(
-/*==================*/
-			/* out: the folded value */
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: offset of the page within space */
-{
-	return((space << 20) + space + offset);
-}
-
-/************************************************************************
-This function is used to get info if there is an io operation
-going on on a buffer page. */
-UNIV_INLINE
-ibool
-buf_page_io_query(
-/*==============*/
-				/* out: TRUE if io going on */
-	buf_block_t*	block)	/* in: buf_pool block, must be bufferfixed */
-{
-	mutex_enter(&(buf_pool->mutex));
-
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
-	ut_ad(block->buf_fix_count > 0);
-
-	if (block->io_fix != 0) {
-		mutex_exit(&(buf_pool->mutex));
-
-		return(TRUE);
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(FALSE);
-}
-
-/************************************************************************
-Gets the youngest modification log sequence number for a frame. Returns zero
-if not a file page or no modification occurred yet. */
-UNIV_INLINE
-dulint
-buf_frame_get_newest_modification(
-/*==============================*/
-				/* out: newest modification to the page */
-	buf_frame_t*	frame)	/* in: pointer to a frame */
-{
-	buf_block_t*	block;
-	dulint		lsn;
-
-	ut_ad(frame);
-
-	block = buf_block_align(frame);
-
-	mutex_enter(&(buf_pool->mutex));
-
-	if (block->state == BUF_BLOCK_FILE_PAGE) {
-		lsn = block->newest_modification;
-	} else {
-		lsn = ut_dulint_zero;
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	return(lsn);
-}
-
-/************************************************************************
-Increments the modify clock of a frame by 1. The caller must (1) own the
-buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
-on the block. */
-UNIV_INLINE
-dulint
-buf_frame_modify_clock_inc(
-/*=======================*/
-				/* out: new value */
-	buf_frame_t*	frame)	/* in: pointer to a frame */
-{
-	buf_block_t*	block;
-
-	ut_ad(frame);
-
-	block = buf_block_align(frame);
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
-	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
-	UT_DULINT_INC(block->modify_clock);
-
-	return(block->modify_clock);
-}
-
-/************************************************************************
-Increments the modify clock of a frame by 1. The caller must (1) own the
-buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
-on the block. */
-UNIV_INLINE
-dulint
-buf_block_modify_clock_inc(
-/*=======================*/
-				/* out: new value */
-	buf_block_t*	block)	/* in: block */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
-	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
-	UT_DULINT_INC(block->modify_clock);
-
-	return(block->modify_clock);
-}
-
-/************************************************************************
-Returns the value of the modify clock. The caller must have an s-lock
-or x-lock on the block. */
-UNIV_INLINE
-dulint
-buf_block_get_modify_clock(
-/*=======================*/
-				/* out: value */
-	buf_block_t*	block)	/* in: block */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
-	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
-	return(block->modify_clock);
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/***********************************************************************
-Increments the bufferfix count. */
-UNIV_INLINE
-void
-buf_block_buf_fix_inc_debug(
-/*========================*/
-	buf_block_t*	block,	/* in: block to bufferfix */
-	const char*	file __attribute__ ((unused)),	/* in: file name */
-	ulint		line __attribute__ ((unused)))	/* in: line */
-{
-	ibool	ret;
-
-	ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line);
-
-	ut_ad(ret == TRUE);
-	ut_ad(mutex_own(&block->mutex));
-	block->buf_fix_count++;
-}
-#else /* UNIV_SYNC_DEBUG */
-/***********************************************************************
-Increments the bufferfix count. */
-UNIV_INLINE
-void
-buf_block_buf_fix_inc(
-/*==================*/
-	buf_block_t*	block)	/* in: block to bufferfix */
-{
-	ut_ad(mutex_own(&block->mutex));
-
-	block->buf_fix_count++;
-}
-#endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
-Returns the control block of a file page, NULL if not found. */
-UNIV_INLINE
-buf_block_t*
-buf_page_hash_get(
-/*==============*/
-			/* out: block, NULL if not found */
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: offset of the page within space */
-{
-	buf_block_t*	block;
-	ulint		fold;
-
-	ut_ad(buf_pool);
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-
-	/* Look for the page in the hash table */
-
-	fold = buf_page_address_fold(space, offset);
-
-	HASH_SEARCH(hash, buf_pool->page_hash, fold, block,
-		    (block->space == space) && (block->offset == offset));
-	ut_a(block == NULL || block->state == BUF_BLOCK_FILE_PAGE);
-
-	return(block);
-}
-
-/************************************************************************
-Tries to get the page, but if file io is required, releases all latches
-in mtr down to the given savepoint. If io is required, this function
-retrieves the page to buffer buf_pool, but does not bufferfix it or latch
-it. */
-UNIV_INLINE
-buf_frame_t*
-buf_page_get_release_on_io(
-/*=======================*/
-				/* out: pointer to the frame, or NULL
-				if not in buffer buf_pool */
-	ulint	space,		/* in: space id */
-	ulint	offset,		/* in: offset of the page within space
-				in units of a page */
-	buf_frame_t* guess,	/* in: guessed frame or NULL */
-	ulint	rw_latch,	/* in: RW_X_LATCH, RW_S_LATCH,
-				or RW_NO_LATCH */
-	ulint	savepoint,	/* in: mtr savepoint */
-	mtr_t*	mtr)		/* in: mtr */
-{
-	buf_frame_t*	frame;
-
-	frame = buf_page_get_gen(space, offset, rw_latch, guess,
-				 BUF_GET_IF_IN_POOL,
-				 __FILE__, __LINE__,
-				 mtr);
-	if (frame != NULL) {
-
-		return(frame);
-	}
-
-	/* The page was not in the buffer buf_pool: release the latches
-	down to the savepoint */
-
-	mtr_rollback_to_savepoint(mtr, savepoint);
-
-	buf_page_get(space, offset, RW_S_LATCH, mtr);
-
-	/* When we get here, the page is in buffer, but we release
-	the latches again down to the savepoint, before returning */
-
-	mtr_rollback_to_savepoint(mtr, savepoint);
-
-	return(NULL);
-}
-
-/************************************************************************
-Decrements the bufferfix count of a buffer control block and releases
-a latch, if specified. */
-UNIV_INLINE
-void
-buf_page_release(
-/*=============*/
-	buf_block_t*	block,		/* in: buffer block */
-	ulint		rw_latch,	/* in: RW_S_LATCH, RW_X_LATCH,
-					RW_NO_LATCH */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	ut_ad(block);
-
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-	ut_a(block->buf_fix_count > 0);
-
-	if (rw_latch == RW_X_LATCH && mtr->modifications) {
-		mutex_enter(&buf_pool->mutex);
-		buf_flush_note_modification(block, mtr);
-		mutex_exit(&buf_pool->mutex);
-	}
-
-	mutex_enter(&block->mutex);
-
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_s_unlock(&(block->debug_latch));
-#endif
-	block->buf_fix_count--;
-
-	mutex_exit(&block->mutex);
-
-	if (rw_latch == RW_S_LATCH) {
-		rw_lock_s_unlock(&(block->lock));
-	} else if (rw_latch == RW_X_LATCH) {
-		rw_lock_x_unlock(&(block->lock));
-	}
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/*************************************************************************
-Adds latch level info for the rw-lock protecting the buffer frame. This
-should be called in the debug version after a successful latching of a
-page if we know the latching order level of the acquired latch. If
-UNIV_SYNC_DEBUG is not defined, compiles to an empty function. */
-UNIV_INLINE
-void
-buf_page_dbg_add_level(
-/*===================*/
-	buf_frame_t*	frame __attribute__((unused)), /* in: buffer page
-				where we have acquired latch */
-	ulint		level __attribute__((unused))) /* in: latching order
-				level */
-{
-	sync_thread_add_level(&(buf_block_align(frame)->lock), level);
-}
-#endif /* UNIV_SYNC_DEBUG */
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
deleted file mode 100644
index 322848509f4..00000000000
--- a/storage/innobase/include/buf0flu.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/******************************************************
-The database buffer pool flush algorithm
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0flu_h
-#define buf0flu_h
-
-#include "univ.i"
-#include "buf0types.h"
-#include "ut0byte.h"
-#include "mtr0types.h"
-
-/************************************************************************
-Updates the flush system data structures when a write is completed. */
-
-void
-buf_flush_write_complete(
-/*=====================*/
-	buf_block_t*	block);	/* in: pointer to the block in question */
-/*************************************************************************
-Flushes pages from the end of the LRU list if there is too small
-a margin of replaceable pages there. */
-
-void
-buf_flush_free_margin(void);
-/*=======================*/
-/************************************************************************
-Initializes a page for writing to the tablespace. */
-
-void
-buf_flush_init_for_writing(
-/*=======================*/
-	byte*	page,		/* in: page */
-	dulint	newest_lsn,	/* in: newest modification lsn to the page */
-	ulint	space,		/* in: space id */
-	ulint	page_no);	/* in: page number */
-/***********************************************************************
-This utility flushes dirty blocks from the end of the LRU list or flush_list.
-NOTE 1: in the case of an LRU flush the calling thread may own latches to
-pages: to avoid deadlocks, this function must be written so that it cannot
-end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages! */
-
-ulint
-buf_flush_batch(
-/*============*/
-				/* out: number of blocks for which the write
-				request was queued */
-	ulint	flush_type,	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
-				BUF_FLUSH_LIST, then the caller must not own
-				any latches on pages */
-	ulint	min_n,		/* in: wished minimum mumber of blocks flushed
-				(it is not guaranteed that the actual number
-				is that big, though) */
-	dulint	lsn_limit);	/* in the case BUF_FLUSH_LIST all blocks whose
-				oldest_modification is smaller than this
-				should be flushed (if their number does not
-				exceed min_n), otherwise ignored */
-/**********************************************************************
-Waits until a flush batch of the given type ends */
-
-void
-buf_flush_wait_batch_end(
-/*=====================*/
-	ulint	type);	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-/************************************************************************
-This function should be called at a mini-transaction commit, if a page was
-modified in it. Puts the block to the list of modified blocks, if it not
-already in it. */
-UNIV_INLINE
-void
-buf_flush_note_modification(
-/*========================*/
-	buf_block_t*	block,	/* in: block which is modified */
-	mtr_t*		mtr);	/* in: mtr */
-/************************************************************************
-This function should be called when recovery has modified a buffer page. */
-UNIV_INLINE
-void
-buf_flush_recv_note_modification(
-/*=============================*/
-	buf_block_t*	block,		/* in: block which is modified */
-	dulint		start_lsn,	/* in: start lsn of the first mtr in a
-					set of mtr's */
-	dulint		end_lsn);	/* in: end lsn of the last mtr in the
-					set of mtr's */
-/************************************************************************
-Returns TRUE if the file page block is immediately suitable for replacement,
-i.e., transition FILE_PAGE => NOT_USED allowed. */
-ibool
-buf_flush_ready_for_replace(
-/*========================*/
-				/* out: TRUE if can replace immediately */
-	buf_block_t*	block);	/* in: buffer control block, must be in state
-				BUF_BLOCK_FILE_PAGE and in the LRU list */
-/**********************************************************************
-Validates the flush list. */
-
-ibool
-buf_flush_validate(void);
-/*====================*/
-		/* out: TRUE if ok */
-
-/* When buf_flush_free_margin is called, it tries to make this many blocks
-available to replacement in the free list and at the end of the LRU list (to
-make sure that a read-ahead batch can be read efficiently in a single
-sweep). */
-
-#define BUF_FLUSH_FREE_BLOCK_MARGIN	(5 + BUF_READ_AHEAD_AREA)
-#define BUF_FLUSH_EXTRA_MARGIN		(BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
-
-#ifndef UNIV_NONINL
-#include "buf0flu.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic
deleted file mode 100644
index ae873c42088..00000000000
--- a/storage/innobase/include/buf0flu.ic
+++ /dev/null
@@ -1,106 +0,0 @@
-/******************************************************
-The database buffer pool flush algorithm
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0buf.h"
-#include "mtr0mtr.h"
-
-/************************************************************************
-Inserts a modified block into the flush list. */
-
-void
-buf_flush_insert_into_flush_list(
-/*=============================*/
-	buf_block_t*	block);	/* in: block which is modified */
-/************************************************************************
-Inserts a modified block into the flush list in the right sorted position.
-This function is used by recovery, because there the modifications do not
-necessarily come in the order of lsn's. */
-
-void
-buf_flush_insert_sorted_into_flush_list(
-/*====================================*/
-	buf_block_t*	block);	/* in: block which is modified */
-
-/************************************************************************
-This function should be called at a mini-transaction commit, if a page was
-modified in it. Puts the block to the list of modified blocks, if it is not
-already in it. */
-UNIV_INLINE
-void
-buf_flush_note_modification(
-/*========================*/
-	buf_block_t*	block,	/* in: block which is modified */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ut_ad(block);
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
-	ut_ad(block->buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(mutex_own(&(buf_pool->mutex)));
-
-	ut_ad(ut_dulint_cmp(mtr->start_lsn, ut_dulint_zero) != 0);
-	ut_ad(mtr->modifications);
-	ut_ad(ut_dulint_cmp(block->newest_modification, mtr->end_lsn) <= 0);
-
-	block->newest_modification = mtr->end_lsn;
-
-	if (ut_dulint_is_zero(block->oldest_modification)) {
-
-		block->oldest_modification = mtr->start_lsn;
-		ut_ad(!ut_dulint_is_zero(block->oldest_modification));
-
-		buf_flush_insert_into_flush_list(block);
-	} else {
-		ut_ad(ut_dulint_cmp(block->oldest_modification,
-				    mtr->start_lsn) <= 0);
-	}
-
-	++srv_buf_pool_write_requests;
-}
-
-/************************************************************************
-This function should be called when recovery has modified a buffer page. */
-UNIV_INLINE
-void
-buf_flush_recv_note_modification(
-/*=============================*/
-	buf_block_t*	block,		/* in: block which is modified */
-	dulint		start_lsn,	/* in: start lsn of the first mtr in a
-					set of mtr's */
-	dulint		end_lsn)	/* in: end lsn of the last mtr in the
-					set of mtr's */
-{
-	ut_ad(block);
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
-	ut_ad(block->buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	mutex_enter(&(buf_pool->mutex));
-
-	ut_ad(ut_dulint_cmp(block->newest_modification, end_lsn) <= 0);
-
-	block->newest_modification = end_lsn;
-
-	if (ut_dulint_is_zero(block->oldest_modification)) {
-
-		block->oldest_modification = start_lsn;
-
-		ut_ad(!ut_dulint_is_zero(block->oldest_modification));
-
-		buf_flush_insert_sorted_into_flush_list(block);
-	} else {
-		ut_ad(ut_dulint_cmp(block->oldest_modification,
-				    start_lsn) <= 0);
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-}
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
deleted file mode 100644
index 6d26fd4d3b2..00000000000
--- a/storage/innobase/include/buf0lru.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/******************************************************
-The database buffer pool LRU replacement algorithm
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0lru_h
-#define buf0lru_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "buf0types.h"
-
-/**********************************************************************
-Tries to remove LRU flushed blocks from the end of the LRU list and put them
-to the free list. This is beneficial for the efficiency of the insert buffer
-operation, as flushed pages from non-unique non-clustered indexes are here
-taken out of the buffer pool, and their inserts redirected to the insert
-buffer. Otherwise, the flushed blocks could get modified again before read
-operations need new buffer blocks, and the i/o work done in flushing would be
-wasted. */
-
-void
-buf_LRU_try_free_flushed_blocks(void);
-/*==================================*/
-/**********************************************************************
-Returns TRUE if less than 25 % of the buffer pool is available. This can be
-used in heuristics to prevent huge transactions eating up the whole buffer
-pool for their locks. */
-
-ibool
-buf_LRU_buf_pool_running_out(void);
-/*==============================*/
-				/* out: TRUE if less than 25 % of buffer pool
-				left */
-
-/*#######################################################################
-These are low-level functions
-#########################################################################*/
-
-/* Minimum LRU list length for which the LRU_old pointer is defined */
-
-#define BUF_LRU_OLD_MIN_LEN	80
-
-#define BUF_LRU_FREE_SEARCH_LEN		(5 + 2 * BUF_READ_AHEAD_AREA)
-
-/**********************************************************************
-Invalidates all pages belonging to a given tablespace when we are deleting
-the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
-what guarantees that it will not try to read in pages after this operation has
-completed? */
-
-void
-buf_LRU_invalidate_tablespace(
-/*==========================*/
-	ulint	id);	/* in: space id */
-/**********************************************************************
-Gets the minimum LRU_position field for the blocks in an initial segment
-(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
-guaranteed to be precise, because the ulint_clock may wrap around. */
-
-ulint
-buf_LRU_get_recent_limit(void);
-/*==========================*/
-			/* out: the limit; zero if could not determine it */
-/**********************************************************************
-Look for a replaceable block from the end of the LRU list and put it to
-the free list if found. */
-
-ibool
-buf_LRU_search_and_free_block(
-/*==========================*/
-				/* out: TRUE if freed */
-	ulint	n_iterations);	 /* in: how many times this has been called
-				repeatedly without result: a high value means
-				that we should search farther; if value is
-				k < 10, then we only search k/10 * number
-				of pages in the buffer pool from the end
-				of the LRU list */
-/**********************************************************************
-Returns a free block from the buf_pool. The block is taken off the
-free list. If it is empty, blocks are moved from the end of the
-LRU list to the free list. */
-
-buf_block_t*
-buf_LRU_get_free_block(void);
-/*=========================*/
-				/* out: the free control block; also if AWE is
-				used, it is guaranteed that the block has its
-				page mapped to a frame when we return */
-/**********************************************************************
-Puts a block back to the free list. */
-
-void
-buf_LRU_block_free_non_file_page(
-/*=============================*/
-	buf_block_t*	block);	/* in: block, must not contain a file page */
-/**********************************************************************
-Adds a block to the LRU list. */
-
-void
-buf_LRU_add_block(
-/*==============*/
-	buf_block_t*	block,	/* in: control block */
-	ibool		old);	/* in: TRUE if should be put to the old
-				blocks in the LRU list, else put to the
-				start; if the LRU list is very short, added to
-				the start regardless of this parameter */
-/**********************************************************************
-Moves a block to the start of the LRU list. */
-
-void
-buf_LRU_make_block_young(
-/*=====================*/
-	buf_block_t*	block);	/* in: control block */
-/**********************************************************************
-Moves a block to the end of the LRU list. */
-
-void
-buf_LRU_make_block_old(
-/*===================*/
-	buf_block_t*	block);	/* in: control block */
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Validates the LRU list. */
-
-ibool
-buf_LRU_validate(void);
-/*==================*/
-/**************************************************************************
-Prints the LRU list. */
-
-void
-buf_LRU_print(void);
-/*===============*/
-#endif /* UNIV_DEBUG */
-
-#ifndef UNIV_NONINL
-#include "buf0lru.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/buf0lru.ic b/storage/innobase/include/buf0lru.ic
deleted file mode 100644
index 7b8ee457b0b..00000000000
--- a/storage/innobase/include/buf0lru.ic
+++ /dev/null
@@ -1,8 +0,0 @@
-/******************************************************
-The database buffer replacement algorithm
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
deleted file mode 100644
index 44fdfa80e73..00000000000
--- a/storage/innobase/include/buf0types.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/******************************************************
-The database buffer pool global types for the directory
-
-(c) 1995 Innobase Oy
-
-Created 11/17/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0types_h
-#define buf0types_h
-
-typedef	struct buf_block_struct		buf_block_t;
-typedef	struct buf_pool_struct		buf_pool_t;
-
-/* The 'type' used of a buffer frame */
-typedef	byte	buf_frame_t;
-
-
-#endif
-
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h
deleted file mode 100644
index 40592c3c0ce..00000000000
--- a/storage/innobase/include/data0data.h
+++ /dev/null
@@ -1,424 +0,0 @@
-/************************************************************************
-SQL data field and tuple
-
-(c) 1994-1996 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef data0data_h
-#define data0data_h
-
-#include "univ.i"
-
-#include "data0types.h"
-#include "data0type.h"
-#include "mem0mem.h"
-#include "dict0types.h"
-
-typedef struct big_rec_struct		big_rec_t;
-
-/* Some non-inlined functions used in the MySQL interface: */
-void
-dfield_set_data_noninline(
-	dfield_t*	field,	/* in: field */
-	void*		data,	/* in: data */
-	ulint		len);	/* in: length or UNIV_SQL_NULL */
-void*
-dfield_get_data_noninline(
-	dfield_t* field);	/* in: field */
-ulint
-dfield_get_len_noninline(
-	dfield_t* field);	/* in: field */
-ulint
-dtuple_get_n_fields_noninline(
-	dtuple_t*	tuple);	/* in: tuple */
-dfield_t*
-dtuple_get_nth_field_noninline(
-	dtuple_t*	tuple,	/* in: tuple */
-	ulint		n);	/* in: index of field */
-
-/*************************************************************************
-Gets pointer to the type struct of SQL data field. */
-UNIV_INLINE
-dtype_t*
-dfield_get_type(
-/*============*/
-				/* out: pointer to the type struct */
-	dfield_t*	field);	/* in: SQL data field */
-/*************************************************************************
-Sets the type struct of SQL data field. */
-UNIV_INLINE
-void
-dfield_set_type(
-/*============*/
-	dfield_t*	field,	/* in: SQL data field */
-	dtype_t*	type);	/* in: pointer to data type struct */
-/*************************************************************************
-Gets pointer to the data in a field. */
-UNIV_INLINE
-void*
-dfield_get_data(
-/*============*/
-				/* out: pointer to data */
-	dfield_t* field);	/* in: field */
-/*************************************************************************
-Gets length of field data. */
-UNIV_INLINE
-ulint
-dfield_get_len(
-/*===========*/
-				/* out: length of data; UNIV_SQL_NULL if
-				SQL null data */
-	dfield_t* field);	/* in: field */
-/*************************************************************************
-Sets length in a field. */
-UNIV_INLINE
-void
-dfield_set_len(
-/*===========*/
-	dfield_t*	field,	/* in: field */
-	ulint		len);	/* in: length or UNIV_SQL_NULL */
-/*************************************************************************
-Sets pointer to the data and length in a field. */
-UNIV_INLINE
-void
-dfield_set_data(
-/*============*/
-	dfield_t*	field,	/* in: field */
-	const void*	data,	/* in: data */
-	ulint		len);	/* in: length or UNIV_SQL_NULL */
-/**************************************************************************
-Writes an SQL null field full of zeros. */
-UNIV_INLINE
-void
-data_write_sql_null(
-/*================*/
-	byte*	data,	/* in: pointer to a buffer of size len */
-	ulint	len);	/* in: SQL null size in bytes */
-/*************************************************************************
-Copies the data and len fields. */
-UNIV_INLINE
-void
-dfield_copy_data(
-/*=============*/
-	dfield_t*	field1,	/* in: field to copy to */
-	dfield_t*	field2);/* in: field to copy from */
-/*************************************************************************
-Copies a data field to another. */
-UNIV_INLINE
-void
-dfield_copy(
-/*========*/
-	dfield_t*	field1,	/* in: field to copy to */
-	dfield_t*	field2);/* in: field to copy from */
-/*************************************************************************
-Tests if data length and content is equal for two dfields. */
-UNIV_INLINE
-ibool
-dfield_datas_are_binary_equal(
-/*==========================*/
-				/* out: TRUE if equal */
-	dfield_t*	field1,	/* in: field */
-	dfield_t*	field2);/* in: field */
-/*************************************************************************
-Tests if dfield data length and content is equal to the given. */
-
-ibool
-dfield_data_is_binary_equal(
-/*========================*/
-				/* out: TRUE if equal */
-	dfield_t*	field,	/* in: field */
-	ulint		len,	/* in: data length or UNIV_SQL_NULL */
-	byte*		data);	/* in: data */
-/*************************************************************************
-Gets number of fields in a data tuple. */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields(
-/*================*/
-				/* out: number of fields */
-	dtuple_t*	tuple);	/* in: tuple */
-/*************************************************************************
-Gets nth field of a tuple. */
-UNIV_INLINE
-dfield_t*
-dtuple_get_nth_field(
-/*=================*/
-				/* out: nth field */
-	dtuple_t*	tuple,	/* in: tuple */
-	ulint		n);	/* in: index of field */
-/*************************************************************************
-Gets info bits in a data tuple. */
-UNIV_INLINE
-ulint
-dtuple_get_info_bits(
-/*=================*/
-				/* out: info bits */
-	dtuple_t*	tuple);	/* in: tuple */
-/*************************************************************************
-Sets info bits in a data tuple. */
-UNIV_INLINE
-void
-dtuple_set_info_bits(
-/*=================*/
-	dtuple_t*	tuple,		/* in: tuple */
-	ulint		info_bits);	/* in: info bits */
-/*************************************************************************
-Gets number of fields used in record comparisons. */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields_cmp(
-/*====================*/
-				/* out: number of fields used in comparisons
-				in rem0cmp.* */
-	dtuple_t*	tuple);	/* in: tuple */
-/*************************************************************************
-Gets number of fields used in record comparisons. */
-UNIV_INLINE
-void
-dtuple_set_n_fields_cmp(
-/*====================*/
-	dtuple_t*	tuple,		/* in: tuple */
-	ulint		n_fields_cmp);	/* in: number of fields used in
-					comparisons in rem0cmp.* */
-/**************************************************************
-Creates a data tuple to a memory heap. The default value for number
-of fields used in record comparisons for this tuple is n_fields. */
-UNIV_INLINE
-dtuple_t*
-dtuple_create(
-/*==========*/
-				/* out, own: created tuple */
-	mem_heap_t*	heap,	/* in: memory heap where the tuple
-				is created */
-	ulint		n_fields); /* in: number of fields */
-
-/*************************************************************************
-Creates a dtuple for use in MySQL. */
-
-dtuple_t*
-dtuple_create_for_mysql(
-/*====================*/
-			/* out, own created dtuple */
-	void** heap,	/* out: created memory heap */
-	ulint n_fields); /* in: number of fields */
-/*************************************************************************
-Frees a dtuple used in MySQL. */
-
-void
-dtuple_free_for_mysql(
-/*==================*/
-	void* heap);
-/*************************************************************************
-Sets number of fields used in a tuple. Normally this is set in
-dtuple_create, but if you want later to set it smaller, you can use this. */
-
-void
-dtuple_set_n_fields(
-/*================*/
-	dtuple_t*	tuple,		/* in: tuple */
-	ulint		n_fields);	/* in: number of fields */
-/**************************************************************
-The following function returns the sum of data lengths of a tuple. The space
-occupied by the field structs or the tuple struct is not counted. */
-UNIV_INLINE
-ulint
-dtuple_get_data_size(
-/*=================*/
-				/* out: sum of data lens */
-	dtuple_t*	tuple);	/* in: typed data tuple */
-/****************************************************************
-Returns TRUE if lengths of two dtuples are equal and respective data fields
-in them are equal when compared with collation in char fields (not as binary
-strings). */
-
-ibool
-dtuple_datas_are_ordering_equal(
-/*============================*/
-				/* out: TRUE if length and fieds are equal
-				when compared with cmp_data_data:
-				NOTE: in character type fields some letters
-				are identified with others! (collation) */
-	dtuple_t*	tuple1,	/* in: tuple 1 */
-	dtuple_t*	tuple2);/* in: tuple 2 */
-/****************************************************************
-Folds a prefix given as the number of fields of a tuple. */
-UNIV_INLINE
-ulint
-dtuple_fold(
-/*========*/
-				/* out: the folded value */
-	dtuple_t*	tuple,	/* in: the tuple */
-	ulint		n_fields,/* in: number of complete fields to fold */
-	ulint		n_bytes,/* in: number of bytes to fold in an
-				incomplete last field */
-	dulint		tree_id);/* in: index tree id */
-/***********************************************************************
-Sets types of fields binary in a tuple. */
-UNIV_INLINE
-void
-dtuple_set_types_binary(
-/*====================*/
-	dtuple_t*	tuple,	/* in: data tuple */
-	ulint		n);	/* in: number of fields to set */
-/**************************************************************************
-Checks if a dtuple contains an SQL null value. */
-UNIV_INLINE
-ibool
-dtuple_contains_null(
-/*=================*/
-				/* out: TRUE if some field is SQL null */
-	dtuple_t*	tuple);	/* in: dtuple */
-/**************************************************************
-Checks that a data field is typed. Asserts an error if not. */
-
-ibool
-dfield_check_typed(
-/*===============*/
-				/* out: TRUE if ok */
-	dfield_t*	field);	/* in: data field */
-/**************************************************************
-Checks that a data tuple is typed. Asserts an error if not. */
-
-ibool
-dtuple_check_typed(
-/*===============*/
-				/* out: TRUE if ok */
-	dtuple_t*	tuple);	/* in: tuple */
-/**************************************************************
-Checks that a data tuple is typed. */
-
-ibool
-dtuple_check_typed_no_assert(
-/*=========================*/
-				/* out: TRUE if ok */
-	dtuple_t*	tuple);	/* in: tuple */
-#ifdef UNIV_DEBUG
-/**************************************************************
-Validates the consistency of a tuple which must be complete, i.e,
-all fields must have been set. */
-
-ibool
-dtuple_validate(
-/*============*/
-				/* out: TRUE if ok */
-	dtuple_t*	tuple);	/* in: tuple */
-#endif /* UNIV_DEBUG */
-/*****************************************************************
-Pretty prints a dfield value according to its data type. */
-
-void
-dfield_print(
-/*=========*/
-	dfield_t*	dfield);/* in: dfield */
-/*****************************************************************
-Pretty prints a dfield value according to its data type. Also the hex string
-is printed if a string contains non-printable characters. */
-
-void
-dfield_print_also_hex(
-/*==================*/
-	dfield_t*	dfield);	 /* in: dfield */
-/**************************************************************
-The following function prints the contents of a tuple. */
-
-void
-dtuple_print(
-/*=========*/
-	FILE*		f,	/* in: output stream */
-	dtuple_t*	tuple);	/* in: tuple */
-/******************************************************************
-Moves parts of long fields in entry to the big record vector so that
-the size of tuple drops below the maximum record size allowed in the
-database. Moves data only from those fields which are not necessary
-to determine uniquely the insertion place of the tuple in the index. */
-
-big_rec_t*
-dtuple_convert_big_rec(
-/*===================*/
-				/* out, own: created big record vector,
-				NULL if we are not able to shorten
-				the entry enough, i.e., if there are
-				too many short fields in entry */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry */
-	ulint*		ext_vec,/* in: array of externally stored fields,
-				or NULL: if a field already is externally
-				stored, then we cannot move it to the vector
-				this function returns */
-	ulint		n_ext_vec);/* in: number of elements is ext_vec */
-/******************************************************************
-Puts back to entry the data stored in vector. Note that to ensure the
-fields in entry can accommodate the data, vector must have been created
-from entry with dtuple_convert_big_rec. */
-
-void
-dtuple_convert_back_big_rec(
-/*========================*/
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: entry whose data was put to vector */
-	big_rec_t*	vector);/* in, own: big rec vector; it is
-				freed in this function */
-/******************************************************************
-Frees the memory in a big rec vector. */
-
-void
-dtuple_big_rec_free(
-/*================*/
-	big_rec_t*	vector);	/* in, own: big rec vector; it is
-				freed in this function */
-
-/*######################################################################*/
-
-/* Structure for an SQL data field */
-struct dfield_struct{
-	void*		data;	/* pointer to data */
-	ulint		len;	/* data length; UNIV_SQL_NULL if SQL null; */
-	dtype_t		type;	/* type of data */
-};
-
-struct dtuple_struct {
-	ulint		info_bits;	/* info bits of an index record:
-					the default is 0; this field is used
-					if an index record is built from
-					a data tuple */
-	ulint		n_fields;	/* number of fields in dtuple */
-	ulint		n_fields_cmp;	/* number of fields which should
-					be used in comparison services
-					of rem0cmp.*; the index search
-					is performed by comparing only these
-					fields, others are ignored; the
-					default value in dtuple creation is
-					the same value as n_fields */
-	dfield_t*	fields;		/* fields */
-	UT_LIST_NODE_T(dtuple_t) tuple_list;
-					/* data tuples can be linked into a
-					list using this field */
-	ulint		magic_n;
-};
-#define	DATA_TUPLE_MAGIC_N	65478679
-
-/* A slot for a field in a big rec vector */
-
-typedef struct big_rec_field_struct	big_rec_field_t;
-struct big_rec_field_struct {
-	ulint		field_no;	/* field number in record */
-	ulint		len;		/* stored data len */
-	byte*		data;		/* stored data */
-};
-
-/* Storage format for overflow data in a big record, that is, a record
-which needs external storage of data fields */
-
-struct big_rec_struct {
-	mem_heap_t*	heap;		/* memory heap from which allocated */
-	ulint		n_fields;	/* number of stored fields */
-	big_rec_field_t* fields;	/* stored fields */
-};
-
-#ifndef UNIV_NONINL
-#include "data0data.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic
deleted file mode 100644
index 753fa9ba45f..00000000000
--- a/storage/innobase/include/data0data.ic
+++ /dev/null
@@ -1,436 +0,0 @@
-/************************************************************************
-SQL data field and tuple
-
-(c) 1994-1996 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "mem0mem.h"
-#include "ut0rnd.h"
-
-#ifdef UNIV_DEBUG
-extern byte data_error;
-#endif /* UNIV_DEBUG */
-
-/*************************************************************************
-Gets pointer to the type struct of SQL data field. */
-UNIV_INLINE
-dtype_t*
-dfield_get_type(
-/*============*/
-				/* out: pointer to the type struct */
-	dfield_t*	field)	/* in: SQL data field */
-{
-	ut_ad(field);
-
-	return(&(field->type));
-}
-
-/*************************************************************************
-Sets the type struct of SQL data field. */
-UNIV_INLINE
-void
-dfield_set_type(
-/*============*/
-	dfield_t*	field,	/* in: SQL data field */
-	dtype_t*	type)	/* in: pointer to data type struct */
-{
-	ut_ad(field && type);
-
-	field->type = *type;
-}
-
-/*************************************************************************
-Gets pointer to the data in a field. */
-UNIV_INLINE
-void*
-dfield_get_data(
-/*============*/
-				/* out: pointer to data */
-	dfield_t* field)	/* in: field */
-{
-	ut_ad(field);
-	ut_ad((field->len == UNIV_SQL_NULL)
-	      || (field->data != &data_error));
-
-	return(field->data);
-}
-
-/*************************************************************************
-Gets length of field data. */
-UNIV_INLINE
-ulint
-dfield_get_len(
-/*===========*/
-				/* out: length of data; UNIV_SQL_NULL if
-				SQL null data */
-	dfield_t* field)	/* in: field */
-{
-	ut_ad(field);
-	ut_ad((field->len == UNIV_SQL_NULL)
-	      || (field->data != &data_error));
-
-	return(field->len);
-}
-
-/*************************************************************************
-Sets length in a field. */
-UNIV_INLINE
-void
-dfield_set_len(
-/*===========*/
-	dfield_t*	field,	/* in: field */
-	ulint		len)	/* in: length or UNIV_SQL_NULL */
-{
-	ut_ad(field);
-
-	field->len = len;
-}
-
-/*************************************************************************
-Sets pointer to the data and length in a field. */
-UNIV_INLINE
-void
-dfield_set_data(
-/*============*/
-	dfield_t*	field,	/* in: field */
-	const void*	data,	/* in: data */
-	ulint		len)	/* in: length or UNIV_SQL_NULL */
-{
-	ut_ad(field);
-
-	field->data = (void*) data;
-	field->len = len;
-}
-
-/*************************************************************************
-Copies the data and len fields. */
-UNIV_INLINE
-void
-dfield_copy_data(
-/*=============*/
-	dfield_t*	field1,	/* in: field to copy to */
-	dfield_t*	field2)	/* in: field to copy from */
-{
-	ut_ad(field1 && field2);
-
-	field1->data = field2->data;
-	field1->len = field2->len;
-}
-
-/*************************************************************************
-Copies a data field to another. */
-UNIV_INLINE
-void
-dfield_copy(
-/*========*/
-	dfield_t*	field1,	/* in: field to copy to */
-	dfield_t*	field2)	/* in: field to copy from */
-{
-	*field1 = *field2;
-}
-
-/*************************************************************************
-Tests if data length and content is equal for two dfields. */
-UNIV_INLINE
-ibool
-dfield_datas_are_binary_equal(
-/*==========================*/
-				/* out: TRUE if equal */
-	dfield_t*	field1,	/* in: field */
-	dfield_t*	field2)	/* in: field */
-{
-	ulint	len;
-
-	len = field1->len;
-
-	if ((len != field2->len)
-	    || ((len != UNIV_SQL_NULL)
-		&& (0 != ut_memcmp(field1->data, field2->data,
-				   len)))) {
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-
-/*************************************************************************
-Gets info bits in a data tuple. */
-UNIV_INLINE
-ulint
-dtuple_get_info_bits(
-/*=================*/
-				/* out: info bits */
-	dtuple_t*	tuple)	/* in: tuple */
-{
-	ut_ad(tuple);
-
-	return(tuple->info_bits);
-}
-
-/*************************************************************************
-Sets info bits in a data tuple. */
-UNIV_INLINE
-void
-dtuple_set_info_bits(
-/*=================*/
-	dtuple_t*	tuple,		/* in: tuple */
-	ulint		info_bits)	/* in: info bits */
-{
-	ut_ad(tuple);
-
-	tuple->info_bits = info_bits;
-}
-
-/*************************************************************************
-Gets number of fields used in record comparisons. */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields_cmp(
-/*====================*/
-				/* out: number of fields used in comparisons
-				in rem0cmp.* */
-	dtuple_t*	tuple)	/* in: tuple */
-{
-	ut_ad(tuple);
-
-	return(tuple->n_fields_cmp);
-}
-
-/*************************************************************************
-Sets number of fields used in record comparisons. */
-UNIV_INLINE
-void
-dtuple_set_n_fields_cmp(
-/*====================*/
-	dtuple_t*	tuple,		/* in: tuple */
-	ulint		n_fields_cmp)	/* in: number of fields used in
-					comparisons in rem0cmp.* */
-{
-	ut_ad(tuple);
-	ut_ad(n_fields_cmp <= tuple->n_fields);
-
-	tuple->n_fields_cmp = n_fields_cmp;
-}
-
-/*************************************************************************
-Gets number of fields in a data tuple. */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields(
-/*================*/
-				/* out: number of fields */
-	dtuple_t*	tuple)	/* in: tuple */
-{
-	ut_ad(tuple);
-
-	return(tuple->n_fields);
-}
-
-/*************************************************************************
-Gets nth field of a tuple. */
-UNIV_INLINE
-dfield_t*
-dtuple_get_nth_field(
-/*=================*/
-				/* out: nth field */
-	dtuple_t*	tuple,	/* in: tuple */
-	ulint		n)	/* in: index of field */
-{
-	ut_ad(tuple);
-	ut_ad(n < tuple->n_fields);
-
-	return(tuple->fields + n);
-}
-
-/**************************************************************
-Creates a data tuple to a memory heap. The default value for number
-of fields used in record comparisons for this tuple is n_fields. */
-UNIV_INLINE
-dtuple_t*
-dtuple_create(
-/*==========*/
-				/* out, own: created tuple */
-	mem_heap_t*	heap,	/* in: memory heap where the tuple
-				is created */
-	ulint		n_fields) /* in: number of fields */
-{
-	dtuple_t*	tuple;
-
-	ut_ad(heap);
-
-	tuple = (dtuple_t*) mem_heap_alloc(heap, sizeof(dtuple_t)
-					   + n_fields * sizeof(dfield_t));
-	tuple->info_bits = 0;
-	tuple->n_fields = n_fields;
-	tuple->n_fields_cmp = n_fields;
-	tuple->fields = (dfield_t*)(((byte*)tuple) + sizeof(dtuple_t));
-
-#ifdef UNIV_DEBUG
-	tuple->magic_n = DATA_TUPLE_MAGIC_N;
-
-	{	/* In the debug version, initialize fields to an error value */
-		ulint	i;
-
-		for (i = 0; i < n_fields; i++) {
-			(tuple->fields + i)->data = &data_error;
-			dfield_get_type(tuple->fields + i)->mtype = DATA_ERROR;
-		}
-	}
-#endif
-	return(tuple);
-}
-
-/**************************************************************
-The following function returns the sum of data lengths of a tuple. The space
-occupied by the field structs or the tuple struct is not counted. Neither
-is possible space in externally stored parts of the field. */
-UNIV_INLINE
-ulint
-dtuple_get_data_size(
-/*=================*/
-				/* out: sum of data lengths */
-	dtuple_t*	tuple)	/* in: typed data tuple */
-{
-	dfield_t*	field;
-	ulint		n_fields;
-	ulint		len;
-	ulint		i;
-	ulint		sum	= 0;
-
-	ut_ad(tuple);
-	ut_ad(dtuple_check_typed(tuple));
-	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
-
-	n_fields = tuple->n_fields;
-
-	for (i = 0; i < n_fields; i++) {
-		field = dtuple_get_nth_field(tuple,  i);
-		len = dfield_get_len(field);
-
-		if (len == UNIV_SQL_NULL) {
-			len = dtype_get_sql_null_size(dfield_get_type(field));
-		}
-
-		sum += len;
-	}
-
-	return(sum);
-}
-
-/***********************************************************************
-Sets types of fields binary in a tuple. */
-UNIV_INLINE
-void
-dtuple_set_types_binary(
-/*====================*/
-	dtuple_t*	tuple,	/* in: data tuple */
-	ulint		n)	/* in: number of fields to set */
-{
-	dtype_t*	dfield_type;
-	ulint		i;
-
-	for (i = 0; i < n; i++) {
-		dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
-		dtype_set(dfield_type, DATA_BINARY, 0, 0);
-	}
-}
-
-/****************************************************************
-Folds a prefix given as the number of fields of a tuple. */
-UNIV_INLINE
-ulint
-dtuple_fold(
-/*========*/
-				/* out: the folded value */
-	dtuple_t*	tuple,	/* in: the tuple */
-	ulint		n_fields,/* in: number of complete fields to fold */
-	ulint		n_bytes,/* in: number of bytes to fold in an
-				incomplete last field */
-	dulint		tree_id)/* in: index tree id */
-{
-	dfield_t*	field;
-	ulint		i;
-	byte*		data;
-	ulint		len;
-	ulint		fold;
-
-	ut_ad(tuple);
-	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
-	ut_ad(dtuple_check_typed(tuple));
-
-	fold = ut_fold_dulint(tree_id);
-
-	for (i = 0; i < n_fields; i++) {
-		field = dtuple_get_nth_field(tuple, i);
-
-		data = (byte*) dfield_get_data(field);
-		len = dfield_get_len(field);
-
-		if (len != UNIV_SQL_NULL) {
-			fold = ut_fold_ulint_pair(fold,
-						  ut_fold_binary(data, len));
-		}
-	}
-
-	if (n_bytes > 0) {
-		field = dtuple_get_nth_field(tuple, i);
-
-		data = (byte*) dfield_get_data(field);
-		len = dfield_get_len(field);
-
-		if (len != UNIV_SQL_NULL) {
-			if (len > n_bytes) {
-				len = n_bytes;
-			}
-
-			fold = ut_fold_ulint_pair(fold,
-						  ut_fold_binary(data, len));
-		}
-	}
-
-	return(fold);
-}
-
-/**************************************************************************
-Writes an SQL null field full of zeros. */
-UNIV_INLINE
-void
-data_write_sql_null(
-/*================*/
-	byte*	data,	/* in: pointer to a buffer of size len */
-	ulint	len)	/* in: SQL null size in bytes */
-{
-	ulint	j;
-
-	for (j = 0; j < len; j++) {
-		data[j] = '\0';
-	}
-}
-
-/**************************************************************************
-Checks if a dtuple contains an SQL null value. */
-UNIV_INLINE
-ibool
-dtuple_contains_null(
-/*=================*/
-				/* out: TRUE if some field is SQL null */
-	dtuple_t*	tuple)	/* in: dtuple */
-{
-	ulint	n;
-	ulint	i;
-
-	n = dtuple_get_n_fields(tuple);
-
-	for (i = 0; i < n; i++) {
-		if (dfield_get_len(dtuple_get_nth_field(tuple, i))
-		    == UNIV_SQL_NULL) {
-
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
diff --git a/storage/innobase/include/data0types.h b/storage/innobase/include/data0types.h
deleted file mode 100644
index ab314f8f471..00000000000
--- a/storage/innobase/include/data0types.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/************************************************************************
-Some type definitions
-
-(c) 1994-2000 Innobase Oy
-
-Created 9/21/2000 Heikki Tuuri
-*************************************************************************/
-
-#ifndef data0types_h
-#define data0types_h
-
-/* SQL data field struct */
-typedef struct dfield_struct	dfield_t;
-
-/* SQL data tuple struct */
-typedef struct dtuple_struct	dtuple_t;
-
-#endif
-
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
deleted file mode 100644
index ed7ce151718..00000000000
--- a/storage/innobase/include/db0err.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/******************************************************
-Global error codes for the database
-
-(c) 1996 Innobase Oy
-
-Created 5/24/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef db0err_h
-#define db0err_h
-
-
-#define DB_SUCCESS		10
-
-/* The following are error codes */
-#define	DB_ERROR		11
-#define DB_OUT_OF_MEMORY	12
-#define DB_OUT_OF_FILE_SPACE	13
-#define DB_LOCK_WAIT		14
-#define DB_DEADLOCK		15
-#define DB_ROLLBACK		16
-#define DB_DUPLICATE_KEY	17
-#define DB_QUE_THR_SUSPENDED	18
-#define DB_MISSING_HISTORY	19	/* required history data has been
-					deleted due to lack of space in
-					rollback segment */
-#define DB_CLUSTER_NOT_FOUND	30
-#define DB_TABLE_NOT_FOUND	31
-#define DB_MUST_GET_MORE_FILE_SPACE 32	/* the database has to be stopped
-					and restarted with more file space */
-#define DB_TABLE_IS_BEING_USED	33
-#define DB_TOO_BIG_RECORD	34	/* a record in an index would become
-					bigger than 1/2 free space in a page
-					frame */
-#define DB_LOCK_WAIT_TIMEOUT	35	/* lock wait lasted too long */
-#define DB_NO_REFERENCED_ROW	36	/* referenced key value not found
-					for a foreign key in an insert or
-					update of a row */
-#define DB_ROW_IS_REFERENCED	37	/* cannot delete or update a row
-					because it contains a key value
-					which is referenced */
-#define DB_CANNOT_ADD_CONSTRAINT 38	/* adding a foreign key constraint
-					to a table failed */
-#define DB_CORRUPTION		39	/* data structure corruption noticed */
-#define DB_COL_APPEARS_TWICE_IN_INDEX 40/* InnoDB cannot handle an index
-					where same column appears twice */
-#define DB_CANNOT_DROP_CONSTRAINT 41	/* dropping a foreign key constraint
-					from a table failed */
-#define DB_NO_SAVEPOINT		42	/* no savepoint exists with the given
-					name */
-#define	DB_TABLESPACE_ALREADY_EXISTS 43 /* we cannot create a new single-table
-					tablespace because a file of the same
-					name already exists */
-#define DB_TABLESPACE_DELETED	44	/* tablespace does not exist or is
-					being dropped right now */
-#define	DB_LOCK_TABLE_FULL	45	/* lock structs have exhausted the
-					buffer pool (for big transactions,
-					InnoDB stores the lock structs in the
-					buffer pool) */
-#define DB_FOREIGN_DUPLICATE_KEY 46	/* foreign key constraints
-					activated by the operation would
-					lead to a duplicate key in some
-					table */
-#define DB_TOO_MANY_CONCURRENT_TRXS 47	/* when InnoDB runs out of the
-					preconfigured undo slots, this can
-					only happen when there are too many
-					concurrent transactions */
-#define DB_UNSUPPORTED		48	/* when InnoDB sees any artefact or
-					a feature that it can't recoginize or
-					work with e.g., FT indexes created by
-					a later version of the engine. */
-/* The following are partial failure codes */
-#define DB_FAIL			1000
-#define DB_OVERFLOW		1001
-#define DB_UNDERFLOW		1002
-#define DB_STRONG_FAIL		1003
-#define DB_RECORD_NOT_FOUND	1500
-#define DB_END_OF_INDEX		1501
-
-#endif
diff --git a/storage/innobase/include/dict0crea.ic b/storage/innobase/include/dict0crea.ic
deleted file mode 100644
index b4da2d7e03f..00000000000
--- a/storage/innobase/include/dict0crea.ic
+++ /dev/null
@@ -1,8 +0,0 @@
-/******************************************************
-Database object creation
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
deleted file mode 100644
index 7d5ff09c7a6..00000000000
--- a/storage/innobase/include/dict0dict.h
+++ /dev/null
@@ -1,1002 +0,0 @@
-/******************************************************
-Data dictionary system
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0dict_h
-#define dict0dict_h
-
-#include "univ.i"
-#include "dict0types.h"
-#include "dict0mem.h"
-#include "data0type.h"
-#include "data0data.h"
-#include "sync0sync.h"
-#include "sync0rw.h"
-#include "mem0mem.h"
-#include "rem0types.h"
-#include "btr0types.h"
-#include "ut0mem.h"
-#include "ut0lst.h"
-#include "hash0hash.h"
-#include "ut0rnd.h"
-#include "ut0byte.h"
-#include "trx0types.h"
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************
-Makes all characters in a NUL-terminated UTF-8 string lower case. */
-
-void
-dict_casedn_str(
-/*============*/
-	char*	a);	/* in/out: string to put in lower case */
-#endif /* !UNIV_HOTBACKUP */
-/************************************************************************
-Get the database name length in a table name. */
-
-ulint
-dict_get_db_name_len(
-/*=================*/
-				/* out: database name length */
-	const char*	name);	/* in: table name in the form
-				dbname '/' tablename */
-/************************************************************************
-Return the end of table name where we have removed dbname and '/'. */
-
-const char*
-dict_remove_db_name(
-/*================*/
-				/* out: table name */
-	const char*	name);	/* in: table name in the form
-				dbname '/' tablename */
-/************************************************************************
-Decrements the count of open MySQL handles to a table. */
-
-void
-dict_table_decrement_handle_count(
-/*==============================*/
-	dict_table_t*	table);	/* in: table */
-/**************************************************************************
-Inits the data dictionary module. */
-
-void
-dict_init(void);
-/*===========*/
-/************************************************************************
-Gets the space id of every table of the data dictionary and makes a linear
-list and a hash table of them to the data dictionary cache. This function
-can be called at database startup if we did not need to do a crash recovery.
-In crash recovery we must scan the space id's from the .ibd files in MySQL
-database directories. */
-
-void
-dict_load_space_id_list(void);
-/*=========================*/
-/*************************************************************************
-Gets the column data type. */
-UNIV_INLINE
-void
-dict_col_copy_type(
-/*===============*/
-	const dict_col_t*	col,	/* in: column */
-	dtype_t*		type);	/* out: data type */
-/*************************************************************************
-Gets the column data type. */
-
-void
-dict_col_copy_type_noninline(
-/*=========================*/
-	const dict_col_t*	col,	/* in: column */
-	dtype_t*		type);	/* out: data type */
-#ifdef UNIV_DEBUG
-/*************************************************************************
-Assert that a column and a data type match. */
-UNIV_INLINE
-ibool
-dict_col_type_assert_equal(
-/*=======================*/
-					/* out: TRUE */
-	const dict_col_t*	col,	/* in: column */
-	const dtype_t*		type);	/* in: data type */
-#endif /* UNIV_DEBUG */
-/***************************************************************************
-Returns the minimum size of the column. */
-UNIV_INLINE
-ulint
-dict_col_get_min_size(
-/*==================*/
-					/* out: minimum size */
-	const dict_col_t*	col);	/* in: column */
-/***************************************************************************
-Returns the maximum size of the column. */
-UNIV_INLINE
-ulint
-dict_col_get_max_size(
-/*==================*/
-					/* out: maximum size */
-	const dict_col_t*	col);	/* in: column */
-/***************************************************************************
-Returns the size of a fixed size column, 0 if not a fixed size column. */
-UNIV_INLINE
-ulint
-dict_col_get_fixed_size(
-/*====================*/
-					/* out: fixed size, or 0 */
-	const dict_col_t*	col);	/* in: column */
-/***************************************************************************
-Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
-For fixed length types it is the fixed length of the type, otherwise 0. */
-UNIV_INLINE
-ulint
-dict_col_get_sql_null_size(
-/*=======================*/
-					/* out: SQL null storage size
-					in ROW_FORMAT=REDUNDANT */
-	const dict_col_t*	col);	/* in: column */
-
-/*************************************************************************
-Gets the column number. */
-UNIV_INLINE
-ulint
-dict_col_get_no(
-/*============*/
-	const dict_col_t*	col);
-/*************************************************************************
-Gets the column position in the clustered index. */
-UNIV_INLINE
-ulint
-dict_col_get_clust_pos(
-/*===================*/
-	const dict_col_t*	col,		/* in: table column */
-	const dict_index_t*	clust_index);	/* in: clustered index */
-/*************************************************************************
-Gets the column position in the clustered index. */
-
-ulint
-dict_col_get_clust_pos_noninline(
-/*=============================*/
-	const dict_col_t*	col,		/* in: table column */
-	const dict_index_t*	clust_index);	/* in: clustered index */
-/********************************************************************
-If the given column name is reserved for InnoDB system columns, return
-TRUE. */
-
-ibool
-dict_col_name_is_reserved(
-/*======================*/
-				/* out: TRUE if name is reserved */
-	const char*	name);	/* in: column name */
-/************************************************************************
-Acquire the autoinc lock.*/
-
-void
-dict_table_autoinc_lock(
-/*====================*/
-	dict_table_t*	table);	/* in: table */
-/************************************************************************
-Unconditionally set the autoinc counter. */
-
-void
-dict_table_autoinc_initialize(
-/*==========================*/
-	dict_table_t*	table,	/* in: table */
-	ib_ulonglong	value);	/* in: next value to assign to a row */
-/************************************************************************
-Reads the next autoinc value (== autoinc counter value), 0 if not yet
-initialized. */
-
-ib_ulonglong
-dict_table_autoinc_read(
-/*====================*/
-				/* out: value for a new row, or 0 */
-	dict_table_t*	table);	/* in: table */
-/************************************************************************
-Updates the autoinc counter if the value supplied is greater than the
-current value. */
-
-void
-dict_table_autoinc_update_if_greater(
-/*=================================*/
-
-	dict_table_t*	table,	/* in: table */
-	ib_ulonglong	value);	/* in: value which was assigned to a row */
-/************************************************************************
-Release the autoinc lock.*/
-
-void
-dict_table_autoinc_unlock(
-/*======================*/
-	dict_table_t*	table);	/* in: table */
-/**************************************************************************
-Adds system columns to a table object. */
-
-void
-dict_table_add_system_columns(
-/*==========================*/
-	dict_table_t*	table,	/* in/out: table */
-	mem_heap_t*	heap);	/* in: temporary heap */
-/**************************************************************************
-Adds a table object to the dictionary cache. */
-
-void
-dict_table_add_to_cache(
-/*====================*/
-	dict_table_t*	table,	/* in: table */
-	mem_heap_t*	heap);	/* in: temporary heap */
-/**************************************************************************
-Removes a table object from the dictionary cache. */
-
-void
-dict_table_remove_from_cache(
-/*=========================*/
-	dict_table_t*	table);	/* in, own: table */
-/**************************************************************************
-Renames a table object. */
-
-ibool
-dict_table_rename_in_cache(
-/*=======================*/
-					/* out: TRUE if success */
-	dict_table_t*	table,		/* in: table */
-	const char*	new_name,	/* in: new name */
-	ibool		rename_also_foreigns);/* in: in ALTER TABLE we want
-					to preserve the original table name
-					in constraints which reference it */
-/**************************************************************************
-Change the id of a table object in the dictionary cache. This is used in
-DISCARD TABLESPACE. */
-
-void
-dict_table_change_id_in_cache(
-/*==========================*/
-	dict_table_t*	table,	/* in: table object already in cache */
-	dulint		new_id);/* in: new id to set */
-/**************************************************************************
-Adds a foreign key constraint object to the dictionary cache. May free
-the object if there already is an object with the same identifier in.
-At least one of foreign table or referenced table must already be in
-the dictionary cache! */
-
-ulint
-dict_foreign_add_to_cache(
-/*======================*/
-					/* out: DB_SUCCESS or error code */
-	dict_foreign_t*	foreign,	/* in, own: foreign key constraint */
-	ibool		check_charsets);/* in: TRUE=check charset
-					compatibility */
-/*************************************************************************
-Checks if a table is referenced by foreign keys. */
-
-ibool
-dict_table_referenced_by_foreign_key(
-/*=================================*/
-				/* out: TRUE if table is referenced by a
-				foreign key */
-	dict_table_t*	table);	/* in: InnoDB table */
-/**************************************************************************
-Determines whether a string starts with the specified keyword. */
-
-ibool
-dict_str_starts_with_keyword(
-/*=========================*/
-					/* out: TRUE if str starts
-					with keyword */
-	void*		mysql_thd,	/* in: MySQL thread handle */
-	const char*	str,		/* in: string to scan for keyword */
-	const char*	keyword);	/* in: keyword to look for */
-/*************************************************************************
-Scans a table create SQL string and adds to the data dictionary
-the foreign key constraints declared in the string. This function
-should be called after the indexes for a table have been created.
-Each foreign key constraint must be accompanied with indexes in
-bot participating tables. The indexes are allowed to contain more
-fields than mentioned in the constraint. */
-
-ulint
-dict_create_foreign_constraints(
-/*============================*/
-					/* out: error code or DB_SUCCESS */
-	trx_t*		trx,		/* in: transaction */
-	const char*	sql_string,	/* in: table create statement where
-					foreign keys are declared like:
-					FOREIGN KEY (a, b) REFERENCES
-					table2(c, d), table2 can be written
-					also with the database
-					name before it: test.table2; the
-					default database id the database of
-					parameter name */
-	const char*	name,		/* in: table full name in the
-					normalized form
-					database_name/table_name */
-	ibool		reject_fks);	/* in: if TRUE, fail with error
-					code DB_CANNOT_ADD_CONSTRAINT if
-					any foreign keys are found. */
-/**************************************************************************
-Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. */
-
-ulint
-dict_foreign_parse_drop_constraints(
-/*================================*/
-						/* out: DB_SUCCESS or
-						DB_CANNOT_DROP_CONSTRAINT if
-						syntax error or the constraint
-						id does not match */
-	mem_heap_t*	heap,			/* in: heap from which we can
-						allocate memory */
-	trx_t*		trx,			/* in: transaction */
-	dict_table_t*	table,			/* in: table */
-	ulint*		n,			/* out: number of constraints
-						to drop */
-	const char***	constraints_to_drop);	/* out: id's of the
-						constraints to drop */
-/**************************************************************************
-Returns a table object and optionally increment its MySQL open handle count.
-NOTE! This is a high-level function to be used mainly from outside the
-'dict' directory. Inside this directory dict_table_get_low is usually the
-appropriate function. */
-
-dict_table_t*
-dict_table_get(
-/*===========*/
-					/* out: table, NULL if
-					does not exist */
-	const char*	table_name,	/* in: table name */
-	ibool		inc_mysql_count);
-					/* in: whether to increment the open
-					handle count on the table */
-/**************************************************************************
-Returns a table object based on table id. */
-
-dict_table_t*
-dict_table_get_on_id(
-/*=================*/
-				/* out: table, NULL if does not exist */
-	dulint	table_id,	/* in: table id */
-	trx_t*	trx);		/* in: transaction handle */
-/**************************************************************************
-Returns a table object based on table id. */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_on_id_low(
-/*=====================*/
-				/* out: table, NULL if does not exist */
-	dulint	table_id);	/* in: table id */
-/**************************************************************************
-Checks if a table is in the dictionary cache. */
-UNIV_INLINE
-dict_table_t*
-dict_table_check_if_in_cache_low(
-/*=============================*/
-					/* out: table, NULL if not found */
-	const char*	table_name);	/* in: table name */
-/**************************************************************************
-Gets a table; loads it to the dictionary cache if necessary. A low-level
-function. */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_low(
-/*===============*/
-					/* out: table, NULL if not found */
-	const char*	table_name);	/* in: table name */
-/**************************************************************************
-A noninlined version of dict_table_get_low. */
-
-dict_table_t*
-dict_table_get_low_noninlined(
-/*==========================*/
-					/* out: table, NULL if not found */
-	const char*	table_name);	/* in: table name */
-/**************************************************************************
-Returns an index object. */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_index(
-/*=================*/
-				/* out: index, NULL if does not exist */
-	dict_table_t*	table,	/* in: table */
-	const char*	name);	/* in: index name */
-/**************************************************************************
-Returns an index object. */
-
-dict_index_t*
-dict_table_get_index_noninline(
-/*===========================*/
-				/* out: index, NULL if does not exist */
-	dict_table_t*	table,	/* in: table */
-	const char*	name);	/* in: index name */
-/**************************************************************************
-Returns a column's name. */
-
-const char*
-dict_table_get_col_name(
-/*====================*/
-					/* out: column name. NOTE: not
-					guaranteed to stay valid if table is
-					modified in any way (columns added,
-					etc.). */
-	const dict_table_t*	table,	/* in: table */
-	ulint			col_nr);/* in: column number */
-
-/**************************************************************************
-Prints a table definition. */
-
-void
-dict_table_print(
-/*=============*/
-	dict_table_t*	table);	/* in: table */
-/**************************************************************************
-Prints a table data. */
-
-void
-dict_table_print_low(
-/*=================*/
-	dict_table_t*	table);	/* in: table */
-/**************************************************************************
-Prints a table data when we know the table name. */
-
-void
-dict_table_print_by_name(
-/*=====================*/
-	const char*	name);
-/**************************************************************************
-Outputs info on foreign keys of a table. */
-
-void
-dict_print_info_on_foreign_keys(
-/*============================*/
-	ibool		create_table_format, /* in: if TRUE then print in
-				a format suitable to be inserted into
-				a CREATE TABLE, otherwise in the format
-				of SHOW TABLE STATUS */
-	FILE*		file,	/* in: file where to print */
-	trx_t*		trx,	/* in: transaction */
-	dict_table_t*	table);	/* in: table */
-/**************************************************************************
-Outputs info on a foreign key of a table in a format suitable for
-CREATE TABLE. */
-void
-dict_print_info_on_foreign_key_in_create_format(
-/*============================================*/
-	FILE*		file,		/* in: file where to print */
-	trx_t*		trx,		/* in: transaction */
-	dict_foreign_t*	foreign,	/* in: foreign key constraint */
-	ibool		add_newline);	/* in: whether to add a newline */
-/************************************************************************
-Displays the names of the index and the table. */
-void
-dict_index_name_print(
-/*==================*/
-	FILE*			file,	/* in: output stream */
-	trx_t*			trx,	/* in: transaction */
-	const dict_index_t*	index);	/* in: index to print */
-/************************************************************************
-Gets the first index on the table (the clustered index). */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_first_index(
-/*=======================*/
-				/* out: index, NULL if none exists */
-	dict_table_t*	table);	/* in: table */
-/************************************************************************
-Gets the first index on the table (the clustered index). */
-
-dict_index_t*
-dict_table_get_first_index_noninline(
-/*=================================*/
-				/* out: index, NULL if none exists */
-	dict_table_t*	table);	/* in: table */
-/************************************************************************
-Gets the next index on the table. */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_next_index(
-/*======================*/
-				/* out: index, NULL if none left */
-	dict_index_t*	index);	/* in: index */
-/************************************************************************
-Gets the next index on the table. */
-
-dict_index_t*
-dict_table_get_next_index_noninline(
-/*================================*/
-				/* out: index, NULL if none left */
-	dict_index_t*	index);	/* in: index */
-/************************************************************************
-Gets the number of user-defined columns in a table in the dictionary
-cache. */
-UNIV_INLINE
-ulint
-dict_table_get_n_user_cols(
-/*=======================*/
-				/* out: number of user-defined (e.g., not
-				ROW_ID) columns of a table */
-	dict_table_t*	table);	/* in: table */
-/************************************************************************
-Gets the number of system columns in a table in the dictionary cache. */
-UNIV_INLINE
-ulint
-dict_table_get_n_sys_cols(
-/*======================*/
-				/* out: number of system (e.g.,
-				ROW_ID) columns of a table */
-	dict_table_t*	table);	/* in: table */
-/************************************************************************
-Gets the number of all columns (also system) in a table in the dictionary
-cache. */
-UNIV_INLINE
-ulint
-dict_table_get_n_cols(
-/*==================*/
-				/* out: number of columns of a table */
-	dict_table_t*	table);	/* in: table */
-/************************************************************************
-Gets the nth column of a table. */
-UNIV_INLINE
-const dict_col_t*
-dict_table_get_nth_col(
-/*===================*/
-					/* out: pointer to column object */
-	const dict_table_t*	table,	/* in: table */
-	ulint			pos);	/* in: position of column */
-/************************************************************************
-Gets the nth column of a table. */
-
-const dict_col_t*
-dict_table_get_nth_col_noninline(
-/*=============================*/
-					/* out: pointer to column object */
-	const dict_table_t*	table,	/* in: table */
-	ulint			pos);	/* in: position of column */
-/************************************************************************
-Gets the given system column of a table. */
-UNIV_INLINE
-const dict_col_t*
-dict_table_get_sys_col(
-/*===================*/
-					/* out: pointer to column object */
-	const dict_table_t*	table,	/* in: table */
-	ulint			sys);	/* in: DATA_ROW_ID, ... */
-/************************************************************************
-Gets the given system column number of a table. */
-UNIV_INLINE
-ulint
-dict_table_get_sys_col_no(
-/*======================*/
-				/* out: column number */
-	dict_table_t*	table,	/* in: table */
-	ulint		sys);	/* in: DATA_ROW_ID, ... */
-/************************************************************************
-Check whether the table uses the compact page format. */
-UNIV_INLINE
-ibool
-dict_table_is_comp(
-/*===============*/
-					/* out: TRUE if table uses the
-					compact page format */
-	const dict_table_t*	table);	/* in: table */
-/************************************************************************
-Check whether the table uses the compact page format. */
-
-ibool
-dict_table_is_comp_noninline(
-/*=========================*/
-					/* out: TRUE if table uses the
-					compact page format */
-	const dict_table_t*	table);	/* in: table */
-/************************************************************************
-Checks if a column is in the ordering columns of the clustered index of a
-table. Column prefixes are treated like whole columns. */
-
-ibool
-dict_table_col_in_clustered_key(
-/*============================*/
-				/* out: TRUE if the column, or its prefix, is
-				in the clustered key */
-	dict_table_t*	table,	/* in: table */
-	ulint		n);	/* in: column number */
-/***********************************************************************
-Copies types of columns contained in table to tuple. */
-
-void
-dict_table_copy_types(
-/*==================*/
-	dtuple_t*	tuple,	/* in: data tuple */
-	dict_table_t*	table);	/* in: index */
-/**************************************************************************
-Looks for an index with the given id. NOTE that we do not reserve
-the dictionary mutex: this function is for emergency purposes like
-printing info of a corrupt database page! */
-
-dict_index_t*
-dict_index_find_on_id_low(
-/*======================*/
-			/* out: index or NULL if not found from cache */
-	dulint	id);	/* in: index id */
-/**************************************************************************
-Adds an index to the dictionary cache. */
-
-void
-dict_index_add_to_cache(
-/*====================*/
-	dict_table_t*	table,	/* in: table on which the index is */
-	dict_index_t*	index,	/* in, own: index; NOTE! The index memory
-				object is freed in this function! */
-	ulint		page_no);/* in: root page number of the index */
-/************************************************************************
-Gets the number of fields in the internal representation of an index,
-including fields added by the dictionary system. */
-UNIV_INLINE
-ulint
-dict_index_get_n_fields(
-/*====================*/
-				/* out: number of fields */
-	dict_index_t*	index);	/* in: an internal representation of index
-				(in the dictionary cache) */
-/************************************************************************
-Gets the number of fields in the internal representation of an index
-that uniquely determine the position of an index entry in the index, if
-we do not take multiversioning into account: in the B-tree use the value
-returned by dict_index_get_n_unique_in_tree. */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique(
-/*====================*/
-				/* out: number of fields */
-	dict_index_t*	index);	/* in: an internal representation of index
-				(in the dictionary cache) */
-/************************************************************************
-Gets the number of fields in the internal representation of an index
-which uniquely determine the position of an index entry in the index, if
-we also take multiversioning into account. */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique_in_tree(
-/*============================*/
-				/* out: number of fields */
-	dict_index_t*	index);	/* in: an internal representation of index
-				(in the dictionary cache) */
-/************************************************************************
-Gets the number of user-defined ordering fields in the index. In the internal
-representation we add the row id to the ordering fields to make all indexes
-unique, but this function returns the number of fields the user defined
-in the index as ordering fields. */
-UNIV_INLINE
-ulint
-dict_index_get_n_ordering_defined_by_user(
-/*======================================*/
-				/* out: number of fields */
-	dict_index_t*	index);	/* in: an internal representation of index
-				(in the dictionary cache) */
-/************************************************************************
-Gets the nth field of an index. */
-UNIV_INLINE
-dict_field_t*
-dict_index_get_nth_field(
-/*=====================*/
-				/* out: pointer to field object */
-	dict_index_t*	index,	/* in: index */
-	ulint		pos);	/* in: position of field */
-/************************************************************************
-Gets pointer to the nth column in an index. */
-UNIV_INLINE
-const dict_col_t*
-dict_index_get_nth_col(
-/*===================*/
-					/* out: column */
-	const dict_index_t*	index,	/* in: index */
-	ulint			pos);	/* in: position of the field */
-/************************************************************************
-Gets the column number of the nth field in an index. */
-UNIV_INLINE
-ulint
-dict_index_get_nth_col_no(
-/*======================*/
-					/* out: column number */
-	const dict_index_t*	index,	/* in: index */
-	ulint			pos);	/* in: position of the field */
-/************************************************************************
-Looks for column n in an index. */
-
-ulint
-dict_index_get_nth_col_pos(
-/*=======================*/
-				/* out: position in internal representation
-				of the index; if not contained, returns
-				ULINT_UNDEFINED */
-	dict_index_t*	index,	/* in: index */
-	ulint		n);	/* in: column number */
-/************************************************************************
-Returns TRUE if the index contains a column or a prefix of that column. */
-
-ibool
-dict_index_contains_col_or_prefix(
-/*==============================*/
-				/* out: TRUE if contains the column or its
-				prefix */
-	dict_index_t*	index,	/* in: index */
-	ulint		n);	/* in: column number */
-/************************************************************************
-Looks for a matching field in an index. The column has to be the same. The
-column in index must be complete, or must contain a prefix longer than the
-column in index2. That is, we must be able to construct the prefix in index2
-from the prefix in index. */
-
-ulint
-dict_index_get_nth_field_pos(
-/*=========================*/
-				/* out: position in internal representation
-				of the index; if not contained, returns
-				ULINT_UNDEFINED */
-	dict_index_t*	index,	/* in: index from which to search */
-	dict_index_t*	index2,	/* in: index */
-	ulint		n);	/* in: field number in index2 */
-/************************************************************************
-Looks for column n position in the clustered index. */
-
-ulint
-dict_table_get_nth_col_pos(
-/*=======================*/
-				/* out: position in internal representation
-				of the clustered index */
-	dict_table_t*	table,	/* in: table */
-	ulint		n);	/* in: column number */
-/************************************************************************
-Returns the position of a system column in an index. */
-UNIV_INLINE
-ulint
-dict_index_get_sys_col_pos(
-/*=======================*/
-				/* out: position, ULINT_UNDEFINED if not
-				contained */
-	dict_index_t*	index,	/* in: index */
-	ulint		type);	/* in: DATA_ROW_ID, ... */
-/***********************************************************************
-Adds a column to index. */
-
-void
-dict_index_add_col(
-/*===============*/
-	dict_index_t*	index,		/* in: index */
-	dict_table_t*	table,		/* in: table */
-	dict_col_t*	col,		/* in: column */
-	ulint		prefix_len);	/* in: column prefix length */
-/***********************************************************************
-Copies types of fields contained in index to tuple. */
-
-void
-dict_index_copy_types(
-/*==================*/
-	dtuple_t*	tuple,		/* in: data tuple */
-	dict_index_t*	index,		/* in: index */
-	ulint		n_fields);	/* in: number of field types to copy */
-/*************************************************************************
-Gets the field column. */
-UNIV_INLINE
-const dict_col_t*
-dict_field_get_col(
-/*===============*/
-	const dict_field_t*	field);
-
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Returns an index object if it is found in the dictionary cache. */
-
-dict_index_t*
-dict_index_get_if_in_cache(
-/*=======================*/
-				/* out: index, NULL if not found */
-	dulint	index_id);	/* in: index id */
-/**************************************************************************
-Checks that a tuple has n_fields_cmp value in a sensible range, so that
-no comparison can occur with the page number field in a node pointer. */
-
-ibool
-dict_index_check_search_tuple(
-/*==========================*/
-				/* out: TRUE if ok */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	tuple);	/* in: tuple used in a search */
-#endif /* UNIV_DEBUG */
-/**************************************************************************
-Builds a node pointer out of a physical record and a page number. */
-
-dtuple_t*
-dict_index_build_node_ptr(
-/*======================*/
-				/* out, own: node pointer */
-	dict_index_t*	index,	/* in: index */
-	rec_t*		rec,	/* in: record for which to build node
-				pointer */
-	ulint		page_no,/* in: page number to put in node pointer */
-	mem_heap_t*	heap,	/* in: memory heap where pointer created */
-	ulint		level);	 /* in: level of rec in tree: 0 means leaf
-				level */
-/**************************************************************************
-Copies an initial segment of a physical record, long enough to specify an
-index entry uniquely. */
-
-rec_t*
-dict_index_copy_rec_order_prefix(
-/*=============================*/
-				/* out: pointer to the prefix record */
-	dict_index_t*	index,	/* in: index */
-	rec_t*		rec,	/* in: record for which to copy prefix */
-	ulint*		n_fields,/* out: number of fields copied */
-	byte**		buf,	/* in/out: memory buffer for the copied prefix,
-				or NULL */
-	ulint*		buf_size);/* in/out: buffer size */
-/**************************************************************************
-Builds a typed data tuple out of a physical record. */
-
-dtuple_t*
-dict_index_build_data_tuple(
-/*========================*/
-				/* out, own: data tuple */
-	dict_index_t*	index,	/* in: index */
-	rec_t*		rec,	/* in: record for which to build data tuple */
-	ulint		n_fields,/* in: number of data fields */
-	mem_heap_t*	heap);	/* in: memory heap where tuple created */
-/*************************************************************************
-Gets the space id of the root of the index tree. */
-UNIV_INLINE
-ulint
-dict_index_get_space(
-/*=================*/
-				/* out: space id */
-	dict_index_t*	index);	/* in: index */
-/*************************************************************************
-Sets the space id of the root of the index tree. */
-UNIV_INLINE
-void
-dict_index_set_space(
-/*=================*/
-	dict_index_t*	index,	/* in: index */
-	ulint		space);	/* in: space id */
-/*************************************************************************
-Gets the page number of the root of the index tree. */
-UNIV_INLINE
-ulint
-dict_index_get_page(
-/*================*/
-				/* out: page number */
-	dict_index_t*	tree);	/* in: index */
-/*************************************************************************
-Sets the page number of the root of index tree. */
-UNIV_INLINE
-void
-dict_index_set_page(
-/*================*/
-	dict_index_t*	index,	/* in: index */
-	ulint		page);	/* in: page number */
-/*************************************************************************
-Gets the type of the index tree. */
-UNIV_INLINE
-ulint
-dict_index_get_type(
-/*================*/
-				/* out: type */
-	dict_index_t*	index);	/* in: index */
-/*************************************************************************
-Gets the read-write lock of the index tree. */
-UNIV_INLINE
-rw_lock_t*
-dict_index_get_lock(
-/*================*/
-				/* out: read-write lock */
-	dict_index_t*	index);	/* in: index */
-/************************************************************************
-Returns free space reserved for future updates of records. This is
-relevant only in the case of many consecutive inserts, as updates
-which make the records bigger might fragment the index. */
-UNIV_INLINE
-ulint
-dict_index_get_space_reserve(void);
-/*==============================*/
-				/* out: number of free bytes on page,
-				reserved for updates */
-/*************************************************************************
-Calculates the minimum record length in an index. */
-
-ulint
-dict_index_calc_min_rec_len(
-/*========================*/
-	dict_index_t*	index);	/* in: index */
-/*************************************************************************
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization. */
-
-void
-dict_update_statistics_low(
-/*=======================*/
-	dict_table_t*	table,		/* in: table */
-	ibool		has_dict_mutex);/* in: TRUE if the caller has the
-					dictionary mutex */
-/*************************************************************************
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization. */
-
-void
-dict_update_statistics(
-/*===================*/
-	dict_table_t*	table);	/* in: table */
-/************************************************************************
-Reserves the dictionary system mutex for MySQL. */
-
-void
-dict_mutex_enter_for_mysql(void);
-/*============================*/
-/************************************************************************
-Releases the dictionary system mutex for MySQL. */
-
-void
-dict_mutex_exit_for_mysql(void);
-/*===========================*/
-/************************************************************************
-Checks if the database name in two table names is the same. */
-
-ibool
-dict_tables_have_same_db(
-/*=====================*/
-				/* out: TRUE if same db name */
-	const char*	name1,	/* in: table name in the form
-				dbname '/' tablename */
-	const char*	name2);	/* in: table name in the form
-				dbname '/' tablename */
-/*************************************************************************
-Scans from pointer onwards. Stops if is at the start of a copy of
-'string' where characters are compared without case sensitivity. Stops
-also at '\0'. */
-
-const char*
-dict_scan_to(
-/*=========*/
-				/* out: scanned up to this */
-	const char*	ptr,	/* in: scan from */
-	const char*	string);/* in: look for this */
-/* Buffers for storing detailed information about the latest foreign key
-and unique key errors */
-extern FILE*	dict_foreign_err_file;
-extern mutex_t	dict_foreign_err_mutex; /* mutex protecting the buffers */
-
-extern dict_sys_t*	dict_sys;	/* the dictionary system */
-extern rw_lock_t	dict_operation_lock;
-
-/* Dictionary system struct */
-struct dict_sys_struct{
-	mutex_t		mutex;		/* mutex protecting the data
-					dictionary; protects also the
-					disk-based dictionary system tables;
-					this mutex serializes CREATE TABLE
-					and DROP TABLE, as well as reading
-					the dictionary data for a table from
-					system tables */
-	dulint		row_id;		/* the next row id to assign;
-					NOTE that at a checkpoint this
-					must be written to the dict system
-					header and flushed to a file; in
-					recovery this must be derived from
-					the log records */
-	hash_table_t*	table_hash;	/* hash table of the tables, based
-					on name */
-	hash_table_t*	table_id_hash;	/* hash table of the tables, based
-					on id */
-	UT_LIST_BASE_NODE_T(dict_table_t)
-			table_LRU;	/* LRU list of tables */
-	ulint		size;		/* varying space in bytes occupied
-					by the data dictionary table and
-					index objects */
-	dict_table_t*	sys_tables;	/* SYS_TABLES table */
-	dict_table_t*	sys_columns;	/* SYS_COLUMNS table */
-	dict_table_t*	sys_indexes;	/* SYS_INDEXES table */
-	dict_table_t*	sys_fields;	/* SYS_FIELDS table */
-};
-
-#ifndef UNIV_NONINL
-#include "dict0dict.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/dict0load.ic b/storage/innobase/include/dict0load.ic
deleted file mode 100644
index 1a207fbf0fd..00000000000
--- a/storage/innobase/include/dict0load.ic
+++ /dev/null
@@ -1,9 +0,0 @@
-/******************************************************
-Loads to the memory cache database object definitions
-from dictionary tables
-
-(c) 1996 Innobase Oy
-
-Created 4/24/1996 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
deleted file mode 100644
index ac28fdb1bae..00000000000
--- a/storage/innobase/include/dict0mem.h
+++ /dev/null
@@ -1,431 +0,0 @@
-/******************************************************
-Data dictionary memory object creation
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0mem_h
-#define dict0mem_h
-
-#include "univ.i"
-#include "dict0types.h"
-#include "data0type.h"
-#include "data0data.h"
-#include "mem0mem.h"
-#include "rem0types.h"
-#include "btr0types.h"
-#include "ut0mem.h"
-#include "ut0lst.h"
-#include "ut0rnd.h"
-#include "ut0byte.h"
-#include "sync0rw.h"
-#include "lock0types.h"
-#include "hash0hash.h"
-#include "que0types.h"
-
-/* Type flags of an index: OR'ing of the flags is allowed to define a
-combination of types */
-#define DICT_CLUSTERED	1	/* clustered index */
-#define DICT_UNIQUE	2	/* unique index */
-#define	DICT_UNIVERSAL	4	/* index which can contain records from any
-				other index */
-#define	DICT_IBUF	8	/* insert buffer tree */
-
-/* Types for a table object */
-#define DICT_TABLE_ORDINARY		1
-#if 0 /* not implemented */
-#define	DICT_TABLE_CLUSTER_MEMBER	2
-#define	DICT_TABLE_CLUSTER		3 /* this means that the table is
-					  really a cluster definition */
-#endif
-
-/* Table flags */
-#define DICT_TF_COMPACT			1	/* compact page format */
-
-/**************************************************************************
-Creates a table memory object. */
-
-dict_table_t*
-dict_mem_table_create(
-/*==================*/
-					/* out, own: table object */
-	const char*	name,		/* in: table name */
-	ulint		space,		/* in: space where the clustered index
-					of the table is placed; this parameter
-					is ignored if the table is made
-					a member of a cluster */
-	ulint		n_cols,		/* in: number of columns */
-	ulint		flags);		/* in: table flags */
-/********************************************************************
-Free a table memory object. */
-
-void
-dict_mem_table_free(
-/*================*/
-	dict_table_t*	table);		/* in: table */
-/**************************************************************************
-Adds a column definition to a table. */
-
-void
-dict_mem_table_add_col(
-/*===================*/
-	dict_table_t*	table,	/* in: table */
-	mem_heap_t*	heap,	/* in: temporary memory heap, or NULL */
-	const char*	name,	/* in: column name, or NULL */
-	ulint		mtype,	/* in: main datatype */
-	ulint		prtype,	/* in: precise type */
-	ulint		len);	/* in: precision */
-/**************************************************************************
-Creates an index memory object. */
-
-dict_index_t*
-dict_mem_index_create(
-/*==================*/
-					/* out, own: index object */
-	const char*	table_name,	/* in: table name */
-	const char*	index_name,	/* in: index name */
-	ulint		space,		/* in: space where the index tree is
-					placed, ignored if the index is of
-					the clustered type */
-	ulint		type,		/* in: DICT_UNIQUE,
-					DICT_CLUSTERED, ... ORed */
-	ulint		n_fields);	/* in: number of fields */
-/**************************************************************************
-Adds a field definition to an index. NOTE: does not take a copy
-of the column name if the field is a column. The memory occupied
-by the column name may be released only after publishing the index. */
-
-void
-dict_mem_index_add_field(
-/*=====================*/
-	dict_index_t*	index,		/* in: index */
-	const char*	name,		/* in: column name */
-	ulint		prefix_len);	/* in: 0 or the column prefix length
-					in a MySQL index like
-					INDEX (textcol(25)) */
-/**************************************************************************
-Frees an index memory object. */
-
-void
-dict_mem_index_free(
-/*================*/
-	dict_index_t*	index);	/* in: index */
-/**************************************************************************
-Creates and initializes a foreign constraint memory object. */
-
-dict_foreign_t*
-dict_mem_foreign_create(void);
-/*=========================*/
-				/* out, own: foreign constraint struct */
-
-/* Data structure for a column in a table */
-struct dict_col_struct{
-	/*----------------------*/
-	/* The following are copied from dtype_t,
-	so that all bit-fields can be packed tightly. */
-	unsigned	mtype:8;	/* main data type */
-	unsigned	prtype:24;	/* precise type; MySQL data
-					type, charset code, flags to
-					indicate nullability,
-					signedness, whether this is a
-					binary string, whether this is
-					a true VARCHAR where MySQL
-					uses 2 bytes to store the length */
-
-	/* the remaining fields do not affect alphabetical ordering: */
-
-	unsigned	len:16;		/* length; for MySQL data this
-					is field->pack_length(),
-					except that for a >= 5.0.3
-					type true VARCHAR this is the
-					maximum byte length of the
-					string data (in addition to
-					the string, MySQL uses 1 or 2
-					bytes to store the string length) */
-
-	unsigned	mbminlen:2;	/* minimum length of a
-					character, in bytes */
-	unsigned	mbmaxlen:3;	/* maximum length of a
-					character, in bytes */
-	/*----------------------*/
-	/* End of definitions copied from dtype_t */
-
-	unsigned	ind:10;		/* table column position
-					(starting from 0) */
-	unsigned	ord_part:1;	/* nonzero if this column
-					appears in the ordering fields
-					of an index */
-};
-
-/* DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
-indexed column length (or indexed prefix length). It is set to 3*256,
-so that one can create a column prefix index on 256 characters of a
-TEXT or VARCHAR column also in the UTF-8 charset. In that charset,
-a character may take at most 3 bytes.
-This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
-files would be at risk! */
-
-#define DICT_MAX_INDEX_COL_LEN		768
-
-/* Data structure for a field in an index */
-struct dict_field_struct{
-	dict_col_t*	col;		/* pointer to the table column */
-	const char*	name;		/* name of the column */
-	unsigned	prefix_len:10;	/* 0 or the length of the column
-					prefix in bytes in a MySQL index of
-					type, e.g., INDEX (textcol(25));
-					must be smaller than
-					DICT_MAX_INDEX_COL_LEN; NOTE that
-					in the UTF-8 charset, MySQL sets this
-					to 3 * the prefix len in UTF-8 chars */
-	unsigned	fixed_len:10;	/* 0 or the fixed length of the
-					column if smaller than
-					DICT_MAX_INDEX_COL_LEN */
-};
-
-/* Data structure for an index */
-struct dict_index_struct{
-	dulint		id;	/* id of the index */
-	mem_heap_t*	heap;	/* memory heap */
-	ulint		type;	/* index type */
-	const char*	name;	/* index name */
-	const char*	table_name; /* table name */
-	dict_table_t*	table;	/* back pointer to table */
-	unsigned	space:32;
-				/* space where the index tree is placed */
-	unsigned	page:32;/* index tree root page number */
-	unsigned	trx_id_offset:10;/* position of the the trx id column
-				in a clustered index record, if the fields
-				before it are known to be of a fixed size,
-				0 otherwise */
-	unsigned	n_user_defined_cols:10;
-				/* number of columns the user defined to
-				be in the index: in the internal
-				representation we add more columns */
-	unsigned	n_uniq:10;/* number of fields from the beginning
-				which are enough to determine an index
-				entry uniquely */
-	unsigned	n_def:10;/* number of fields defined so far */
-	unsigned	n_fields:10;/* number of fields in the index */
-	unsigned	n_nullable:10;/* number of nullable fields */
-	unsigned	cached:1;/* TRUE if the index object is in the
-				dictionary cache */
-	dict_field_t*	fields;	/* array of field descriptions */
-	UT_LIST_NODE_T(dict_index_t)
-			indexes;/* list of indexes of the table */
-	btr_search_t*	search_info; /* info used in optimistic searches */
-	/*----------------------*/
-	ib_longlong*	stat_n_diff_key_vals;
-				/* approximate number of different key values
-				for this index, for each n-column prefix
-				where n <= dict_get_n_unique(index); we
-				periodically calculate new estimates */
-	ulint		stat_index_size;
-				/* approximate index size in database pages */
-	ulint		stat_n_leaf_pages;
-				/* approximate number of leaf pages in the
-				index tree */
-	rw_lock_t	lock;	/* read-write lock protecting the upper levels
-				of the index tree */
-#ifdef UNIV_DEBUG
-	ulint		magic_n;/* magic number */
-# define DICT_INDEX_MAGIC_N	76789786
-#endif
-};
-
-/* Data structure for a foreign key constraint; an example:
-FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D) */
-
-struct dict_foreign_struct{
-	mem_heap_t*	heap;		/* this object is allocated from
-					this memory heap */
-	char*		id;		/* id of the constraint as a
-					null-terminated string */
-	unsigned	n_fields:10;	/* number of indexes' first fields
-					for which the the foreign key
-					constraint is defined: we allow the
-					indexes to contain more fields than
-					mentioned in the constraint, as long
-					as the first fields are as mentioned */
-	unsigned	type:6;		/* 0 or DICT_FOREIGN_ON_DELETE_CASCADE
-					or DICT_FOREIGN_ON_DELETE_SET_NULL */
-	char*		foreign_table_name;/* foreign table name */
-	dict_table_t*	foreign_table;	/* table where the foreign key is */
-	const char**	foreign_col_names;/* names of the columns in the
-					foreign key */
-	char*		referenced_table_name;/* referenced table name */
-	dict_table_t*	referenced_table;/* table where the referenced key
-					is */
-	const char**	referenced_col_names;/* names of the referenced
-					columns in the referenced table */
-	dict_index_t*	foreign_index;	/* foreign index; we require that
-					both tables contain explicitly defined
-					indexes for the constraint: InnoDB
-					does not generate new indexes
-					implicitly */
-	dict_index_t*	referenced_index;/* referenced index */
-	UT_LIST_NODE_T(dict_foreign_t)
-			foreign_list;	/* list node for foreign keys of the
-					table */
-	UT_LIST_NODE_T(dict_foreign_t)
-			referenced_list;/* list node for referenced keys of the
-					table */
-};
-
-/* The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
-a foreign key constraint is enforced, therefore RESTRICT just means no flag */
-#define DICT_FOREIGN_ON_DELETE_CASCADE	1
-#define DICT_FOREIGN_ON_DELETE_SET_NULL	2
-#define DICT_FOREIGN_ON_UPDATE_CASCADE	4
-#define DICT_FOREIGN_ON_UPDATE_SET_NULL	8
-#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16
-#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32
-
-
-/* Data structure for a database table */
-struct dict_table_struct{
-	dulint		id;	/* id of the table */
-	mem_heap_t*	heap;	/* memory heap */
-	const char*	name;	/* table name */
-	const char*	dir_path_of_temp_table;/* NULL or the directory path
-				where a TEMPORARY table that was explicitly
-				created by a user should be placed if
-				innodb_file_per_table is defined in my.cnf;
-				in Unix this is usually /tmp/..., in Windows
-				\temp\... */
-	unsigned	space:32;
-				/* space where the clustered index of the
-				table is placed */
-	unsigned	ibd_file_missing:1;
-				/* TRUE if this is in a single-table
-				tablespace and the .ibd file is missing; then
-				we must return in ha_innodb.cc an error if the
-				user tries to query such an orphaned table */
-	unsigned	tablespace_discarded:1;
-				/* this flag is set TRUE when the user
-				calls DISCARD TABLESPACE on this
-				table, and reset to FALSE in IMPORT
-				TABLESPACE */
-	unsigned	cached:1;/* TRUE if the table object has been added
-				to the dictionary cache */
-	unsigned	flags:8;/* DICT_TF_COMPACT, ... */
-	unsigned	n_def:10;/* number of columns defined so far */
-	unsigned	n_cols:10;/* number of columns */
-	dict_col_t*	cols;	/* array of column descriptions */
-	const char*	col_names;
-				/* Column names packed in a character string
-				"name1\0name2\0...nameN\0".  Until
-				the string contains n_cols, it will be
-				allocated from a temporary heap.  The final
-				string will be allocated from table->heap. */
-	hash_node_t	name_hash; /* hash chain node */
-	hash_node_t	id_hash; /* hash chain node */
-	UT_LIST_BASE_NODE_T(dict_index_t)
-			indexes; /* list of indexes of the table */
-	UT_LIST_BASE_NODE_T(dict_foreign_t)
-			foreign_list;/* list of foreign key constraints
-				in the table; these refer to columns
-				in other tables */
-	UT_LIST_BASE_NODE_T(dict_foreign_t)
-			referenced_list;/* list of foreign key constraints
-				which refer to this table */
-	UT_LIST_NODE_T(dict_table_t)
-			table_LRU; /* node of the LRU list of tables */
-	ulint		n_mysql_handles_opened;
-				/* count of how many handles MySQL has opened
-				to this table; dropping of the table is
-				NOT allowed until this count gets to zero;
-				MySQL does NOT itself check the number of
-				open handles at drop */
-	ulint		n_foreign_key_checks_running;
-				/* count of how many foreign key check
-				operations are currently being performed
-				on the table: we cannot drop the table while
-				there are foreign key checks running on
-				it! */
-	lock_t*		auto_inc_lock;/* a buffer for an auto-inc lock
-				for this table: we allocate the memory here
-				so that individual transactions can get it
-				and release it without a need to allocate
-				space from the lock heap of the trx:
-				otherwise the lock heap would grow rapidly
-				if we do a large insert from a select */
-	dulint		query_cache_inv_trx_id;
-				/* transactions whose trx id < than this
-				number are not allowed to store to the MySQL
-				query cache or retrieve from it; when a trx
-				with undo logs commits, it sets this to the
-				value of the trx id counter for the tables it
-				had an IX lock on */
-	UT_LIST_BASE_NODE_T(lock_t)
-			locks; /* list of locks on the table */
-#ifdef UNIV_DEBUG
-	/*----------------------*/
-	ibool		does_not_fit_in_memory;
-				/* this field is used to specify in simulations
-				tables which are so big that disk should be
-				accessed: disk access is simulated by
-				putting the thread to sleep for a while;
-				NOTE that this flag is not stored to the data
-				dictionary on disk, and the database will
-				forget about value TRUE if it has to reload
-				the table definition from disk */
-#endif /* UNIV_DEBUG */
-	/*----------------------*/
-	unsigned	big_rows:1;
-				/* flag: TRUE if the maximum length of
-				a single row exceeds BIG_ROW_SIZE;
-				initialized in dict_table_add_to_cache() */
-	unsigned	stat_initialized:1; /* TRUE if statistics have
-				been calculated the first time
-				after database startup or table creation */
-	ib_longlong	stat_n_rows;
-				/* approximate number of rows in the table;
-				we periodically calculate new estimates */
-	ulint		stat_clustered_index_size;
-				/* approximate clustered index size in
-				database pages */
-	ulint		stat_sum_of_other_index_sizes;
-				/* other indexes in database pages */
-	ulint		stat_modified_counter;
-				/* when a row is inserted, updated, or deleted,
-				we add 1 to this number; we calculate new
-				estimates for the stat_... values for the
-				table and the indexes at an interval of 2 GB
-				or when about 1 / 16 of table has been
-				modified; also when the estimate operation is
-				called for MySQL SHOW TABLE STATUS; the
-				counter is reset to zero at statistics
-				calculation; this counter is not protected by
-				any latch, because this is only used for
-				heuristics */
-	/*----------------------*/
-	mutex_t		autoinc_mutex;
-				/* mutex protecting the autoincrement
-				counter */
-	ib_ulonglong	autoinc;/* autoinc counter value to give to the
-				next inserted row */
-	ulong		n_waiting_or_granted_auto_inc_locks;
-				/* This counter is used to track the number
-				of granted and pending autoinc locks on this
-				table. This value is set after acquiring the
-				kernel mutex but we peek the contents to
-				determine whether other transactions have
-				acquired the AUTOINC lock or not. Of course
-				only one transaction can be granted the
-				lock but there can be multiple waiters. */
-	/*----------------------*/
-
-#ifdef UNIV_DEBUG
-	ulint		magic_n;/* magic number */
-# define DICT_TABLE_MAGIC_N	76333786
-#endif /* UNIV_DEBUG */
-};
-
-#ifndef UNIV_NONINL
-#include "dict0mem.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/dict0mem.ic b/storage/innobase/include/dict0mem.ic
deleted file mode 100644
index 9bcefc2a51f..00000000000
--- a/storage/innobase/include/dict0mem.ic
+++ /dev/null
@@ -1,9 +0,0 @@
-/**********************************************************************
-Data dictionary memory object creation
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-***********************************************************************/
-
-
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
deleted file mode 100644
index b90545f2105..00000000000
--- a/storage/innobase/include/dict0types.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/******************************************************
-Data dictionary global types
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0types_h
-#define dict0types_h
-
-typedef struct dict_sys_struct		dict_sys_t;
-typedef struct dict_col_struct		dict_col_t;
-typedef struct dict_field_struct	dict_field_t;
-typedef struct dict_index_struct	dict_index_t;
-typedef struct dict_table_struct	dict_table_t;
-typedef struct dict_foreign_struct	dict_foreign_t;
-
-/* A cluster object is a table object with the type field set to
-DICT_CLUSTERED */
-
-typedef dict_table_t			dict_cluster_t;
-
-typedef struct ind_node_struct		ind_node_t;
-typedef struct tab_node_struct		tab_node_t;
-
-#endif
diff --git a/storage/innobase/include/dyn0dyn.h b/storage/innobase/include/dyn0dyn.h
deleted file mode 100644
index 7affccbf67e..00000000000
--- a/storage/innobase/include/dyn0dyn.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/******************************************************
-The dynamically allocated array
-
-(c) 1996 Innobase Oy
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dyn0dyn_h
-#define dyn0dyn_h
-
-#include "univ.i"
-#include "ut0lst.h"
-#include "mem0mem.h"
-
-typedef struct dyn_block_struct		dyn_block_t;
-typedef dyn_block_t			dyn_array_t;
-
-
-/* This is the initial 'payload' size of a dynamic array;
-this must be > MLOG_BUF_MARGIN + 30! */
-#define	DYN_ARRAY_DATA_SIZE	512
-
-/*************************************************************************
-Initializes a dynamic array. */
-UNIV_INLINE
-dyn_array_t*
-dyn_array_create(
-/*=============*/
-				/* out: initialized dyn array */
-	dyn_array_t*	arr);	/* in: pointer to a memory buffer of
-				size sizeof(dyn_array_t) */
-/****************************************************************
-Frees a dynamic array. */
-UNIV_INLINE
-void
-dyn_array_free(
-/*===========*/
-	dyn_array_t*	arr);	/* in: dyn array */
-/*************************************************************************
-Makes room on top of a dyn array and returns a pointer to a buffer in it.
-After copying the elements, the caller must close the buffer using
-dyn_array_close. */
-UNIV_INLINE
-byte*
-dyn_array_open(
-/*===========*/
-				/* out: pointer to the buffer */
-	dyn_array_t*	arr,	/* in: dynamic array */
-	ulint		size);	/* in: size in bytes of the buffer; MUST be
-				smaller than DYN_ARRAY_DATA_SIZE! */
-/*************************************************************************
-Closes the buffer returned by dyn_array_open. */
-UNIV_INLINE
-void
-dyn_array_close(
-/*============*/
-	dyn_array_t*	arr,	/* in: dynamic array */
-	byte*		ptr);	/* in: buffer space from ptr up was not used */
-/*************************************************************************
-Makes room on top of a dyn array and returns a pointer to
-the added element. The caller must copy the element to
-the pointer returned. */
-UNIV_INLINE
-void*
-dyn_array_push(
-/*===========*/
-				/* out: pointer to the element */
-	dyn_array_t*	arr,	/* in: dynamic array */
-	ulint		size);	/* in: size in bytes of the element */
-/****************************************************************
-Returns pointer to an element in dyn array. */
-UNIV_INLINE
-void*
-dyn_array_get_element(
-/*==================*/
-				/* out: pointer to element */
-	dyn_array_t*	arr,	/* in: dyn array */
-	ulint		pos);	/* in: position of element as bytes
-				from array start */
-/****************************************************************
-Returns the size of stored data in a dyn array. */
-UNIV_INLINE
-ulint
-dyn_array_get_data_size(
-/*====================*/
-				/* out: data size in bytes */
-	dyn_array_t*	arr);	/* in: dyn array */
-/****************************************************************
-Gets the first block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_first_block(
-/*======================*/
-	dyn_array_t*	arr);	/* in: dyn array */
-/****************************************************************
-Gets the last block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_last_block(
-/*=====================*/
-	dyn_array_t*	arr);	/* in: dyn array */
-/************************************************************************
-Gets the next block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_next_block(
-/*=====================*/
-				/* out: pointer to next, NULL if end of list */
-	dyn_array_t*	arr,	/* in: dyn array */
-	dyn_block_t*	block);	/* in: dyn array block */
-/************************************************************************
-Gets the number of used bytes in a dyn array block. */
-UNIV_INLINE
-ulint
-dyn_block_get_used(
-/*===============*/
-				/* out: number of bytes used */
-	dyn_block_t*	block);	/* in: dyn array block */
-/************************************************************************
-Gets pointer to the start of data in a dyn array block. */
-UNIV_INLINE
-byte*
-dyn_block_get_data(
-/*===============*/
-				/* out: pointer to data */
-	dyn_block_t*	block);	/* in: dyn array block */
-/************************************************************
-Pushes n bytes to a dyn array. */
-UNIV_INLINE
-void
-dyn_push_string(
-/*============*/
-	dyn_array_t*	arr,	/* in: dyn array */
-	const byte*	str,	/* in: string to write */
-	ulint		len);	/* in: string length */
-
-/*#################################################################*/
-
-/* NOTE! Do not use the fields of the struct directly: the definition
-appears here only for the compiler to know its size! */
-struct dyn_block_struct{
-	mem_heap_t*	heap;	/* in the first block this is != NULL
-				if dynamic allocation has been needed */
-	ulint		used;	/* number of data bytes used in this block */
-	byte		data[DYN_ARRAY_DATA_SIZE];
-				/* storage for array elements */
-	UT_LIST_BASE_NODE_T(dyn_block_t) base;
-				/* linear list of dyn blocks: this node is
-				used only in the first block */
-	UT_LIST_NODE_T(dyn_block_t) list;
-				/* linear list node: used in all blocks */
-#ifdef UNIV_DEBUG
-	ulint		buf_end;/* only in the debug version: if dyn array is
-				opened, this is the buffer end offset, else
-				this is 0 */
-	ulint		magic_n;
-#endif
-};
-
-
-#ifndef UNIV_NONINL
-#include "dyn0dyn.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/eval0proc.h b/storage/innobase/include/eval0proc.h
deleted file mode 100644
index 8416551d0ba..00000000000
--- a/storage/innobase/include/eval0proc.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/******************************************************
-Executes SQL stored procedures and their control structures
-
-(c) 1998 Innobase Oy
-
-Created 1/20/1998 Heikki Tuuri
-*******************************************************/
-
-#ifndef eval0proc_h
-#define eval0proc_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "pars0sym.h"
-#include "pars0pars.h"
-
-/**************************************************************************
-Performs an execution step of a procedure node. */
-UNIV_INLINE
-que_thr_t*
-proc_step(
-/*======*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
-Performs an execution step of an if-statement node. */
-
-que_thr_t*
-if_step(
-/*====*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
-Performs an execution step of a while-statement node. */
-
-que_thr_t*
-while_step(
-/*=======*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
-Performs an execution step of a for-loop node. */
-
-que_thr_t*
-for_step(
-/*=====*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
-Performs an execution step of an assignment statement node. */
-
-que_thr_t*
-assign_step(
-/*========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
-Performs an execution step of a procedure call node. */
-UNIV_INLINE
-que_thr_t*
-proc_eval_step(
-/*===========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
-Performs an execution step of an exit statement node. */
-
-que_thr_t*
-exit_step(
-/*======*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
-Performs an execution step of a return-statement node. */
-
-que_thr_t*
-return_step(
-/*========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-
-
-#ifndef UNIV_NONINL
-#include "eval0proc.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/eval0proc.ic b/storage/innobase/include/eval0proc.ic
deleted file mode 100644
index cf738056576..00000000000
--- a/storage/innobase/include/eval0proc.ic
+++ /dev/null
@@ -1,71 +0,0 @@
-/******************************************************
-Executes SQL stored procedures and their control structures
-
-(c) 1998 Innobase Oy
-
-Created 1/20/1998 Heikki Tuuri
-*******************************************************/
-
-#include "pars0pars.h"
-#include "que0que.h"
-#include "eval0eval.h"
-
-/**************************************************************************
-Performs an execution step of a procedure node. */
-UNIV_INLINE
-que_thr_t*
-proc_step(
-/*======*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	proc_node_t*	node;
-
-	ut_ad(thr);
-
-	node = thr->run_node;
-	ut_ad(que_node_get_type(node) == QUE_NODE_PROC);
-
-	if (thr->prev_node == que_node_get_parent(node)) {
-		/* Start execution from the first statement in the statement
-		list */
-
-		thr->run_node = node->stat_list;
-	} else {
-		/* Move to the next statement */
-		ut_ad(que_node_get_next(thr->prev_node) == NULL);
-
-		thr->run_node = NULL;
-	}
-
-	if (thr->run_node == NULL) {
-		thr->run_node = que_node_get_parent(node);
-	}
-
-	return(thr);
-}
-
-/**************************************************************************
-Performs an execution step of a procedure call node. */
-UNIV_INLINE
-que_thr_t*
-proc_eval_step(
-/*===========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	func_node_t*	node;
-
-	ut_ad(thr);
-
-	node = thr->run_node;
-	ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
-
-	/* Evaluate the procedure */
-
-	eval_exp(node);
-
-	thr->run_node = que_node_get_parent(node);
-
-	return(thr);
-}
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
deleted file mode 100644
index 82e95a2e920..00000000000
--- a/storage/innobase/include/fsp0fsp.h
+++ /dev/null
@@ -1,391 +0,0 @@
-/******************************************************
-File space management
-
-(c) 1995 Innobase Oy
-
-Created 12/18/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef fsp0fsp_h
-#define fsp0fsp_h
-
-#include "univ.i"
-
-#include "mtr0mtr.h"
-#include "fut0lst.h"
-#include "ut0byte.h"
-#include "page0types.h"
-
-/* If records are inserted in order, there are the following
-flags to tell this (their type is made byte for the compiler
-to warn if direction and hint parameters are switched in
-fseg_alloc_free_page): */
-#define	FSP_UP		((byte)111)	/* alphabetically upwards */
-#define	FSP_DOWN	((byte)112)	/* alphabetically downwards */
-#define	FSP_NO_DIR	((byte)113)	/* no order */
-
-/* File space extent size in pages */
-#define	FSP_EXTENT_SIZE		64
-
-/* On a page of any file segment, data may be put starting from this offset: */
-#define FSEG_PAGE_DATA		FIL_PAGE_DATA
-
-/* File segment header which points to the inode describing the file segment */
-typedef	byte	fseg_header_t;
-
-#define FSEG_HDR_SPACE		0	/* space id of the inode */
-#define FSEG_HDR_PAGE_NO	4	/* page number of the inode */
-#define FSEG_HDR_OFFSET		8	/* byte offset of the inode */
-
-#define FSEG_HEADER_SIZE	10
-
-/**************************************************************************
-Initializes the file space system. */
-
-void
-fsp_init(void);
-/*==========*/
-/**************************************************************************
-Gets the current free limit of a tablespace. The free limit means the
-place of the first page which has never been put to the the free list
-for allocation. The space above that address is initialized to zero.
-Sets also the global variable log_fsp_current_free_limit. */
-
-ulint
-fsp_header_get_free_limit(
-/*======================*/
-			/* out: free limit in megabytes */
-	ulint	space);	/* in: space id, must be 0 */
-/**************************************************************************
-Gets the size of the tablespace from the tablespace header. If we do not
-have an auto-extending data file, this should be equal to the size of the
-data files. If there is an auto-extending data file, this can be smaller. */
-
-ulint
-fsp_header_get_tablespace_size(
-/*===========================*/
-			/* out: size in pages */
-	ulint	space);	/* in: space id, must be 0 */
-/**************************************************************************
-Reads the file space size stored in the header page. */
-
-ulint
-fsp_get_size_low(
-/*=============*/
-			/* out: tablespace size stored in the space header */
-	page_t*	page);	/* in: header page (page 0 in the tablespace) */
-/**************************************************************************
-Reads the space id from the first page of a tablespace. */
-
-ulint
-fsp_header_get_space_id(
-/*====================*/
-			/* out: space id, ULINT UNDEFINED if error */
-	page_t* page);	 /* in: first page of a tablespace */
-/**************************************************************************
-Writes the space id to a tablespace header. This function is used past the
-buffer pool when we in fil0fil.c create a new single-table tablespace. */
-
-void
-fsp_header_write_space_id(
-/*======================*/
-	page_t*	page,		/* in: first page in the space */
-	ulint	space_id);	/* in: space id */
-/**************************************************************************
-Initializes the space header of a new created space and creates also the
-insert buffer tree root if space == 0. */
-
-void
-fsp_header_init(
-/*============*/
-	ulint	space,	/* in: space id */
-	ulint	size,	/* in: current size in blocks */
-	mtr_t*	mtr);	/* in: mini-transaction handle */
-/**************************************************************************
-Increases the space size field of a space. */
-
-void
-fsp_header_inc_size(
-/*================*/
-	ulint	space,	/* in: space id */
-	ulint	size_inc,/* in: size increment in pages */
-	mtr_t*	mtr);	/* in: mini-transaction handle */
-/**************************************************************************
-Creates a new segment. */
-
-page_t*
-fseg_create(
-/*========*/
-			/* out: the page where the segment header is placed,
-			x-latched, NULL if could not create segment
-			because of lack of space */
-	ulint	space,	/* in: space id */
-	ulint	page,	/* in: page where the segment header is placed: if
-			this is != 0, the page must belong to another segment,
-			if this is 0, a new page will be allocated and it
-			will belong to the created segment */
-	ulint	byte_offset, /* in: byte offset of the created segment header
-			on the page */
-	mtr_t*	mtr);	/* in: mtr */
-/**************************************************************************
-Creates a new segment. */
-
-page_t*
-fseg_create_general(
-/*================*/
-			/* out: the page where the segment header is placed,
-			x-latched, NULL if could not create segment
-			because of lack of space */
-	ulint	space,	/* in: space id */
-	ulint	page,	/* in: page where the segment header is placed: if
-			this is != 0, the page must belong to another segment,
-			if this is 0, a new page will be allocated and it
-			will belong to the created segment */
-	ulint	byte_offset, /* in: byte offset of the created segment header
-			on the page */
-	ibool	has_done_reservation, /* in: TRUE if the caller has already
-			done the reservation for the pages with
-			fsp_reserve_free_extents (at least 2 extents: one for
-			the inode and the other for the segment) then there is
-			no need to do the check for this individual
-			operation */
-	mtr_t*	mtr);	/* in: mtr */
-/**************************************************************************
-Calculates the number of pages reserved by a segment, and how many pages are
-currently used. */
-
-ulint
-fseg_n_reserved_pages(
-/*==================*/
-				/* out: number of reserved pages */
-	fseg_header_t*	header,	/* in: segment header */
-	ulint*		used,	/* out: number of pages used (<= reserved) */
-	mtr_t*		mtr);	/* in: mtr handle */
-/**************************************************************************
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize
-file space fragmentation. */
-
-ulint
-fseg_alloc_free_page(
-/*=================*/
-				/* out: the allocated page offset
-				FIL_NULL if no page could be allocated */
-	fseg_header_t*	seg_header, /* in: segment header */
-	ulint		hint,	/* in: hint of which page would be desirable */
-	byte		direction, /* in: if the new page is needed because
-				of an index page split, and records are
-				inserted there in order, into which
-				direction they go alphabetically: FSP_DOWN,
-				FSP_UP, FSP_NO_DIR */
-	mtr_t*		mtr);	/* in: mtr handle */
-/**************************************************************************
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation. */
-
-ulint
-fseg_alloc_free_page_general(
-/*=========================*/
-				/* out: allocated page offset, FIL_NULL if no
-				page could be allocated */
-	fseg_header_t*	seg_header,/* in: segment header */
-	ulint		hint,	/* in: hint of which page would be desirable */
-	byte		direction,/* in: if the new page is needed because
-				of an index page split, and records are
-				inserted there in order, into which
-				direction they go alphabetically: FSP_DOWN,
-				FSP_UP, FSP_NO_DIR */
-	ibool		has_done_reservation, /* in: TRUE if the caller has
-				already done the reservation for the page
-				with fsp_reserve_free_extents, then there
-				is no need to do the check for this individual
-				page */
-	mtr_t*		mtr);	/* in: mtr handle */
-/**************************************************************************
-Reserves free pages from a tablespace. All mini-transactions which may
-use several pages from the tablespace should call this function beforehand
-and reserve enough free extents so that they certainly will be able
-to do their operation, like a B-tree page split, fully. Reservations
-must be released with function fil_space_release_free_extents!
-
-The alloc_type below has the following meaning: FSP_NORMAL means an
-operation which will probably result in more space usage, like an
-insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
-deleting rows, then this allocation will in the long run result in
-less space usage (after a purge); FSP_CLEANING means allocation done
-in a physical record delete (like in a purge) or other cleaning operation
-which will result in less space usage in the long run. We prefer the latter
-two types of allocation: when space is scarce, FSP_NORMAL allocations
-will not succeed, but the latter two allocations will succeed, if possible.
-The purpose is to avoid dead end where the database is full but the
-user cannot free any space because these freeing operations temporarily
-reserve some space.
-
-Single-table tablespaces whose size is < 32 pages are a special case. In this
-function we would liberally reserve several 64 page extents for every page
-split or merge in a B-tree. But we do not want to waste disk space if the table
-only occupies < 32 pages. That is why we apply different rules in that special
-case, just ensuring that there are 3 free pages available. */
-
-ibool
-fsp_reserve_free_extents(
-/*=====================*/
-			/* out: TRUE if we were able to make the reservation */
-	ulint*	n_reserved,/* out: number of extents actually reserved; if we
-			return TRUE and the tablespace size is < 64 pages,
-			then this can be 0, otherwise it is n_ext */
-	ulint	space,	/* in: space id */
-	ulint	n_ext,	/* in: number of extents to reserve */
-	ulint	alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
-	mtr_t*	mtr);	/* in: mtr */
-/**************************************************************************
-This function should be used to get information on how much we still
-will be able to insert new data to the database without running out the
-tablespace. Only free extents are taken into account and we also subtract
-the safety margin required by the above function fsp_reserve_free_extents. */
-
-ullint
-fsp_get_available_space_in_free_extents(
-/*====================================*/
-			/* out: available space in kB */
-	ulint	space);	/* in: space id */
-/**************************************************************************
-Frees a single page of a segment. */
-
-void
-fseg_free_page(
-/*===========*/
-	fseg_header_t*	seg_header, /* in: segment header */
-	ulint		space,	/* in: space id */
-	ulint		page,	/* in: page offset */
-	mtr_t*		mtr);	/* in: mtr handle */
-/***********************************************************************
-Frees a segment. The freeing is performed in several mini-transactions,
-so that there is no danger of bufferfixing too many buffer pages. */
-
-void
-fseg_free(
-/*======*/
-	ulint	space,	/* in: space id */
-	ulint	page_no,/* in: page number where the segment header is
-			placed */
-	ulint	offset);/* in: byte offset of the segment header on that
-			page */
-/**************************************************************************
-Frees part of a segment. This function can be used to free a segment
-by repeatedly calling this function in different mini-transactions.
-Doing the freeing in a single mini-transaction might result in
-too big a mini-transaction. */
-
-ibool
-fseg_free_step(
-/*===========*/
-				/* out: TRUE if freeing completed */
-	fseg_header_t*	header,	/* in, own: segment header; NOTE: if the header
-				resides on the first page of the frag list
-				of the segment, this pointer becomes obsolete
-				after the last freeing step */
-	mtr_t*		mtr);	/* in: mtr */
-/**************************************************************************
-Frees part of a segment. Differs from fseg_free_step because this function
-leaves the header page unfreed. */
-
-ibool
-fseg_free_step_not_header(
-/*======================*/
-				/* out: TRUE if freeing completed, except the
-				header page */
-	fseg_header_t*	header,	/* in: segment header which must reside on
-				the first fragment page of the segment */
-	mtr_t*		mtr);	/* in: mtr */
-/***************************************************************************
-Checks if a page address is an extent descriptor page address. */
-UNIV_INLINE
-ibool
-fsp_descr_page(
-/*===========*/
-			/* out: TRUE if a descriptor page */
-	ulint	page_no);/* in: page number */
-/***************************************************************
-Parses a redo log record of a file page init. */
-
-byte*
-fsp_parse_init_file_page(
-/*=====================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	page_t*	page);	/* in: page or NULL */
-/***********************************************************************
-Validates the file space system and its segments. */
-
-ibool
-fsp_validate(
-/*=========*/
-			/* out: TRUE if ok */
-	ulint	space);	/* in: space id */
-/***********************************************************************
-Prints info of a file space. */
-
-void
-fsp_print(
-/*======*/
-	ulint	space);	/* in: space id */
-/***********************************************************************
-Validates a segment. */
-
-ibool
-fseg_validate(
-/*==========*/
-				/* out: TRUE if ok */
-	fseg_header_t*	header, /* in: segment header */
-	mtr_t*		mtr2);	/* in: mtr */
-/***********************************************************************
-Writes info of a segment. */
-
-void
-fseg_print(
-/*=======*/
-	fseg_header_t*	header, /* in: segment header */
-	mtr_t*		mtr);	/* in: mtr */
-
-/* Flags for fsp_reserve_free_extents */
-#define FSP_NORMAL	1000000
-#define	FSP_UNDO	2000000
-#define FSP_CLEANING	3000000
-
-/* Number of pages described in a single descriptor page: currently each page
-description takes less than 1 byte; a descriptor page is repeated every
-this many file pages */
-#define XDES_DESCRIBED_PER_PAGE		UNIV_PAGE_SIZE
-
-/* The space low address page map */
-/*--------------------------------------*/
-				/* The following two pages are repeated
-				every XDES_DESCRIBED_PER_PAGE pages in
-				every tablespace. */
-#define FSP_XDES_OFFSET			0	/* extent descriptor */
-#define FSP_IBUF_BITMAP_OFFSET		1	/* insert buffer bitmap */
-				/* The ibuf bitmap pages are the ones whose
-				page number is the number above plus a
-				multiple of XDES_DESCRIBED_PER_PAGE */
-
-#define FSP_FIRST_INODE_PAGE_NO		2	/* in every tablespace */
-				/* The following pages exist
-				in the system tablespace (space 0). */
-#define FSP_IBUF_HEADER_PAGE_NO		3	/* in tablespace 0 */
-#define FSP_IBUF_TREE_ROOT_PAGE_NO	4	/* in tablespace 0 */
-				/* The ibuf tree root page number in
-				tablespace 0; its fseg inode is on the page
-				number FSP_FIRST_INODE_PAGE_NO */
-#define FSP_TRX_SYS_PAGE_NO		5	/* in tablespace 0 */
-#define	FSP_FIRST_RSEG_PAGE_NO		6	/* in tablespace 0 */
-#define FSP_DICT_HDR_PAGE_NO		7	/* in tablespace 0 */
-/*--------------------------------------*/
-
-#ifndef UNIV_NONINL
-#include "fsp0fsp.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic
deleted file mode 100644
index 89cd9263bd6..00000000000
--- a/storage/innobase/include/fsp0fsp.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/******************************************************
-File space management
-
-(c) 1995 Innobase Oy
-
-Created 12/18/1995 Heikki Tuuri
-*******************************************************/
-
-/***************************************************************************
-Checks if a page address is an extent descriptor page address. */
-UNIV_INLINE
-ibool
-fsp_descr_page(
-/*===========*/
-			/* out: TRUE if a descriptor page */
-	ulint	page_no)/* in: page number */
-{
-	if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_XDES_OFFSET) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
diff --git a/storage/innobase/include/fut0fut.h b/storage/innobase/include/fut0fut.h
deleted file mode 100644
index b9546b4e1a0..00000000000
--- a/storage/innobase/include/fut0fut.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/**********************************************************************
-File-based utilities
-
-(c) 1995 Innobase Oy
-
-Created 12/13/1995 Heikki Tuuri
-***********************************************************************/
-
-
-#ifndef fut0fut_h
-#define fut0fut_h
-
-#include "univ.i"
-
-#include "fil0fil.h"
-#include "mtr0mtr.h"
-
-/************************************************************************
-Gets a pointer to a file address and latches the page. */
-UNIV_INLINE
-byte*
-fut_get_ptr(
-/*========*/
-				/* out: pointer to a byte in a frame; the file
-				page in the frame is bufferfixed and latched */
-	ulint		space,	/* in: space id */
-	fil_addr_t	addr,	/* in: file address */
-	ulint		rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
-	mtr_t*		mtr);	/* in: mtr handle */
-
-#ifndef UNIV_NONINL
-#include "fut0fut.ic"
-#endif
-
-#endif
-
diff --git a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic
deleted file mode 100644
index 6a107786376..00000000000
--- a/storage/innobase/include/fut0fut.ic
+++ /dev/null
@@ -1,38 +0,0 @@
-/**********************************************************************
-File-based utilities
-
-(c) 1995 Innobase Oy
-
-Created 12/13/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "sync0rw.h"
-#include "buf0buf.h"
-
-/************************************************************************
-Gets a pointer to a file address and latches the page. */
-UNIV_INLINE
-byte*
-fut_get_ptr(
-/*========*/
-				/* out: pointer to a byte in a frame; the file
-				page in the frame is bufferfixed and latched */
-	ulint		space,	/* in: space id */
-	fil_addr_t	addr,	/* in: file address */
-	ulint		rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
-	mtr_t*		mtr)	/* in: mtr handle */
-{
-	byte*	ptr;
-
-	ut_ad(mtr);
-	ut_ad(addr.boffset < UNIV_PAGE_SIZE);
-	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
-	ptr = buf_page_get(space, addr.page, rw_latch, mtr) + addr.boffset;
-
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(ptr, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
-	return(ptr);
-}
diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h
deleted file mode 100644
index 5427e2248da..00000000000
--- a/storage/innobase/include/fut0lst.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/**********************************************************************
-File-based list utilities
-
-(c) 1995 Innobase Oy
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef fut0lst_h
-#define fut0lst_h
-
-#include "univ.i"
-
-#include "fil0fil.h"
-#include "mtr0mtr.h"
-
-
-/* The C 'types' of base node and list node: these should be used to
-write self-documenting code. Of course, the sizeof macro cannot be
-applied to these types! */
-
-typedef	byte	flst_base_node_t;
-typedef	byte	flst_node_t;
-
-/* The physical size of a list base node in bytes */
-#define	FLST_BASE_NODE_SIZE	(4 + 2 * FIL_ADDR_SIZE)
-
-/* The physical size of a list node in bytes */
-#define	FLST_NODE_SIZE		(2 * FIL_ADDR_SIZE)
-
-
-/************************************************************************
-Initializes a list base node. */
-UNIV_INLINE
-void
-flst_init(
-/*======*/
-	flst_base_node_t*	base,	/* in: pointer to base node */
-	mtr_t*			mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Adds a node as the last node in a list. */
-
-void
-flst_add_last(
-/*==========*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node,	/* in: node to add */
-	mtr_t*			mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Adds a node as the first node in a list. */
-
-void
-flst_add_first(
-/*===========*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node,	/* in: node to add */
-	mtr_t*			mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Inserts a node after another in a list. */
-
-void
-flst_insert_after(
-/*==============*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node1,	/* in: node to insert after */
-	flst_node_t*		node2,	/* in: node to add */
-	mtr_t*			mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Inserts a node before another in a list. */
-
-void
-flst_insert_before(
-/*===============*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node2,	/* in: node to insert */
-	flst_node_t*		node3,	/* in: node to insert before */
-	mtr_t*			mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Removes a node. */
-
-void
-flst_remove(
-/*========*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node2,	/* in: node to remove */
-	mtr_t*			mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Cuts off the tail of the list, including the node given. The number of
-nodes which will be removed must be provided by the caller, as this function
-does not measure the length of the tail. */
-
-void
-flst_cut_end(
-/*=========*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node2,	/* in: first node to remove */
-	ulint			n_nodes,/* in: number of nodes to remove,
-					must be >= 1 */
-	mtr_t*			mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Cuts off the tail of the list, not including the given node. The number of
-nodes which will be removed must be provided by the caller, as this function
-does not measure the length of the tail. */
-
-void
-flst_truncate_end(
-/*==============*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node2,	/* in: first node not to remove */
-	ulint			n_nodes,/* in: number of nodes to remove */
-	mtr_t*			mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Gets list length. */
-UNIV_INLINE
-ulint
-flst_get_len(
-/*=========*/
-					/* out: length */
-	flst_base_node_t*	base,	/* in: pointer to base node */
-	mtr_t*			mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Gets list first node address. */
-UNIV_INLINE
-fil_addr_t
-flst_get_first(
-/*===========*/
-					/* out: file address */
-	flst_base_node_t*	base,	/* in: pointer to base node */
-	mtr_t*			mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Gets list last node address. */
-UNIV_INLINE
-fil_addr_t
-flst_get_last(
-/*==========*/
-					/* out: file address */
-	flst_base_node_t*	base,	/* in: pointer to base node */
-	mtr_t*			mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Gets list next node address. */
-UNIV_INLINE
-fil_addr_t
-flst_get_next_addr(
-/*===============*/
-				/* out: file address */
-	flst_node_t*	node,	/* in: pointer to node */
-	mtr_t*		mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Gets list prev node address. */
-UNIV_INLINE
-fil_addr_t
-flst_get_prev_addr(
-/*===============*/
-				/* out: file address */
-	flst_node_t*	node,	/* in: pointer to node */
-	mtr_t*		mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Writes a file address. */
-UNIV_INLINE
-void
-flst_write_addr(
-/*============*/
-	fil_faddr_t*	faddr,	/* in: pointer to file faddress */
-	fil_addr_t	addr,	/* in: file address */
-	mtr_t*		mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Reads a file address. */
-UNIV_INLINE
-fil_addr_t
-flst_read_addr(
-/*===========*/
-				/* out: file address */
-	fil_faddr_t*	faddr,	/* in: pointer to file faddress */
-	mtr_t*		mtr);	/* in: mini-transaction handle */
-/************************************************************************
-Validates a file-based list. */
-
-ibool
-flst_validate(
-/*==========*/
-					/* out: TRUE if ok */
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	mtr_t*			mtr1);	/* in: mtr */
-/************************************************************************
-Prints info of a file-based list. */
-
-void
-flst_print(
-/*=======*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	mtr_t*			mtr);	/* in: mtr */
-
-
-#ifndef UNIV_NONINL
-#include "fut0lst.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h
deleted file mode 100644
index beaa06ae755..00000000000
--- a/storage/innobase/include/ha0ha.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/******************************************************
-The hash table with external chains
-
-(c) 1994-1997 Innobase Oy
-
-Created 8/18/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef ha0ha_h
-#define ha0ha_h
-
-#include "univ.i"
-
-#include "hash0hash.h"
-#include "page0types.h"
-
-/*****************************************************************
-Looks for an element in a hash table. */
-UNIV_INLINE
-void*
-ha_search_and_get_data(
-/*===================*/
-				/* out: pointer to the data of the first hash
-				table node in chain having the fold number,
-				NULL if not found */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold);	/* in: folded value of the searched data */
-/*************************************************************
-Looks for an element when we know the pointer to the data and updates
-the pointer to data if found. */
-
-void
-ha_search_and_update_if_found(
-/*==========================*/
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold,	/* in: folded value of the searched data */
-	void*		data,	/* in: pointer to the data */
-	void*		new_data);/* in: new pointer to the data */
-/*****************************************************************
-Creates a hash table with >= n array cells. The actual number of cells is
-chosen to be a prime number slightly bigger than n. */
-
-hash_table_t*
-ha_create_func(
-/*===========*/
-				/* out, own: created table */
-	ibool	in_btr_search,	/* in: TRUE if the hash table is used in
-				the btr_search module */
-	ulint	n,		/* in: number of array cells */
-#ifdef UNIV_SYNC_DEBUG
-	ulint	mutex_level,	/* in: level of the mutexes in the latching
-				order: this is used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
-	ulint	n_mutexes);	/* in: number of mutexes to protect the
-				hash table: must be a power of 2 */
-#ifdef UNIV_SYNC_DEBUG
-# define ha_create(b,n_c,n_m,level) ha_create_func(b,n_c,level,n_m)
-#else /* UNIV_SYNC_DEBUG */
-# define ha_create(b,n_c,n_m,level) ha_create_func(b,n_c,n_m)
-#endif /* UNIV_SYNC_DEBUG */
-/*****************************************************************
-Inserts an entry into a hash table. If an entry with the same fold number
-is found, its node is updated to point to the new data, and no new node
-is inserted. */
-
-ibool
-ha_insert_for_fold(
-/*===============*/
-				/* out: TRUE if succeed, FALSE if no more
-				memory could be allocated */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold,	/* in: folded value of data; if a node with
-				the same fold value already exists, it is
-				updated to point to the same data, and no new
-				node is created! */
-	void*		data);	/* in: data, must not be NULL */
-/*****************************************************************
-Deletes an entry from a hash table. */
-
-void
-ha_delete(
-/*======*/
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold,	/* in: folded value of data */
-	void*		data);	/* in: data, must not be NULL and must exist
-				in the hash table */
-/*************************************************************
-Looks for an element when we know the pointer to the data and deletes
-it from the hash table if found. */
-UNIV_INLINE
-ibool
-ha_search_and_delete_if_found(
-/*==========================*/
-				/* out: TRUE if found */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold,	/* in: folded value of the searched data */
-	void*		data);	/* in: pointer to the data */
-/*********************************************************************
-Removes from the chain determined by fold all nodes whose data pointer
-points to the page given. */
-
-void
-ha_remove_all_nodes_to_page(
-/*========================*/
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold,	/* in: fold value */
-	page_t*		page);	/* in: buffer page */
-/*****************************************************************
-Validates a given range of the cells in hash table. */
-
-ibool
-ha_validate(
-/*========*/
-					/* out: TRUE if ok */
-	hash_table_t*	table,		/* in: hash table */
-	ulint		start_index,	/* in: start index */
-	ulint		end_index);	/* in: end index */
-/*****************************************************************
-Prints info of a hash table. */
-
-void
-ha_print_info(
-/*==========*/
-	FILE*		file,	/* in: file where to print */
-	hash_table_t*	table);	/* in: hash table */
-
-/* The hash table external chain node */
-
-typedef struct ha_node_struct ha_node_t;
-struct ha_node_struct {
-	ha_node_t* next; /* next chain node or NULL if none */
-	void*	data;	/* pointer to the data */
-	ulint	fold;	/* fold value for the data */
-};
-
-#ifndef UNIV_NONINL
-#include "ha0ha.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ha0ha.ic b/storage/innobase/include/ha0ha.ic
deleted file mode 100644
index fb264377f28..00000000000
--- a/storage/innobase/include/ha0ha.ic
+++ /dev/null
@@ -1,185 +0,0 @@
-/************************************************************************
-The hash table with external chains
-
-(c) 1994-1997 Innobase Oy
-
-Created 8/18/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "ut0rnd.h"
-#include "mem0mem.h"
-
-/***************************************************************
-Deletes a hash node. */
-
-void
-ha_delete_hash_node(
-/*================*/
-	hash_table_t*	table,		/* in: hash table */
-	ha_node_t*	del_node);	/* in: node to be deleted */
-
-/**********************************************************************
-Gets a hash node data. */
-UNIV_INLINE
-void*
-ha_node_get_data(
-/*=============*/
-				/* out: pointer to the data */
-	ha_node_t*	node)	/* in: hash chain node */
-{
-	return(node->data);
-}
-
-/**********************************************************************
-Sets hash node data. */
-UNIV_INLINE
-void
-ha_node_set_data(
-/*=============*/
-	ha_node_t*	node,	/* in: hash chain node */
-	void*		data)	/* in: pointer to the data */
-{
-	node->data = data;
-}
-
-/**********************************************************************
-Gets the next node in a hash chain. */
-UNIV_INLINE
-ha_node_t*
-ha_chain_get_next(
-/*==============*/
-				/* out: next node, NULL if none */
-	ha_node_t*	node)	/* in: hash chain node */
-{
-	return(node->next);
-}
-
-/**********************************************************************
-Gets the first node in a hash chain. */
-UNIV_INLINE
-ha_node_t*
-ha_chain_get_first(
-/*===============*/
-				/* out: first node, NULL if none */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold)	/* in: fold value determining the chain */
-{
-	return(hash_get_nth_cell(table, hash_calc_hash(fold, table))->node);
-}
-
-/*****************************************************************
-Looks for an element in a hash table. */
-UNIV_INLINE
-ha_node_t*
-ha_search(
-/*======*/
-				/* out: pointer to the first hash table node
-				in chain having the fold number, NULL if not
-				found */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold)	/* in: folded value of the searched data */
-{
-	ha_node_t*	node;
-
-	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
-	node = ha_chain_get_first(table, fold);
-
-	while (node) {
-		if (node->fold == fold) {
-
-			return(node);
-		}
-
-		node = ha_chain_get_next(node);
-	}
-
-	return(NULL);
-}
-
-/*****************************************************************
-Looks for an element in a hash table. */
-UNIV_INLINE
-void*
-ha_search_and_get_data(
-/*===================*/
-				/* out: pointer to the data of the first hash
-				table node in chain having the fold number,
-				NULL if not found */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold)	/* in: folded value of the searched data */
-{
-	ha_node_t*	node;
-
-	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
-	node = ha_chain_get_first(table, fold);
-
-	while (node) {
-		if (node->fold == fold) {
-
-			return(node->data);
-		}
-
-		node = ha_chain_get_next(node);
-	}
-
-	return(NULL);
-}
-
-/*************************************************************
-Looks for an element when we know the pointer to the data. */
-UNIV_INLINE
-ha_node_t*
-ha_search_with_data(
-/*================*/
-				/* out: pointer to the hash table node, NULL
-				if not found in the table */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold,	/* in: folded value of the searched data */
-	void*		data)	/* in: pointer to the data */
-{
-	ha_node_t*	node;
-
-	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
-	node = ha_chain_get_first(table, fold);
-
-	while (node) {
-		if (node->data == data) {
-
-			return(node);
-		}
-
-		node = ha_chain_get_next(node);
-	}
-
-	return(NULL);
-}
-
-/*************************************************************
-Looks for an element when we know the pointer to the data, and deletes
-it from the hash table, if found. */
-UNIV_INLINE
-ibool
-ha_search_and_delete_if_found(
-/*==========================*/
-				/* out: TRUE if found */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold,	/* in: folded value of the searched data */
-	void*		data)	/* in: pointer to the data */
-{
-	ha_node_t*	node;
-
-	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
-	node = ha_search_with_data(table, fold, data);
-
-	if (node) {
-		ha_delete_hash_node(table, node);
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
deleted file mode 100644
index 6bfc43579b3..00000000000
--- a/storage/innobase/include/ha_prototypes.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef HA_INNODB_PROTOTYPES_H
-#define HA_INNODB_PROTOTYPES_H
-
-#ifndef UNIV_HOTBACKUP
-
-#include "univ.i" /* ulint, uint */
-#include "m_ctype.h" /* CHARSET_INFO */
-
-/* Prototypes for global functions in ha_innodb.cc that are called by
-InnoDB's C-code. */
-
-/*************************************************************************
-Wrapper around MySQL's copy_and_convert function, see it for
-documentation. */
-
-ulint
-innobase_convert_string(
-/*====================*/
-	void*		to,
-	ulint		to_length,
-	CHARSET_INFO*	to_cs,
-	const void*	from,
-	ulint		from_length,
-	CHARSET_INFO*	from_cs,
-	uint*		errors);
-
-/*********************************************************************
-Display an SQL identifier. */
-
-void
-innobase_print_identifier(
-/*======================*/
-	FILE*		f,	/* in: output stream */
-	trx_t*		trx,	/* in: transaction */
-	ibool		table_id,/* in: TRUE=print a table name,
-				FALSE=print other identifier */
-	const char*	name,	/* in: name to print */
-	ulint		namelen);/* in: length of name */
-
-/**********************************************************************
-Returns true if the thread is the replication thread on the slave
-server. Used in srv_conc_enter_innodb() to determine if the thread
-should be allowed to enter InnoDB - the replication thread is treated
-differently than other threads. Also used in
-srv_conc_force_exit_innodb(). */
-
-ibool
-thd_is_replication_slave_thread(
-/*============================*/
-			/* out: true if thd is the replication thread */
-	void*	thd);	/* in: thread handle (THD*) */
-
-/**********************************************************************
-Returns true if the transaction this thread is processing has edited
-non-transactional tables. Used by the deadlock detector when deciding
-which transaction to rollback in case of a deadlock - we try to avoid
-rolling back transactions that have edited non-transactional tables. */
-
-ibool
-thd_has_edited_nontrans_tables(
-/*===========================*/
-			/* out: true if non-transactional tables have
-			been edited */
-	void*	thd);	/* in: thread handle (THD*) */
-
-/**********************************************************************
-Returns true if the thread is executing a SELECT statement. */
-
-ibool
-thd_is_select(
-/*==========*/
-				/* out: true if thd is executing SELECT */
-	const void*	thd);	/* in: thread handle (THD*) */
-
-#endif
-#endif
diff --git a/storage/innobase/include/hash0hash.ic b/storage/innobase/include/hash0hash.ic
deleted file mode 100644
index d246d8ee831..00000000000
--- a/storage/innobase/include/hash0hash.ic
+++ /dev/null
@@ -1,131 +0,0 @@
-/******************************************************
-The simple hash table utility
-
-(c) 1997 Innobase Oy
-
-Created 5/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "ut0rnd.h"
-
-/****************************************************************
-Gets the nth cell in a hash table. */
-UNIV_INLINE
-hash_cell_t*
-hash_get_nth_cell(
-/*==============*/
-				/* out: pointer to cell */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		n)	/* in: cell index */
-{
-	ut_ad(n < table->n_cells);
-
-	return(table->array + n);
-}
-
-/*****************************************************************
-Returns the number of cells in a hash table. */
-UNIV_INLINE
-ulint
-hash_get_n_cells(
-/*=============*/
-				/* out: number of cells */
-	hash_table_t*	table)	/* in: table */
-{
-	return(table->n_cells);
-}
-
-/******************************************************************
-Calculates the hash value from a folded value. */
-UNIV_INLINE
-ulint
-hash_calc_hash(
-/*===========*/
-				/* out: hashed value */
-	ulint		fold,	/* in: folded value */
-	hash_table_t*	table)	/* in: hash table */
-{
-	return(ut_hash_ulint(fold, table->n_cells));
-}
-
-/****************************************************************
-Gets the mutex index for a fold value in a hash table. */
-UNIV_INLINE
-ulint
-hash_get_mutex_no(
-/*==============*/
-				/* out: mutex number */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold)	/* in: fold */
-{
-	return(ut_2pow_remainder(hash_calc_hash(fold, table),
-				 table->n_mutexes));
-}
-
-/****************************************************************
-Gets the nth heap in a hash table. */
-UNIV_INLINE
-mem_heap_t*
-hash_get_nth_heap(
-/*==============*/
-				/* out: mem heap */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		i)	/* in: index of the heap */
-{
-	ut_ad(i < table->n_mutexes);
-
-	return(table->heaps[i]);
-}
-
-/****************************************************************
-Gets the heap for a fold value in a hash table. */
-UNIV_INLINE
-mem_heap_t*
-hash_get_heap(
-/*==========*/
-				/* out: mem heap */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold)	/* in: fold */
-{
-	ulint	i;
-
-	if (table->heap) {
-		return(table->heap);
-	}
-
-	i = hash_get_mutex_no(table, fold);
-
-	return(hash_get_nth_heap(table, i));
-}
-
-/****************************************************************
-Gets the nth mutex in a hash table. */
-UNIV_INLINE
-mutex_t*
-hash_get_nth_mutex(
-/*===============*/
-				/* out: mutex */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		i)	/* in: index of the mutex */
-{
-	ut_ad(i < table->n_mutexes);
-
-	return(table->mutexes + i);
-}
-
-/****************************************************************
-Gets the mutex for a fold value in a hash table. */
-UNIV_INLINE
-mutex_t*
-hash_get_mutex(
-/*===========*/
-				/* out: mutex */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold)	/* in: fold */
-{
-	ulint	i;
-
-	i = hash_get_mutex_no(table, fold);
-
-	return(hash_get_nth_mutex(table, i));
-}
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
deleted file mode 100644
index 77fefe2020b..00000000000
--- a/storage/innobase/include/ibuf0ibuf.h
+++ /dev/null
@@ -1,309 +0,0 @@
-/******************************************************
-Insert buffer
-
-(c) 1997 Innobase Oy
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef ibuf0ibuf_h
-#define ibuf0ibuf_h
-
-#include "univ.i"
-
-#include "dict0mem.h"
-#include "dict0dict.h"
-#include "mtr0mtr.h"
-#include "que0types.h"
-#include "ibuf0types.h"
-#include "fsp0fsp.h"
-
-extern ibuf_t*	ibuf;
-
-/**********************************************************************
-Creates the insert buffer data struct for a single tablespace. Reads the
-root page of the insert buffer tree in the tablespace. This function can
-be called only after the dictionary system has been initialized, as this
-creates also the insert buffer table and index for this tablespace. */
-
-ibuf_data_t*
-ibuf_data_init_for_space(
-/*=====================*/
-			/* out, own: ibuf data struct, linked to the list
-			in ibuf control structure. */
-	ulint	space);	/* in: space id */
-/**********************************************************************
-Creates the insert buffer data structure at a database startup and
-initializes the data structures for the insert buffer of each tablespace. */
-
-void
-ibuf_init_at_db_start(void);
-/*=======================*/
-/*************************************************************************
-Reads the biggest tablespace id from the high end of the insert buffer
-tree and updates the counter in fil_system. */
-
-void
-ibuf_update_max_tablespace_id(void);
-/*===============================*/
-/*************************************************************************
-Initializes an ibuf bitmap page. */
-
-void
-ibuf_bitmap_page_init(
-/*==================*/
-	page_t*	page,	/* in: bitmap page */
-	mtr_t*	mtr);	/* in: mtr */
-/****************************************************************************
-Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict further
-work to only ibuf bitmap operations, which would result if the latch to the
-bitmap page were kept. */
-
-void
-ibuf_reset_free_bits_with_type(
-/*===========================*/
-	ulint	type,	/* in: index type */
-	page_t*	page);	/* in: index page; free bits are set to 0 if the index
-			is non-clustered and non-unique and the page level is
-			0 */
-/****************************************************************************
-Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict further
-work to solely ibuf bitmap operations, which would result if the latch to
-the bitmap page were kept. */
-
-void
-ibuf_reset_free_bits(
-/*=================*/
-	dict_index_t*	index,	/* in: index */
-	page_t*		page);	/* in: index page; free bits are set to 0 if
-				the index is non-clustered and non-unique and
-				the page level is 0 */
-/****************************************************************************
-Updates the free bits of the page in the ibuf bitmap if there is not enough
-free on the page any more. This is done in a separate mini-transaction, hence
-this operation does not restrict further work to only ibuf bitmap operations,
-which would result if the latch to the bitmap page were kept. */
-UNIV_INLINE
-void
-ibuf_update_free_bits_if_full(
-/*==========================*/
-	dict_index_t*	index,	/* in: index */
-	page_t*		page,	/* in: index page to which we have added new
-				records; the free bits are updated if the
-				index is non-clustered and non-unique and
-				the page level is 0, and the page becomes
-				fuller */
-	ulint		max_ins_size,/* in: value of maximum insert size with
-				reorganize before the latest operation
-				performed to the page */
-	ulint		increase);/* in: upper limit for the additional space
-				used in the latest operation, if known, or
-				ULINT_UNDEFINED */
-/**************************************************************************
-Updates the free bits for the page to reflect the present state. Does this
-in the mtr given, which means that the latching order rules virtually
-prevent any further operations for this OS thread until mtr is committed. */
-
-void
-ibuf_update_free_bits_low(
-/*======================*/
-	dict_index_t*	index,		/* in: index */
-	page_t*		page,		/* in: index page */
-	ulint		max_ins_size,	/* in: value of maximum insert size
-					with reorganize before the latest
-					operation performed to the page */
-	mtr_t*		mtr);		/* in: mtr */
-/**************************************************************************
-Updates the free bits for the two pages to reflect the present state. Does
-this in the mtr given, which means that the latching order rules virtually
-prevent any further operations until mtr is committed. */
-
-void
-ibuf_update_free_bits_for_two_pages_low(
-/*====================================*/
-	dict_index_t*	index,	/* in: index */
-	page_t*		page1,	/* in: index page */
-	page_t*		page2,	/* in: index page */
-	mtr_t*		mtr);	/* in: mtr */
-/**************************************************************************
-A basic partial test if an insert to the insert buffer could be possible and
-recommended. */
-UNIV_INLINE
-ibool
-ibuf_should_try(
-/*============*/
-	dict_index_t*	index,			/* in: index where to insert */
-	ulint		ignore_sec_unique);	/* in: if != 0, we should
-						ignore UNIQUE constraint on
-						a secondary index when we
-						decide */
-/**********************************************************************
-Returns TRUE if the current OS thread is performing an insert buffer
-routine. */
-
-ibool
-ibuf_inside(void);
-/*=============*/
-		/* out: TRUE if inside an insert buffer routine: for instance,
-		a read-ahead of non-ibuf pages is then forbidden */
-/***************************************************************************
-Checks if a page address is an ibuf bitmap page (level 3 page) address. */
-UNIV_INLINE
-ibool
-ibuf_bitmap_page(
-/*=============*/
-			/* out: TRUE if a bitmap page */
-	ulint	page_no);/* in: page number */
-/***************************************************************************
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
-
-ibool
-ibuf_page(
-/*======*/
-			/* out: TRUE if level 2 or level 3 page */
-	ulint	space,	/* in: space id */
-	ulint	page_no);/* in: page number */
-/***************************************************************************
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
-
-ibool
-ibuf_page_low(
-/*==========*/
-			/* out: TRUE if level 2 or level 3 page */
-	ulint	space,	/* in: space id */
-	ulint	page_no,/* in: page number */
-	mtr_t*	mtr);	/* in: mtr which will contain an x-latch to the
-			bitmap page if the page is not one of the fixed
-			address ibuf pages */
-/***************************************************************************
-Frees excess pages from the ibuf free list. This function is called when an OS
-thread calls fsp services to allocate a new file segment, or a new page to a
-file segment, and the thread did not own the fsp latch before this call. */
-
-void
-ibuf_free_excess_pages(
-/*===================*/
-	ulint	space);	/* in: space id */
-/*************************************************************************
-Makes an index insert to the insert buffer, instead of directly to the disk
-page, if this is possible. Does not do insert if the index is clustered
-or unique. */
-
-ibool
-ibuf_insert(
-/*========*/
-				/* out: TRUE if success */
-	dtuple_t*	entry,	/* in: index entry to insert */
-	dict_index_t*	index,	/* in: index where to insert */
-	ulint		space,	/* in: space id where to insert */
-	ulint		page_no,/* in: page number where to insert */
-	que_thr_t*	thr);	/* in: query thread */
-/*************************************************************************
-When an index page is read from a disk to the buffer pool, this function
-inserts to the page the possible index entries buffered in the insert buffer.
-The entries are deleted from the insert buffer. If the page is not read, but
-created in the buffer pool, this function deletes its buffered entries from
-the insert buffer; there can exist entries for such a page if the page
-belonged to an index which subsequently was dropped. */
-
-void
-ibuf_merge_or_delete_for_page(
-/*==========================*/
-	page_t*	page,	/* in: if page has been read from disk, pointer to
-			the page x-latched, else NULL */
-	ulint	space,	/* in: space id of the index page */
-	ulint	page_no,/* in: page number of the index page */
-	ibool	update_ibuf_bitmap);/* in: normally this is set to TRUE, but if
-			we have deleted or are deleting the tablespace, then we
-			naturally do not want to update a non-existent bitmap
-			page */
-/*************************************************************************
-Deletes all entries in the insert buffer for a given space id. This is used
-in DISCARD TABLESPACE and IMPORT TABLESPACE.
-NOTE: this does not update the page free bitmaps in the space. The space will
-become CORRUPT when you call this function! */
-
-void
-ibuf_delete_for_discarded_space(
-/*============================*/
-	ulint	space);	/* in: space id */
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. */
-
-ulint
-ibuf_contract(
-/*==========*/
-			/* out: a lower limit for the combined size in bytes
-			of entries which will be merged from ibuf trees to the
-			pages read, 0 if ibuf is empty */
-	ibool	sync);	/* in: TRUE if the caller wants to wait for the
-			issued read with the highest tablespace address
-			to complete */
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. */
-
-ulint
-ibuf_contract_for_n_pages(
-/*======================*/
-			/* out: a lower limit for the combined size in bytes
-			of entries which will be merged from ibuf trees to the
-			pages read, 0 if ibuf is empty */
-	ibool	sync,	/* in: TRUE if the caller wants to wait for the
-			issued read with the highest tablespace address
-			to complete */
-	ulint	n_pages);/* in: try to read at least this many pages to
-			the buffer pool and merge the ibuf contents to
-			them */
-/*************************************************************************
-Parses a redo log record of an ibuf bitmap page init. */
-
-byte*
-ibuf_parse_bitmap_init(
-/*===================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr);	/* in: mtr or NULL */
-#ifdef UNIV_IBUF_DEBUG
-/**********************************************************************
-Gets the ibuf count for a given page. */
-
-ulint
-ibuf_count_get(
-/*===========*/
-			/* out: number of entries in the insert buffer
-			currently buffered for this page */
-	ulint	space,	/* in: space id */
-	ulint	page_no);/* in: page number */
-#endif
-/**********************************************************************
-Looks if the insert buffer is empty. */
-
-ibool
-ibuf_is_empty(void);
-/*===============*/
-			/* out: TRUE if empty */
-/**********************************************************************
-Prints info of ibuf. */
-
-void
-ibuf_print(
-/*=======*/
-	FILE*	file);	/* in: file where to print */
-
-#define IBUF_HEADER_PAGE_NO	FSP_IBUF_HEADER_PAGE_NO
-#define IBUF_TREE_ROOT_PAGE_NO	FSP_IBUF_TREE_ROOT_PAGE_NO
-
-/* The ibuf header page currently contains only the file segment header
-for the file segment from which the pages for the ibuf tree are allocated */
-#define IBUF_HEADER		PAGE_DATA
-#define	IBUF_TREE_SEG_HEADER	0	/* fseg header for ibuf tree */
-
-#ifndef UNIV_NONINL
-#include "ibuf0ibuf.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
deleted file mode 100644
index 4d65a7f5250..00000000000
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ /dev/null
@@ -1,224 +0,0 @@
-/******************************************************
-Insert buffer
-
-(c) 1997 Innobase Oy
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "buf0lru.h"
-#include "page0page.h"
-
-extern ulint	ibuf_flush_count;
-
-/* If this number is n, an index page must contain at least the page size
-per n bytes of free space for ibuf to try to buffer inserts to this page.
-If there is this much of free space, the corresponding bits are set in the
-ibuf bitmap. */
-#define IBUF_PAGE_SIZE_PER_FREE_SPACE	32
-
-/* Insert buffer data struct for a single tablespace */
-struct ibuf_data_struct{
-	ulint		space;	/* space id */
-	ulint		seg_size;/* allocated pages if the file segment
-				containing ibuf header and tree */
-	ulint		size;	/* size of the insert buffer tree in pages */
-	ibool		empty;	/* after an insert to the ibuf tree is
-				performed, this is set to FALSE, and if a
-				contract operation finds the tree empty, this
-				is set to TRUE */
-	ulint		free_list_len;
-				/* length of the free list */
-	ulint		height;	/* tree height */
-	dict_index_t*	index;	/* insert buffer index */
-	UT_LIST_NODE_T(ibuf_data_t) data_list;
-				/* list of ibuf data structs */
-	ulint		n_inserts;/* number of inserts made to the insert
-				buffer */
-	ulint		n_merges;/* number of pages merged */
-	ulint		n_merged_recs;/* number of records merged */
-};
-
-struct ibuf_struct{
-	ulint		size;		/* current size of the ibuf index
-					trees in pages */
-	ulint		max_size;	/* recommended maximum size in pages
-					for the ibuf index tree */
-	UT_LIST_BASE_NODE_T(ibuf_data_t) data_list;
-					/* list of ibuf data structs for
-					each tablespace */
-};
-
-/****************************************************************************
-Sets the free bit of the page in the ibuf bitmap. This is done in a separate
-mini-transaction, hence this operation does not restrict further work to only
-ibuf bitmap operations, which would result if the latch to the bitmap page
-were kept. */
-
-void
-ibuf_set_free_bits(
-/*===============*/
-	ulint	type,	/* in: index type */
-	page_t*	page,	/* in: index page; free bit is reset if the index is
-			a non-clustered non-unique, and page level is 0 */
-	ulint	val,	/* in: value to set: < 4 */
-	ulint	max_val);/* in: ULINT_UNDEFINED or a maximum value which
-			the bits must have before setting; this is for
-			debugging */
-
-/**************************************************************************
-A basic partial test if an insert to the insert buffer could be possible and
-recommended. */
-UNIV_INLINE
-ibool
-ibuf_should_try(
-/*============*/
-	dict_index_t*	index,			/* in: index where to insert */
-	ulint		ignore_sec_unique)	/* in: if != 0, we should
-						ignore UNIQUE constraint on
-						a secondary index when we
-						decide */
-{
-	if (!(index->type & DICT_CLUSTERED)
-	    && (ignore_sec_unique || !(index->type & DICT_UNIQUE))) {
-
-		ibuf_flush_count++;
-
-		if (ibuf_flush_count % 8 == 0) {
-
-			buf_LRU_try_free_flushed_blocks();
-		}
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/***************************************************************************
-Checks if a page address is an ibuf bitmap page address. */
-UNIV_INLINE
-ibool
-ibuf_bitmap_page(
-/*=============*/
-			/* out: TRUE if a bitmap page */
-	ulint	page_no)/* in: page number */
-{
-	if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_IBUF_BITMAP_OFFSET) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/*************************************************************************
-Translates the free space on a page to a value in the ibuf bitmap.*/
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free_bits(
-/*===========================*/
-				/* out: value for ibuf bitmap bits */
-	ulint	max_ins_size)	/* in: maximum insert size after reorganize
-				for the page */
-{
-	ulint	n;
-
-	n = max_ins_size / (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-
-	if (n == 3) {
-		n = 2;
-	}
-
-	if (n > 3) {
-		n = 3;
-	}
-
-	return(n);
-}
-
-/*************************************************************************
-Translates the ibuf free bits to the free space on a page in bytes. */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free_from_bits(
-/*================================*/
-			/* out: maximum insert size after reorganize for the
-			page */
-	ulint	bits)	/* in: value for ibuf bitmap bits */
-{
-	ut_ad(bits < 4);
-
-	if (bits == 3) {
-		return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-	}
-
-	return(bits * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-}
-
-/*************************************************************************
-Translates the free space on a page to a value in the ibuf bitmap.*/
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free(
-/*======================*/
-			/* out: value for ibuf bitmap bits */
-	page_t*	page)	/* in: non-unique secondary index page */
-{
-	return(ibuf_index_page_calc_free_bits(
-		       page_get_max_insert_size_after_reorganize(page, 1)));
-}
-
-/****************************************************************************
-Updates the free bits of the page in the ibuf bitmap if there is not enough
-free on the page any more. This is done in a separate mini-transaction, hence
-this operation does not restrict further work to only ibuf bitmap operations,
-which would result if the latch to the bitmap page were kept. */
-UNIV_INLINE
-void
-ibuf_update_free_bits_if_full(
-/*==========================*/
-	dict_index_t*	index,	/* in: index */
-	page_t*		page,	/* in: index page to which we have added new
-				records; the free bits are updated if the
-				index is non-clustered and non-unique and
-				the page level is 0, and the page becomes
-				fuller */
-	ulint		max_ins_size,/* in: value of maximum insert size with
-				reorganize before the latest operation
-				performed to the page */
-	ulint		increase)/* in: upper limit for the additional space
-				used in the latest operation, if known, or
-				ULINT_UNDEFINED */
-{
-	ulint	before;
-	ulint	after;
-
-	before = ibuf_index_page_calc_free_bits(max_ins_size);
-
-	if (max_ins_size >= increase) {
-#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE
-# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE"
-#endif
-		after = ibuf_index_page_calc_free_bits(max_ins_size
-						       - increase);
-#ifdef UNIV_IBUF_DEBUG
-		ut_a(after <= ibuf_index_page_calc_free(page));
-#endif
-	} else {
-		after = ibuf_index_page_calc_free(page);
-	}
-
-	if (after == 0) {
-		/* We move the page to the front of the buffer pool LRU list:
-		the purpose of this is to prevent those pages to which we
-		cannot make inserts using the insert buffer from slipping
-		out of the buffer pool */
-
-		buf_page_make_young(page);
-	}
-
-	if (before > after) {
-		ibuf_set_free_bits(index->type, page, after, before);
-	}
-}
diff --git a/storage/innobase/include/ibuf0types.h b/storage/innobase/include/ibuf0types.h
deleted file mode 100644
index fb202ac44b0..00000000000
--- a/storage/innobase/include/ibuf0types.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/******************************************************
-Insert buffer global types
-
-(c) 1997 Innobase Oy
-
-Created 7/29/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef ibuf0types_h
-#define ibuf0types_h
-
-typedef struct ibuf_data_struct	ibuf_data_t;
-typedef	struct ibuf_struct	ibuf_t;
-
-#endif
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
deleted file mode 100644
index 635724bf5a1..00000000000
--- a/storage/innobase/include/lock0lock.h
+++ /dev/null
@@ -1,709 +0,0 @@
-/******************************************************
-The transaction lock system
-
-(c) 1996 Innobase Oy
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef lock0lock_h
-#define lock0lock_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "rem0types.h"
-#include "dict0types.h"
-#include "que0types.h"
-#include "page0types.h"
-#include "lock0types.h"
-#include "read0types.h"
-#include "hash0hash.h"
-
-#ifdef UNIV_DEBUG
-extern ibool	lock_print_waits;
-#endif /* UNIV_DEBUG */
-/* Buffer for storing information about the most recent deadlock error */
-extern FILE*	lock_latest_err_file;
-
-/*************************************************************************
-Gets the size of a lock struct. */
-
-ulint
-lock_get_size(void);
-/*===============*/
-			/* out: size in bytes */
-/*************************************************************************
-Creates the lock system at database start. */
-
-void
-lock_sys_create(
-/*============*/
-	ulint	n_cells);	/* in: number of slots in lock hash table */
-/*************************************************************************
-Checks if some transaction has an implicit x-lock on a record in a secondary
-index. */
-
-trx_t*
-lock_sec_rec_some_has_impl_off_kernel(
-/*==================================*/
-				/* out: transaction which has the x-lock, or
-				NULL */
-	rec_t*		rec,	/* in: user record */
-	dict_index_t*	index,	/* in: secondary index */
-	const ulint*	offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
-Checks if some transaction has an implicit x-lock on a record in a clustered
-index. */
-UNIV_INLINE
-trx_t*
-lock_clust_rec_some_has_impl(
-/*=========================*/
-				/* out: transaction which has the x-lock, or
-				NULL */
-	rec_t*		rec,	/* in: user record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets);/* in: rec_get_offsets(rec, index) */
-/*****************************************************************
-Makes a record to inherit the locks of another record as gap type
-locks, but does not reset the lock bits of the other record. Also
-waiting lock requests on rec are inherited as GRANTED gap locks. */
-
-void
-lock_rec_inherit_to_gap(
-/*====================*/
-	rec_t*	heir,	/* in: record which inherits */
-	rec_t*	rec);	/* in: record from which inherited; does NOT reset
-			the locks on this record */
-/*****************************************************************
-Updates the lock table when we have reorganized a page. NOTE: we copy
-also the locks set on the infimum of the page; the infimum may carry
-locks if an update of a record is occurring on the page, and its locks
-were temporarily stored on the infimum. */
-
-void
-lock_move_reorganize_page(
-/*======================*/
-	page_t*	page,		/* in: old index page */
-	page_t*	new_page);	/* in: reorganized page */
-/*****************************************************************
-Moves the explicit locks on user records to another page if a record
-list end is moved to another page. */
-
-void
-lock_move_rec_list_end(
-/*===================*/
-	page_t*	new_page,	/* in: index page to move to */
-	page_t*	page,		/* in: index page */
-	rec_t*	rec);		/* in: record on page: this is the
-				first record moved */
-/*****************************************************************
-Moves the explicit locks on user records to another page if a record
-list start is moved to another page. */
-
-void
-lock_move_rec_list_start(
-/*=====================*/
-	page_t*	new_page,	/* in: index page to move to */
-	page_t*	page,		/* in: index page */
-	rec_t*	rec,		/* in: record on page: this is the
-				first record NOT copied */
-	rec_t*	old_end);	/* in: old previous-to-last record on
-				new_page before the records were copied */
-/*****************************************************************
-Updates the lock table when a page is split to the right. */
-
-void
-lock_update_split_right(
-/*====================*/
-	page_t*	right_page,	/* in: right page */
-	page_t*	left_page);	/* in: left page */
-/*****************************************************************
-Updates the lock table when a page is merged to the right. */
-
-void
-lock_update_merge_right(
-/*====================*/
-	rec_t*	orig_succ,	/* in: original successor of infimum
-				on the right page before merge */
-	page_t*	left_page);	/* in: merged index page which will be
-				discarded */
-/*****************************************************************
-Updates the lock table when the root page is copied to another in
-btr_root_raise_and_insert. Note that we leave lock structs on the
-root page, even though they do not make sense on other than leaf
-pages: the reason is that in a pessimistic update the infimum record
-of the root page will act as a dummy carrier of the locks of the record
-to be updated. */
-
-void
-lock_update_root_raise(
-/*===================*/
-	page_t*	new_page,	/* in: index page to which copied */
-	page_t*	root);		/* in: root page */
-/*****************************************************************
-Updates the lock table when a page is copied to another and the original page
-is removed from the chain of leaf pages, except if page is the root! */
-
-void
-lock_update_copy_and_discard(
-/*=========================*/
-	page_t*	new_page,	/* in: index page to which copied */
-	page_t*	page);		/* in: index page; NOT the root! */
-/*****************************************************************
-Updates the lock table when a page is split to the left. */
-
-void
-lock_update_split_left(
-/*===================*/
-	page_t*	right_page,	/* in: right page */
-	page_t*	left_page);	/* in: left page */
-/*****************************************************************
-Updates the lock table when a page is merged to the left. */
-
-void
-lock_update_merge_left(
-/*===================*/
-	page_t*	left_page,	/* in: left page to which merged */
-	rec_t*	orig_pred,	/* in: original predecessor of supremum
-				on the left page before merge */
-	page_t*	right_page);	/* in: merged index page which will be
-				discarded */
-/*****************************************************************
-Resets the original locks on heir and replaces them with gap type locks
-inherited from rec. */
-
-void
-lock_rec_reset_and_inherit_gap_locks(
-/*=================================*/
-	rec_t*	heir,	/* in: heir record */
-	rec_t*	rec);	/* in: record */
-/*****************************************************************
-Updates the lock table when a page is discarded. */
-
-void
-lock_update_discard(
-/*================*/
-	rec_t*	heir,	/* in: record which will inherit the locks */
-	page_t*	page);	/* in: index page which will be discarded */
-/*****************************************************************
-Updates the lock table when a new user record is inserted. */
-
-void
-lock_update_insert(
-/*===============*/
-	rec_t*	rec);	/* in: the inserted record */
-/*****************************************************************
-Updates the lock table when a record is removed. */
-
-void
-lock_update_delete(
-/*===============*/
-	rec_t*	rec);	/* in: the record to be removed */
-/*************************************************************************
-Stores on the page infimum record the explicit locks of another record.
-This function is used to store the lock state of a record when it is
-updated and the size of the record changes in the update. The record
-is in such an update moved, perhaps to another page. The infimum record
-acts as a dummy carrier record, taking care of lock releases while the
-actual record is being moved. */
-
-void
-lock_rec_store_on_page_infimum(
-/*===========================*/
-	page_t*	page,	/* in: page containing the record */
-	rec_t*	rec);	/* in: record whose lock state is stored
-			on the infimum record of the same page; lock
-			bits are reset on the record */
-/*************************************************************************
-Restores the state of explicit lock requests on a single record, where the
-state was stored on the infimum of the page. */
-
-void
-lock_rec_restore_from_page_infimum(
-/*===============================*/
-	rec_t*	rec,	/* in: record whose lock state is restored */
-	page_t*	page);	/* in: page (rec is not necessarily on this page)
-			whose infimum stored the lock state; lock bits are
-			reset on the infimum */
-/*************************************************************************
-Returns TRUE if there are explicit record locks on a page. */
-
-ibool
-lock_rec_expl_exist_on_page(
-/*========================*/
-			/* out: TRUE if there are explicit record locks on
-			the page */
-	ulint	space,	/* in: space id */
-	ulint	page_no);/* in: page number */
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate insert of
-a record. If they do, first tests if the query thread should anyway
-be suspended for some reason; if not, then puts the transaction and
-the query thread to the lock wait state and inserts a waiting request
-for a gap x-lock to the lock queue. */
-
-ulint
-lock_rec_insert_check_and_lock(
-/*===========================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	rec_t*		rec,	/* in: record after which to insert */
-	dict_index_t*	index,	/* in: index */
-	que_thr_t*	thr,	/* in: query thread */
-	ibool*		inherit);/* out: set to TRUE if the new inserted
-				record maybe should inherit LOCK_GAP type
-				locks from the successor record */
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate modify (update,
-delete mark, or delete unmark) of a clustered index record. If they do,
-first tests if the query thread should anyway be suspended for some
-reason; if not, then puts the transaction and the query thread to the
-lock wait state and inserts a waiting request for a record x-lock to the
-lock queue. */
-
-ulint
-lock_clust_rec_modify_check_and_lock(
-/*=================================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	rec_t*		rec,	/* in: record which should be modified */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	que_thr_t*	thr);	/* in: query thread */
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate modify
-(delete mark or delete unmark) of a secondary index record. */
-
-ulint
-lock_sec_rec_modify_check_and_lock(
-/*===============================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	rec_t*		rec,	/* in: record which should be modified;
-				NOTE: as this is a secondary index, we
-				always have to modify the clustered index
-				record first: see the comment below */
-	dict_index_t*	index,	/* in: secondary index */
-	que_thr_t*	thr);	/* in: query thread */
-/*************************************************************************
-Like the counterpart for a clustered index below, but now we read a
-secondary index record. */
-
-ulint
-lock_sec_rec_read_check_and_lock(
-/*=============================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	rec_t*		rec,	/* in: user record or page supremum record
-				which should be read or passed over by a read
-				cursor */
-	dict_index_t*	index,	/* in: secondary index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	ulint		mode,	/* in: mode of the lock which the read cursor
-				should set on records: LOCK_S or LOCK_X; the
-				latter is possible in SELECT FOR UPDATE */
-	ulint		gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
-				LOCK_REC_NOT_GAP */
-	que_thr_t*	thr);	/* in: query thread */
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. */
-
-ulint
-lock_clust_rec_read_check_and_lock(
-/*===============================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	rec_t*		rec,	/* in: user record or page supremum record
-				which should be read or passed over by a read
-				cursor */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	ulint		mode,	/* in: mode of the lock which the read cursor
-				should set on records: LOCK_S or LOCK_X; the
-				latter is possible in SELECT FOR UPDATE */
-	ulint		gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
-				LOCK_REC_NOT_GAP */
-	que_thr_t*	thr);	/* in: query thread */
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. This is an alternative version of
-lock_clust_rec_read_check_and_lock() that does not require the parameter
-"offsets". */
-
-ulint
-lock_clust_rec_read_check_and_lock_alt(
-/*===================================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	rec_t*		rec,	/* in: user record or page supremum record
-				which should be read or passed over by a read
-				cursor */
-	dict_index_t*	index,	/* in: clustered index */
-	ulint		mode,	/* in: mode of the lock which the read cursor
-				should set on records: LOCK_S or LOCK_X; the
-				latter is possible in SELECT FOR UPDATE */
-	ulint		gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
-				LOCK_REC_NOT_GAP */
-	que_thr_t*	thr);	/* in: query thread */
-/*************************************************************************
-Checks that a record is seen in a consistent read. */
-
-ibool
-lock_clust_rec_cons_read_sees(
-/*==========================*/
-				/* out: TRUE if sees, or FALSE if an earlier
-				version of the record should be retrieved */
-	rec_t*		rec,	/* in: user record which should be read or
-				passed over by a read cursor */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	read_view_t*	view);	/* in: consistent read view */
-/*************************************************************************
-Checks that a non-clustered index record is seen in a consistent read. */
-
-ulint
-lock_sec_rec_cons_read_sees(
-/*========================*/
-				/* out: TRUE if certainly sees, or FALSE if an
-				earlier version of the clustered index record
-				might be needed: NOTE that a non-clustered
-				index page contains so little information on
-				its modifications that also in the case FALSE,
-				the present version of rec may be the right,
-				but we must check this from the clustered
-				index record */
-	rec_t*		rec,	/* in: user record which should be read or
-				passed over by a read cursor */
-	dict_index_t*	index,	/* in: non-clustered index */
-	read_view_t*	view);	/* in: consistent read view */
-/*************************************************************************
-Locks the specified database table in the mode given. If the lock cannot
-be granted immediately, the query thread is put to wait. */
-
-ulint
-lock_table(
-/*=======*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	dict_table_t*	table,	/* in: database table in dictionary cache */
-	ulint		mode,	/* in: lock mode */
-	que_thr_t*	thr);	/* in: query thread */
-/*************************************************************************
-Checks if there are any locks set on the table. */
-
-ibool
-lock_is_on_table(
-/*=============*/
-				/* out: TRUE if there are lock(s) */
-	dict_table_t*	table);	/* in: database table in dictionary cache */
-/*****************************************************************
-Removes a granted record lock of a transaction from the queue and grants
-locks to other transactions waiting in the queue if they now are entitled
-to a lock. */
-
-void
-lock_rec_unlock(
-/*============*/
-	trx_t*	trx,		/* in: transaction that has set a record
-				lock */
-	rec_t*	rec,		/* in: record */
-	ulint	lock_mode);	/* in: LOCK_S or LOCK_X */
-/*************************************************************************
-Releases a table lock.
-Releases possible other transactions waiting for this lock. */
-
-void
-lock_table_unlock(
-/*==============*/
-	lock_t*	lock);	/* in: lock */
-/*************************************************************************
-Releases an auto-inc lock a transaction possibly has on a table.
-Releases possible other transactions waiting for this lock. */
-
-void
-lock_table_unlock_auto_inc(
-/*=======================*/
-	trx_t*	trx);	/* in: transaction */
-/*************************************************************************
-Releases transaction locks, and releases possible other transactions waiting
-because of these locks. */
-
-void
-lock_release_off_kernel(
-/*====================*/
-	trx_t*	trx);	/* in: transaction */
-/*************************************************************************
-Cancels a waiting lock request and releases possible other transactions
-waiting behind it. */
-
-void
-lock_cancel_waiting_and_release(
-/*============================*/
-	lock_t*	lock);	/* in: waiting lock request */
-
-/*************************************************************************
-Removes locks on a table to be dropped or truncated.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock, that is going to be removed, is allowed to be a wait lock. */
-
-void
-lock_remove_all_on_table(
-/*=====================*/
-	dict_table_t*	table,			/* in: table to be dropped
-						or truncated */
-	ibool		remove_also_table_sx_locks);/* in: also removes
-						table S and X locks */
-
-/*************************************************************************
-Calculates the fold value of a page file address: used in inserting or
-searching for a lock in the hash table. */
-UNIV_INLINE
-ulint
-lock_rec_fold(
-/*==========*/
-			/* out: folded value */
-	ulint	space,	/* in: space */
-	ulint	page_no);/* in: page number */
-/*************************************************************************
-Calculates the hash value of a page file address: used in inserting or
-searching for a lock in the hash table. */
-UNIV_INLINE
-ulint
-lock_rec_hash(
-/*==========*/
-			/* out: hashed value */
-	ulint	space,	/* in: space */
-	ulint	page_no);/* in: page number */
-/*************************************************************************
-Gets the source table of an ALTER TABLE transaction.  The table must be
-covered by an IX or IS table lock. */
-
-dict_table_t*
-lock_get_src_table(
-/*===============*/
-				/* out: the source table of transaction,
-				if it is covered by an IX or IS table lock;
-				dest if there is no source table, and
-				NULL if the transaction is locking more than
-				two tables or an inconsistency is found */
-	trx_t*		trx,	/* in: transaction */
-	dict_table_t*	dest,	/* in: destination of ALTER TABLE */
-	ulint*		mode);	/* out: lock mode of the source table */
-/*************************************************************************
-Determine if the given table is exclusively "owned" by the given
-transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
-on the table. */
-
-ibool
-lock_is_table_exclusive(
-/*====================*/
-				/* out: TRUE if table is only locked by trx,
-				with LOCK_IX, and possibly LOCK_AUTO_INC */
-	dict_table_t*	table,	/* in: table */
-	trx_t*		trx);	/* in: transaction */
-/*************************************************************************
-Checks if a lock request lock1 has to wait for request lock2. */
-
-ibool
-lock_has_to_wait(
-/*=============*/
-			/* out: TRUE if lock1 has to wait for lock2 to be
-			removed */
-	lock_t*	lock1,	/* in: waiting lock */
-	lock_t*	lock2);	/* in: another lock; NOTE that it is assumed that this
-			has a lock bit set on the same record as in lock1 if
-			the locks are record locks */
-/*************************************************************************
-Checks that a transaction id is sensible, i.e., not in the future. */
-
-ibool
-lock_check_trx_id_sanity(
-/*=====================*/
-					/* out: TRUE if ok */
-	dulint		trx_id,		/* in: trx id */
-	rec_t*		rec,		/* in: user record */
-	dict_index_t*	index,		/* in: clustered index */
-	const ulint*	offsets,	/* in: rec_get_offsets(rec, index) */
-	ibool		has_kernel_mutex);/* in: TRUE if the caller owns the
-					kernel mutex */
-/*************************************************************************
-Validates the lock queue on a single record. */
-
-ibool
-lock_rec_queue_validate(
-/*====================*/
-				/* out: TRUE if ok */
-	rec_t*		rec,	/* in: record to look at */
-	dict_index_t*	index,	/* in: index, or NULL if not known */
-	const ulint*	offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
-Prints info of a table lock. */
-
-void
-lock_table_print(
-/*=============*/
-	FILE*	file,	/* in: file where to print */
-	lock_t*	lock);	/* in: table type lock */
-/*************************************************************************
-Prints info of a record lock. */
-
-void
-lock_rec_print(
-/*===========*/
-	FILE*	file,	/* in: file where to print */
-	lock_t*	lock);	/* in: record type lock */
-/*************************************************************************
-Prints info of locks for all transactions. */
-
-void
-lock_print_info_summary(
-/*====================*/
-	FILE*	file);	/* in: file where to print */
-/*************************************************************************
-Prints info of locks for each transaction. */
-
-void
-lock_print_info_all_transactions(
-/*=============================*/
-	FILE*	file);	/* in: file where to print */
-/*************************************************************************
-Validates the lock queue on a table. */
-
-ibool
-lock_table_queue_validate(
-/*======================*/
-				/* out: TRUE if ok */
-	dict_table_t*	table);	/* in: table */
-/*************************************************************************
-Validates the record lock queues on a page. */
-
-ibool
-lock_rec_validate_page(
-/*===================*/
-			/* out: TRUE if ok */
-	ulint	space,	/* in: space id */
-	ulint	page_no);/* in: page number */
-/*************************************************************************
-Validates the lock system. */
-
-ibool
-lock_validate(void);
-/*===============*/
-			/* out: TRUE if ok */
-/*************************************************************************
-Return approximate number or record locks (bits set in the bitmap) for
-this transaction. Since delete-marked records may be removed, the
-record count will not be precise. */
-
-ulint
-lock_number_of_rows_locked(
-/*=======================*/
-	trx_t*	trx);	/* in: transaction */
-
-/* The lock system */
-extern lock_sys_t*	lock_sys;
-
-/* Lock modes and types */
-/* Basic modes */
-#define	LOCK_NONE	0	/* this flag is used elsewhere to note
-				consistent read */
-#define	LOCK_IS		2	/* intention shared */
-#define	LOCK_IX		3	/* intention exclusive */
-#define	LOCK_S		4	/* shared */
-#define	LOCK_X		5	/* exclusive */
-#define	LOCK_AUTO_INC	6	/* locks the auto-inc counter of a table
-				in an exclusive mode */
-#define LOCK_MODE_MASK	0xFUL	/* mask used to extract mode from the
-				type_mode field in a lock */
-/* Lock types */
-#define LOCK_TABLE	16	/* these type values should be so high that */
-#define	LOCK_REC	32	/* they can be ORed to the lock mode */
-#define LOCK_TYPE_MASK	0xF0UL	/* mask used to extract lock type from the
-				type_mode field in a lock */
-/* Waiting lock flag */
-#define LOCK_WAIT	256	/* this wait bit should be so high that
-				it can be ORed to the lock mode and type;
-				when this bit is set, it means that the
-				lock has not yet been granted, it is just
-				waiting for its turn in the wait queue */
-/* Precise modes */
-#define LOCK_ORDINARY	0	/* this flag denotes an ordinary next-key lock
-				in contrast to LOCK_GAP or LOCK_REC_NOT_GAP */
-#define LOCK_GAP	512	/* this gap bit should be so high that
-				it can be ORed to the other flags;
-				when this bit is set, it means that the
-				lock holds only on the gap before the record;
-				for instance, an x-lock on the gap does not
-				give permission to modify the record on which
-				the bit is set; locks of this type are created
-				when records are removed from the index chain
-				of records */
-#define LOCK_REC_NOT_GAP 1024	/* this bit means that the lock is only on
-				the index record and does NOT block inserts
-				to the gap before the index record; this is
-				used in the case when we retrieve a record
-				with a unique key, and is also used in
-				locking plain SELECTs (not part of UPDATE
-				or DELETE) when the user has set the READ
-				COMMITTED isolation level */
-#define LOCK_INSERT_INTENTION 2048 /* this bit is set when we place a waiting
-				gap type record lock request in order to let
-				an insert of an index record to wait until
-				there are no conflicting locks by other
-				transactions on the gap; note that this flag
-				remains set when the waiting lock is granted,
-				or if the lock is inherited to a neighboring
-				record */
-
-/* When lock bits are reset, the following flags are available: */
-#define LOCK_RELEASE_WAIT	1
-#define LOCK_NOT_RELEASE_WAIT	2
-
-/* Lock operation struct */
-typedef struct lock_op_struct	lock_op_t;
-struct lock_op_struct{
-	dict_table_t*	table;	/* table to be locked */
-	ulint		mode;	/* lock mode */
-};
-
-#define LOCK_OP_START		1
-#define LOCK_OP_COMPLETE	2
-
-/* The lock system struct */
-struct lock_sys_struct{
-	hash_table_t*	rec_hash;	/* hash table of the record locks */
-};
-
-/* The lock system */
-extern lock_sys_t*	lock_sys;
-
-
-#ifndef UNIV_NONINL
-#include "lock0lock.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/lock0lock.ic b/storage/innobase/include/lock0lock.ic
deleted file mode 100644
index 311623b190b..00000000000
--- a/storage/innobase/include/lock0lock.ic
+++ /dev/null
@@ -1,81 +0,0 @@
-/******************************************************
-The transaction lock system
-
-(c) 1996 Innobase Oy
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#include "sync0sync.h"
-#include "srv0srv.h"
-#include "dict0dict.h"
-#include "row0row.h"
-#include "trx0sys.h"
-#include "trx0trx.h"
-#include "buf0buf.h"
-#include "page0page.h"
-#include "page0cur.h"
-#include "row0vers.h"
-#include "que0que.h"
-#include "btr0cur.h"
-#include "read0read.h"
-#include "log0recv.h"
-
-/*************************************************************************
-Calculates the fold value of a page file address: used in inserting or
-searching for a lock in the hash table. */
-UNIV_INLINE
-ulint
-lock_rec_fold(
-/*==========*/
-			/* out: folded value */
-	ulint	space,	/* in: space */
-	ulint	page_no)/* in: page number */
-{
-	return(ut_fold_ulint_pair(space, page_no));
-}
-
-/*************************************************************************
-Calculates the hash value of a page file address: used in inserting or
-searching for a lock in the hash table. */
-UNIV_INLINE
-ulint
-lock_rec_hash(
-/*==========*/
-			/* out: hashed value */
-	ulint	space,	/* in: space */
-	ulint	page_no)/* in: page number */
-{
-	return(hash_calc_hash(lock_rec_fold(space, page_no),
-			      lock_sys->rec_hash));
-}
-
-/*************************************************************************
-Checks if some transaction has an implicit x-lock on a record in a clustered
-index. */
-UNIV_INLINE
-trx_t*
-lock_clust_rec_some_has_impl(
-/*=========================*/
-				/* out: transaction which has the x-lock, or
-				NULL */
-	rec_t*		rec,	/* in: user record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
-{
-	dulint	trx_id;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(index->type & DICT_CLUSTERED);
-	ut_ad(page_rec_is_user_rec(rec));
-
-	trx_id = row_get_rec_trx_id(rec, index, offsets);
-
-	if (trx_is_active(trx_id)) {
-		/* The modifying or inserting transaction is active */
-
-		return(trx_get_on_id(trx_id));
-	}
-
-	return(NULL);
-}
diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h
deleted file mode 100644
index 7703a2b7def..00000000000
--- a/storage/innobase/include/lock0priv.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/******************************************************
-Lock module internal structures and methods.
-
-(c) 2007 Innobase Oy
-
-Created July 12, 2007 Vasil Dimov
-*******************************************************/
-
-#ifndef lock0priv_h
-#define lock0priv_h
-
-#ifndef LOCK_MODULE_IMPLEMENTATION
-/* If you need to access members of the structures defined in this
-file, please write appropriate functions that retrieve them and put
-those functions in lock/ */
-#error Do not include lock0priv.h outside of the lock/ module
-#endif
-
-#include "univ.i"
-#include "dict0types.h"
-#include "hash0hash.h"
-#include "trx0types.h"
-#include "ut0lst.h"
-
-/* A table lock */
-typedef struct lock_table_struct	lock_table_t;
-struct lock_table_struct {
-	dict_table_t*	table;		/* database table in dictionary
-					cache */
-	UT_LIST_NODE_T(lock_t)
-			locks;		/* list of locks on the same
-					table */
-};
-
-/* Record lock for a page */
-typedef struct lock_rec_struct		lock_rec_t;
-struct lock_rec_struct {
-	ulint	space;			/* space id */
-	ulint	page_no;		/* page number */
-	ulint	n_bits;			/* number of bits in the lock
-					bitmap; NOTE: the lock bitmap is
-					placed immediately after the
-					lock struct */
-};
-
-/* Lock struct */
-struct lock_struct {
-	trx_t*		trx;		/* transaction owning the
-					lock */
-	UT_LIST_NODE_T(lock_t)
-			trx_locks;	/* list of the locks of the
-					transaction */
-	ulint		type_mode;	/* lock type, mode, LOCK_GAP or
-					LOCK_REC_NOT_GAP,
-					LOCK_INSERT_INTENTION,
-					wait flag, ORed */
-	hash_node_t	hash;		/* hash chain node for a record
-					lock */
-	dict_index_t*	index;		/* index for a record lock */
-	union {
-		lock_table_t	tab_lock;/* table lock */
-		lock_rec_t	rec_lock;/* record lock */
-	} un_member;
-};
-
-/*************************************************************************
-Gets the type of a lock. */
-UNIV_INLINE
-ulint
-lock_get_type(
-/*==========*/
-				/* out: LOCK_TABLE or LOCK_REC */
-	const lock_t*	lock);	/* in: lock */
-
-/**************************************************************************
-Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
-if none found. */
-
-ulint
-lock_rec_find_set_bit(
-/*==================*/
-			/* out: bit index == heap number of the record, or
-			ULINT_UNDEFINED if none found */
-	lock_t*	lock);	/* in: record lock with at least one bit set */
-
-/*************************************************************************
-Gets the previous record lock set on a record. */
-
-lock_t*
-lock_rec_get_prev(
-/*==============*/
-			/* out: previous lock on the same record, NULL if
-			none exists */
-	lock_t*	in_lock,/* in: record lock */
-	ulint	heap_no);/* in: heap number of the record */
-
-#ifndef UNIV_NONINL
-#include "lock0priv.ic"
-#endif
-
-#endif /* lock0priv_h */
diff --git a/storage/innobase/include/lock0priv.ic b/storage/innobase/include/lock0priv.ic
deleted file mode 100644
index 4bc8397509d..00000000000
--- a/storage/innobase/include/lock0priv.ic
+++ /dev/null
@@ -1,32 +0,0 @@
-/******************************************************
-Lock module internal inline methods.
-
-(c) 2007 Innobase Oy
-
-Created July 16, 2007 Vasil Dimov
-*******************************************************/
-
-/* This file contains only methods which are used in
-lock/lock0* files, other than lock/lock0lock.c.
-I.e. lock/lock0lock.c contains more internal inline
-methods but they are used only in that file. */
-
-#ifndef LOCK_MODULE_IMPLEMENTATION
-#error Do not include lock0priv.ic outside of the lock/ module
-#endif
-
-/*************************************************************************
-Gets the type of a lock. */
-UNIV_INLINE
-ulint
-lock_get_type(
-/*==========*/
-				/* out: LOCK_TABLE or LOCK_REC */
-	const lock_t*	lock)	/* in: lock */
-{
-	ut_ad(lock);
-
-	return(lock->type_mode & LOCK_TYPE_MASK);
-}
-
-/* vim: set filetype=c: */
diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h
deleted file mode 100644
index 43fd2d60da5..00000000000
--- a/storage/innobase/include/lock0types.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/******************************************************
-The transaction lock system global types
-
-(c) 1996 Innobase Oy
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef lock0types_h
-#define lock0types_h
-
-#define lock_t ib_lock_t
-typedef struct lock_struct	lock_t;
-typedef struct lock_sys_struct	lock_sys_t;
-
-#endif
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
deleted file mode 100644
index 091bbe34562..00000000000
--- a/storage/innobase/include/log0recv.h
+++ /dev/null
@@ -1,349 +0,0 @@
-/******************************************************
-Recovery
-
-(c) 1997 Innobase Oy
-
-Created 9/20/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef log0recv_h
-#define log0recv_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "page0types.h"
-#include "hash0hash.h"
-#include "log0log.h"
-
-#ifdef UNIV_HOTBACKUP
-extern ibool	recv_replay_file_ops;
-#endif /* UNIV_HOTBACKUP */
-
-/***********************************************************************
-Reads the checkpoint info needed in hot backup. */
-
-ibool
-recv_read_cp_info_for_backup(
-/*=========================*/
-			/* out: TRUE if success */
-	byte*	hdr,	/* in: buffer containing the log group header */
-	dulint*	lsn,	/* out: checkpoint lsn */
-	ulint*	offset,	/* out: checkpoint offset in the log group */
-	ulint*	fsp_limit,/* out: fsp limit of space 0, 1000000000 if the
-			database is running with < version 3.23.50 of InnoDB */
-	dulint*	cp_no,	/* out: checkpoint number */
-	dulint*	first_header_lsn);
-			/* out: lsn of of the start of the first log file */
-/***********************************************************************
-Scans the log segment and n_bytes_scanned is set to the length of valid
-log scanned. */
-
-void
-recv_scan_log_seg_for_backup(
-/*=========================*/
-	byte*		buf,		/* in: buffer containing log data */
-	ulint		buf_len,	/* in: data length in that buffer */
-	dulint*		scanned_lsn,	/* in/out: lsn of buffer start,
-					we return scanned lsn */
-	ulint*		scanned_checkpoint_no,
-					/* in/out: 4 lowest bytes of the
-					highest scanned checkpoint number so
-					far */
-	ulint*		n_bytes_scanned);/* out: how much we were able to
-					scan, smaller than buf_len if log
-					data ended here */
-/***********************************************************************
-Returns TRUE if recovery is currently running. */
-UNIV_INLINE
-ibool
-recv_recovery_is_on(void);
-/*=====================*/
-/***********************************************************************
-Returns TRUE if recovery from backup is currently running. */
-UNIV_INLINE
-ibool
-recv_recovery_from_backup_is_on(void);
-/*=================================*/
-/****************************************************************************
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool. */
-
-void
-recv_recover_page(
-/*==============*/
-	ibool	recover_backup,	/* in: TRUE if we are recovering a backup
-				page: then we do not acquire any latches
-				since the page was read in outside the
-				buffer pool */
-	ibool	just_read_in,	/* in: TRUE if the i/o-handler calls this for
-				a freshly read page */
-	page_t*	page,		/* in: buffer page */
-	ulint	space,		/* in: space id */
-	ulint	page_no);	/* in: page number */
-/************************************************************
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it. */
-
-ulint
-recv_recovery_from_checkpoint_start(
-/*================================*/
-				/* out: error code or DB_SUCCESS */
-	ulint	type,		/* in: LOG_CHECKPOINT or LOG_ARCHIVE */
-	dulint	limit_lsn,	/* in: recover up to this lsn if possible */
-	dulint	min_flushed_lsn,/* in: min flushed lsn from data files */
-	dulint	max_flushed_lsn);/* in: max flushed lsn from data files */
-/************************************************************
-Completes recovery from a checkpoint. */
-
-void
-recv_recovery_from_checkpoint_finish(void);
-/*======================================*/
-/***********************************************************
-Scans log from a buffer and stores new log data to the parsing buffer. Parses
-and hashes the log records if new data found. */
-
-ibool
-recv_scan_log_recs(
-/*===============*/
-				/* out: TRUE if limit_lsn has been reached, or
-				not able to scan any more in this log group */
-	ibool	apply_automatically,/* in: TRUE if we want this function to
-				apply log records automatically when the
-				hash table becomes full; in the hot backup tool
-				the tool does the applying, not this
-				function */
-	ulint	available_memory,/* in: we let the hash table of recs to grow
-				to this size, at the maximum */
-	ibool	store_to_hash,	/* in: TRUE if the records should be stored
-				to the hash table; this is set to FALSE if just
-				debug checking is needed */
-	byte*	buf,		/* in: buffer containing a log segment or
-				garbage */
-	ulint	len,		/* in: buffer length */
-	dulint	start_lsn,	/* in: buffer start lsn */
-	dulint*	contiguous_lsn,	/* in/out: it is known that all log groups
-				contain contiguous log data up to this lsn */
-	dulint*	group_scanned_lsn);/* out: scanning succeeded up to this lsn */
-/**********************************************************
-Resets the logs. The contents of log files will be lost! */
-
-void
-recv_reset_logs(
-/*============*/
-	dulint	lsn,		/* in: reset to this lsn rounded up to
-				be divisible by OS_FILE_LOG_BLOCK_SIZE,
-				after which we add LOG_BLOCK_HDR_SIZE */
-#ifdef UNIV_LOG_ARCHIVE
-	ulint	arch_log_no,	/* in: next archived log file number */
-#endif /* UNIV_LOG_ARCHIVE */
-	ibool	new_logs_created);/* in: TRUE if resetting logs is done
-				at the log creation; FALSE if it is done
-				after archive recovery */
-#ifdef UNIV_HOTBACKUP
-/**********************************************************
-Creates new log files after a backup has been restored. */
-
-void
-recv_reset_log_files_for_backup(
-/*============================*/
-	const char*	log_dir,	/* in: log file directory path */
-	ulint		n_log_files,	/* in: number of log files */
-	ulint		log_file_size,	/* in: log file size */
-	dulint		lsn);		/* in: new start lsn, must be
-					divisible by OS_FILE_LOG_BLOCK_SIZE */
-#endif /* UNIV_HOTBACKUP */
-/************************************************************
-Creates the recovery system. */
-
-void
-recv_sys_create(void);
-/*=================*/
-/************************************************************
-Inits the recovery system for a recovery operation. */
-
-void
-recv_sys_init(
-/*==========*/
-	ibool	recover_from_backup,	/* in: TRUE if this is called
-					to recover from a hot backup */
-	ulint	available_memory);	/* in: available memory in bytes */
-/***********************************************************************
-Empties the hash table of stored log records, applying them to appropriate
-pages. */
-
-void
-recv_apply_hashed_log_recs(
-/*=======================*/
-	ibool	allow_ibuf);	/* in: if TRUE, also ibuf operations are
-				allowed during the application; if FALSE,
-				no ibuf operations are allowed, and after
-				the application all file pages are flushed to
-				disk and invalidated in buffer pool: this
-				alternative means that no new log records
-				can be generated during the application */
-#ifdef UNIV_HOTBACKUP
-/***********************************************************************
-Applies log records in the hash table to a backup. */
-
-void
-recv_apply_log_recs_for_backup(void);
-/*================================*/
-#endif
-#ifdef UNIV_LOG_ARCHIVE
-/************************************************************
-Recovers from archived log files, and also from log files, if they exist. */
-
-ulint
-recv_recovery_from_archive_start(
-/*=============================*/
-				/* out: error code or DB_SUCCESS */
-	dulint	min_flushed_lsn,/* in: min flushed lsn field from the
-				data files */
-	dulint	limit_lsn,	/* in: recover up to this lsn if possible */
-	ulint	first_log_no);	/* in: number of the first archived log file
-				to use in the recovery; the file will be
-				searched from INNOBASE_LOG_ARCH_DIR specified
-				in server config file */
-/************************************************************
-Completes recovery from archive. */
-
-void
-recv_recovery_from_archive_finish(void);
-/*===================================*/
-#endif /* UNIV_LOG_ARCHIVE */
-
-/* Block of log record data */
-typedef struct recv_data_struct	recv_data_t;
-struct recv_data_struct{
-	recv_data_t*	next;	/* pointer to the next block or NULL */
-				/* the log record data is stored physically
-				immediately after this struct, max amount
-				RECV_DATA_BLOCK_SIZE bytes of it */
-};
-
-/* Stored log record struct */
-typedef struct recv_struct	recv_t;
-struct recv_struct{
-	byte		type;	/* log record type */
-	ulint		len;	/* log record body length in bytes */
-	recv_data_t*	data;	/* chain of blocks containing the log record
-				body */
-	dulint		start_lsn;/* start lsn of the log segment written by
-				the mtr which generated this log record: NOTE
-				that this is not necessarily the start lsn of
-				this log record */
-	dulint		end_lsn;/* end lsn of the log segment written by
-				the mtr which generated this log record: NOTE
-				that this is not necessarily the end lsn of
-				this log record */
-	UT_LIST_NODE_T(recv_t)
-			rec_list;/* list of log records for this page */
-};
-
-/* Hashed page file address struct */
-typedef struct recv_addr_struct	recv_addr_t;
-struct recv_addr_struct{
-	ulint		state;	/* RECV_NOT_PROCESSED, RECV_BEING_PROCESSED,
-				or RECV_PROCESSED */
-	ulint		space;	/* space id */
-	ulint		page_no;/* page number */
-	UT_LIST_BASE_NODE_T(recv_t)
-			rec_list;/* list of log records for this page */
-	hash_node_t	addr_hash;
-};
-
-/* Recovery system data structure */
-typedef struct recv_sys_struct	recv_sys_t;
-struct recv_sys_struct{
-	mutex_t		mutex;	/* mutex protecting the fields apply_log_recs,
-				n_addrs, and the state field in each recv_addr
-				struct */
-	ibool		apply_log_recs;
-				/* this is TRUE when log rec application to
-				pages is allowed; this flag tells the
-				i/o-handler if it should do log record
-				application */
-	ibool		apply_batch_on;
-				/* this is TRUE when a log rec application
-				batch is running */
-	dulint		lsn;	/* log sequence number */
-	ulint		last_log_buf_size;
-				/* size of the log buffer when the database
-				last time wrote to the log */
-	byte*		last_block;
-				/* possible incomplete last recovered log
-				block */
-	byte*		last_block_buf_start;
-				/* the nonaligned start address of the
-				preceding buffer */
-	byte*		buf;	/* buffer for parsing log records */
-	ulint		len;	/* amount of data in buf */
-	dulint		parse_start_lsn;
-				/* this is the lsn from which we were able to
-				start parsing log records and adding them to
-				the hash table; ut_dulint_zero if a suitable
-				start point not found yet */
-	dulint		scanned_lsn;
-				/* the log data has been scanned up to this
-				lsn */
-	ulint		scanned_checkpoint_no;
-				/* the log data has been scanned up to this
-				checkpoint number (lowest 4 bytes) */
-	ulint		recovered_offset;
-				/* start offset of non-parsed log records in
-				buf */
-	dulint		recovered_lsn;
-				/* the log records have been parsed up to
-				this lsn */
-	dulint		limit_lsn;/* recovery should be made at most up to this
-				lsn */
-	ibool		found_corrupt_log;
-				/* this is set to TRUE if we during log
-				scan find a corrupt log block, or a corrupt
-				log record, or there is a log parsing
-				buffer overflow */
-	log_group_t*	archive_group;
-				/* in archive recovery: the log group whose
-				archive is read */
-	mem_heap_t*	heap;	/* memory heap of log records and file
-				addresses*/
-	hash_table_t*	addr_hash;/* hash table of file addresses of pages */
-	ulint		n_addrs;/* number of not processed hashed file
-				addresses in the hash table */
-};
-
-extern recv_sys_t*	recv_sys;
-extern ibool		recv_recovery_on;
-extern ibool		recv_no_ibuf_operations;
-extern ibool		recv_needed_recovery;
-
-extern ibool		recv_lsn_checks_on;
-#ifdef UNIV_HOTBACKUP
-extern ibool		recv_is_making_a_backup;
-#endif /* UNIV_HOTBACKUP */
-extern ulint		recv_max_parsed_page_no;
-
-/* Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
-times! */
-#define RECV_PARSING_BUF_SIZE	(2 * 1024 * 1024)
-
-/* Size of block reads when the log groups are scanned forward to do a
-roll-forward */
-#define RECV_SCAN_SIZE		(4 * UNIV_PAGE_SIZE)
-
-/* States of recv_addr_struct */
-#define RECV_NOT_PROCESSED	71
-#define RECV_BEING_READ		72
-#define RECV_BEING_PROCESSED	73
-#define RECV_PROCESSED		74
-
-extern ulint	recv_n_pool_free_frames;
-
-#ifndef UNIV_NONINL
-#include "log0recv.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/log0recv.ic b/storage/innobase/include/log0recv.ic
deleted file mode 100644
index 489641bade2..00000000000
--- a/storage/innobase/include/log0recv.ic
+++ /dev/null
@@ -1,35 +0,0 @@
-/******************************************************
-Recovery
-
-(c) 1997 Innobase Oy
-
-Created 9/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "sync0sync.h"
-#include "mem0mem.h"
-#include "log0log.h"
-#include "os0file.h"
-
-extern ibool	recv_recovery_from_backup_on;
-
-/***********************************************************************
-Returns TRUE if recovery is currently running. */
-UNIV_INLINE
-ibool
-recv_recovery_is_on(void)
-/*=====================*/
-{
-	return(recv_recovery_on);
-}
-
-/***********************************************************************
-Returns TRUE if recovery from backup is currently running. */
-UNIV_INLINE
-ibool
-recv_recovery_from_backup_is_on(void)
-/*=================================*/
-{
-	return(recv_recovery_from_backup_on);
-}
-
diff --git a/storage/innobase/include/mach0data.h b/storage/innobase/include/mach0data.h
deleted file mode 100644
index 25b619b3f12..00000000000
--- a/storage/innobase/include/mach0data.h
+++ /dev/null
@@ -1,345 +0,0 @@
-/**********************************************************************
-Utilities for converting data from the database file
-to the machine format.
-
-(c) 1995 Innobase Oy
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef mach0data_h
-#define mach0data_h
-
-#include "univ.i"
-#include "ut0byte.h"
-
-/* The data and all fields are always stored in a database file
-in the same format: ascii, big-endian, ... .
-All data in the files MUST be accessed using the functions in this
-module. */
-
-/***********************************************************
-The following function is used to store data in one byte. */
-UNIV_INLINE
-void
-mach_write_to_1(
-/*============*/
-	byte*	b,	/* in: pointer to byte where to store */
-	ulint	n);	 /* in: ulint integer to be stored, >= 0, < 256 */
-/************************************************************
-The following function is used to fetch data from one byte. */
-UNIV_INLINE
-ulint
-mach_read_from_1(
-/*=============*/
-			/* out: ulint integer, >= 0, < 256 */
-	byte*	b);	 /* in: pointer to byte */
-/***********************************************************
-The following function is used to store data in two consecutive
-bytes. We store the most significant byte to the lower address. */
-UNIV_INLINE
-void
-mach_write_to_2(
-/*============*/
-	byte*	b,	/* in: pointer to two bytes where to store */
-	ulint	n);	 /* in: ulint integer to be stored, >= 0, < 64k */
-/************************************************************
-The following function is used to fetch data from two consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-ulint
-mach_read_from_2(
-/*=============*/
-			/* out: ulint integer, >= 0, < 64k */
-	byte*	b);	 /* in: pointer to two bytes */
-
-/************************************************************
-The following function is used to convert a 16-bit data item
-to the canonical format, for fast bytewise equality test
-against memory. */
-UNIV_INLINE
-uint16
-mach_encode_2(
-/*==========*/
-			/* out: 16-bit integer in canonical format */
-	ulint	n);	/* in: integer in machine-dependent format */
-/************************************************************
-The following function is used to convert a 16-bit data item
-from the canonical format, for fast bytewise equality test
-against memory. */
-UNIV_INLINE
-ulint
-mach_decode_2(
-/*==========*/
-			/* out: integer in machine-dependent format */
-	uint16	n);	/* in: 16-bit integer in canonical format */
-/***********************************************************
-The following function is used to store data in 3 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_3(
-/*============*/
-	byte*	b,	/* in: pointer to 3 bytes where to store */
-	ulint	n);	 /* in: ulint integer to be stored */
-/************************************************************
-The following function is used to fetch data from 3 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-ulint
-mach_read_from_3(
-/*=============*/
-			/* out: ulint integer */
-	byte*	b);	 /* in: pointer to 3 bytes */
-/***********************************************************
-The following function is used to store data in four consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_4(
-/*============*/
-	byte*	b,	/* in: pointer to four bytes where to store */
-	ulint	n);	 /* in: ulint integer to be stored */
-/************************************************************
-The following function is used to fetch data from 4 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-ulint
-mach_read_from_4(
-/*=============*/
-			/* out: ulint integer */
-	byte*	b);	 /* in: pointer to four bytes */
-/*************************************************************
-Writes a ulint in a compressed form (1..5 bytes). */
-UNIV_INLINE
-ulint
-mach_write_compressed(
-/*==================*/
-			/* out: stored size in bytes */
-	byte*	b,	/* in: pointer to memory where to store */
-	ulint	n);	/* in: ulint integer to be stored */
-/*************************************************************
-Returns the size of an ulint when written in the compressed form. */
-UNIV_INLINE
-ulint
-mach_get_compressed_size(
-/*=====================*/
-			/* out: compressed size in bytes */
-	ulint	n);	/* in: ulint integer to be stored */
-/*************************************************************
-Reads a ulint in a compressed form. */
-UNIV_INLINE
-ulint
-mach_read_compressed(
-/*=================*/
-			/* out: read integer */
-	byte*	b);	/* in: pointer to memory from where to read */
-/***********************************************************
-The following function is used to store data in 6 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_6(
-/*============*/
-	byte*	b,	/* in: pointer to 6 bytes where to store */
-	dulint	n);	 /* in: dulint integer to be stored */
-/************************************************************
-The following function is used to fetch data from 6 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-dulint
-mach_read_from_6(
-/*=============*/
-			/* out: dulint integer */
-	byte*	b);	 /* in: pointer to 6 bytes */
-/***********************************************************
-The following function is used to store data in 7 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_7(
-/*============*/
-	byte*	b,	/* in: pointer to 7 bytes where to store */
-	dulint	n);	 /* in: dulint integer to be stored */
-/************************************************************
-The following function is used to fetch data from 7 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-dulint
-mach_read_from_7(
-/*=============*/
-			/* out: dulint integer */
-	byte*	b);	 /* in: pointer to 7 bytes */
-/***********************************************************
-The following function is used to store data in 8 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_8(
-/*============*/
-	byte*	b,	/* in: pointer to 8 bytes where to store */
-	dulint	n);	/* in: dulint integer to be stored */
-/************************************************************
-The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-dulint
-mach_read_from_8(
-/*=============*/
-			/* out: dulint integer */
-	byte*	b);	 /* in: pointer to 8 bytes */
-/*************************************************************
-Writes a dulint in a compressed form (5..9 bytes). */
-UNIV_INLINE
-ulint
-mach_dulint_write_compressed(
-/*=========================*/
-			/* out: size in bytes */
-	byte*	b,	/* in: pointer to memory where to store */
-	dulint	n);	/* in: dulint integer to be stored */
-/*************************************************************
-Returns the size of a dulint when written in the compressed form. */
-UNIV_INLINE
-ulint
-mach_dulint_get_compressed_size(
-/*============================*/
-			/* out: compressed size in bytes */
-	dulint	 n);	/* in: dulint integer to be stored */
-/*************************************************************
-Reads a dulint in a compressed form. */
-UNIV_INLINE
-dulint
-mach_dulint_read_compressed(
-/*========================*/
-			/* out: read dulint */
-	byte*	b);	/* in: pointer to memory from where to read */
-/*************************************************************
-Writes a dulint in a compressed form (1..11 bytes). */
-UNIV_INLINE
-ulint
-mach_dulint_write_much_compressed(
-/*==============================*/
-			/* out: size in bytes */
-	byte*	b,	/* in: pointer to memory where to store */
-	dulint	n);	/* in: dulint integer to be stored */
-/*************************************************************
-Returns the size of a dulint when written in the compressed form. */
-UNIV_INLINE
-ulint
-mach_dulint_get_much_compressed_size(
-/*=================================*/
-			/* out: compressed size in bytes */
-	dulint	 n);	 /* in: dulint integer to be stored */
-/*************************************************************
-Reads a dulint in a compressed form. */
-UNIV_INLINE
-dulint
-mach_dulint_read_much_compressed(
-/*=============================*/
-			/* out: read dulint */
-	byte*	b);	 /* in: pointer to memory from where to read */
-/*************************************************************
-Reads a ulint in a compressed form if the log record fully contains it. */
-
-byte*
-mach_parse_compressed(
-/*==================*/
-			/* out: pointer to end of the stored field, NULL if
-			not complete */
-	byte*	ptr,	/* in: pointer to buffer from where to read */
-	byte*	end_ptr,/* in: pointer to end of the buffer */
-	ulint*	val);	/* out: read value */
-/*************************************************************
-Reads a dulint in a compressed form if the log record fully contains it. */
-
-byte*
-mach_dulint_parse_compressed(
-/*=========================*/
-			/* out: pointer to end of the stored field, NULL if
-			not complete */
-	byte*	ptr,	/* in: pointer to buffer from where to read */
-	byte*	end_ptr,/* in: pointer to end of the buffer */
-	dulint*	val);	/* out: read value */
-/*************************************************************
-Reads a double. It is stored in a little-endian format. */
-UNIV_INLINE
-double
-mach_double_read(
-/*=============*/
-			/* out: double read */
-	byte*	b);	 /* in: pointer to memory from where to read */
-/*************************************************************
-Writes a double. It is stored in a little-endian format. */
-UNIV_INLINE
-void
-mach_double_write(
-/*==============*/
-	byte*	b,	/* in: pointer to memory where to write */
-	double	d);	/* in: double */
-/*************************************************************
-Reads a float. It is stored in a little-endian format. */
-UNIV_INLINE
-float
-mach_float_read(
-/*============*/
-			/* out: float read */
-	byte*	b);	 /* in: pointer to memory from where to read */
-/*************************************************************
-Writes a float. It is stored in a little-endian format. */
-UNIV_INLINE
-void
-mach_float_write(
-/*=============*/
-	byte*	b,	/* in: pointer to memory where to write */
-	float	d);	/* in: float */
-/*************************************************************
-Reads a ulint stored in the little-endian format. */
-UNIV_INLINE
-ulint
-mach_read_from_n_little_endian(
-/*===========================*/
-				/* out: unsigned long int */
-	byte*	buf,		/* in: from where to read */
-	ulint	buf_size);	/* in: from how many bytes to read */
-/*************************************************************
-Writes a ulint in the little-endian format. */
-UNIV_INLINE
-void
-mach_write_to_n_little_endian(
-/*==========================*/
-	byte*	dest,		/* in: where to write */
-	ulint	dest_size,	/* in: into how many bytes to write */
-	ulint	n);		/* in: unsigned long int to write */
-/*************************************************************
-Reads a ulint stored in the little-endian format. */
-UNIV_INLINE
-ulint
-mach_read_from_2_little_endian(
-/*===========================*/
-				/* out: unsigned long int */
-	byte*	buf);		/* in: from where to read */
-/*************************************************************
-Writes a ulint in the little-endian format. */
-UNIV_INLINE
-void
-mach_write_to_2_little_endian(
-/*==========================*/
-	byte*	dest,		/* in: where to write */
-	ulint	n);		/* in: unsigned long int to write */
-
-/*************************************************************
-Convert integral type from storage byte order (big endian) to
-host byte order. */
-UNIV_INLINE
-ullint
-mach_read_int_type(
-/*===============*/
-					/* out: integer value */
-	const byte*	src,		/* in: where to read from */
-	ulint		len,		/* in: length of src */
-	ibool		unsigned_type);	/* in: signed or unsigned flag */
-#ifndef UNIV_NONINL
-#include "mach0data.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/mem0pool.h b/storage/innobase/include/mem0pool.h
deleted file mode 100644
index bf659ca9a72..00000000000
--- a/storage/innobase/include/mem0pool.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/******************************************************
-The lowest-level memory management
-
-(c) 1994, 1995 Innobase Oy
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef mem0pool_h
-#define mem0pool_h
-
-#include "univ.i"
-#include "os0file.h"
-#include "ut0lst.h"
-
-typedef struct mem_area_struct	mem_area_t;
-typedef struct mem_pool_struct	mem_pool_t;
-
-/* The common memory pool */
-extern mem_pool_t*	mem_comm_pool;
-
-/* Memory area header */
-
-struct mem_area_struct{
-	ulint		size_and_free;	/* memory area size is obtained by
-					anding with ~MEM_AREA_FREE; area in
-					a free list if ANDing with
-					MEM_AREA_FREE results in nonzero */
-	UT_LIST_NODE_T(mem_area_t)
-			free_list;	/* free list node */
-};
-
-/* Each memory area takes this many extra bytes for control information */
-#define MEM_AREA_EXTRA_SIZE	(ut_calc_align(sizeof(struct mem_area_struct),\
-			UNIV_MEM_ALIGNMENT))
-
-/************************************************************************
-Creates a memory pool. */
-
-mem_pool_t*
-mem_pool_create(
-/*============*/
-			/* out: memory pool */
-	ulint	size);	/* in: pool size in bytes */
-/************************************************************************
-Allocates memory from a pool. NOTE: This low-level function should only be
-used in mem0mem.*! */
-
-void*
-mem_area_alloc(
-/*===========*/
-				/* out, own: allocated memory buffer */
-	ulint		size,	/* in: allocated size in bytes; for optimum
-				space usage, the size should be a power of 2
-				minus MEM_AREA_EXTRA_SIZE */
-	mem_pool_t*	pool);	/* in: memory pool */
-/************************************************************************
-Frees memory to a pool. */
-
-void
-mem_area_free(
-/*==========*/
-	void*		ptr,	/* in, own: pointer to allocated memory
-				buffer */
-	mem_pool_t*	pool);	/* in: memory pool */
-/************************************************************************
-Returns the amount of reserved memory. */
-
-ulint
-mem_pool_get_reserved(
-/*==================*/
-				/* out: reserved mmeory in bytes */
-	mem_pool_t*	pool);	/* in: memory pool */
-/************************************************************************
-Reserves the mem pool mutex. */
-
-void
-mem_pool_mutex_enter(void);
-/*======================*/
-/************************************************************************
-Releases the mem pool mutex. */
-
-void
-mem_pool_mutex_exit(void);
-/*=====================*/
-/************************************************************************
-Validates a memory pool. */
-
-ibool
-mem_pool_validate(
-/*==============*/
-				/* out: TRUE if ok */
-	mem_pool_t*	pool);	/* in: memory pool */
-/************************************************************************
-Prints info of a memory pool. */
-
-void
-mem_pool_print_info(
-/*================*/
-	FILE*		outfile,/* in: output file to write to */
-	mem_pool_t*	pool);	/* in: memory pool */
-
-
-#ifndef UNIV_NONINL
-#include "mem0pool.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/mem0pool.ic b/storage/innobase/include/mem0pool.ic
deleted file mode 100644
index 4e8c08733ed..00000000000
--- a/storage/innobase/include/mem0pool.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/************************************************************************
-The lowest-level memory management
-
-(c) 1994, 1995 Innobase Oy
-
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h
deleted file mode 100644
index 6a3920aa8a1..00000000000
--- a/storage/innobase/include/mtr0log.h
+++ /dev/null
@@ -1,217 +0,0 @@
-/******************************************************
-Mini-transaction logging routines
-
-(c) 1995 Innobase Oy
-
-Created 12/7/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef mtr0log_h
-#define mtr0log_h
-
-#include "univ.i"
-#include "mtr0mtr.h"
-#include "dict0types.h"
-
-/************************************************************
-Writes 1 - 4 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
-
-void
-mlog_write_ulint(
-/*=============*/
-	byte*	ptr,	/* in: pointer where to write */
-	ulint	val,	/* in: value to write */
-	byte	type,	/* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-	mtr_t*	mtr);	/* in: mini-transaction handle */
-/************************************************************
-Writes 8 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
-
-void
-mlog_write_dulint(
-/*==============*/
-	byte*	ptr,	/* in: pointer where to write */
-	dulint	val,	/* in: value to write */
-	mtr_t*	mtr);	/* in: mini-transaction handle */
-/************************************************************
-Writes a string to a file page buffered in the buffer pool. Writes the
-corresponding log record to the mini-transaction log. */
-
-void
-mlog_write_string(
-/*==============*/
-	byte*		ptr,	/* in: pointer where to write */
-	const byte*	str,	/* in: string to write */
-	ulint		len,	/* in: string length */
-	mtr_t*		mtr);	/* in: mini-transaction handle */
-/************************************************************
-Writes initial part of a log record consisting of one-byte item
-type and four-byte space and page numbers. */
-
-void
-mlog_write_initial_log_record(
-/*==========================*/
-	byte*	ptr,	/* in: pointer to (inside) a buffer frame
-			holding the file page where modification
-			is made */
-	byte	type,	/* in: log item type: MLOG_1BYTE, ... */
-	mtr_t*	mtr);	/* in: mini-transaction handle */
-/************************************************************
-Writes a log record about an .ibd file create/delete/rename. */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_for_file_op(
-/*======================================*/
-			/* out: new value of log_ptr */
-	ulint	type,	/* in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
-			MLOG_FILE_RENAME */
-	ulint	space_id,/* in: space id, if applicable */
-	ulint	page_no,/* in: page number (not relevant currently) */
-	byte*	log_ptr,/* in: pointer to mtr log which has been opened */
-	mtr_t*	mtr);	/* in: mtr */
-/************************************************************
-Catenates 1 - 4 bytes to the mtr log. */
-UNIV_INLINE
-void
-mlog_catenate_ulint(
-/*================*/
-	mtr_t*	mtr,	/* in: mtr */
-	ulint	val,	/* in: value to write */
-	ulint	type);	/* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-/************************************************************
-Catenates n bytes to the mtr log. */
-
-void
-mlog_catenate_string(
-/*=================*/
-	mtr_t*		mtr,	/* in: mtr */
-	const byte*	str,	/* in: string to write */
-	ulint		len);	/* in: string length */
-/************************************************************
-Catenates a compressed ulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_ulint_compressed(
-/*===========================*/
-	mtr_t*	mtr,	/* in: mtr */
-	ulint	val);	/* in: value to write */
-/************************************************************
-Catenates a compressed dulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_dulint_compressed(
-/*============================*/
-	mtr_t*	mtr,	/* in: mtr */
-	dulint	val);	/* in: value to write */
-/************************************************************
-Opens a buffer to mlog. It must be closed with mlog_close. */
-UNIV_INLINE
-byte*
-mlog_open(
-/*======*/
-			/* out: buffer, NULL if log mode MTR_LOG_NONE */
-	mtr_t*	mtr,	/* in: mtr */
-	ulint	size);	/* in: buffer size in bytes; MUST be
-			smaller than DYN_ARRAY_DATA_SIZE! */
-/************************************************************
-Closes a buffer opened to mlog. */
-UNIV_INLINE
-void
-mlog_close(
-/*=======*/
-	mtr_t*	mtr,	/* in: mtr */
-	byte*	ptr);	/* in: buffer space from ptr up was not used */
-/************************************************************
-Writes the initial part of a log record (3..11 bytes).
-If the implementation of this function is changed, all
-size parameters to mlog_open() should be adjusted accordingly! */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_fast(
-/*===============================*/
-			/* out: new value of log_ptr */
-	byte*	ptr,	/* in: pointer to (inside) a buffer frame holding the
-			file page where modification is made */
-	byte	type,	/* in: log item type: MLOG_1BYTE, ... */
-	byte*	log_ptr,/* in: pointer to mtr log which has been opened */
-	mtr_t*	mtr);	/* in: mtr */
-/************************************************************
-Parses an initial log record written by mlog_write_initial_log_record. */
-
-byte*
-mlog_parse_initial_log_record(
-/*==========================*/
-			/* out: parsed record end, NULL if not a complete
-			record */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	byte*	type,	/* out: log record type: MLOG_1BYTE, ... */
-	ulint*	space,	/* out: space id */
-	ulint*	page_no);/* out: page number */
-/************************************************************
-Parses a log record written by mlog_write_ulint or mlog_write_dulint. */
-
-byte*
-mlog_parse_nbytes(
-/*==============*/
-			/* out: parsed record end, NULL if not a complete
-			record */
-	ulint	type,	/* in: log record type: MLOG_1BYTE, ... */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	byte*	page);	/* in: page where to apply the log record, or NULL */
-/************************************************************
-Parses a log record written by mlog_write_string. */
-
-byte*
-mlog_parse_string(
-/*==============*/
-			/* out: parsed record end, NULL if not a complete
-			record */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	byte*	page);	/* in: page where to apply the log record, or NULL */
-
-
-/************************************************************
-Opens a buffer for mlog, writes the initial log record and,
-if needed, the field lengths of an index.  Reserves space
-for further log entries.  The log entry must be closed with
-mtr_close(). */
-
-byte*
-mlog_open_and_write_index(
-/*======================*/
-				/* out: buffer, NULL if log mode
-				MTR_LOG_NONE */
-	mtr_t*		mtr,	/* in: mtr */
-	byte*		rec,	/* in: index record or page */
-	dict_index_t*	index,	/* in: record descriptor */
-	byte		type,	/* in: log item type */
-	ulint		size);	/* in: requested buffer size in bytes
-				(if 0, calls mlog_close() and returns NULL) */
-
-/************************************************************
-Parses a log record written by mlog_open_and_write_index. */
-
-byte*
-mlog_parse_index(
-/*=============*/
-				/* out: parsed record end,
-				NULL if not a complete record */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-				/* out: new value of log_ptr */
-	ibool		comp,	/* in: TRUE=compact record format */
-	dict_index_t**	index);	/* out, own: dummy index */
-
-/* Insert, update, and maybe other functions may use this value to define an
-extra mlog buffer size for variable size data */
-#define MLOG_BUF_MARGIN	256
-
-#ifndef UNIV_NONINL
-#include "mtr0log.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
deleted file mode 100644
index 5b1d1ed34d9..00000000000
--- a/storage/innobase/include/mtr0log.ic
+++ /dev/null
@@ -1,227 +0,0 @@
-/******************************************************
-Mini-transaction logging routines
-
-(c) 1995 Innobase Oy
-
-Created 12/7/1995 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#include "ut0lst.h"
-#include "buf0buf.h"
-
-/************************************************************
-Opens a buffer to mlog. It must be closed with mlog_close. */
-UNIV_INLINE
-byte*
-mlog_open(
-/*======*/
-			/* out: buffer, NULL if log mode MTR_LOG_NONE */
-	mtr_t*	mtr,	/* in: mtr */
-	ulint	size)	/* in: buffer size in bytes; MUST be
-			smaller than DYN_ARRAY_DATA_SIZE! */
-{
-	dyn_array_t*	mlog;
-
-	mtr->modifications = TRUE;
-
-	if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
-
-		return(NULL);
-	}
-
-	mlog = &(mtr->log);
-
-	return(dyn_array_open(mlog, size));
-}
-
-/************************************************************
-Closes a buffer opened to mlog. */
-UNIV_INLINE
-void
-mlog_close(
-/*=======*/
-	mtr_t*	mtr,	/* in: mtr */
-	byte*	ptr)	/* in: buffer space from ptr up was not used */
-{
-	dyn_array_t*	mlog;
-
-	ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NONE);
-
-	mlog = &(mtr->log);
-
-	dyn_array_close(mlog, ptr);
-}
-
-/************************************************************
-Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */
-UNIV_INLINE
-void
-mlog_catenate_ulint(
-/*================*/
-	mtr_t*	mtr,	/* in: mtr */
-	ulint	val,	/* in: value to write */
-	ulint	type)	/* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-{
-	dyn_array_t*	mlog;
-	byte*		ptr;
-
-	if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
-
-		return;
-	}
-
-	mlog = &(mtr->log);
-
-#if MLOG_1BYTE != 1
-# error "MLOG_1BYTE != 1"
-#endif
-#if MLOG_2BYTES != 2
-# error "MLOG_2BYTES != 2"
-#endif
-#if MLOG_4BYTES != 4
-# error "MLOG_4BYTES != 4"
-#endif
-#if MLOG_8BYTES != 8
-# error "MLOG_8BYTES != 8"
-#endif
-	ptr = dyn_array_push(mlog, type);
-
-	if (type == MLOG_4BYTES) {
-		mach_write_to_4(ptr, val);
-	} else if (type == MLOG_2BYTES) {
-		mach_write_to_2(ptr, val);
-	} else {
-		ut_ad(type == MLOG_1BYTE);
-		mach_write_to_1(ptr, val);
-	}
-}
-
-/************************************************************
-Catenates a compressed ulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_ulint_compressed(
-/*===========================*/
-	mtr_t*	mtr,	/* in: mtr */
-	ulint	val)	/* in: value to write */
-{
-	byte*	log_ptr;
-
-	log_ptr = mlog_open(mtr, 10);
-
-	/* If no logging is requested, we may return now */
-	if (log_ptr == NULL) {
-
-		return;
-	}
-
-	log_ptr += mach_write_compressed(log_ptr, val);
-
-	mlog_close(mtr, log_ptr);
-}
-
-/************************************************************
-Catenates a compressed dulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_dulint_compressed(
-/*============================*/
-	mtr_t*	mtr,	/* in: mtr */
-	dulint	val)	/* in: value to write */
-{
-	byte*	log_ptr;
-
-	log_ptr = mlog_open(mtr, 15);
-
-	/* If no logging is requested, we may return now */
-	if (log_ptr == NULL) {
-
-		return;
-	}
-
-	log_ptr += mach_dulint_write_compressed(log_ptr, val);
-
-	mlog_close(mtr, log_ptr);
-}
-
-/************************************************************
-Writes the initial part of a log record (3..11 bytes).
-If the implementation of this function is changed, all
-size parameters to mlog_open() should be adjusted accordingly! */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_fast(
-/*===============================*/
-			/* out: new value of log_ptr */
-	byte*	ptr,	/* in: pointer to (inside) a buffer frame holding the
-			file page where modification is made */
-	byte	type,	/* in: log item type: MLOG_1BYTE, ... */
-	byte*	log_ptr,/* in: pointer to mtr log which has been opened */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	buf_block_t*	block;
-	ulint		space;
-	ulint		offset;
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(type <= MLOG_BIGGEST_TYPE);
-	ut_ad(ptr && log_ptr);
-
-	block = buf_block_align(ptr);
-
-	space = buf_block_get_space(block);
-	offset = buf_block_get_page_no(block);
-
-	mach_write_to_1(log_ptr, type);
-	log_ptr++;
-	log_ptr += mach_write_compressed(log_ptr, space);
-	log_ptr += mach_write_compressed(log_ptr, offset);
-
-	mtr->n_log_recs++;
-
-#ifdef UNIV_LOG_DEBUG
-	/*	fprintf(stderr,
-	"Adding to mtr log record type %lu space %lu page no %lu\n",
-	type, space, offset); */
-#endif
-
-#ifdef UNIV_DEBUG
-	/* We now assume that all x-latched pages have been modified! */
-
-	if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) {
-
-		mtr_memo_push(mtr, block, MTR_MEMO_MODIFY);
-	}
-#endif
-	return(log_ptr);
-}
-
-/************************************************************
-Writes a log record about an .ibd file create/delete/rename. */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_for_file_op(
-/*======================================*/
-			/* out: new value of log_ptr */
-	ulint	type,	/* in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
-			MLOG_FILE_RENAME */
-	ulint	space_id,/* in: space id, if applicable */
-	ulint	page_no,/* in: page number (not relevant currently) */
-	byte*	log_ptr,/* in: pointer to mtr log which has been opened */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	ut_ad(log_ptr);
-
-	mach_write_to_1(log_ptr, type);
-	log_ptr++;
-
-	/* We write dummy space id and page number */
-	log_ptr += mach_write_compressed(log_ptr, space_id);
-	log_ptr += mach_write_compressed(log_ptr, page_no);
-
-	mtr->n_log_recs++;
-
-	return(log_ptr);
-}
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
deleted file mode 100644
index 2a160d27e0c..00000000000
--- a/storage/innobase/include/mtr0mtr.h
+++ /dev/null
@@ -1,347 +0,0 @@
-/******************************************************
-Mini-transaction buffer
-
-(c) 1995 Innobase Oy
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef mtr0mtr_h
-#define mtr0mtr_h
-
-#include "univ.i"
-#include "mem0mem.h"
-#include "dyn0dyn.h"
-#include "buf0types.h"
-#include "sync0rw.h"
-#include "ut0byte.h"
-#include "mtr0types.h"
-#include "page0types.h"
-
-/* Logging modes for a mini-transaction */
-#define MTR_LOG_ALL		21	/* default mode: log all operations
-					modifying disk-based data */
-#define	MTR_LOG_NONE		22	/* log no operations */
-/*#define	MTR_LOG_SPACE	23 */	/* log only operations modifying
-					file space page allocation data
-					(operations in fsp0fsp.* ) */
-#define	MTR_LOG_SHORT_INSERTS	24	/* inserts are logged in a shorter
-					form */
-
-/* Types for the mlock objects to store in the mtr memo; NOTE that the
-first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
-#define	MTR_MEMO_PAGE_S_FIX	RW_S_LATCH
-#define	MTR_MEMO_PAGE_X_FIX	RW_X_LATCH
-#define	MTR_MEMO_BUF_FIX	RW_NO_LATCH
-#define MTR_MEMO_MODIFY		54
-#define	MTR_MEMO_S_LOCK		55
-#define	MTR_MEMO_X_LOCK		56
-
-/* Log item types: we have made them to be of the type 'byte'
-for the compiler to warn if val and type parameters are switched
-in a call to mlog_write_ulint. NOTE! For 1 - 8 bytes, the
-flag value must give the length also! */
-#define	MLOG_SINGLE_REC_FLAG	128		/* if the mtr contains only
-						one log record for one page,
-						i.e., write_initial_log_record
-						has been called only once,
-						this flag is ORed to the type
-						of that first log record */
-#define	MLOG_1BYTE		(1)		/* one byte is written */
-#define	MLOG_2BYTES		(2)		/* 2 bytes ... */
-#define	MLOG_4BYTES		(4)		/* 4 bytes ... */
-#define	MLOG_8BYTES		(8)		/* 8 bytes ... */
-#define	MLOG_REC_INSERT		((byte)9)	/* record insert */
-#define	MLOG_REC_CLUST_DELETE_MARK ((byte)10)	/* mark clustered index record
-						deleted */
-#define	MLOG_REC_SEC_DELETE_MARK ((byte)11)	/* mark secondary index record
-						deleted */
-#define MLOG_REC_UPDATE_IN_PLACE ((byte)13)	/* update of a record,
-						preserves record field sizes */
-#define MLOG_REC_DELETE		((byte)14)	/* delete a record from a
-						page */
-#define	MLOG_LIST_END_DELETE	((byte)15)	/* delete record list end on
-						index page */
-#define	MLOG_LIST_START_DELETE	((byte)16)	/* delete record list start on
-						index page */
-#define	MLOG_LIST_END_COPY_CREATED ((byte)17)	/* copy record list end to a
-						new created index page */
-#define	MLOG_PAGE_REORGANIZE	((byte)18)	/* reorganize an index page */
-#define MLOG_PAGE_CREATE	((byte)19)	/* create an index page */
-#define	MLOG_UNDO_INSERT	((byte)20)	/* insert entry in an undo
-						log */
-#define MLOG_UNDO_ERASE_END	((byte)21)	/* erase an undo log
-						page end */
-#define	MLOG_UNDO_INIT		((byte)22)	/* initialize a page in an
-						undo log */
-#define MLOG_UNDO_HDR_DISCARD	((byte)23)	/* discard an update undo log
-						header */
-#define	MLOG_UNDO_HDR_REUSE	((byte)24)	/* reuse an insert undo log
-						header */
-#define MLOG_UNDO_HDR_CREATE	((byte)25)	/* create an undo log header */
-#define MLOG_REC_MIN_MARK	((byte)26)	/* mark an index record as the
-						predefined minimum record */
-#define MLOG_IBUF_BITMAP_INIT	((byte)27)	/* initialize an ibuf bitmap
-						page */
-/*#define	MLOG_FULL_PAGE	((byte)28)	full contents of a page */
-#define MLOG_INIT_FILE_PAGE	((byte)29)	/* this means that a file page
-						is taken into use and the prior
-						contents of the page should be
-						ignored: in recovery we must
-						not trust the lsn values stored
-						to the file page */
-#define MLOG_WRITE_STRING	((byte)30)	/* write a string to a page */
-#define	MLOG_MULTI_REC_END	((byte)31)	/* if a single mtr writes
-						log records for several pages,
-						this log record ends the
-						sequence of these records */
-#define MLOG_DUMMY_RECORD	((byte)32)	/* dummy log record used to
-						pad a log block full */
-#define MLOG_FILE_CREATE	((byte)33)	/* log record about an .ibd
-						file creation */
-#define MLOG_FILE_RENAME	((byte)34)	/* log record about an .ibd
-						file rename */
-#define MLOG_FILE_DELETE	((byte)35)	/* log record about an .ibd
-						file deletion */
-#define MLOG_COMP_REC_MIN_MARK	((byte)36)	/* mark a compact index record
-						as the predefined minimum
-						record */
-#define MLOG_COMP_PAGE_CREATE	((byte)37)	/* create a compact
-						index page */
-#define MLOG_COMP_REC_INSERT	((byte)38)	/* compact record insert */
-#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39)
-						/* mark compact clustered index
-						record deleted */
-#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/* mark compact secondary index
-						record deleted; this log
-						record type is redundant, as
-						MLOG_REC_SEC_DELETE_MARK is
-						independent of the record
-						format. */
-#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/* update of a compact record,
-						preserves record field sizes */
-#define MLOG_COMP_REC_DELETE	((byte)42)	/* delete a compact record
-						from a page */
-#define MLOG_COMP_LIST_END_DELETE ((byte)43)	/* delete compact record list
-						end on index page */
-#define MLOG_COMP_LIST_START_DELETE ((byte)44)	/* delete compact record list
-						start on index page */
-#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45)
-						/* copy compact record list end
-						to a new created index page */
-#define MLOG_COMP_PAGE_REORGANIZE ((byte)46)	/* reorganize an index page */
-
-#define MLOG_BIGGEST_TYPE	((byte)46)	/* biggest value (used in
-						asserts) */
-
-/*******************************************************************
-Starts a mini-transaction and creates a mini-transaction handle
-and buffer in the memory buffer given by the caller. */
-UNIV_INLINE
-mtr_t*
-mtr_start(
-/*======*/
-			/* out: mtr buffer which also acts as
-			the mtr handle */
-	mtr_t*	mtr);	/* in: memory buffer for the mtr buffer */
-/*******************************************************************
-Starts a mini-transaction and creates a mini-transaction handle
-and buffer in the memory buffer given by the caller. */
-
-mtr_t*
-mtr_start_noninline(
-/*================*/
-			/* out: mtr buffer which also acts as
-			the mtr handle */
-	mtr_t*	mtr);	/* in: memory buffer for the mtr buffer */
-/*******************************************************************
-Commits a mini-transaction. */
-
-void
-mtr_commit(
-/*=======*/
-	mtr_t*	mtr);	/* in: mini-transaction */
-/**************************************************************
-Sets and returns a savepoint in mtr. */
-UNIV_INLINE
-ulint
-mtr_set_savepoint(
-/*==============*/
-			/* out: savepoint */
-	mtr_t*	mtr);	/* in: mtr */
-/**************************************************************
-Releases the latches stored in an mtr memo down to a savepoint.
-NOTE! The mtr must not have made changes to buffer pages after the
-savepoint, as these can be handled only by mtr_commit. */
-
-void
-mtr_rollback_to_savepoint(
-/*======================*/
-	mtr_t*	mtr,		/* in: mtr */
-	ulint	savepoint);	/* in: savepoint */
-/**************************************************************
-Releases the (index tree) s-latch stored in an mtr memo after a
-savepoint. */
-UNIV_INLINE
-void
-mtr_release_s_latch_at_savepoint(
-/*=============================*/
-	mtr_t*		mtr,		/* in: mtr */
-	ulint		savepoint,	/* in: savepoint */
-	rw_lock_t*	lock);		/* in: latch to release */
-/*******************************************************************
-Gets the logging mode of a mini-transaction. */
-UNIV_INLINE
-ulint
-mtr_get_log_mode(
-/*=============*/
-			/* out: logging mode: MTR_LOG_NONE, ... */
-	mtr_t*	mtr);	/* in: mtr */
-/*******************************************************************
-Changes the logging mode of a mini-transaction. */
-UNIV_INLINE
-ulint
-mtr_set_log_mode(
-/*=============*/
-			/* out: old mode */
-	mtr_t*	mtr,	/* in: mtr */
-	ulint	mode);	/* in: logging mode: MTR_LOG_NONE, ... */
-/************************************************************
-Reads 1 - 4 bytes from a file page buffered in the buffer pool. */
-
-ulint
-mtr_read_ulint(
-/*===========*/
-			/* out: value read */
-	byte*	ptr,	/* in: pointer from where to read */
-	ulint	type,	/* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-	mtr_t*	mtr);	/* in: mini-transaction handle */
-/************************************************************
-Reads 8 bytes from a file page buffered in the buffer pool. */
-
-dulint
-mtr_read_dulint(
-/*============*/
-			/* out: value read */
-	byte*	ptr,	/* in: pointer from where to read */
-	mtr_t*	mtr);	/* in: mini-transaction handle */
-/*************************************************************************
-This macro locks an rw-lock in s-mode. */
-#define mtr_s_lock(B, MTR)	mtr_s_lock_func((B), __FILE__, __LINE__,\
-						(MTR))
-/*************************************************************************
-This macro locks an rw-lock in x-mode. */
-#define mtr_x_lock(B, MTR)	mtr_x_lock_func((B), __FILE__, __LINE__,\
-						(MTR))
-/*************************************************************************
-NOTE! Use the macro above!
-Locks a lock in s-mode. */
-UNIV_INLINE
-void
-mtr_s_lock_func(
-/*============*/
-	rw_lock_t*	lock,	/* in: rw-lock */
-	const char*	file,	/* in: file name */
-	ulint		line,	/* in: line number */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************************
-NOTE! Use the macro above!
-Locks a lock in x-mode. */
-UNIV_INLINE
-void
-mtr_x_lock_func(
-/*============*/
-	rw_lock_t*	lock,	/* in: rw-lock */
-	const char*	file,	/* in: file name */
-	ulint		line,	/* in: line number */
-	mtr_t*		mtr);	/* in: mtr */
-
-/*******************************************************
-Releases an object in the memo stack. */
-
-void
-mtr_memo_release(
-/*=============*/
-	mtr_t*	mtr,	/* in: mtr */
-	void*	object,	/* in: object */
-	ulint	type);	/* in: object type: MTR_MEMO_S_LOCK, ... */
-#ifdef UNIV_DEBUG
-/**************************************************************
-Checks if memo contains the given item. */
-UNIV_INLINE
-ibool
-mtr_memo_contains(
-/*==============*/
-			/* out: TRUE if contains */
-	mtr_t*	mtr,	/* in: mtr */
-	void*	object,	/* in: object to search */
-	ulint	type);	/* in: type of object */
-/*************************************************************
-Prints info of an mtr handle. */
-
-void
-mtr_print(
-/*======*/
-	mtr_t*	mtr);	/* in: mtr */
-#endif /* UNIV_DEBUG */
-/*######################################################################*/
-
-#define	MTR_BUF_MEMO_SIZE	200	/* number of slots in memo */
-
-/*******************************************************************
-Returns the log object of a mini-transaction buffer. */
-UNIV_INLINE
-dyn_array_t*
-mtr_get_log(
-/*========*/
-			/* out: log */
-	mtr_t*	mtr);	/* in: mini-transaction */
-/*******************************************************
-Pushes an object to an mtr memo stack. */
-UNIV_INLINE
-void
-mtr_memo_push(
-/*==========*/
-	mtr_t*	mtr,	/* in: mtr */
-	void*	object,	/* in: object */
-	ulint	type);	/* in: object type: MTR_MEMO_S_LOCK, ... */
-
-
-/* Type definition of a mini-transaction memo stack slot. */
-typedef	struct mtr_memo_slot_struct	mtr_memo_slot_t;
-struct mtr_memo_slot_struct{
-	ulint	type;	/* type of the stored object (MTR_MEMO_S_LOCK, ...) */
-	void*	object;	/* pointer to the object */
-};
-
-/* Mini-transaction handle and buffer */
-struct mtr_struct{
-	ulint		state;	/* MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
-	dyn_array_t	memo;	/* memo stack for locks etc. */
-	dyn_array_t	log;	/* mini-transaction log */
-	ibool		modifications;
-				/* TRUE if the mtr made modifications to
-				buffer pool pages */
-	ulint		n_log_recs;
-				/* count of how many page initial log records
-				have been written to the mtr log */
-	ulint		log_mode; /* specifies which operations should be
-				logged; default value MTR_LOG_ALL */
-	dulint		start_lsn;/* start lsn of the possible log entry for
-				this mtr */
-	dulint		end_lsn;/* end lsn of the possible log entry for
-				this mtr */
-	ulint		magic_n;
-};
-
-#define	MTR_MAGIC_N		54551
-
-#define MTR_ACTIVE		12231
-#define MTR_COMMITTING		56456
-#define MTR_COMMITTED		34676
-
-#ifndef UNIV_NONINL
-#include "mtr0mtr.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h
deleted file mode 100644
index e3b6ec9a84f..00000000000
--- a/storage/innobase/include/mtr0types.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/******************************************************
-Mini-transaction buffer global types
-
-(c) 1995 Innobase Oy
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef mtr0types_h
-#define mtr0types_h
-
-typedef struct mtr_struct	mtr_t;
-
-#endif
diff --git a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h
deleted file mode 100644
index f54e08de7ee..00000000000
--- a/storage/innobase/include/os0proc.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/******************************************************
-The interface to the operating system
-process control primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0proc_h
-#define os0proc_h
-
-#include "univ.i"
-
-#ifdef UNIV_LINUX
-#include <sys/ipc.h>
-#include <sys/shm.h>
-#endif
-
-typedef void*			os_process_t;
-typedef unsigned long int	os_process_id_t;
-
-/* The cell type in os_awe_allocate_mem page info */
-#if defined(__WIN2000__) && defined(ULONG_PTR)
-typedef ULONG_PTR	os_awe_t;
-#else
-typedef ulint		os_awe_t;
-#endif
-
-/* Physical page size when Windows AWE is used. This is the normal
-page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB
-pages. */
-#define	OS_AWE_X86_PAGE_SIZE	4096
-
-extern ibool os_use_large_pages;
-/* Large page size. This may be a boot-time option on some platforms */
-extern ulint os_large_page_size;
-
-/********************************************************************
-Windows AWE support. Tries to enable the "lock pages in memory" privilege for
-the current process so that the current process can allocate memory-locked
-virtual address space to act as the window where AWE maps physical memory. */
-
-ibool
-os_awe_enable_lock_pages_in_mem(void);
-/*=================================*/
-				/* out: TRUE if success, FALSE if error;
-				prints error info to stderr if no success */
-/********************************************************************
-Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
-processor. */
-
-ibool
-os_awe_allocate_physical_mem(
-/*=========================*/
-				/* out: TRUE if success */
-	os_awe_t** page_info,	/* out, own: array of opaque data containing
-				the info for allocated physical memory pages;
-				each allocated 4 kB physical memory page has
-				one slot of type os_awe_t in the array */
-	ulint	  n_megabytes);	/* in: number of megabytes to allocate */
-/********************************************************************
-Allocates a window in the virtual address space where we can map then
-pages of physical memory. */
-
-byte*
-os_awe_allocate_virtual_mem_window(
-/*===============================*/
-			/* out, own: allocated memory, or NULL if did not
-			succeed */
-	ulint	size);	/* in: virtual memory allocation size in bytes, must
-			be < 2 GB */
-/********************************************************************
-With this function you can map parts of physical memory allocated with
-the ..._allocate_physical_mem to the virtual address space allocated with
-the previous function. Intel implements this so that the process page
-tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
-showed that this takes < 1 microsecond, much better than the estimated 80 us
-for copying a 16 kB page memory to memory. But, the operation will at least
-partially invalidate the translation lookaside buffer (TLB) of all
-processors. Under a real-world load the performance hit may be bigger. */
-
-ibool
-os_awe_map_physical_mem_to_window(
-/*==============================*/
-					/* out: TRUE if success; the function
-					calls exit(1) in case of an error */
-	byte*		ptr,		/* in: a page-aligned pointer to
-					somewhere in the virtual address
-					space window; we map the physical mem
-					pages here */
-	ulint		n_mem_pages,	/* in: number of 4 kB mem pages to
-					map */
-	os_awe_t*	page_info);	/* in: array of page infos for those
-					pages; each page has one slot in the
-					array */
-/********************************************************************
-Converts the current process id to a number. It is not guaranteed that the
-number is unique. In Linux returns the 'process number' of the current
-thread. That number is the same as one sees in 'top', for example. In Linux
-the thread id is not the same as one sees in 'top'. */
-
-ulint
-os_proc_get_number(void);
-/*====================*/
-/********************************************************************
-Allocates non-cacheable memory. */
-
-void*
-os_mem_alloc_nocache(
-/*=================*/
-			/* out: allocated memory */
-	ulint	n);	/* in: number of bytes */
-/********************************************************************
-Allocates large pages memory. */
-
-void*
-os_mem_alloc_large(
-/*===============*/
-					/* out: allocated memory */
-	ulint		n,		/* in: number of bytes */
-	ibool		set_to_zero,	/* in: TRUE if allocated memory
-					should be set to zero if
-					UNIV_SET_MEM_TO_ZERO is defined */
-	ibool		assert_on_error);/* in: if TRUE, we crash mysqld if
-					 the memory cannot be allocated */
-/********************************************************************
-Frees large pages memory. */
-
-void
-os_mem_free_large(
-/*==============*/
-void	*ptr);	/* in: number of bytes */
-/********************************************************************
-Sets the priority boost for threads released from waiting within the current
-process. */
-
-void
-os_process_set_priority_boost(
-/*==========================*/
-	ibool	do_boost);	/* in: TRUE if priority boost should be done,
-				FALSE if not */
-
-#ifndef UNIV_NONINL
-#include "os0proc.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/os0proc.ic b/storage/innobase/include/os0proc.ic
deleted file mode 100644
index 651ba1f17e3..00000000000
--- a/storage/innobase/include/os0proc.ic
+++ /dev/null
@@ -1,10 +0,0 @@
-/******************************************************
-The interface to the operating system
-process control primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-
diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h
deleted file mode 100644
index 26d2786e33b..00000000000
--- a/storage/innobase/include/os0sync.h
+++ /dev/null
@@ -1,311 +0,0 @@
-/******************************************************
-The interface to the operating system
-synchronization primitives.
-
-(c) 1995 Innobase Oy
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-#ifndef os0sync_h
-#define os0sync_h
-
-#include "univ.i"
-#include "ut0lst.h"
-
-#ifdef HAVE_SOLARIS_ATOMIC
-#include <atomic.h>
-#endif
-
-#ifdef __WIN__
-
-#define os_fast_mutex_t CRITICAL_SECTION
-
-typedef HANDLE		os_native_event_t;
-
-typedef struct os_event_struct	os_event_struct_t;
-typedef os_event_struct_t*	os_event_t;
-
-struct os_event_struct {
-	os_native_event_t		  handle;
-					/* Windows event */
-	UT_LIST_NODE_T(os_event_struct_t) os_event_list;
-					/* list of all created events */
-};
-#else
-typedef pthread_mutex_t	os_fast_mutex_t;
-
-typedef struct os_event_struct	os_event_struct_t;
-typedef os_event_struct_t*	os_event_t;
-
-struct os_event_struct {
-	os_fast_mutex_t	os_mutex;	/* this mutex protects the next
-					fields */
-	ibool		is_set;		/* this is TRUE when the event is
-					in the signaled state, i.e., a thread
-					does not stop if it tries to wait for
-					this event */
-	ib_longlong	signal_count;	/* this is incremented each time
-					the event becomes signaled */
-	pthread_cond_t	cond_var;	/* condition variable is used in
-					waiting for the event */
-	UT_LIST_NODE_T(os_event_struct_t) os_event_list;
-					/* list of all created events */
-};
-#endif
-
-typedef struct os_mutex_struct	os_mutex_str_t;
-typedef os_mutex_str_t*		os_mutex_t;
-
-#define OS_SYNC_INFINITE_TIME	((ulint)(-1))
-
-#define OS_SYNC_TIME_EXCEEDED	1
-
-/* Mutex protecting counts and the event and OS 'slow' mutex lists */
-extern os_mutex_t	os_sync_mutex;
-
-/* This is incremented by 1 in os_thread_create and decremented by 1 in
-os_thread_exit */
-extern ulint		os_thread_count;
-
-extern ulint		os_event_count;
-extern ulint		os_mutex_count;
-extern ulint		os_fast_mutex_count;
-
-/*************************************************************
-Initializes global event and OS 'slow' mutex lists. */
-
-void
-os_sync_init(void);
-/*==============*/
-/*************************************************************
-Frees created events and OS 'slow' mutexes. */
-
-void
-os_sync_free(void);
-/*==============*/
-/*************************************************************
-Creates an event semaphore, i.e., a semaphore which may just have two states:
-signaled and nonsignaled. The created event is manual reset: it must be reset
-explicitly by calling sync_os_reset_event. */
-
-os_event_t
-os_event_create(
-/*============*/
-				/* out: the event handle */
-	const char*	name);	/* in: the name of the event, if NULL
-				the event is created without a name */
-#ifdef __WIN__
-/*************************************************************
-Creates an auto-reset event semaphore, i.e., an event which is automatically
-reset when a single thread is released. Works only in Windows. */
-
-os_event_t
-os_event_create_auto(
-/*=================*/
-				/* out: the event handle */
-	const char*	name);	/* in: the name of the event, if NULL
-				the event is created without a name */
-#endif
-/**************************************************************
-Sets an event semaphore to the signaled state: lets waiting threads
-proceed. */
-
-void
-os_event_set(
-/*=========*/
-	os_event_t	event);	/* in: event to set */
-/**************************************************************
-Resets an event semaphore to the nonsignaled state. Waiting threads will
-stop to wait for the event.
-The return value should be passed to os_even_wait_low() if it is desired
-that this thread should not wait in case of an intervening call to
-os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low(). */
-
-ib_longlong
-os_event_reset(
-/*===========*/
-	os_event_t	event);	/* in: event to reset */
-/**************************************************************
-Frees an event object. */
-
-void
-os_event_free(
-/*==========*/
-	os_event_t	event);	/* in: event to free */
-
-/**************************************************************
-Waits for an event object until it is in the signaled state. If
-srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
-waiting thread when the event becomes signaled (or immediately if the
-event is already in the signaled state).
-
-Typically, if the event has been signalled after the os_event_reset()
-we'll return immediately because event->is_set == TRUE.
-There are, however, situations (e.g.: sync_array code) where we may
-lose this information. For example:
-
-thread A calls os_event_reset()
-thread B calls os_event_set()   [event->is_set == TRUE]
-thread C calls os_event_reset() [event->is_set == FALSE]
-thread A calls os_event_wait()  [infinite wait!]
-thread C calls os_event_wait()  [infinite wait!]
-
-Where such a scenario is possible, to avoid infinite wait, the
-value returned by os_event_reset() should be passed in as
-reset_sig_count. */
-
-#define os_event_wait(event) os_event_wait_low((event), 0)
-
-void
-os_event_wait_low(
-/*==============*/
-	os_event_t	event,		/* in: event to wait */
-	ib_longlong	reset_sig_count);/* in: zero or the value
-					returned by previous call of
-					os_event_reset(). */
-
-/**************************************************************
-Waits for an event object until it is in the signaled state or
-a timeout is exceeded. In Unix the timeout is always infinite. */
-
-ulint
-os_event_wait_time(
-/*===============*/
-				/* out: 0 if success,
-				OS_SYNC_TIME_EXCEEDED if timeout
-				was exceeded */
-	os_event_t	event,	/* in: event to wait */
-	ulint		time);	/* in: timeout in microseconds, or
-				OS_SYNC_INFINITE_TIME */
-#ifdef __WIN__
-/**************************************************************
-Waits for any event in an OS native event array. Returns if even a single
-one is signaled or becomes signaled. */
-
-ulint
-os_event_wait_multiple(
-/*===================*/
-					/* out: index of the event
-					which was signaled */
-	ulint			n,	/* in: number of events in the
-					array */
-	os_native_event_t*	native_event_array);
-					/* in: pointer to an array of event
-					handles */
-#endif
-/*************************************************************
-Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */
-
-os_mutex_t
-os_mutex_create(
-/*============*/
-				/* out: the mutex handle */
-	const char*	name);	/* in: the name of the mutex, if NULL
-				the mutex is created without a name */
-/**************************************************************
-Acquires ownership of a mutex semaphore. */
-
-void
-os_mutex_enter(
-/*===========*/
-	os_mutex_t	mutex);	/* in: mutex to acquire */
-/**************************************************************
-Releases ownership of a mutex. */
-
-void
-os_mutex_exit(
-/*==========*/
-	os_mutex_t	mutex);	/* in: mutex to release */
-/**************************************************************
-Frees an mutex object. */
-
-void
-os_mutex_free(
-/*==========*/
-	os_mutex_t	mutex);	/* in: mutex to free */
-/**************************************************************
-Acquires ownership of a fast mutex. Currently in Windows this is the same
-as os_fast_mutex_lock! */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock(
-/*==================*/
-						/* out: 0 if success, != 0 if
-						was reserved by another
-						thread */
-	os_fast_mutex_t*	fast_mutex);	/* in: mutex to acquire */
-/**************************************************************
-Releases ownership of a fast mutex. */
-
-void
-os_fast_mutex_unlock(
-/*=================*/
-	os_fast_mutex_t*	fast_mutex);	/* in: mutex to release */
-/*************************************************************
-Initializes an operating system fast mutex semaphore. */
-
-void
-os_fast_mutex_init(
-/*===============*/
-	os_fast_mutex_t*	fast_mutex);	/* in: fast mutex */
-/**************************************************************
-Acquires ownership of a fast mutex. */
-
-void
-os_fast_mutex_lock(
-/*===============*/
-	os_fast_mutex_t*	fast_mutex);	/* in: mutex to acquire */
-/**************************************************************
-Frees an mutex object. */
-
-void
-os_fast_mutex_free(
-/*===============*/
-	os_fast_mutex_t*	fast_mutex);	/* in: mutex to free */
-
-#ifdef UNIV_SYNC_ATOMIC
-/**************************************************************
-Atomic compare-and-swap for InnoDB. Currently requires GCC atomic builtins. */
-UNIV_INLINE
-ibool
-os_compare_and_swap(
-/*================*/
-						/* out: true if swapped */
-	volatile lint*		ptr,		/* in: pointer to target */
-	lint			oldVal,		/* in: value to compare to */
-	lint			newVal);	/* in: value to swap in */
-
-/**************************************************************
-Atomic increment for InnoDB. Currently requires GCC atomic builtins. */
-UNIV_INLINE
-lint
-os_atomic_increment(
-/*================*/
-						/* out: resulting value */
-	volatile lint*		ptr,		/* in: pointer to target */
-	lint			amount);	/* in: amount of increment */
-
-/**************************************************************
-Memory barrier operations for InnoDB.
-Currently requires GCC atomic builtins. */
-UNIV_INLINE
-void
-os_memory_barrier_load();
-
-UNIV_INLINE
-void
-os_memory_barrier_store();
-
-UNIV_INLINE
-void
-os_memory_barrier();
-
-#endif /* UNIV_SYNC_ATOMIC */
-
-#ifndef UNIV_NONINL
-#include "os0sync.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/os0sync.ic b/storage/innobase/include/os0sync.ic
deleted file mode 100644
index d1307134172..00000000000
--- a/storage/innobase/include/os0sync.ic
+++ /dev/null
@@ -1,152 +0,0 @@
-/******************************************************
-The interface to the operating system synchronization primitives.
-
-(c) 1995 Innobase Oy
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#ifdef __WIN__
-#include <winbase.h>
-#endif
-
-/**************************************************************
-Acquires ownership of a fast mutex. Currently in Windows this is the same
-as os_fast_mutex_lock! */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock(
-/*==================*/
-						/* out: 0 if success, != 0 if
-						was reserved by another
-						thread */
-	os_fast_mutex_t*	fast_mutex)	/* in: mutex to acquire */
-{
-#ifdef __WIN__
-	EnterCriticalSection(fast_mutex);
-
-	return(0);
-#else
-#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
-	/* Since the hot backup version is standalone, MySQL does not redefine
-	pthread_mutex_trylock for HP-UX-10.20, and consequently we must invert
-	the return value here */
-
-	return((ulint) (1 - pthread_mutex_trylock(fast_mutex)));
-#else
-	/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
-	so that it returns 0 on success. In the operating system
-	libraries, HP-UX-10.20 follows the old Posix 1003.4a Draft 4 and
-	returns 1 on success (but MySQL remaps that to 0), while Linux,
-	FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. */
-
-	return((ulint) pthread_mutex_trylock(fast_mutex));
-#endif
-#endif
-}
-
-#ifdef UNIV_SYNC_ATOMIC
-/**************************************************************
-Atomic compare-and-swap for InnoDB. Currently requires GCC atomic builtins
-or Solaris atomic_* functions. */
-UNIV_INLINE
-ibool
-os_compare_and_swap(
-/*================*/
-					/* out: true if swapped */
-	volatile lint* 	ptr,		/* in: pointer to target */
-	lint		oldVal,		/* in: value to compare to */
-	lint		newVal)		/* in: value to swap in */
-{
-#ifdef HAVE_GCC_ATOMIC_BUILTINS
-	return (__sync_bool_compare_and_swap(ptr, oldVal, newVal));
-#elif HAVE_SOLARIS_ATOMIC
-	lint retVal = (lint)atomic_cas_ulong((volatile ulong_t *)ptr,
-		oldVal, newVal);
-	return (retVal == oldVal);
-#elif WIN_ATOMICS32
-        lint retVal = (lint)InterlockedCompareExchange(ptr, newVal, oldVal);
-        return (retVal == oldVal);
-#elif WIN_ATOMICS64
-        lint retVal = (lint)InterlockedCompareExchange64(ptr, newVal, oldVal);
-        return (retVal == oldVal);
-#else
-#error "Need support for atomic ops"
-#endif
-}
-
-/**************************************************************
-Memory barrier for load */
-UNIV_INLINE
-void
-os_memory_barrier_load()
-{
-#ifdef HAVE_GCC_ATOMIC_BUILTINS
-  __sync_synchronize();
-#elif HAVE_SOLARIS_ATOMIC
-  membar_consumer();
-#elif WIN_ATOMICS32
-  MemoryBarrier();
-#elif WIN_ATOMICS64
-  MemoryBarrier();
-#endif
-}
-
-/**************************************************************
-Memory barrier for store */
-UNIV_INLINE
-void
-os_memory_barrier_store()
-{
-#ifdef HAVE_GCC_ATOMIC_BUILTINS
-  __sync_synchronize();
-#elif HAVE_SOLARIS_ATOMIC
-  membar_producer();
-#elif WIN_ATOMICS32
-  MemoryBarrier();
-#elif WIN_ATOMICS64
-  MemoryBarrier();
-#endif
-}
-
-/**************************************************************
-Memory barrier */
-UNIV_INLINE
-void
-os_memory_barrier()
-{
-#ifdef HAVE_GCC_ATOMIC_BUILTINS
-  __sync_synchronize();
-#elif HAVE_SOLARIS_ATOMIC
-  membar_enter();
-#elif WIN_ATOMICS32
-  MemoryBarrier();
-#elif WIN_ATOMICS64
-  MemoryBarrier();
-#endif
-}
-
-
-/**************************************************************
-Atomic increment for InnoDB. Currently requires GCC atomic builtins. */
-UNIV_INLINE
-lint
-os_atomic_increment(
-/*================*/
-					/* out: resulting value */
-	volatile lint*	ptr,		/* in: pointer to target */
-	lint		amount)		/* in: amount of increment */
-{
-#ifdef HAVE_GCC_ATOMIC_BUILTINS
-	return (__sync_add_and_fetch(ptr, amount));
-#elif HAVE_SOLARIS_ATOMIC
-	return ((lint)atomic_add_long_nv((volatile ulong_t *)ptr, amount));
-#elif WIN_ATOMICS32
-        return ((lint)InterlockedExchangeAdd(ptr, amount) + amount);
-#elif WIN_ATOMICS64
-        return ((lint)InterlockedExchangeAdd64(ptr, amount) + amount);
-#else
-#error "Need support for atomic ops"
-#endif
-}
-#endif /* UNIV_SYNC_ATOMIC */
diff --git a/storage/innobase/include/os0thread.ic b/storage/innobase/include/os0thread.ic
deleted file mode 100644
index a75aa3abb34..00000000000
--- a/storage/innobase/include/os0thread.ic
+++ /dev/null
@@ -1,8 +0,0 @@
-/******************************************************
-The interface to the operating system
-process and thread control primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/8/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
deleted file mode 100644
index 04f731414a3..00000000000
--- a/storage/innobase/include/page0cur.h
+++ /dev/null
@@ -1,286 +0,0 @@
-/************************************************************************
-The page cursor
-
-(c) 1994-1996 Innobase Oy
-
-Created 10/4/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef page0cur_h
-#define page0cur_h
-
-#include "univ.i"
-
-#include "page0types.h"
-#include "page0page.h"
-#include "rem0rec.h"
-#include "data0data.h"
-#include "mtr0mtr.h"
-
-
-#define PAGE_CUR_ADAPT
-
-/* Page cursor search modes; the values must be in this order! */
-
-#define	PAGE_CUR_UNSUPP	0
-#define	PAGE_CUR_G	1
-#define	PAGE_CUR_GE	2
-#define	PAGE_CUR_L	3
-#define	PAGE_CUR_LE	4
-/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in
-				 "column LIKE 'abc%' ORDER BY column DESC";
-				 we have to find strings which are <= 'abc' or
-				 which extend it */
-#ifdef UNIV_SEARCH_DEBUG
-# define PAGE_CUR_DBG	6	/* As PAGE_CUR_LE, but skips search shortcut */
-#endif /* UNIV_SEARCH_DEBUG */
-
-#ifdef PAGE_CUR_ADAPT
-# ifdef UNIV_SEARCH_PERF_STAT
-extern ulint	page_cur_short_succ;
-# endif /* UNIV_SEARCH_PERF_STAT */
-#endif /* PAGE_CUR_ADAPT */
-
-/*************************************************************
-Gets pointer to the page frame where the cursor is positioned. */
-UNIV_INLINE
-page_t*
-page_cur_get_page(
-/*==============*/
-				/* out: page */
-	page_cur_t*	cur);	/* in: page cursor */
-/*************************************************************
-Gets the record where the cursor is positioned. */
-UNIV_INLINE
-rec_t*
-page_cur_get_rec(
-/*=============*/
-				/* out: record */
-	page_cur_t*	cur);	/* in: page cursor */
-/*************************************************************
-Sets the cursor object to point before the first user record
-on the page. */
-UNIV_INLINE
-void
-page_cur_set_before_first(
-/*======================*/
-	page_t*		page,	/* in: index page */
-	page_cur_t*	cur);	/* in: cursor */
-/*************************************************************
-Sets the cursor object to point after the last user record on
-the page. */
-UNIV_INLINE
-void
-page_cur_set_after_last(
-/*====================*/
-	page_t*		page,	/* in: index page */
-	page_cur_t*	cur);	/* in: cursor */
-/*************************************************************
-Returns TRUE if the cursor is before first user record on page. */
-UNIV_INLINE
-ibool
-page_cur_is_before_first(
-/*=====================*/
-					/* out: TRUE if at start */
-	const page_cur_t*	cur);	/* in: cursor */
-/*************************************************************
-Returns TRUE if the cursor is after last user record. */
-UNIV_INLINE
-ibool
-page_cur_is_after_last(
-/*===================*/
-					/* out: TRUE if at end */
-	const page_cur_t*	cur);	/* in: cursor */
-/**************************************************************
-Positions the cursor on the given record. */
-UNIV_INLINE
-void
-page_cur_position(
-/*==============*/
-	rec_t*		rec,	/* in: record on a page */
-	page_cur_t*	cur);	/* in: page cursor */
-/**************************************************************
-Invalidates a page cursor by setting the record pointer NULL. */
-UNIV_INLINE
-void
-page_cur_invalidate(
-/*================*/
-	page_cur_t*	cur);	/* in: page cursor */
-/**************************************************************
-Moves the cursor to the next record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_next(
-/*==================*/
-	page_cur_t*	cur);	/* in: cursor; must not be after last */
-/**************************************************************
-Moves the cursor to the previous record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_prev(
-/*==================*/
-	page_cur_t*	cur);	/* in: cursor; must not before first */
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same position. */
-UNIV_INLINE
-rec_t*
-page_cur_tuple_insert(
-/*==================*/
-				/* out: pointer to record if succeed, NULL
-				otherwise */
-	page_cur_t*	cursor,	/* in: a page cursor */
-	dtuple_t*	tuple,	/* in: pointer to a data tuple */
-	dict_index_t*	index,	/* in: record descriptor */
-	mtr_t*		mtr);	/* in: mini-transaction handle */
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same position. */
-UNIV_INLINE
-rec_t*
-page_cur_rec_insert(
-/*================*/
-				/* out: pointer to record if succeed, NULL
-				otherwise */
-	page_cur_t*	cursor,	/* in: a page cursor */
-	rec_t*		rec,	/* in: record to insert */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint*		offsets,/* in: rec_get_offsets(rec, index) */
-	mtr_t*		mtr);	/* in: mini-transaction handle */
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The record to be
-inserted can be in a data tuple or as a physical record. The other parameter
-must then be NULL. The cursor stays at the same position. */
-
-rec_t*
-page_cur_insert_rec_low(
-/*====================*/
-				/* out: pointer to record if succeed, NULL
-				otherwise */
-	page_cur_t*	cursor,	/* in: a page cursor */
-	dtuple_t*	tuple,	/* in: pointer to a data tuple or NULL */
-	dict_index_t*	index,	/* in: record descriptor */
-	rec_t*		rec,	/* in: pointer to a physical record or NULL */
-	ulint*		offsets,/* in: rec_get_offsets(rec, index) or NULL */
-	mtr_t*		mtr);	/* in: mini-transaction handle */
-/*****************************************************************
-Copies records from page to a newly created page, from a given record onward,
-including that record. Infimum and supremum records are not copied. */
-
-void
-page_copy_rec_list_end_to_created_page(
-/*===================================*/
-	page_t*		new_page,	/* in: index page to copy to */
-	page_t*		page,		/* in: index page */
-	rec_t*		rec,		/* in: first record to copy */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr);		/* in: mtr */
-/***************************************************************
-Deletes a record at the page cursor. The cursor is moved to the
-next record after the deleted one. */
-
-void
-page_cur_delete_rec(
-/*================*/
-	page_cur_t*	cursor,	/* in: a page cursor */
-	dict_index_t*	index,	/* in: record descriptor */
-	const ulint*	offsets,/* in: rec_get_offsets(cursor->rec, index) */
-	mtr_t*		mtr);	/* in: mini-transaction handle */
-/********************************************************************
-Searches the right position for a page cursor. */
-UNIV_INLINE
-ulint
-page_cur_search(
-/*============*/
-				/* out: number of matched fields on the left */
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index,	/* in: record descriptor */
-	dtuple_t*	tuple,	/* in: data tuple */
-	ulint		mode,	/* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
-				or PAGE_CUR_GE */
-	page_cur_t*	cursor);/* out: page cursor */
-/********************************************************************
-Searches the right position for a page cursor. */
-
-void
-page_cur_search_with_match(
-/*=======================*/
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index,	/* in: record descriptor */
-	dtuple_t*	tuple,	/* in: data tuple */
-	ulint		mode,	/* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
-				or PAGE_CUR_GE */
-	ulint*		iup_matched_fields,
-				/* in/out: already matched fields in upper
-				limit record */
-	ulint*		iup_matched_bytes,
-				/* in/out: already matched bytes in a field
-				not yet completely matched */
-	ulint*		ilow_matched_fields,
-				/* in/out: already matched fields in lower
-				limit record */
-	ulint*		ilow_matched_bytes,
-				/* in/out: already matched bytes in a field
-				not yet completely matched */
-	page_cur_t*	cursor); /* out: page cursor */
-/***************************************************************
-Positions a page cursor on a randomly chosen user record on a page. If there
-are no user records, sets the cursor on the infimum record. */
-
-void
-page_cur_open_on_rnd_user_rec(
-/*==========================*/
-	page_t*		page,	/* in: page */
-	page_cur_t*	cursor);/* in/out: page cursor */
-/***************************************************************
-Parses a log record of a record insert on a page. */
-
-byte*
-page_cur_parse_insert_rec(
-/*======================*/
-				/* out: end of log record or NULL */
-	ibool		is_short,/* in: TRUE if short inserts */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	dict_index_t*	index,	/* in: record descriptor */
-	page_t*		page,	/* in: page or NULL */
-	mtr_t*		mtr);	/* in: mtr or NULL */
-/**************************************************************
-Parses a log record of copying a record list end to a new created page. */
-
-byte*
-page_parse_copy_rec_list_to_created_page(
-/*=====================================*/
-				/* out: end of log record or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	dict_index_t*	index,	/* in: record descriptor */
-	page_t*		page,	/* in: page or NULL */
-	mtr_t*		mtr);	/* in: mtr or NULL */
-/***************************************************************
-Parses log record of a record delete on a page. */
-
-byte*
-page_cur_parse_delete_rec(
-/*======================*/
-				/* out: pointer to record end or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	dict_index_t*	index,	/* in: record descriptor */
-	page_t*		page,	/* in: page or NULL */
-	mtr_t*		mtr);	/* in: mtr or NULL */
-
-/* Index page cursor */
-
-struct page_cur_struct{
-	byte*	rec;	/* pointer to a record on page */
-};
-
-#ifndef UNIV_NONINL
-#include "page0cur.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic
deleted file mode 100644
index b747874abc2..00000000000
--- a/storage/innobase/include/page0cur.ic
+++ /dev/null
@@ -1,210 +0,0 @@
-/************************************************************************
-The page cursor
-
-(c) 1994-1996 Innobase Oy
-
-Created 10/4/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "page0page.h"
-
-
-/*************************************************************
-Gets pointer to the page frame where the cursor is positioned. */
-UNIV_INLINE
-page_t*
-page_cur_get_page(
-/*==============*/
-				/* out: page */
-	page_cur_t*	cur)	/* in: page cursor */
-{
-	ut_ad(cur);
-
-	return(buf_frame_align(cur->rec));
-}
-
-/*************************************************************
-Gets the record where the cursor is positioned. */
-UNIV_INLINE
-rec_t*
-page_cur_get_rec(
-/*=============*/
-				/* out: record */
-	page_cur_t*	cur)	/* in: page cursor */
-{
-	ut_ad(cur);
-
-	return(cur->rec);
-}
-
-/*************************************************************
-Sets the cursor object to point before the first user record
-on the page. */
-UNIV_INLINE
-void
-page_cur_set_before_first(
-/*======================*/
-	page_t*		page,	/* in: index page */
-	page_cur_t*	cur)	/* in: cursor */
-{
-	cur->rec = page_get_infimum_rec(page);
-}
-
-/*************************************************************
-Sets the cursor object to point after the last user record on
-the page. */
-UNIV_INLINE
-void
-page_cur_set_after_last(
-/*====================*/
-	page_t*		page,	/* in: index page */
-	page_cur_t*	cur)	/* in: cursor */
-{
-	cur->rec = page_get_supremum_rec(page);
-}
-
-/*************************************************************
-Returns TRUE if the cursor is before first user record on page. */
-UNIV_INLINE
-ibool
-page_cur_is_before_first(
-/*=====================*/
-					/* out: TRUE if at start */
-	const page_cur_t*	cur)	/* in: cursor */
-{
-	return(page_rec_is_infimum(cur->rec));
-}
-
-/*************************************************************
-Returns TRUE if the cursor is after last user record. */
-UNIV_INLINE
-ibool
-page_cur_is_after_last(
-/*===================*/
-					/* out: TRUE if at end */
-	const page_cur_t*	cur)	/* in: cursor */
-{
-	return(page_rec_is_supremum(cur->rec));
-}
-
-/**************************************************************
-Positions the cursor on the given record. */
-UNIV_INLINE
-void
-page_cur_position(
-/*==============*/
-	rec_t*		rec,	/* in: record on a page */
-	page_cur_t*	cur)	/* in: page cursor */
-{
-	ut_ad(rec && cur);
-
-	cur->rec = rec;
-}
-
-/**************************************************************
-Invalidates a page cursor by setting the record pointer NULL. */
-UNIV_INLINE
-void
-page_cur_invalidate(
-/*================*/
-	page_cur_t*	cur)	/* in: page cursor */
-{
-	ut_ad(cur);
-
-	cur->rec = NULL;
-}
-
-/**************************************************************
-Moves the cursor to the next record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_next(
-/*==================*/
-	page_cur_t*	cur)	/* in: cursor; must not be after last */
-{
-	ut_ad(!page_cur_is_after_last(cur));
-
-	cur->rec = page_rec_get_next(cur->rec);
-}
-
-/**************************************************************
-Moves the cursor to the previous record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_prev(
-/*==================*/
-	page_cur_t*	cur)	/* in: page cursor, not before first */
-{
-	ut_ad(!page_cur_is_before_first(cur));
-
-	cur->rec = page_rec_get_prev(cur->rec);
-}
-
-/********************************************************************
-Searches the right position for a page cursor. */
-UNIV_INLINE
-ulint
-page_cur_search(
-/*============*/
-				/* out: number of matched fields on the left */
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index,	/* in: record descriptor */
-	dtuple_t*	tuple,	/* in: data tuple */
-	ulint		mode,	/* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
-				or PAGE_CUR_GE */
-	page_cur_t*	cursor)	/* out: page cursor */
-{
-	ulint		low_matched_fields = 0;
-	ulint		low_matched_bytes = 0;
-	ulint		up_matched_fields = 0;
-	ulint		up_matched_bytes = 0;
-
-	ut_ad(dtuple_check_typed(tuple));
-
-	page_cur_search_with_match(page, index, tuple, mode,
-				   &up_matched_fields,
-				   &up_matched_bytes,
-				   &low_matched_fields,
-				   &low_matched_bytes,
-				   cursor);
-	return(low_matched_fields);
-}
-
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same position. */
-UNIV_INLINE
-rec_t*
-page_cur_tuple_insert(
-/*==================*/
-				/* out: pointer to record if succeed, NULL
-				otherwise */
-	page_cur_t*	cursor,	/* in: a page cursor */
-	dtuple_t*	tuple,	/* in: pointer to a data tuple */
-	dict_index_t*	index,	/* in: record descriptor */
-	mtr_t*		mtr)	/* in: mini-transaction handle */
-{
-	return(page_cur_insert_rec_low(cursor, tuple, index, NULL, NULL, mtr));
-}
-
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same position. */
-UNIV_INLINE
-rec_t*
-page_cur_rec_insert(
-/*================*/
-				/* out: pointer to record if succeed, NULL
-				otherwise */
-	page_cur_t*	cursor,	/* in: a page cursor */
-	rec_t*		rec,	/* in: record to insert */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint*		offsets,/* in: rec_get_offsets(rec, index) */
-	mtr_t*		mtr)	/* in: mini-transaction handle */
-{
-	return(page_cur_insert_rec_low(cursor, NULL, index, rec,
-				       offsets, mtr));
-}
-
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
deleted file mode 100644
index 273007c2778..00000000000
--- a/storage/innobase/include/page0page.h
+++ /dev/null
@@ -1,829 +0,0 @@
-/******************************************************
-Index page routines
-
-(c) 1994-1996 Innobase Oy
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef page0page_h
-#define page0page_h
-
-#include "univ.i"
-
-#include "page0types.h"
-#include "fil0fil.h"
-#include "buf0buf.h"
-#include "data0data.h"
-#include "dict0dict.h"
-#include "rem0rec.h"
-#include "fsp0fsp.h"
-#include "mtr0mtr.h"
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE
-#endif
-
-/*			PAGE HEADER
-			===========
-
-Index page header starts at the first offset left free by the FIL-module */
-
-typedef	byte		page_header_t;
-
-#define	PAGE_HEADER	FSEG_PAGE_DATA	/* index page header starts at this
-				offset */
-/*-----------------------------*/
-#define PAGE_N_DIR_SLOTS 0	/* number of slots in page directory */
-#define	PAGE_HEAP_TOP	 2	/* pointer to record heap top */
-#define	PAGE_N_HEAP	 4	/* number of records in the heap,
-				bit 15=flag: new-style compact page format */
-#define	PAGE_FREE	 6	/* pointer to start of page free record list */
-#define	PAGE_GARBAGE	 8	/* number of bytes in deleted records */
-#define	PAGE_LAST_INSERT 10	/* pointer to the last inserted record, or
-				NULL if this info has been reset by a delete,
-				for example */
-#define	PAGE_DIRECTION	 12	/* last insert direction: PAGE_LEFT, ... */
-#define	PAGE_N_DIRECTION 14	/* number of consecutive inserts to the same
-				direction */
-#define	PAGE_N_RECS	 16	/* number of user records on the page */
-#define PAGE_MAX_TRX_ID	 18	/* highest id of a trx which may have modified
-				a record on the page; a dulint; defined only
-				in secondary indexes; specifically, not in an
-				ibuf tree; NOTE: this may be modified only
-				when the thread has an x-latch to the page,
-				and ALSO an x-latch to btr_search_latch
-				if there is a hash index to the page! */
-#define PAGE_HEADER_PRIV_END 26	/* end of private data structure of the page
-				header which are set in a page create */
-/*----*/
-#define	PAGE_LEVEL	 26	/* level of the node in an index tree; the
-				leaf level is the level 0 */
-#define	PAGE_INDEX_ID	 28	/* index id where the page belongs */
-#define PAGE_BTR_SEG_LEAF 36	/* file segment header for the leaf pages in
-				a B-tree: defined only on the root page of a
-				B-tree, but not in the root of an ibuf tree */
-#define PAGE_BTR_IBUF_FREE_LIST	PAGE_BTR_SEG_LEAF
-#define PAGE_BTR_IBUF_FREE_LIST_NODE PAGE_BTR_SEG_LEAF
-				/* in the place of PAGE_BTR_SEG_LEAF and _TOP
-				there is a free list base node if the page is
-				the root page of an ibuf tree, and at the same
-				place is the free list node if the page is in
-				a free list */
-#define PAGE_BTR_SEG_TOP (36 + FSEG_HEADER_SIZE)
-				/* file segment header for the non-leaf pages
-				in a B-tree: defined only on the root page of
-				a B-tree, but not in the root of an ibuf
-				tree */
-/*----*/
-#define PAGE_DATA	(PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE)
-				/* start of data on the page */
-
-#define PAGE_OLD_INFIMUM	(PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES)
-				/* offset of the page infimum record on an
-				old-style page */
-#define PAGE_OLD_SUPREMUM	(PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8)
-				/* offset of the page supremum record on an
-				old-style page */
-#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9)
-				/* offset of the page supremum record end on
-				an old-style page */
-#define PAGE_NEW_INFIMUM	(PAGE_DATA + REC_N_NEW_EXTRA_BYTES)
-				/* offset of the page infimum record on a
-				new-style compact page */
-#define PAGE_NEW_SUPREMUM	(PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8)
-				/* offset of the page supremum record on a
-				new-style compact page */
-#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8)
-				/* offset of the page supremum record end on
-				a new-style compact page */
-/*-----------------------------*/
-
-/* Directions of cursor movement */
-#define	PAGE_LEFT		1
-#define	PAGE_RIGHT		2
-#define	PAGE_SAME_REC		3
-#define	PAGE_SAME_PAGE		4
-#define	PAGE_NO_DIRECTION	5
-
-/*			PAGE DIRECTORY
-			==============
-*/
-
-typedef	byte			page_dir_slot_t;
-typedef page_dir_slot_t		page_dir_t;
-
-/* Offset of the directory start down from the page end. We call the
-slot with the highest file address directory start, as it points to
-the first record in the list of records. */
-#define	PAGE_DIR		FIL_PAGE_DATA_END
-
-/* We define a slot in the page directory as two bytes */
-#define	PAGE_DIR_SLOT_SIZE	2
-
-/* The offset of the physically lower end of the directory, counted from
-page end, when the page is empty */
-#define PAGE_EMPTY_DIR_START	(PAGE_DIR + 2 * PAGE_DIR_SLOT_SIZE)
-
-/* The maximum and minimum number of records owned by a directory slot. The
-number may drop below the minimum in the first and the last slot in the
-directory. */
-#define PAGE_DIR_SLOT_MAX_N_OWNED	8
-#define	PAGE_DIR_SLOT_MIN_N_OWNED	4
-
-/****************************************************************
-Gets the start of a page. */
-UNIV_INLINE
-page_t*
-page_align(
-/*=======*/
-			/* out: start of the page */
-	void*	ptr)	/* in: pointer to page frame */
-		__attribute__((const));
-/****************************************************************
-Gets the offset within a page. */
-UNIV_INLINE
-ulint
-page_offset(
-/*========*/
-				/* out: offset from the start of the page */
-	const void*	ptr)	/* in: pointer to page frame */
-		__attribute__((const));
-/*****************************************************************
-Returns the max trx id field value. */
-UNIV_INLINE
-dulint
-page_get_max_trx_id(
-/*================*/
-	page_t*	page);	/* in: page */
-/*****************************************************************
-Sets the max trx id field value. */
-
-void
-page_set_max_trx_id(
-/*================*/
-	page_t*	page,	/* in: page */
-	dulint	trx_id);/* in: transaction id */
-/*****************************************************************
-Sets the max trx id field value if trx_id is bigger than the previous
-value. */
-UNIV_INLINE
-void
-page_update_max_trx_id(
-/*===================*/
-	page_t*	page,	/* in: page */
-	dulint	trx_id);	/* in: transaction id */
-/*****************************************************************
-Reads the given header field. */
-UNIV_INLINE
-ulint
-page_header_get_field(
-/*==================*/
-	page_t*	page,	/* in: page */
-	ulint	field);	/* in: PAGE_N_DIR_SLOTS, ... */
-/*****************************************************************
-Sets the given header field. */
-UNIV_INLINE
-void
-page_header_set_field(
-/*==================*/
-	page_t*	page,	/* in: page */
-	ulint	field,	/* in: PAGE_N_DIR_SLOTS, ... */
-	ulint	val);	/* in: value */
-/*****************************************************************
-Returns the pointer stored in the given header field. */
-UNIV_INLINE
-byte*
-page_header_get_ptr(
-/*================*/
-			/* out: pointer or NULL */
-	page_t*	page,	/* in: page */
-	ulint	field);	/* in: PAGE_FREE, ... */
-/*****************************************************************
-Sets the pointer stored in the given header field. */
-UNIV_INLINE
-void
-page_header_set_ptr(
-/*================*/
-	page_t*	page,	/* in: page */
-	ulint	field,	/* in: PAGE_FREE, ... */
-	byte*	ptr);	/* in: pointer or NULL*/
-/*****************************************************************
-Resets the last insert info field in the page header. Writes to mlog
-about this operation. */
-UNIV_INLINE
-void
-page_header_reset_last_insert(
-/*==========================*/
-	page_t*	page,	/* in: page */
-	mtr_t*	mtr);	/* in: mtr */
-/****************************************************************
-Gets the first record on the page. */
-UNIV_INLINE
-rec_t*
-page_get_infimum_rec(
-/*=================*/
-			/* out: the first record in record list */
-	page_t*	page);	/* in: page which must have record(s) */
-/****************************************************************
-Gets the last record on the page. */
-UNIV_INLINE
-rec_t*
-page_get_supremum_rec(
-/*==================*/
-			/* out: the last record in record list */
-	page_t*	page);	/* in: page which must have record(s) */
-/****************************************************************
-Returns the middle record of record list. If there are an even number
-of records in the list, returns the first record of upper half-list. */
-
-rec_t*
-page_get_middle_rec(
-/*================*/
-			/* out: middle record */
-	page_t*	page);	/* in: page */
-/*****************************************************************
-Compares a data tuple to a physical record. Differs from the function
-cmp_dtuple_rec_with_match in the way that the record must reside on an
-index page, and also page infimum and supremum records can be given in
-the parameter rec. These are considered as the negative infinity and
-the positive infinity in the alphabetical order. */
-UNIV_INLINE
-int
-page_cmp_dtuple_rec_with_match(
-/*===========================*/
-				/* out: 1, 0, -1, if dtuple is greater, equal,
-				less than rec, respectively, when only the
-				common first fields are compared */
-	dtuple_t*	dtuple,	/* in: data tuple */
-	rec_t*		rec,	/* in: physical record on a page; may also
-				be page infimum or supremum, in which case
-				matched-parameter values below are not
-				affected */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint*		matched_fields, /* in/out: number of already completely
-				matched fields; when function returns
-				contains the value for current comparison */
-	ulint*		matched_bytes); /* in/out: number of already matched
-				bytes within the first field not completely
-				matched; when function returns contains the
-				value for current comparison */
-/*****************************************************************
-Gets the number of user records on page (the infimum and supremum records
-are not user records). */
-UNIV_INLINE
-ulint
-page_get_n_recs(
-/*============*/
-			/* out: number of user records */
-	page_t*	page);	/* in: index page */
-/*******************************************************************
-Returns the number of records before the given record in chain.
-The number includes infimum and supremum records. */
-
-ulint
-page_rec_get_n_recs_before(
-/*=======================*/
-			/* out: number of records */
-	rec_t*	rec);	/* in: the physical record */
-/*****************************************************************
-Gets the number of records in the heap. */
-UNIV_INLINE
-ulint
-page_dir_get_n_heap(
-/*================*/
-			/* out: number of user records */
-	page_t*	page);	/* in: index page */
-/*****************************************************************
-Sets the number of records in the heap. */
-UNIV_INLINE
-void
-page_dir_set_n_heap(
-/*================*/
-	page_t*	page,	/* in: index page */
-	ulint	n_heap);/* in: number of records */
-/*****************************************************************
-Gets the number of dir slots in directory. */
-UNIV_INLINE
-ulint
-page_dir_get_n_slots(
-/*=================*/
-			/* out: number of slots */
-	page_t*	page);	/* in: index page */
-/*****************************************************************
-Sets the number of dir slots in directory. */
-UNIV_INLINE
-void
-page_dir_set_n_slots(
-/*=================*/
-			/* out: number of slots */
-	page_t*	page,	/* in: index page */
-	ulint	n_slots);/* in: number of slots */
-/*****************************************************************
-Gets pointer to nth directory slot. */
-UNIV_INLINE
-page_dir_slot_t*
-page_dir_get_nth_slot(
-/*==================*/
-			/* out: pointer to dir slot */
-	page_t*	page,	/* in: index page */
-	ulint	n);	/* in: position */
-/******************************************************************
-Used to check the consistency of a record on a page. */
-UNIV_INLINE
-ibool
-page_rec_check(
-/*===========*/
-			/* out: TRUE if succeed */
-	rec_t*	rec);	/* in: record */
-/*******************************************************************
-Gets the record pointed to by a directory slot. */
-UNIV_INLINE
-rec_t*
-page_dir_slot_get_rec(
-/*==================*/
-					/* out: pointer to record */
-	page_dir_slot_t*	slot);	/* in: directory slot */
-/*******************************************************************
-This is used to set the record offset in a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_rec(
-/*==================*/
-	page_dir_slot_t* slot,	/* in: directory slot */
-	rec_t*		 rec);	/* in: record on the page */
-/*******************************************************************
-Gets the number of records owned by a directory slot. */
-UNIV_INLINE
-ulint
-page_dir_slot_get_n_owned(
-/*======================*/
-					/* out: number of records */
-	page_dir_slot_t*	slot);	/* in: page directory slot */
-/*******************************************************************
-This is used to set the owned records field of a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_n_owned(
-/*======================*/
-	page_dir_slot_t*	slot,	/* in: directory slot */
-	ulint			n);	/* in: number of records owned
-					by the slot */
-/****************************************************************
-Calculates the space reserved for directory slots of a given
-number of records. The exact value is a fraction number
-n * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, and it is
-rounded upwards to an integer. */
-UNIV_INLINE
-ulint
-page_dir_calc_reserved_space(
-/*=========================*/
-	ulint	n_recs);	/* in: number of records */
-/*******************************************************************
-Looks for the directory slot which owns the given record. */
-
-ulint
-page_dir_find_owner_slot(
-/*=====================*/
-				/* out: the directory slot number */
-	rec_t*		rec);	/* in: the physical record */
-/****************************************************************
-Determine whether the page is in new-style compact format. */
-UNIV_INLINE
-ulint
-page_is_comp(
-/*=========*/
-			/* out: nonzero if the page is in compact
-			format, zero if it is in old-style format */
-	page_t*	page);	/* in: index page */
-/****************************************************************
-TRUE if the record is on a page in compact format. */
-UNIV_INLINE
-ulint
-page_rec_is_comp(
-/*=============*/
-				/* out: nonzero if in compact format */
-	const rec_t*	rec);	/* in: record */
-/****************************************************************
-Gets the pointer to the next record on the page. */
-UNIV_INLINE
-rec_t*
-page_rec_get_next(
-/*==============*/
-			/* out: pointer to next record */
-	rec_t*	rec);	/* in: pointer to record, must not be page
-			supremum */
-/****************************************************************
-Sets the pointer to the next record on the page. */
-UNIV_INLINE
-void
-page_rec_set_next(
-/*==============*/
-	rec_t*	rec,	/* in: pointer to record, must not be
-			page supremum */
-	rec_t*	next);	/* in: pointer to next record, must not
-			be page infimum */
-/****************************************************************
-Gets the pointer to the previous record. */
-UNIV_INLINE
-rec_t*
-page_rec_get_prev(
-/*==============*/
-				/* out: pointer to previous record */
-	rec_t*		rec);	/* in: pointer to record,
-				must not be page infimum */
-/****************************************************************
-TRUE if the record is a user record on the page. */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec_low(
-/*=====================*/
-			/* out: TRUE if a user record */
-	ulint	offset);/* in: record offset on page */
-/****************************************************************
-TRUE if the record is the supremum record on a page. */
-UNIV_INLINE
-ibool
-page_rec_is_supremum_low(
-/*=====================*/
-			/* out: TRUE if the supremum record */
-	ulint	offset);/* in: record offset on page */
-/****************************************************************
-TRUE if the record is the infimum record on a page. */
-UNIV_INLINE
-ibool
-page_rec_is_infimum_low(
-/*====================*/
-			/* out: TRUE if the infimum record */
-	ulint	offset);/* in: record offset on page */
-
-/****************************************************************
-TRUE if the record is a user record on the page. */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec(
-/*=================*/
-				/* out: TRUE if a user record */
-	const rec_t*	rec);	/* in: record */
-/****************************************************************
-TRUE if the record is the supremum record on a page. */
-UNIV_INLINE
-ibool
-page_rec_is_supremum(
-/*=================*/
-				/* out: TRUE if the supremum record */
-	const rec_t*	rec);	/* in: record */
-/****************************************************************
-TRUE if the record is the infimum record on a page. */
-UNIV_INLINE
-ibool
-page_rec_is_infimum(
-/*================*/
-				/* out: TRUE if the infimum record */
-	const rec_t*	rec);	/* in: record */
-/*******************************************************************
-Looks for the record which owns the given record. */
-UNIV_INLINE
-rec_t*
-page_rec_find_owner_rec(
-/*====================*/
-			/* out: the owner record */
-	rec_t*	rec);	/* in: the physical record */
-/***************************************************************************
-This is a low-level operation which is used in a database index creation
-to update the page number of a created B-tree to a data dictionary
-record. */
-
-void
-page_rec_write_index_page_no(
-/*=========================*/
-	rec_t*	rec,	/* in: record to update */
-	ulint	i,	/* in: index of the field to update */
-	ulint	page_no,/* in: value to write */
-	mtr_t*	mtr);	/* in: mtr */
-/****************************************************************
-Returns the maximum combined size of records which can be inserted on top
-of record heap. */
-UNIV_INLINE
-ulint
-page_get_max_insert_size(
-/*=====================*/
-			/* out: maximum combined size for inserted records */
-	page_t*	page,	/* in: index page */
-	ulint	n_recs);	/* in: number of records */
-/****************************************************************
-Returns the maximum combined size of records which can be inserted on top
-of record heap if page is first reorganized. */
-UNIV_INLINE
-ulint
-page_get_max_insert_size_after_reorganize(
-/*======================================*/
-			/* out: maximum combined size for inserted records */
-	page_t*	page,	/* in: index page */
-	ulint	n_recs);/* in: number of records */
-/*****************************************************************
-Calculates free space if a page is emptied. */
-UNIV_INLINE
-ulint
-page_get_free_space_of_empty(
-/*=========================*/
-			/* out: free space */
-	ulint	comp)	/* in: nonzero=compact page format */
-		__attribute__((const));
-/*****************************************************************
-Calculates free space if a page is emptied. */
-
-ulint
-page_get_free_space_of_empty_noninline(
-/*===================================*/
-			/* out: free space */
-	ulint	comp)	/* in: nonzero=compact page format */
-		__attribute__((const));
-/****************************************************************
-Returns the sum of the sizes of the records in the record list
-excluding the infimum and supremum records. */
-UNIV_INLINE
-ulint
-page_get_data_size(
-/*===============*/
-			/* out: data in bytes */
-	page_t*	page);	/* in: index page */
-/****************************************************************
-Allocates a block of memory from an index page. */
-
-byte*
-page_mem_alloc(
-/*===========*/
-				/* out: pointer to start of allocated
-				buffer, or NULL if allocation fails */
-	page_t*		page,	/* in: index page */
-	ulint		need,	/* in: number of bytes needed */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint*		heap_no);/* out: this contains the heap number
-				of the allocated record
-				if allocation succeeds */
-/****************************************************************
-Puts a record to free list. */
-UNIV_INLINE
-void
-page_mem_free(
-/*==========*/
-	page_t*		page,	/* in: index page */
-	rec_t*		rec,	/* in: pointer to the (origin of) record */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-The index page creation function. */
-
-page_t*
-page_create(
-/*========*/
-					/* out: pointer to the page */
-	buf_frame_t*	frame,		/* in: a buffer frame where the page is
-					created */
-	mtr_t*		mtr,		/* in: mini-transaction handle */
-	ulint		comp);		/* in: nonzero=compact page format */
-/*****************************************************************
-Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page. */
-
-void
-page_copy_rec_list_end_no_locks(
-/*============================*/
-	page_t*		new_page,	/* in: index page to copy to */
-	page_t*		page,		/* in: index page */
-	rec_t*		rec,		/* in: record on page */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr);		/* in: mtr */
-/*****************************************************************
-Copies records from page to new_page, from the given record onward,
-including that record. Infimum and supremum records are not copied.
-The records are copied to the start of the record list on new_page. */
-
-void
-page_copy_rec_list_end(
-/*===================*/
-	page_t*		new_page,	/* in: index page to copy to */
-	page_t*		page,		/* in: index page */
-	rec_t*		rec,		/* in: record on page */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr);		/* in: mtr */
-/*****************************************************************
-Copies records from page to new_page, up to the given record, NOT
-including that record. Infimum and supremum records are not copied.
-The records are copied to the end of the record list on new_page. */
-
-void
-page_copy_rec_list_start(
-/*=====================*/
-	page_t*		new_page,	/* in: index page to copy to */
-	page_t*		page,		/* in: index page */
-	rec_t*		rec,		/* in: record on page */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr);		/* in: mtr */
-/*****************************************************************
-Deletes records from a page from a given record onward, including that record.
-The infimum and supremum records are not deleted. */
-
-void
-page_delete_rec_list_end(
-/*=====================*/
-	page_t*		page,	/* in: index page */
-	rec_t*		rec,	/* in: record on page */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint		n_recs,	/* in: number of records to delete,
-				or ULINT_UNDEFINED if not known */
-	ulint		size,	/* in: the sum of the sizes of the
-				records in the end of the chain to
-				delete, or ULINT_UNDEFINED if not known */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
-Deletes records from page, up to the given record, NOT including
-that record. Infimum and supremum records are not deleted. */
-
-void
-page_delete_rec_list_start(
-/*=======================*/
-	page_t*		page,	/* in: index page */
-	rec_t*		rec,	/* in: record on page */
-	dict_index_t*	index,	/* in: record descriptor */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
-Moves record list end to another page. Moved records include
-split_rec. */
-
-void
-page_move_rec_list_end(
-/*===================*/
-	page_t*		new_page,	/* in: index page where to move */
-	page_t*		page,		/* in: index page */
-	rec_t*		split_rec,	/* in: first record to move */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr);		/* in: mtr */
-/*****************************************************************
-Moves record list start to another page. Moved records do not include
-split_rec. */
-
-void
-page_move_rec_list_start(
-/*=====================*/
-	page_t*		new_page,	/* in: index page where to move */
-	page_t*		page,		/* in: index page */
-	rec_t*		split_rec,	/* in: first record not to move */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr);		/* in: mtr */
-/********************************************************************
-Splits a directory slot which owns too many records. */
-
-void
-page_dir_split_slot(
-/*================*/
-	page_t*	page,		/* in: the index page in question */
-	ulint	slot_no);	/* in: the directory slot */
-/*****************************************************************
-Tries to balance the given directory slot with too few records
-with the upper neighbor, so that there are at least the minimum number
-of records owned by the slot; this may result in the merging of
-two slots. */
-
-void
-page_dir_balance_slot(
-/*==================*/
-	page_t*	page,		/* in: index page */
-	ulint	slot_no);	/* in: the directory slot */
-/**************************************************************
-Parses a log record of a record list end or start deletion. */
-
-byte*
-page_parse_delete_rec_list(
-/*=======================*/
-				/* out: end of log record or NULL */
-	byte		type,	/* in: MLOG_LIST_END_DELETE,
-				MLOG_LIST_START_DELETE,
-				MLOG_COMP_LIST_END_DELETE or
-				MLOG_COMP_LIST_START_DELETE */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	dict_index_t*	index,	/* in: record descriptor */
-	page_t*		page,	/* in: page or NULL */
-	mtr_t*		mtr);	/* in: mtr or NULL */
-/***************************************************************
-Parses a redo log record of creating a page. */
-
-byte*
-page_parse_create(
-/*==============*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	ulint	comp,	/* in: nonzero=compact page format */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr);	/* in: mtr or NULL */
-/****************************************************************
-Prints record contents including the data relevant only in
-the index page context. */
-
-void
-page_rec_print(
-/*===========*/
-	rec_t*		rec,	/* in: physical record */
-	const ulint*	offsets);/* in: record descriptor */
-/*******************************************************************
-This is used to print the contents of the directory for
-debugging purposes. */
-
-void
-page_dir_print(
-/*===========*/
-	page_t*	page,	/* in: index page */
-	ulint	pr_n);	/* in: print n first and n last entries */
-/*******************************************************************
-This is used to print the contents of the page record list for
-debugging purposes. */
-
-void
-page_print_list(
-/*============*/
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index,	/* in: dictionary index of the page */
-	ulint		pr_n);	/* in: print n first and n last entries */
-/*******************************************************************
-Prints the info in a page header. */
-
-void
-page_header_print(
-/*==============*/
-	page_t*	page);
-/*******************************************************************
-This is used to print the contents of the page for
-debugging purposes. */
-
-void
-page_print(
-/*=======*/
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index,	/* in: dictionary index of the page */
-	ulint		dn,	/* in: print dn first and last entries
-				in directory */
-	ulint		rn);	/* in: print rn first and last records
-				in directory */
-/*******************************************************************
-The following is used to validate a record on a page. This function
-differs from rec_validate as it can also check the n_owned field and
-the heap_no field. */
-
-ibool
-page_rec_validate(
-/*==============*/
-				/* out: TRUE if ok */
-	rec_t*		rec,	/* in: physical record */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
-Checks that the first directory slot points to the infimum record and
-the last to the supremum. This function is intended to track if the
-bug fixed in 4.0.14 has caused corruption to users' databases. */
-
-void
-page_check_dir(
-/*===========*/
-	page_t*	page);	/* in: index page */
-/*******************************************************************
-This function checks the consistency of an index page when we do not
-know the index. This is also resilient so that this should never crash
-even if the page is total garbage. */
-
-ibool
-page_simple_validate(
-/*=================*/
-			/* out: TRUE if ok */
-	page_t*	page);	/* in: index page */
-/*******************************************************************
-This function checks the consistency of an index page. */
-
-ibool
-page_validate(
-/*==========*/
-				/* out: TRUE if ok */
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index);	/* in: data dictionary index containing
-				the page record type definition */
-/*******************************************************************
-Looks in the page record list for a record with the given heap number. */
-
-rec_t*
-page_find_rec_with_heap_no(
-/*=======================*/
-			/* out: record, NULL if not found */
-	page_t*	page,	/* in: index page */
-	ulint	heap_no);/* in: heap number */
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE  UNIV_INLINE_ORIGINAL
-#endif
-
-#ifndef UNIV_NONINL
-#include "page0page.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
deleted file mode 100644
index d9e67f3eeeb..00000000000
--- a/storage/innobase/include/page0page.ic
+++ /dev/null
@@ -1,851 +0,0 @@
-/******************************************************
-Index page routines
-
-(c) 1994-1996 Innobase Oy
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#include "rem0cmp.h"
-#include "mtr0log.h"
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE
-#endif
-
-/****************************************************************
-Gets the start of a page. */
-UNIV_INLINE
-page_t*
-page_align(
-/*=======*/
-			/* out: start of the page */
-	void*	ptr)	/* in: pointer to page frame */
-{
-	return((page_t*) ut_align_down(ptr, UNIV_PAGE_SIZE));
-}
-/****************************************************************
-Gets the offset within a page. */
-UNIV_INLINE
-ulint
-page_offset(
-/*========*/
-				/* out: offset from the start of the page */
-	const void*	ptr)	/* in: pointer to page frame */
-{
-	return(ut_align_offset(ptr, UNIV_PAGE_SIZE));
-}
-/*****************************************************************
-Returns the max trx id field value. */
-UNIV_INLINE
-dulint
-page_get_max_trx_id(
-/*================*/
-	page_t*	page)	/* in: page */
-{
-	ut_ad(page);
-
-	return(mach_read_from_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID));
-}
-
-/*****************************************************************
-Sets the max trx id field value if trx_id is bigger than the previous
-value. */
-UNIV_INLINE
-void
-page_update_max_trx_id(
-/*===================*/
-	page_t*	page,	/* in: page */
-	dulint	trx_id)	/* in: transaction id */
-{
-	ut_ad(page);
-
-	if (ut_dulint_cmp(page_get_max_trx_id(page), trx_id) < 0) {
-
-		page_set_max_trx_id(page, trx_id);
-	}
-}
-
-/*****************************************************************
-Reads the given header field. */
-UNIV_INLINE
-ulint
-page_header_get_field(
-/*==================*/
-	page_t*	page,	/* in: page */
-	ulint	field)	/* in: PAGE_LEVEL, ... */
-{
-	ut_ad(page);
-	ut_ad(field <= PAGE_INDEX_ID);
-
-	return(mach_read_from_2(page + PAGE_HEADER + field));
-}
-
-/*****************************************************************
-Sets the given header field. */
-UNIV_INLINE
-void
-page_header_set_field(
-/*==================*/
-	page_t*	page,	/* in: page */
-	ulint	field,	/* in: PAGE_LEVEL, ... */
-	ulint	val)	/* in: value */
-{
-	ut_ad(page);
-	ut_ad(field <= PAGE_N_RECS);
-	ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE);
-	ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
-
-	mach_write_to_2(page + PAGE_HEADER + field, val);
-}
-
-/*****************************************************************
-Returns the pointer stored in the given header field. */
-UNIV_INLINE
-byte*
-page_header_get_ptr(
-/*================*/
-			/* out: pointer or NULL */
-	page_t*	page,	/* in: page */
-	ulint	field)	/* in: PAGE_FREE, ... */
-{
-	ulint	offs;
-
-	ut_ad(page);
-	ut_ad((field == PAGE_FREE)
-	      || (field == PAGE_LAST_INSERT)
-	      || (field == PAGE_HEAP_TOP));
-
-	offs = page_header_get_field(page, field);
-
-	ut_ad((field != PAGE_HEAP_TOP) || offs);
-
-	if (offs == 0) {
-
-		return(NULL);
-	}
-
-	return(page + offs);
-}
-
-/*****************************************************************
-Sets the pointer stored in the given header field. */
-UNIV_INLINE
-void
-page_header_set_ptr(
-/*================*/
-	page_t*	page,	/* in: page */
-	ulint	field,	/* in: PAGE_FREE, ... */
-	byte*	ptr)	/* in: pointer or NULL*/
-{
-	ulint	offs;
-
-	ut_ad(page);
-	ut_ad((field == PAGE_FREE)
-	      || (field == PAGE_LAST_INSERT)
-	      || (field == PAGE_HEAP_TOP));
-
-	if (ptr == NULL) {
-		offs = 0;
-	} else {
-		offs = ptr - page;
-	}
-
-	ut_ad((field != PAGE_HEAP_TOP) || offs);
-
-	page_header_set_field(page, field, offs);
-}
-
-/*****************************************************************
-Resets the last insert info field in the page header. Writes to mlog
-about this operation. */
-UNIV_INLINE
-void
-page_header_reset_last_insert(
-/*==========================*/
-	page_t*	page,	/* in: page */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	ut_ad(page && mtr);
-
-	mlog_write_ulint(page + PAGE_HEADER + PAGE_LAST_INSERT, 0,
-			 MLOG_2BYTES, mtr);
-}
-
-/****************************************************************
-Determine whether the page is in new-style compact format. */
-UNIV_INLINE
-ulint
-page_is_comp(
-/*=========*/
-			/* out: nonzero if the page is in compact
-			format, zero if it is in old-style format */
-	page_t*	page)	/* in: index page */
-{
-	return(UNIV_EXPECT(page_header_get_field(page, PAGE_N_HEAP) & 0x8000,
-			   0x8000));
-}
-
-/****************************************************************
-TRUE if the record is on a page in compact format. */
-UNIV_INLINE
-ulint
-page_rec_is_comp(
-/*=============*/
-				/* out: nonzero if in compact format */
-	const rec_t*	rec)	/* in: record */
-{
-	return(page_is_comp(page_align((rec_t*) rec)));
-}
-
-/****************************************************************
-Gets the first record on the page. */
-UNIV_INLINE
-rec_t*
-page_get_infimum_rec(
-/*=================*/
-			/* out: the first record in record list */
-	page_t*	page)	/* in: page which must have record(s) */
-{
-	ut_ad(page);
-
-	if (page_is_comp(page)) {
-		return(page + PAGE_NEW_INFIMUM);
-	} else {
-		return(page + PAGE_OLD_INFIMUM);
-	}
-}
-
-/****************************************************************
-Gets the last record on the page. */
-UNIV_INLINE
-rec_t*
-page_get_supremum_rec(
-/*==================*/
-			/* out: the last record in record list */
-	page_t*	page)	/* in: page which must have record(s) */
-{
-	ut_ad(page);
-
-	if (page_is_comp(page)) {
-		return(page + PAGE_NEW_SUPREMUM);
-	} else {
-		return(page + PAGE_OLD_SUPREMUM);
-	}
-}
-
-/****************************************************************
-TRUE if the record is a user record on the page. */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec_low(
-/*=====================*/
-			/* out: TRUE if a user record */
-	ulint	offset)	/* in: record offset on page */
-{
-	ut_ad(offset >= PAGE_NEW_INFIMUM);
-#if PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM
-# error "PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM"
-#endif
-#if PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM
-# error "PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM"
-#endif
-#if PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM
-# error "PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM"
-#endif
-#if PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM
-# error "PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM"
-#endif
-#if PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END
-# error "PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END"
-#endif
-#if PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END
-# error "PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END"
-#endif
-	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
-	return(UNIV_LIKELY(offset != PAGE_NEW_SUPREMUM)
-	       && UNIV_LIKELY(offset != PAGE_NEW_INFIMUM)
-	       && UNIV_LIKELY(offset != PAGE_OLD_INFIMUM)
-	       && UNIV_LIKELY(offset != PAGE_OLD_SUPREMUM));
-}
-
-/****************************************************************
-TRUE if the record is the supremum record on a page. */
-UNIV_INLINE
-ibool
-page_rec_is_supremum_low(
-/*=====================*/
-			/* out: TRUE if the supremum record */
-	ulint	offset)	/* in: record offset on page */
-{
-	ut_ad(offset >= PAGE_NEW_INFIMUM);
-	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
-	return(UNIV_UNLIKELY(offset == PAGE_NEW_SUPREMUM)
-	       || UNIV_UNLIKELY(offset == PAGE_OLD_SUPREMUM));
-}
-
-/****************************************************************
-TRUE if the record is the infimum record on a page. */
-UNIV_INLINE
-ibool
-page_rec_is_infimum_low(
-/*====================*/
-			/* out: TRUE if the infimum record */
-	ulint	offset)	/* in: record offset on page */
-{
-	ut_ad(offset >= PAGE_NEW_INFIMUM);
-	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
-	return(UNIV_UNLIKELY(offset == PAGE_NEW_INFIMUM)
-	       || UNIV_UNLIKELY(offset == PAGE_OLD_INFIMUM));
-}
-
-/****************************************************************
-TRUE if the record is a user record on the page. */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec(
-/*=================*/
-				/* out: TRUE if a user record */
-	const rec_t*	rec)	/* in: record */
-{
-	return(page_rec_is_user_rec_low(page_offset(rec)));
-}
-
-/****************************************************************
-TRUE if the record is the supremum record on a page. */
-UNIV_INLINE
-ibool
-page_rec_is_supremum(
-/*=================*/
-				/* out: TRUE if the supremum record */
-	const rec_t*	rec)	/* in: record */
-{
-	return(page_rec_is_supremum_low(page_offset(rec)));
-}
-
-/****************************************************************
-TRUE if the record is the infimum record on a page. */
-UNIV_INLINE
-ibool
-page_rec_is_infimum(
-/*================*/
-				/* out: TRUE if the infimum record */
-	const rec_t*	rec)	/* in: record */
-{
-	return(page_rec_is_infimum_low(page_offset(rec)));
-}
-
-/*****************************************************************
-Compares a data tuple to a physical record. Differs from the function
-cmp_dtuple_rec_with_match in the way that the record must reside on an
-index page, and also page infimum and supremum records can be given in
-the parameter rec. These are considered as the negative infinity and
-the positive infinity in the alphabetical order. */
-UNIV_INLINE
-int
-page_cmp_dtuple_rec_with_match(
-/*===========================*/
-				/* out: 1, 0, -1, if dtuple is greater, equal,
-				less than rec, respectively, when only the
-				common first fields are compared */
-	dtuple_t*	dtuple,	/* in: data tuple */
-	rec_t*		rec,	/* in: physical record on a page; may also
-				be page infimum or supremum, in which case
-				matched-parameter values below are not
-				affected */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint*		matched_fields, /* in/out: number of already completely
-				matched fields; when function returns
-				contains the value for current comparison */
-	ulint*		matched_bytes) /* in/out: number of already matched
-				bytes within the first field not completely
-				matched; when function returns contains the
-				value for current comparison */
-{
-	ulint	rec_offset;
-
-	ut_ad(dtuple_check_typed(dtuple));
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
-
-	rec_offset = page_offset(rec);
-
-	if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_INFIMUM)
-	    || UNIV_UNLIKELY(rec_offset == PAGE_OLD_INFIMUM)) {
-		return(1);
-	}
-	if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_SUPREMUM)
-	    || UNIV_UNLIKELY(rec_offset == PAGE_OLD_SUPREMUM)) {
-		return(-1);
-	}
-
-	return(cmp_dtuple_rec_with_match(dtuple, rec, offsets,
-					 matched_fields,
-					 matched_bytes));
-}
-
-/*****************************************************************
-Gets the number of user records on page (infimum and supremum records
-are not user records). */
-UNIV_INLINE
-ulint
-page_get_n_recs(
-/*============*/
-			/* out: number of user records */
-	page_t*	page)	/* in: index page */
-{
-	return(page_header_get_field(page, PAGE_N_RECS));
-}
-
-/*****************************************************************
-Gets the number of dir slots in directory. */
-UNIV_INLINE
-ulint
-page_dir_get_n_slots(
-/*=================*/
-			/* out: number of slots */
-	page_t*	page)	/* in: index page */
-{
-	return(page_header_get_field(page, PAGE_N_DIR_SLOTS));
-}
-/*****************************************************************
-Sets the number of dir slots in directory. */
-UNIV_INLINE
-void
-page_dir_set_n_slots(
-/*=================*/
-			/* out: number of slots */
-	page_t*	page,	/* in: index page */
-	ulint	n_slots)/* in: number of slots */
-{
-	page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots);
-}
-
-/*****************************************************************
-Gets the number of records in the heap. */
-UNIV_INLINE
-ulint
-page_dir_get_n_heap(
-/*================*/
-			/* out: number of user records */
-	page_t*	page)	/* in: index page */
-{
-	return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff);
-}
-
-/*****************************************************************
-Sets the number of records in the heap. */
-UNIV_INLINE
-void
-page_dir_set_n_heap(
-/*================*/
-	page_t*	page,	/* in: index page */
-	ulint	n_heap)	/* in: number of records */
-{
-	ut_ad(n_heap < 0x8000);
-
-	page_header_set_field(page, PAGE_N_HEAP, n_heap
-			      | (0x8000
-				 & page_header_get_field(page, PAGE_N_HEAP)));
-}
-
-/*****************************************************************
-Gets pointer to nth directory slot. */
-UNIV_INLINE
-page_dir_slot_t*
-page_dir_get_nth_slot(
-/*==================*/
-			/* out: pointer to dir slot */
-	page_t*	page,	/* in: index page */
-	ulint	n)	/* in: position */
-{
-	ut_ad(page_dir_get_n_slots(page) > n);
-
-	return(page + UNIV_PAGE_SIZE - PAGE_DIR
-	       - (n + 1) * PAGE_DIR_SLOT_SIZE);
-}
-
-/******************************************************************
-Used to check the consistency of a record on a page. */
-UNIV_INLINE
-ibool
-page_rec_check(
-/*===========*/
-			/* out: TRUE if succeed */
-	rec_t*	rec)	/* in: record */
-{
-	page_t*	page;
-
-	ut_a(rec);
-
-	page = buf_frame_align(rec);
-
-	ut_a(rec <= page_header_get_ptr(page, PAGE_HEAP_TOP));
-	ut_a(rec >= page + PAGE_DATA);
-
-	return(TRUE);
-}
-
-/*******************************************************************
-Gets the record pointed to by a directory slot. */
-UNIV_INLINE
-rec_t*
-page_dir_slot_get_rec(
-/*==================*/
-					/* out: pointer to record */
-	page_dir_slot_t*	slot)	/* in: directory slot */
-{
-	return(buf_frame_align(slot) + mach_read_from_2(slot));
-}
-
-/*******************************************************************
-This is used to set the record offset in a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_rec(
-/*==================*/
-	page_dir_slot_t* slot,	/* in: directory slot */
-	rec_t*		 rec)	/* in: record on the page */
-{
-	ut_ad(page_rec_check(rec));
-
-	mach_write_to_2(slot, page_offset(rec));
-}
-
-/*******************************************************************
-Gets the number of records owned by a directory slot. */
-UNIV_INLINE
-ulint
-page_dir_slot_get_n_owned(
-/*======================*/
-					/* out: number of records */
-	page_dir_slot_t*	slot)	/* in: page directory slot */
-{
-	rec_t*	rec	= page_dir_slot_get_rec(slot);
-	return(rec_get_n_owned(rec, page_rec_is_comp(rec)));
-}
-
-/*******************************************************************
-This is used to set the owned records field of a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_n_owned(
-/*======================*/
-	page_dir_slot_t*	slot,	/* in: directory slot */
-	ulint			n)	/* in: number of records owned
-					by the slot */
-{
-	rec_t*	rec	= page_dir_slot_get_rec(slot);
-	rec_set_n_owned(rec, page_rec_is_comp(rec), n);
-}
-
-/****************************************************************
-Calculates the space reserved for directory slots of a given number of
-records. The exact value is a fraction number n * PAGE_DIR_SLOT_SIZE /
-PAGE_DIR_SLOT_MIN_N_OWNED, and it is rounded upwards to an integer. */
-UNIV_INLINE
-ulint
-page_dir_calc_reserved_space(
-/*=========================*/
-	ulint	n_recs)		/* in: number of records */
-{
-	return((PAGE_DIR_SLOT_SIZE * n_recs + PAGE_DIR_SLOT_MIN_N_OWNED - 1)
-	       / PAGE_DIR_SLOT_MIN_N_OWNED);
-}
-
-/****************************************************************
-Gets the pointer to the next record on the page. */
-UNIV_INLINE
-rec_t*
-page_rec_get_next(
-/*==============*/
-			/* out: pointer to next record */
-	rec_t*	rec)	/* in: pointer to record */
-{
-	ulint	offs;
-	page_t*	page;
-
-	ut_ad(page_rec_check(rec));
-
-	page = page_align(rec);
-
-	offs = rec_get_next_offs(rec, page_is_comp(page));
-
-	if (UNIV_UNLIKELY(offs >= UNIV_PAGE_SIZE)) {
-		fprintf(stderr,
-			"InnoDB: Next record offset is nonsensical %lu"
-			" in record at offset %lu\n"
-			"InnoDB: rec address %p, first buffer frame %p\n"
-			"InnoDB: buffer pool high end %p, buf fix count %lu\n",
-			(ulong)offs, (ulong)(rec - page),
-			(void*) rec, (void*) buf_pool->frame_zero,
-			(void*) buf_pool->high_end,
-			(ulong) buf_block_align(rec)->buf_fix_count);
-		buf_page_print(page);
-
-		ut_error;
-	}
-
-	if (UNIV_UNLIKELY(offs == 0)) {
-
-		return(NULL);
-	}
-
-	return(page + offs);
-}
-
-/****************************************************************
-Sets the pointer to the next record on the page. */
-UNIV_INLINE
-void
-page_rec_set_next(
-/*==============*/
-	rec_t*	rec,	/* in: pointer to record, must not be page supremum */
-	rec_t*	next)	/* in: pointer to next record, must not be page
-			infimum */
-{
-	page_t*	page;
-	ulint	offs;
-
-	ut_ad(page_rec_check(rec));
-	ut_ad(!page_rec_is_supremum(rec));
-	page = page_align(rec);
-
-	if (next) {
-		ut_ad(!page_rec_is_infimum(next));
-		ut_ad(page == page_align(next));
-		offs = (ulint) (next - page);
-	} else {
-		offs = 0;
-	}
-
-	rec_set_next_offs(rec, page_is_comp(page), offs);
-}
-
-/****************************************************************
-Gets the pointer to the previous record. */
-UNIV_INLINE
-rec_t*
-page_rec_get_prev(
-/*==============*/
-			/* out: pointer to previous record */
-	rec_t*	rec)	/* in: pointer to record, must not be page
-			infimum */
-{
-	page_dir_slot_t*	slot;
-	ulint			slot_no;
-	rec_t*			rec2;
-	rec_t*			prev_rec = NULL;
-	page_t*			page;
-
-	ut_ad(page_rec_check(rec));
-
-	page = page_align(rec);
-
-	ut_ad(!page_rec_is_infimum(rec));
-
-	slot_no = page_dir_find_owner_slot(rec);
-
-	ut_a(slot_no != 0);
-
-	slot = page_dir_get_nth_slot(page, slot_no - 1);
-
-	rec2 = page_dir_slot_get_rec(slot);
-
-	while (rec != rec2) {
-		prev_rec = rec2;
-		rec2 = page_rec_get_next(rec2);
-	}
-
-	ut_a(prev_rec);
-
-	return(prev_rec);
-}
-
-/*******************************************************************
-Looks for the record which owns the given record. */
-UNIV_INLINE
-rec_t*
-page_rec_find_owner_rec(
-/*====================*/
-			/* out: the owner record */
-	rec_t*	rec)	/* in: the physical record */
-{
-	ut_ad(page_rec_check(rec));
-
-	if (page_rec_is_comp(rec)) {
-		while (rec_get_n_owned(rec, TRUE) == 0) {
-			rec = page_rec_get_next(rec);
-		}
-	} else {
-		while (rec_get_n_owned(rec, FALSE) == 0) {
-			rec = page_rec_get_next(rec);
-		}
-	}
-
-	return(rec);
-}
-
-/****************************************************************
-Returns the sum of the sizes of the records in the record list, excluding
-the infimum and supremum records. */
-UNIV_INLINE
-ulint
-page_get_data_size(
-/*===============*/
-			/* out: data in bytes */
-	page_t*	page)	/* in: index page */
-{
-	ulint	ret;
-
-	ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP)
-		      - (page_is_comp(page)
-			 ? PAGE_NEW_SUPREMUM_END
-			 : PAGE_OLD_SUPREMUM_END)
-		      - page_header_get_field(page, PAGE_GARBAGE));
-
-	ut_ad(ret < UNIV_PAGE_SIZE);
-
-	return(ret);
-}
-
-/*****************************************************************
-Calculates free space if a page is emptied. */
-UNIV_INLINE
-ulint
-page_get_free_space_of_empty(
-/*=========================*/
-				/* out: free space */
-	ulint	comp)		/* in: nonzero=compact page layout */
-{
-	if (UNIV_LIKELY(comp)) {
-		return((ulint)(UNIV_PAGE_SIZE
-			       - PAGE_NEW_SUPREMUM_END
-			       - PAGE_DIR
-			       - 2 * PAGE_DIR_SLOT_SIZE));
-	}
-
-	return((ulint)(UNIV_PAGE_SIZE
-		       - PAGE_OLD_SUPREMUM_END
-		       - PAGE_DIR
-		       - 2 * PAGE_DIR_SLOT_SIZE));
-}
-
-/****************************************************************
-Each user record on a page, and also the deleted user records in the heap
-takes its size plus the fraction of the dir cell size /
-PAGE_DIR_SLOT_MIN_N_OWNED bytes for it. If the sum of these exceeds the
-value of page_get_free_space_of_empty, the insert is impossible, otherwise
-it is allowed. This function returns the maximum combined size of records
-which can be inserted on top of the record heap. */
-UNIV_INLINE
-ulint
-page_get_max_insert_size(
-/*=====================*/
-			/* out: maximum combined size for inserted records */
-	page_t*	page,	/* in: index page */
-	ulint	n_recs)	/* in: number of records */
-{
-	ulint	occupied;
-	ulint	free_space;
-
-	if (page_is_comp(page)) {
-		occupied = page_header_get_field(page, PAGE_HEAP_TOP)
-			- PAGE_NEW_SUPREMUM_END
-			+ page_dir_calc_reserved_space(
-				n_recs + page_dir_get_n_heap(page) - 2);
-
-		free_space = page_get_free_space_of_empty(TRUE);
-	} else {
-		occupied = page_header_get_field(page, PAGE_HEAP_TOP)
-			- PAGE_OLD_SUPREMUM_END
-			+ page_dir_calc_reserved_space(
-				n_recs + page_dir_get_n_heap(page) - 2);
-
-		free_space = page_get_free_space_of_empty(FALSE);
-	}
-
-	/* Above the 'n_recs +' part reserves directory space for the new
-	inserted records; the '- 2' excludes page infimum and supremum
-	records */
-
-	if (occupied > free_space) {
-
-		return(0);
-	}
-
-	return(free_space - occupied);
-}
-
-/****************************************************************
-Returns the maximum combined size of records which can be inserted on top
-of the record heap if a page is first reorganized. */
-UNIV_INLINE
-ulint
-page_get_max_insert_size_after_reorganize(
-/*======================================*/
-			/* out: maximum combined size for inserted records */
-	page_t*	page,	/* in: index page */
-	ulint	n_recs)	/* in: number of records */
-{
-	ulint	occupied;
-	ulint	free_space;
-
-	occupied = page_get_data_size(page)
-		+ page_dir_calc_reserved_space(n_recs + page_get_n_recs(page));
-
-	free_space = page_get_free_space_of_empty(page_is_comp(page));
-
-	if (occupied > free_space) {
-
-		return(0);
-	}
-
-	return(free_space - occupied);
-}
-
-/****************************************************************
-Puts a record to free list. */
-UNIV_INLINE
-void
-page_mem_free(
-/*==========*/
-	page_t*		page,	/* in: index page */
-	rec_t*		rec,	/* in: pointer to the (origin of) record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	rec_t*		free;
-	ulint		garbage;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
-	free = page_header_get_ptr(page, PAGE_FREE);
-
-	page_rec_set_next(rec, free);
-	page_header_set_ptr(page, PAGE_FREE, rec);
-
-#if 0	/* It's better not to destroy the user's data. */
-
-	/* Clear the data bytes of the deleted record in order to improve
-	the compression ratio of the page and to make it easier to read
-	page dumps in corruption reports.  The extra bytes of the record
-	cannot be cleared, because page_mem_alloc() needs them in order
-	to determine the size of the deleted record. */
-	memset(rec, 0, rec_offs_data_size(offsets));
-#endif
-
-	garbage = page_header_get_field(page, PAGE_GARBAGE);
-
-	page_header_set_field(page, PAGE_GARBAGE,
-			      garbage + rec_offs_size(offsets));
-}
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE	UNIV_INLINE_ORIGINAL
-#endif
diff --git a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h
deleted file mode 100644
index 1fbeeb0f60f..00000000000
--- a/storage/innobase/include/page0types.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/******************************************************
-Index page routines
-
-(c) 1994-1996 Innobase Oy
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef page0types_h
-#define page0types_h
-
-#include "univ.i"
-
-/* Type of the index page */
-/* The following define eliminates a name collision on HP-UX */
-#define page_t	   ib_page_t
-typedef	byte		page_t;
-typedef struct page_search_struct	page_search_t;
-typedef struct page_cur_struct	page_cur_t;
-
-
-#endif
diff --git a/storage/innobase/include/pars0opt.h b/storage/innobase/include/pars0opt.h
deleted file mode 100644
index ff92cc062d9..00000000000
--- a/storage/innobase/include/pars0opt.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/******************************************************
-Simple SQL optimizer
-
-(c) 1997 Innobase Oy
-
-Created 12/21/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0opt_h
-#define pars0opt_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "usr0types.h"
-#include "pars0sym.h"
-#include "dict0types.h"
-#include "row0sel.h"
-
-/***********************************************************************
-Optimizes a select. Decides which indexes to tables to use. The tables
-are accessed in the order that they were written to the FROM part in the
-select statement. */
-
-void
-opt_search_plan(
-/*============*/
-	sel_node_t*	sel_node);	/* in: parsed select node */
-/***********************************************************************
-Looks for occurrences of the columns of the table in the query subgraph and
-adds them to the list of columns if an occurrence of the same column does not
-already exist in the list. If the column is already in the list, puts a value
-indirection to point to the occurrence in the column list, except if the
-column occurrence we are looking at is in the column list, in which case
-nothing is done. */
-
-void
-opt_find_all_cols(
-/*==============*/
-	ibool		copy_val,	/* in: if TRUE, new found columns are
-					added as columns to copy */
-	dict_index_t*	index,		/* in: index to use */
-	sym_node_list_t* col_list,	/* in: base node of a list where
-					to add new found columns */
-	plan_t*		plan,		/* in: plan or NULL */
-	que_node_t*	exp);		/* in: expression or condition */
-/************************************************************************
-Prints info of a query plan. */
-
-void
-opt_print_query_plan(
-/*=================*/
-	sel_node_t*	sel_node);	/* in: select node */
-
-#ifndef UNIV_NONINL
-#include "pars0opt.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/pars0opt.ic b/storage/innobase/include/pars0opt.ic
deleted file mode 100644
index 0bfa8526bee..00000000000
--- a/storage/innobase/include/pars0opt.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-Simple SQL optimizer
-
-(c) 1997 Innobase Oy
-
-Created 12/21/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h
deleted file mode 100644
index 1c6c550d313..00000000000
--- a/storage/innobase/include/pars0pars.h
+++ /dev/null
@@ -1,731 +0,0 @@
-/******************************************************
-SQL parser
-
-(c) 1996 Innobase Oy
-
-Created 11/19/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0pars_h
-#define pars0pars_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "usr0types.h"
-#include "pars0types.h"
-#include "row0types.h"
-#include "trx0types.h"
-#include "ut0vec.h"
-
-/* Type of the user functions. The first argument is always InnoDB-supplied
-and varies in type, while 'user_arg' is a user-supplied argument. The
-meaning of the return type also varies. See the individual use cases, e.g.
-the FETCH statement, for details on them. */
-typedef void* (*pars_user_func_cb_t)(void* arg, void* user_arg);
-
-extern int	yydebug;
-
-/* If the following is set TRUE, the lexer will print the SQL string
-as it tokenizes it */
-
-#ifdef UNIV_SQL_DEBUG
-extern ibool	pars_print_lexed;
-#endif /* UNIV_SQL_DEBUG */
-
-/* Global variable used while parsing a single procedure or query : the code is
-NOT re-entrant */
-extern sym_tab_t*	pars_sym_tab_global;
-
-extern pars_res_word_t	pars_to_char_token;
-extern pars_res_word_t	pars_to_number_token;
-extern pars_res_word_t	pars_to_binary_token;
-extern pars_res_word_t	pars_binary_to_number_token;
-extern pars_res_word_t	pars_substr_token;
-extern pars_res_word_t	pars_replstr_token;
-extern pars_res_word_t	pars_concat_token;
-extern pars_res_word_t	pars_length_token;
-extern pars_res_word_t	pars_instr_token;
-extern pars_res_word_t	pars_sysdate_token;
-extern pars_res_word_t	pars_printf_token;
-extern pars_res_word_t	pars_assert_token;
-extern pars_res_word_t	pars_rnd_token;
-extern pars_res_word_t	pars_rnd_str_token;
-extern pars_res_word_t	pars_count_token;
-extern pars_res_word_t	pars_sum_token;
-extern pars_res_word_t	pars_distinct_token;
-extern pars_res_word_t	pars_binary_token;
-extern pars_res_word_t	pars_blob_token;
-extern pars_res_word_t	pars_int_token;
-extern pars_res_word_t	pars_char_token;
-extern pars_res_word_t	pars_float_token;
-extern pars_res_word_t	pars_update_token;
-extern pars_res_word_t	pars_asc_token;
-extern pars_res_word_t	pars_desc_token;
-extern pars_res_word_t	pars_open_token;
-extern pars_res_word_t	pars_close_token;
-extern pars_res_word_t	pars_share_token;
-extern pars_res_word_t	pars_unique_token;
-extern pars_res_word_t	pars_clustered_token;
-
-extern ulint		pars_star_denoter;
-
-/* Procedure parameter types */
-#define PARS_INPUT	0
-#define PARS_OUTPUT	1
-#define PARS_NOT_PARAM	2
-
-int
-yyparse(void);
-
-/*****************************************************************
-Parses an SQL string returning the query graph. */
-
-que_t*
-pars_sql(
-/*=====*/
-				/* out, own: the query graph */
-	pars_info_t*	info,	/* in: extra information, or NULL */
-	const char*	str);	/* in: SQL string */
-/*****************************************************************
-Retrieves characters to the lexical analyzer. */
-
-void
-pars_get_lex_chars(
-/*===============*/
-	char*	buf,		/* in/out: buffer where to copy */
-	int*	result,		/* out: number of characters copied or EOF */
-	int	max_size);	/* in: maximum number of characters which fit
-				in the buffer */
-/*****************************************************************
-Called by yyparse on error. */
-
-void
-yyerror(
-/*====*/
-	const char*	s);	/* in: error message string */
-/*************************************************************************
-Parses a variable declaration. */
-
-sym_node_t*
-pars_variable_declaration(
-/*======================*/
-				/* out, own: symbol table node of type
-				SYM_VAR */
-	sym_node_t*	node,	/* in: symbol table node allocated for the
-				id of the variable */
-	pars_res_word_t* type);	/* in: pointer to a type token */
-/*************************************************************************
-Parses a function expression. */
-
-func_node_t*
-pars_func(
-/*======*/
-				/* out, own: function node in a query tree */
-	que_node_t*	res_word,/* in: function name reserved word */
-	que_node_t*	arg);	/* in: first argument in the argument list */
-/*************************************************************************
-Parses an operator expression. */
-
-func_node_t*
-pars_op(
-/*====*/
-				/* out, own: function node in a query tree */
-	int		func,	/* in: operator token code */
-	que_node_t*	arg1,	/* in: first argument */
-	que_node_t*	arg2);	/* in: second argument or NULL for an unary
-				operator */
-/*************************************************************************
-Parses an ORDER BY clause. Order by a single column only is supported. */
-
-order_node_t*
-pars_order_by(
-/*==========*/
-				/* out, own: order-by node in a query tree */
-	sym_node_t*	column,	/* in: column name */
-	pars_res_word_t* asc);	/* in: &pars_asc_token or pars_desc_token */
-/*************************************************************************
-Parses a select list; creates a query graph node for the whole SELECT
-statement. */
-
-sel_node_t*
-pars_select_list(
-/*=============*/
-					/* out, own: select node in a query
-					tree */
-	que_node_t*	select_list,	/* in: select list */
-	sym_node_t*	into_list);	/* in: variables list or NULL */
-/*************************************************************************
-Parses a cursor declaration. */
-
-que_node_t*
-pars_cursor_declaration(
-/*====================*/
-					/* out: sym_node */
-	sym_node_t*	sym_node,	/* in: cursor id node in the symbol
-					table */
-	sel_node_t*	select_node);	/* in: select node */
-/*************************************************************************
-Parses a function declaration. */
-
-que_node_t*
-pars_function_declaration(
-/*======================*/
-					/* out: sym_node */
-	sym_node_t*	sym_node);	/* in: function id node in the symbol
-					table */
-/*************************************************************************
-Parses a select statement. */
-
-sel_node_t*
-pars_select_statement(
-/*==================*/
-					/* out, own: select node in a query
-					tree */
-	sel_node_t*	select_node,	/* in: select node already containing
-					the select list */
-	sym_node_t*	table_list,	/* in: table list */
-	que_node_t*	search_cond,	/* in: search condition or NULL */
-	pars_res_word_t* for_update,	/* in: NULL or &pars_update_token */
-	pars_res_word_t* consistent_read,/* in: NULL or
-						&pars_consistent_token */
-	order_node_t*	order_by);	/* in: NULL or an order-by node */
-/*************************************************************************
-Parses a column assignment in an update. */
-
-col_assign_node_t*
-pars_column_assignment(
-/*===================*/
-				/* out: column assignment node */
-	sym_node_t*	column,	/* in: column to assign */
-	que_node_t*	exp);	/* in: value to assign */
-/*************************************************************************
-Parses a delete or update statement start. */
-
-upd_node_t*
-pars_update_statement_start(
-/*========================*/
-					/* out, own: update node in a query
-					tree */
-	ibool		is_delete,	/* in: TRUE if delete */
-	sym_node_t*	table_sym,	/* in: table name node */
-	col_assign_node_t* col_assign_list);/* in: column assignment list, NULL
-					if delete */
-/*************************************************************************
-Parses an update or delete statement. */
-
-upd_node_t*
-pars_update_statement(
-/*==================*/
-					/* out, own: update node in a query
-					tree */
-	upd_node_t*	node,		/* in: update node */
-	sym_node_t*	cursor_sym,	/* in: pointer to a cursor entry in
-					the symbol table or NULL */
-	que_node_t*	search_cond);	/* in: search condition or NULL */
-/*************************************************************************
-Parses an insert statement. */
-
-ins_node_t*
-pars_insert_statement(
-/*==================*/
-					/* out, own: update node in a query
-					tree */
-	sym_node_t*	table_sym,	/* in: table name node */
-	que_node_t*	values_list,	/* in: value expression list or NULL */
-	sel_node_t*	select);	/* in: select condition or NULL */
-/*************************************************************************
-Parses a procedure parameter declaration. */
-
-sym_node_t*
-pars_parameter_declaration(
-/*=======================*/
-				/* out, own: symbol table node of type
-				SYM_VAR */
-	sym_node_t*	node,	/* in: symbol table node allocated for the
-				id of the parameter */
-	ulint		param_type,
-				/* in: PARS_INPUT or PARS_OUTPUT */
-	pars_res_word_t* type);	/* in: pointer to a type token */
-/*************************************************************************
-Parses an elsif element. */
-
-elsif_node_t*
-pars_elsif_element(
-/*===============*/
-					/* out: elsif node */
-	que_node_t*	cond,		/* in: if-condition */
-	que_node_t*	stat_list);	/* in: statement list */
-/*************************************************************************
-Parses an if-statement. */
-
-if_node_t*
-pars_if_statement(
-/*==============*/
-					/* out: if-statement node */
-	que_node_t*	cond,		/* in: if-condition */
-	que_node_t*	stat_list,	/* in: statement list */
-	que_node_t*	else_part);	/* in: else-part statement list */
-/*************************************************************************
-Parses a for-loop-statement. */
-
-for_node_t*
-pars_for_statement(
-/*===============*/
-					/* out: for-statement node */
-	sym_node_t*	loop_var,	/* in: loop variable */
-	que_node_t*	loop_start_limit,/* in: loop start expression */
-	que_node_t*	loop_end_limit,	/* in: loop end expression */
-	que_node_t*	stat_list);	/* in: statement list */
-/*************************************************************************
-Parses a while-statement. */
-
-while_node_t*
-pars_while_statement(
-/*=================*/
-					/* out: while-statement node */
-	que_node_t*	cond,		/* in: while-condition */
-	que_node_t*	stat_list);	/* in: statement list */
-/*************************************************************************
-Parses an exit statement. */
-
-exit_node_t*
-pars_exit_statement(void);
-/*=====================*/
-					/* out: exit statement node */
-/*************************************************************************
-Parses a return-statement. */
-
-return_node_t*
-pars_return_statement(void);
-/*=======================*/
-					/* out: return-statement node */
-/*************************************************************************
-Parses a procedure call. */
-
-func_node_t*
-pars_procedure_call(
-/*================*/
-				/* out: function node */
-	que_node_t*	res_word,/* in: procedure name reserved word */
-	que_node_t*	args);	/* in: argument list */
-/*************************************************************************
-Parses an assignment statement. */
-
-assign_node_t*
-pars_assignment_statement(
-/*======================*/
-				/* out: assignment statement node */
-	sym_node_t*	var,	/* in: variable to assign */
-	que_node_t*	val);	/* in: value to assign */
-/*************************************************************************
-Parses a fetch statement. into_list or user_func (but not both) must be
-non-NULL. */
-
-fetch_node_t*
-pars_fetch_statement(
-/*=================*/
-					/* out: fetch statement node */
-	sym_node_t*	cursor,		/* in: cursor node */
-	sym_node_t*	into_list,	/* in: variables to set, or NULL */
-	sym_node_t*	user_func);	/* in: user function name, or NULL */
-/*************************************************************************
-Parses an open or close cursor statement. */
-
-open_node_t*
-pars_open_statement(
-/*================*/
-				/* out: fetch statement node */
-	ulint		type,	/* in: ROW_SEL_OPEN_CURSOR
-				or ROW_SEL_CLOSE_CURSOR */
-	sym_node_t*	cursor);	/* in: cursor node */
-/*************************************************************************
-Parses a row_printf-statement. */
-
-row_printf_node_t*
-pars_row_printf_statement(
-/*======================*/
-					/* out: row_printf-statement node */
-	sel_node_t*	sel_node);	/* in: select node */
-/*************************************************************************
-Parses a commit statement. */
-
-commit_node_t*
-pars_commit_statement(void);
-/*=======================*/
-/*************************************************************************
-Parses a rollback statement. */
-
-roll_node_t*
-pars_rollback_statement(void);
-/*=========================*/
-/*************************************************************************
-Parses a column definition at a table creation. */
-
-sym_node_t*
-pars_column_def(
-/*============*/
-						/* out: column sym table
-						node */
-	sym_node_t*		sym_node,	/* in: column node in the
-						symbol table */
-	pars_res_word_t*	type,		/* in: data type */
-	sym_node_t*		len,		/* in: length of column, or
-						NULL */
-	void*			is_unsigned,	/* in: if not NULL, column
-						is of type UNSIGNED. */
-	void*			is_not_null);	/* in: if not NULL, column
-						is of type NOT NULL. */
-/*************************************************************************
-Parses a table creation operation. */
-
-tab_node_t*
-pars_create_table(
-/*==============*/
-					/* out: table create subgraph */
-	sym_node_t*	table_sym,	/* in: table name node in the symbol
-					table */
-	sym_node_t*	column_defs,	/* in: list of column names */
-	void*		not_fit_in_memory);/* in: a non-NULL pointer means that
-					this is a table which in simulations
-					should be simulated as not fitting
-					in memory; thread is put to sleep
-					to simulate disk accesses; NOTE that
-					this flag is not stored to the data
-					dictionary on disk, and the database
-					will forget about non-NULL value if
-					it has to reload the table definition
-					from disk */
-/*************************************************************************
-Parses an index creation operation. */
-
-ind_node_t*
-pars_create_index(
-/*==============*/
-					/* out: index create subgraph */
-	pars_res_word_t* unique_def,	/* in: not NULL if a unique index */
-	pars_res_word_t* clustered_def,	/* in: not NULL if a clustered index */
-	sym_node_t*	index_sym,	/* in: index name node in the symbol
-					table */
-	sym_node_t*	table_sym,	/* in: table name node in the symbol
-					table */
-	sym_node_t*	column_list);	/* in: list of column names */
-/*************************************************************************
-Parses a procedure definition. */
-
-que_fork_t*
-pars_procedure_definition(
-/*======================*/
-					/* out: query fork node */
-	sym_node_t*	sym_node,	/* in: procedure id node in the symbol
-					table */
-	sym_node_t*	param_list,	/* in: parameter declaration list */
-	que_node_t*	stat_list);	/* in: statement list */
-
-/*****************************************************************
-Parses a stored procedure call, when this is not within another stored
-procedure, that is, the client issues a procedure call directly.
-In MySQL/InnoDB, stored InnoDB procedures are invoked via the
-parsed procedure tree, not via InnoDB SQL, so this function is not used. */
-
-que_fork_t*
-pars_stored_procedure_call(
-/*=======================*/
-					/* out: query graph */
-	sym_node_t*	sym_node);	/* in: stored procedure name */
-/**********************************************************************
-Completes a query graph by adding query thread and fork nodes
-above it and prepares the graph for running. The fork created is of
-type QUE_FORK_MYSQL_INTERFACE. */
-
-que_thr_t*
-pars_complete_graph_for_exec(
-/*=========================*/
-				/* out: query thread node to run */
-	que_node_t*	node,	/* in: root node for an incomplete
-				query graph */
-	trx_t*		trx,	/* in: transaction handle */
-	mem_heap_t*	heap);	/* in: memory heap from which allocated */
-
-/********************************************************************
-Create parser info struct.*/
-
-pars_info_t*
-pars_info_create(void);
-/*==================*/
-		/* out, own: info struct */
-
-/********************************************************************
-Free info struct and everything it contains.*/
-
-void
-pars_info_free(
-/*===========*/
-	pars_info_t*	info);	/* in: info struct */
-
-/********************************************************************
-Add bound literal. */
-
-void
-pars_info_add_literal(
-/*==================*/
-	pars_info_t*	info,		/* in: info struct */
-	const char*	name,		/* in: name */
-	const void*	address,	/* in: address */
-	ulint		length,		/* in: length of data */
-	ulint		type,		/* in: type, e.g. DATA_FIXBINARY */
-	ulint		prtype);	/* in: precise type, e.g.
-					DATA_UNSIGNED */
-
-/********************************************************************
-Equivalent to pars_info_add_literal(info, name, str, strlen(str),
-DATA_VARCHAR, DATA_ENGLISH). */
-
-void
-pars_info_add_str_literal(
-/*======================*/
-	pars_info_t*	info,		/* in: info struct */
-	const char*	name,		/* in: name */
-	const char*	str);		/* in: string */
-
-/********************************************************************
-Equivalent to:
-
-char buf[4];
-mach_write_to_4(buf, val);
-pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-
-void
-pars_info_add_int4_literal(
-/*=======================*/
-	pars_info_t*	info,		/* in: info struct */
-	const char*	name,		/* in: name */
-	lint		val);		/* in: value */
-
-/********************************************************************
-Equivalent to:
-
-char buf[8];
-mach_write_to_8(buf, val);
-pars_info_add_literal(info, name, buf, 8, DATA_BINARY, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-
-void
-pars_info_add_dulint_literal(
-/*=========================*/
-	pars_info_t*	info,		/* in: info struct */
-	const char*	name,		/* in: name */
-	dulint		val);		/* in: value */
-/********************************************************************
-Add user function. */
-
-void
-pars_info_add_function(
-/*===================*/
-	pars_info_t*		info,	/* in: info struct */
-	const char*		name,	/* in: function name */
-	pars_user_func_cb_t	func,	/* in: function address */
-	void*			arg);	/* in: user-supplied argument */
-
-/********************************************************************
-Add bound id. */
-
-void
-pars_info_add_id(
-/*=============*/
-	pars_info_t*	info,		/* in: info struct */
-	const char*	name,		/* in: name */
-	const char*	id);		/* in: id */
-
-/********************************************************************
-Get user function with the given name.*/
-
-pars_user_func_t*
-pars_info_get_user_func(
-/*====================*/
-					/* out: user func, or NULL if not
-					found */
-	pars_info_t*		info,	/* in: info struct */
-	const char*		name);	/* in: function name to find*/
-
-/********************************************************************
-Get bound literal with the given name.*/
-
-pars_bound_lit_t*
-pars_info_get_bound_lit(
-/*====================*/
-					/* out: bound literal, or NULL if
-					not found */
-	pars_info_t*		info,	/* in: info struct */
-	const char*		name);	/* in: bound literal name to find */
-
-/********************************************************************
-Get bound id with the given name.*/
-
-pars_bound_id_t*
-pars_info_get_bound_id(
-/*===================*/
-					/* out: bound id, or NULL if not
-					found */
-	pars_info_t*		info,	/* in: info struct */
-	const char*		name);	/* in: bound id name to find */
-
-
-/* Extra information supplied for pars_sql(). */
-struct pars_info_struct {
-	mem_heap_t*	heap;		/* our own memory heap */
-
-	ib_vector_t*	funcs;		/* user functions, or NUll
-					(pars_user_func_t*) */
-	ib_vector_t*	bound_lits;	/* bound literals, or NULL
-					(pars_bound_lit_t*) */
-	ib_vector_t*	bound_ids;	/* bound ids, or NULL
-					(pars_bound_id_t*) */
-
-	ibool		graph_owns_us;	/* if TRUE (which is the default),
-					que_graph_free() will free us */
-};
-
-/* User-supplied function and argument. */
-struct pars_user_func_struct {
-	const char*		name;		/* function name */
-	pars_user_func_cb_t	func;		/* function address */
-	void*			arg;		/* user-supplied argument */
-};
-
-/* Bound literal. */
-struct pars_bound_lit_struct {
-	const char*	name;		/* name */
-	const void*	address;	/* address */
-	ulint		length;		/* length of data */
-	ulint		type;		/* type, e.g. DATA_FIXBINARY */
-	ulint		prtype;		/* precise type, e.g. DATA_UNSIGNED */
-};
-
-/* Bound id. */
-struct pars_bound_id_struct {
-	const char*	name;		/* name */
-	const char*	id;		/* id */
-};
-
-/* Struct used to denote a reserved word in a parsing tree */
-struct pars_res_word_struct{
-	int	code;	/* the token code for the reserved word from
-			pars0grm.h */
-};
-
-/* A predefined function or operator node in a parsing tree; this construct
-is also used for some non-functions like the assignment ':=' */
-struct func_node_struct{
-	que_common_t	common;	/* type: QUE_NODE_FUNC */
-	int		func;	/* token code of the function name */
-	ulint		class;	/* class of the function */
-	que_node_t*	args;	/* argument(s) of the function */
-	UT_LIST_NODE_T(func_node_t) cond_list;
-				/* list of comparison conditions; defined
-				only for comparison operator nodes except,
-				presently, for OPT_SCROLL_TYPE ones */
-	UT_LIST_NODE_T(func_node_t) func_node_list;
-				/* list of function nodes in a parsed
-				query graph */
-};
-
-/* An order-by node in a select */
-struct order_node_struct{
-	que_common_t	common;	/* type: QUE_NODE_ORDER */
-	sym_node_t*	column;	/* order-by column */
-	ibool		asc;	/* TRUE if ascending, FALSE if descending */
-};
-
-/* Procedure definition node */
-struct proc_node_struct{
-	que_common_t	common;		/* type: QUE_NODE_PROC */
-	sym_node_t*	proc_id;	/* procedure name symbol in the symbol
-					table of this same procedure */
-	sym_node_t*	param_list;	/* input and output parameters */
-	que_node_t*	stat_list;	/* statement list */
-	sym_tab_t*	sym_tab;	/* symbol table of this procedure */
-};
-
-/* elsif-element node */
-struct elsif_node_struct{
-	que_common_t	common;		/* type: QUE_NODE_ELSIF */
-	que_node_t*	cond;		/* if condition */
-	que_node_t*	stat_list;	/* statement list */
-};
-
-/* if-statement node */
-struct if_node_struct{
-	que_common_t	common;		/* type: QUE_NODE_IF */
-	que_node_t*	cond;		/* if condition */
-	que_node_t*	stat_list;	/* statement list */
-	que_node_t*	else_part;	/* else-part statement list */
-	elsif_node_t*	elsif_list;	/* elsif element list */
-};
-
-/* while-statement node */
-struct while_node_struct{
-	que_common_t	common;		/* type: QUE_NODE_WHILE */
-	que_node_t*	cond;		/* while condition */
-	que_node_t*	stat_list;	/* statement list */
-};
-
-/* for-loop-statement node */
-struct for_node_struct{
-	que_common_t	common;		/* type: QUE_NODE_FOR */
-	sym_node_t*	loop_var;	/* loop variable: this is the
-					dereferenced symbol from the
-					variable declarations, not the
-					symbol occurrence in the for loop
-					definition */
-	que_node_t*	loop_start_limit;/* initial value of loop variable */
-	que_node_t*	loop_end_limit;	/* end value of loop variable */
-	lint		loop_end_value;	/* evaluated value for the end value:
-					it is calculated only when the loop
-					is entered, and will not change within
-					the loop */
-	que_node_t*	stat_list;	/* statement list */
-};
-
-/* exit statement node */
-struct exit_node_struct{
-	que_common_t	common;		/* type: QUE_NODE_EXIT */
-};
-
-/* return-statement node */
-struct return_node_struct{
-	que_common_t	common;		/* type: QUE_NODE_RETURN */
-};
-
-/* Assignment statement node */
-struct assign_node_struct{
-	que_common_t	common;		/* type: QUE_NODE_ASSIGNMENT */
-	sym_node_t*	var;		/* variable to set */
-	que_node_t*	val;		/* value to assign */
-};
-
-/* Column assignment node */
-struct col_assign_node_struct{
-	que_common_t	common;		/* type: QUE_NODE_COL_ASSIGN */
-	sym_node_t*	col;		/* column to set */
-	que_node_t*	val;		/* value to assign */
-};
-
-/* Classes of functions */
-#define PARS_FUNC_ARITH		1	/* +, -, *, / */
-#define	PARS_FUNC_LOGICAL	2
-#define PARS_FUNC_CMP		3
-#define	PARS_FUNC_PREDEFINED	4	/* TO_NUMBER, SUBSTR, ... */
-#define	PARS_FUNC_AGGREGATE	5	/* COUNT, DISTINCT, SUM */
-#define	PARS_FUNC_OTHER		6	/* these are not real functions,
-					e.g., := */
-
-#ifndef UNIV_NONINL
-#include "pars0pars.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/pars0pars.ic b/storage/innobase/include/pars0pars.ic
deleted file mode 100644
index 155b6659ace..00000000000
--- a/storage/innobase/include/pars0pars.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-SQL parser
-
-(c) 1996 Innobase Oy
-
-Created 11/19/1996 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/pars0sym.h b/storage/innobase/include/pars0sym.h
deleted file mode 100644
index fc7df92ff60..00000000000
--- a/storage/innobase/include/pars0sym.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/******************************************************
-SQL parser symbol table
-
-(c) 1997 Innobase Oy
-
-Created 12/15/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0sym_h
-#define pars0sym_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "usr0types.h"
-#include "dict0types.h"
-#include "pars0types.h"
-#include "row0types.h"
-
-/**********************************************************************
-Creates a symbol table for a single stored procedure or query. */
-
-sym_tab_t*
-sym_tab_create(
-/*===========*/
-				/* out, own: symbol table */
-	mem_heap_t*	heap);	/* in: memory heap where to create */
-/**********************************************************************
-Frees the memory allocated dynamically AFTER parsing phase for variables
-etc. in the symbol table. Does not free the mem heap where the table was
-originally created. Frees also SQL explicit cursor definitions. */
-
-void
-sym_tab_free_private(
-/*=================*/
-	sym_tab_t*	sym_tab);	/* in, own: symbol table */
-/**********************************************************************
-Adds an integer literal to a symbol table. */
-
-sym_node_t*
-sym_tab_add_int_lit(
-/*================*/
-					/* out: symbol table node */
-	sym_tab_t*	sym_tab,	/* in: symbol table */
-	ulint		val);		/* in: integer value */
-/**********************************************************************
-Adds an string literal to a symbol table. */
-
-sym_node_t*
-sym_tab_add_str_lit(
-/*================*/
-					/* out: symbol table node */
-	sym_tab_t*	sym_tab,	/* in: symbol table */
-	byte*		str,		/* in: string with no quotes around
-					it */
-	ulint		len);		/* in: string length */
-/**********************************************************************
-Add a bound literal to a symbol table. */
-
-sym_node_t*
-sym_tab_add_bound_lit(
-/*==================*/
-					/* out: symbol table node */
-	sym_tab_t*	sym_tab,	/* in: symbol table */
-	const char*	name,		/* in: name of bound literal */
-	ulint*		lit_type);	/* out: type of literal (PARS_*_LIT) */
-/**********************************************************************
-Adds an SQL null literal to a symbol table. */
-
-sym_node_t*
-sym_tab_add_null_lit(
-/*=================*/
-					/* out: symbol table node */
-	sym_tab_t*	sym_tab);	/* in: symbol table */
-/**********************************************************************
-Adds an identifier to a symbol table. */
-
-sym_node_t*
-sym_tab_add_id(
-/*===========*/
-					/* out: symbol table node */
-	sym_tab_t*	sym_tab,	/* in: symbol table */
-	byte*		name,		/* in: identifier name */
-	ulint		len);		/* in: identifier length */
-
-/**********************************************************************
-Add a bound identifier to a symbol table. */
-
-sym_node_t*
-sym_tab_add_bound_id(
-/*===========*/
-					/* out: symbol table node */
-	sym_tab_t*	sym_tab,	/* in: symbol table */
-	const char*	name);		/* in: name of bound id */
-
-#define	SYM_CLUST_FIELD_NO	0
-#define	SYM_SEC_FIELD_NO	1
-
-struct sym_node_struct{
-	que_common_t			common;		/* node type:
-							QUE_NODE_SYMBOL */
-	/* NOTE: if the data field in 'common.val' is not NULL and the symbol
-	table node is not for a temporary column, the memory for the value has
-	been allocated from dynamic memory and it should be freed when the
-	symbol table is discarded */
-
-	/* 'alias' and 'indirection' are almost the same, but not quite.
-	'alias' always points to the primary instance of the variable, while
-	'indirection' does the same only if we should use the primary
-	instance's values for the node's data. This is usually the case, but
-	when initializing a cursor (e.g., "DECLARE CURSOR c IS SELECT * FROM
-	t WHERE id = x;"), we copy the values from the primary instance to
-	the cursor's instance so that they are fixed for the duration of the
-	cursor, and set 'indirection' to NULL. If we did not, the value of
-	'x' could change between fetches and things would break horribly.
-
-	TODO: It would be cleaner to make 'indirection' a boolean field and
-	always use 'alias' to refer to the primary node. */
-
-	sym_node_t*			indirection;	/* pointer to
-							another symbol table
-							node which contains
-							the value for this
-							node, NULL otherwise */
-	sym_node_t*			alias;		/* pointer to
-							another symbol table
-							node for which this
-							node is an alias,
-							NULL otherwise */
-	UT_LIST_NODE_T(sym_node_t)	col_var_list;	/* list of table
-							columns or a list of
-							input variables for an
-							explicit cursor */
-	ibool				copy_val;	/* TRUE if a column
-							and its value should
-							be copied to dynamic
-							memory when fetched */
-	ulint				field_nos[2];	/* if a column, in
-							the position
-							SYM_CLUST_FIELD_NO is
-							the field number in the
-							clustered index; in
-							the position
-							SYM_SEC_FIELD_NO
-							the field number in the
-							non-clustered index to
-							use first; if not found
-							from the index, then
-							ULINT_UNDEFINED */
-	ibool				resolved;	/* TRUE if the
-							meaning of a variable
-							or a column has been
-							resolved; for literals
-							this is always TRUE */
-	ulint				token_type;	/* SYM_VAR, SYM_COLUMN,
-							SYM_IMPLICIT_VAR,
-							SYM_LIT, SYM_TABLE,
-							SYM_CURSOR, ... */
-	const char*			name;		/* name of an id */
-	ulint				name_len;	/* id name length */
-	dict_table_t*			table;		/* table definition
-							if a table id or a
-							column id */
-	ulint				col_no;		/* column number if a
-							column */
-	sel_buf_t*			prefetch_buf;	/* NULL, or a buffer
-							for cached column
-							values for prefetched
-							rows */
-	sel_node_t*			cursor_def;	/* cursor definition
-							select node if a
-							named cursor */
-	ulint				param_type;	/* PARS_INPUT,
-							PARS_OUTPUT, or
-							PARS_NOT_PARAM if not a
-							procedure parameter */
-	sym_tab_t*			sym_table;	/* back pointer to
-							the symbol table */
-	UT_LIST_NODE_T(sym_node_t)	sym_list;	/* list of symbol
-							nodes */
-};
-
-struct sym_tab_struct{
-	que_t*			query_graph;
-					/* query graph generated by the
-					parser */
-	const char*		sql_string;
-					/* SQL string to parse */
-	size_t			string_len;
-					/* SQL string length */
-	int			next_char_pos;
-					/* position of the next character in
-					sql_string to give to the lexical
-					analyzer */
-	pars_info_t*		info;	/* extra information, or NULL */
-	sym_node_list_t		sym_list;
-					/* list of symbol nodes in the symbol
-					table */
-	UT_LIST_BASE_NODE_T(func_node_t)
-				func_node_list;
-					/* list of function nodes in the
-					parsed query graph */
-	mem_heap_t*		heap;	/* memory heap from which we can
-					allocate space */
-};
-
-/* Types of a symbol table entry */
-#define	SYM_VAR			91	/* declared parameter or local
-					variable of a procedure */
-#define SYM_IMPLICIT_VAR	92	/* storage for a intermediate result
-					of a calculation */
-#define SYM_LIT			93	/* literal */
-#define SYM_TABLE		94	/* database table name */
-#define SYM_COLUMN		95	/* database table name */
-#define SYM_CURSOR		96	/* named cursor */
-#define SYM_PROCEDURE_NAME	97	/* stored procedure name */
-#define SYM_INDEX		98	/* database index name */
-#define SYM_FUNCTION		99	/* user function name */
-
-#ifndef UNIV_NONINL
-#include "pars0sym.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/pars0sym.ic b/storage/innobase/include/pars0sym.ic
deleted file mode 100644
index 9508d423769..00000000000
--- a/storage/innobase/include/pars0sym.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-SQL parser symbol table
-
-(c) 1997 Innobase Oy
-
-Created 12/15/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/que0types.h b/storage/innobase/include/que0types.h
deleted file mode 100644
index 30e3f0a172b..00000000000
--- a/storage/innobase/include/que0types.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/******************************************************
-Query graph global types
-
-(c) 1996 Innobase Oy
-
-Created 5/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef que0types_h
-#define que0types_h
-
-#include "data0data.h"
-#include "dict0types.h"
-
-/* Pseudotype for all graph nodes */
-typedef void	que_node_t;
-
-typedef struct que_fork_struct	que_fork_t;
-
-/* Query graph root is a fork node */
-typedef	que_fork_t	que_t;
-
-typedef struct que_thr_struct		que_thr_t;
-typedef struct que_common_struct	que_common_t;
-
-/* Common struct at the beginning of each query graph node; the name of this
-substruct must be 'common' */
-
-struct que_common_struct{
-	ulint		type;	/* query node type */
-	que_node_t*	parent;	/* back pointer to parent node, or NULL */
-	que_node_t*	brother;/* pointer to a possible brother node */
-	dfield_t	val;	/* evaluated value for an expression */
-	ulint		val_buf_size;
-				/* buffer size for the evaluated value data,
-				if the buffer has been allocated dynamically:
-				if this field is != 0, and the node is a
-				symbol node or a function node, then we
-				have to free the data field in val
-				explicitly */
-};
-
-#endif
diff --git a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
deleted file mode 100644
index 97b6d7e9dd9..00000000000
--- a/storage/innobase/include/read0read.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/******************************************************
-Cursor read
-
-(c) 1997 Innobase Oy
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef read0read_h
-#define read0read_h
-
-#include "univ.i"
-
-
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "trx0trx.h"
-#include "read0types.h"
-
-/*************************************************************************
-Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view. */
-
-read_view_t*
-read_view_open_now(
-/*===============*/
-					/* out, own: read view struct */
-	dulint		cr_trx_id,	/* in: trx_id of creating
-					transaction, or (0, 0) used in
-					purge */
-	mem_heap_t*	heap);		/* in: memory heap from which
-					allocated */
-/*************************************************************************
-Makes a copy of the oldest existing read view, or opens a new. The view
-must be closed with ..._close. */
-
-read_view_t*
-read_view_oldest_copy_or_open_new(
-/*==============================*/
-					/* out, own: read view struct */
-	dulint		cr_trx_id,	/* in: trx_id of creating
-					transaction, or (0, 0) used in
-					purge */
-	mem_heap_t*	heap);		/* in: memory heap from which
-					allocated */
-/*************************************************************************
-Closes a read view. */
-
-void
-read_view_close(
-/*============*/
-	read_view_t*	view);	/* in: read view */
-/*************************************************************************
-Closes a consistent read view for MySQL. This function is called at an SQL
-statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
-
-void
-read_view_close_for_mysql(
-/*======================*/
-	trx_t*	trx);	/* in: trx which has a read view */
-/*************************************************************************
-Checks if a read view sees the specified transaction. */
-UNIV_INLINE
-ibool
-read_view_sees_trx_id(
-/*==================*/
-				/* out: TRUE if sees */
-	read_view_t*	view,	/* in: read view */
-	dulint		trx_id);/* in: trx id */
-/*************************************************************************
-Prints a read view to stderr. */
-
-void
-read_view_print(
-/*============*/
-	read_view_t*	view);	/* in: read view */
-/*************************************************************************
-Create a consistent cursor view for mysql to be used in cursors. In this
-consistent read view modifications done by the creating transaction or future
-transactions are not visible. */
-
-cursor_view_t*
-read_cursor_view_create_for_mysql(
-/*==============================*/
-	trx_t*		cr_trx);/* in: trx where cursor view is created */
-/*************************************************************************
-Close a given consistent cursor view for mysql and restore global read view
-back to a transaction read view. */
-
-void
-read_cursor_view_close_for_mysql(
-/*=============================*/
-	trx_t*		trx,		/* in: trx */
-	cursor_view_t*	curview);	/* in: cursor view to be closed */
-/*************************************************************************
-This function sets a given consistent cursor view to a transaction
-read view if given consistent cursor view is not NULL. Otherwise, function
-restores a global read view to a transaction read view. */
-
-void
-read_cursor_set_for_mysql(
-/*======================*/
-	trx_t*		trx,	/* in: transaction where cursor is set */
-	cursor_view_t*	curview);/* in: consistent cursor view to be set */
-
-/* Read view lists the trx ids of those transactions for which a consistent
-read should not see the modifications to the database. */
-
-struct read_view_struct{
-	ulint	type;		/* VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
-	dulint	undo_no;	/* (0, 0) or if type is VIEW_HIGH_GRANULARITY
-				transaction undo_no when this high-granularity
-				consistent read view was created */
-	dulint	low_limit_no;	/* The view does not need to see the undo
-				logs for transactions whose transaction number
-				is strictly smaller (<) than this value: they
-				can be removed in purge if not needed by other
-				views */
-	dulint	low_limit_id;	/* The read should not see any transaction
-				with trx id >= this value */
-	dulint	up_limit_id;	/* The read should see all trx ids which
-				are strictly smaller (<) than this value */
-	ulint	n_trx_ids;	/* Number of cells in the trx_ids array */
-	dulint*	trx_ids;	/* Additional trx ids which the read should
-				not see: typically, these are the active
-				transactions at the time when the read is
-				serialized, except the reading transaction
-				itself; the trx ids in this array are in a
-				descending order */
-	dulint	creator_trx_id;	/* trx id of creating transaction, or
-				(0, 0) used in purge */
-	UT_LIST_NODE_T(read_view_t) view_list;
-				/* List of read views in trx_sys */
-};
-
-/* Read view types */
-#define VIEW_NORMAL		1	/* Normal consistent read view
-					where transaction does not see changes
-					made by active transactions except
-					creating transaction. */
-#define VIEW_HIGH_GRANULARITY	2	/* High-granularity read view where
-					transaction does not see changes
-					made by active transactions and own
-					changes after a point in time when this
-					read view was created. */
-
-/* Implement InnoDB framework to support consistent read views in
-cursors. This struct holds both heap where consistent read view
-is allocated and pointer to a read view. */
-
-struct cursor_view_struct{
-	mem_heap_t*	heap;
-				/* Memory heap for the cursor view */
-	read_view_t*	read_view;
-				/* Consistent read view of the cursor*/
-	ulint		n_mysql_tables_in_use;
-				/* number of Innobase tables used in the
-				processing of this cursor */
-};
-
-#ifndef UNIV_NONINL
-#include "read0read.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/read0read.ic b/storage/innobase/include/read0read.ic
deleted file mode 100644
index 3aded1ca07c..00000000000
--- a/storage/innobase/include/read0read.ic
+++ /dev/null
@@ -1,81 +0,0 @@
-/******************************************************
-Cursor read
-
-(c) 1997 Innobase Oy
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-/*************************************************************************
-Gets the nth trx id in a read view. */
-UNIV_INLINE
-dulint
-read_view_get_nth_trx_id(
-/*=====================*/
-				/* out: trx id */
-	read_view_t*	view,	/* in: read view */
-	ulint		n)	/* in: position */
-{
-	ut_ad(n < view->n_trx_ids);
-
-	return(*(view->trx_ids + n));
-}
-
-/*************************************************************************
-Sets the nth trx id in a read view. */
-UNIV_INLINE
-void
-read_view_set_nth_trx_id(
-/*=====================*/
-	read_view_t*	view,	/* in: read view */
-	ulint		n,	/* in: position */
-	dulint		trx_id)	/* in: trx id to set */
-{
-	ut_ad(n < view->n_trx_ids);
-
-	*(view->trx_ids + n) = trx_id;
-}
-
-/*************************************************************************
-Checks if a read view sees the specified transaction. */
-UNIV_INLINE
-ibool
-read_view_sees_trx_id(
-/*==================*/
-				/* out: TRUE if sees */
-	read_view_t*	view,	/* in: read view */
-	dulint		trx_id)	/* in: trx id */
-{
-	ulint	n_ids;
-	int	cmp;
-	ulint	i;
-
-	if (ut_dulint_cmp(trx_id, view->up_limit_id) < 0) {
-
-		return(TRUE);
-	}
-
-	if (ut_dulint_cmp(trx_id, view->low_limit_id) >= 0) {
-
-		return(FALSE);
-	}
-
-	/* We go through the trx ids in the array smallest first: this order
-	may save CPU time, because if there was a very long running
-	transaction in the trx id array, its trx id is looked at first, and
-	the first two comparisons may well decide the visibility of trx_id. */
-
-	n_ids = view->n_trx_ids;
-
-	for (i = 0; i < n_ids; i++) {
-
-		cmp = ut_dulint_cmp(
-			trx_id,
-			read_view_get_nth_trx_id(view, n_ids - i - 1));
-		if (cmp <= 0) {
-			return(cmp < 0);
-		}
-	}
-
-	return(TRUE);
-}
diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h
deleted file mode 100644
index 7d42728523e..00000000000
--- a/storage/innobase/include/read0types.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/******************************************************
-Cursor read
-
-(c) 1997 Innobase Oy
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef read0types_h
-#define read0types_h
-
-typedef struct read_view_struct	read_view_t;
-typedef struct cursor_view_struct	cursor_view_t;
-
-#endif
diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h
deleted file mode 100644
index c6a6e5de4db..00000000000
--- a/storage/innobase/include/rem0cmp.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/***********************************************************************
-Comparison services for records
-
-(c) 1994-2001 Innobase Oy
-
-Created 7/1/1994 Heikki Tuuri
-************************************************************************/
-
-#ifndef rem0cmp_h
-#define rem0cmp_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "data0type.h"
-#include "dict0dict.h"
-#include "rem0rec.h"
-
-/*****************************************************************
-Returns TRUE if two columns are equal for comparison purposes. */
-
-ibool
-cmp_cols_are_equal(
-/*===============*/
-					/* out: TRUE if the columns are
-					considered equal in comparisons */
-	const dict_col_t*	col1,	/* in: column 1 */
-	const dict_col_t*	col2,	/* in: column 2 */
-	ibool			check_charsets);
-					/* in: whether to check charsets */
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type. */
-UNIV_INLINE
-int
-cmp_data_data(
-/*==========*/
-				/* out: 1, 0, -1, if data1 is greater, equal,
-				less than data2, respectively */
-	ulint		mtype,	/* in: main type */
-	ulint		prtype,	/* in: precise type */
-	byte*		data1,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len1,	/* in: data field length or UNIV_SQL_NULL */
-	byte*		data2,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len2);	/* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type. */
-
-int
-cmp_data_data_slow(
-/*===============*/
-				/* out: 1, 0, -1, if data1 is greater, equal,
-				less than data2, respectively */
-	ulint		mtype,	/* in: main type */
-	ulint		prtype,	/* in: precise type */
-	byte*		data1,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len1,	/* in: data field length or UNIV_SQL_NULL */
-	byte*		data2,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len2);	/* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INLINE
-int
-cmp_dfield_dfield(
-/*==============*/
-				/* out: 1, 0, -1, if dfield1 is greater, equal,
-				less than dfield2, respectively */
-	dfield_t*	dfield1,/* in: data field; must have type field set */
-	dfield_t*	dfield2);/* in: data field */
-/*****************************************************************
-This function is used to compare a data tuple to a physical record.
-Only dtuple->n_fields_cmp first fields are taken into account for
-the the data tuple! If we denote by n = n_fields_cmp, then rec must
-have either m >= n fields, or it must differ from dtuple in some of
-the m fields rec has. If rec has an externally stored field we do not
-compare it but return with value 0 if such a comparison should be
-made. */
-
-int
-cmp_dtuple_rec_with_match(
-/*======================*/
-				/* out: 1, 0, -1, if dtuple is greater, equal,
-				less than rec, respectively, when only the
-				common first fields are compared, or
-				until the first externally stored field in
-				rec */
-	dtuple_t*	dtuple,	/* in: data tuple */
-	rec_t*		rec,	/* in: physical record which differs from
-				dtuple in some of the common fields, or which
-				has an equal number or more fields than
-				dtuple */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint*		matched_fields, /* in/out: number of already completely
-				matched fields; when function returns,
-				contains the value for current comparison */
-	ulint*		matched_bytes); /* in/out: number of already matched
-				bytes within the first field not completely
-				matched; when function returns, contains the
-				value for current comparison */
-/******************************************************************
-Compares a data tuple to a physical record. */
-
-int
-cmp_dtuple_rec(
-/*===========*/
-				/* out: 1, 0, -1, if dtuple is greater, equal,
-				less than rec, respectively; see the comments
-				for cmp_dtuple_rec_with_match */
-	dtuple_t*	dtuple,	/* in: data tuple */
-	rec_t*		rec,	/* in: physical record */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/******************************************************************
-Checks if a dtuple is a prefix of a record. The last field in dtuple
-is allowed to be a prefix of the corresponding field in the record. */
-
-ibool
-cmp_dtuple_is_prefix_of_rec(
-/*========================*/
-				/* out: TRUE if prefix */
-	dtuple_t*	dtuple,	/* in: data tuple */
-	rec_t*		rec,	/* in: physical record */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/*****************************************************************
-This function is used to compare two physical records. Only the common
-first fields are compared, and if an externally stored field is
-encountered, then 0 is returned. */
-
-int
-cmp_rec_rec_with_match(
-/*===================*/
-				/* out: 1, 0 , -1 if rec1 is greater, equal,
-				less, respectively, than rec2; only the common
-				first fields are compared */
-	rec_t*		rec1,	/* in: physical record */
-	rec_t*		rec2,	/* in: physical record */
-	const ulint*	offsets1,/* in: rec_get_offsets(rec1, index) */
-	const ulint*	offsets2,/* in: rec_get_offsets(rec2, index) */
-	dict_index_t*	index,	/* in: data dictionary index */
-	ulint*		matched_fields, /* in/out: number of already completely
-				matched fields; when the function returns,
-				contains the value the for current
-				comparison */
-	ulint*		matched_bytes);/* in/out: number of already matched
-				bytes within the first field not completely
-				matched; when the function returns, contains
-				the value for the current comparison */
-/*****************************************************************
-This function is used to compare two physical records. Only the common
-first fields are compared. */
-UNIV_INLINE
-int
-cmp_rec_rec(
-/*========*/
-				/* out: 1, 0 , -1 if rec1 is greater, equal,
-				less, respectively, than rec2; only the common
-				first fields are compared */
-	rec_t*		rec1,	/* in: physical record */
-	rec_t*		rec2,	/* in: physical record */
-	const ulint*	offsets1,/* in: rec_get_offsets(rec1, index) */
-	const ulint*	offsets2,/* in: rec_get_offsets(rec2, index) */
-	dict_index_t*	index);	/* in: data dictionary index */
-
-
-#ifndef UNIV_NONINL
-#include "rem0cmp.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic
deleted file mode 100644
index 52dc7ff5dc9..00000000000
--- a/storage/innobase/include/rem0cmp.ic
+++ /dev/null
@@ -1,76 +0,0 @@
-/***********************************************************************
-Comparison services for records
-
-(c) 1994-1996 Innobase Oy
-
-Created 7/1/1994 Heikki Tuuri
-************************************************************************/
-
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type. */
-UNIV_INLINE
-int
-cmp_data_data(
-/*==========*/
-				/* out: 1, 0, -1, if data1 is greater, equal,
-				less than data2, respectively */
-	ulint		mtype,	/* in: main type */
-	ulint		prtype,	/* in: precise type */
-	byte*		data1,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len1,	/* in: data field length or UNIV_SQL_NULL */
-	byte*		data2,	/* in: data field (== a pointer to a memory
-				buffer) */
-	ulint		len2)	/* in: data field length or UNIV_SQL_NULL */
-{
-	return(cmp_data_data_slow(mtype, prtype, data1, len1, data2, len2));
-}
-
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INLINE
-int
-cmp_dfield_dfield(
-/*==============*/
-				/* out: 1, 0, -1, if dfield1 is greater, equal,
-				less than dfield2, respectively */
-	dfield_t*	dfield1,/* in: data field; must have type field set */
-	dfield_t*	dfield2)/* in: data field */
-{
-	const dtype_t*	type;
-
-	ut_ad(dfield_check_typed(dfield1));
-
-	type = dfield_get_type(dfield1);
-
-	return(cmp_data_data(type->mtype, type->prtype,
-			     dfield_get_data(dfield1),
-			     dfield_get_len(dfield1),
-			     dfield_get_data(dfield2),
-			     dfield_get_len(dfield2)));
-}
-
-/*****************************************************************
-This function is used to compare two physical records. Only the common
-first fields are compared. */
-UNIV_INLINE
-int
-cmp_rec_rec(
-/*========*/
-				/* out: 1, 0 , -1 if rec1 is greater, equal,
-				less, respectively, than rec2; only the common
-				first fields are compared */
-	rec_t*		rec1,	/* in: physical record */
-	rec_t*		rec2,	/* in: physical record */
-	const ulint*	offsets1,/* in: rec_get_offsets(rec1, index) */
-	const ulint*	offsets2,/* in: rec_get_offsets(rec2, index) */
-	dict_index_t*	index)	/* in: data dictionary index */
-{
-	ulint	match_f		= 0;
-	ulint	match_b		= 0;
-
-	return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
-				      &match_f, &match_b));
-}
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
deleted file mode 100644
index abc204bb583..00000000000
--- a/storage/innobase/include/rem0rec.h
+++ /dev/null
@@ -1,582 +0,0 @@
-/************************************************************************
-Record manager
-
-(c) 1994-1996 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef rem0rec_h
-#define rem0rec_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "rem0types.h"
-#include "mtr0types.h"
-
-/* Info bit denoting the predefined minimum record: this bit is set
-if and only if the record is the first user record on a non-leaf
-B-tree page that is the leftmost page on its level
-(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */
-#define REC_INFO_MIN_REC_FLAG	0x10UL
-
-/* Number of extra bytes in an old-style record,
-in addition to the data and the offsets */
-#define REC_N_OLD_EXTRA_BYTES	6
-/* Number of extra bytes in a new-style record,
-in addition to the data and the offsets */
-#define REC_N_NEW_EXTRA_BYTES	5
-
-/* Record status values */
-#define REC_STATUS_ORDINARY	0
-#define REC_STATUS_NODE_PTR	1
-#define REC_STATUS_INFIMUM	2
-#define REC_STATUS_SUPREMUM	3
-
-/* Number of elements that should be initially allocated for the
-offsets[] array, first passed to rec_get_offsets() */
-#define REC_OFFS_NORMAL_SIZE	100
-#define REC_OFFS_SMALL_SIZE	10
-
-/**********************************************************
-The following function is used to get the offset of the
-next chained record on the same page. */
-UNIV_INLINE
-ulint
-rec_get_next_offs(
-/*==============*/
-			/* out: the page offset of the next
-			chained record */
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp);	/* in: nonzero=compact page format */
-/**********************************************************
-The following function is used to set the next record offset field
-of the record. */
-UNIV_INLINE
-void
-rec_set_next_offs(
-/*==============*/
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp,	/* in: nonzero=compact page format */
-	ulint	next);	/* in: offset of the next record */
-/**********************************************************
-The following function is used to get the number of fields
-in an old-style record. */
-UNIV_INLINE
-ulint
-rec_get_n_fields_old(
-/*=================*/
-			/* out: number of data fields */
-	rec_t*	rec);	/* in: physical record */
-/**********************************************************
-The following function is used to get the number of fields
-in a record. */
-UNIV_INLINE
-ulint
-rec_get_n_fields(
-/*=============*/
-				/* out: number of data fields */
-	rec_t*		rec,	/* in: physical record */
-	dict_index_t*	index);	/* in: record descriptor */
-/**********************************************************
-The following function is used to get the number of records
-owned by the previous directory record. */
-UNIV_INLINE
-ulint
-rec_get_n_owned(
-/*============*/
-			/* out: number of owned records */
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp);	/* in: nonzero=compact page format */
-/**********************************************************
-The following function is used to set the number of owned
-records. */
-UNIV_INLINE
-void
-rec_set_n_owned(
-/*============*/
-	rec_t*	rec,		/* in: physical record */
-	ulint	comp,		/* in: nonzero=compact page format */
-	ulint	n_owned);	/* in: the number of owned */
-/**********************************************************
-The following function is used to retrieve the info bits of
-a record. */
-UNIV_INLINE
-ulint
-rec_get_info_bits(
-/*==============*/
-			/* out: info bits */
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp);	/* in: nonzero=compact page format */
-/**********************************************************
-The following function is used to set the info bits of a record. */
-UNIV_INLINE
-void
-rec_set_info_bits(
-/*==============*/
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp,	/* in: nonzero=compact page format */
-	ulint	bits);	/* in: info bits */
-/**********************************************************
-The following function retrieves the status bits of a new-style record. */
-UNIV_INLINE
-ulint
-rec_get_status(
-/*===========*/
-			/* out: status bits */
-	rec_t*	rec);	/* in: physical record */
-
-/**********************************************************
-The following function is used to set the status bits of a new-style record. */
-UNIV_INLINE
-void
-rec_set_status(
-/*===========*/
-	rec_t*	rec,	/* in: physical record */
-	ulint	bits);	/* in: info bits */
-
-/**********************************************************
-The following function is used to retrieve the info and status
-bits of a record.  (Only compact records have status bits.) */
-UNIV_INLINE
-ulint
-rec_get_info_and_status_bits(
-/*=========================*/
-			/* out: info bits */
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp);	/* in: nonzero=compact page format */
-/**********************************************************
-The following function is used to set the info and status
-bits of a record.  (Only compact records have status bits.) */
-UNIV_INLINE
-void
-rec_set_info_and_status_bits(
-/*=========================*/
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp,	/* in: nonzero=compact page format */
-	ulint	bits);	/* in: info bits */
-
-/**********************************************************
-The following function tells if record is delete marked. */
-UNIV_INLINE
-ulint
-rec_get_deleted_flag(
-/*=================*/
-			/* out: nonzero if delete marked */
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp);	/* in: nonzero=compact page format */
-/**********************************************************
-The following function is used to set the deleted bit. */
-UNIV_INLINE
-void
-rec_set_deleted_flag(
-/*=================*/
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp,	/* in: nonzero=compact page format */
-	ulint	flag);	/* in: nonzero if delete marked */
-/**********************************************************
-The following function tells if a new-style record is a node pointer. */
-UNIV_INLINE
-ibool
-rec_get_node_ptr_flag(
-/*==================*/
-			/* out: TRUE if node pointer */
-	rec_t*	rec);	/* in: physical record */
-/**********************************************************
-The following function is used to get the order number
-of the record in the heap of the index page. */
-UNIV_INLINE
-ulint
-rec_get_heap_no(
-/*============*/
-			/* out: heap order number */
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp);	/* in: nonzero=compact page format */
-/**********************************************************
-The following function is used to set the heap number
-field in the record. */
-UNIV_INLINE
-void
-rec_set_heap_no(
-/*============*/
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp,	/* in: nonzero=compact page format */
-	ulint	heap_no);/* in: the heap number */
-/**********************************************************
-The following function is used to test whether the data offsets
-in the record are stored in one-byte or two-byte format. */
-UNIV_INLINE
-ibool
-rec_get_1byte_offs_flag(
-/*====================*/
-			/* out: TRUE if 1-byte form */
-	rec_t*	rec);	/* in: physical record */
-/**********************************************************
-The following function determines the offsets to each field
-in the record.	It can reuse a previously allocated array. */
-
-ulint*
-rec_get_offsets_func(
-/*=================*/
-				/* out: the new offsets */
-	rec_t*		rec,	/* in: physical record */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint*		offsets,/* in: array consisting of offsets[0]
-				allocated elements, or an array from
-				rec_get_offsets(), or NULL */
-	ulint		n_fields,/* in: maximum number of initialized fields
-				(ULINT_UNDEFINED if all fields) */
-	mem_heap_t**	heap,	/* in/out: memory heap */
-	const char*	file,	/* in: file name where called */
-	ulint		line);	/* in: line number where called */
-
-#define rec_get_offsets(rec,index,offsets,n,heap)	\
-	rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
-
-/****************************************************************
-Validates offsets returned by rec_get_offsets(). */
-UNIV_INLINE
-ibool
-rec_offs_validate(
-/*==============*/
-				/* out: TRUE if valid */
-	rec_t*		rec,	/* in: record or NULL */
-	dict_index_t*	index,	/* in: record descriptor or NULL */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/****************************************************************
-Updates debug data in offsets, in order to avoid bogus
-rec_offs_validate() failures. */
-UNIV_INLINE
-void
-rec_offs_make_valid(
-/*================*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,/* in: record descriptor */
-	ulint*		offsets);/* in: array returned by rec_get_offsets() */
-
-/****************************************************************
-The following function is used to get a pointer to the nth
-data field in an old-style record. */
-
-byte*
-rec_get_nth_field_old(
-/*==================*/
-			/* out: pointer to the field */
-	rec_t*	rec,	/* in: record */
-	ulint	n,	/* in: index of the field */
-	ulint*	len);	/* out: length of the field; UNIV_SQL_NULL
-			if SQL null */
-/****************************************************************
-Gets the physical size of an old-style field.
-Also an SQL null may have a field of size > 0,
-if the data type is of a fixed size. */
-UNIV_INLINE
-ulint
-rec_get_nth_field_size(
-/*===================*/
-			/* out: field size in bytes */
-	rec_t*	rec,	/* in: record */
-	ulint	n);	/* in: index of the field */
-/****************************************************************
-The following function is used to get a pointer to the nth
-data field in a record. */
-UNIV_INLINE
-byte*
-rec_get_nth_field(
-/*==============*/
-				/* out: pointer to the field */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		n,	/* in: index of the field */
-	ulint*		len);	/* out: length of the field; UNIV_SQL_NULL
-				if SQL null */
-/**********************************************************
-Determine if the offsets are for a record in the new
-compact format. */
-UNIV_INLINE
-ulint
-rec_offs_comp(
-/*==========*/
-				/* out: nonzero if compact format */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/**********************************************************
-Returns nonzero if the extern bit is set in nth field of rec. */
-UNIV_INLINE
-ulint
-rec_offs_nth_extern(
-/*================*/
-				/* out: nonzero if externally stored */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		n);	/* in: nth field */
-/**********************************************************
-Returns nonzero if the SQL NULL bit is set in nth field of rec. */
-UNIV_INLINE
-ulint
-rec_offs_nth_sql_null(
-/*==================*/
-				/* out: nonzero if SQL NULL */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		n);	/* in: nth field */
-/**********************************************************
-Gets the physical size of a field. */
-UNIV_INLINE
-ulint
-rec_offs_nth_size(
-/*==============*/
-				/* out: length of field */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		n);	/* in: nth field */
-
-/**********************************************************
-Returns TRUE if the extern bit is set in any of the fields
-of rec. */
-UNIV_INLINE
-ibool
-rec_offs_any_extern(
-/*================*/
-				/* out: TRUE if a field is stored externally */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/***************************************************************
-Sets the value of the ith field extern storage bit. */
-UNIV_INLINE
-void
-rec_set_nth_field_extern_bit(
-/*=========================*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint		i,	/* in: ith field */
-	ibool		val,	/* in: value to set */
-	mtr_t*		mtr);	/* in: mtr holding an X-latch to the page
-				where rec is, or NULL; in the NULL case
-				we do not write to log about the change */
-/***************************************************************
-Sets TRUE the extern storage bits of fields mentioned in an array. */
-
-void
-rec_set_field_extern_bits(
-/*======================*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: record descriptor */
-	const ulint*	vec,	/* in: array of field numbers */
-	ulint		n_fields,/* in: number of fields numbers */
-	mtr_t*		mtr);	/* in: mtr holding an X-latch to the page
-				where rec is, or NULL; in the NULL case
-				we do not write to log about the change */
-/***************************************************************
-This is used to modify the value of an already existing field in a record.
-The previous value must have exactly the same size as the new value. If len
-is UNIV_SQL_NULL then the field is treated as an SQL null.
-For records in ROW_FORMAT=COMPACT (new-style records), len must not be
-UNIV_SQL_NULL unless the field already is SQL null. */
-UNIV_INLINE
-void
-rec_set_nth_field(
-/*==============*/
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		n,	/* in: index number of the field */
-	const void*	data,	/* in: pointer to the data if not SQL null */
-	ulint		len);	/* in: length of the data or UNIV_SQL_NULL */
-/**************************************************************
-The following function returns the data size of an old-style physical
-record, that is the sum of field lengths. SQL null fields
-are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes. */
-UNIV_INLINE
-ulint
-rec_get_data_size_old(
-/*==================*/
-				/* out: size */
-	rec_t*	rec);	/* in: physical record */
-/**************************************************************
-The following function returns the number of fields in a record. */
-UNIV_INLINE
-ulint
-rec_offs_n_fields(
-/*==============*/
-				/* out: number of fields */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-The following function returns the data size of a physical
-record, that is the sum of field lengths. SQL null fields
-are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes. */
-UNIV_INLINE
-ulint
-rec_offs_data_size(
-/*===============*/
-				/* out: size */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-Returns the total size of record minus data size of record.
-The value returned by the function is the distance from record
-start to record origin in bytes. */
-UNIV_INLINE
-ulint
-rec_offs_extra_size(
-/*================*/
-				/* out: size */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-Returns the total size of a physical record.  */
-UNIV_INLINE
-ulint
-rec_offs_size(
-/*==========*/
-				/* out: size */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-Returns a pointer to the start of the record. */
-UNIV_INLINE
-byte*
-rec_get_start(
-/*==========*/
-				/* out: pointer to start */
-	rec_t*		rec,	/* in: pointer to record */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-Returns a pointer to the end of the record. */
-UNIV_INLINE
-byte*
-rec_get_end(
-/*========*/
-				/* out: pointer to end */
-	rec_t*		rec,	/* in: pointer to record */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
-Copies a physical record to a buffer. */
-UNIV_INLINE
-rec_t*
-rec_copy(
-/*=====*/
-				/* out: pointer to the origin of the copy */
-	void*		buf,	/* in: buffer */
-	const rec_t*	rec,	/* in: physical record */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/******************************************************************
-Copies the first n fields of a physical record to a new physical record in
-a buffer. */
-
-rec_t*
-rec_copy_prefix_to_buf(
-/*===================*/
-					/* out, own: copied record */
-	rec_t*		rec,		/* in: physical record */
-	dict_index_t*	index,		/* in: record descriptor */
-	ulint		n_fields,	/* in: number of fields to copy */
-	byte**		buf,		/* in/out: memory buffer
-					for the copied prefix, or NULL */
-	ulint*		buf_size);	/* in/out: buffer size */
-/****************************************************************
-Folds a prefix of a physical record to a ulint. */
-UNIV_INLINE
-ulint
-rec_fold(
-/*=====*/
-					/* out: the folded value */
-	rec_t*		rec,		/* in: the physical record */
-	const ulint*	offsets,	/* in: array returned by
-					rec_get_offsets() */
-	ulint		n_fields,	/* in: number of complete
-					fields to fold */
-	ulint		n_bytes,	/* in: number of bytes to fold
-					in an incomplete last field */
-	dulint		tree_id);	/* in: index tree id */
-/*************************************************************
-Builds a physical record out of a data tuple and stores it beginning from
-address destination. */
-
-rec_t*
-rec_convert_dtuple_to_rec(
-/*======================*/
-				/* out: pointer to the origin
-				of physical record */
-	byte*		buf,	/* in: start address of the
-				physical record */
-	dict_index_t*	index,	/* in: record descriptor */
-	dtuple_t*	dtuple);/* in: data tuple */
-/**************************************************************
-Returns the extra size of an old-style physical record if we know its
-data size and number of fields. */
-UNIV_INLINE
-ulint
-rec_get_converted_extra_size(
-/*=========================*/
-				/* out: extra size */
-	ulint	data_size,	/* in: data size */
-	ulint	n_fields)	/* in: number of fields */
-		__attribute__((const));
-/**************************************************************
-The following function returns the size of a data tuple when converted to
-a physical record. */
-UNIV_INLINE
-ulint
-rec_get_converted_size(
-/*===================*/
-				/* out: size */
-	dict_index_t*	index,	/* in: record descriptor */
-	dtuple_t*	dtuple);/* in: data tuple */
-/******************************************************************
-Copies the first n fields of a physical record to a data tuple.
-The fields are copied to the memory heap. */
-
-void
-rec_copy_prefix_to_dtuple(
-/*======================*/
-	dtuple_t*	tuple,		/* in: data tuple */
-	rec_t*		rec,		/* in: physical record */
-	dict_index_t*	index,		/* in: record descriptor */
-	ulint		n_fields,	/* in: number of fields to copy */
-	mem_heap_t*	heap);		/* in: memory heap */
-/*******************************************************************
-Validates the consistency of a physical record. */
-
-ibool
-rec_validate(
-/*=========*/
-				/* out: TRUE if ok */
-	rec_t*		rec,	/* in: physical record */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
-Prints an old-style physical record. */
-
-void
-rec_print_old(
-/*==========*/
-	FILE*		file,	/* in: file where to print */
-	rec_t*		rec);	/* in: physical record */
-/*******************************************************************
-Prints a physical record. */
-
-void
-rec_print_new(
-/*==========*/
-	FILE*		file,	/* in: file where to print */
-	rec_t*		rec,	/* in: physical record */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
-Prints a physical record. */
-
-void
-rec_print(
-/*======*/
-	FILE*		file,	/* in: file where to print */
-	rec_t*		rec,	/* in: physical record */
-	dict_index_t*	index);	/* in: record descriptor */
-
-#define REC_INFO_BITS		6	/* This is single byte bit-field */
-
-/* Maximum lengths for the data in a physical record if the offsets
-are given in one byte (resp. two byte) format. */
-#define REC_1BYTE_OFFS_LIMIT	0x7FUL
-#define REC_2BYTE_OFFS_LIMIT	0x7FFFUL
-
-/* The data size of record must be smaller than this because we reserve
-two upmost bits in a two byte offset for special purposes */
-#define REC_MAX_DATA_SIZE	(16 * 1024)
-
-#ifndef UNIV_NONINL
-#include "rem0rec.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/rem0types.h b/storage/innobase/include/rem0types.h
deleted file mode 100644
index 79c162392d2..00000000000
--- a/storage/innobase/include/rem0types.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/************************************************************************
-Record manager global types
-
-(c) 1994-1996 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef rem0types_h
-#define rem0types_h
-
-/* We define the physical record simply as an array of bytes */
-typedef byte	rec_t;
-
-/* Maximum values for various fields (for non-blob tuples) */
-#define REC_MAX_N_FIELDS	(1024 - 1)
-#define REC_MAX_HEAP_NO		(2 * 8192 - 1)
-#define REC_MAX_N_OWNED		(16 - 1)
-
-#endif
diff --git a/storage/innobase/include/row0ins.h b/storage/innobase/include/row0ins.h
deleted file mode 100644
index b4bcc8ac5ca..00000000000
--- a/storage/innobase/include/row0ins.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/******************************************************
-Insert into a table
-
-(c) 1996 Innobase Oy
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0ins_h
-#define row0ins_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "que0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "row0types.h"
-
-/*******************************************************************
-Checks if foreign key constraint fails for an index entry. Sets shared locks
-which lock either the success or the failure of the constraint. NOTE that
-the caller must have a shared latch on dict_foreign_key_check_lock. */
-
-ulint
-row_ins_check_foreign_constraint(
-/*=============================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_NO_REFERENCED_ROW,
-				or DB_ROW_IS_REFERENCED */
-	ibool		check_ref,/* in: TRUE If we want to check that
-				the referenced table is ok, FALSE if we
-				want to to check the foreign key table */
-	dict_foreign_t*	foreign,/* in: foreign constraint; NOTE that the
-				tables mentioned in it must be in the
-				dictionary cache if they exist at all */
-	dict_table_t*	table,	/* in: if check_ref is TRUE, then the foreign
-				table, else the referenced table */
-	dtuple_t*	entry,	/* in: index entry for index */
-	que_thr_t*	thr);	/* in: query thread */
-/*************************************************************************
-Creates an insert node struct. */
-
-ins_node_t*
-ins_node_create(
-/*============*/
-					/* out, own: insert node struct */
-	ulint		ins_type,	/* in: INS_VALUES, ... */
-	dict_table_t*	table,		/* in: table where to insert */
-	mem_heap_t*	heap);		/* in: mem heap where created */
-/*************************************************************************
-Sets a new row to insert for an INS_DIRECT node. This function is only used
-if we have constructed the row separately, which is a rare case; this
-function is quite slow. */
-
-void
-ins_node_set_new_row(
-/*=================*/
-	ins_node_t*	node,	/* in: insert node */
-	dtuple_t*	row);	/* in: new row (or first row) for the node */
-/*******************************************************************
-Tries to insert an index entry to an index. If the index is clustered
-and a record with the same unique key is found, the other record is
-necessarily marked deleted by a committed transaction, or a unique key
-violation error occurs. The delete marked record is then updated to an
-existing record, and we must write an undo log record on the delete
-marked record. If the index is secondary, and a record with exactly the
-same fields is found, the other record is necessarily marked deleted.
-It is then unmarked. Otherwise, the entry is just inserted to the index. */
-
-ulint
-row_ins_index_entry_low(
-/*====================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
-				if pessimistic retry needed, or error code */
-	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
-				depending on whether we wish optimistic or
-				pessimistic descent down the index tree */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry to insert */
-	ulint*		ext_vec,/* in: array containing field numbers of
-				externally stored fields in entry, or NULL */
-	ulint		n_ext_vec,/* in: number of fields in ext_vec */
-	que_thr_t*	thr);	/* in: query thread */
-/*******************************************************************
-Inserts an index entry to index. Tries first optimistic, then pessimistic
-descent down the tree. If the entry matches enough to a delete marked record,
-performs the insert by updating or delete unmarking the delete marked
-record. */
-
-ulint
-row_ins_index_entry(
-/*================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DUPLICATE_KEY, or some other error code */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry to insert */
-	ulint*		ext_vec,/* in: array containing field numbers of
-				externally stored fields in entry, or NULL */
-	ulint		n_ext_vec,/* in: number of fields in ext_vec */
-	que_thr_t*	thr);	/* in: query thread */
-/***************************************************************
-Inserts a row to a table. */
-
-ulint
-row_ins(
-/*====*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, else error code or DB_LOCK_WAIT */
-	ins_node_t*	node,	/* in: row insert node */
-	que_thr_t*	thr);	/* in: query thread */
-/***************************************************************
-Inserts a row to a table. This is a high-level function used in
-SQL execution graphs. */
-
-que_thr_t*
-row_ins_step(
-/*=========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-
-/* Insert node structure */
-
-struct ins_node_struct{
-	que_common_t	common;	/* node type: QUE_NODE_INSERT */
-	ulint		ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */
-	dtuple_t*	row;	/* row to insert */
-	dict_table_t*	table;	/* table where to insert */
-	sel_node_t*	select;	/* select in searched insert */
-	que_node_t*	values_list;/* list of expressions to evaluate and
-				insert in an INS_VALUES insert */
-	ulint		state;	/* node execution state */
-	dict_index_t*	index;	/* NULL, or the next index where the index
-				entry should be inserted */
-	dtuple_t*	entry;	/* NULL, or entry to insert in the index;
-				after a successful insert of the entry,
-				this should be reset to NULL */
-	UT_LIST_BASE_NODE_T(dtuple_t)
-			entry_list;/* list of entries, one for each index */
-	byte*		row_id_buf;/* buffer for the row id sys field in row */
-	dulint		trx_id;	/* trx id or the last trx which executed the
-				node */
-	byte*		trx_id_buf;/* buffer for the trx id sys field in row */
-	mem_heap_t*	entry_sys_heap;
-				/* memory heap used as auxiliary storage;
-				entry_list and sys fields are stored here;
-				if this is NULL, entry list should be created
-				and buffers for sys fields in row allocated */
-	ulint		magic_n;
-};
-
-#define	INS_NODE_MAGIC_N	15849075
-
-/* Insert node types */
-#define INS_SEARCHED	0	/* INSERT INTO ... SELECT ... */
-#define INS_VALUES	1	/* INSERT INTO ... VALUES ... */
-#define INS_DIRECT	2	/* this is for internal use in dict0crea:
-				insert the row directly */
-
-/* Node execution states */
-#define	INS_NODE_SET_IX_LOCK	1	/* we should set an IX lock on table */
-#define INS_NODE_ALLOC_ROW_ID	2	/* row id should be allocated */
-#define	INS_NODE_INSERT_ENTRIES 3	/* index entries should be built and
-					inserted */
-
-#ifndef UNIV_NONINL
-#include "row0ins.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0ins.ic b/storage/innobase/include/row0ins.ic
deleted file mode 100644
index 80a232d41ee..00000000000
--- a/storage/innobase/include/row0ins.ic
+++ /dev/null
@@ -1,9 +0,0 @@
-/******************************************************
-Insert into a table
-
-(c) 1996 Innobase Oy
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-
diff --git a/storage/innobase/include/row0mysql.ic b/storage/innobase/include/row0mysql.ic
deleted file mode 100644
index aa8a70d8761..00000000000
--- a/storage/innobase/include/row0mysql.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-MySQL interface for Innobase
-
-(C) 2001 Innobase Oy
-
-Created 1/23/2001 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h
deleted file mode 100644
index 174dd239eb5..00000000000
--- a/storage/innobase/include/row0purge.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/******************************************************
-Purge obsolete records
-
-(c) 1997 Innobase Oy
-
-Created 3/14/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0purge_h
-#define row0purge_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "btr0types.h"
-#include "btr0pcur.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-
-/************************************************************************
-Creates a purge node to a query graph. */
-
-purge_node_t*
-row_purge_node_create(
-/*==================*/
-				/* out, own: purge node */
-	que_thr_t*	parent,	/* in: parent node, i.e., a thr node */
-	mem_heap_t*	heap);	/* in: memory heap where created */
-/***************************************************************
-Does the purge operation for a single undo log record. This is a high-level
-function used in an SQL execution graph. */
-
-que_thr_t*
-row_purge_step(
-/*===========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-
-/* Purge node structure */
-
-struct purge_node_struct{
-	que_common_t	common;	/* node type: QUE_NODE_PURGE */
-	/*----------------------*/
-	/* Local storage for this graph node */
-	dulint		roll_ptr;/* roll pointer to undo log record */
-	trx_undo_rec_t*	undo_rec;/* undo log record */
-	trx_undo_inf_t*	reservation;/* reservation for the undo log record in
-				the purge array */
-	dulint		undo_no;/* undo number of the record */
-	ulint		rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
-				... */
-	btr_pcur_t	pcur;	/* persistent cursor used in searching the
-				clustered index record */
-	ibool		found_clust;/* TRUE if the clustered index record
-				determined by ref was found in the clustered
-				index, and we were able to position pcur on
-				it */
-	dict_table_t*	table;	/* table where purge is done */
-	ulint		cmpl_info;/* compiler analysis info of an update */
-	upd_t*		update;	/* update vector for a clustered index
-				record */
-	dtuple_t*	ref;	/* NULL, or row reference to the next row to
-				handle */
-	dtuple_t*	row;	/* NULL, or a copy (also fields copied to
-				heap) of the indexed fields of the row to
-				handle */
-	dict_index_t*	index;	/* NULL, or the next index whose record should
-				be handled */
-	mem_heap_t*	heap;	/* memory heap used as auxiliary storage for
-				row; this must be emptied after a successful
-				purge of a row */
-};
-
-#ifndef UNIV_NONINL
-#include "row0purge.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0purge.ic b/storage/innobase/include/row0purge.ic
deleted file mode 100644
index 50aabf0bc1b..00000000000
--- a/storage/innobase/include/row0purge.ic
+++ /dev/null
@@ -1,8 +0,0 @@
-
-/******************************************************
-Purge obsolete records
-
-(c) 1997 Innobase Oy
-
-Created 3/14/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
deleted file mode 100644
index bea7627cd86..00000000000
--- a/storage/innobase/include/row0row.h
+++ /dev/null
@@ -1,250 +0,0 @@
-/******************************************************
-General row routines
-
-(c) 1996 Innobase Oy
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0row_h
-#define row0row_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "mtr0mtr.h"
-#include "rem0types.h"
-#include "read0types.h"
-#include "btr0types.h"
-
-/*************************************************************************
-Reads the trx id field from a clustered index record. */
-UNIV_INLINE
-dulint
-row_get_rec_trx_id(
-/*===============*/
-				/* out: value of the field */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
-Reads the roll pointer field from a clustered index record. */
-UNIV_INLINE
-dulint
-row_get_rec_roll_ptr(
-/*=================*/
-				/* out: value of the field */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
-Writes the trx id field to a clustered index record. */
-UNIV_INLINE
-void
-row_set_rec_trx_id(
-/*===============*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	dulint		trx_id);/* in: value of the field */
-/*************************************************************************
-Sets the roll pointer field in a clustered index record. */
-UNIV_INLINE
-void
-row_set_rec_roll_ptr(
-/*=================*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	dulint		roll_ptr);/* in: value of the field */
-/*********************************************************************
-When an insert to a table is performed, this function builds the entry which
-has to be inserted to an index on the table. */
-
-dtuple_t*
-row_build_index_entry(
-/*==================*/
-				/* out: index entry which should be inserted */
-	dtuple_t*	row,	/* in: row which should be inserted to the
-				table */
-	dict_index_t*	index,	/* in: index on the table */
-	mem_heap_t*	heap);	/* in: memory heap from which the memory for
-				the index entry is allocated */
-/***********************************************************************
-An inverse function to dict_row_build_index_entry. Builds a row from a
-record in a clustered index. */
-
-dtuple_t*
-row_build(
-/*======*/
-				/* out, own: row built; see the NOTE below! */
-	ulint		type,	/* in: ROW_COPY_POINTERS or ROW_COPY_DATA;
-				the latter copies also the data fields to
-				heap while the first only places pointers to
-				data fields on the index page, and thus is
-				more efficient */
-	dict_index_t*	index,	/* in: clustered index */
-	rec_t*		rec,	/* in: record in the clustered index;
-				NOTE: in the case ROW_COPY_POINTERS
-				the data fields in the row will point
-				directly into this record, therefore,
-				the buffer page of this record must be
-				at least s-latched and the latch held
-				as long as the row dtuple is used! */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index)
-				or NULL, in which case this function
-				will invoke rec_get_offsets() */
-	mem_heap_t*	heap);	/* in: memory heap from which the memory
-				needed is allocated */
-/***********************************************************************
-Converts an index record to a typed data tuple. */
-
-dtuple_t*
-row_rec_to_index_entry(
-/*===================*/
-				/* out, own: index entry built; see the
-				NOTE below! */
-	ulint		type,	/* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
-				the former copies also the data fields to
-				heap as the latter only places pointers to
-				data fields on the index page */
-	dict_index_t*	index,	/* in: index */
-	rec_t*		rec,	/* in: record in the index;
-				NOTE: in the case ROW_COPY_POINTERS
-				the data fields in the row will point
-				directly into this record, therefore,
-				the buffer page of this record must be
-				at least s-latched and the latch held
-				as long as the dtuple is used! */
-	mem_heap_t*	heap);	/* in: memory heap from which the memory
-				needed is allocated */
-/***********************************************************************
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-
-dtuple_t*
-row_build_row_ref(
-/*==============*/
-				/* out, own: row reference built; see the
-				NOTE below! */
-	ulint		type,	/* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
-				the former copies also the data fields to
-				heap, whereas the latter only places pointers
-				to data fields on the index page */
-	dict_index_t*	index,	/* in: index */
-	rec_t*		rec,	/* in: record in the index;
-				NOTE: in the case ROW_COPY_POINTERS
-				the data fields in the row will point
-				directly into this record, therefore,
-				the buffer page of this record must be
-				at least s-latched and the latch held
-				as long as the row reference is used! */
-	mem_heap_t*	heap);	/* in: memory heap from which the memory
-				needed is allocated */
-/***********************************************************************
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-
-void
-row_build_row_ref_in_tuple(
-/*=======================*/
-	dtuple_t*	ref,	/* in/out: row reference built; see the
-				NOTE below! */
-	dict_index_t*	index,	/* in: index */
-	rec_t*		rec,	/* in: record in the index;
-				NOTE: the data fields in ref will point
-				directly into this record, therefore,
-				the buffer page of this record must be
-				at least s-latched and the latch held
-				as long as the row reference is used! */
-	trx_t*		trx);	/* in: transaction */
-/***********************************************************************
-From a row build a row reference with which we can search the clustered
-index record. */
-
-void
-row_build_row_ref_from_row(
-/*=======================*/
-	dtuple_t*	ref,	/* in/out: row reference built; see the
-				NOTE below! ref must have the right number
-				of fields! */
-	dict_table_t*	table,	/* in: table */
-	dtuple_t*	row);	/* in: row
-				NOTE: the data fields in ref will point
-				directly into data of this row */
-/***********************************************************************
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-UNIV_INLINE
-void
-row_build_row_ref_fast(
-/*===================*/
-	dtuple_t*	ref,	/* in: typed data tuple where the
-				reference is built */
-	const ulint*	map,	/* in: array of field numbers in rec
-				telling how ref should be built from
-				the fields of rec */
-	rec_t*		rec,	/* in: record in the index; must be
-				preserved while ref is used, as we do
-				not copy field values to heap */
-	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
-Searches the clustered index record for a row, if we have the row
-reference. */
-
-ibool
-row_search_on_row_ref(
-/*==================*/
-				/* out: TRUE if found */
-	btr_pcur_t*	pcur,	/* in/out: persistent cursor, which must
-				be closed by the caller */
-	ulint		mode,	/* in: BTR_MODIFY_LEAF, ... */
-	dict_table_t*	table,	/* in: table */
-	dtuple_t*	ref,	/* in: row reference */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************************
-Fetches the clustered index record for a secondary index record. The latches
-on the secondary index record are preserved. */
-
-rec_t*
-row_get_clust_rec(
-/*==============*/
-				/* out: record or NULL, if no record found */
-	ulint		mode,	/* in: BTR_MODIFY_LEAF, ... */
-	rec_t*		rec,	/* in: record in a secondary index */
-	dict_index_t*	index,	/* in: secondary index */
-	dict_index_t**	clust_index,/* out: clustered index */
-	mtr_t*		mtr);	/* in: mtr */
-/*******************************************************************
-Searches an index record. */
-
-ibool
-row_search_index_entry(
-/*===================*/
-				/* out: TRUE if found */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry */
-	ulint		mode,	/* in: BTR_MODIFY_LEAF, ... */
-	btr_pcur_t*	pcur,	/* in/out: persistent cursor, which must
-				be closed by the caller */
-	mtr_t*		mtr);	/* in: mtr */
-
-
-#define ROW_COPY_DATA		1
-#define ROW_COPY_POINTERS	2
-
-/* The allowed latching order of index records is the following:
-(1) a secondary index record ->
-(2) the clustered index record ->
-(3) rollback segment data for the clustered index record.
-
-No new latches may be obtained while the kernel mutex is reserved.
-However, the kernel mutex can be reserved while latches are owned. */
-
-#ifndef UNIV_NONINL
-#include "row0row.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic
deleted file mode 100644
index de417f3d971..00000000000
--- a/storage/innobase/include/row0row.ic
+++ /dev/null
@@ -1,182 +0,0 @@
-/******************************************************
-General row routines
-
-(c) 1996 Innobase Oy
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dict0dict.h"
-#include "rem0rec.h"
-#include "trx0undo.h"
-
-/*************************************************************************
-Reads the trx id or roll ptr field from a clustered index record: this function
-is slower than the specialized inline functions. */
-
-dulint
-row_get_rec_sys_field(
-/*==================*/
-				/* out: value of the field */
-	ulint		type,	/* in: DATA_TRX_ID or DATA_ROLL_PTR */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
-Sets the trx id or roll ptr field in a clustered index record: this function
-is slower than the specialized inline functions. */
-
-void
-row_set_rec_sys_field(
-/*==================*/
-				/* out: value of the field */
-	ulint		type,	/* in: DATA_TRX_ID or DATA_ROLL_PTR */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	dulint		val);	/* in: value to set */
-
-/*************************************************************************
-Reads the trx id field from a clustered index record. */
-UNIV_INLINE
-dulint
-row_get_rec_trx_id(
-/*===============*/
-				/* out: value of the field */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
-{
-	ulint	offset;
-
-	ut_ad(index->type & DICT_CLUSTERED);
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	offset = index->trx_id_offset;
-
-	if (offset) {
-		return(trx_read_trx_id(rec + offset));
-	} else {
-		return(row_get_rec_sys_field(DATA_TRX_ID,
-					     rec, index, offsets));
-	}
-}
-
-/*************************************************************************
-Reads the roll pointer field from a clustered index record. */
-UNIV_INLINE
-dulint
-row_get_rec_roll_ptr(
-/*=================*/
-				/* out: value of the field */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
-{
-	ulint	offset;
-
-	ut_ad(index->type & DICT_CLUSTERED);
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	offset = index->trx_id_offset;
-
-	if (offset) {
-		return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
-	} else {
-		return(row_get_rec_sys_field(DATA_ROLL_PTR,
-					     rec, index, offsets));
-	}
-}
-
-/*************************************************************************
-Writes the trx id field to a clustered index record. */
-UNIV_INLINE
-void
-row_set_rec_trx_id(
-/*===============*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	dulint		trx_id)	/* in: value of the field */
-{
-	ulint	offset;
-
-	ut_ad(index->type & DICT_CLUSTERED);
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	offset = index->trx_id_offset;
-
-	if (offset) {
-		trx_write_trx_id(rec + offset, trx_id);
-	} else {
-		row_set_rec_sys_field(DATA_TRX_ID,
-				      rec, index, offsets, trx_id);
-	}
-}
-
-/*************************************************************************
-Sets the roll pointer field in a clustered index record. */
-UNIV_INLINE
-void
-row_set_rec_roll_ptr(
-/*=================*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	dulint		roll_ptr)/* in: value of the field */
-{
-	ulint	offset;
-
-	ut_ad(index->type & DICT_CLUSTERED);
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	offset = index->trx_id_offset;
-
-	if (offset) {
-		trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
-	} else {
-		row_set_rec_sys_field(DATA_ROLL_PTR,
-				      rec, index, offsets, roll_ptr);
-	}
-}
-
-/***********************************************************************
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-UNIV_INLINE
-void
-row_build_row_ref_fast(
-/*===================*/
-	dtuple_t*	ref,	/* in: typed data tuple where the
-				reference is built */
-	const ulint*	map,	/* in: array of field numbers in rec
-				telling how ref should be built from
-				the fields of rec */
-	rec_t*		rec,	/* in: record in the index; must be
-				preserved while ref is used, as we do
-				not copy field values to heap */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	dfield_t*	dfield;
-	byte*		field;
-	ulint		len;
-	ulint		ref_len;
-	ulint		field_no;
-	ulint		i;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ref_len = dtuple_get_n_fields(ref);
-
-	for (i = 0; i < ref_len; i++) {
-		dfield = dtuple_get_nth_field(ref, i);
-
-		field_no = *(map + i);
-
-		if (field_no != ULINT_UNDEFINED) {
-
-			field = rec_get_nth_field(rec, offsets,
-						  field_no, &len);
-			dfield_set_data(dfield, field, len);
-		}
-	}
-}
diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
deleted file mode 100644
index a0a4ccb973b..00000000000
--- a/storage/innobase/include/row0sel.h
+++ /dev/null
@@ -1,392 +0,0 @@
-/******************************************************
-Select
-
-(c) 1997 Innobase Oy
-
-Created 12/19/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0sel_h
-#define row0sel_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "que0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "row0types.h"
-#include "que0types.h"
-#include "pars0sym.h"
-#include "btr0pcur.h"
-#include "read0read.h"
-#include "row0mysql.h"
-
-/*************************************************************************
-Creates a select node struct. */
-
-sel_node_t*
-sel_node_create(
-/*============*/
-				/* out, own: select node struct */
-	mem_heap_t*	heap);	/* in: memory heap where created */
-/*************************************************************************
-Frees the memory private to a select node when a query graph is freed,
-does not free the heap where the node was originally created. */
-
-void
-sel_node_free_private(
-/*==================*/
-	sel_node_t*	node);	/* in: select node struct */
-/*************************************************************************
-Frees a prefetch buffer for a column, including the dynamically allocated
-memory for data stored there. */
-
-void
-sel_col_prefetch_buf_free(
-/*======================*/
-	sel_buf_t*	prefetch_buf);	/* in, own: prefetch buffer */
-/*************************************************************************
-Gets the plan node for the nth table in a join. */
-UNIV_INLINE
-plan_t*
-sel_node_get_nth_plan(
-/*==================*/
-	sel_node_t*	node,
-	ulint		i);
-/**************************************************************************
-Performs a select step. This is a high-level function used in SQL execution
-graphs. */
-
-que_thr_t*
-row_sel_step(
-/*=========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
-Performs an execution step of an open or close cursor statement node. */
-UNIV_INLINE
-que_thr_t*
-open_step(
-/*======*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
-Performs a fetch for a cursor. */
-
-que_thr_t*
-fetch_step(
-/*=======*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/********************************************************************
-Sample callback function for fetch that prints each row.*/
-
-void*
-row_fetch_print(
-/*============*/
-				/* out: always returns non-NULL */
-	void*	row,		/* in:  sel_node_t* */
-	void*	user_arg);	/* in:  not used */
-/********************************************************************
-Callback function for fetch that stores an unsigned 4 byte integer to the
-location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length
-= 4. */
-
-void*
-row_fetch_store_uint4(
-/*==================*/
-				/* out: always returns NULL */
-	void*	row,		/* in:  sel_node_t* */
-	void*	user_arg);	/* in:  data pointer */
-/***************************************************************
-Prints a row in a select result. */
-
-que_thr_t*
-row_printf_step(
-/*============*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/********************************************************************
-Converts a key value stored in MySQL format to an Innobase dtuple. The last
-field of the key value may be just a prefix of a fixed length field: hence
-the parameter key_len. But currently we do not allow search keys where the
-last field is only a prefix of the full key field len and print a warning if
-such appears. */
-
-void
-row_sel_convert_mysql_key_to_innobase(
-/*==================================*/
-	dtuple_t*	tuple,		/* in: tuple where to build;
-					NOTE: we assume that the type info
-					in the tuple is already according
-					to index! */
-	byte*		buf,		/* in: buffer to use in field
-					conversions */
-	ulint		buf_len,	/* in: buffer length */
-	dict_index_t*	index,		/* in: index of the key value */
-	byte*		key_ptr,	/* in: MySQL key value */
-	ulint		key_len,	/* in: MySQL key value length */
-	trx_t*		trx);		/* in: transaction */
-/************************************************************************
-Searches for rows in the database. This is used in the interface to
-MySQL. This function opens a cursor, and also implements fetch next
-and fetch prev. NOTE that if we do a search with a full key value
-from a unique index (ROW_SEL_EXACT), then we will not store the cursor
-position and fetch next or fetch prev must not be tried to the cursor! */
-
-ulint
-row_search_for_mysql(
-/*=================*/
-					/* out: DB_SUCCESS,
-					DB_RECORD_NOT_FOUND,
-					DB_END_OF_INDEX, DB_DEADLOCK,
-					DB_LOCK_TABLE_FULL,
-					or DB_TOO_BIG_RECORD */
-	byte*		buf,		/* in/out: buffer for the fetched
-					row in the MySQL format */
-	ulint		mode,		/* in: search mode PAGE_CUR_L, ... */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct for the
-					table handle; this contains the info
-					of search_tuple, index; if search
-					tuple contains 0 fields then we
-					position the cursor at the start or
-					the end of the index, depending on
-					'mode' */
-	ulint		match_mode,	/* in: 0 or ROW_SEL_EXACT or
-					ROW_SEL_EXACT_PREFIX */
-	ulint		direction);	/* in: 0 or ROW_SEL_NEXT or
-					ROW_SEL_PREV; NOTE: if this is != 0,
-					then prebuilt must have a pcur
-					with stored position! In opening of a
-					cursor 'direction' should be 0. */
-/***********************************************************************
-Checks if MySQL at the moment is allowed for this table to retrieve a
-consistent read result, or store it to the query cache. */
-
-ibool
-row_search_check_if_query_cache_permitted(
-/*======================================*/
-					/* out: TRUE if storing or retrieving
-					from the query cache is permitted */
-	trx_t*		trx,		/* in: transaction object */
-	const char*	norm_name);	/* in: concatenation of database name,
-					'/' char, table name */
-/***********************************************************************
-Read the max AUTOINC value from an index. */
-
-ulint
-row_search_max_autoinc(
-/*===================*/
-					/* out: DB_SUCCESS if all OK else
-					error code */
-	dict_index_t*	index,		/* in: index to search */
-	const char*	col_name,	/* in: autoinc column name */
-	ib_ulonglong*	value);		/* out: AUTOINC value read */
-
-/* A structure for caching column values for prefetched rows */
-struct sel_buf_struct{
-	byte*		data;	/* data, or NULL; if not NULL, this field
-				has allocated memory which must be explicitly
-				freed; can be != NULL even when len is
-				UNIV_SQL_NULL */
-	ulint		len;	/* data length or UNIV_SQL_NULL */
-	ulint		val_buf_size;
-				/* size of memory buffer allocated for data:
-				this can be more than len; this is defined
-				when data != NULL */
-};
-
-struct plan_struct{
-	dict_table_t*	table;		/* table struct in the dictionary
-					cache */
-	dict_index_t*	index;		/* table index used in the search */
-	btr_pcur_t	pcur;		/* persistent cursor used to search
-					the index */
-	ibool		asc;		/* TRUE if cursor traveling upwards */
-	ibool		pcur_is_open;	/* TRUE if pcur has been positioned
-					and we can try to fetch new rows */
-	ibool		cursor_at_end;	/* TRUE if the cursor is open but
-					we know that there are no more
-					qualifying rows left to retrieve from
-					the index tree; NOTE though, that
-					there may still be unprocessed rows in
-					the prefetch stack; always FALSE when
-					pcur_is_open is FALSE */
-	ibool		stored_cursor_rec_processed;
-					/* TRUE if the pcur position has been
-					stored and the record it is positioned
-					on has already been processed */
-	que_node_t**	tuple_exps;	/* array of expressions which are used
-					to calculate the field values in the
-					search tuple: there is one expression
-					for each field in the search tuple */
-	dtuple_t*	tuple;		/* search tuple */
-	ulint		mode;		/* search mode: PAGE_CUR_G, ... */
-	ulint		n_exact_match;	/* number of first fields in the search
-					tuple which must be exactly matched */
-	ibool		unique_search;	/* TRUE if we are searching an
-					index record with a unique key */
-	ulint		n_rows_fetched;	/* number of rows fetched using pcur
-					after it was opened */
-	ulint		n_rows_prefetched;/* number of prefetched rows cached
-					for fetch: fetching several rows in
-					the same mtr saves CPU time */
-	ulint		first_prefetched;/* index of the first cached row in
-					select buffer arrays for each column */
-	ibool		no_prefetch;	/* no prefetch for this table */
-	sym_node_list_t	columns;	/* symbol table nodes for the columns
-					to retrieve from the table */
-	UT_LIST_BASE_NODE_T(func_node_t)
-			end_conds;	/* conditions which determine the
-					fetch limit of the index segment we
-					have to look at: when one of these
-					fails, the result set has been
-					exhausted for the cursor in this
-					index; these conditions are normalized
-					so that in a comparison the column
-					for this table is the first argument */
-	UT_LIST_BASE_NODE_T(func_node_t)
-			other_conds;	/* the rest of search conditions we can
-					test at this table in a join */
-	ibool		must_get_clust;	/* TRUE if index is a non-clustered
-					index and we must also fetch the
-					clustered index record; this is the
-					case if the non-clustered record does
-					not contain all the needed columns, or
-					if this is a single-table explicit
-					cursor, or a searched update or
-					delete */
-	ulint*		clust_map;	/* map telling how clust_ref is built
-					from the fields of a non-clustered
-					record */
-	dtuple_t*	clust_ref;	/* the reference to the clustered
-					index entry is built here if index is
-					a non-clustered index */
-	btr_pcur_t	clust_pcur;	/* if index is non-clustered, we use
-					this pcur to search the clustered
-					index */
-	mem_heap_t*	old_vers_heap;	/* memory heap used in building an old
-					version of a row, or NULL */
-};
-
-struct sel_node_struct{
-	que_common_t	common;		/* node type: QUE_NODE_SELECT */
-	ulint		state;		/* node state */
-	que_node_t*	select_list;	/* select list */
-	sym_node_t*	into_list;	/* variables list or NULL */
-	sym_node_t*	table_list;	/* table list */
-	ibool		asc;		/* TRUE if the rows should be fetched
-					in an ascending order */
-	ibool		set_x_locks;	/* TRUE if the cursor is for update or
-					delete, which means that a row x-lock
-					should be placed on the cursor row */
-	ibool		select_will_do_update;
-					/* TRUE if the select is for a searched
-					update which can be performed in-place:
-					in this case the select will take care
-					of the update */
-	ulint		latch_mode;	/* BTR_SEARCH_LEAF, or BTR_MODIFY_LEAF
-					if select_will_do_update is TRUE */
-	ulint		row_lock_mode;	/* LOCK_X or LOCK_S */
-	ulint		n_tables;	/* number of tables */
-	ulint		fetch_table;	/* number of the next table to access
-					in the join */
-	plan_t*		plans;		/* array of n_tables many plan nodes
-					containing the search plan and the
-					search data structures */
-	que_node_t*	search_cond;	/* search condition */
-	read_view_t*	read_view;	/* if the query is a non-locking
-					consistent read, its read view is
-					placed here, otherwise NULL */
-	ibool		consistent_read;/* TRUE if the select is a consistent,
-					non-locking read */
-	order_node_t*	order_by;	/* order by column definition, or
-					NULL */
-	ibool		is_aggregate;	/* TRUE if the select list consists of
-					aggregate functions */
-	ibool		aggregate_already_fetched;
-					/* TRUE if the aggregate row has
-					already been fetched for the current
-					cursor */
-	ibool		can_get_updated;/* this is TRUE if the select
-					is in a single-table explicit
-					cursor which can get updated
-					within the stored procedure,
-					or in a searched update or
-					delete; NOTE that to determine
-					of an explicit cursor if it
-					can get updated, the parser
-					checks from a stored procedure
-					if it contains positioned
-					update or delete statements */
-	sym_node_t*	explicit_cursor;/* not NULL if an explicit cursor */
-	UT_LIST_BASE_NODE_T(sym_node_t)
-			copy_variables; /* variables whose values we have to
-					copy when an explicit cursor is opened,
-					so that they do not change between
-					fetches */
-};
-
-/* Select node states */
-#define	SEL_NODE_CLOSED		0	/* it is a declared cursor which is not
-					currently open */
-#define SEL_NODE_OPEN		1	/* intention locks not yet set on
-					tables */
-#define SEL_NODE_FETCH		2	/* intention locks have been set */
-#define SEL_NODE_NO_MORE_ROWS	3	/* cursor has reached the result set
-					end */
-
-/* Fetch statement node */
-struct fetch_node_struct{
-	que_common_t	common;		/* type: QUE_NODE_FETCH */
-	sel_node_t*	cursor_def;	/* cursor definition */
-	sym_node_t*	into_list;	/* variables to set */
-
-	pars_user_func_t*
-			func;		/* User callback function or NULL.
-					The first argument to the function
-					is a sel_node_t*, containing the
-					results of the SELECT operation for
-					one row. If the function returns
-					NULL, it is not interested in
-					further rows and the cursor is
-					modified so (cursor % NOTFOUND) is
-					true. If it returns not-NULL,
-					continue normally. See
-					row_fetch_print() for an example
-					(and a useful debugging tool). */
-};
-
-/* Open or close cursor statement node */
-struct open_node_struct{
-	que_common_t	common;		/* type: QUE_NODE_OPEN */
-	ulint		op_type;	/* ROW_SEL_OPEN_CURSOR or
-					ROW_SEL_CLOSE_CURSOR */
-	sel_node_t*	cursor_def;	/* cursor definition */
-};
-
-/* Row printf statement node */
-struct row_printf_node_struct{
-	que_common_t	common;		/* type: QUE_NODE_ROW_PRINTF */
-	sel_node_t*	sel_node;	/* select */
-};
-
-#define ROW_SEL_OPEN_CURSOR	0
-#define ROW_SEL_CLOSE_CURSOR	1
-
-/* Flags for the MySQL interface */
-#define ROW_SEL_NEXT		1
-#define ROW_SEL_PREV		2
-
-#define ROW_SEL_EXACT		1	/* search using a complete key value */
-#define ROW_SEL_EXACT_PREFIX	2	/* search using a key prefix which
-					must match to rows: the prefix may
-					contain an incomplete field (the
-					last field in prefix may be just
-					a prefix of a fixed length column) */
-
-#ifndef UNIV_NONINL
-#include "row0sel.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0types.h b/storage/innobase/include/row0types.h
deleted file mode 100644
index 56ca8711848..00000000000
--- a/storage/innobase/include/row0types.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/******************************************************
-Row operation global types
-
-(c) 1996 Innobase Oy
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0types_h
-#define row0types_h
-
-typedef struct plan_struct plan_t;
-
-typedef	struct upd_struct upd_t;
-
-typedef struct upd_field_struct upd_field_t;
-
-typedef	struct upd_node_struct upd_node_t;
-
-typedef	struct del_node_struct del_node_t;
-
-typedef	struct ins_node_struct ins_node_t;
-
-typedef struct sel_node_struct	sel_node_t;
-
-typedef struct open_node_struct	open_node_t;
-
-typedef struct fetch_node_struct fetch_node_t;
-
-typedef struct row_printf_node_struct	row_printf_node_t;
-typedef struct sel_buf_struct	sel_buf_t;
-
-typedef	struct undo_node_struct undo_node_t;
-
-typedef	struct purge_node_struct purge_node_t;
-
-#endif
diff --git a/storage/innobase/include/row0uins.h b/storage/innobase/include/row0uins.h
deleted file mode 100644
index e28d5363048..00000000000
--- a/storage/innobase/include/row0uins.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/******************************************************
-Fresh insert undo
-
-(c) 1996 Innobase Oy
-
-Created 2/25/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0uins_h
-#define row0uins_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-
-/***************************************************************
-Undoes a fresh insert of a row to a table. A fresh insert means that
-the same clustered index unique key did not have any record, even delete
-marked, at the time of the insert. */
-
-ulint
-row_undo_ins(
-/*=========*/
-				/* out: DB_SUCCESS */
-	undo_node_t*	node);	/* in: row undo node */
-
-
-#ifndef UNIV_NONINL
-#include "row0uins.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0uins.ic b/storage/innobase/include/row0uins.ic
deleted file mode 100644
index 2b3d5a10f95..00000000000
--- a/storage/innobase/include/row0uins.ic
+++ /dev/null
@@ -1,8 +0,0 @@
-/******************************************************
-Fresh insert undo
-
-(c) 1996 Innobase Oy
-
-Created 2/25/1997 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innobase/include/row0umod.h b/storage/innobase/include/row0umod.h
deleted file mode 100644
index f22945e6f12..00000000000
--- a/storage/innobase/include/row0umod.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/******************************************************
-Undo modify of a row
-
-(c) 1997 Innobase Oy
-
-Created 2/27/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0umod_h
-#define row0umod_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-
-/***************************************************************
-Undoes a modify operation on a row of a table. */
-
-ulint
-row_undo_mod(
-/*=========*/
-				/* out: DB_SUCCESS or error code */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr);	/* in: query thread */
-
-
-#ifndef UNIV_NONINL
-#include "row0umod.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0umod.ic b/storage/innobase/include/row0umod.ic
deleted file mode 100644
index fcbf4dbc1f3..00000000000
--- a/storage/innobase/include/row0umod.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-Undo modify of a row
-
-(c) 1997 Innobase Oy
-
-Created 2/27/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h
deleted file mode 100644
index 0be09ed1822..00000000000
--- a/storage/innobase/include/row0undo.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/******************************************************
-Row undo
-
-(c) 1997 Innobase Oy
-
-Created 1/8/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0undo_h
-#define row0undo_h
-
-#include "univ.i"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-#include "btr0types.h"
-#include "btr0pcur.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-
-/************************************************************************
-Creates a row undo node to a query graph. */
-
-undo_node_t*
-row_undo_node_create(
-/*=================*/
-				/* out, own: undo node */
-	trx_t*		trx,	/* in: transaction */
-	que_thr_t*	parent,	/* in: parent node, i.e., a thr node */
-	mem_heap_t*	heap);	/* in: memory heap where created */
-/***************************************************************
-Looks for the clustered index record when node has the row reference.
-The pcur in node is used in the search. If found, stores the row to node,
-and stores the position of pcur, and detaches it. The pcur must be closed
-by the caller in any case. */
-
-ibool
-row_undo_search_clust_to_pcur(
-/*==========================*/
-				/* out: TRUE if found; NOTE the node->pcur
-				must be closed by the caller, regardless of
-				the return value */
-	undo_node_t*	node);	/* in: row undo node */
-/***************************************************************
-Undoes a row operation in a table. This is a high-level function used
-in SQL execution graphs. */
-
-que_thr_t*
-row_undo_step(
-/*==========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-
-/* A single query thread will try to perform the undo for all successive
-versions of a clustered index record, if the transaction has modified it
-several times during the execution which is rolled back. It may happen
-that the task is transferred to another query thread, if the other thread
-is assigned to handle an undo log record in the chain of different versions
-of the record, and the other thread happens to get the x-latch to the
-clustered index record at the right time.
-	If a query thread notices that the clustered index record it is looking
-for is missing, or the roll ptr field in the record doed not point to the
-undo log record the thread was assigned to handle, then it gives up the undo
-task for that undo log record, and fetches the next. This situation can occur
-just in the case where the transaction modified the same record several times
-and another thread is currently doing the undo for successive versions of
-that index record. */
-
-/* Undo node structure */
-
-struct undo_node_struct{
-	que_common_t	common;	/* node type: QUE_NODE_UNDO */
-	ulint		state;	/* node execution state */
-	trx_t*		trx;	/* trx for which undo is done */
-	dulint		roll_ptr;/* roll pointer to undo log record */
-	trx_undo_rec_t*	undo_rec;/* undo log record */
-	dulint		undo_no;/* undo number of the record */
-	ulint		rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
-				... */
-	dulint		new_roll_ptr; /* roll ptr to restore to clustered index
-				record */
-	dulint		new_trx_id; /* trx id to restore to clustered index
-				record */
-	btr_pcur_t	pcur;	/* persistent cursor used in searching the
-				clustered index record */
-	dict_table_t*	table;	/* table where undo is done */
-	ulint		cmpl_info;/* compiler analysis of an update */
-	upd_t*		update;	/* update vector for a clustered index
-				record */
-	dtuple_t*	ref;	/* row reference to the next row to handle */
-	dtuple_t*	row;	/* a copy (also fields copied to heap) of the
-				row to handle */
-	dict_index_t*	index;	/* the next index whose record should be
-				handled */
-	mem_heap_t*	heap;	/* memory heap used as auxiliary storage for
-				row; this must be emptied after undo is tried
-				on a row */
-};
-
-/* Execution states for an undo node */
-#define	UNDO_NODE_FETCH_NEXT	1	/* we should fetch the next undo log
-					record */
-#define	UNDO_NODE_PREV_VERS	2	/* the roll ptr to previous version of
-					a row is stored in node, and undo
-					should be done based on it */
-#define UNDO_NODE_INSERT	3
-#define UNDO_NODE_MODIFY	4
-
-
-#ifndef UNIV_NONINL
-#include "row0undo.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0undo.ic b/storage/innobase/include/row0undo.ic
deleted file mode 100644
index e7f89c7de67..00000000000
--- a/storage/innobase/include/row0undo.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-Row undo
-
-(c) 1997 Innobase Oy
-
-Created 1/8/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h
deleted file mode 100644
index efbc6d6facf..00000000000
--- a/storage/innobase/include/row0upd.h
+++ /dev/null
@@ -1,432 +0,0 @@
-/******************************************************
-Update of a row
-
-(c) 1996 Innobase Oy
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0upd_h
-#define row0upd_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "btr0types.h"
-#include "btr0pcur.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "pars0types.h"
-
-/*************************************************************************
-Creates an update vector object. */
-UNIV_INLINE
-upd_t*
-upd_create(
-/*=======*/
-				/* out, own: update vector object */
-	ulint		n,	/* in: number of fields */
-	mem_heap_t*	heap);	/* in: heap from which memory allocated */
-/*************************************************************************
-Returns the number of fields in the update vector == number of columns
-to be updated by an update vector. */
-UNIV_INLINE
-ulint
-upd_get_n_fields(
-/*=============*/
-			/* out: number of fields */
-	upd_t*	update);	/* in: update vector */
-/*************************************************************************
-Returns the nth field of an update vector. */
-UNIV_INLINE
-upd_field_t*
-upd_get_nth_field(
-/*==============*/
-			/* out: update vector field */
-	upd_t*	update,	/* in: update vector */
-	ulint	n);	/* in: field position in update vector */
-/*************************************************************************
-Sets an index field number to be updated by an update vector field. */
-UNIV_INLINE
-void
-upd_field_set_field_no(
-/*===================*/
-	upd_field_t*	upd_field,	/* in: update vector field */
-	ulint		field_no,	/* in: field number in a clustered
-					index */
-	dict_index_t*	index,		/* in: index */
-	trx_t*		trx);		/* in: transaction */
-/*************************************************************************
-Writes into the redo log the values of trx id and roll ptr and enough info
-to determine their positions within a clustered index record. */
-
-byte*
-row_upd_write_sys_vals_to_log(
-/*==========================*/
-				/* out: new pointer to mlog */
-	dict_index_t*	index,	/* in: clustered index */
-	trx_t*		trx,	/* in: transaction */
-	dulint		roll_ptr,/* in: roll ptr of the undo log record */
-	byte*		log_ptr,/* pointer to a buffer of size > 20 opened
-				in mlog */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************************
-Updates the trx id and roll ptr field in a clustered index record when
-a row is updated or marked deleted. */
-UNIV_INLINE
-void
-row_upd_rec_sys_fields(
-/*===================*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	trx_t*		trx,	/* in: transaction */
-	dulint		roll_ptr);/* in: roll ptr of the undo log record */
-/*************************************************************************
-Sets the trx id or roll ptr field of a clustered index entry. */
-
-void
-row_upd_index_entry_sys_field(
-/*==========================*/
-	dtuple_t*	entry,	/* in: index entry, where the memory buffers
-				for sys fields are already allocated:
-				the function just copies the new values to
-				them */
-	dict_index_t*	index,	/* in: clustered index */
-	ulint		type,	/* in: DATA_TRX_ID or DATA_ROLL_PTR */
-	dulint		val);	/* in: value to write */
-/*************************************************************************
-Creates an update node for a query graph. */
-
-upd_node_t*
-upd_node_create(
-/*============*/
-				/* out, own: update node */
-	mem_heap_t*	heap);	/* in: mem heap where created */
-/***************************************************************
-Writes to the redo log the new values of the fields occurring in the index. */
-
-void
-row_upd_index_write_log(
-/*====================*/
-	upd_t*	update,	/* in: update vector */
-	byte*	log_ptr,/* in: pointer to mlog buffer: must contain at least
-			MLOG_BUF_MARGIN bytes of free space; the buffer is
-			closed within this function */
-	mtr_t*	mtr);	/* in: mtr into whose log to write */
-/***************************************************************
-Returns TRUE if row update changes size of some field in index or if some
-field to be updated is stored externally in rec or update. */
-
-ibool
-row_upd_changes_field_size_or_external(
-/*===================================*/
-				/* out: TRUE if the update changes the size of
-				some field in index or the field is external
-				in rec or update */
-	dict_index_t*	index,	/* in: index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	upd_t*		update);/* in: update vector */
-/***************************************************************
-Replaces the new column values stored in the update vector to the record
-given. No field size changes are allowed. This function is used only for
-a clustered index */
-
-void
-row_upd_rec_in_place(
-/*=================*/
-	rec_t*		rec,	/* in/out: record where replaced */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	upd_t*		update);/* in: update vector */
-/*******************************************************************
-Builds an update vector from those fields which in a secondary index entry
-differ from a record that has the equal ordering fields. NOTE: we compare
-the fields as binary strings! */
-
-upd_t*
-row_upd_build_sec_rec_difference_binary(
-/*====================================*/
-				/* out, own: update vector of differing
-				fields */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: entry to insert */
-	rec_t*		rec,	/* in: secondary index record */
-	trx_t*		trx,	/* in: transaction */
-	mem_heap_t*	heap);	/* in: memory heap from which allocated */
-/*******************************************************************
-Builds an update vector from those fields, excluding the roll ptr and
-trx id fields, which in an index entry differ from a record that has
-the equal ordering fields. NOTE: we compare the fields as binary strings! */
-
-upd_t*
-row_upd_build_difference_binary(
-/*============================*/
-				/* out, own: update vector of differing
-				fields, excluding roll ptr and trx id */
-	dict_index_t*	index,	/* in: clustered index */
-	dtuple_t*	entry,	/* in: entry to insert */
-	ulint*		ext_vec,/* in: array containing field numbers of
-				externally stored fields in entry, or NULL */
-	ulint		n_ext_vec,/* in: number of fields in ext_vec */
-	rec_t*		rec,	/* in: clustered index record */
-	trx_t*		trx,	/* in: transaction */
-	mem_heap_t*	heap);	/* in: memory heap from which allocated */
-/***************************************************************
-Replaces the new column values stored in the update vector to the index entry
-given. */
-
-void
-row_upd_index_replace_new_col_vals_index_pos(
-/*=========================================*/
-	dtuple_t*	entry,	/* in/out: index entry where replaced */
-	dict_index_t*	index,	/* in: index; NOTE that this may also be a
-				non-clustered index */
-	upd_t*		update,	/* in: an update vector built for the index so
-				that the field number in an upd_field is the
-				index position */
-	ibool		order_only,
-				/* in: if TRUE, limit the replacement to
-				ordering fields of index; note that this
-				does not work for non-clustered indexes. */
-	mem_heap_t*	heap);	/* in: memory heap to which we allocate and
-				copy the new values, set this as NULL if you
-				do not want allocation */
-/***************************************************************
-Replaces the new column values stored in the update vector to the index entry
-given. */
-
-void
-row_upd_index_replace_new_col_vals(
-/*===============================*/
-	dtuple_t*	entry,	/* in/out: index entry where replaced */
-	dict_index_t*	index,	/* in: index; NOTE that this may also be a
-				non-clustered index */
-	upd_t*		update,	/* in: an update vector built for the
-				CLUSTERED index so that the field number in
-				an upd_field is the clustered index position */
-	mem_heap_t*	heap);	/* in: memory heap to which we allocate and
-				copy the new values, set this as NULL if you
-				do not want allocation */
-/***************************************************************
-Checks if an update vector changes an ordering field of an index record.
-This function is fast if the update vector is short or the number of ordering
-fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings! */
-
-ibool
-row_upd_changes_ord_field_binary(
-/*=============================*/
-				/* out: TRUE if update vector changes
-				an ordering field in the index record;
-				NOTE: the fields are compared as binary
-				strings */
-	dtuple_t*	row,	/* in: old value of row, or NULL if the
-				row and the data values in update are not
-				known when this function is called, e.g., at
-				compile time */
-	dict_index_t*	index,	/* in: index of the record */
-	upd_t*		update);/* in: update vector for the row; NOTE: the
-				field numbers in this MUST be clustered index
-				positions! */
-/***************************************************************
-Checks if an update vector changes an ordering field of an index record.
-This function is fast if the update vector is short or the number of ordering
-fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings! */
-
-ibool
-row_upd_changes_some_index_ord_field_binary(
-/*========================================*/
-				/* out: TRUE if update vector may change
-				an ordering field in an index record */
-	dict_table_t*	table,	/* in: table */
-	upd_t*		update);/* in: update vector for the row */
-/***************************************************************
-Updates a row in a table. This is a high-level function used
-in SQL execution graphs. */
-
-que_thr_t*
-row_upd_step(
-/*=========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/*************************************************************************
-Performs an in-place update for the current clustered index record in
-select. */
-
-void
-row_upd_in_place_in_select(
-/*=======================*/
-	sel_node_t*	sel_node,	/* in: select node */
-	que_thr_t*	thr,		/* in: query thread */
-	mtr_t*		mtr);		/* in: mtr */
-/*************************************************************************
-Parses the log data of system field values. */
-
-byte*
-row_upd_parse_sys_vals(
-/*===================*/
-			/* out: log data end or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	ulint*	pos,	/* out: TRX_ID position in record */
-	dulint*	trx_id,	/* out: trx id */
-	dulint*	roll_ptr);/* out: roll ptr */
-/*************************************************************************
-Updates the trx id and roll ptr field in a clustered index record in database
-recovery. */
-
-void
-row_upd_rec_sys_fields_in_recovery(
-/*===============================*/
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		pos,	/* in: TRX_ID position in rec */
-	dulint		trx_id,	/* in: transaction id */
-	dulint		roll_ptr);/* in: roll ptr of the undo log record */
-/*************************************************************************
-Parses the log data written by row_upd_index_write_log. */
-
-byte*
-row_upd_index_parse(
-/*================*/
-				/* out: log data end or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	mem_heap_t*	heap,	/* in: memory heap where update vector is
-				built */
-	upd_t**		update_out);/* out: update vector */
-
-
-/* Update vector field */
-struct upd_field_struct{
-	ulint		field_no;	/* field number in an index, usually
-					the clustered index, but in updating
-					a secondary index record in btr0cur.c
-					this is the position in the secondary
-					index */
-	que_node_t*	exp;		/* expression for calculating a new
-					value: it refers to column values and
-					constants in the symbol table of the
-					query graph */
-	dfield_t	new_val;	/* new value for the column */
-	ibool		extern_storage;	/* this is set to TRUE if dfield
-					actually contains a reference to
-					an externally stored field */
-};
-
-/* Update vector structure */
-struct upd_struct{
-	ulint		info_bits;	/* new value of info bits to record;
-					default is 0 */
-	ulint		n_fields;	/* number of update fields */
-	upd_field_t*	fields;		/* array of update fields */
-};
-
-/* Update node structure which also implements the delete operation
-of a row */
-
-struct upd_node_struct{
-	que_common_t	common;	/* node type: QUE_NODE_UPDATE */
-	ibool		is_delete;/* TRUE if delete, FALSE if update */
-	ibool		searched_update;
-				/* TRUE if searched update, FALSE if
-				positioned */
-	ibool		select_will_do_update;
-				/* TRUE if a searched update where ordering
-				fields will not be updated, and the size of
-				the fields will not change: in this case the
-				select node will take care of the update */
-	ibool		in_mysql_interface;
-				/* TRUE if the update node was created
-				for the MySQL interface */
-	dict_foreign_t*	foreign;/* NULL or pointer to a foreign key
-				constraint if this update node is used in
-				doing an ON DELETE or ON UPDATE operation */
-	upd_node_t*	cascade_node;/* NULL or an update node template which
-				is used to implement ON DELETE/UPDATE CASCADE
-				or ... SET NULL for foreign keys */
-	mem_heap_t*	cascade_heap;/* NULL or a mem heap where the cascade
-				node is created */
-	sel_node_t*	select;	/* query graph subtree implementing a base
-				table cursor: the rows returned will be
-				updated */
-	btr_pcur_t*	pcur;	/* persistent cursor placed on the clustered
-				index record which should be updated or
-				deleted; the cursor is stored in the graph
-				of 'select' field above, except in the case
-				of the MySQL interface */
-	dict_table_t*	table;	/* table where updated */
-	upd_t*		update;	/* update vector for the row */
-	ulint		update_n_fields;
-				/* when this struct is used to implement
-				a cascade operation for foreign keys, we store
-				here the size of the buffer allocated for use
-				as the update vector */
-	sym_node_list_t	columns;/* symbol table nodes for the columns
-				to retrieve from the table */
-	ibool		has_clust_rec_x_lock;
-				/* TRUE if the select which retrieves the
-				records to update already sets an x-lock on
-				the clustered record; note that it must always
-				set at least an s-lock */
-	ulint		cmpl_info;/* information extracted during query
-				compilation; speeds up execution:
-				UPD_NODE_NO_ORD_CHANGE and
-				UPD_NODE_NO_SIZE_CHANGE, ORed */
-	/*----------------------*/
-	/* Local storage for this graph node */
-	ulint		state;	/* node execution state */
-	dict_index_t*	index;	/* NULL, or the next index whose record should
-				be updated */
-	dtuple_t*	row;	/* NULL, or a copy (also fields copied to
-				heap) of the row to update; this must be reset
-				to NULL after a successful update */
-	ulint*		ext_vec;/* array describing which fields are stored
-				externally in the clustered index record of
-				row */
-	ulint		n_ext_vec;/* number of fields in ext_vec */
-	mem_heap_t*	heap;	/* memory heap used as auxiliary storage;
-				this must be emptied after a successful
-				update */
-	/*----------------------*/
-	sym_node_t*	table_sym;/* table node in symbol table */
-	que_node_t*	col_assign_list;
-				/* column assignment list */
-	ulint		magic_n;
-};
-
-#define	UPD_NODE_MAGIC_N	1579975
-
-/* Node execution states */
-#define UPD_NODE_SET_IX_LOCK	   1	/* execution came to the node from
-					a node above and if the field
-					has_clust_rec_x_lock is FALSE, we
-					should set an intention x-lock on
-					the table */
-#define UPD_NODE_UPDATE_CLUSTERED  2	/* clustered index record should be
-					updated */
-#define UPD_NODE_INSERT_CLUSTERED  3	/* clustered index record should be
-					inserted, old record is already delete
-					marked */
-#define UPD_NODE_UPDATE_ALL_SEC	   4	/* an ordering field of the clustered
-					index record was changed, or this is
-					a delete operation: should update
-					all the secondary index records */
-#define	UPD_NODE_UPDATE_SOME_SEC   5	/* secondary index entries should be
-					looked at and updated if an ordering
-					field changed */
-
-/* Compilation info flags: these must fit within 3 bits; see trx0rec.h */
-#define UPD_NODE_NO_ORD_CHANGE	1	/* no secondary index record will be
-					changed in the update and no ordering
-					field of the clustered index */
-#define UPD_NODE_NO_SIZE_CHANGE	2	/* no record field size will be
-					changed in the update */
-
-#ifndef UNIV_NONINL
-#include "row0upd.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
deleted file mode 100644
index 6173849e68f..00000000000
--- a/storage/innobase/include/row0upd.ic
+++ /dev/null
@@ -1,122 +0,0 @@
-/******************************************************
-Update of a row
-
-(c) 1996 Innobase Oy
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#include "mtr0log.h"
-#include "trx0trx.h"
-#include "trx0undo.h"
-#include "row0row.h"
-#include "btr0sea.h"
-
-/*************************************************************************
-Creates an update vector object. */
-UNIV_INLINE
-upd_t*
-upd_create(
-/*=======*/
-				/* out, own: update vector object */
-	ulint		n,	/* in: number of fields */
-	mem_heap_t*	heap)	/* in: heap from which memory allocated */
-{
-	upd_t*	update;
-	ulint	i;
-
-	update = mem_heap_alloc(heap, sizeof(upd_t));
-
-	update->info_bits = 0;
-	update->n_fields = n;
-	update->fields = mem_heap_alloc(heap, sizeof(upd_field_t) * n);
-
-	for (i = 0; i < n; i++) {
-		update->fields[i].extern_storage = 0;
-	}
-
-	return(update);
-}
-
-/*************************************************************************
-Returns the number of fields in the update vector == number of columns
-to be updated by an update vector. */
-UNIV_INLINE
-ulint
-upd_get_n_fields(
-/*=============*/
-			/* out: number of fields */
-	upd_t*	update)	/* in: update vector */
-{
-	ut_ad(update);
-
-	return(update->n_fields);
-}
-
-/*************************************************************************
-Returns the nth field of an update vector. */
-UNIV_INLINE
-upd_field_t*
-upd_get_nth_field(
-/*==============*/
-			/* out: update vector field */
-	upd_t*	update,	/* in: update vector */
-	ulint	n)	/* in: field position in update vector */
-{
-	ut_ad(update);
-	ut_ad(n < update->n_fields);
-
-	return(update->fields + n);
-}
-
-/*************************************************************************
-Sets an index field number to be updated by an update vector field. */
-UNIV_INLINE
-void
-upd_field_set_field_no(
-/*===================*/
-	upd_field_t*	upd_field,	/* in: update vector field */
-	ulint		field_no,	/* in: field number in a clustered
-					index */
-	dict_index_t*	index,		/* in: index */
-	trx_t*		trx)		/* in: transaction */
-{
-	upd_field->field_no = field_no;
-
-	if (UNIV_UNLIKELY(field_no >= dict_index_get_n_fields(index))) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to access field %lu in ",
-			(ulong) field_no);
-		dict_index_name_print(stderr, trx, index);
-		fprintf(stderr, "\n"
-			"InnoDB: but index only has %lu fields\n",
-			(ulong) dict_index_get_n_fields(index));
-	}
-
-	dict_col_copy_type(dict_index_get_nth_col(index, field_no),
-			   dfield_get_type(&(upd_field->new_val)));
-}
-
-/*************************************************************************
-Updates the trx id and roll ptr field in a clustered index record when
-a row is updated or marked deleted. */
-UNIV_INLINE
-void
-row_upd_rec_sys_fields(
-/*===================*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	trx_t*		trx,	/* in: transaction */
-	dulint		roll_ptr)/* in: roll ptr of the undo log record */
-{
-	ut_ad(index->type & DICT_CLUSTERED);
-	ut_ad(rec_offs_validate(rec, index, offsets));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!buf_block_align(rec)->is_hashed
-	      || rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	row_set_rec_trx_id(rec, index, offsets, trx->id);
-	row_set_rec_roll_ptr(rec, index, offsets, roll_ptr);
-}
diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h
deleted file mode 100644
index e1377112d2a..00000000000
--- a/storage/innobase/include/row0vers.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/******************************************************
-Row versions
-
-(c) 1997 Innobase Oy
-
-Created 2/6/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0vers_h
-#define row0vers_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "rem0types.h"
-#include "mtr0mtr.h"
-#include "read0types.h"
-
-/*********************************************************************
-Finds out if an active transaction has inserted or modified a secondary
-index record. NOTE: the kernel mutex is temporarily released in this
-function! */
-
-trx_t*
-row_vers_impl_x_locked_off_kernel(
-/*==============================*/
-				/* out: NULL if committed, else the active
-				transaction; NOTE that the kernel mutex is
-				temporarily released! */
-	rec_t*		rec,	/* in: record in a secondary index */
-	dict_index_t*	index,	/* in: the secondary index */
-	const ulint*	offsets);/* in: rec_get_offsets(rec, index) */
-/*********************************************************************
-Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view. */
-
-ibool
-row_vers_must_preserve_del_marked(
-/*==============================*/
-			/* out: TRUE if earlier version should be preserved */
-	dulint	trx_id,	/* in: transaction id in the version */
-	mtr_t*	mtr);	/* in: mtr holding the latch on the clustered index
-			record; it will also hold the latch on purge_view */
-/*********************************************************************
-Finds out if a version of the record, where the version >= the current
-purge view, should have ientry as its secondary index entry. We check
-if there is any not delete marked version of the record where the trx
-id >= purge view, and the secondary index entry == ientry; exactly in
-this case we return TRUE. */
-
-ibool
-row_vers_old_has_index_entry(
-/*=========================*/
-				/* out: TRUE if earlier version should have */
-	ibool		also_curr,/* in: TRUE if also rec is included in the
-				versions to search; otherwise only versions
-				prior to it are searched */
-	rec_t*		rec,	/* in: record in the clustered index; the
-				caller must have a latch on the page */
-	mtr_t*		mtr,	/* in: mtr holding the latch on rec; it will
-				also hold the latch on purge_view */
-	dict_index_t*	index,	/* in: the secondary index */
-	dtuple_t*	ientry);	/* in: the secondary index entry */
-/*********************************************************************
-Constructs the version of a clustered index record which a consistent
-read should see. We assume that the trx id stored in rec is such that
-the consistent read should not see rec in its present version. */
-
-ulint
-row_vers_build_for_consistent_read(
-/*===============================*/
-				/* out: DB_SUCCESS or DB_MISSING_HISTORY */
-	rec_t*		rec,	/* in: record in a clustered index; the
-				caller must have a latch on the page; this
-				latch locks the top of the stack of versions
-				of this records */
-	mtr_t*		mtr,	/* in: mtr holding the latch on rec; it will
-				also hold the latch on purge_view */
-	dict_index_t*	index,	/* in: the clustered index */
-	ulint**		offsets,/* in/out: offsets returned by
-				rec_get_offsets(rec, index) */
-	read_view_t*	view,	/* in: the consistent read view */
-	mem_heap_t**	offset_heap,/* in/out: memory heap from which
-				the offsets are allocated */
-	mem_heap_t*	in_heap,/* in: memory heap from which the memory for
-				old_vers is allocated; memory for possible
-				intermediate versions is allocated and freed
-				locally within the function */
-	rec_t**		old_vers);/* out, own: old version, or NULL if the
-				record does not exist in the view, that is,
-				it was freshly inserted afterwards */
-
-/*********************************************************************
-Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read. */
-
-ulint
-row_vers_build_for_semi_consistent_read(
-/*====================================*/
-				/* out: DB_SUCCESS or DB_MISSING_HISTORY */
-	rec_t*		rec,	/* in: record in a clustered index; the
-				caller must have a latch on the page; this
-				latch locks the top of the stack of versions
-				of this records */
-	mtr_t*		mtr,	/* in: mtr holding the latch on rec */
-	dict_index_t*	index,	/* in: the clustered index */
-	ulint**		offsets,/* in/out: offsets returned by
-				rec_get_offsets(rec, index) */
-	mem_heap_t**	offset_heap,/* in/out: memory heap from which
-				the offsets are allocated */
-	mem_heap_t*	in_heap,/* in: memory heap from which the memory for
-				old_vers is allocated; memory for possible
-				intermediate versions is allocated and freed
-				locally within the function */
-	rec_t**		old_vers);/* out, own: rec, old version, or NULL if the
-				record does not exist in the view, that is,
-				it was freshly inserted afterwards */
-
-
-#ifndef UNIV_NONINL
-#include "row0vers.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0vers.ic b/storage/innobase/include/row0vers.ic
deleted file mode 100644
index ab1e264635b..00000000000
--- a/storage/innobase/include/row0vers.ic
+++ /dev/null
@@ -1,13 +0,0 @@
-/******************************************************
-Row versions
-
-(c) 1997 Innobase Oy
-
-Created 2/6/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0row.h"
-#include "dict0dict.h"
-#include "read0read.h"
-#include "page0page.h"
-#include "log0recv.h"
diff --git a/storage/innobase/include/srv0que.h b/storage/innobase/include/srv0que.h
deleted file mode 100644
index 05c339cdd32..00000000000
--- a/storage/innobase/include/srv0que.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/******************************************************
-Server query execution
-
-(c) 1996 Innobase Oy
-
-Created 6/5/1996 Heikki Tuuri
-*******************************************************/
-
-
-#ifndef srv0que_h
-#define srv0que_h
-
-#include "univ.i"
-#include "que0types.h"
-
-/**************************************************************************
-Checks if there is work to do in the server task queue. If there is, the
-thread starts processing a task. Before leaving, it again checks the task
-queue and picks a new task if any exists. This is called by a SRV_WORKER
-thread. */
-
-void
-srv_que_task_queue_check(void);
-/*==========================*/
-/**************************************************************************
-Performs round-robin on the server tasks. This is called by a SRV_WORKER
-thread every second or so. */
-
-que_thr_t*
-srv_que_round_robin(
-/*================*/
-				/* out: the new (may be == thr) query thread
-				to run */
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
-Enqueues a task to server task queue and releases a worker thread, if
-there exists one suspended. */
-
-void
-srv_que_task_enqueue(
-/*=================*/
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
-Enqueues a task to server task queue and releases a worker thread, if
-there exists one suspended. */
-
-void
-srv_que_task_enqueue_low(
-/*=====================*/
-	que_thr_t*	thr);	/* in: query thread */
-
-#endif
-
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
deleted file mode 100644
index 05300e38430..00000000000
--- a/storage/innobase/include/srv0srv.h
+++ /dev/null
@@ -1,572 +0,0 @@
-/******************************************************
-The server main program
-
-(c) 1995 Innobase Oy
-
-Created 10/10/1995 Heikki Tuuri
-*******************************************************/
-
-
-#ifndef srv0srv_h
-#define srv0srv_h
-
-#include "univ.i"
-#include "sync0sync.h"
-#include "os0sync.h"
-#include "que0types.h"
-#include "trx0types.h"
-
-extern const char*	srv_main_thread_op_info;
-
-/* Prefix used by MySQL to indicate pre-5.1 table name encoding */
-extern const char	srv_mysql50_table_name_prefix[9];
-
-/* When this event is set the lock timeout and InnoDB monitor
-thread starts running */
-extern os_event_t	srv_lock_timeout_thread_event;
-
-/* If the last data file is auto-extended, we add this many pages to it
-at a time */
-#define SRV_AUTO_EXTEND_INCREMENT	\
-	(srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
-
-/* This is set to TRUE if the MySQL user has set it in MySQL */
-extern ibool	srv_lower_case_table_names;
-
-/* Mutex for locking srv_monitor_file */
-extern mutex_t	srv_monitor_file_mutex;
-/* Temporary file for innodb monitor output */
-extern FILE*	srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-extern mutex_t	srv_dict_tmpfile_mutex;
-/* Temporary file for output from the data dictionary */
-extern FILE*	srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile.
-This mutex has a very low rank; threads reserving it should not
-acquire any further latches or sleep before releasing this one. */
-extern mutex_t	srv_misc_tmpfile_mutex;
-/* Temporary file for miscellanous diagnostic output */
-extern FILE*	srv_misc_tmpfile;
-
-/* Server parameters which are read from the initfile */
-
-extern char*	srv_data_home;
-#ifdef UNIV_LOG_ARCHIVE
-extern char*	srv_arch_dir;
-#endif /* UNIV_LOG_ARCHIVE */
-
-extern ibool	srv_file_per_table;
-extern ibool	srv_locks_unsafe_for_binlog;
-
-extern ulint	srv_n_data_files;
-extern char**	srv_data_file_names;
-extern ulint*	srv_data_file_sizes;
-extern ulint*	srv_data_file_is_raw_partition;
-
-extern ibool	srv_auto_extend_last_data_file;
-extern ulint	srv_last_file_size_max;
-extern ulong	srv_auto_extend_increment;
-
-extern ibool	srv_created_new_raw;
-
-#define SRV_NEW_RAW	1
-#define SRV_OLD_RAW	2
-
-extern char**	srv_log_group_home_dirs;
-
-extern ulint	srv_n_log_groups;
-extern ulint	srv_n_log_files;
-extern ulint	srv_log_file_size;
-extern ulint	srv_log_buffer_size;
-extern ulong	srv_flush_log_at_trx_commit;
-
-extern byte	srv_latin1_ordering[256];/* The sort order table of the latin1
-					character set */
-extern ulint	srv_pool_size;
-extern ulint	srv_awe_window_size;
-extern ulint	srv_mem_pool_size;
-extern ulint	srv_lock_table_size;
-
-extern ibool    srv_thread_concurrency_timer_based;
-
-/* Number of background IO threads for read and write. Replaces
- * srv_n_file_io_threads. */
-extern ulint	srv_n_read_io_threads;
-extern ulint	srv_n_write_io_threads;
-/* Max number of adjacent IO requests to merge into one large request. */
-extern ulint	srv_max_merged_io;
-
-/* Number of IO operations per second the server can do */
-extern ulint    srv_io_capacity;
-
-/* Flush dirty pages when below max dirty percent */
-extern ibool  srv_extra_dirty_writes;
-
-
-
-#ifdef UNIV_LOG_ARCHIVE
-extern ibool	srv_log_archive_on;
-extern ibool	srv_archive_recovery;
-extern dulint	srv_archive_recovery_limit_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
-extern ulint	srv_lock_wait_timeout;
-
-extern char*	srv_file_flush_method_str;
-extern ulint	srv_unix_file_flush_method;
-extern ulint	srv_win_file_flush_method;
-
-extern ulint	srv_max_n_open_files;
-
-extern ulint	srv_max_dirty_pages_pct;
-
-extern ulint	srv_force_recovery;
-extern ulong	srv_thread_concurrency;
-extern ulong	srv_commit_concurrency;
-
-extern ulint	srv_max_n_threads;
-
-extern lint	srv_conc_n_threads;
-
-extern ulint	srv_fast_shutdown;	 /* If this is 1, do not do a
-					 purge and index buffer merge.
-					 If this 2, do not even flush the
-					 buffer pool to data files at the
-					 shutdown: we effectively 'crash'
-					 InnoDB (but lose no committed
-					 transactions). */
-extern ibool	srv_innodb_status;
-
-extern ibool	srv_use_doublewrite_buf;
-extern ibool	srv_use_checksums;
-
-extern ibool	srv_set_thread_priorities;
-extern int	srv_query_thread_priority;
-
-extern ulong	srv_max_buf_pool_modified_pct;
-extern ulong	srv_max_purge_lag;
-extern ibool	srv_use_awe;
-extern ibool	srv_use_adaptive_hash_indexes;
-/*-------------------------------------------*/
-
-extern ulint	srv_n_rows_inserted;
-extern ulint	srv_n_rows_updated;
-extern ulint	srv_n_rows_deleted;
-extern ulint	srv_n_rows_read;
-
-extern ibool	srv_print_innodb_monitor;
-extern ibool	srv_print_innodb_lock_monitor;
-extern ibool	srv_print_innodb_tablespace_monitor;
-extern ibool	srv_print_verbose_log;
-extern ibool	srv_print_innodb_table_monitor;
-
-extern ibool	srv_lock_timeout_and_monitor_active;
-extern ibool	srv_error_monitor_active;
-
-extern ulong	srv_n_spin_wait_rounds;
-extern ulong	srv_n_free_tickets_to_enter;
-extern ulong	srv_thread_sleep_delay;
-extern ulint	srv_spin_wait_delay;
-extern ibool	srv_priority_boost;
-
-extern	ulint	srv_pool_size;
-extern	ulint	srv_mem_pool_size;
-extern	ulint	srv_lock_table_size;
-
-extern	ibool	srv_print_thread_releases;
-extern	ibool	srv_print_lock_waits;
-extern	ibool	srv_print_buf_io;
-extern	ibool	srv_print_log_io;
-extern	ibool	srv_print_latch_waits;
-
-extern ulint	srv_activity_count;
-extern ulint	srv_fatal_semaphore_wait_threshold;
-extern ulint	srv_dml_needed_delay;
-
-extern mutex_t*	kernel_mutex_temp;/* mutex protecting the server, trx structs,
-				query threads, and lock table: we allocate
-				it from dynamic memory to get it to the
-				same DRAM page as other hotspot semaphores */
-#define kernel_mutex (*kernel_mutex_temp)
-
-#define SRV_MAX_N_IO_THREADS	100
-
-/* Array of English strings describing the current state of an
-i/o handler thread */
-extern const char* srv_io_thread_op_info[];
-extern const char* srv_io_thread_function[];
-
-/* the number of the log write requests done */
-extern ulint srv_log_write_requests;
-
-/* the number of physical writes to the log performed */
-extern ulint srv_log_writes;
-
-/* amount of data written to the log files in bytes */
-extern ulint srv_os_log_written;
-
-/* amount of writes being done to the log files */
-extern ulint srv_os_log_pending_writes;
-
-/* we increase this counter, when there we don't have enough space in the
-log buffer and have to flush it */
-extern ulint srv_log_waits;
-
-/* variable that counts amount of data read in total (in bytes) */
-extern ulint srv_data_read;
-
-/* here we count the amount of data written in total (in bytes) */
-extern ulint srv_data_written;
-
-/* this variable counts the amount of times, when the doublewrite buffer
-was flushed */
-extern ulint srv_dblwr_writes;
-
-/* here we store the number of pages that have been flushed to the
-doublewrite buffer */
-extern ulint srv_dblwr_pages_written;
-
-/* in this variable we store the number of write requests issued */
-extern ulint srv_buf_pool_write_requests;
-
-/* here we store the number of times when we had to wait for a free page
-in the buffer pool. It happens when the buffer pool is full and we need
-to make a flush, in order to be able to read or create a page. */
-extern ulint srv_buf_pool_wait_free;
-
-/* variable to count the number of pages that were written from the
-buffer pool to disk */
-extern ulint srv_buf_pool_flushed;
-
-/* variable to count the number of buffer pool reads that led to the
-reading of a disk page */
-extern ulint srv_buf_pool_reads;
-
-/* variable to count the number of sequential read-aheads were done */
-extern ulint srv_read_ahead_seq;
-
-/* variable to count the number of random read-aheads were done */
-extern ulint srv_read_ahead_rnd;
-
-/* Number of threads that may have missed a lock wait wakeup */
-extern ulint sync_wake_ups;
-
-/* An option to enable the fix for "Bug#43660 SHOW INDEXES/ANALYZE does
-NOT update cardinality for indexes of InnoDB table". By default we are
-running with the fix disabled because MySQL 5.1 is frozen for such
-behavioral changes. */
-extern char srv_use_legacy_cardinality_algorithm;
-
-/* In this structure we store status variables to be passed to MySQL */
-typedef struct export_var_struct export_struc;
-
-extern export_struc export_vars;
-
-typedef struct srv_sys_struct	srv_sys_t;
-
-/* The server system */
-extern srv_sys_t*	srv_sys;
-
-/* Alternatives for the file flush option in Unix; see the InnoDB manual
-about what these mean */
-#define SRV_UNIX_FSYNC		1	/* This is the default */
-#define SRV_UNIX_O_DSYNC	2
-#define SRV_UNIX_LITTLESYNC	3
-#define SRV_UNIX_NOSYNC		4
-#define SRV_UNIX_O_DIRECT	5
-
-/* Alternatives for file i/o in Windows */
-#define SRV_WIN_IO_NORMAL		1
-#define SRV_WIN_IO_UNBUFFERED		2	/* This is the default */
-
-/* Alternatives for srv_force_recovery. Non-zero values are intended
-to help the user get a damaged database up so that he can dump intact
-tables and rows with SELECT INTO OUTFILE. The database must not otherwise
-be used with these options! A bigger number below means that all precautions
-of lower numbers are included. */
-
-#define SRV_FORCE_IGNORE_CORRUPT 1	/* let the server run even if it
-					detects a corrupt page */
-#define SRV_FORCE_NO_BACKGROUND	2	/* prevent the main thread from
-					running: if a crash would occur
-					in purge, this prevents it */
-#define SRV_FORCE_NO_TRX_UNDO	3	/* do not run trx rollback after
-					recovery */
-#define SRV_FORCE_NO_IBUF_MERGE	4	/* prevent also ibuf operations:
-					if they would cause a crash, better
-					not do them */
-#define	SRV_FORCE_NO_UNDO_LOG_SCAN 5	/* do not look at undo logs when
-					starting the database: InnoDB will
-					treat even incomplete transactions
-					as committed */
-#define SRV_FORCE_NO_LOG_REDO	6	/* do not do the log roll-forward
-					in connection with recovery */
-
-/*************************************************************************
-Boots Innobase server. */
-
-ulint
-srv_boot(void);
-/*==========*/
-			/* out: DB_SUCCESS or error code */
-/*************************************************************************
-Initializes the server. */
-
-void
-srv_init(void);
-/*==========*/
-/*************************************************************************
-Frees the OS fast mutex created in srv_boot(). */
-
-void
-srv_free(void);
-/*==========*/
-/*************************************************************************
-Initializes the synchronization primitives, memory system, and the thread
-local storage. */
-
-void
-srv_general_init(void);
-/*==================*/
-/*************************************************************************
-Gets the number of threads in the system. */
-
-ulint
-srv_get_n_threads(void);
-/*===================*/
-/*************************************************************************
-Returns the calling thread type. */
-
-ulint
-srv_get_thread_type(void);
-/*=====================*/
-			/* out: SRV_COM, ... */
-/*************************************************************************
-Sets the info describing an i/o thread current state. */
-
-void
-srv_set_io_thread_op_info(
-/*======================*/
-	ulint		i,	/* in: the 'segment' of the i/o thread */
-	const char*	str);	/* in: constant char string describing the
-				state */
-/*************************************************************************
-Releases threads of the type given from suspension in the thread table.
-NOTE! The server mutex has to be reserved by the caller! */
-
-ulint
-srv_release_threads(
-/*================*/
-			/* out: number of threads released: this may be
-			< n if not enough threads were suspended at the
-			moment */
-	ulint	type,	/* in: thread type */
-	ulint	n);	/* in: number of threads to release */
-/*************************************************************************
-The master thread controlling the server. */
-
-os_thread_ret_t
-srv_master_thread(
-/*==============*/
-			/* out: a dummy parameter */
-	void*	arg);	/* in: a dummy parameter required by
-			os_thread_create */
-/***********************************************************************
-Tells the Innobase server that there has been activity in the database
-and wakes up the master thread if it is suspended (not sleeping). Used
-in the MySQL interface. Note that there is a small chance that the master
-thread stays suspended (we do not protect our operation with the kernel
-mutex, for performace reasons). */
-
-void
-srv_active_wake_master_thread(void);
-/*===============================*/
-/***********************************************************************
-Wakes up the master thread if it is suspended or being suspended. */
-
-void
-srv_wake_master_thread(void);
-/*========================*/
-/*************************************************************************
-Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
-
-void
-srv_conc_enter_innodb(
-/*==================*/
-	trx_t*	trx);	/* in: transaction object associated with the
-			thread */
-/*************************************************************************
-This lets a thread enter InnoDB regardless of the number of threads inside
-InnoDB. This must be called when a thread ends a lock wait. */
-
-void
-srv_conc_force_enter_innodb(
-/*========================*/
-	trx_t*	trx);	/* in: transaction object associated with the
-			thread */
-/*************************************************************************
-This must be called when a thread exits InnoDB in a lock wait or at the
-end of an SQL statement. */
-
-void
-srv_conc_force_exit_innodb(
-/*=======================*/
-	trx_t*	trx);	/* in: transaction object associated with the
-			thread */
-/*************************************************************************
-This must be called when a thread exits InnoDB. */
-
-void
-srv_conc_exit_innodb(
-/*=================*/
-	trx_t*	trx);	/* in: transaction object associated with the
-			thread */
-/*******************************************************************
-Puts a MySQL OS thread to wait for a lock to be released. If an error
-occurs during the wait trx->error_state associated with thr is
-!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
-are possible errors. DB_DEADLOCK is returned if selective deadlock
-resolution chose this transaction as a victim. */
-
-void
-srv_suspend_mysql_thread(
-/*=====================*/
-	que_thr_t*	thr);	/* in: query thread associated with the MySQL
-				OS thread */
-/************************************************************************
-Releases a MySQL OS thread waiting for a lock to be released, if the
-thread is already suspended. */
-
-void
-srv_release_mysql_thread_if_suspended(
-/*==================================*/
-	que_thr_t*	thr);	/* in: query thread associated with the
-				MySQL OS thread	 */
-/*************************************************************************
-A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors. */
-
-os_thread_ret_t
-srv_lock_timeout_and_monitor_thread(
-/*================================*/
-			/* out: a dummy parameter */
-	void*	arg);	/* in: a dummy parameter required by
-			os_thread_create */
-/*************************************************************************
-A thread which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs. */
-
-os_thread_ret_t
-srv_error_monitor_thread(
-/*=====================*/
-			/* out: a dummy parameter */
-	void*	arg);	/* in: a dummy parameter required by
-			os_thread_create */
-/**********************************************************************
-Outputs to a file the output of the InnoDB Monitor. */
-
-void
-srv_printf_innodb_monitor(
-/*======================*/
-	FILE*	file);		/* in: output stream */
-
-/**********************************************************************
-Function to pass InnoDB status variables to MySQL */
-
-void
-srv_export_innodb_status(void);
-/*=====================*/
-
-/* Types for the threads existing in the system. Threads of types 4 - 9
-are called utility threads. Note that utility threads are mainly disk
-bound, except that version threads 6 - 7 may also be CPU bound, if
-cleaning versions from the buffer pool. */
-
-#define	SRV_COM		1	/* threads serving communication and queries */
-#define	SRV_CONSOLE	2	/* thread serving console */
-#define	SRV_WORKER	3	/* threads serving parallelized queries and
-				queries released from lock wait */
-#define SRV_BUFFER	4	/* thread flushing dirty buffer blocks,
-				not currently in use */
-#define SRV_RECOVERY	5	/* threads finishing a recovery,
-				not currently in use */
-#define SRV_INSERT	6	/* thread flushing the insert buffer to disk,
-				not currently in use */
-#define SRV_MASTER	7	/* the master thread, (whose type number must
-				be biggest) */
-
-/* Thread slot in the thread table */
-typedef struct srv_slot_struct	srv_slot_t;
-
-/* Thread table is an array of slots */
-typedef srv_slot_t	srv_table_t;
-
-/* In this structure we store status variables to be passed to MySQL */
-struct export_var_struct{
-	ulint innodb_data_pending_reads;
-	ulint innodb_data_pending_writes;
-	ulint innodb_data_pending_fsyncs;
-	ulint innodb_data_fsyncs;
-	ulint innodb_data_read;
-	ulint innodb_data_writes;
-	ulint innodb_data_written;
-	ulint innodb_data_reads;
-	ulint innodb_buffer_pool_pages_total;
-	ulint innodb_buffer_pool_pages_data;
-	ulint innodb_buffer_pool_pages_dirty;
-	ulint innodb_buffer_pool_pages_misc;
-	ulint innodb_buffer_pool_pages_free;
-#ifdef UNIV_DEBUG
-	ulint innodb_buffer_pool_pages_latched;
-#endif /* UNIV_DEBUG */
-	ulint innodb_buffer_pool_read_requests;
-	ulint innodb_buffer_pool_reads;
-	ulint innodb_buffer_pool_wait_free;
-	ulint innodb_buffer_pool_pages_flushed;
-	ulint innodb_buffer_pool_write_requests;
-	ulint innodb_buffer_pool_read_ahead_seq;
-	ulint innodb_buffer_pool_read_ahead_rnd;
-	ulint innodb_dblwr_pages_written;
-	ulint innodb_dblwr_writes;
-	ibool innodb_have_sync_atomic;
-	ibool innodb_heap_enabled;
-	ulint innodb_log_waits;
-	ulint innodb_log_write_requests;
-	ulint innodb_log_writes;
-	ulint innodb_os_log_written;
-	ulint innodb_os_log_fsyncs;
-	ulint innodb_os_log_pending_writes;
-	ulint innodb_os_log_pending_fsyncs;
-	ulint innodb_page_size;
-	ulint innodb_pages_created;
-	ulint innodb_pages_read;
-	ulint innodb_pages_written;
-	ulint innodb_row_lock_waits;
-	ulint innodb_row_lock_current_waits;
-	ib_longlong innodb_row_lock_time;
-	ulint innodb_row_lock_time_avg;
-	ulint innodb_row_lock_time_max;
-	ulint innodb_rows_read;
-	ulint innodb_rows_inserted;
-	ulint innodb_rows_updated;
-	ulint innodb_rows_deleted;
-	ulint innodb_wake_ups;
-};
-
-/* The server system struct */
-struct srv_sys_struct{
-	srv_table_t*	threads;	/* server thread table */
-	UT_LIST_BASE_NODE_T(que_thr_t)
-			tasks;		/* task queue */
-	dict_index_t*	dummy_ind1;	/* dummy index for old-style
-					supremum and infimum records */
-	dict_index_t*	dummy_ind2;	/* dummy index for new-style
-					supremum and infimum records */
-};
-
-extern ulint	srv_n_threads_active[];
-
-#endif
diff --git a/storage/innobase/include/srv0srv.ic b/storage/innobase/include/srv0srv.ic
deleted file mode 100644
index 73e0729660f..00000000000
--- a/storage/innobase/include/srv0srv.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-Server main program
-
-(c) 1995 Innobase Oy
-
-Created 10/4/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
deleted file mode 100644
index a04930d6516..00000000000
--- a/storage/innobase/include/srv0start.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/******************************************************
-Starts the Innobase database server
-
-(c) 1995-2000 Innobase Oy
-
-Created 10/10/1995 Heikki Tuuri
-*******************************************************/
-
-
-#ifndef srv0start_h
-#define srv0start_h
-
-#include "univ.i"
-#include "ut0byte.h"
-
-/*************************************************************************
-Normalizes a directory path for Windows: converts slashes to backslashes. */
-
-void
-srv_normalize_path_for_win(
-/*=======================*/
-	char*	str);	/* in/out: null-terminated character string */
-/*************************************************************************
-Reads the data files and their sizes from a character string given in
-the .cnf file. */
-
-ibool
-srv_parse_data_file_paths_and_sizes(
-/*================================*/
-					/* out: TRUE if ok, FALSE if parsing
-					error */
-	char*	str,			/* in: the data file path string */
-	char***	data_file_names,	/* out, own: array of data file
-					names */
-	ulint**	data_file_sizes,	/* out, own: array of data file sizes
-					in megabytes */
-	ulint**	data_file_is_raw_partition,/* out, own: array of flags
-					showing which data files are raw
-					partitions */
-	ulint*	n_data_files,		/* out: number of data files */
-	ibool*	is_auto_extending,	/* out: TRUE if the last data file is
-					auto-extending */
-	ulint*	max_auto_extend_size);	/* out: max auto extend size for the
-					last file if specified, 0 if not */
-/*************************************************************************
-Reads log group home directories from a character string given in
-the .cnf file. */
-
-ibool
-srv_parse_log_group_home_dirs(
-/*==========================*/
-					/* out: TRUE if ok, FALSE if parsing
-					error */
-	char*	str,			/* in: character string */
-	char***	log_group_home_dirs);	/* out, own: log group home dirs */
-/*************************************************************************
-Adds a slash or a backslash to the end of a string if it is missing
-and the string is not empty. */
-
-char*
-srv_add_path_separator_if_needed(
-/*=============================*/
-			/* out: string which has the separator if the
-			string is not empty */
-	char*	str);	/* in: null-terminated character string */
-/********************************************************************
-Starts Innobase and creates a new database if database files
-are not found and the user wants. Server parameters are
-read from a file of name "srv_init" in the ib_home directory. */
-
-int
-innobase_start_or_create_for_mysql(void);
-/*====================================*/
-				/* out: DB_SUCCESS or error code */
-/********************************************************************
-Shuts down the Innobase database. */
-int
-innobase_shutdown_for_mysql(void);
-/*=============================*/
-				/* out: DB_SUCCESS or error code */
-extern	dulint	srv_shutdown_lsn;
-extern	dulint	srv_start_lsn;
-
-#ifdef __NETWARE__
-void set_panic_flag_for_netware(void);
-#endif
-
-#ifdef HAVE_DARWIN_THREADS
-extern	ibool	srv_have_fullfsync;
-#endif
-
-extern	ulint	srv_sizeof_trx_t_in_ha_innodb_cc;
-
-extern	ibool	srv_is_being_started;
-extern	ibool	srv_startup_is_before_trx_rollback_phase;
-extern	ibool	srv_is_being_shut_down;
-
-extern	ibool	srv_start_raw_disk_in_use;
-
-/* At a shutdown the value first climbs from 0 to SRV_SHUTDOWN_CLEANUP
-and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
-
-extern	ulint	srv_shutdown_state;
-
-#define SRV_SHUTDOWN_CLEANUP	   1
-#define SRV_SHUTDOWN_LAST_PHASE	   2
-#define SRV_SHUTDOWN_EXIT_THREADS  3
-
-/* Log 'spaces' have id's >= this */
-#define SRV_LOG_SPACE_FIRST_ID		0xFFFFFFF0UL
-
-#endif
diff --git a/storage/innobase/include/sync0arr.ic b/storage/innobase/include/sync0arr.ic
deleted file mode 100644
index dbe35c033e5..00000000000
--- a/storage/innobase/include/sync0arr.ic
+++ /dev/null
@@ -1,10 +0,0 @@
-/******************************************************
-The wait array for synchronization primitives
-
-Inline code
-
-(c) 1995 Innobase Oy
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
deleted file mode 100644
index 57478426f25..00000000000
--- a/storage/innobase/include/sync0types.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/******************************************************
-Global types for sync
-
-(c) 1995 Innobase Oy
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef sync0types_h
-#define sync0types_h
-
-#define mutex_t ib_mutex_t
-typedef struct mutex_struct		mutex_t;
-
-
-#endif
diff --git a/storage/innobase/include/thr0loc.ic b/storage/innobase/include/thr0loc.ic
deleted file mode 100644
index b8b8136180c..00000000000
--- a/storage/innobase/include/thr0loc.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-Thread local storage
-
-(c) 1995 Innobase Oy
-
-Created 10/4/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/trx0purge.ic b/storage/innobase/include/trx0purge.ic
deleted file mode 100644
index 9f1c0ed96f8..00000000000
--- a/storage/innobase/include/trx0purge.ic
+++ /dev/null
@@ -1,26 +0,0 @@
-/******************************************************
-Purge old versions
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0undo.h"
-
-/************************************************************************
-Calculates the file address of an undo log header when we have the file
-address of its history list node. */
-UNIV_INLINE
-fil_addr_t
-trx_purge_get_log_from_hist(
-/*========================*/
-					/* out: file address of the log */
-	fil_addr_t	node_addr)	/* in: file address of the history
-					list node of the log */
-{
-	node_addr.boffset -= TRX_UNDO_HISTORY_NODE;
-
-	return(node_addr);
-}
-
diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h
deleted file mode 100644
index 6447b6a2e35..00000000000
--- a/storage/innobase/include/trx0rec.h
+++ /dev/null
@@ -1,303 +0,0 @@
-/******************************************************
-Transaction undo log record
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0rec_h
-#define trx0rec_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-#include "dict0types.h"
-#include "que0types.h"
-#include "data0data.h"
-#include "rem0types.h"
-
-/***************************************************************************
-Copies the undo record to the heap. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_rec_copy(
-/*==============*/
-					/* out, own: copy of undo log record */
-	trx_undo_rec_t*	undo_rec,	/* in: undo log record */
-	mem_heap_t*	heap);		/* in: heap where copied */
-/**************************************************************************
-Reads the undo log record type. */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_type(
-/*==================*/
-					/* out: record type */
-	trx_undo_rec_t*	undo_rec);	/* in: undo log record */
-/**************************************************************************
-Reads from an undo log record the record compiler info. */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
-					/* out: compiler info */
-	trx_undo_rec_t*	undo_rec);	/* in: undo log record */
-/**************************************************************************
-Returns TRUE if an undo log record contains an extern storage field. */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
-					/* out: TRUE if extern */
-	trx_undo_rec_t*	undo_rec);	/* in: undo log record */
-/**************************************************************************
-Reads the undo log record number. */
-UNIV_INLINE
-dulint
-trx_undo_rec_get_undo_no(
-/*=====================*/
-					/* out: undo no */
-	trx_undo_rec_t*	undo_rec);	/* in: undo log record */
-/**************************************************************************
-Reads from an undo log record the general parameters. */
-
-byte*
-trx_undo_rec_get_pars(
-/*==================*/
-					/* out: remaining part of undo log
-					record after reading these values */
-	trx_undo_rec_t*	undo_rec,	/* in: undo log record */
-	ulint*		type,		/* out: undo record type:
-					TRX_UNDO_INSERT_REC, ... */
-	ulint*		cmpl_info,	/* out: compiler info, relevant only
-					for update type records */
-	ibool*		updated_extern,	/* out: TRUE if we updated an
-					externally stored fild */
-	dulint*		undo_no,	/* out: undo log record number */
-	dulint*		table_id);	/* out: table id */
-/***********************************************************************
-Builds a row reference from an undo log record. */
-
-byte*
-trx_undo_rec_get_row_ref(
-/*=====================*/
-				/* out: pointer to remaining part of undo
-				record */
-	byte*		ptr,	/* in: remaining part of a copy of an undo log
-				record, at the start of the row reference;
-				NOTE that this copy of the undo log record must
-				be preserved as long as the row reference is
-				used, as we do NOT copy the data in the
-				record! */
-	dict_index_t*	index,	/* in: clustered index */
-	dtuple_t**	ref,	/* out, own: row reference */
-	mem_heap_t*	heap);	/* in: memory heap from which the memory
-				needed is allocated */
-/***********************************************************************
-Skips a row reference from an undo log record. */
-
-byte*
-trx_undo_rec_skip_row_ref(
-/*======================*/
-				/* out: pointer to remaining part of undo
-				record */
-	byte*		ptr,	/* in: remaining part in update undo log
-				record, at the start of the row reference */
-	dict_index_t*	index);	/* in: clustered index */
-/**************************************************************************
-Reads from an undo log update record the system field values of the old
-version. */
-
-byte*
-trx_undo_update_rec_get_sys_cols(
-/*=============================*/
-				/* out: remaining part of undo log
-				record after reading these values */
-	byte*	ptr,		/* in: remaining part of undo log
-				record after reading general
-				parameters */
-	dulint*	trx_id,		/* out: trx id */
-	dulint*	roll_ptr,	/* out: roll ptr */
-	ulint*	info_bits);	/* out: info bits state */
-/***********************************************************************
-Builds an update vector based on a remaining part of an undo log record. */
-
-byte*
-trx_undo_update_rec_get_update(
-/*===========================*/
-				/* out: remaining part of the record,
-				NULL if an error detected, which means that
-				the record is corrupted */
-	byte*		ptr,	/* in: remaining part in update undo log
-				record, after reading the row reference
-				NOTE that this copy of the undo log record must
-				be preserved as long as the update vector is
-				used, as we do NOT copy the data in the
-				record! */
-	dict_index_t*	index,	/* in: clustered index */
-	ulint		type,	/* in: TRX_UNDO_UPD_EXIST_REC,
-				TRX_UNDO_UPD_DEL_REC, or
-				TRX_UNDO_DEL_MARK_REC; in the last case,
-				only trx id and roll ptr fields are added to
-				the update vector */
-	dulint		trx_id,	/* in: transaction id from this undorecord */
-	dulint		roll_ptr,/* in: roll pointer from this undo record */
-	ulint		info_bits,/* in: info bits from this undo record */
-	trx_t*		trx,	/* in: transaction */
-	mem_heap_t*	heap,	/* in: memory heap from which the memory
-				needed is allocated */
-	upd_t**		upd);	/* out, own: update vector */
-/***********************************************************************
-Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table. */
-
-byte*
-trx_undo_rec_get_partial_row(
-/*=========================*/
-				/* out: pointer to remaining part of undo
-				record */
-	byte*		ptr,	/* in: remaining part in update undo log
-				record of a suitable type, at the start of
-				the stored index columns;
-				NOTE that this copy of the undo log record must
-				be preserved as long as the partial row is
-				used, as we do NOT copy the data in the
-				record! */
-	dict_index_t*	index,	/* in: clustered index */
-	dtuple_t**	row,	/* out, own: partial row */
-	mem_heap_t*	heap);	/* in: memory heap from which the memory
-				needed is allocated */
-/***************************************************************************
-Writes information to an undo log about an insert, update, or a delete marking
-of a clustered index record. This information is used in a rollback of the
-transaction and in consistent reads that must look to the history of this
-transaction. */
-
-ulint
-trx_undo_report_row_operation(
-/*==========================*/
-					/* out: DB_SUCCESS or error code */
-	ulint		flags,		/* in: if BTR_NO_UNDO_LOG_FLAG bit is
-					set, does nothing */
-	ulint		op_type,	/* in: TRX_UNDO_INSERT_OP or
-					TRX_UNDO_MODIFY_OP */
-	que_thr_t*	thr,		/* in: query thread */
-	dict_index_t*	index,		/* in: clustered index */
-	dtuple_t*	clust_entry,	/* in: in the case of an insert,
-					index entry to insert into the
-					clustered index, otherwise NULL */
-	upd_t*		update,		/* in: in the case of an update,
-					the update vector, otherwise NULL */
-	ulint		cmpl_info,	/* in: compiler info on secondary
-					index updates */
-	rec_t*		rec,		/* in: case of an update or delete
-					marking, the record in the clustered
-					index, otherwise NULL */
-	dulint*		roll_ptr);	/* out: rollback pointer to the
-					inserted undo log record,
-					ut_dulint_zero if BTR_NO_UNDO_LOG
-					flag was specified */
-/**********************************************************************
-Copies an undo record to heap. This function can be called if we know that
-the undo log record exists. */
-
-trx_undo_rec_t*
-trx_undo_get_undo_rec_low(
-/*======================*/
-					/* out, own: copy of the record */
-	dulint		roll_ptr,	/* in: roll pointer to record */
-	mem_heap_t*	heap);		/* in: memory heap where copied */
-/**********************************************************************
-Copies an undo record to heap. */
-
-ulint
-trx_undo_get_undo_rec(
-/*==================*/
-					/* out: DB_SUCCESS, or
-					DB_MISSING_HISTORY if the undo log
-					has been truncated and we cannot
-					fetch the old version; NOTE: the
-					caller must have latches on the
-					clustered index page and purge_view */
-	dulint		roll_ptr,	/* in: roll pointer to record */
-	dulint		trx_id,		/* in: id of the trx that generated
-					the roll pointer: it points to an
-					undo log of this transaction */
-	trx_undo_rec_t** undo_rec,	/* out, own: copy of the record */
-	mem_heap_t*	heap);		/* in: memory heap where copied */
-/***********************************************************************
-Build a previous version of a clustered index record. This function checks
-that the caller has a latch on the index page of the clustered index record
-and an s-latch on the purge_view. This guarantees that the stack of versions
-is locked. */
-
-ulint
-trx_undo_prev_version_build(
-/*========================*/
-				/* out: DB_SUCCESS, or DB_MISSING_HISTORY if
-				the previous version is not >= purge_view,
-				which means that it may have been removed,
-				DB_ERROR if corrupted record */
-	rec_t*		index_rec,/* in: clustered index record in the
-				index tree */
-	mtr_t*		index_mtr,/* in: mtr which contains the latch to
-				index_rec page and purge_view */
-	rec_t*		rec,	/* in: version of a clustered index record */
-	dict_index_t*	index,	/* in: clustered index */
-	ulint*		offsets,/* in: rec_get_offsets(rec, index) */
-	mem_heap_t*	heap,	/* in: memory heap from which the memory
-				needed is allocated */
-	rec_t**		old_vers);/* out, own: previous version, or NULL if
-				rec is the first inserted version, or if
-				history data has been deleted */
-/***************************************************************
-Parses a redo log record of adding an undo log record. */
-
-byte*
-trx_undo_parse_add_undo_rec(
-/*========================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	page_t*	page);	/* in: page or NULL */
-/***************************************************************
-Parses a redo log record of erasing of an undo page end. */
-
-byte*
-trx_undo_parse_erase_page_end(
-/*==========================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr);	/* in: mtr or NULL */
-
-/* Types of an undo log record: these have to be smaller than 16, as the
-compilation info multiplied by 16 is ORed to this value in an undo log
-record */
-#define TRX_UNDO_INSERT_REC	11	/* fresh insert into clustered index */
-#define TRX_UNDO_UPD_EXIST_REC	12	/* update of a non-delete-marked
-					record */
-#define	TRX_UNDO_UPD_DEL_REC	13	/* update of a delete marked record to
-					a not delete marked record; also the
-					fields of the record can change */
-#define TRX_UNDO_DEL_MARK_REC	14	/* delete marking of a record; fields
-					do not change */
-#define	TRX_UNDO_CMPL_INFO_MULT	16	/* compilation info is multiplied by
-					this and ORed to the type above */
-#define TRX_UNDO_UPD_EXTERN	128	/* This bit can be ORed to type_cmpl
-					to denote that we updated external
-					storage fields: used by purge to
-					free the external storage */
-
-/* Operation type flags used in trx_undo_report_row_operation */
-#define TRX_UNDO_INSERT_OP	1
-#define TRX_UNDO_MODIFY_OP	2
-
-#ifndef UNIV_NONINL
-#include "trx0rec.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic
deleted file mode 100644
index a1ddc127ec7..00000000000
--- a/storage/innobase/include/trx0rec.ic
+++ /dev/null
@@ -1,86 +0,0 @@
-/******************************************************
-Transaction undo log record
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-/**************************************************************************
-Reads from an undo log record the record type. */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_type(
-/*==================*/
-					/* out: record type */
-	trx_undo_rec_t*	undo_rec)	/* in: undo log record */
-{
-	return(mach_read_from_1(undo_rec + 2) & (TRX_UNDO_CMPL_INFO_MULT - 1));
-}
-
-/**************************************************************************
-Reads from an undo log record the record compiler info. */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
-					/* out: compiler info */
-	trx_undo_rec_t*	undo_rec)	/* in: undo log record */
-{
-	return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
-}
-
-/**************************************************************************
-Returns TRUE if an undo log record contains an extern storage field. */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
-					/* out: TRUE if extern */
-	trx_undo_rec_t*	undo_rec)	/* in: undo log record */
-{
-	if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/**************************************************************************
-Reads the undo log record number. */
-UNIV_INLINE
-dulint
-trx_undo_rec_get_undo_no(
-/*=====================*/
-					/* out: undo no */
-	trx_undo_rec_t*	undo_rec)	/* in: undo log record */
-{
-	byte*	ptr;
-
-	ptr = undo_rec + 3;
-
-	return(mach_dulint_read_much_compressed(ptr));
-}
-
-/***************************************************************************
-Copies the undo record to the heap. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_rec_copy(
-/*==============*/
-					/* out, own: copy of undo log record */
-	trx_undo_rec_t*	undo_rec,	/* in: undo log record */
-	mem_heap_t*	heap)		/* in: heap where copied */
-{
-	ulint		len;
-	trx_undo_rec_t*	rec_copy;
-
-	len = mach_read_from_2(undo_rec) + buf_frame_align(undo_rec)
-		- undo_rec;
-	rec_copy = mem_heap_alloc(heap, len);
-
-	ut_memcpy(rec_copy, undo_rec, len);
-
-	return(rec_copy);
-}
diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h
deleted file mode 100644
index c1eca3d5753..00000000000
--- a/storage/innobase/include/trx0roll.h
+++ /dev/null
@@ -1,314 +0,0 @@
-/******************************************************
-Transaction rollback
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0roll_h
-#define trx0roll_h
-
-#include "univ.i"
-#include "trx0trx.h"
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-
-#define trx_roll_free_all_savepoints(s) trx_roll_savepoints_free((s), NULL)
-
-/***********************************************************************
-Returns a transaction savepoint taken at this point in time. */
-
-trx_savept_t
-trx_savept_take(
-/*============*/
-			/* out: savepoint */
-	trx_t*	trx);	/* in: transaction */
-/***********************************************************************
-Creates an undo number array. */
-
-trx_undo_arr_t*
-trx_undo_arr_create(void);
-/*=====================*/
-/***********************************************************************
-Frees an undo number array. */
-
-void
-trx_undo_arr_free(
-/*==============*/
-	trx_undo_arr_t*	arr);	/* in: undo number array */
-/***********************************************************************
-Returns pointer to nth element in an undo number array. */
-UNIV_INLINE
-trx_undo_inf_t*
-trx_undo_arr_get_nth_info(
-/*======================*/
-				/* out: pointer to the nth element */
-	trx_undo_arr_t*	arr,	/* in: undo number array */
-	ulint		n);	/* in: position */
-/***************************************************************************
-Tries truncate the undo logs. */
-
-void
-trx_roll_try_truncate(
-/*==================*/
-	trx_t*	trx);	/* in: transaction */
-/************************************************************************
-Pops the topmost record when the two undo logs of a transaction are seen
-as a single stack of records ordered by their undo numbers. Inserts the
-undo number of the popped undo record to the array of currently processed
-undo numbers in the transaction. When the query thread finishes processing
-of this undo record, it must be released with trx_undo_rec_release. */
-
-trx_undo_rec_t*
-trx_roll_pop_top_rec_of_trx(
-/*========================*/
-				/* out: undo log record copied to heap, NULL
-				if none left, or if the undo number of the
-				top record would be less than the limit */
-	trx_t*		trx,	/* in: transaction */
-	dulint		limit,	/* in: least undo number we need */
-	dulint*		roll_ptr,/* out: roll pointer to undo record */
-	mem_heap_t*	heap);	/* in: memory heap where copied */
-/************************************************************************
-Reserves an undo log record for a query thread to undo. This should be
-called if the query thread gets the undo log record not using the pop
-function above. */
-
-ibool
-trx_undo_rec_reserve(
-/*=================*/
-			/* out: TRUE if succeeded */
-	trx_t*	trx,	/* in: transaction */
-	dulint	undo_no);/* in: undo number of the record */
-/***********************************************************************
-Releases a reserved undo record. */
-
-void
-trx_undo_rec_release(
-/*=================*/
-	trx_t*	trx,	/* in: transaction */
-	dulint	undo_no);/* in: undo number */
-/*************************************************************************
-Starts a rollback operation. */
-
-void
-trx_rollback(
-/*=========*/
-	trx_t*		trx,	/* in: transaction */
-	trx_sig_t*	sig,	/* in: signal starting the rollback */
-	que_thr_t**	next_thr);/* in/out: next query thread to run;
-				if the value which is passed in is
-				a pointer to a NULL pointer, then the
-				calling function can start running
-				a new query thread */
-/***********************************************************************
-Rollback or clean up transactions which have no user session. If the
-transaction already was committed, then we clean up a possible insert
-undo log. If the transaction was not yet committed, then we roll it back.
-Note: this is done in a background thread. */
-
-os_thread_ret_t
-trx_rollback_or_clean_all_without_sess(
-/*===================================*/
-			/* out: a dummy parameter */
-	void*	arg __attribute__((unused)));
-			/* in: a dummy parameter required by
-			os_thread_create */
-/********************************************************************
-Finishes a transaction rollback. */
-
-void
-trx_finish_rollback_off_kernel(
-/*===========================*/
-	que_t*		graph,	/* in: undo graph which can now be freed */
-	trx_t*		trx,	/* in: transaction */
-	que_thr_t**	next_thr);/* in/out: next query thread to run;
-				if the value which is passed in is
-				a pointer to a NULL pointer, then the
-				calling function can start running
-				a new query thread; if this parameter is
-				NULL, it is ignored */
-/********************************************************************
-Builds an undo 'query' graph for a transaction. The actual rollback is
-performed by executing this query graph like a query subprocedure call.
-The reply about the completion of the rollback will be sent by this
-graph. */
-
-que_t*
-trx_roll_graph_build(
-/*=================*/
-			/* out, own: the query graph */
-	trx_t*	trx);	/* in: trx handle */
-/*************************************************************************
-Creates a rollback command node struct. */
-
-roll_node_t*
-roll_node_create(
-/*=============*/
-				/* out, own: rollback node struct */
-	mem_heap_t*	heap);	/* in: mem heap where created */
-/***************************************************************
-Performs an execution step for a rollback command node in a query graph. */
-
-que_thr_t*
-trx_rollback_step(
-/*==============*/
-				/* out: query thread to run next, or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/***********************************************************************
-Rollback a transaction used in MySQL. */
-
-int
-trx_rollback_for_mysql(
-/*===================*/
-			/* out: error code or DB_SUCCESS */
-	trx_t*	trx);	/* in: transaction handle */
-/***********************************************************************
-Rollback the latest SQL statement for MySQL. */
-
-int
-trx_rollback_last_sql_stat_for_mysql(
-/*=================================*/
-			/* out: error code or DB_SUCCESS */
-	trx_t*	trx);	/* in: transaction handle */
-/***********************************************************************
-Rollback a transaction used in MySQL. */
-
-int
-trx_general_rollback_for_mysql(
-/*===========================*/
-				/* out: error code or DB_SUCCESS */
-	trx_t*		trx,	/* in: transaction handle */
-	ibool		partial,/* in: TRUE if partial rollback requested */
-	trx_savept_t*	savept);/* in: pointer to savepoint undo number, if
-				partial rollback requested */
-/***********************************************************************
-Rolls back a transaction back to a named savepoint. Modifications after the
-savepoint are undone but InnoDB does NOT release the corresponding locks
-which are stored in memory. If a lock is 'implicit', that is, a new inserted
-row holds a lock where the lock information is carried by the trx id stored in
-the row, these locks are naturally released in the rollback. Savepoints which
-were set after this savepoint are deleted. */
-
-ulint
-trx_rollback_to_savepoint_for_mysql(
-/*================================*/
-						/* out: if no savepoint
-						of the name found then
-						DB_NO_SAVEPOINT,
-						otherwise DB_SUCCESS */
-	trx_t*		trx,			/* in: transaction handle */
-	const char*	savepoint_name,		/* in: savepoint name */
-	ib_longlong*	mysql_binlog_cache_pos);/* out: the MySQL binlog cache
-						position corresponding to this
-						savepoint; MySQL needs this
-						information to remove the
-						binlog entries of the queries
-						executed after the savepoint */
-/***********************************************************************
-Creates a named savepoint. If the transaction is not yet started, starts it.
-If there is already a savepoint of the same name, this call erases that old
-savepoint and replaces it with a new. Savepoints are deleted in a transaction
-commit or rollback. */
-
-ulint
-trx_savepoint_for_mysql(
-/*====================*/
-						/* out: always DB_SUCCESS */
-	trx_t*		trx,			/* in: transaction handle */
-	const char*	savepoint_name,		/* in: savepoint name */
-	ib_longlong	binlog_cache_pos);	/* in: MySQL binlog cache
-						position corresponding to this
-						connection at the time of the
-						savepoint */
-
-/***********************************************************************
-Releases a named savepoint. Savepoints which
-were set after this savepoint are deleted. */
-
-ulint
-trx_release_savepoint_for_mysql(
-/*============================*/
-						/* out: if no savepoint
-						of the name found then
-						DB_NO_SAVEPOINT,
-						otherwise DB_SUCCESS */
-	trx_t*		trx,			/* in: transaction handle */
-	const char*	savepoint_name);	/* in: savepoint name */
-
-/***********************************************************************
-Frees a single savepoint struct. */
-
-void
-trx_roll_savepoint_free(
-/*=====================*/
-	trx_t*			trx,	/* in: transaction handle */
-	trx_named_savept_t*	savep);	/* in: savepoint to free */
-
-/***********************************************************************
-Frees savepoint structs starting from savep, if savep == NULL then
-free all savepoints. */
-
-void
-trx_roll_savepoints_free(
-/*=====================*/
-	trx_t*			trx,	/* in: transaction handle */
-	trx_named_savept_t*	savep);	/* in: free all savepoints > this one;
-					if this is NULL, free all savepoints
-					of trx */
-
-extern sess_t*		trx_dummy_sess;
-
-/* A cell in the array used during a rollback and a purge */
-struct	trx_undo_inf_struct{
-	dulint	trx_no;		/* transaction number: not defined during
-				a rollback */
-	dulint	undo_no;	/* undo number of an undo record */
-	ibool	in_use;		/* TRUE if the cell is in use */
-};
-
-/* During a rollback and a purge, undo numbers of undo records currently being
-processed are stored in this array */
-
-struct trx_undo_arr_struct{
-	ulint		n_cells;	/* number of cells in the array */
-	ulint		n_used;		/* number of cells currently in use */
-	trx_undo_inf_t*	infos;		/* the array of undo infos */
-	mem_heap_t*	heap;		/* memory heap from which allocated */
-};
-
-/* Rollback command node in a query graph */
-struct roll_node_struct{
-	que_common_t	common;	/* node type: QUE_NODE_ROLLBACK */
-	ulint		state;	/* node execution state */
-	ibool		partial;/* TRUE if we want a partial rollback */
-	trx_savept_t	savept;	/* savepoint to which to roll back, in the
-				case of a partial rollback */
-};
-
-/* A savepoint set with SQL's "SAVEPOINT savepoint_id" command */
-struct trx_named_savept_struct{
-	char*		name;		/* savepoint name */
-	trx_savept_t	savept;		/* the undo number corresponding to
-					the savepoint */
-	ib_longlong	mysql_binlog_cache_pos;
-					/* the MySQL binlog cache position
-					corresponding to this savepoint, not
-					defined if the MySQL binlogging is not
-					enabled */
-	UT_LIST_NODE_T(trx_named_savept_t)
-			trx_savepoints;	/* the list of savepoints of a
-					transaction */
-};
-
-/* Rollback node states */
-#define ROLL_NODE_SEND	1
-#define ROLL_NODE_WAIT	2
-
-#ifndef UNIV_NONINL
-#include "trx0roll.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/trx0roll.ic b/storage/innobase/include/trx0roll.ic
deleted file mode 100644
index dfde83ac478..00000000000
--- a/storage/innobase/include/trx0roll.ic
+++ /dev/null
@@ -1,23 +0,0 @@
-/******************************************************
-Transaction rollback
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-/***********************************************************************
-Returns pointer to nth element in an undo number array. */
-UNIV_INLINE
-trx_undo_inf_t*
-trx_undo_arr_get_nth_info(
-/*======================*/
-				/* out: pointer to the nth element */
-	trx_undo_arr_t*	arr,	/* in: undo number array */
-	ulint		n)	/* in: position */
-{
-	ut_ad(arr);
-	ut_ad(n < arr->n_cells);
-
-	return(arr->infos + n);
-}
diff --git a/storage/innobase/include/trx0rseg.ic b/storage/innobase/include/trx0rseg.ic
deleted file mode 100644
index eb1893587a6..00000000000
--- a/storage/innobase/include/trx0rseg.ic
+++ /dev/null
@@ -1,126 +0,0 @@
-/******************************************************
-Rollback segment
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "srv0srv.h"
-
-/**********************************************************************
-Gets a rollback segment header. */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get(
-/*==========*/
-				/* out: rollback segment header, page
-				x-latched */
-	ulint	space,		/* in: space where placed */
-	ulint	page_no,	/* in: page number of the header */
-	mtr_t*	mtr)		/* in: mtr */
-{
-	trx_rsegf_t*	header;
-
-	header = TRX_RSEG + buf_page_get(space, page_no, RW_X_LATCH, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(header, SYNC_RSEG_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
-
-	return(header);
-}
-
-/**********************************************************************
-Gets a newly created rollback segment header. */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get_new(
-/*==============*/
-				/* out: rollback segment header, page
-				x-latched */
-	ulint	space,		/* in: space where placed */
-	ulint	page_no,	/* in: page number of the header */
-	mtr_t*	mtr)		/* in: mtr */
-{
-	trx_rsegf_t*	header;
-
-	header = TRX_RSEG + buf_page_get(space, page_no, RW_X_LATCH, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(header, SYNC_RSEG_HEADER_NEW);
-#endif /* UNIV_SYNC_DEBUG */
-
-	return(header);
-}
-
-/*******************************************************************
-Gets the file page number of the nth undo log slot. */
-UNIV_INLINE
-ulint
-trx_rsegf_get_nth_undo(
-/*===================*/
-				/* out: page number of the undo log segment */
-	trx_rsegf_t*	rsegf,	/* in: rollback segment header */
-	ulint		n,	/* in: index of slot */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to get slot %lu of rseg\n",
-			(ulong) n);
-		ut_error;
-	}
-
-	return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS
-			      + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
-}
-
-/*******************************************************************
-Sets the file page number of the nth undo log slot. */
-UNIV_INLINE
-void
-trx_rsegf_set_nth_undo(
-/*===================*/
-	trx_rsegf_t*	rsegf,	/* in: rollback segment header */
-	ulint		n,	/* in: index of slot */
-	ulint		page_no,/* in: page number of the undo log segment */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to set slot %lu of rseg\n",
-			(ulong) n);
-		ut_error;
-	}
-
-	mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE,
-			 page_no, MLOG_4BYTES, mtr);
-}
-
-/********************************************************************
-Looks for a free slot for an undo log segment. */
-UNIV_INLINE
-ulint
-trx_rsegf_undo_find_free(
-/*=====================*/
-				/* out: slot index or ULINT_UNDEFINED if not
-				found */
-	trx_rsegf_t*	rsegf,	/* in: rollback segment header */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ulint		i;
-	ulint		page_no;
-
-	for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
-
-		page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr);
-
-		if (page_no == FIL_NULL) {
-
-			return(i);
-		}
-	}
-
-	return(ULINT_UNDEFINED);
-}
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
deleted file mode 100644
index a8da5cd51a3..00000000000
--- a/storage/innobase/include/trx0sys.h
+++ /dev/null
@@ -1,453 +0,0 @@
-/******************************************************
-Transaction system
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0sys_h
-#define trx0sys_h
-
-#include "univ.i"
-
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "ut0byte.h"
-#include "mem0mem.h"
-#include "sync0sync.h"
-#include "ut0lst.h"
-#include "buf0buf.h"
-#include "fil0fil.h"
-#include "fut0lst.h"
-#include "fsp0fsp.h"
-#include "read0types.h"
-
-/* In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. We have successfully got the updates to InnoDB
-up to this position. If .._pos is -1, it means no crash recovery was needed,
-or there was no master log position info inside InnoDB. */
-
-extern char		trx_sys_mysql_master_log_name[];
-extern ib_longlong	trx_sys_mysql_master_log_pos;
-
-/* If this MySQL server uses binary logging, after InnoDB has been inited
-and if it has done a crash recovery, we store the binlog file name and position
-here. If .._pos is -1, it means there was no binlog position info inside
-InnoDB. */
-
-extern char		trx_sys_mysql_bin_log_name[];
-extern ib_longlong	trx_sys_mysql_bin_log_pos;
-
-/* The transaction system */
-extern trx_sys_t*	trx_sys;
-
-/* Doublewrite system */
-extern trx_doublewrite_t*	trx_doublewrite;
-extern ibool			trx_doublewrite_must_reset_space_ids;
-extern ibool			trx_sys_multiple_tablespace_format;
-
-/********************************************************************
-Creates the doublewrite buffer to a new InnoDB installation. The header of the
-doublewrite buffer is placed on the trx system header page. */
-
-void
-trx_sys_create_doublewrite_buf(void);
-/*================================*/
-/********************************************************************
-At a database startup initializes the doublewrite buffer memory structure if
-we already have a doublewrite buffer created in the data files. If we are
-upgrading to an InnoDB version which supports multiple tablespaces, then this
-function performs the necessary update operations. If we are in a crash
-recovery, this function uses a possible doublewrite buffer to restore
-half-written pages in the data files. */
-
-void
-trx_sys_doublewrite_init_or_restore_pages(
-/*======================================*/
-	ibool	restore_corrupt_pages);
-/********************************************************************
-Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
-multiple tablespace format. */
-
-void
-trx_sys_mark_upgraded_to_multiple_tablespaces(void);
-/*===============================================*/
-/********************************************************************
-Determines if a page number is located inside the doublewrite buffer. */
-
-ibool
-trx_doublewrite_page_inside(
-/*========================*/
-				/* out: TRUE if the location is inside
-				the two blocks of the doublewrite buffer */
-	ulint	page_no);	/* in: page number */
-/*******************************************************************
-Checks if a page address is the trx sys header page. */
-UNIV_INLINE
-ibool
-trx_sys_hdr_page(
-/*=============*/
-			/* out: TRUE if trx sys header page */
-	ulint	space,	/* in: space */
-	ulint	page_no);/* in: page number */
-/*********************************************************************
-Creates and initializes the central memory structures for the transaction
-system. This is called when the database is started. */
-
-void
-trx_sys_init_at_db_start(void);
-/*==========================*/
-/*********************************************************************
-Creates and initializes the transaction system at the database creation. */
-
-void
-trx_sys_create(void);
-/*================*/
-/********************************************************************
-Looks for a free slot for a rollback segment in the trx system file copy. */
-
-ulint
-trx_sysf_rseg_find_free(
-/*====================*/
-					/* out: slot index or ULINT_UNDEFINED
-					if not found */
-	mtr_t*		mtr);		/* in: mtr */
-/*******************************************************************
-Gets the pointer in the nth slot of the rseg array. */
-UNIV_INLINE
-trx_rseg_t*
-trx_sys_get_nth_rseg(
-/*=================*/
-				/* out: pointer to rseg object, NULL if slot
-				not in use */
-	trx_sys_t*	sys,	/* in: trx system */
-	ulint		n);	/* in: index of slot */
-/*******************************************************************
-Sets the pointer in the nth slot of the rseg array. */
-UNIV_INLINE
-void
-trx_sys_set_nth_rseg(
-/*=================*/
-	trx_sys_t*	sys,	/* in: trx system */
-	ulint		n,	/* in: index of slot */
-	trx_rseg_t*	rseg);	/* in: pointer to rseg object, NULL if slot
-				not in use */
-/**************************************************************************
-Gets a pointer to the transaction system file copy and x-locks its page. */
-UNIV_INLINE
-trx_sysf_t*
-trx_sysf_get(
-/*=========*/
-			/* out: pointer to system file copy, page x-locked */
-	mtr_t*	mtr);	/* in: mtr */
-/*********************************************************************
-Gets the space of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_space(
-/*====================*/
-					/* out: space id */
-	trx_sysf_t*	sys_header,	/* in: trx sys file copy */
-	ulint		i,		/* in: slot index == rseg id */
-	mtr_t*		mtr);		/* in: mtr */
-/*********************************************************************
-Gets the page number of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_page_no(
-/*======================*/
-					/* out: page number, FIL_NULL
-					if slot unused */
-	trx_sysf_t*	sys_header,	/* in: trx sys file copy */
-	ulint		i,		/* in: slot index == rseg id */
-	mtr_t*		mtr);		/* in: mtr */
-/*********************************************************************
-Sets the space id of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_space(
-/*====================*/
-	trx_sysf_t*	sys_header,	/* in: trx sys file copy */
-	ulint		i,		/* in: slot index == rseg id */
-	ulint		space,		/* in: space id */
-	mtr_t*		mtr);		/* in: mtr */
-/*********************************************************************
-Sets the page number of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_page_no(
-/*======================*/
-	trx_sysf_t*	sys_header,	/* in: trx sys file copy */
-	ulint		i,		/* in: slot index == rseg id */
-	ulint		page_no,	/* in: page number, FIL_NULL if
-					the slot is reset to unused */
-	mtr_t*		mtr);		/* in: mtr */
-/*********************************************************************
-Allocates a new transaction id. */
-UNIV_INLINE
-dulint
-trx_sys_get_new_trx_id(void);
-/*========================*/
-			/* out: new, allocated trx id */
-/*********************************************************************
-Allocates a new transaction number. */
-UNIV_INLINE
-dulint
-trx_sys_get_new_trx_no(void);
-/*========================*/
-			/* out: new, allocated trx number */
-/*********************************************************************
-Writes a trx id to an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_trx_id(
-/*=============*/
-	byte*	ptr,	/* in: pointer to memory where written */
-	dulint	id);	/* in: id */
-/*********************************************************************
-Reads a trx id from an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_read_... */
-UNIV_INLINE
-dulint
-trx_read_trx_id(
-/*============*/
-			/* out: id */
-	byte*	ptr);	/* in: pointer to memory from where to read */
-/********************************************************************
-Looks for the trx handle with the given id in trx_list. */
-UNIV_INLINE
-trx_t*
-trx_get_on_id(
-/*==========*/
-			/* out: the trx handle or NULL if not found */
-	dulint	trx_id);	/* in: trx id to search for */
-/********************************************************************
-Returns the minumum trx id in trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->conc_state to
-find out if the minimum trx id transaction itself is active, or already
-committed.) */
-UNIV_INLINE
-dulint
-trx_list_get_min_trx_id(void);
-/*=========================*/
-			/* out: the minimum trx id, or trx_sys->max_trx_id
-			if the trx list is empty */
-/********************************************************************
-Checks if a transaction with the given id is active. */
-UNIV_INLINE
-ibool
-trx_is_active(
-/*==========*/
-			/* out: TRUE if active */
-	dulint	trx_id);/* in: trx id of the transaction */
-/********************************************************************
-Checks that trx is in the trx list. */
-
-ibool
-trx_in_trx_list(
-/*============*/
-			/* out: TRUE if is in */
-	trx_t*	in_trx);/* in: trx */
-/*********************************************************************
-Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. In a MySQL
-replication slave updates the latest master binlog position up to which
-replication has proceeded. */
-
-void
-trx_sys_update_mysql_binlog_offset(
-/*===============================*/
-	const char*	file_name,/* in: MySQL log file name */
-	ib_longlong	offset,	/* in: position in that log file */
-	ulint		field,	/* in: offset of the MySQL log info field in
-				the trx sys header */
-	mtr_t*		mtr);	/* in: mtr */
-/*********************************************************************
-Prints to stderr the MySQL binlog offset info in the trx system header if
-the magic number shows it valid. */
-
-void
-trx_sys_print_mysql_binlog_offset(void);
-/*===================================*/
-#ifdef UNIV_HOTBACKUP
-/*********************************************************************
-Prints to stderr the MySQL binlog info in the system header if the
-magic number shows it valid. */
-
-void
-trx_sys_print_mysql_binlog_offset_from_page(
-/*========================================*/
-	byte*	page);	/* in: buffer containing the trx system header page,
-			i.e., page number TRX_SYS_PAGE_NO in the tablespace */
-#endif /* UNIV_HOTBACKUP */
-/*********************************************************************
-Prints to stderr the MySQL master log offset info in the trx system header if
-the magic number shows it valid. */
-
-void
-trx_sys_print_mysql_master_log_pos(void);
-/*====================================*/
-
-/* The automatically created system rollback segment has this id */
-#define TRX_SYS_SYSTEM_RSEG_ID	0
-
-/* Space id and page no where the trx system file copy resides */
-#define	TRX_SYS_SPACE	0	/* the SYSTEM tablespace */
-#define	TRX_SYS_PAGE_NO	FSP_TRX_SYS_PAGE_NO
-
-/* The offset of the transaction system header on the page */
-#define	TRX_SYS		FSEG_PAGE_DATA
-
-/* Transaction system header */
-/*-------------------------------------------------------------*/
-#define	TRX_SYS_TRX_ID_STORE	0	/* the maximum trx id or trx number
-					modulo TRX_SYS_TRX_ID_UPDATE_MARGIN
-					written to a file page by any
-					transaction; the assignment of
-					transaction ids continues from this
-					number rounded up by .._MARGIN plus
-					.._MARGIN when the database is
-					started */
-#define TRX_SYS_FSEG_HEADER	8	/* segment header for the tablespace
-					segment the trx system is created
-					into */
-#define	TRX_SYS_RSEGS		(8 + FSEG_HEADER_SIZE)
-					/* the start of the array of rollback
-					segment specification slots */
-/*-------------------------------------------------------------*/
-
-/* Max number of rollback segments: the number of segment specification slots
-in the transaction system array; rollback segment id must fit in one byte,
-therefore 256; each slot is currently 8 bytes in size */
-#define	TRX_SYS_N_RSEGS		256
-
-#define TRX_SYS_MYSQL_LOG_NAME_LEN	512
-#define TRX_SYS_MYSQL_LOG_MAGIC_N	873422344
-
-/* The offset of the MySQL replication info in the trx system header;
-this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
-#define TRX_SYS_MYSQL_MASTER_LOG_INFO	(UNIV_PAGE_SIZE - 2000)
-
-/* The offset of the MySQL binlog offset info in the trx system header */
-#define TRX_SYS_MYSQL_LOG_INFO		(UNIV_PAGE_SIZE - 1000)
-#define	TRX_SYS_MYSQL_LOG_MAGIC_N_FLD	0	/* magic number which shows
-						if we have valid data in the
-						MySQL binlog info; the value
-						is ..._MAGIC_N if yes */
-#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH	4	/* high 4 bytes of the offset
-						within that file */
-#define TRX_SYS_MYSQL_LOG_OFFSET_LOW	8	/* low 4 bytes of the offset
-						within that file */
-#define TRX_SYS_MYSQL_LOG_NAME		12	/* MySQL log file name */
-
-/* The offset of the doublewrite buffer header on the trx system header page */
-#define TRX_SYS_DOUBLEWRITE		(UNIV_PAGE_SIZE - 200)
-/*-------------------------------------------------------------*/
-#define TRX_SYS_DOUBLEWRITE_FSEG	0	/* fseg header of the fseg
-						containing the doublewrite
-						buffer */
-#define TRX_SYS_DOUBLEWRITE_MAGIC	FSEG_HEADER_SIZE
-						/* 4-byte magic number which
-						shows if we already have
-						created the doublewrite
-						buffer */
-#define TRX_SYS_DOUBLEWRITE_BLOCK1	(4 + FSEG_HEADER_SIZE)
-						/* page number of the
-						first page in the first
-						sequence of 64
-						(= FSP_EXTENT_SIZE) consecutive
-						pages in the doublewrite
-						buffer */
-#define TRX_SYS_DOUBLEWRITE_BLOCK2	(8 + FSEG_HEADER_SIZE)
-						/* page number of the
-						first page in the second
-						sequence of 64 consecutive
-						pages in the doublewrite
-						buffer */
-#define TRX_SYS_DOUBLEWRITE_REPEAT	12	/* we repeat the above 3
-						numbers so that if the trx
-						sys header is half-written
-						to disk, we still may be able
-						to recover the information */
-#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE)
-						/* If this is not yet set to
-						.._N, we must reset the
-						doublewrite buffer, because
-						starting from 4.1.x the space
-						id of a data page is stored to
-					FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO */
-/*-------------------------------------------------------------*/
-#define TRX_SYS_DOUBLEWRITE_MAGIC_N	536853855
-#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N 1783657386
-
-
-#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE	FSP_EXTENT_SIZE
-
-/* Doublewrite control struct */
-struct trx_doublewrite_struct{
-	mutex_t	mutex;		/* mutex protecting the first_free field and
-				write_buf */
-	ulint	block1;		/* the page number of the first
-				doublewrite block (64 pages) */
-	ulint	block2;		/* page number of the second block */
-	ulint	first_free;	/* first free position in write_buf measured
-				in units of UNIV_PAGE_SIZE */
-	byte*	write_buf;	/* write buffer used in writing to the
-				doublewrite buffer, aligned to an
-				address divisible by UNIV_PAGE_SIZE
-				(which is required by Windows aio) */
-	byte*	write_buf_unaligned; /* pointer to write_buf, but unaligned */
-	buf_block_t**
-		buf_block_arr;	/* array to store pointers to the buffer
-				blocks which have been cached to write_buf */
-};
-
-/* The transaction system central memory data structure; protected by the
-kernel mutex */
-struct trx_sys_struct{
-	dulint		max_trx_id;	/* The smallest number not yet
-					assigned as a transaction id or
-					transaction number */
-	UT_LIST_BASE_NODE_T(trx_t) trx_list;
-					/* List of active and committed in
-					memory transactions, sorted on trx id,
-					biggest first */
-	UT_LIST_BASE_NODE_T(trx_t) mysql_trx_list;
-					/* List of transactions created
-					for MySQL */
-	UT_LIST_BASE_NODE_T(trx_rseg_t) rseg_list;
-					/* List of rollback segment objects */
-	trx_rseg_t*	latest_rseg;	/* Latest rollback segment in the
-					round-robin assignment of rollback
-					segments to transactions */
-	trx_rseg_t*	rseg_array[TRX_SYS_N_RSEGS];
-					/* Pointer array to rollback segments;
-					NULL if slot not in use */
-	ulint		rseg_history_len;/* Length of the TRX_RSEG_HISTORY
-					list (update undo logs for committed
-					transactions), protected by
-					rseg->mutex */
-	UT_LIST_BASE_NODE_T(read_view_t) view_list;
-					/* List of read views sorted on trx no,
-					biggest first */
-};
-
-/* When a trx id which is zero modulo this number (which must be a power of
-two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
-page is updated */
-#define TRX_SYS_TRX_ID_WRITE_MARGIN	256
-
-#ifndef UNIV_NONINL
-#include "trx0sys.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic
deleted file mode 100644
index 09b2f822ff7..00000000000
--- a/storage/innobase/include/trx0trx.ic
+++ /dev/null
@@ -1,40 +0,0 @@
-/******************************************************
-The transaction
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-/*****************************************************************
-Starts the transaction if it is not yet started. */
-UNIV_INLINE
-void
-trx_start_if_not_started(
-/*=====================*/
-	trx_t*	trx)	/* in: transaction */
-{
-	ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY);
-
-	if (trx->conc_state == TRX_NOT_STARTED) {
-
-		trx_start(trx, ULINT_UNDEFINED);
-	}
-}
-
-/*****************************************************************
-Starts the transaction if it is not yet started. Assumes we have reserved
-the kernel mutex! */
-UNIV_INLINE
-void
-trx_start_if_not_started_low(
-/*=========================*/
-	trx_t*	trx)	/* in: transaction */
-{
-	ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY);
-
-	if (trx->conc_state == TRX_NOT_STARTED) {
-
-		trx_start_low(trx, ULINT_UNDEFINED);
-	}
-}
diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h
deleted file mode 100644
index 0e6ee79498c..00000000000
--- a/storage/innobase/include/trx0types.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/******************************************************
-Transaction system global type definitions
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0types_h
-#define trx0types_h
-
-#include "lock0types.h"
-#include "ut0byte.h"
-
-/* Memory objects */
-typedef struct trx_struct	trx_t;
-typedef struct trx_sys_struct	trx_sys_t;
-typedef struct trx_doublewrite_struct	trx_doublewrite_t;
-typedef struct trx_sig_struct	trx_sig_t;
-typedef struct trx_rseg_struct	trx_rseg_t;
-typedef struct trx_undo_struct	trx_undo_t;
-typedef struct trx_undo_arr_struct trx_undo_arr_t;
-typedef struct trx_undo_inf_struct trx_undo_inf_t;
-typedef struct trx_purge_struct	trx_purge_t;
-typedef struct roll_node_struct	roll_node_t;
-typedef struct commit_node_struct commit_node_t;
-typedef struct trx_named_savept_struct trx_named_savept_t;
-
-/* Transaction savepoint */
-typedef struct trx_savept_struct trx_savept_t;
-struct trx_savept_struct{
-	dulint	least_undo_no;	/* least undo number to undo */
-};
-
-/* File objects */
-typedef byte	trx_sysf_t;
-typedef byte	trx_rsegf_t;
-typedef byte	trx_usegf_t;
-typedef byte	trx_ulogf_t;
-typedef byte	trx_upagef_t;
-
-/* Undo log record */
-typedef	byte	trx_undo_rec_t;
-
-#endif
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
deleted file mode 100644
index 7f10e407746..00000000000
--- a/storage/innobase/include/trx0undo.h
+++ /dev/null
@@ -1,503 +0,0 @@
-/******************************************************
-Transaction undo log
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0undo_h
-#define trx0undo_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-#include "page0types.h"
-#include "trx0xa.h"
-
-/***************************************************************************
-Builds a roll pointer dulint. */
-UNIV_INLINE
-dulint
-trx_undo_build_roll_ptr(
-/*====================*/
-				/* out: roll pointer */
-	ibool	is_insert,	/* in: TRUE if insert undo log */
-	ulint	rseg_id,	/* in: rollback segment id */
-	ulint	page_no,	/* in: page number */
-	ulint	offset);	/* in: offset of the undo entry within page */
-/***************************************************************************
-Decodes a roll pointer dulint. */
-UNIV_INLINE
-void
-trx_undo_decode_roll_ptr(
-/*=====================*/
-	dulint	roll_ptr,	/* in: roll pointer */
-	ibool*	is_insert,	/* out: TRUE if insert undo log */
-	ulint*	rseg_id,	/* out: rollback segment id */
-	ulint*	page_no,	/* out: page number */
-	ulint*	offset);	/* out: offset of the undo entry within page */
-/***************************************************************************
-Returns TRUE if the roll pointer is of the insert type. */
-UNIV_INLINE
-ibool
-trx_undo_roll_ptr_is_insert(
-/*========================*/
-				/* out: TRUE if insert undo log */
-	dulint	roll_ptr);	/* in: roll pointer */
-/*********************************************************************
-Writes a roll ptr to an index page. In case that the size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_roll_ptr(
-/*===============*/
-	byte*	ptr,		/* in: pointer to memory where written */
-	dulint	roll_ptr);	/* in: roll ptr */
-/*********************************************************************
-Reads a roll ptr from an index page. In case that the roll ptr size
-changes in some future version, this function should be used instead of
-mach_read_... */
-UNIV_INLINE
-dulint
-trx_read_roll_ptr(
-/*==============*/
-			/* out: roll ptr */
-	byte*	ptr);	/* in: pointer to memory from where to read */
-/**********************************************************************
-Gets an undo log page and x-latches it. */
-UNIV_INLINE
-page_t*
-trx_undo_page_get(
-/*==============*/
-				/* out: pointer to page x-latched */
-	ulint	space,		/* in: space where placed */
-	ulint	page_no,	/* in: page number */
-	mtr_t*	mtr);		/* in: mtr */
-/**********************************************************************
-Gets an undo log page and s-latches it. */
-UNIV_INLINE
-page_t*
-trx_undo_page_get_s_latched(
-/*========================*/
-				/* out: pointer to page s-latched */
-	ulint	space,		/* in: space where placed */
-	ulint	page_no,	/* in: page number */
-	mtr_t*	mtr);		/* in: mtr */
-/**********************************************************************
-Returns the previous undo record on the page in the specified log, or
-NULL if none exists. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_prev_rec(
-/*=======================*/
-				/* out: pointer to record, NULL if none */
-	trx_undo_rec_t*	rec,	/* in: undo log record */
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset);/* in: undo log header offset on page */
-/**********************************************************************
-Returns the next undo log record on the page in the specified log, or
-NULL if none exists. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_next_rec(
-/*=======================*/
-				/* out: pointer to record, NULL if none */
-	trx_undo_rec_t*	rec,	/* in: undo log record */
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset);/* in: undo log header offset on page */
-/**********************************************************************
-Returns the last undo record on the page in the specified undo log, or
-NULL if none exists. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_last_rec(
-/*=======================*/
-			/* out: pointer to record, NULL if none */
-	page_t*	undo_page,/* in: undo log page */
-	ulint	page_no,/* in: undo log header page number */
-	ulint	offset);	/* in: undo log header offset on page */
-/**********************************************************************
-Returns the first undo record on the page in the specified undo log, or
-NULL if none exists. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_first_rec(
-/*========================*/
-			/* out: pointer to record, NULL if none */
-	page_t*	undo_page,/* in: undo log page */
-	ulint	page_no,/* in: undo log header page number */
-	ulint	offset);/* in: undo log header offset on page */
-/***************************************************************************
-Gets the previous record in an undo log. */
-
-trx_undo_rec_t*
-trx_undo_get_prev_rec(
-/*==================*/
-				/* out: undo log record, the page s-latched,
-				NULL if none */
-	trx_undo_rec_t*	rec,	/* in: undo record */
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset,	/* in: undo log header offset on page */
-	mtr_t*		mtr);	/* in: mtr */
-/***************************************************************************
-Gets the next record in an undo log. */
-
-trx_undo_rec_t*
-trx_undo_get_next_rec(
-/*==================*/
-				/* out: undo log record, the page s-latched,
-				NULL if none */
-	trx_undo_rec_t*	rec,	/* in: undo record */
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset,	/* in: undo log header offset on page */
-	mtr_t*		mtr);	/* in: mtr */
-/***************************************************************************
-Gets the first record in an undo log. */
-
-trx_undo_rec_t*
-trx_undo_get_first_rec(
-/*===================*/
-			/* out: undo log record, the page latched, NULL if
-			none */
-	ulint	space,	/* in: undo log header space */
-	ulint	page_no,/* in: undo log header page number */
-	ulint	offset,	/* in: undo log header offset on page */
-	ulint	mode,	/* in: latching mode: RW_S_LATCH or RW_X_LATCH */
-	mtr_t*	mtr);	/* in: mtr */
-/************************************************************************
-Tries to add a page to the undo log segment where the undo log is placed. */
-
-ulint
-trx_undo_add_page(
-/*==============*/
-				/* out: page number if success, else
-				FIL_NULL */
-	trx_t*		trx,	/* in: transaction */
-	trx_undo_t*	undo,	/* in: undo log memory object */
-	mtr_t*		mtr);	/* in: mtr which does not have a latch to any
-				undo log page; the caller must have reserved
-				the rollback segment mutex */
-/***************************************************************************
-Truncates an undo log from the end. This function is used during a rollback
-to free space from an undo log. */
-
-void
-trx_undo_truncate_end(
-/*==================*/
-	trx_t*		trx,	/* in: transaction whose undo log it is */
-	trx_undo_t*	undo,	/* in: undo log */
-	dulint		limit);	/* in: all undo records with undo number
-				>= this value should be truncated */
-/***************************************************************************
-Truncates an undo log from the start. This function is used during a purge
-operation. */
-
-void
-trx_undo_truncate_start(
-/*====================*/
-	trx_rseg_t* rseg,	/* in: rollback segment */
-	ulint	space,		/* in: space id of the log */
-	ulint	hdr_page_no,	/* in: header page number */
-	ulint	hdr_offset,	/* in: header offset on the page */
-	dulint	limit);		/* in: all undo pages with undo numbers <
-				this value should be truncated; NOTE that
-				the function only frees whole pages; the
-				header page is not freed, but emptied, if
-				all the records there are < limit */
-/************************************************************************
-Initializes the undo log lists for a rollback segment memory copy.
-This function is only called when the database is started or a new
-rollback segment created. */
-
-ulint
-trx_undo_lists_init(
-/*================*/
-				/* out: the combined size of undo log segments
-				in pages */
-	trx_rseg_t*	rseg);	/* in: rollback segment memory object */
-/**************************************************************************
-Assigns an undo log for a transaction. A new undo log is created or a cached
-undo log reused. */
-
-ulint
-trx_undo_assign_undo(
-/*=================*/
-				/* out: DB_SUCCESS if undo log assign
-				 * successful, possible error codes are:
-				 * ER_TOO_MANY_CONCURRENT_TRXS
-				 * DB_OUT_OF_FILE_SPAC
-				 * DB_OUT_OF_MEMORY */
-	trx_t*		trx,	/* in: transaction */
-	ulint		type);	/* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
-/**********************************************************************
-Sets the state of the undo log segment at a transaction finish. */
-
-page_t*
-trx_undo_set_state_at_finish(
-/*=========================*/
-				/* out: undo log segment header page,
-				x-latched */
-	trx_rseg_t*	rseg,	/* in: rollback segment memory object */
-	trx_t*		trx,	/* in: transaction */
-	trx_undo_t*	undo,	/* in: undo log memory copy */
-	mtr_t*		mtr);	/* in: mtr */
-/**********************************************************************
-Sets the state of the undo log segment at a transaction prepare. */
-
-page_t*
-trx_undo_set_state_at_prepare(
-/*==========================*/
-				/* out: undo log segment header page,
-				x-latched */
-	trx_t*		trx,	/* in: transaction */
-	trx_undo_t*	undo,	/* in: undo log memory copy */
-	mtr_t*		mtr);	/* in: mtr */
-
-/**************************************************************************
-Adds the update undo log header as the first in the history list, and
-frees the memory object, or puts it to the list of cached update undo log
-segments. */
-
-void
-trx_undo_update_cleanup(
-/*====================*/
-	trx_t*	trx,		/* in: trx owning the update undo log */
-	page_t*	undo_page,	/* in: update undo log header page,
-				x-latched */
-	mtr_t*	mtr);		/* in: mtr */
-/**********************************************************************
-Frees or caches an insert undo log after a transaction commit or rollback.
-Knowledge of inserts is not needed after a commit or rollback, therefore
-the data can be discarded. */
-
-void
-trx_undo_insert_cleanup(
-/*====================*/
-	trx_t*	trx);	/* in: transaction handle */
-/***************************************************************
-Parses the redo log entry of an undo log page initialization. */
-
-byte*
-trx_undo_parse_page_init(
-/*=====================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr);	/* in: mtr or NULL */
-/***************************************************************
-Parses the redo log entry of an undo log page header create or reuse. */
-
-byte*
-trx_undo_parse_page_header(
-/*=======================*/
-			/* out: end of log record or NULL */
-	ulint	type,	/* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr);	/* in: mtr or NULL */
-/***************************************************************
-Parses the redo log entry of an undo log page header discard. */
-
-byte*
-trx_undo_parse_discard_latest(
-/*==========================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr);	/* in: mtr or NULL */
-
-/* Types of an undo log segment */
-#define	TRX_UNDO_INSERT		1	/* contains undo entries for inserts */
-#define	TRX_UNDO_UPDATE		2	/* contains undo entries for updates
-					and delete markings: in short,
-					modifys (the name 'UPDATE' is a
-					historical relic) */
-/* States of an undo log segment */
-#define TRX_UNDO_ACTIVE		1	/* contains an undo log of an active
-					transaction */
-#define	TRX_UNDO_CACHED		2	/* cached for quick reuse */
-#define	TRX_UNDO_TO_FREE	3	/* insert undo segment can be freed */
-#define	TRX_UNDO_TO_PURGE	4	/* update undo segment will not be
-					reused: it can be freed in purge when
-					all undo data in it is removed */
-#define	TRX_UNDO_PREPARED	5	/* contains an undo log of an
-					prepared transaction */
-
-/* Transaction undo log memory object; this is protected by the undo_mutex
-in the corresponding transaction object */
-
-struct trx_undo_struct{
-	/*-----------------------------*/
-	ulint		id;		/* undo log slot number within the
-					rollback segment */
-	ulint		type;		/* TRX_UNDO_INSERT or
-					TRX_UNDO_UPDATE */
-	ulint		state;		/* state of the corresponding undo log
-					segment */
-	ibool		del_marks;	/* relevant only in an update undo log:
-					this is TRUE if the transaction may
-					have delete marked records, because of
-					a delete of a row or an update of an
-					indexed field; purge is then
-					necessary; also TRUE if the transaction
-					has updated an externally stored
-					field */
-	dulint		trx_id;		/* id of the trx assigned to the undo
-					log */
-	XID		xid;		/* X/Open XA transaction
-					identification */
-	ibool		dict_operation;	/* TRUE if a dict operation trx */
-	dulint		table_id;	/* if a dict operation, then the table
-					id */
-	trx_rseg_t*	rseg;		/* rseg where the undo log belongs */
-	/*-----------------------------*/
-	ulint		space;		/* space id where the undo log
-					placed */
-	ulint		hdr_page_no;	/* page number of the header page in
-					the undo log */
-	ulint		hdr_offset;	/* header offset of the undo log on the
-					page */
-	ulint		last_page_no;	/* page number of the last page in the
-					undo log; this may differ from
-					top_page_no during a rollback */
-	ulint		size;		/* current size in pages */
-	/*-----------------------------*/
-	ulint		empty;		/* TRUE if the stack of undo log
-					records is currently empty */
-	ulint		top_page_no;	/* page number where the latest undo
-					log record was catenated; during
-					rollback the page from which the latest
-					undo record was chosen */
-	ulint		top_offset;	/* offset of the latest undo record,
-					i.e., the topmost element in the undo
-					log if we think of it as a stack */
-	dulint		top_undo_no;	/* undo number of the latest record */
-	page_t*		guess_page;	/* guess for the buffer frame where
-					the top page might reside */
-	/*-----------------------------*/
-	UT_LIST_NODE_T(trx_undo_t) undo_list;
-					/* undo log objects in the rollback
-					segment are chained into lists */
-};
-
-/* The offset of the undo log page header on pages of the undo log */
-#define	TRX_UNDO_PAGE_HDR	FSEG_PAGE_DATA
-/*-------------------------------------------------------------*/
-/* Transaction undo log page header offsets */
-#define	TRX_UNDO_PAGE_TYPE	0	/* TRX_UNDO_INSERT or
-					TRX_UNDO_UPDATE */
-#define	TRX_UNDO_PAGE_START	2	/* Byte offset where the undo log
-					records for the LATEST transaction
-					start on this page (remember that
-					in an update undo log, the first page
-					can contain several undo logs) */
-#define	TRX_UNDO_PAGE_FREE	4	/* On each page of the undo log this
-					field contains the byte offset of the
-					first free byte on the page */
-#define TRX_UNDO_PAGE_NODE	6	/* The file list node in the chain
-					of undo log pages */
-/*-------------------------------------------------------------*/
-#define TRX_UNDO_PAGE_HDR_SIZE	(6 + FLST_NODE_SIZE)
-
-/* An update undo segment with just one page can be reused if it has
-< this number bytes used; we must leave space at least for one new undo
-log header on the page */
-
-#define TRX_UNDO_PAGE_REUSE_LIMIT	(3 * UNIV_PAGE_SIZE / 4)
-
-/* An update undo log segment may contain several undo logs on its first page
-if the undo logs took so little space that the segment could be cached and
-reused. All the undo log headers are then on the first page, and the last one
-owns the undo log records on subsequent pages if the segment is bigger than
-one page. If an undo log is stored in a segment, then on the first page it is
-allowed to have zero undo records, but if the segment extends to several
-pages, then all the rest of the pages must contain at least one undo log
-record. */
-
-/* The offset of the undo log segment header on the first page of the undo
-log segment */
-
-#define	TRX_UNDO_SEG_HDR	(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE)
-/*-------------------------------------------------------------*/
-#define	TRX_UNDO_STATE		0	/* TRX_UNDO_ACTIVE, ... */
-#define	TRX_UNDO_LAST_LOG	2	/* Offset of the last undo log header
-					on the segment header page, 0 if
-					none */
-#define	TRX_UNDO_FSEG_HEADER	4	/* Header for the file segment which
-					the undo log segment occupies */
-#define	TRX_UNDO_PAGE_LIST	(4 + FSEG_HEADER_SIZE)
-					/* Base node for the list of pages in
-					the undo log segment; defined only on
-					the undo log segment's first page */
-/*-------------------------------------------------------------*/
-/* Size of the undo log segment header */
-#define TRX_UNDO_SEG_HDR_SIZE	(4 + FSEG_HEADER_SIZE + FLST_BASE_NODE_SIZE)
-
-
-/* The undo log header. There can be several undo log headers on the first
-page of an update undo log segment. */
-/*-------------------------------------------------------------*/
-#define	TRX_UNDO_TRX_ID		0	/* Transaction id */
-#define	TRX_UNDO_TRX_NO		8	/* Transaction number of the
-					transaction; defined only if the log
-					is in a history list */
-#define TRX_UNDO_DEL_MARKS	16	/* Defined only in an update undo
-					log: TRUE if the transaction may have
-					done delete markings of records, and
-					thus purge is necessary */
-#define	TRX_UNDO_LOG_START	18	/* Offset of the first undo log record
-					of this log on the header page; purge
-					may remove undo log record from the
-					log start, and therefore this is not
-					necessarily the same as this log
-					header end offset */
-#define	TRX_UNDO_XID_EXISTS	20	/* TRUE if undo log header includes
-					X/Open XA transaction identification
-					XID */
-#define	TRX_UNDO_DICT_TRANS	21	/* TRUE if the transaction is a table
-					create, index create, or drop
-					transaction: in recovery
-					the transaction cannot be rolled back
-					in the usual way: a 'rollback' rather
-					means dropping the created or dropped
-					table, if it still exists */
-#define TRX_UNDO_TABLE_ID	22	/* Id of the table if the preceding
-					field is TRUE */
-#define	TRX_UNDO_NEXT_LOG	30	/* Offset of the next undo log header
-					on this page, 0 if none */
-#define	TRX_UNDO_PREV_LOG	32	/* Offset of the previous undo log
-					header on this page, 0 if none */
-#define TRX_UNDO_HISTORY_NODE	34	/* If the log is put to the history
-					list, the file list node is here */
-/*-------------------------------------------------------------*/
-#define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE)
-
-/* Note: the writing of the undo log old header is coded by a log record
-MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the
-header is logged separately. In this sense, the XID is not really a member
-of the undo log header. TODO: do not append the XID to the log header if XA
-is not needed by the user. The XID wastes about 150 bytes of space in every
-undo log. In the history list we may have millions of undo logs, which means
-quite a large overhead. */
-
-/* X/Open XA Transaction Identification (XID) */
-
-#define	TRX_UNDO_XA_FORMAT	(TRX_UNDO_LOG_OLD_HDR_SIZE)
-#define	TRX_UNDO_XA_TRID_LEN	(TRX_UNDO_XA_FORMAT + 4)
-#define	TRX_UNDO_XA_BQUAL_LEN	(TRX_UNDO_XA_TRID_LEN + 4)
-#define	TRX_UNDO_XA_XID		(TRX_UNDO_XA_BQUAL_LEN + 4)
-/*--------------------------------------------------------------*/
-#define TRX_UNDO_LOG_XA_HDR_SIZE (TRX_UNDO_XA_XID + XIDDATASIZE)
-				/* Total size of the header with the XA XID */
-
-#ifndef UNIV_NONINL
-#include "trx0undo.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/trx0xa.h b/storage/innobase/include/trx0xa.h
deleted file mode 100644
index df85cd663cb..00000000000
--- a/storage/innobase/include/trx0xa.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Start of xa.h header
- *
- * Define a symbol to prevent multiple inclusions of this header file
- */
-#ifndef	XA_H
-#define	XA_H
-
-/*
- * Transaction branch identification: XID and NULLXID:
- */
-#ifndef XIDDATASIZE
-
-#define	XIDDATASIZE	128		/* size in bytes */
-#define	MAXGTRIDSIZE	 64		/* maximum size in bytes of gtrid */
-#define	MAXBQUALSIZE	 64		/* maximum size in bytes of bqual */
-
-struct xid_t {
-	long formatID;			/* format identifier */
-	long gtrid_length;		/* value from 1 through 64 */
-	long bqual_length;		/* value from 1 through 64 */
-	char data[XIDDATASIZE];
-};
-typedef	struct xid_t XID;
-#endif
-/*
- * A value of -1 in formatID means that the XID is null.
- */
-
-
-#ifdef NOTDEFINED
-/* Let us comment this out to remove compiler errors!!!!!!!!!!!! */
-
-/*
- * Declarations of routines by which RMs call TMs:
- */
-extern int ax_reg __P((int, XID *, long));
-extern int ax_unreg __P((int, long));
-
-/*
- * XA Switch Data Structure
- */
-#define	RMNAMESZ	32		/* length of resource manager name, */
-					/* including the null terminator */
-#define	MAXINFOSIZE	256		/* maximum size in bytes of xa_info */
-					/* strings, including the null
-					terminator */
-
-
-struct xa_switch_t {
-	char name[RMNAMESZ];		/* name of resource manager */
-	long flags;			/* resource manager specific options */
-	long version;			/* must be 0 */
-	int (*xa_open_entry)		/* xa_open function pointer */
-		__P((char *, int, long));
-	int (*xa_close_entry)		/* xa_close function pointer */
-		__P((char *, int, long));
-	int (*xa_start_entry)		/* xa_start function pointer */
-		__P((XID *, int, long));
-	int (*xa_end_entry)		/* xa_end function pointer */
-		__P((XID *, int, long));
-	int (*xa_rollback_entry)	/* xa_rollback function pointer */
-		__P((XID *, int, long));
-	int (*xa_prepare_entry)		/* xa_prepare function pointer */
-		__P((XID *, int, long));
-	int (*xa_commit_entry)		/* xa_commit function pointer */
-		__P((XID *, int, long));
-	int (*xa_recover_entry)		/* xa_recover function pointer */
-		__P((XID *, long, int, long));
-	int (*xa_forget_entry)		/* xa_forget function pointer */
-		__P((XID *, int, long));
-	int (*xa_complete_entry)	/* xa_complete function pointer */
-		__P((int *, int *, int, long));
-};
-#endif	/* NOTDEFINED */
-
-
-/*
- * Flag definitions for the RM switch
- */
-#define	TMNOFLAGS	0x00000000L	/* no resource manager features
-					selected */
-#define	TMREGISTER	0x00000001L	/* resource manager dynamically
-					registers */
-#define	TMNOMIGRATE	0x00000002L	/* resource manager does not support
-					association migration */
-#define	TMUSEASYNC	0x00000004L	/* resource manager supports
-					asynchronous operations */
-/*
- * Flag definitions for xa_ and ax_ routines
- */
-/* use TMNOFLAGGS, defined above, when not specifying other flags */
-#define	TMASYNC		0x80000000L	/* perform routine asynchronously */
-#define	TMONEPHASE	0x40000000L	/* caller is using one-phase commit
-					optimisation */
-#define	TMFAIL		0x20000000L	/* dissociates caller and marks
-					transaction branch rollback-only */
-#define	TMNOWAIT	0x10000000L	/* return if blocking condition
-					exists */
-#define	TMRESUME	0x08000000L	/* caller is resuming association with
-					suspended transaction branch */
-#define	TMSUCCESS	0x04000000L	/* dissociate caller from transaction
-					branch */
-#define	TMSUSPEND	0x02000000L	/* caller is suspending, not ending,
-					association */
-#define	TMSTARTRSCAN	0x01000000L	/* start a recovery scan */
-#define	TMENDRSCAN	0x00800000L	/* end a recovery scan */
-#define	TMMULTIPLE	0x00400000L	/* wait for any asynchronous
-					operation */
-#define	TMJOIN		0x00200000L	/* caller is joining existing
-					transaction branch */
-#define	TMMIGRATE	0x00100000L	/* caller intends to perform
-					migration */
-
-/*
- * ax_() return codes (transaction manager reports to resource manager)
- */
-#define	TM_JOIN		2		/* caller is joining existing
-					transaction branch */
-#define	TM_RESUME	1		/* caller is resuming association with
-					suspended transaction branch */
-#define	TM_OK		0		/* normal execution */
-#define	TMER_TMERR	-1		/* an error occurred in the transaction
-					manager */
-#define	TMER_INVAL	-2		/* invalid arguments were given */
-#define	TMER_PROTO	-3		/* routine invoked in an improper
-					context */
-
-/*
- * xa_() return codes (resource manager reports to transaction manager)
- */
-#define	XA_RBBASE	100		/* The inclusive lower bound of the
-					rollback codes */
-#define	XA_RBROLLBACK	XA_RBBASE	/* The rollback was caused by an
-					unspecified reason */
-#define	XA_RBCOMMFAIL	XA_RBBASE+1	/* The rollback was caused by a
-					communication failure */
-#define	XA_RBDEADLOCK	XA_RBBASE+2	/* A deadlock was detected */
-#define	XA_RBINTEGRITY	XA_RBBASE+3	/* A condition that violates the
-					integrity of the resources was
-					detected */
-#define	XA_RBOTHER	XA_RBBASE+4	/* The resource manager rolled back the
-					transaction branch for a reason not
-					on this list */
-#define	XA_RBPROTO	XA_RBBASE+5	/* A protocol error occurred in the
-					resource manager */
-#define	XA_RBTIMEOUT	XA_RBBASE+6	/* A transaction branch took
-					too long */
-#define	XA_RBTRANSIENT	XA_RBBASE+7	/* May retry the transaction branch */
-#define	XA_RBEND	XA_RBTRANSIENT	/* The inclusive upper bound of the
-					rollback codes */
-#define	XA_NOMIGRATE	9		/* resumption must occur where
-					suspension occurred */
-#define	XA_HEURHAZ	8		/* the transaction branch may have
-					been heuristically completed */
-#define	XA_HEURCOM	7		/* the transaction branch has been
-					heuristically committed */
-#define	XA_HEURRB	6		/* the transaction branch has been
-					heuristically rolled back */
-#define	XA_HEURMIX	5		/* the transaction branch has been
-					heuristically committed and rolled
-					back */
-#define	XA_RETRY	4		/* routine returned with no effect and
-					may be re-issued */
-#define	XA_RDONLY	3		/* the transaction branch was read-only
-					and has been committed */
-#define	XA_OK		0		/* normal execution */
-#define	XAER_ASYNC	-2		/* asynchronous operation already
-					outstanding */
-#define	XAER_RMERR	-3		/* a resource manager error occurred in
-					 the transaction branch */
-#define	XAER_NOTA	-4		/* the XID is not valid */
-#define	XAER_INVAL	-5		/* invalid arguments were given */
-#define	XAER_PROTO	-6		/* routine invoked in an improper
-					context */
-#define	XAER_RMFAIL	-7		/* resource manager unavailable */
-#define	XAER_DUPID	-8		/* the XID already exists */
-#define	XAER_OUTSIDE	-9		/* resource manager doing work outside
-					transaction */
-#endif /* ifndef XA_H */
-/*
- * End of xa.h header
- */
diff --git a/storage/innobase/include/usr0sess.h b/storage/innobase/include/usr0sess.h
deleted file mode 100644
index 3ed1ea21a4d..00000000000
--- a/storage/innobase/include/usr0sess.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/******************************************************
-Sessions
-
-(c) 1996 Innobase Oy
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef usr0sess_h
-#define usr0sess_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "trx0types.h"
-#include "srv0srv.h"
-#include "trx0types.h"
-#include "usr0types.h"
-#include "que0types.h"
-#include "data0data.h"
-#include "rem0rec.h"
-
-/*************************************************************************
-Opens a session. */
-
-sess_t*
-sess_open(void);
-/*============*/
-					/* out, own: session object */
-/*************************************************************************
-Closes a session, freeing the memory occupied by it, if it is in a state
-where it should be closed. */
-
-ibool
-sess_try_close(
-/*===========*/
-				/* out: TRUE if closed */
-	sess_t*		sess);	/* in, own: session object */
-
-/* The session handle. All fields are protected by the kernel mutex */
-struct sess_struct{
-	ulint		state;		/* state of the session */
-	trx_t*		trx;		/* transaction object permanently
-					assigned for the session: the
-					transaction instance designated by the
-					trx id changes, but the memory
-					structure is preserved */
-	UT_LIST_BASE_NODE_T(que_t)
-			graphs;		/* query graphs belonging to this
-					session */
-};
-
-/* Session states */
-#define SESS_ACTIVE		1
-#define SESS_ERROR		2	/* session contains an error message
-					which has not yet been communicated
-					to the client */
-#ifndef UNIV_NONINL
-#include "usr0sess.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/usr0sess.ic b/storage/innobase/include/usr0sess.ic
deleted file mode 100644
index c851d5745b9..00000000000
--- a/storage/innobase/include/usr0sess.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-Sessions
-
-(c) 1996 Innobase Oy
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/usr0types.h b/storage/innobase/include/usr0types.h
deleted file mode 100644
index 311471c1a0e..00000000000
--- a/storage/innobase/include/usr0types.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/******************************************************
-Users and sessions global types
-
-(c) 1996 Innobase Oy
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef usr0types_h
-#define usr0types_h
-
-typedef struct sess_struct	sess_t;
-
-#endif
diff --git a/storage/innobase/include/ut0byte.h b/storage/innobase/include/ut0byte.h
deleted file mode 100644
index 6533f1166ca..00000000000
--- a/storage/innobase/include/ut0byte.h
+++ /dev/null
@@ -1,250 +0,0 @@
-/**********************************************************************
-Utilities for byte operations
-
-(c) 1994, 1995 Innobase Oy
-
-Created 1/20/1994 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0byte_h
-#define ut0byte_h
-
-
-#include "univ.i"
-
-/* Type definition for a 64-bit unsigned integer, which works also
-in 32-bit machines. NOTE! Access the fields only with the accessor
-functions. This definition appears here only for the compiler to
-know the size of a dulint. */
-
-typedef	struct dulint_struct	dulint;
-struct dulint_struct{
-	ulint	high;	/* most significant 32 bits */
-	ulint	low;	/* least significant 32 bits */
-};
-
-/* Zero value for a dulint */
-extern dulint	ut_dulint_zero;
-
-/* Maximum value for a dulint */
-extern dulint	ut_dulint_max;
-
-/***********************************************************
-Creates a 64-bit dulint out of two ulints. */
-UNIV_INLINE
-dulint
-ut_dulint_create(
-/*=============*/
-			/* out: created dulint */
-	ulint	high,	/* in: high-order 32 bits */
-	ulint	low);	/* in: low-order 32 bits */
-/***********************************************************
-Gets the high-order 32 bits of a dulint. */
-UNIV_INLINE
-ulint
-ut_dulint_get_high(
-/*===============*/
-			/* out: 32 bits in ulint */
-	dulint	d);	/* in: dulint */
-/***********************************************************
-Gets the low-order 32 bits of a dulint. */
-UNIV_INLINE
-ulint
-ut_dulint_get_low(
-/*==============*/
-			/* out: 32 bits in ulint */
-	dulint	d);	/* in: dulint */
-/***********************************************************
-Converts a dulint (a struct of 2 ulints) to ib_longlong, which is a 64-bit
-integer type. */
-UNIV_INLINE
-ib_longlong
-ut_conv_dulint_to_longlong(
-/*=======================*/
-			/* out: value in ib_longlong type */
-	dulint	d);	/* in: dulint */
-/***********************************************************
-Tests if a dulint is zero. */
-UNIV_INLINE
-ibool
-ut_dulint_is_zero(
-/*==============*/
-			/* out: TRUE if zero */
-	dulint	a);	/* in: dulint */
-/***********************************************************
-Compares two dulints. */
-UNIV_INLINE
-int
-ut_dulint_cmp(
-/*==========*/
-			/* out: -1 if a < b, 0 if a == b,
-			1 if a > b */
-	dulint	a,	/* in: dulint */
-	dulint	b);	/* in: dulint */
-/***********************************************************
-Calculates the max of two dulints. */
-UNIV_INLINE
-dulint
-ut_dulint_get_max(
-/*==============*/
-			/* out: max(a, b) */
-	dulint	a,	/* in: dulint */
-	dulint	b);	/* in: dulint */
-/***********************************************************
-Calculates the min of two dulints. */
-UNIV_INLINE
-dulint
-ut_dulint_get_min(
-/*==============*/
-			/* out: min(a, b) */
-	dulint	a,	/* in: dulint */
-	dulint	b);	/* in: dulint */
-/***********************************************************
-Adds a ulint to a dulint. */
-UNIV_INLINE
-dulint
-ut_dulint_add(
-/*==========*/
-			/* out: sum a + b */
-	dulint	a,	/* in: dulint */
-	ulint	b);	/* in: ulint */
-/***********************************************************
-Subtracts a ulint from a dulint. */
-UNIV_INLINE
-dulint
-ut_dulint_subtract(
-/*===============*/
-			/* out: a - b */
-	dulint	a,	/* in: dulint */
-	ulint	b);	/* in: ulint, b <= a */
-/***********************************************************
-Subtracts a dulint from another. NOTE that the difference must be positive
-and smaller that 4G. */
-UNIV_INLINE
-ulint
-ut_dulint_minus(
-/*============*/
-			/* out: a - b */
-	dulint	a,	/* in: dulint; NOTE a must be >= b and at most
-			2 to power 32 - 1 greater */
-	dulint	b);	/* in: dulint */
-/************************************************************
-Rounds a dulint downward to a multiple of a power of 2. */
-UNIV_INLINE
-dulint
-ut_dulint_align_down(
-/*=================*/
-				/* out: rounded value */
-	dulint	 n,		/* in: number to be rounded */
-	ulint	 align_no);	/* in: align by this number which must be a
-				power of 2 */
-/************************************************************
-Rounds a dulint upward to a multiple of a power of 2. */
-UNIV_INLINE
-dulint
-ut_dulint_align_up(
-/*===============*/
-				/* out: rounded value */
-	dulint	 n,		/* in: number to be rounded */
-	ulint	 align_no);	/* in: align by this number which must be a
-				power of 2 */
-/***********************************************************
-Increments a dulint variable by 1. */
-#define UT_DULINT_INC(D)\
-{\
-	if ((D).low == 0xFFFFFFFFUL) {\
-		(D).high = (D).high + 1;\
-		(D).low = 0;\
-	} else {\
-		(D).low = (D).low + 1;\
-	}\
-}
-/***********************************************************
-Tests if two dulints are equal. */
-#define UT_DULINT_EQ(D1, D2)	(((D1).low == (D2).low)\
-						&& ((D1).high == (D2).high))
-/****************************************************************
-Sort function for dulint arrays. */
-void
-ut_dulint_sort(dulint* arr, dulint* aux_arr, ulint low, ulint high);
-/*===============================================================*/
-/************************************************************
-The following function calculates the value of an integer n rounded
-to the least product of align_no which is >= n. align_no has to be a
-power of 2. */
-UNIV_INLINE
-ulint
-ut_calc_align(
-/*==========*/
-				/* out: rounded value */
-	ulint	 n,		/* in: number to be rounded */
-	ulint	 align_no);	/* in: align by this number */
-/************************************************************
-The following function calculates the value of an integer n rounded
-to the biggest product of align_no which is <= n. align_no has to be a
-power of 2. */
-UNIV_INLINE
-ulint
-ut_calc_align_down(
-/*===============*/
-				/* out: rounded value */
-	ulint	 n,		/* in: number to be rounded */
-	ulint	 align_no);	/* in: align by this number */
-/*************************************************************
-The following function rounds up a pointer to the nearest aligned address. */
-UNIV_INLINE
-void*
-ut_align(
-/*=====*/
-				/* out: aligned pointer */
-	void*	ptr,		/* in: pointer */
-	ulint	align_no);	/* in: align by this number */
-/*************************************************************
-The following function rounds down a pointer to the nearest
-aligned address. */
-UNIV_INLINE
-void*
-ut_align_down(
-/*==========*/
-				/* out: aligned pointer */
-	void*	ptr,		/* in: pointer */
-	ulint	align_no)	/* in: align by this number */
-		__attribute__((const));
-/*************************************************************
-The following function computes the offset of a pointer from the nearest
-aligned address. */
-UNIV_INLINE
-ulint
-ut_align_offset(
-/*============*/
-					/* out: distance from aligned
-					pointer */
-	const void*	ptr,		/* in: pointer */
-	ulint		align_no)	/* in: align by this number */
-			__attribute__((const));
-/*********************************************************************
-Gets the nth bit of a ulint. */
-UNIV_INLINE
-ibool
-ut_bit_get_nth(
-/*===========*/
-			/* out: TRUE if nth bit is 1; 0th bit is defined to
-			be the least significant */
-	ulint	a,	/* in: ulint */
-	ulint	n);	/* in: nth bit requested */
-/*********************************************************************
-Sets the nth bit of a ulint. */
-UNIV_INLINE
-ulint
-ut_bit_set_nth(
-/*===========*/
-			/* out: the ulint with the bit set as requested */
-	ulint	a,	/* in: ulint */
-	ulint	n,	/* in: nth bit requested */
-	ibool	val);	/* in: value for the bit to set */
-
-#ifndef UNIV_NONINL
-#include "ut0byte.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ut0byte.ic b/storage/innobase/include/ut0byte.ic
deleted file mode 100644
index 01b6c29d08f..00000000000
--- a/storage/innobase/include/ut0byte.ic
+++ /dev/null
@@ -1,397 +0,0 @@
-/******************************************************************
-Utilities for byte operations
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*******************************************************************/
-
-/***********************************************************
-Creates a 64-bit dulint out of two ulints. */
-UNIV_INLINE
-dulint
-ut_dulint_create(
-/*=============*/
-			/* out: created dulint */
-	ulint	high,	/* in: high-order 32 bits */
-	ulint	low)	/* in: low-order 32 bits */
-{
-	dulint	res;
-
-	ut_ad(high <= 0xFFFFFFFF);
-	ut_ad(low <= 0xFFFFFFFF);
-
-	res.high = high;
-	res.low	 = low;
-
-	return(res);
-}
-
-/***********************************************************
-Gets the high-order 32 bits of a dulint. */
-UNIV_INLINE
-ulint
-ut_dulint_get_high(
-/*===============*/
-			/* out: 32 bits in ulint */
-	dulint	d)	/* in: dulint */
-{
-	return(d.high);
-}
-
-/***********************************************************
-Gets the low-order 32 bits of a dulint. */
-UNIV_INLINE
-ulint
-ut_dulint_get_low(
-/*==============*/
-			/* out: 32 bits in ulint */
-	dulint	d)	/* in: dulint */
-{
-	return(d.low);
-}
-
-/***********************************************************
-Converts a dulint (a struct of 2 ulints) to ib_longlong, which is a 64-bit
-integer type. */
-UNIV_INLINE
-ib_longlong
-ut_conv_dulint_to_longlong(
-/*=======================*/
-			/* out: value in ib_longlong type */
-	dulint	d)	/* in: dulint */
-{
-	return((ib_longlong)d.low
-	       + (((ib_longlong)d.high) << 32));
-}
-
-/***********************************************************
-Tests if a dulint is zero. */
-UNIV_INLINE
-ibool
-ut_dulint_is_zero(
-/*==============*/
-			/* out: TRUE if zero */
-	dulint	a)	/* in: dulint */
-{
-	if ((a.low == 0) && (a.high == 0)) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/***********************************************************
-Compares two dulints. */
-UNIV_INLINE
-int
-ut_dulint_cmp(
-/*==========*/
-			/* out: -1 if a < b, 0 if a == b,
-			1 if a > b */
-	dulint	a,	/* in: dulint */
-	dulint	b)	/* in: dulint */
-{
-	if (a.high > b.high) {
-		return(1);
-	} else if (a.high < b.high) {
-		return(-1);
-	} else if (a.low > b.low) {
-		return(1);
-	} else if (a.low < b.low) {
-		return(-1);
-	} else {
-		return(0);
-	}
-}
-
-/***********************************************************
-Calculates the max of two dulints. */
-UNIV_INLINE
-dulint
-ut_dulint_get_max(
-/*==============*/
-			/* out: max(a, b) */
-	dulint	a,	/* in: dulint */
-	dulint	b)	/* in: dulint */
-{
-	if (ut_dulint_cmp(a, b) > 0) {
-
-		return(a);
-	}
-
-	return(b);
-}
-
-/***********************************************************
-Calculates the min of two dulints. */
-UNIV_INLINE
-dulint
-ut_dulint_get_min(
-/*==============*/
-			/* out: min(a, b) */
-	dulint	a,	/* in: dulint */
-	dulint	b)	/* in: dulint */
-{
-	if (ut_dulint_cmp(a, b) > 0) {
-
-		return(b);
-	}
-
-	return(a);
-}
-
-/***********************************************************
-Adds a ulint to a dulint. */
-UNIV_INLINE
-dulint
-ut_dulint_add(
-/*==========*/
-			/* out: sum a + b */
-	dulint	a,	/* in: dulint */
-	ulint	b)	/* in: ulint */
-{
-	if (0xFFFFFFFFUL - b >= a.low) {
-		a.low += b;
-
-		return(a);
-	}
-
-	a.low = a.low - (0xFFFFFFFFUL - b) - 1;
-
-	a.high++;
-
-	return(a);
-}
-
-/***********************************************************
-Subtracts a ulint from a dulint. */
-UNIV_INLINE
-dulint
-ut_dulint_subtract(
-/*===============*/
-			/* out: a - b */
-	dulint	a,	/* in: dulint */
-	ulint	b)	/* in: ulint, b <= a */
-{
-	if (a.low >= b) {
-		a.low -= b;
-
-		return(a);
-	}
-
-	b -= a.low + 1;
-
-	a.low = 0xFFFFFFFFUL - b;
-
-	ut_ad(a.high > 0);
-
-	a.high--;
-
-	return(a);
-}
-
-/***********************************************************
-Subtracts a dulint from another. NOTE that the difference must be positive
-and smaller that 4G. */
-UNIV_INLINE
-ulint
-ut_dulint_minus(
-/*============*/
-			/* out: a - b */
-	dulint	a,	/* in: dulint; NOTE a must be >= b and at most
-			2 to power 32 - 1 greater */
-	dulint	b)	/* in: dulint */
-{
-	ulint	diff;
-
-	if (a.high == b.high) {
-		ut_ad(a.low >= b.low);
-
-		return(a.low - b.low);
-	}
-
-	ut_ad(a.high == b.high + 1);
-
-	diff = (ulint)(0xFFFFFFFFUL - b.low);
-	diff += 1 + a.low;
-
-	ut_ad(diff > a.low);
-
-	return(diff);
-}
-
-/************************************************************
-Rounds a dulint downward to a multiple of a power of 2. */
-UNIV_INLINE
-dulint
-ut_dulint_align_down(
-/*=================*/
-				/* out: rounded value */
-	dulint	 n,		/* in: number to be rounded */
-	ulint	 align_no)	/* in: align by this number which must be a
-				power of 2 */
-{
-	ulint	low, high;
-
-	ut_ad(align_no > 0);
-	ut_ad(((align_no - 1) & align_no) == 0);
-
-	low = ut_dulint_get_low(n);
-	high = ut_dulint_get_high(n);
-
-	low = low & ~(align_no - 1);
-
-	return(ut_dulint_create(high, low));
-}
-
-/************************************************************
-Rounds a dulint upward to a multiple of a power of 2. */
-UNIV_INLINE
-dulint
-ut_dulint_align_up(
-/*===============*/
-				/* out: rounded value */
-	dulint	 n,		/* in: number to be rounded */
-	ulint	 align_no)	/* in: align by this number which must be a
-				power of 2 */
-{
-	return(ut_dulint_align_down(ut_dulint_add(n, align_no - 1), align_no));
-}
-
-/************************************************************
-The following function calculates the value of an integer n rounded
-to the least product of align_no which is >= n. align_no
-has to be a power of 2. */
-UNIV_INLINE
-ulint
-ut_calc_align(
-/*==========*/
-				/* out: rounded value */
-	ulint	 n,		/* in: number to be rounded */
-	ulint	 align_no)	/* in: align by this number */
-{
-	ut_ad(align_no > 0);
-	ut_ad(((align_no - 1) & align_no) == 0);
-
-	return((n + align_no - 1) & ~(align_no - 1));
-}
-
-/*************************************************************
-The following function rounds up a pointer to the nearest aligned address. */
-UNIV_INLINE
-void*
-ut_align(
-/*=====*/
-				/* out: aligned pointer */
-	void*	ptr,		/* in: pointer */
-	ulint	align_no)	/* in: align by this number */
-{
-	ut_ad(align_no > 0);
-	ut_ad(((align_no - 1) & align_no) == 0);
-	ut_ad(ptr);
-
-	ut_ad(sizeof(void*) == sizeof(ulint));
-
-	return((void*)((((ulint)ptr) + align_no - 1) & ~(align_no - 1)));
-}
-
-/************************************************************
-The following function calculates the value of an integer n rounded
-to the biggest product of align_no which is <= n. align_no has to be a
-power of 2. */
-UNIV_INLINE
-ulint
-ut_calc_align_down(
-/*===============*/
-				/* out: rounded value */
-	ulint	 n,		 /* in: number to be rounded */
-	ulint	 align_no)	 /* in: align by this number */
-{
-	ut_ad(align_no > 0);
-	ut_ad(((align_no - 1) & align_no) == 0);
-
-	return(n & ~(align_no - 1));
-}
-
-/*************************************************************
-The following function rounds down a pointer to the nearest
-aligned address. */
-UNIV_INLINE
-void*
-ut_align_down(
-/*==========*/
-				/* out: aligned pointer */
-	void*	ptr,		/* in: pointer */
-	ulint	align_no)	/* in: align by this number */
-{
-	ut_ad(align_no > 0);
-	ut_ad(((align_no - 1) & align_no) == 0);
-	ut_ad(ptr);
-
-	ut_ad(sizeof(void*) == sizeof(ulint));
-
-	return((void*)((((ulint)ptr)) & ~(align_no - 1)));
-}
-
-/*************************************************************
-The following function computes the offset of a pointer from the nearest
-aligned address. */
-UNIV_INLINE
-ulint
-ut_align_offset(
-/*============*/
-					/* out: distance from
-					aligned pointer */
-	const void*	ptr,		/* in: pointer */
-	ulint		align_no)	/* in: align by this number */
-{
-	ut_ad(align_no > 0);
-	ut_ad(((align_no - 1) & align_no) == 0);
-	ut_ad(ptr);
-
-	ut_ad(sizeof(void*) == sizeof(ulint));
-
-	return(((ulint)ptr) & (align_no - 1));
-}
-
-/*********************************************************************
-Gets the nth bit of a ulint. */
-UNIV_INLINE
-ibool
-ut_bit_get_nth(
-/*===========*/
-			/* out: TRUE if nth bit is 1; 0th bit is defined to
-			be the least significant */
-	ulint	a,	/* in: ulint */
-	ulint	n)	/* in: nth bit requested */
-{
-	ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
-	return(1 & (a >> n));
-}
-
-/*********************************************************************
-Sets the nth bit of a ulint. */
-UNIV_INLINE
-ulint
-ut_bit_set_nth(
-/*===========*/
-			/* out: the ulint with the bit set as requested */
-	ulint	a,	/* in: ulint */
-	ulint	n,	/* in: nth bit requested */
-	ibool	val)	/* in: value for the bit to set */
-{
-	ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
-	if (val) {
-		return(((ulint) 1 << n) | a);
-	} else {
-		return(~((ulint) 1 << n) & a);
-	}
-}
diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h
deleted file mode 100644
index a317f35f4be..00000000000
--- a/storage/innobase/include/ut0dbg.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*********************************************************************
-Debug utilities for Innobase
-
-(c) 1994, 1995 Innobase Oy
-
-Created 1/30/1994 Heikki Tuuri
-**********************************************************************/
-
-#ifndef ut0dbg_h
-#define ut0dbg_h
-
-#include "univ.i"
-#include <stdlib.h>
-#include "os0thread.h"
-
-#if defined(__GNUC__) && (__GNUC__ > 2)
-# define UT_DBG_FAIL(EXPR) UNIV_UNLIKELY(!((ulint)(EXPR)))
-#else
-extern ulint	ut_dbg_zero; /* This is used to eliminate
-				compiler warnings */
-# define UT_DBG_FAIL(EXPR) !((ulint)(EXPR) + ut_dbg_zero)
-#endif
-
-/*****************************************************************
-Report a failed assertion. */
-
-void
-ut_dbg_assertion_failed(
-/*====================*/
-	const char* expr,	/* in: the failed assertion */
-	const char* file,	/* in: source file containing the assertion */
-	ulint line);		/* in: line number of the assertion */
-
-#ifdef __NETWARE__
-/* Flag for ignoring further assertion failures.
-On NetWare, have a graceful exit rather than a segfault to avoid abends. */
-extern ibool	panic_shutdown;
-/* Abort the execution. */
-void ut_dbg_panic(void);
-# define UT_DBG_PANIC ut_dbg_panic()
-/* Stop threads in ut_a(). */
-# define UT_DBG_STOP	while (0)	/* We do not do this on NetWare */
-#else /* __NETWARE__ */
-# if defined(__WIN__) || defined(__INTEL_COMPILER)
-#  undef UT_DBG_USE_ABORT
-# elif defined(__GNUC__) && (__GNUC__ > 2)
-#  define UT_DBG_USE_ABORT
-# endif
-
-# ifndef UT_DBG_USE_ABORT
-/* A null pointer that will be dereferenced to trigger a memory trap */
-extern ulint*	ut_dbg_null_ptr;
-# endif
-
-# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/* Flag for indicating that all threads should stop.  This will be set
-by ut_dbg_assertion_failed(). */
-extern ibool	ut_dbg_stop_threads;
-
-/*****************************************************************
-Stop a thread after assertion failure. */
-
-void
-ut_dbg_stop_thread(
-/*===============*/
-	const char*	file,
-	ulint		line);
-# endif
-
-# ifdef UT_DBG_USE_ABORT
-/* Abort the execution. */
-#  define UT_DBG_PANIC abort()
-/* Stop threads (null operation) */
-#  define UT_DBG_STOP while (0)
-# else /* UT_DBG_USE_ABORT */
-/* Abort the execution. */
-#  define UT_DBG_PANIC					\
-	if (*(ut_dbg_null_ptr)) ut_dbg_null_ptr = NULL
-/* Stop threads in ut_a(). */
-#  define UT_DBG_STOP do						\
-	if (UNIV_UNLIKELY(ut_dbg_stop_threads)) {		\
-		ut_dbg_stop_thread(__FILE__, (ulint) __LINE__);	\
-	} while (0)
-# endif /* UT_DBG_USE_ABORT */
-#endif /* __NETWARE__ */
-
-/* Abort execution if EXPR does not evaluate to nonzero. */
-#define ut_a(EXPR) do {						\
-	if (UT_DBG_FAIL(EXPR)) {				\
-		ut_dbg_assertion_failed(#EXPR,			\
-				__FILE__, (ulint) __LINE__);	\
-		UT_DBG_PANIC;					\
-	}							\
-	UT_DBG_STOP;						\
-} while (0)
-
-/* Abort execution. */
-#define ut_error do {						\
-	ut_dbg_assertion_failed(0, __FILE__, (ulint) __LINE__);	\
-	UT_DBG_PANIC;						\
-} while (0)
-
-#ifdef UNIV_DEBUG
-#define ut_ad(EXPR)	ut_a(EXPR)
-#define ut_d(EXPR)	do {EXPR;} while (0)
-#else
-#define ut_ad(EXPR)
-#define ut_d(EXPR)
-#endif
-
-#define UT_NOT_USED(A)	A = A
-
-#endif
diff --git a/storage/innobase/include/ut0list.ic b/storage/innobase/include/ut0list.ic
deleted file mode 100644
index c2d3e4557f0..00000000000
--- a/storage/innobase/include/ut0list.ic
+++ /dev/null
@@ -1,23 +0,0 @@
-/********************************************************************
-Get the first node in the list. */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_first(
-/*==============*/
-				/* out: first node, or NULL */
-	ib_list_t*	list)	/* in: list */
-{
-	return(list->first);
-}
-
-/********************************************************************
-Get the last node in the list. */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_last(
-/*=============*/
-				/* out: last node, or NULL */
-	ib_list_t*	list)	/* in: list */
-{
-	return(list->last);
-}
diff --git a/storage/innobase/include/ut0mem.h b/storage/innobase/include/ut0mem.h
deleted file mode 100644
index e56895bc142..00000000000
--- a/storage/innobase/include/ut0mem.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/***********************************************************************
-Memory primitives
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-************************************************************************/
-
-#ifndef ut0mem_h
-#define ut0mem_h
-
-#include "univ.i"
-#include <string.h>
-#include <stdlib.h>
-
-/* The total amount of memory currently allocated from the OS with malloc */
-extern ulint	ut_total_allocated_memory;
-
-UNIV_INLINE
-void*
-ut_memcpy(void* dest, const void* sour, ulint n);
-
-UNIV_INLINE
-void*
-ut_memmove(void* dest, const void* sour, ulint n);
-
-UNIV_INLINE
-int
-ut_memcmp(const void* str1, const void* str2, ulint n);
-
-
-/**************************************************************************
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined and set_to_zero is TRUE. */
-
-void*
-ut_malloc_low(
-/*==========*/
-					/* out, own: allocated memory */
-	ulint	n,			/* in: number of bytes to allocate */
-	ibool	set_to_zero,		/* in: TRUE if allocated memory
-					should be set to zero if
-					UNIV_SET_MEM_TO_ZERO is defined */
-	ibool	assert_on_error);	/* in: if TRUE, we crash mysqld if
-					the memory cannot be allocated */
-/**************************************************************************
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined. */
-
-void*
-ut_malloc(
-/*======*/
-			/* out, own: allocated memory */
-	ulint	n);	/* in: number of bytes to allocate */
-/**************************************************************************
-Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs
-out. It cannot be used if we want to return an error message. Prints to
-stderr a message if fails. */
-
-ibool
-ut_test_malloc(
-/*===========*/
-			/* out: TRUE if succeeded */
-	ulint	n);	/* in: try to allocate this many bytes */
-/**************************************************************************
-Frees a memory block allocated with ut_malloc. */
-
-void
-ut_free(
-/*====*/
-	void* ptr);  /* in, own: memory block */
-/**************************************************************************
-Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not
-use this function because the allocation functions in mem0mem.h are the
-recommended ones in InnoDB.
-
-man realloc in Linux, 2004:
-
-       realloc()  changes the size of the memory block pointed to
-       by ptr to size bytes.  The contents will be  unchanged  to
-       the minimum of the old and new sizes; newly allocated mem�
-       ory will be uninitialized.  If ptr is NULL,  the	 call  is
-       equivalent  to malloc(size); if size is equal to zero, the
-       call is equivalent to free(ptr).	 Unless ptr is	NULL,  it
-       must  have  been	 returned by an earlier call to malloc(),
-       calloc() or realloc().
-
-RETURN VALUE
-       realloc() returns a pointer to the newly allocated memory,
-       which is suitably aligned for any kind of variable and may
-       be different from ptr, or NULL if the  request  fails.  If
-       size  was equal to 0, either NULL or a pointer suitable to
-       be passed to free() is returned.	 If realloc()  fails  the
-       original	 block	is  left  untouched  - it is not freed or
-       moved. */
-
-void*
-ut_realloc(
-/*=======*/
-			/* out, own: pointer to new mem block or NULL */
-	void*	ptr,	/* in: pointer to old block or NULL */
-	ulint	size);	/* in: desired size */
-/**************************************************************************
-Frees in shutdown all allocated memory not freed yet. */
-
-void
-ut_free_all_mem(void);
-/*=================*/
-
-UNIV_INLINE
-char*
-ut_strcpy(char* dest, const char* sour);
-
-UNIV_INLINE
-ulint
-ut_strlen(const char* str);
-
-UNIV_INLINE
-int
-ut_strcmp(const void* str1, const void* str2);
-
-/**************************************************************************
-Copies up to size - 1 characters from the NUL-terminated string src to
-dst, NUL-terminating the result. Returns strlen(src), so truncation
-occurred if the return value >= size. */
-
-ulint
-ut_strlcpy(
-/*=======*/
-				/* out: strlen(src) */
-	char*		dst,	/* in: destination buffer */
-	const char*	src,	/* in: source buffer */
-	ulint		size);	/* in: size of destination buffer */
-
-/**************************************************************************
-Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
-(size - 1) bytes of src, not the first. */
-
-ulint
-ut_strlcpy_rev(
-/*===========*/
-				/* out: strlen(src) */
-	char*		dst,	/* in: destination buffer */
-	const char*	src,	/* in: source buffer */
-	ulint		size);	/* in: size of destination buffer */
-
-/**************************************************************************
-Compute strlen(ut_strcpyq(str, q)). */
-UNIV_INLINE
-ulint
-ut_strlenq(
-/*=======*/
-				/* out: length of the string when quoted */
-	const char*	str,	/* in: null-terminated string */
-	char		q);	/* in: the quote character */
-
-/**************************************************************************
-Make a quoted copy of a NUL-terminated string.	Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_memcpyq(). */
-
-char*
-ut_strcpyq(
-/*=======*/
-				/* out: pointer to end of dest */
-	char*		dest,	/* in: output buffer */
-	char		q,	/* in: the quote character */
-	const char*	src);	/* in: null-terminated string */
-
-/**************************************************************************
-Make a quoted copy of a fixed-length string.  Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_strcpyq(). */
-
-char*
-ut_memcpyq(
-/*=======*/
-				/* out: pointer to end of dest */
-	char*		dest,	/* in: output buffer */
-	char		q,	/* in: the quote character */
-	const char*	src,	/* in: string to be quoted */
-	ulint		len);	/* in: length of src */
-
-/**************************************************************************
-Return the number of times s2 occurs in s1. Overlapping instances of s2
-are only counted once. */
-
-ulint
-ut_strcount(
-/*========*/
-				/* out: the number of times s2 occurs in s1 */
-	const char*	s1,	/* in: string to search in */
-	const char*	s2);	/* in: string to search for */
-
-/**************************************************************************
-Replace every occurrence of s1 in str with s2. Overlapping instances of s1
-are only replaced once. */
-
-char *
-ut_strreplace(
-/*==========*/
-				/* out, own: modified string, must be
-				freed with mem_free() */
-	const char*	str,	/* in: string to operate on */
-	const char*	s1,	/* in: string to replace */
-	const char*	s2);	/* in: string to replace s1 with */
-
-#ifndef UNIV_NONINL
-#include "ut0mem.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ut0mem.ic b/storage/innobase/include/ut0mem.ic
deleted file mode 100644
index e0253ebf618..00000000000
--- a/storage/innobase/include/ut0mem.ic
+++ /dev/null
@@ -1,70 +0,0 @@
-/***********************************************************************
-Memory primitives
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-************************************************************************/
-
-UNIV_INLINE
-void*
-ut_memcpy(void* dest, const void* sour, ulint n)
-{
-	return(memcpy(dest, sour, n));
-}
-
-UNIV_INLINE
-void*
-ut_memmove(void* dest, const void* sour, ulint n)
-{
-	return(memmove(dest, sour, n));
-}
-
-UNIV_INLINE
-int
-ut_memcmp(const void* str1, const void* str2, ulint n)
-{
-	return(memcmp(str1, str2, n));
-}
-
-UNIV_INLINE
-char*
-ut_strcpy(char* dest, const char* sour)
-{
-	return(strcpy(dest, sour));
-}
-
-UNIV_INLINE
-ulint
-ut_strlen(const char* str)
-{
-	return(strlen(str));
-}
-
-UNIV_INLINE
-int
-ut_strcmp(const void* str1, const void* str2)
-{
-	return(strcmp((const char*)str1, (const char*)str2));
-}
-
-/**************************************************************************
-Compute strlen(ut_strcpyq(str, q)). */
-UNIV_INLINE
-ulint
-ut_strlenq(
-/*=======*/
-				/* out: length of the string when quoted */
-	const char*	str,	/* in: null-terminated string */
-	char		q)	/* in: the quote character */
-{
-	ulint len;
-
-	for (len = 0; *str; len++, str++) {
-		if (*str == q) {
-			len++;
-		}
-	}
-
-	return(len);
-}
diff --git a/storage/innobase/include/ut0rnd.h b/storage/innobase/include/ut0rnd.h
deleted file mode 100644
index 3f3fce1075c..00000000000
--- a/storage/innobase/include/ut0rnd.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/**********************************************************************
-Random numbers and hashing
-
-(c) 1994, 1995 Innobase Oy
-
-Created 1/20/1994 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0rnd_h
-#define ut0rnd_h
-
-#include "univ.i"
-
-#include "ut0byte.h"
-
-/* The 'character code' for end of field or string (used
-in folding records */
-#define UT_END_OF_FIELD		257
-
-/************************************************************
-This is used to set the random number seed. */
-UNIV_INLINE
-void
-ut_rnd_set_seed(
-/*============*/
-	ulint	 seed);		 /* in: seed */
-/************************************************************
-The following function generates a series of 'random' ulint integers. */
-UNIV_INLINE
-ulint
-ut_rnd_gen_next_ulint(
-/*==================*/
-			/* out: the next 'random' number */
-	ulint	rnd);	/* in: the previous random number value */
-/*************************************************************
-The following function generates 'random' ulint integers which
-enumerate the value space (let there be N of them) of ulint integers
-in a pseudo-random fashion. Note that the same integer is repeated
-always after N calls to the generator. */
-UNIV_INLINE
-ulint
-ut_rnd_gen_ulint(void);
-/*==================*/
-			/* out: the 'random' number */
-/************************************************************
-Generates a random integer from a given interval. */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
-			/* out: the 'random' number */
-	ulint	low,	/* in: low limit; can generate also this value */
-	ulint	high);	/* in: high limit; can generate also this value */
-/*************************************************************
-Generates a random iboolean value. */
-UNIV_INLINE
-ibool
-ut_rnd_gen_ibool(void);
-/*=================*/
-			/* out: the random value */
-/***********************************************************
-The following function generates a hash value for a ulint integer
-to a hash table of size table_size, which should be a prime or some
-random number to work reliably. */
-UNIV_INLINE
-ulint
-ut_hash_ulint(
-/*==========*/
-				/* out: hash value */
-	ulint	 key,		/* in: value to be hashed */
-	ulint	 table_size);	/* in: hash table size */
-/*****************************************************************
-Folds a pair of ulints. */
-UNIV_INLINE
-ulint
-ut_fold_ulint_pair(
-/*===============*/
-			/* out: folded value */
-	ulint	n1,	/* in: ulint */
-	ulint	n2);	/* in: ulint */
-/*****************************************************************
-Folds a dulint. */
-UNIV_INLINE
-ulint
-ut_fold_dulint(
-/*===========*/
-			/* out: folded value */
-	dulint	d);	/* in: dulint */
-/*****************************************************************
-Folds a character string ending in the null character. */
-UNIV_INLINE
-ulint
-ut_fold_string(
-/*===========*/
-				/* out: folded value */
-	const char*	str);	/* in: null-terminated string */
-/*****************************************************************
-Folds a binary string. */
-UNIV_INLINE
-ulint
-ut_fold_binary(
-/*===========*/
-				/* out: folded value */
-	const byte*	str,	/* in: string of bytes */
-	ulint		len);	/* in: length */
-/***************************************************************
-Looks for a prime number slightly greater than the given argument.
-The prime is chosen so that it is not near any power of 2. */
-
-ulint
-ut_find_prime(
-/*==========*/
-			/* out: prime */
-	ulint	 n);	 /* in: positive number > 100 */
-
-
-#ifndef UNIV_NONINL
-#include "ut0rnd.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
deleted file mode 100644
index 95d7ba017f1..00000000000
--- a/storage/innobase/include/ut0ut.h
+++ /dev/null
@@ -1,323 +0,0 @@
-/**********************************************************************
-Various utilities
-
-(c) 1994, 1995 Innobase Oy
-
-Created 1/20/1994 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0ut_h
-#define ut0ut_h
-
-#include "univ.i"
-#include <time.h>
-#ifndef MYSQL_SERVER
-#include <ctype.h>
-#endif
-
-typedef time_t	ib_time_t;
-
-#ifdef HAVE_PAUSE_INSTRUCTION
-#define PAUSE_INSTRUCTION() {__asm__ __volatile__ ("pause");}
-#else
-#ifdef HAVE_FAKE_PAUSE_INSTRUCTION
-#define PAUSE_INSTRUCTION() {__asm__ __volatile__ ("rep; nop");}
-#else
-#ifdef UNIV_SYNC_ATOMIC
-#define PAUSE_INSTRUCTION() \
-  { \
-    volatile lint volatile_var; \
-    os_compare_and_swap(&volatile_var, 0, 1); \
-  }
-#else
-#define PAUSE_INSTRUCTION()
-#endif
-#endif
-#endif
-
-/************************************************************
-Gets the high 32 bits in a ulint. That is makes a shift >> 32,
-but since there seem to be compiler bugs in both gcc and Visual C++,
-we do this by a special conversion. */
-
-ulint
-ut_get_high32(
-/*==========*/
-			/* out: a >> 32 */
-	ulint	a);	/* in: ulint */
-/**********************************************************
-Calculates the minimum of two ulints. */
-UNIV_INLINE
-ulint
-ut_min(
-/*===*/
-			/* out: minimum */
-	ulint	 n1,	/* in: first number */
-	ulint	 n2);	/* in: second number */
-/**********************************************************
-Calculates the maximum of two ulints. */
-UNIV_INLINE
-ulint
-ut_max(
-/*===*/
-			/* out: maximum */
-	ulint	 n1,	/* in: first number */
-	ulint	 n2);	/* in: second number */
-/********************************************************************
-Calculates minimum of two ulint-pairs. */
-UNIV_INLINE
-void
-ut_pair_min(
-/*========*/
-	ulint*	a,	/* out: more significant part of minimum */
-	ulint*	b,	/* out: less significant part of minimum */
-	ulint	a1,	/* in: more significant part of first pair */
-	ulint	b1,	/* in: less significant part of first pair */
-	ulint	a2,	/* in: more significant part of second pair */
-	ulint	b2);	/* in: less significant part of second pair */
-/**********************************************************
-Compares two ulints. */
-UNIV_INLINE
-int
-ut_ulint_cmp(
-/*=========*/
-			/* out: 1 if a > b, 0 if a == b, -1 if a < b */
-	ulint	a,	/* in: ulint */
-	ulint	b);	/* in: ulint */
-/***********************************************************
-Compares two pairs of ulints. */
-UNIV_INLINE
-int
-ut_pair_cmp(
-/*========*/
-			/* out: -1 if a < b, 0 if a == b,
-			1 if a > b */
-	ulint	a1,	/* in: more significant part of first pair */
-	ulint	a2,	/* in: less significant part of first pair */
-	ulint	b1,	/* in: more significant part of second pair */
-	ulint	b2);	/* in: less significant part of second pair */
-/*****************************************************************
-Calculates fast the remainder when divided by a power of two. */
-UNIV_INLINE
-ulint
-ut_2pow_remainder(
-/*==============*/	/* out: remainder */
-	ulint	n,	/* in: number to be divided */
-	ulint	m);	/* in: divisor; power of 2 */
-/*****************************************************************
-Calculates fast value rounded to a multiple of a power of 2. */
-UNIV_INLINE
-ulint
-ut_2pow_round(
-/*==========*/		/* out: value of n rounded down to nearest
-			multiple of m */
-	ulint	n,	/* in: number to be rounded */
-	ulint	m);	/* in: divisor; power of 2 */
-/*****************************************************************
-Calculates fast the 2-logarithm of a number, rounded upward to an
-integer. */
-UNIV_INLINE
-ulint
-ut_2_log(
-/*=====*/
-			/* out: logarithm in the base 2, rounded upward */
-	ulint	n);	/* in: number */
-/*****************************************************************
-Calculates 2 to power n. */
-UNIV_INLINE
-ulint
-ut_2_exp(
-/*=====*/
-			/* out: 2 to power n */
-	ulint	n);	/* in: number */
-/*****************************************************************
-Calculates fast the number rounded up to the nearest power of 2. */
-
-ulint
-ut_2_power_up(
-/*==========*/
-			/* out: first power of 2 which is >= n */
-	ulint	n)	/* in: number != 0 */
-	__attribute__((const));
-
-/* Determine how many bytes (groups of 8 bits) are needed to
-store the given number of bits. */
-#define UT_BITS_IN_BYTES(b) (((b) + 7) / 8)
-
-/****************************************************************
-Sort function for ulint arrays. */
-
-void
-ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high);
-/*============================================================*/
-/************************************************************
-The following function returns elapsed CPU time in milliseconds. */
-
-ulint
-ut_clock(void);
-/**************************************************************
-Returns system time. We do not specify the format of the time returned:
-the only way to manipulate it is to use the function ut_difftime. */
-
-ib_time_t
-ut_time(void);
-/*=========*/
-/**************************************************************
-Returns system time.
-Upon successful completion, the value 0 is returned; otherwise the
-value -1 is returned and the global variable errno is set to indicate the
-error. */
-
-int
-ut_usectime(
-/*========*/
-			/* out: 0 on success, -1 otherwise */
-	ulint*	sec,	/* out: seconds since the Epoch */
-	ulint*	ms);	/* out: microseconds since the Epoch+*sec */
-
-/**************************************************************
-Returns diff in microseconds (end_sec,end_ms) - (start_sec,start_ms). */
-
-ib_longlong
-ut_usecdiff(
-/*========*/
-	ulint	end_sec,	/* in: seconds since the Epoch */
-	ulint	end_ms,	/* in: microseconds since the Epoch+*sec1 */
-	ulint	start_sec,	/* in: seconds since the Epoch */
-	ulint	start_ms);	/* in: microseconds since the Epoch+*sec2 */
-
-/**************************************************************
-Returns the difference of two times in seconds. */
-
-double
-ut_difftime(
-/*========*/
-				/* out: time2 - time1 expressed in seconds */
-	ib_time_t	time2,	/* in: time */
-	ib_time_t	time1);	/* in: time */
-/**************************************************************
-Prints a timestamp to a file. */
-
-void
-ut_print_timestamp(
-/*===============*/
-	FILE*  file); /* in: file where to print */
-/**************************************************************
-Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
-
-void
-ut_sprintf_timestamp(
-/*=================*/
-	char*	buf); /* in: buffer where to sprintf */
-/**************************************************************
-Sprintfs a timestamp to a buffer with no spaces and with ':' characters
-replaced by '_'. */
-
-void
-ut_sprintf_timestamp_without_extra_chars(
-/*=====================================*/
-	char*	buf); /* in: buffer where to sprintf */
-/**************************************************************
-Returns current year, month, day. */
-
-void
-ut_get_year_month_day(
-/*==================*/
-	ulint*	year,	/* out: current year */
-	ulint*	month,	/* out: month */
-	ulint*	day);	/* out: day */
-/*****************************************************************
-Runs an idle loop on CPU. The argument gives the desired delay
-in microseconds on 100 MHz Pentium + Visual C++. */
-
-ulint
-ut_delay(
-/*=====*/
-			/* out: dummy value */
-	ulint	delay);	/* in: delay in microseconds on 100 MHz Pentium */
-/*****************************************************************
-Prints the contents of a memory buffer in hex and ascii. */
-
-void
-ut_print_buf(
-/*=========*/
-	FILE*		file,	/* in: file where to print */
-	const void*	buf,	/* in: memory buffer */
-	ulint		len);	/* in: length of the buffer */
-
-/**************************************************************************
-Outputs a NUL-terminated file name, quoted with apostrophes. */
-
-void
-ut_print_filename(
-/*==============*/
-	FILE*		f,	/* in: output stream */
-	const char*	name);	/* in: name to print */
-
-/* Forward declaration of transaction handle */
-struct trx_struct;
-
-/**************************************************************************
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-
-void
-ut_print_name(
-/*==========*/
-	FILE*		f,	/* in: output stream */
-	struct trx_struct*trx,	/* in: transaction */
-	ibool		table_id,/* in: TRUE=print a table name,
-				FALSE=print other identifier */
-	const char*	name);	/* in: name to print */
-
-/**************************************************************************
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-
-void
-ut_print_namel(
-/*===========*/
-	FILE*		f,	/* in: output stream */
-	struct trx_struct*trx,	/* in: transaction (NULL=no quotes) */
-	ibool		table_id,/* in: TRUE=print a table name,
-				FALSE=print other identifier */
-	const char*	name,	/* in: name to print */
-	ulint		namelen);/* in: length of name */
-
-/**************************************************************************
-Catenate files. */
-
-void
-ut_copy_file(
-/*=========*/
-	FILE*	dest,	/* in: output file */
-	FILE*	src);	/* in: input file to be appended to output */
-
-/**************************************************************************
-snprintf(). */
-
-#ifdef __WIN__
-int
-ut_snprintf(
-				/* out: number of characters that would
-				have been printed if the size were
-				unlimited, not including the terminating
-				'\0'. */
-	char*		str,	/* out: string */
-	size_t		size,	/* in: str size */
-	const char*	fmt,	/* in: format */
-	...);			/* in: format values */
-#else
-#define ut_snprintf	snprintf
-#endif /* __WIN__ */
-
-#ifndef UNIV_NONINL
-#include "ut0ut.ic"
-#endif
-
-#endif
-
diff --git a/storage/innobase/include/ut0ut.ic b/storage/innobase/include/ut0ut.ic
deleted file mode 100644
index 412717a094e..00000000000
--- a/storage/innobase/include/ut0ut.ic
+++ /dev/null
@@ -1,174 +0,0 @@
-/******************************************************************
-Various utilities
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*******************************************************************/
-
-/**********************************************************
-Calculates the minimum of two ulints. */
-UNIV_INLINE
-ulint
-ut_min(
-/*===*/
-			/* out: minimum */
-	ulint	 n1,	/* in: first number */
-	ulint	 n2)	/* in: second number */
-{
-	return((n1 <= n2) ? n1 : n2);
-}
-
-/**********************************************************
-Calculates the maximum of two ulints. */
-UNIV_INLINE
-ulint
-ut_max(
-/*===*/
-			/* out: maximum */
-	ulint	 n1,	/* in: first number */
-	ulint	 n2)	/* in: second number */
-{
-	return((n1 <= n2) ? n2 : n1);
-}
-
-/********************************************************************
-Calculates minimum of two ulint-pairs. */
-UNIV_INLINE
-void
-ut_pair_min(
-/*========*/
-	ulint*	a,	/* out: more significant part of minimum */
-	ulint*	b,	/* out: less significant part of minimum */
-	ulint	a1,	/* in: more significant part of first pair */
-	ulint	b1,	/* in: less significant part of first pair */
-	ulint	a2,	/* in: more significant part of second pair */
-	ulint	b2)	/* in: less significant part of second pair */
-{
-	if (a1 == a2) {
-		*a = a1;
-		*b = ut_min(b1, b2);
-	} else if (a1 < a2) {
-		*a = a1;
-		*b = b1;
-	} else {
-		*a = a2;
-		*b = b2;
-	}
-}
-
-/**********************************************************
-Compares two ulints. */
-UNIV_INLINE
-int
-ut_ulint_cmp(
-/*=========*/
-			/* out: 1 if a > b, 0 if a == b, -1 if a < b */
-	ulint	a,	/* in: ulint */
-	ulint	b)	/* in: ulint */
-{
-	if (a < b) {
-		return(-1);
-	} else if (a == b) {
-		return(0);
-	} else {
-		return(1);
-	}
-}
-
-/***********************************************************
-Compares two pairs of ulints. */
-UNIV_INLINE
-int
-ut_pair_cmp(
-/*========*/
-			/* out: -1 if a < b, 0 if a == b, 1 if a > b */
-	ulint	a1,	/* in: more significant part of first pair */
-	ulint	a2,	/* in: less significant part of first pair */
-	ulint	b1,	/* in: more significant part of second pair */
-	ulint	b2)	/* in: less significant part of second pair */
-{
-	if (a1 > b1) {
-		return(1);
-	} else if (a1 < b1) {
-		return(-1);
-	} else if (a2 > b2) {
-		return(1);
-	} else if (a2 < b2) {
-		return(-1);
-	} else {
-		return(0);
-	}
-}
-
-/*****************************************************************
-Calculates fast the remainder when divided by a power of two. */
-UNIV_INLINE
-ulint
-ut_2pow_remainder(
-/*==============*/	/* out: remainder */
-	ulint	n,	/* in: number to be divided */
-	ulint	m)	/* in: divisor; power of 2 */
-{
-	ut_ad(0x80000000UL % m == 0);
-
-	return(n & (m - 1));
-}
-
-/*****************************************************************
-Calculates fast a value rounded to a multiple of a power of 2. */
-UNIV_INLINE
-ulint
-ut_2pow_round(
-/*==========*/		/* out: value of n rounded down to nearest
-			multiple of m */
-	ulint	n,	/* in: number to be rounded */
-	ulint	m)	/* in: divisor; power of 2 */
-{
-	ut_ad(0x80000000UL % m == 0);
-
-	return(n & ~(m - 1));
-}
-
-/*****************************************************************
-Calculates fast the 2-logarithm of a number, rounded upward to an
-integer. */
-UNIV_INLINE
-ulint
-ut_2_log(
-/*=====*/
-			/* out: logarithm in the base 2, rounded upward */
-	ulint	n)	/* in: number != 0 */
-{
-	ulint	res;
-
-	res = 0;
-
-	ut_ad(n > 0);
-
-	n = n - 1;
-
-	for (;;) {
-		n = n / 2;
-
-		if (n == 0) {
-			break;
-		}
-
-		res++;
-	}
-
-	return(res + 1);
-}
-
-/*****************************************************************
-Calculates 2 to power n. */
-UNIV_INLINE
-ulint
-ut_2_exp(
-/*=====*/
-			/* out: 2 to power n */
-	ulint	n)	/* in: number */
-{
-	return((ulint) 1 << n);
-}
diff --git a/storage/innobase/include/ut0vec.h b/storage/innobase/include/ut0vec.h
deleted file mode 100644
index e0cc4dfb009..00000000000
--- a/storage/innobase/include/ut0vec.h
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef IB_VECTOR_H
-#define IB_VECTOR_H
-
-#include "univ.i"
-#include "mem0mem.h"
-
-typedef struct ib_vector_struct ib_vector_t;
-
-/* An automatically resizing vector datatype with the following properties:
-
- -Contains void* items.
-
- -The items are owned by the caller.
-
- -All memory allocation is done through a heap owned by the caller, who is
- responsible for freeing it when done with the vector.
-
- -When the vector is resized, the old memory area is left allocated since it
- uses the same heap as the new memory area, so this is best used for
- relatively small or short-lived uses.
-*/
-
-/********************************************************************
-Create a new vector with the given initial size. */
-
-ib_vector_t*
-ib_vector_create(
-/*=============*/
-				/* out: vector */
-	mem_heap_t*	heap,	/* in: heap */
-	ulint		size);	/* in: initial size */
-
-/********************************************************************
-Push a new element to the vector, increasing its size if necessary. */
-
-void
-ib_vector_push(
-/*===========*/
-	ib_vector_t*	vec,	/* in: vector */
-	void*		elem);	/* in: data element */
-
-/********************************************************************
-Get the number of elements in the vector. */
-UNIV_INLINE
-ulint
-ib_vector_size(
-/*===========*/
-				/* out: number of elements in vector */
-	ib_vector_t*	vec);	/* in: vector */
-
-/********************************************************************
-Get the n'th element. */
-UNIV_INLINE
-void*
-ib_vector_get(
-/*==========*/
-				/* out: n'th element */
-	ib_vector_t*	vec,	/* in: vector */
-	ulint		n);	/* in: element index to get */
-
-/* See comment at beginning of file. */
-struct ib_vector_struct {
-	mem_heap_t*	heap;	/* heap */
-	void**		data;	/* data elements */
-	ulint		used;	/* number of elements currently used */
-	ulint		total;	/* number of elements allocated */
-};
-
-#ifndef UNIV_NONINL
-#include "ut0vec.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ut0vec.ic b/storage/innobase/include/ut0vec.ic
deleted file mode 100644
index 417a17d951f..00000000000
--- a/storage/innobase/include/ut0vec.ic
+++ /dev/null
@@ -1,26 +0,0 @@
-/********************************************************************
-Get number of elements in vector. */
-UNIV_INLINE
-ulint
-ib_vector_size(
-/*===========*/
-				/* out: number of elements in vector */
-	ib_vector_t*	vec)	/* in: vector */
-{
-	return(vec->used);
-}
-
-/********************************************************************
-Get n'th element. */
-UNIV_INLINE
-void*
-ib_vector_get(
-/*==========*/
-				/* out: n'th element */
-	ib_vector_t*	vec,	/* in: vector */
-	ulint		n)	/* in: element index to get */
-{
-	ut_a(n < vec->used);
-
-	return(vec->data[n]);
-}
diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h
deleted file mode 100644
index 57f2297beee..00000000000
--- a/storage/innobase/include/ut0wqueue.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/***********************************************************************
-A Work queue. Threads can add work items to the queue and other threads can
-wait for work items to be available and take them off the queue for
-processing.
-
-************************************************************************/
-
-#ifndef IB_WORK_QUEUE_H
-#define IB_WORK_QUEUE_H
-
-#include "ut0list.h"
-#include "mem0mem.h"
-#include "os0sync.h"
-#include "sync0types.h"
-
-typedef struct ib_wqueue_struct ib_wqueue_t;
-
-/********************************************************************
-Create a new work queue. */
-
-ib_wqueue_t*
-ib_wqueue_create(void);
-/*===================*/
-			/* out: work queue */
-
-/********************************************************************
-Free a work queue. */
-
-void
-ib_wqueue_free(
-/*===========*/
-	ib_wqueue_t*	wq);	/* in: work queue */
-
-/********************************************************************
-Add a work item to the queue. */
-
-void
-ib_wqueue_add(
-/*==========*/
-	ib_wqueue_t*	wq,	/* in: work queue */
-	void*		item,	/* in: work item */
-	mem_heap_t*	heap);	/* in: memory heap to use for allocating the
-				list node */
-
-/********************************************************************
-Wait for a work item to appear in the queue. */
-
-void*
-ib_wqueue_wait(
-				/* out: work item */
-	ib_wqueue_t*	wq);	/* in: work queue */
-
-/* Work queue. */
-struct ib_wqueue_struct {
-	mutex_t		mutex;	/* mutex protecting everything */
-	ib_list_t*	items;	/* work item list */
-	os_event_t	event;	/* event we use to signal additions to list */
-};
-
-#endif
diff --git a/storage/innobase/os/os0proc.c b/storage/innobase/os/os0proc.c
deleted file mode 100644
index a99fe8b6a0e..00000000000
--- a/storage/innobase/os/os0proc.c
+++ /dev/null
@@ -1,674 +0,0 @@
-/******************************************************
-The interface to the operating system
-process control primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0proc.h"
-#ifdef UNIV_NONINL
-#include "os0proc.ic"
-#endif
-
-#include "ut0mem.h"
-#include "ut0byte.h"
-
-
-/*
-How to get AWE to compile on Windows?
--------------------------------------
-
-In the project settings of the innobase project the Visual C++ source,
-__WIN2000__ has to be defined.
-
-The Visual C++ has to be relatively recent and _WIN32_WINNT has to be
-defined to a value >= 0x0500 when windows.h is included.
-
-#define _WIN32_WINNT	0x0500
-
-Where does AWE work?
--------------------
-
-See the error message in os_awe_allocate_physical_mem().
-
-How to assign privileges for mysqld to use AWE?
------------------------------------------------
-
-See the error message in os_awe_enable_lock_pages_in_mem().
-
-Use Windows AWE functions in this order
----------------------------------------
-
-(1) os_awe_enable_lock_pages_in_mem();
-(2) os_awe_allocate_physical_mem();
-(3) os_awe_allocate_virtual_mem_window();
-(4) os_awe_map_physical_mem_to_window().
-
-To test 'AWE' in a computer which does not have the AWE API,
-you can compile with UNIV_SIMULATE_AWE defined in this file.
-*/
-
-#ifdef UNIV_SIMULATE_AWE
-/* If we simulate AWE, we allocate the 'physical memory' here */
-byte*		os_awe_simulate_mem;
-ulint		os_awe_simulate_mem_size;
-os_awe_t*	os_awe_simulate_page_info;
-byte*		os_awe_simulate_window;
-ulint		os_awe_simulate_window_size;
-/* In simulated AWE the following contains a NULL pointer or a pointer
-to a mapped 'physical page' for each 4 kB page in the AWE window */
-byte**		os_awe_simulate_map;
-#endif
-
-#ifdef __WIN2000__
-os_awe_t*	os_awe_page_info;
-ulint		os_awe_n_pages;
-byte*		os_awe_window;
-ulint		os_awe_window_size;
-#endif
-
-ibool os_use_large_pages;
-/* Large page size. This may be a boot-time option on some platforms */
-ulint os_large_page_size;
-
-/********************************************************************
-Windows AWE support. Tries to enable the "lock pages in memory" privilege for
-the current process so that the current process can allocate memory-locked
-virtual address space to act as the window where AWE maps physical memory. */
-
-ibool
-os_awe_enable_lock_pages_in_mem(void)
-/*=================================*/
-				/* out: TRUE if success, FALSE if error;
-				prints error info to stderr if no success */
-{
-#ifdef UNIV_SIMULATE_AWE
-
-	return(TRUE);
-
-#elif defined(__WIN2000__)
-	struct {
-		DWORD			Count;
-		LUID_AND_ATTRIBUTES	Privilege[1];
-	}	Info;
-	HANDLE	hProcess;
-	HANDLE	Token;
-	BOOL	Result;
-
-	hProcess = GetCurrentProcess();
-
-	/* Open the token of the current process */
-
-	Result = OpenProcessToken(hProcess,
-				  TOKEN_ADJUST_PRIVILEGES, &Token);
-	if (Result != TRUE) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot open process token, error %lu\n",
-			(ulint)GetLastError());
-		return(FALSE);
-	}
-
-	Info.Count = 1;
-
-	Info.Privilege[0].Attributes = SE_PRIVILEGE_ENABLED;
-
-	/* Get the local unique identifier (LUID) of the SE_LOCK_MEMORY
-	privilege */
-
-	Result = LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME,
-				      &(Info.Privilege[0].Luid));
-	if (Result != TRUE) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot get local privilege"
-			" value for %s, error %lu.\n",
-			SE_LOCK_MEMORY_NAME, (ulint)GetLastError());
-
-		return(FALSE);
-	}
-
-	/* Try to adjust the privilege */
-
-	Result = AdjustTokenPrivileges(Token, FALSE,
-				       (PTOKEN_PRIVILEGES)&Info,
-				       0, NULL, NULL);
-	/* Check the result */
-
-	if (Result != TRUE) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot adjust process token privileges,"
-			" error %u.\n",
-			GetLastError());
-		return(FALSE);
-	} else if (GetLastError() != ERROR_SUCCESS) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot enable SE_LOCK_MEMORY privilege,"
-			" error %lu.\n"
-			"InnoDB: In Windows XP Home you cannot use AWE."
-			" In Windows 2000 and XP\n"
-			"InnoDB: Professional you must go to the"
-			" Control Panel, to\n"
-			"InnoDB: Security Settings, to Local Policies,"
-			" and enable\n"
-			"InnoDB: the 'lock pages in memory' privilege"
-			" for the user who runs\n"
-			"InnoDB: the MySQL server.\n", GetLastError());
-
-		return(FALSE);
-	}
-
-	CloseHandle(Token);
-
-	return(TRUE);
-#else
-#ifdef __WIN__
-	fprintf(stderr,
-		"InnoDB: AWE: Error: to use AWE you must use"
-		" a ...-nt MySQL executable.\n");
-#endif
-	return(FALSE);
-#endif
-}
-
-/********************************************************************
-Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
-processor. */
-
-ibool
-os_awe_allocate_physical_mem(
-/*=========================*/
-				/* out: TRUE if success */
-	os_awe_t** page_info,	/* out, own: array of opaque data containing
-				the info for allocated physical memory pages;
-				each allocated 4 kB physical memory page has
-				one slot of type os_awe_t in the array */
-	ulint	  n_megabytes)	/* in: number of megabytes to allocate */
-{
-#ifdef UNIV_SIMULATE_AWE
-	os_awe_simulate_page_info = ut_malloc
-		(sizeof(os_awe_t) * n_megabytes
-		 * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE));
-
-	os_awe_simulate_mem
-		= ut_align(ut_malloc(4096 + 1024 * 1024 * n_megabytes), 4096);
-	os_awe_simulate_mem_size = n_megabytes * 1024 * 1024;
-
-	*page_info = os_awe_simulate_page_info;
-
-	return(TRUE);
-
-#elif defined(__WIN2000__)
-	BOOL		bResult;
-	os_awe_t	NumberOfPages;		/* Question: why does Windows
-						use the name ULONG_PTR for
-						a scalar integer type? Maybe
-						because we may also refer to
-						&NumberOfPages? */
-	os_awe_t	NumberOfPagesInitial;
-	SYSTEM_INFO	sSysInfo;
-	int		PFNArraySize;
-
-	if (n_megabytes > 64 * 1024) {
-
-		fprintf(stderr,
-			"InnoDB: AWE: Error: tried to allocate %lu MB.\n"
-			"InnoDB: AWE cannot allocate more than"
-			" 64 GB in any computer.\n", n_megabytes);
-
-		return(FALSE);
-	}
-
-	GetSystemInfo(&sSysInfo);  /* fill the system information structure */
-
-	if ((ulint)OS_AWE_X86_PAGE_SIZE != (ulint)sSysInfo.dwPageSize) {
-		fprintf(stderr,
-			"InnoDB: AWE: Error: this computer has a page size"
-			" of %lu.\n"
-			"InnoDB: Should be 4096 bytes for"
-			" InnoDB AWE support to work.\n",
-			(ulint)sSysInfo.dwPageSize);
-
-		return(FALSE);
-	}
-
-	/* Calculate the number of pages of memory to request */
-
-	NumberOfPages = n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE);
-
-	/* Calculate the size of page_info for allocated physical pages */
-
-	PFNArraySize = NumberOfPages * sizeof(os_awe_t);
-
-	*page_info = (os_awe_t*)HeapAlloc(GetProcessHeap(), 0, PFNArraySize);
-
-	if (*page_info == NULL) {
-		fprintf(stderr,
-			"InnoDB: AWE: Failed to allocate page info"
-			" array from process heap, error %lu\n",
-			(ulint)GetLastError());
-
-		return(FALSE);
-	}
-
-	ut_total_allocated_memory += PFNArraySize;
-
-	/* Enable this process' privilege to lock pages to physical memory */
-
-	if (!os_awe_enable_lock_pages_in_mem()) {
-
-		return(FALSE);
-	}
-
-	/* Allocate the physical memory */
-
-	NumberOfPagesInitial = NumberOfPages;
-
-	os_awe_page_info = *page_info;
-	os_awe_n_pages = (ulint)NumberOfPages;
-
-	/* Compilation note: if the compiler complains the function is not
-	defined, see the note at the start of this file */
-
-	bResult = AllocateUserPhysicalPages(GetCurrentProcess(),
-					    &NumberOfPages, *page_info);
-	if (bResult != TRUE) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot allocate physical pages,"
-			" error %lu.\n",
-			(ulint)GetLastError());
-
-		return(FALSE);
-	}
-
-	if (NumberOfPagesInitial != NumberOfPages) {
-		fprintf(stderr,
-			"InnoDB: AWE: Error: allocated only %lu pages"
-			" of %lu requested.\n"
-			"InnoDB: Check that you have enough free RAM.\n"
-			"InnoDB: In Windows XP Professional and"
-			" 2000 Professional\n"
-			"InnoDB: Windows PAE size is max 4 GB."
-			" In 2000 and .NET\n"
-			"InnoDB: Advanced Servers and 2000 Datacenter Server"
-			" it is 32 GB,\n"
-			"InnoDB: and in .NET Datacenter Server it is 64 GB.\n"
-			"InnoDB: A Microsoft web page said that"
-			" the processor must be an Intel\n"
-			"InnoDB: processor.\n",
-			(ulint)NumberOfPages,
-			(ulint)NumberOfPagesInitial);
-
-		return(FALSE);
-	}
-
-	fprintf(stderr,
-		"InnoDB: Using Address Windowing Extensions (AWE);"
-		" allocated %lu MB\n",
-		n_megabytes);
-
-	return(TRUE);
-#else
-	UT_NOT_USED(n_megabytes);
-	UT_NOT_USED(page_info);
-
-	return(FALSE);
-#endif
-}
-
-/********************************************************************
-Allocates a window in the virtual address space where we can map then
-pages of physical memory. */
-
-byte*
-os_awe_allocate_virtual_mem_window(
-/*===============================*/
-			/* out, own: allocated memory, or NULL if did not
-			succeed */
-	ulint	size)	/* in: virtual memory allocation size in bytes, must
-			be < 2 GB */
-{
-#ifdef UNIV_SIMULATE_AWE
-	ulint	i;
-
-	os_awe_simulate_window = ut_align(ut_malloc(4096 + size), 4096);
-	os_awe_simulate_window_size = size;
-
-	os_awe_simulate_map = ut_malloc(sizeof(byte*) * (size / 4096));
-
-	for (i = 0; i < (size / 4096); i++) {
-		*(os_awe_simulate_map + i) = NULL;
-	}
-
-	return(os_awe_simulate_window);
-
-#elif defined(__WIN2000__)
-	byte*	ptr;
-
-	if (size > (ulint)0x7FFFFFFFUL) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot allocate %lu bytes"
-			" of virtual memory\n", size);
-
-		return(NULL);
-	}
-
-	ptr = VirtualAlloc(NULL, (SIZE_T)size, MEM_RESERVE | MEM_PHYSICAL,
-			   PAGE_READWRITE);
-	if (ptr == NULL) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot allocate %lu bytes"
-			" of virtual memory, error %lu\n",
-			size, (ulint)GetLastError());
-
-		return(NULL);
-	}
-
-	os_awe_window = ptr;
-	os_awe_window_size = size;
-
-	ut_total_allocated_memory += size;
-
-	return(ptr);
-#else
-	UT_NOT_USED(size);
-
-	return(NULL);
-#endif
-}
-
-/********************************************************************
-With this function you can map parts of physical memory allocated with
-the ..._allocate_physical_mem to the virtual address space allocated with
-the previous function. Intel implements this so that the process page
-tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
-showed that this takes < 1 microsecond, much better than the estimated 80 us
-for copying a 16 kB page memory to memory. But, the operation will at least
-partially invalidate the translation lookaside buffer (TLB) of all
-processors. Under a real-world load the performance hit may be bigger. */
-
-ibool
-os_awe_map_physical_mem_to_window(
-/*==============================*/
-					/* out: TRUE if success; the function
-					calls exit(1) in case of an error */
-	byte*		ptr,		/* in: a page-aligned pointer to
-					somewhere in the virtual address
-					space window; we map the physical mem
-					pages here */
-	ulint		n_mem_pages,	/* in: number of 4 kB mem pages to
-					map */
-	os_awe_t*	page_info)	/* in: array of page infos for those
-					pages; each page has one slot in the
-					array */
-{
-#ifdef UNIV_SIMULATE_AWE
-	ulint	i;
-	byte**	map;
-	byte*	page;
-	byte*	phys_page;
-
-	ut_a(ptr >= os_awe_simulate_window);
-	ut_a(ptr < os_awe_simulate_window + os_awe_simulate_window_size);
-	ut_a(page_info >= os_awe_simulate_page_info);
-	ut_a(page_info < os_awe_simulate_page_info
-	     + (os_awe_simulate_mem_size / 4096));
-
-	/* First look if some other 'physical pages' are mapped at ptr,
-	and copy them back to where they were if yes */
-
-	map = os_awe_simulate_map
-		+ ((ulint)(ptr - os_awe_simulate_window)) / 4096;
-	page = ptr;
-
-	for (i = 0; i < n_mem_pages; i++) {
-		if (*map != NULL) {
-			ut_memcpy(*map, page, 4096);
-		}
-		map++;
-		page += 4096;
-	}
-
-	/* Then copy to ptr the 'physical pages' determined by page_info; we
-	assume page_info is a segment of the array we created at the start */
-
-	phys_page = os_awe_simulate_mem
-		+ (ulint)(page_info - os_awe_simulate_page_info)
-		* 4096;
-
-	ut_memcpy(ptr, phys_page, n_mem_pages * 4096);
-
-	/* Update the map */
-
-	map = os_awe_simulate_map
-		+ ((ulint)(ptr - os_awe_simulate_window)) / 4096;
-
-	for (i = 0; i < n_mem_pages; i++) {
-		*map = phys_page;
-
-		map++;
-		phys_page += 4096;
-	}
-
-	return(TRUE);
-
-#elif defined(__WIN2000__)
-	BOOL		bResult;
-	os_awe_t	n_pages;
-
-	n_pages = (os_awe_t)n_mem_pages;
-
-	if (!(ptr >= os_awe_window)) {
-		fprintf(stderr,
-			"InnoDB: AWE: Error: trying to map to address %lx"
-			" but AWE window start %lx\n",
-			(ulint)ptr, (ulint)os_awe_window);
-		ut_a(0);
-	}
-
-	if (!(ptr <= os_awe_window + os_awe_window_size - UNIV_PAGE_SIZE)) {
-		fprintf(stderr,
-			"InnoDB: AWE: Error: trying to map to address %lx"
-			" but AWE window end %lx\n",
-			(ulint)ptr, (ulint)os_awe_window + os_awe_window_size);
-		ut_a(0);
-	}
-
-	if (!(page_info >= os_awe_page_info)) {
-		fprintf(stderr,
-			"InnoDB: AWE: Error: trying to map page info"
-			" at %lx but array start %lx\n",
-			(ulint)page_info, (ulint)os_awe_page_info);
-		ut_a(0);
-	}
-
-	if (!(page_info <= os_awe_page_info + (os_awe_n_pages - 4))) {
-		fprintf(stderr,
-			"InnoDB: AWE: Error: trying to map page info"
-			" at %lx but array end %lx\n",
-			(ulint)page_info,
-			(ulint)(os_awe_page_info + os_awe_n_pages));
-		ut_a(0);
-	}
-
-	bResult = MapUserPhysicalPages((PVOID)ptr, n_pages, page_info);
-
-	if (bResult != TRUE) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"  InnoDB: AWE: Mapping of %lu physical pages"
-			" to address %lx failed,\n"
-			"InnoDB: error %lu.\n"
-			"InnoDB: Cannot continue operation.\n",
-			n_mem_pages, (ulint)ptr, (ulint)GetLastError());
-		exit(1);
-	}
-
-	return(TRUE);
-#else
-	UT_NOT_USED(ptr);
-	UT_NOT_USED(n_mem_pages);
-	UT_NOT_USED(page_info);
-
-	return(FALSE);
-#endif
-}
-
-/********************************************************************
-Converts the current process id to a number. It is not guaranteed that the
-number is unique. In Linux returns the 'process number' of the current
-thread. That number is the same as one sees in 'top', for example. In Linux
-the thread id is not the same as one sees in 'top'. */
-
-ulint
-os_proc_get_number(void)
-/*====================*/
-{
-#ifdef __WIN__
-	return((ulint)GetCurrentProcessId());
-#else
-	return((ulint)getpid());
-#endif
-}
-
-/********************************************************************
-Allocates non-cacheable memory. */
-
-void*
-os_mem_alloc_nocache(
-/*=================*/
-			/* out: allocated memory */
-	ulint	n)	/* in: number of bytes */
-{
-#ifdef __WIN__
-	void*	ptr;
-
-	ptr = VirtualAlloc(NULL, n, MEM_COMMIT,
-			   PAGE_READWRITE | PAGE_NOCACHE);
-	ut_a(ptr);
-
-	return(ptr);
-#else
-	return(ut_malloc(n));
-#endif
-}
-
-/********************************************************************
-Allocates large pages memory. */
-
-void*
-os_mem_alloc_large(
-/*===============*/
-					/* out: allocated memory */
-	ulint		n,		/* in: number of bytes */
-	ibool		set_to_zero,	/* in: TRUE if allocated memory
-					should be set to zero if
-					UNIV_SET_MEM_TO_ZERO is defined */
-	ibool		assert_on_error)/* in: if TRUE, we crash mysqld if
-					 the memory cannot be allocated */
-{
-#ifdef HAVE_LARGE_PAGES
-	ulint size;
-	int shmid;
-	void *ptr = NULL;
-	struct shmid_ds buf;
-
-	if (!os_use_large_pages || !os_large_page_size) {
-		goto skip;
-	}
-
-#ifdef UNIV_LINUX
-	/* Align block size to os_large_page_size */
-	size = ((n - 1) & ~(os_large_page_size - 1)) + os_large_page_size;
-
-	shmid = shmget(IPC_PRIVATE, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W);
-	if (shmid < 0) {
-		fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to allocate"
-			" %lu bytes. errno %d\n", n, errno);
-	} else {
-		ptr = shmat(shmid, NULL, 0);
-		if (ptr == (void *)-1) {
-			fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to"
-				" attach shared memory segment, errno %d\n",
-				errno);
-		}
-
-		/* Remove the shared memory segment so that it will be
-		automatically freed after memory is detached or
-		process exits */
-		shmctl(shmid, IPC_RMID, &buf);
-	}
-#endif
-
-	if (ptr) {
-		if (set_to_zero) {
-#ifdef UNIV_SET_MEM_TO_ZERO
-			memset(ptr, '\0', size);
-#endif
-		}
-
-		return(ptr);
-	}
-
-	fprintf(stderr, "InnoDB HugeTLB: Warning: Using conventional"
-		" memory pool\n");
-skip:
-#endif /* HAVE_LARGE_PAGES */
-
-	return(ut_malloc_low(n, set_to_zero, assert_on_error));
-}
-
-/********************************************************************
-Frees large pages memory. */
-
-void
-os_mem_free_large(
-/*==============*/
-	void	*ptr)	/* in: number of bytes */
-{
-#ifdef HAVE_LARGE_PAGES
-	if (os_use_large_pages && os_large_page_size
-#ifdef UNIV_LINUX
-	    && !shmdt(ptr)
-#endif
-	    ) {
-		return;
-	}
-#endif
-
-	ut_free(ptr);
-}
-
-/********************************************************************
-Sets the priority boost for threads released from waiting within the current
-process. */
-
-void
-os_process_set_priority_boost(
-/*==========================*/
-	ibool	do_boost)	/* in: TRUE if priority boost should be done,
-				FALSE if not */
-{
-#ifdef __WIN__
-	ibool	no_boost;
-
-	if (do_boost) {
-		no_boost = FALSE;
-	} else {
-		no_boost = TRUE;
-	}
-
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
-
-	/* Does not do anything currently!
-	SetProcessPriorityBoost(GetCurrentProcess(), no_boost);
-	*/
-	fputs("Warning: process priority boost setting"
-	      " currently not functional!\n",
-	      stderr);
-#else
-	UT_NOT_USED(do_boost);
-#endif
-}
diff --git a/storage/innobase/page/page0cur.c b/storage/innobase/page/page0cur.c
deleted file mode 100644
index 70b7de194fd..00000000000
--- a/storage/innobase/page/page0cur.c
+++ /dev/null
@@ -1,1510 +0,0 @@
-/************************************************************************
-The page cursor
-
-(c) 1994-1996 Innobase Oy
-
-Created 10/4/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "page0cur.h"
-#ifdef UNIV_NONINL
-#include "page0cur.ic"
-#endif
-
-#include "rem0cmp.h"
-#include "mtr0log.h"
-#include "log0recv.h"
-#include "rem0cmp.h"
-#include "srv0srv.h"
-#include "ut0ut.h"
-
-static ulint	page_rnd	= 976722341;
-
-#ifdef PAGE_CUR_ADAPT
-# ifdef UNIV_SEARCH_PERF_STAT
-ulint	page_cur_short_succ	= 0;
-# endif /* UNIV_SEARCH_PERF_STAT */
-
-/***********************************************************************
-This is a linear congruential generator PRNG. Returns a pseudo random
-number between 0 and 2^64-1 inclusive. The formula and the constants
-being used are:
-X[n+1] = (a * X[n] + c) mod m
-where:
-X[0] = ut_usectime()
-a = 1103515245 (3^5 * 5 * 7 * 129749)
-c = 12345 (3 * 5 * 823)
-m = 18446744073709551616 (2^64)
-*/
-#define LCG_a	1103515245
-#define LCG_c	12345
-static
-unsigned long long
-page_cur_lcg_prng()
-/*===============*/
-			/* out: number between 0 and 2^64-1 */
-{
-	static unsigned long long lcg_current = 0;
-	static ibool		initialized = FALSE;
-	ulint			time_sec;
-	ulint			time_ms;
-
-	if (!initialized) {
-		ut_usectime(&time_sec, &time_ms);
-		lcg_current = (unsigned long long) (time_sec * 1000000
-						    + time_ms);
-		initialized = TRUE;
-	}
-
-	/* no need to "% 2^64" explicitly because lcg_current is
-	64 bit and this will be done anyway */
-	lcg_current = LCG_a * lcg_current + LCG_c;
-
-	return(lcg_current);
-}
-
-/********************************************************************
-Tries a search shortcut based on the last insert. */
-UNIV_INLINE
-ibool
-page_cur_try_search_shortcut(
-/*=========================*/
-				/* out: TRUE on success */
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index,	/* in: record descriptor */
-	dtuple_t*	tuple,	/* in: data tuple */
-	ulint*		iup_matched_fields,
-				/* in/out: already matched fields in upper
-				limit record */
-	ulint*		iup_matched_bytes,
-				/* in/out: already matched bytes in a field
-				not yet completely matched */
-	ulint*		ilow_matched_fields,
-				/* in/out: already matched fields in lower
-				limit record */
-	ulint*		ilow_matched_bytes,
-				/* in/out: already matched bytes in a field
-				not yet completely matched */
-	page_cur_t*	cursor) /* out: page cursor */
-{
-	rec_t*	rec;
-	rec_t*	next_rec;
-	ulint	low_match;
-	ulint	low_bytes;
-	ulint	up_match;
-	ulint	up_bytes;
-#ifdef UNIV_SEARCH_DEBUG
-	page_cur_t cursor2;
-#endif
-	ibool		success		= FALSE;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	ut_ad(dtuple_check_typed(tuple));
-
-	rec = page_header_get_ptr(page, PAGE_LAST_INSERT);
-	offsets = rec_get_offsets(rec, index, offsets,
-				  dtuple_get_n_fields(tuple), &heap);
-
-	ut_ad(rec);
-	ut_ad(page_rec_is_user_rec(rec));
-
-	ut_pair_min(&low_match, &low_bytes,
-		    *ilow_matched_fields, *ilow_matched_bytes,
-		    *iup_matched_fields, *iup_matched_bytes);
-
-	up_match = low_match;
-	up_bytes = low_bytes;
-
-	if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets,
-					   &low_match, &low_bytes) < 0) {
-		goto exit_func;
-	}
-
-	next_rec = page_rec_get_next(rec);
-	offsets = rec_get_offsets(next_rec, index, offsets,
-				  dtuple_get_n_fields(tuple), &heap);
-
-	if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets,
-					   &up_match, &up_bytes) >= 0) {
-		goto exit_func;
-	}
-
-	cursor->rec = rec;
-
-#ifdef UNIV_SEARCH_DEBUG
-	page_cur_search_with_match(page, index, tuple, PAGE_CUR_DBG,
-				   iup_matched_fields,
-				   iup_matched_bytes,
-				   ilow_matched_fields,
-				   ilow_matched_bytes,
-				   &cursor2);
-	ut_a(cursor2.rec == cursor->rec);
-
-	if (next_rec != page_get_supremum_rec(page)) {
-
-		ut_a(*iup_matched_fields == up_match);
-		ut_a(*iup_matched_bytes == up_bytes);
-	}
-
-	ut_a(*ilow_matched_fields == low_match);
-	ut_a(*ilow_matched_bytes == low_bytes);
-#endif
-	if (!page_rec_is_supremum(next_rec)) {
-
-		*iup_matched_fields = up_match;
-		*iup_matched_bytes = up_bytes;
-	}
-
-	*ilow_matched_fields = low_match;
-	*ilow_matched_bytes = low_bytes;
-
-#ifdef UNIV_SEARCH_PERF_STAT
-	page_cur_short_succ++;
-#endif
-	success = TRUE;
-exit_func:
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-	return(success);
-}
-
-#endif
-
-#ifdef PAGE_CUR_LE_OR_EXTENDS
-/********************************************************************
-Checks if the nth field in a record is a character type field which extends
-the nth field in tuple, i.e., the field is longer or equal in length and has
-common first characters. */
-static
-ibool
-page_cur_rec_field_extends(
-/*=======================*/
-				/* out: TRUE if rec field
-				extends tuple field */
-	dtuple_t*	tuple,	/* in: data tuple */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		n)	/* in: compare nth field */
-{
-	dtype_t* type;
-	dfield_t* dfield;
-	byte*	  rec_f;
-	ulint	  rec_f_len;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	dfield = dtuple_get_nth_field(tuple, n);
-
-	type = dfield_get_type(dfield);
-
-	rec_f = rec_get_nth_field(rec, offsets, n, &rec_f_len);
-
-	if (type->mtype == DATA_VARCHAR
-	    || type->mtype == DATA_CHAR
-	    || type->mtype == DATA_FIXBINARY
-	    || type->mtype == DATA_BINARY
-	    || type->mtype == DATA_BLOB
-	    || type->mtype == DATA_VARMYSQL
-	    || type->mtype == DATA_MYSQL) {
-
-		if (dfield_get_len(dfield) != UNIV_SQL_NULL
-		    && rec_f_len != UNIV_SQL_NULL
-		    && rec_f_len >= dfield_get_len(dfield)
-		    && !cmp_data_data_slow(type,
-					   dfield_get_data(dfield),
-					   dfield_get_len(dfield),
-					   rec_f, dfield_get_len(dfield))) {
-
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-
-/********************************************************************
-Searches the right position for a page cursor. */
-
-void
-page_cur_search_with_match(
-/*=======================*/
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index,	/* in: record descriptor */
-	dtuple_t*	tuple,	/* in: data tuple */
-	ulint		mode,	/* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
-				or PAGE_CUR_GE */
-	ulint*		iup_matched_fields,
-				/* in/out: already matched fields in upper
-				limit record */
-	ulint*		iup_matched_bytes,
-				/* in/out: already matched bytes in a field
-				not yet completely matched */
-	ulint*		ilow_matched_fields,
-				/* in/out: already matched fields in lower
-				limit record */
-	ulint*		ilow_matched_bytes,
-				/* in/out: already matched bytes in a field
-				not yet completely matched */
-	page_cur_t*	cursor) /* out: page cursor */
-{
-	ulint	up;
-	ulint	low;
-	ulint	mid;
-	page_dir_slot_t* slot;
-	rec_t*	up_rec;
-	rec_t*	low_rec;
-	rec_t*	mid_rec;
-	ulint	up_matched_fields;
-	ulint	up_matched_bytes;
-	ulint	low_matched_fields;
-	ulint	low_matched_bytes;
-	ulint	cur_matched_fields;
-	ulint	cur_matched_bytes;
-	int	cmp;
-#ifdef UNIV_SEARCH_DEBUG
-	int	dbg_cmp;
-	ulint	dbg_matched_fields;
-	ulint	dbg_matched_bytes;
-#endif
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	ut_ad(page && tuple && iup_matched_fields && iup_matched_bytes
-	      && ilow_matched_fields && ilow_matched_bytes && cursor);
-	ut_ad(dtuple_validate(tuple));
-	ut_ad(dtuple_check_typed(tuple));
-#ifdef UNIV_DEBUG
-# ifdef PAGE_CUR_DBG
-	if (mode != PAGE_CUR_DBG)
-# endif /* PAGE_CUR_DBG */
-# ifdef PAGE_CUR_LE_OR_EXTENDS
-		if (mode != PAGE_CUR_LE_OR_EXTENDS)
-# endif /* PAGE_CUR_LE_OR_EXTENDS */
-			ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
-			      || mode == PAGE_CUR_G || mode == PAGE_CUR_GE);
-#endif /* UNIV_DEBUG */
-
-	page_check_dir(page);
-
-#ifdef PAGE_CUR_ADAPT
-	if ((page_header_get_field(page, PAGE_LEVEL) == 0)
-	    && (mode == PAGE_CUR_LE)
-	    && (page_header_get_field(page, PAGE_N_DIRECTION) > 3)
-	    && (page_header_get_ptr(page, PAGE_LAST_INSERT))
-	    && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) {
-
-		if (page_cur_try_search_shortcut(
-			    page, index, tuple,
-			    iup_matched_fields, iup_matched_bytes,
-			    ilow_matched_fields, ilow_matched_bytes,
-			    cursor)) {
-			return;
-		}
-	}
-# ifdef PAGE_CUR_DBG
-	if (mode == PAGE_CUR_DBG) {
-		mode = PAGE_CUR_LE;
-	}
-# endif
-#endif
-
-	/* The following flag does not work for non-latin1 char sets because
-	cmp_full_field does not tell how many bytes matched */
-#ifdef PAGE_CUR_LE_OR_EXTENDS
-	ut_a(mode != PAGE_CUR_LE_OR_EXTENDS);
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-
-	/* If mode PAGE_CUR_G is specified, we are trying to position the
-	cursor to answer a query of the form "tuple < X", where tuple is
-	the input parameter, and X denotes an arbitrary physical record on
-	the page. We want to position the cursor on the first X which
-	satisfies the condition. */
-
-	up_matched_fields  = *iup_matched_fields;
-	up_matched_bytes   = *iup_matched_bytes;
-	low_matched_fields = *ilow_matched_fields;
-	low_matched_bytes  = *ilow_matched_bytes;
-
-	/* Perform binary search. First the search is done through the page
-	directory, after that as a linear search in the list of records
-	owned by the upper limit directory slot. */
-
-	low = 0;
-	up = page_dir_get_n_slots(page) - 1;
-
-	/* Perform binary search until the lower and upper limit directory
-	slots come to the distance 1 of each other */
-
-	while (up - low > 1) {
-		mid = (low + up) / 2;
-		slot = page_dir_get_nth_slot(page, mid);
-		mid_rec = page_dir_slot_get_rec(slot);
-
-		ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
-			    low_matched_fields, low_matched_bytes,
-			    up_matched_fields, up_matched_bytes);
-
-		offsets = rec_get_offsets(mid_rec, index, offsets,
-					  dtuple_get_n_fields_cmp(tuple),
-					  &heap);
-
-		cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
-						&cur_matched_fields,
-						&cur_matched_bytes);
-		if (UNIV_LIKELY(cmp > 0)) {
-low_slot_match:
-			low = mid;
-			low_matched_fields = cur_matched_fields;
-			low_matched_bytes = cur_matched_bytes;
-
-		} else if (UNIV_EXPECT(cmp, -1)) {
-#ifdef PAGE_CUR_LE_OR_EXTENDS
-			if (mode == PAGE_CUR_LE_OR_EXTENDS
-			    && page_cur_rec_field_extends(
-				    tuple, mid_rec, offsets,
-				    cur_matched_fields)) {
-
-				goto low_slot_match;
-			}
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-up_slot_match:
-			up = mid;
-			up_matched_fields = cur_matched_fields;
-			up_matched_bytes = cur_matched_bytes;
-
-		} else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
-#ifdef PAGE_CUR_LE_OR_EXTENDS
-			   || mode == PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-			   ) {
-
-			goto low_slot_match;
-		} else {
-
-			goto up_slot_match;
-		}
-	}
-
-	slot = page_dir_get_nth_slot(page, low);
-	low_rec = page_dir_slot_get_rec(slot);
-	slot = page_dir_get_nth_slot(page, up);
-	up_rec = page_dir_slot_get_rec(slot);
-
-	/* Perform linear search until the upper and lower records come to
-	distance 1 of each other. */
-
-	while (page_rec_get_next(low_rec) != up_rec) {
-
-		mid_rec = page_rec_get_next(low_rec);
-
-		ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
-			    low_matched_fields, low_matched_bytes,
-			    up_matched_fields, up_matched_bytes);
-
-		offsets = rec_get_offsets(mid_rec, index, offsets,
-					  dtuple_get_n_fields_cmp(tuple),
-					  &heap);
-
-		cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
-						&cur_matched_fields,
-						&cur_matched_bytes);
-		if (UNIV_LIKELY(cmp > 0)) {
-low_rec_match:
-			low_rec = mid_rec;
-			low_matched_fields = cur_matched_fields;
-			low_matched_bytes = cur_matched_bytes;
-
-		} else if (UNIV_EXPECT(cmp, -1)) {
-#ifdef PAGE_CUR_LE_OR_EXTENDS
-			if (mode == PAGE_CUR_LE_OR_EXTENDS
-			    && page_cur_rec_field_extends(
-				    tuple, mid_rec, offsets,
-				    cur_matched_fields)) {
-
-				goto low_rec_match;
-			}
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-up_rec_match:
-			up_rec = mid_rec;
-			up_matched_fields = cur_matched_fields;
-			up_matched_bytes = cur_matched_bytes;
-		} else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
-#ifdef PAGE_CUR_LE_OR_EXTENDS
-			   || mode == PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-			   ) {
-
-			goto low_rec_match;
-		} else {
-
-			goto up_rec_match;
-		}
-	}
-
-#ifdef UNIV_SEARCH_DEBUG
-
-	/* Check that the lower and upper limit records have the
-	right alphabetical order compared to tuple. */
-	dbg_matched_fields = 0;
-	dbg_matched_bytes = 0;
-
-	offsets = rec_get_offsets(low_rec, index, offsets,
-				  ULINT_UNDEFINED, &heap);
-	dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, offsets,
-						 &dbg_matched_fields,
-						 &dbg_matched_bytes);
-	if (mode == PAGE_CUR_G) {
-		ut_a(dbg_cmp >= 0);
-	} else if (mode == PAGE_CUR_GE) {
-		ut_a(dbg_cmp == 1);
-	} else if (mode == PAGE_CUR_L) {
-		ut_a(dbg_cmp == 1);
-	} else if (mode == PAGE_CUR_LE) {
-		ut_a(dbg_cmp >= 0);
-	}
-
-	if (low_rec != page_get_infimum_rec(page)) {
-
-		ut_a(low_matched_fields == dbg_matched_fields);
-		ut_a(low_matched_bytes == dbg_matched_bytes);
-	}
-
-	dbg_matched_fields = 0;
-	dbg_matched_bytes = 0;
-
-	offsets = rec_get_offsets(up_rec, index, offsets,
-				  ULINT_UNDEFINED, &heap);
-	dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, offsets,
-						 &dbg_matched_fields,
-						 &dbg_matched_bytes);
-	if (mode == PAGE_CUR_G) {
-		ut_a(dbg_cmp == -1);
-	} else if (mode == PAGE_CUR_GE) {
-		ut_a(dbg_cmp <= 0);
-	} else if (mode == PAGE_CUR_L) {
-		ut_a(dbg_cmp <= 0);
-	} else if (mode == PAGE_CUR_LE) {
-		ut_a(dbg_cmp == -1);
-	}
-
-	if (up_rec != page_get_supremum_rec(page)) {
-
-		ut_a(up_matched_fields == dbg_matched_fields);
-		ut_a(up_matched_bytes == dbg_matched_bytes);
-	}
-#endif
-	if (mode <= PAGE_CUR_GE) {
-		cursor->rec = up_rec;
-	} else {
-		cursor->rec = low_rec;
-	}
-
-	*iup_matched_fields  = up_matched_fields;
-	*iup_matched_bytes   = up_matched_bytes;
-	*ilow_matched_fields = low_matched_fields;
-	*ilow_matched_bytes  = low_matched_bytes;
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-}
-
-/***************************************************************
-Positions a page cursor on a randomly chosen user record on a page. If there
-are no user records, sets the cursor on the infimum record. */
-
-void
-page_cur_open_on_rnd_user_rec(
-/*==========================*/
-	page_t*		page,	/* in: page */
-	page_cur_t*	cursor)	/* in/out: page cursor */
-{
-	ulint	rnd;
-	rec_t*	rec;
-
-	if (page_get_n_recs(page) == 0) {
-		page_cur_position(page_get_infimum_rec(page), cursor);
-
-		return;
-	}
-
-	if (srv_use_legacy_cardinality_algorithm) {
-		page_rnd += 87584577;
-
-		rnd = page_rnd % page_get_n_recs(page);
-	} else {
-		rnd = (ulint) page_cur_lcg_prng() % page_get_n_recs(page);
-	}
-
-	rec = page_get_infimum_rec(page);
-
-	rec = page_rec_get_next(rec);
-
-	while (rnd > 0) {
-		rec = page_rec_get_next(rec);
-
-		rnd--;
-	}
-
-	page_cur_position(rec, cursor);
-}
-
-/***************************************************************
-Writes the log record of a record insert on a page. */
-static
-void
-page_cur_insert_rec_write_log(
-/*==========================*/
-	rec_t*		insert_rec,	/* in: inserted physical record */
-	ulint		rec_size,	/* in: insert_rec size */
-	rec_t*		cursor_rec,	/* in: record the
-					cursor is pointing to */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr)		/* in: mini-transaction handle */
-{
-	ulint	cur_rec_size;
-	ulint	extra_size;
-	ulint	cur_extra_size;
-	ulint	min_rec_size;
-	byte*	ins_ptr;
-	byte*	cur_ptr;
-	ulint	extra_info_yes;
-	byte*	log_ptr;
-	byte*	log_end;
-	ulint	i;
-	ulint	comp;
-
-	ut_a(rec_size < UNIV_PAGE_SIZE);
-	ut_ad(buf_frame_align(insert_rec) == buf_frame_align(cursor_rec));
-	ut_ad(!page_rec_is_comp(insert_rec)
-	      == !dict_table_is_comp(index->table));
-	comp = page_rec_is_comp(insert_rec);
-
-	{
-		mem_heap_t*	heap		= NULL;
-		ulint		cur_offs_[REC_OFFS_NORMAL_SIZE];
-		ulint		ins_offs_[REC_OFFS_NORMAL_SIZE];
-
-		ulint*		cur_offs;
-		ulint*		ins_offs;
-
-		*cur_offs_ = (sizeof cur_offs_) / sizeof *cur_offs_;
-		*ins_offs_ = (sizeof ins_offs_) / sizeof *ins_offs_;
-
-		cur_offs = rec_get_offsets(cursor_rec, index, cur_offs_,
-					   ULINT_UNDEFINED, &heap);
-		ins_offs = rec_get_offsets(insert_rec, index, ins_offs_,
-					   ULINT_UNDEFINED, &heap);
-
-		extra_size = rec_offs_extra_size(ins_offs);
-		cur_extra_size = rec_offs_extra_size(cur_offs);
-		ut_ad(rec_size == rec_offs_size(ins_offs));
-		cur_rec_size = rec_offs_size(cur_offs);
-
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-	}
-
-	ins_ptr = insert_rec - extra_size;
-
-	i = 0;
-
-	if (cur_extra_size == extra_size) {
-		min_rec_size = ut_min(cur_rec_size, rec_size);
-
-		cur_ptr = cursor_rec - cur_extra_size;
-
-		/* Find out the first byte in insert_rec which differs from
-		cursor_rec; skip the bytes in the record info */
-
-		for (;;) {
-			if (i >= min_rec_size) {
-
-				break;
-			} else if (*ins_ptr == *cur_ptr) {
-				i++;
-				ins_ptr++;
-				cur_ptr++;
-			} else if ((i < extra_size)
-				   && (i >= extra_size
-				       - (comp
-					  ? REC_N_NEW_EXTRA_BYTES
-					  : REC_N_OLD_EXTRA_BYTES))) {
-				i = extra_size;
-				ins_ptr = insert_rec;
-				cur_ptr = cursor_rec;
-			} else {
-				break;
-			}
-		}
-	}
-
-	if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) {
-
-		log_ptr = mlog_open_and_write_index(mtr, insert_rec, index,
-						    comp
-						    ? MLOG_COMP_REC_INSERT
-						    : MLOG_REC_INSERT,
-						    2 + 5 + 1 + 5 + 5
-						    + MLOG_BUF_MARGIN);
-
-		if (!log_ptr) {
-			/* Logging in mtr is switched off during crash
-			recovery: in that case mlog_open returns NULL */
-			return;
-		}
-
-		log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
-		/* Write the cursor rec offset as a 2-byte ulint */
-		mach_write_to_2(log_ptr, cursor_rec
-				- buf_frame_align(cursor_rec));
-		log_ptr += 2;
-	} else {
-		log_ptr = mlog_open(mtr, 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
-		if (!log_ptr) {
-			/* Logging in mtr is switched off during crash
-			recovery: in that case mlog_open returns NULL */
-			return;
-		}
-		log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
-	}
-
-	if ((rec_get_info_and_status_bits(insert_rec, comp)
-	     != rec_get_info_and_status_bits(cursor_rec, comp))
-	    || (extra_size != cur_extra_size)
-	    || (rec_size != cur_rec_size)) {
-
-		extra_info_yes = 1;
-	} else {
-		extra_info_yes = 0;
-	}
-
-	/* Write the record end segment length and the extra info storage
-	flag */
-	log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i)
-					 + extra_info_yes);
-	if (extra_info_yes) {
-		/* Write the info bits */
-		mach_write_to_1(log_ptr,
-				rec_get_info_and_status_bits(insert_rec,
-							     comp));
-		log_ptr++;
-
-		/* Write the record origin offset */
-		log_ptr += mach_write_compressed(log_ptr, extra_size);
-
-		/* Write the mismatch index */
-		log_ptr += mach_write_compressed(log_ptr, i);
-
-		ut_a(i < UNIV_PAGE_SIZE);
-		ut_a(extra_size < UNIV_PAGE_SIZE);
-	}
-
-	/* Write to the log the inserted index record end segment which
-	differs from the cursor record */
-
-	rec_size -= i;
-
-	if (log_ptr + rec_size <= log_end) {
-		memcpy(log_ptr, ins_ptr, rec_size);
-		mlog_close(mtr, log_ptr + rec_size);
-	} else {
-		mlog_close(mtr, log_ptr);
-		ut_a(rec_size < UNIV_PAGE_SIZE);
-		mlog_catenate_string(mtr, ins_ptr, rec_size);
-	}
-}
-
-/***************************************************************
-Parses a log record of a record insert on a page. */
-
-byte*
-page_cur_parse_insert_rec(
-/*======================*/
-				/* out: end of log record or NULL */
-	ibool		is_short,/* in: TRUE if short inserts */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	dict_index_t*	index,	/* in: record descriptor */
-	page_t*		page,	/* in: page or NULL */
-	mtr_t*		mtr)	/* in: mtr or NULL */
-{
-	ulint	extra_info_yes;
-	ulint	offset = 0; /* remove warning */
-	ulint	origin_offset;
-	ulint	end_seg_len;
-	ulint	mismatch_index;
-	rec_t*	cursor_rec;
-	byte	buf1[1024];
-	byte*	buf;
-	byte*	ptr2 = ptr;
-	ulint	info_and_status_bits = 0; /* remove warning */
-	page_cur_t cursor;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	if (!is_short) {
-		/* Read the cursor rec offset as a 2-byte ulint */
-
-		if (end_ptr < ptr + 2) {
-
-			return(NULL);
-		}
-
-		offset = mach_read_from_2(ptr);
-
-		if (offset >= UNIV_PAGE_SIZE) {
-
-			recv_sys->found_corrupt_log = TRUE;
-
-			return(NULL);
-		}
-
-		ptr += 2;
-	}
-
-	ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len);
-
-	if (ptr == NULL) {
-
-		return(NULL);
-	}
-
-	extra_info_yes = end_seg_len & 0x1UL;
-	end_seg_len >>= 1;
-
-	if (end_seg_len >= UNIV_PAGE_SIZE) {
-		recv_sys->found_corrupt_log = TRUE;
-
-		return(NULL);
-	}
-
-	if (extra_info_yes) {
-		/* Read the info bits */
-
-		if (end_ptr < ptr + 1) {
-
-			return(NULL);
-		}
-
-		info_and_status_bits = mach_read_from_1(ptr);
-		ptr++;
-
-		ptr = mach_parse_compressed(ptr, end_ptr, &origin_offset);
-
-		if (ptr == NULL) {
-
-			return(NULL);
-		}
-
-		ut_a(origin_offset < UNIV_PAGE_SIZE);
-
-		ptr = mach_parse_compressed(ptr, end_ptr, &mismatch_index);
-
-		if (ptr == NULL) {
-
-			return(NULL);
-		}
-
-		ut_a(mismatch_index < UNIV_PAGE_SIZE);
-	}
-
-	if (end_ptr < ptr + end_seg_len) {
-
-		return(NULL);
-	}
-
-	if (page == NULL) {
-
-		return(ptr + end_seg_len);
-	}
-
-	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
-	/* Read from the log the inserted index record end segment which
-	differs from the cursor record */
-
-	if (is_short) {
-		cursor_rec = page_rec_get_prev(page_get_supremum_rec(page));
-	} else {
-		cursor_rec = page + offset;
-	}
-
-	offsets = rec_get_offsets(cursor_rec, index, offsets,
-				  ULINT_UNDEFINED, &heap);
-
-	if (extra_info_yes == 0) {
-		info_and_status_bits = rec_get_info_and_status_bits(
-			cursor_rec, page_is_comp(page));
-		origin_offset = rec_offs_extra_size(offsets);
-		mismatch_index = rec_offs_size(offsets) - end_seg_len;
-	}
-
-	if (mismatch_index + end_seg_len < sizeof buf1) {
-		buf = buf1;
-	} else {
-		buf = mem_alloc(mismatch_index + end_seg_len);
-	}
-
-	/* Build the inserted record to buf */
-
-	if (mismatch_index >= UNIV_PAGE_SIZE) {
-		fprintf(stderr,
-			"Is short %lu, info_and_status_bits %lu, offset %lu, "
-			"o_offset %lu\n"
-			"mismatch index %lu, end_seg_len %lu\n"
-			"parsed len %lu\n",
-			(ulong) is_short, (ulong) info_and_status_bits,
-			(ulong) offset,
-			(ulong) origin_offset,
-			(ulong) mismatch_index, (ulong) end_seg_len,
-			(ulong) (ptr - ptr2));
-
-		fputs("Dump of 300 bytes of log:\n", stderr);
-		ut_print_buf(stderr, ptr2, 300);
-
-		buf_page_print(page);
-
-		ut_error;
-	}
-
-	ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index);
-	ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
-
-	rec_set_info_and_status_bits(buf + origin_offset, page_is_comp(page),
-				     info_and_status_bits);
-
-	page_cur_position(cursor_rec, &cursor);
-
-	offsets = rec_get_offsets(buf + origin_offset, index, offsets,
-				  ULINT_UNDEFINED, &heap);
-	page_cur_rec_insert(&cursor, buf + origin_offset, index, offsets, mtr);
-
-	if (buf != buf1) {
-
-		mem_free(buf);
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	return(ptr + end_seg_len);
-}
-
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The record to be
-inserted can be in a data tuple or as a physical record. The other parameter
-must then be NULL. The cursor stays at the same position. */
-
-rec_t*
-page_cur_insert_rec_low(
-/*====================*/
-				/* out: pointer to record if succeed, NULL
-				otherwise */
-	page_cur_t*	cursor,	/* in: a page cursor */
-	dtuple_t*	tuple,	/* in: pointer to a data tuple or NULL */
-	dict_index_t*	index,	/* in: record descriptor */
-	rec_t*		rec,	/* in: pointer to a physical record or NULL */
-	ulint*		offsets,/* in: rec_get_offsets(rec, index) or NULL */
-	mtr_t*		mtr)	/* in: mini-transaction handle */
-{
-	byte*		insert_buf	= NULL;
-	ulint		rec_size;
-	byte*		page;		/* the relevant page */
-	rec_t*		last_insert;	/* cursor position at previous
-					insert */
-	rec_t*		insert_rec;	/* inserted record */
-	ulint		heap_no;	/* heap number of the inserted
-					record */
-	rec_t*		current_rec;	/* current record after which the
-					new record is inserted */
-	rec_t*		next_rec;	/* next record after current before
-					the insertion */
-	ulint		owner_slot;	/* the slot which owns the
-					inserted record */
-	rec_t*		owner_rec;
-	ulint		n_owned;
-	mem_heap_t*	heap		= NULL;
-	ulint		comp;
-
-	ut_ad(cursor && mtr);
-	ut_ad(tuple || rec);
-	ut_ad(!(tuple && rec));
-	ut_ad(rec || dtuple_check_typed(tuple));
-
-	page = page_cur_get_page(cursor);
-	comp = page_is_comp(page);
-	ut_ad(dict_table_is_comp(index->table) == !!comp);
-
-	ut_ad(cursor->rec != page_get_supremum_rec(page));
-
-	/* 1. Get the size of the physical record in the page */
-	if (tuple != NULL) {
-		rec_size = rec_get_converted_size(index, tuple);
-	} else {
-		if (!offsets) {
-			offsets = rec_get_offsets(rec, index, offsets,
-						  ULINT_UNDEFINED, &heap);
-		}
-		ut_ad(rec_offs_validate(rec, index, offsets));
-		rec_size = rec_offs_size(offsets);
-	}
-
-	/* 2. Try to find suitable space from page memory management */
-	insert_buf = page_mem_alloc(page, rec_size, index, &heap_no);
-
-	if (insert_buf == NULL) {
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-		return(NULL);
-	}
-
-	/* 3. Create the record */
-	if (tuple != NULL) {
-		insert_rec = rec_convert_dtuple_to_rec(insert_buf,
-						       index, tuple);
-		offsets = rec_get_offsets(insert_rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-	} else {
-		insert_rec = rec_copy(insert_buf, rec, offsets);
-		ut_ad(rec_offs_validate(rec, index, offsets));
-		rec_offs_make_valid(insert_rec, index, offsets);
-	}
-
-	ut_ad(insert_rec);
-	ut_ad(rec_size == rec_offs_size(offsets));
-
-	/* 4. Insert the record in the linked list of records */
-	current_rec = cursor->rec;
-
-	ut_ad(!comp || rec_get_status(current_rec) <= REC_STATUS_INFIMUM);
-	ut_ad(!comp || rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
-
-	next_rec = page_rec_get_next(current_rec);
-	ut_ad(!comp || rec_get_status(next_rec) != REC_STATUS_INFIMUM);
-	page_rec_set_next(insert_rec, next_rec);
-	page_rec_set_next(current_rec, insert_rec);
-
-	page_header_set_field(page, PAGE_N_RECS, 1 + page_get_n_recs(page));
-
-	/* 5. Set the n_owned field in the inserted record to zero,
-	and set the heap_no field */
-
-	rec_set_n_owned(insert_rec, comp, 0);
-	rec_set_heap_no(insert_rec, comp, heap_no);
-
-	/* 6. Update the last insertion info in page header */
-
-	last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
-	ut_ad(!last_insert || !comp
-	      || rec_get_node_ptr_flag(last_insert)
-	      == rec_get_node_ptr_flag(insert_rec));
-
-	if (last_insert == NULL) {
-		page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
-		page_header_set_field(page, PAGE_N_DIRECTION, 0);
-
-	} else if ((last_insert == current_rec)
-		   && (page_header_get_field(page, PAGE_DIRECTION)
-		       != PAGE_LEFT)) {
-
-		page_header_set_field(page, PAGE_DIRECTION, PAGE_RIGHT);
-		page_header_set_field(page, PAGE_N_DIRECTION,
-				      page_header_get_field(
-					      page, PAGE_N_DIRECTION) + 1);
-
-	} else if ((page_rec_get_next(insert_rec) == last_insert)
-		   && (page_header_get_field(page, PAGE_DIRECTION)
-		       != PAGE_RIGHT)) {
-
-		page_header_set_field(page, PAGE_DIRECTION, PAGE_LEFT);
-		page_header_set_field(page, PAGE_N_DIRECTION,
-				      page_header_get_field(
-					      page, PAGE_N_DIRECTION) + 1);
-	} else {
-		page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
-		page_header_set_field(page, PAGE_N_DIRECTION, 0);
-	}
-
-	page_header_set_ptr(page, PAGE_LAST_INSERT, insert_rec);
-
-	/* 7. It remains to update the owner record. */
-
-	owner_rec = page_rec_find_owner_rec(insert_rec);
-	n_owned = rec_get_n_owned(owner_rec, comp);
-	rec_set_n_owned(owner_rec, comp, n_owned + 1);
-
-	/* 8. Now we have incremented the n_owned field of the owner
-	record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
-	we have to split the corresponding directory slot in two. */
-
-	if (n_owned == PAGE_DIR_SLOT_MAX_N_OWNED) {
-		owner_slot = page_dir_find_owner_slot(owner_rec);
-		page_dir_split_slot(page, owner_slot);
-	}
-
-	/* 9. Write log record of the insert */
-	page_cur_insert_rec_write_log(insert_rec, rec_size, current_rec,
-				      index, mtr);
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-	return(insert_rec);
-}
-
-/**************************************************************
-Writes a log record of copying a record list end to a new created page. */
-UNIV_INLINE
-byte*
-page_copy_rec_list_to_created_page_write_log(
-/*=========================================*/
-				/* out: 4-byte field where to
-				write the log data length */
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index,	/* in: record descriptor */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	byte*	log_ptr;
-
-	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
-	log_ptr = mlog_open_and_write_index(mtr, page, index,
-					    page_is_comp(page)
-					    ? MLOG_COMP_LIST_END_COPY_CREATED
-					    : MLOG_LIST_END_COPY_CREATED, 4);
-	ut_a(log_ptr);
-	mlog_close(mtr, log_ptr + 4);
-
-	return(log_ptr);
-}
-
-/**************************************************************
-Parses a log record of copying a record list end to a new created page. */
-
-byte*
-page_parse_copy_rec_list_to_created_page(
-/*=====================================*/
-				/* out: end of log record or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	dict_index_t*	index,	/* in: record descriptor */
-	page_t*		page,	/* in: page or NULL */
-	mtr_t*		mtr)	/* in: mtr or NULL */
-{
-	byte*	rec_end;
-	ulint	log_data_len;
-
-	if (ptr + 4 > end_ptr) {
-
-		return(NULL);
-	}
-
-	log_data_len = mach_read_from_4(ptr);
-	ptr += 4;
-
-	rec_end = ptr + log_data_len;
-
-	if (rec_end > end_ptr) {
-
-		return(NULL);
-	}
-
-	if (!page) {
-
-		return(rec_end);
-	}
-
-	while (ptr < rec_end) {
-		ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
-						index, page, mtr);
-	}
-
-	ut_a(ptr == rec_end);
-
-	page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
-	page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
-	page_header_set_field(page, PAGE_N_DIRECTION, 0);
-
-	return(rec_end);
-}
-
-/*****************************************************************
-Copies records from page to a newly created page, from a given record onward,
-including that record. Infimum and supremum records are not copied. */
-
-void
-page_copy_rec_list_end_to_created_page(
-/*===================================*/
-	page_t*		new_page,	/* in: index page to copy to */
-	page_t*		page,		/* in: index page */
-	rec_t*		rec,		/* in: first record to copy */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	page_dir_slot_t* slot = 0; /* remove warning */
-	byte*	heap_top;
-	rec_t*	insert_rec = 0; /* remove warning */
-	rec_t*	prev_rec;
-	ulint	count;
-	ulint	n_recs;
-	ulint	slot_index;
-	ulint	rec_size;
-	ulint	log_mode;
-	byte*	log_ptr;
-	ulint	log_data_len;
-	ulint		comp		= page_is_comp(page);
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	ut_ad(page_dir_get_n_heap(new_page) == 2);
-	ut_ad(page != new_page);
-	ut_ad(comp == page_is_comp(new_page));
-
-	if (rec == page_get_infimum_rec(page)) {
-
-		rec = page_rec_get_next(rec);
-	}
-
-	if (rec == page_get_supremum_rec(page)) {
-
-		return;
-	}
-
-#ifdef UNIV_DEBUG
-	/* To pass the debug tests we have to set these dummy values
-	in the debug version */
-	page_dir_set_n_slots(new_page, UNIV_PAGE_SIZE / 2);
-	page_header_set_ptr(new_page, PAGE_HEAP_TOP,
-			    new_page + UNIV_PAGE_SIZE - 1);
-#endif
-
-	log_ptr = page_copy_rec_list_to_created_page_write_log(new_page,
-							       index, mtr);
-
-	log_data_len = dyn_array_get_data_size(&(mtr->log));
-
-	/* Individual inserts are logged in a shorter form */
-
-	log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);
-
-	prev_rec = page_get_infimum_rec(new_page);
-	if (comp) {
-		heap_top = new_page + PAGE_NEW_SUPREMUM_END;
-	} else {
-		heap_top = new_page + PAGE_OLD_SUPREMUM_END;
-	}
-	count = 0;
-	slot_index = 0;
-	n_recs = 0;
-
-	/* should be do ... until, comment by Jani */
-	while (rec != page_get_supremum_rec(page)) {
-		offsets = rec_get_offsets(rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		insert_rec = rec_copy(heap_top, rec, offsets);
-
-		rec_set_next_offs(prev_rec, comp, insert_rec - new_page);
-
-		rec_set_n_owned(insert_rec, comp, 0);
-		rec_set_heap_no(insert_rec, comp, 2 + n_recs);
-
-		rec_size = rec_offs_size(offsets);
-
-		heap_top = heap_top + rec_size;
-
-		ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
-
-		count++;
-		n_recs++;
-
-		if (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2) {
-
-			slot_index++;
-
-			slot = page_dir_get_nth_slot(new_page, slot_index);
-
-			page_dir_slot_set_rec(slot, insert_rec);
-			page_dir_slot_set_n_owned(slot, count);
-
-			count = 0;
-		}
-
-		page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
-					      index, mtr);
-		prev_rec = insert_rec;
-		rec = page_rec_get_next(rec);
-	}
-
-	if ((slot_index > 0) && (count + 1
-				 + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
-				 <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
-		/* We can merge the two last dir slots. This operation is
-		here to make this function imitate exactly the equivalent
-		task made using page_cur_insert_rec, which we use in database
-		recovery to reproduce the task performed by this function.
-		To be able to check the correctness of recovery, it is good
-		that it imitates exactly. */
-
-		count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;
-
-		page_dir_slot_set_n_owned(slot, 0);
-
-		slot_index--;
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len;
-
-	ut_a(log_data_len < 100 * UNIV_PAGE_SIZE);
-
-	mach_write_to_4(log_ptr, log_data_len);
-
-	rec_set_next_offs(insert_rec, comp,
-			  comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM);
-
-	slot = page_dir_get_nth_slot(new_page, 1 + slot_index);
-
-	page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page));
-	page_dir_slot_set_n_owned(slot, count + 1);
-
-	page_dir_set_n_slots(new_page, 2 + slot_index);
-	page_header_set_ptr(new_page, PAGE_HEAP_TOP, heap_top);
-	page_dir_set_n_heap(new_page, 2 + n_recs);
-	page_header_set_field(new_page, PAGE_N_RECS, n_recs);
-
-	page_header_set_ptr(new_page, PAGE_LAST_INSERT, NULL);
-	page_header_set_field(new_page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
-	page_header_set_field(new_page, PAGE_N_DIRECTION, 0);
-
-	/* Restore the log mode */
-
-	mtr_set_log_mode(mtr, log_mode);
-}
-
-/***************************************************************
-Writes log record of a record delete on a page. */
-UNIV_INLINE
-void
-page_cur_delete_rec_write_log(
-/*==========================*/
-	rec_t*		rec,	/* in: record to be deleted */
-	dict_index_t*	index,	/* in: record descriptor */
-	mtr_t*		mtr)	/* in: mini-transaction handle */
-{
-	byte*	log_ptr;
-
-	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-
-	log_ptr = mlog_open_and_write_index(mtr, rec, index,
-					    page_rec_is_comp(rec)
-					    ? MLOG_COMP_REC_DELETE
-					    : MLOG_REC_DELETE, 2);
-
-	if (!log_ptr) {
-		/* Logging in mtr is switched off during crash recovery:
-		in that case mlog_open returns NULL */
-		return;
-	}
-
-	/* Write the cursor rec offset as a 2-byte ulint */
-	mach_write_to_2(log_ptr, page_offset(rec));
-
-	mlog_close(mtr, log_ptr + 2);
-}
-
-/***************************************************************
-Parses log record of a record delete on a page. */
-
-byte*
-page_cur_parse_delete_rec(
-/*======================*/
-				/* out: pointer to record end or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	dict_index_t*	index,	/* in: record descriptor */
-	page_t*		page,	/* in: page or NULL */
-	mtr_t*		mtr)	/* in: mtr or NULL */
-{
-	ulint		offset;
-	page_cur_t	cursor;
-
-	if (end_ptr < ptr + 2) {
-
-		return(NULL);
-	}
-
-	/* Read the cursor rec offset as a 2-byte ulint */
-	offset = mach_read_from_2(ptr);
-	ptr += 2;
-
-	ut_a(offset <= UNIV_PAGE_SIZE);
-
-	if (page) {
-		mem_heap_t*	heap		= NULL;
-		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-		rec_t*		rec		= page + offset;
-		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-		page_cur_position(rec, &cursor);
-
-		page_cur_delete_rec(&cursor, index,
-				    rec_get_offsets(rec, index, offsets_,
-						    ULINT_UNDEFINED, &heap),
-				    mtr);
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-	}
-
-	return(ptr);
-}
-
-/***************************************************************
-Deletes a record at the page cursor. The cursor is moved to the next
-record after the deleted one. */
-
-void
-page_cur_delete_rec(
-/*================*/
-	page_cur_t*	cursor,	/* in: a page cursor */
-	dict_index_t*	index,	/* in: record descriptor */
-	const ulint*	offsets,/* in: rec_get_offsets(cursor->rec, index) */
-	mtr_t*		mtr)	/* in: mini-transaction handle */
-{
-	page_dir_slot_t* cur_dir_slot;
-	page_dir_slot_t* prev_slot;
-	page_t*		page;
-	rec_t*		current_rec;
-	rec_t*		prev_rec	= NULL;
-	rec_t*		next_rec;
-	ulint		cur_slot_no;
-	ulint		cur_n_owned;
-	rec_t*		rec;
-
-	ut_ad(cursor && mtr);
-
-	page = page_cur_get_page(cursor);
-	current_rec = cursor->rec;
-	ut_ad(rec_offs_validate(current_rec, index, offsets));
-	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
-	/* The record must not be the supremum or infimum record. */
-	ut_ad(current_rec != page_get_supremum_rec(page));
-	ut_ad(current_rec != page_get_infimum_rec(page));
-
-	/* Save to local variables some data associated with current_rec */
-	cur_slot_no = page_dir_find_owner_slot(current_rec);
-	cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
-	cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot);
-
-	/* 0. Write the log record */
-	page_cur_delete_rec_write_log(current_rec, index, mtr);
-
-	/* 1. Reset the last insert info in the page header and increment
-	the modify clock for the frame */
-
-	page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
-
-	/* The page gets invalid for optimistic searches: increment the
-	frame modify clock */
-
-	buf_frame_modify_clock_inc(page);
-
-	/* 2. Find the next and the previous record. Note that the cursor is
-	left at the next record. */
-
-	ut_ad(cur_slot_no > 0);
-	prev_slot = page_dir_get_nth_slot(page, cur_slot_no - 1);
-
-	rec = page_dir_slot_get_rec(prev_slot);
-
-	/* rec now points to the record of the previous directory slot. Look
-	for the immediate predecessor of current_rec in a loop. */
-
-	while(current_rec != rec) {
-		prev_rec = rec;
-		rec = page_rec_get_next(rec);
-	}
-
-	page_cur_move_to_next(cursor);
-	next_rec = cursor->rec;
-
-	/* 3. Remove the record from the linked list of records */
-
-	page_rec_set_next(prev_rec, next_rec);
-	page_header_set_field(page, PAGE_N_RECS,
-			      (ulint)(page_get_n_recs(page) - 1));
-
-	/* 4. If the deleted record is pointed to by a dir slot, update the
-	record pointer in slot. In the following if-clause we assume that
-	prev_rec is owned by the same slot, i.e., PAGE_DIR_SLOT_MIN_N_OWNED
-	>= 2. */
-
-#if PAGE_DIR_SLOT_MIN_N_OWNED < 2
-# error "PAGE_DIR_SLOT_MIN_N_OWNED < 2"
-#endif
-	ut_ad(cur_n_owned > 1);
-
-	if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) {
-		page_dir_slot_set_rec(cur_dir_slot, prev_rec);
-	}
-
-	/* 5. Update the number of owned records of the slot */
-
-	page_dir_slot_set_n_owned(cur_dir_slot, cur_n_owned - 1);
-
-	/* 6. Free the memory occupied by the record */
-	page_mem_free(page, current_rec, offsets);
-
-	/* 7. Now we have decremented the number of owned records of the slot.
-	If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
-	slots. */
-
-	if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) {
-		page_dir_balance_slot(page, cur_slot_no);
-	}
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-/***********************************************************************
-Print the first n numbers, generated by page_cur_lcg_prng() to make sure
-(visually) that it works properly. */
-void
-test_page_cur_lcg_prng(
-/*===================*/
-	int	n)	/* in: print first n numbers */
-{
-	int			i;
-	unsigned long long	rnd;
-
-	for (i = 0; i < n; i++) {
-		rnd = page_cur_lcg_prng();
-		printf("%llu\t%%2=%llu %%3=%llu %%5=%llu %%7=%llu %%11=%llu\n",
-		       rnd,
-		       rnd % 2,
-		       rnd % 3,
-		       rnd % 5,
-		       rnd % 7,
-		       rnd % 11);
-	}
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c
deleted file mode 100644
index 543cf9e34eb..00000000000
--- a/storage/innobase/page/page0page.c
+++ /dev/null
@@ -1,2038 +0,0 @@
-/******************************************************
-Index page routines
-
-(c) 1994-1996 Innobase Oy
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#define THIS_MODULE
-#include "page0page.h"
-#ifdef UNIV_NONINL
-#include "page0page.ic"
-#endif
-#undef THIS_MODULE
-
-#include "page0cur.h"
-#include "lock0lock.h"
-#include "fut0lst.h"
-#include "btr0sea.h"
-#include "buf0buf.h"
-#include "srv0srv.h"
-#include "btr0btr.h"
-
-/*			THE INDEX PAGE
-			==============
-
-The index page consists of a page header which contains the page's
-id and other information. On top of it are the the index records
-in a heap linked into a one way linear list according to alphabetic order.
-
-Just below page end is an array of pointers which we call page directory,
-to about every sixth record in the list. The pointers are placed in
-the directory in the alphabetical order of the records pointed to,
-enabling us to make binary search using the array. Each slot n:o I
-in the directory points to a record, where a 4-bit field contains a count
-of those records which are in the linear list between pointer I and
-the pointer I - 1 in the directory, including the record
-pointed to by pointer I and not including the record pointed to by I - 1.
-We say that the record pointed to by slot I, or that slot I, owns
-these records. The count is always kept in the range 4 to 8, with
-the exception that it is 1 for the first slot, and 1--8 for the second slot.
-
-An essentially binary search can be performed in the list of index
-records, like we could do if we had pointer to every record in the
-page directory. The data structure is, however, more efficient when
-we are doing inserts, because most inserts are just pushed on a heap.
-Only every 8th insert requires block move in the directory pointer
-table, which itself is quite small. A record is deleted from the page
-by just taking it off the linear list and updating the number of owned
-records-field of the record which owns it, and updating the page directory,
-if necessary. A special case is the one when the record owns itself.
-Because the overhead of inserts is so small, we may also increase the
-page size from the projected default of 8 kB to 64 kB without too
-much loss of efficiency in inserts. Bigger page becomes actual
-when the disk transfer rate compared to seek and latency time rises.
-On the present system, the page size is set so that the page transfer
-time (3 ms) is 20 % of the disk random access time (15 ms).
-
-When the page is split, merged, or becomes full but contains deleted
-records, we have to reorganize the page.
-
-Assuming a page size of 8 kB, a typical index page of a secondary
-index contains 300 index entries, and the size of the page directory
-is 50 x 4 bytes = 200 bytes. */
-
-/*******************************************************************
-Looks for the directory slot which owns the given record. */
-
-ulint
-page_dir_find_owner_slot(
-/*=====================*/
-			/* out: the directory slot number */
-	rec_t*	rec)	/* in: the physical record */
-{
-	page_t*				page;
-	register uint16			rec_offs_bytes;
-	register page_dir_slot_t*	slot;
-	register const page_dir_slot_t*	first_slot;
-	register rec_t*			r = rec;
-
-	ut_ad(page_rec_check(rec));
-
-	page = buf_frame_align(rec);
-	first_slot = page_dir_get_nth_slot(page, 0);
-	slot = page_dir_get_nth_slot(page, page_dir_get_n_slots(page) - 1);
-
-	if (page_is_comp(page)) {
-		while (rec_get_n_owned(r, TRUE) == 0) {
-			r = page + rec_get_next_offs(r, TRUE);
-			ut_ad(r >= page + PAGE_NEW_SUPREMUM);
-			ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
-		}
-	} else {
-		while (rec_get_n_owned(r, FALSE) == 0) {
-			r = page + rec_get_next_offs(r, FALSE);
-			ut_ad(r >= page + PAGE_OLD_SUPREMUM);
-			ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
-		}
-	}
-
-	rec_offs_bytes = mach_encode_2(r - page);
-
-	while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) {
-
-		if (UNIV_UNLIKELY(slot == first_slot)) {
-			fprintf(stderr,
-				"InnoDB: Probable data corruption on"
-				" page %lu\n"
-				"InnoDB: Original record ",
-				(ulong) buf_frame_get_page_no(page));
-
-			if (page_is_comp(page)) {
-				fputs("(compact record)", stderr);
-			} else {
-				rec_print_old(stderr, rec);
-			}
-
-			fputs("\n"
-			      "InnoDB: on that page.\n"
-			      "InnoDB: Cannot find the dir slot for record ",
-			      stderr);
-			if (page_is_comp(page)) {
-				fputs("(compact record)", stderr);
-			} else {
-				rec_print_old(stderr, page
-					      + mach_decode_2(rec_offs_bytes));
-			}
-			fputs("\n"
-			      "InnoDB: on that page!\n", stderr);
-
-			buf_page_print(page);
-
-			ut_error;
-		}
-
-		slot += PAGE_DIR_SLOT_SIZE;
-	}
-
-	return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE);
-}
-
-/******************************************************************
-Used to check the consistency of a directory slot. */
-static
-ibool
-page_dir_slot_check(
-/*================*/
-					/* out: TRUE if succeed */
-	page_dir_slot_t*	slot)	/* in: slot */
-{
-	page_t*	page;
-	ulint	n_slots;
-	ulint	n_owned;
-
-	ut_a(slot);
-
-	page = buf_frame_align(slot);
-
-	n_slots = page_dir_get_n_slots(page);
-
-	ut_a(slot <= page_dir_get_nth_slot(page, 0));
-	ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1));
-
-	ut_a(page_rec_check(page_dir_slot_get_rec(slot)));
-
-	n_owned = rec_get_n_owned(page_dir_slot_get_rec(slot),
-				  page_is_comp(page));
-
-	if (slot == page_dir_get_nth_slot(page, 0)) {
-		ut_a(n_owned == 1);
-	} else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) {
-		ut_a(n_owned >= 1);
-		ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
-	} else {
-		ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);
-		ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
-	}
-
-	return(TRUE);
-}
-
-/*****************************************************************
-Sets the max trx id field value. */
-
-void
-page_set_max_trx_id(
-/*================*/
-	page_t*	page,	/* in: page */
-	dulint	trx_id)	/* in: transaction id */
-{
-	buf_block_t*	block;
-
-	ut_ad(page);
-
-	block = buf_block_align(page);
-
-	if (block->is_hashed) {
-		rw_lock_x_lock(&btr_search_latch);
-	}
-
-	/* It is not necessary to write this change to the redo log, as
-	during a database recovery we assume that the max trx id of every
-	page is the maximum trx id assigned before the crash. */
-
-	mach_write_to_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID, trx_id);
-
-	if (block->is_hashed) {
-		rw_lock_x_unlock(&btr_search_latch);
-	}
-}
-
-/*****************************************************************
-Calculates free space if a page is emptied. */
-
-ulint
-page_get_free_space_of_empty_noninline(
-/*===================================*/
-			/* out: free space */
-	ulint	comp)	/* in: nonzero=compact page format */
-{
-	return(page_get_free_space_of_empty(comp));
-}
-
-/****************************************************************
-Allocates a block of memory from an index page. */
-
-byte*
-page_mem_alloc(
-/*===========*/
-				/* out: pointer to start of allocated
-				buffer, or NULL if allocation fails */
-	page_t*		page,	/* in: index page */
-	ulint		need,	/* in: number of bytes needed */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint*		heap_no)/* out: this contains the heap number
-				of the allocated record
-				if allocation succeeds */
-{
-	rec_t*	rec;
-	byte*	block;
-	ulint	avl_space;
-	ulint	garbage;
-
-	ut_ad(page && heap_no);
-
-	/* If there are records in the free list, look if the first is
-	big enough */
-
-	rec = page_header_get_ptr(page, PAGE_FREE);
-
-	if (rec) {
-		mem_heap_t*	heap		= NULL;
-		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-		ulint*		offsets		= offsets_;
-		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-		offsets = rec_get_offsets(rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-
-		if (rec_offs_size(offsets) >= need) {
-			page_header_set_ptr(page, PAGE_FREE,
-					    page_rec_get_next(rec));
-
-			garbage = page_header_get_field(page, PAGE_GARBAGE);
-			ut_ad(garbage >= need);
-
-			page_header_set_field(page, PAGE_GARBAGE,
-					      garbage - need);
-
-			*heap_no = rec_get_heap_no(rec, page_is_comp(page));
-
-			block = rec_get_start(rec, offsets);
-			if (UNIV_LIKELY_NULL(heap)) {
-				mem_heap_free(heap);
-			}
-			return(block);
-		}
-
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-	}
-
-	/* Could not find space from the free list, try top of heap */
-
-	avl_space = page_get_max_insert_size(page, 1);
-
-	if (avl_space >= need) {
-		block = page_header_get_ptr(page, PAGE_HEAP_TOP);
-
-		page_header_set_ptr(page, PAGE_HEAP_TOP, block + need);
-		*heap_no = page_dir_get_n_heap(page);
-
-		page_dir_set_n_heap(page, 1 + *heap_no);
-
-		return(block);
-	}
-
-	return(NULL);
-}
-
-/**************************************************************
-Writes a log record of page creation. */
-UNIV_INLINE
-void
-page_create_write_log(
-/*==================*/
-	buf_frame_t*	frame,	/* in: a buffer frame where the page is
-				created */
-	mtr_t*		mtr,	/* in: mini-transaction handle */
-	ulint		comp)	/* in: nonzero=compact page format */
-{
-	mlog_write_initial_log_record(frame, comp
-				      ? MLOG_COMP_PAGE_CREATE
-				      : MLOG_PAGE_CREATE, mtr);
-}
-
-/***************************************************************
-Parses a redo log record of creating a page. */
-
-byte*
-page_parse_create(
-/*==============*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr __attribute__((unused)), /* in: buffer end */
-	ulint	comp,	/* in: nonzero=compact page format */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr)	/* in: mtr or NULL */
-{
-	ut_ad(ptr && end_ptr);
-
-	/* The record is empty, except for the record initial part */
-
-	if (page) {
-		page_create(page, mtr, comp);
-	}
-
-	return(ptr);
-}
-
-/**************************************************************
-The index page creation function. */
-
-page_t*
-page_create(
-/*========*/
-				/* out: pointer to the page */
-	buf_frame_t*	frame,	/* in: a buffer frame where the page is
-				created */
-	mtr_t*		mtr,	/* in: mini-transaction handle */
-	ulint		comp)	/* in: nonzero=compact page format */
-{
-	page_dir_slot_t* slot;
-	mem_heap_t*	heap;
-	dtuple_t*	tuple;
-	dfield_t*	field;
-	byte*		heap_top;
-	rec_t*		infimum_rec;
-	rec_t*		supremum_rec;
-	page_t*		page;
-	dict_index_t*	index;
-	ulint*		offsets;
-
-	index = comp ? srv_sys->dummy_ind2 : srv_sys->dummy_ind1;
-
-	ut_ad(frame && mtr);
-#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
-# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
-#endif
-#if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA
-# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
-#endif
-
-	/* 1. INCREMENT MODIFY CLOCK */
-	buf_frame_modify_clock_inc(frame);
-
-	/* 2. WRITE LOG INFORMATION */
-	page_create_write_log(frame, mtr, comp);
-
-	page = frame;
-
-	fil_page_set_type(page, FIL_PAGE_INDEX);
-
-	heap = mem_heap_create(200);
-
-	/* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */
-
-	/* Create first a data tuple for infimum record */
-	tuple = dtuple_create(heap, 1);
-	dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM);
-	field = dtuple_get_nth_field(tuple, 0);
-
-	dfield_set_data(field, "infimum", 8);
-	dtype_set(dfield_get_type(field),
-		  DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8);
-	/* Set the corresponding physical record to its place in the page
-	record heap */
-
-	heap_top = page + PAGE_DATA;
-
-	infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple);
-
-	ut_a(infimum_rec == page
-	     + (comp ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
-
-	rec_set_n_owned(infimum_rec, comp, 1);
-	rec_set_heap_no(infimum_rec, comp, 0);
-	offsets = rec_get_offsets(infimum_rec, index, NULL,
-				  ULINT_UNDEFINED, &heap);
-
-	heap_top = rec_get_end(infimum_rec, offsets);
-
-	/* Create then a tuple for supremum */
-
-	tuple = dtuple_create(heap, 1);
-	dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM);
-	field = dtuple_get_nth_field(tuple, 0);
-
-	dfield_set_data(field, "supremum", comp ? 8 : 9);
-	dtype_set(dfield_get_type(field),
-		  DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9);
-
-	supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple);
-
-	ut_a(supremum_rec == page
-	     + (comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM));
-
-	rec_set_n_owned(supremum_rec, comp, 1);
-	rec_set_heap_no(supremum_rec, comp, 1);
-
-	offsets = rec_get_offsets(supremum_rec, index, offsets,
-				  ULINT_UNDEFINED, &heap);
-	heap_top = rec_get_end(supremum_rec, offsets);
-
-	ut_ad(heap_top == page
-	      + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END));
-
-	mem_heap_free(heap);
-
-	/* 4. INITIALIZE THE PAGE */
-
-	page_header_set_field(page, PAGE_N_DIR_SLOTS, 2);
-	page_header_set_ptr(page, PAGE_HEAP_TOP, heap_top);
-	page_header_set_field(page, PAGE_N_HEAP, comp ? 0x8002 : 2);
-	page_header_set_ptr(page, PAGE_FREE, NULL);
-	page_header_set_field(page, PAGE_GARBAGE, 0);
-	page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
-	page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
-	page_header_set_field(page, PAGE_N_DIRECTION, 0);
-	page_header_set_field(page, PAGE_N_RECS, 0);
-	page_set_max_trx_id(page, ut_dulint_zero);
-	memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START
-	       - (heap_top - page));
-
-	/* 5. SET POINTERS IN RECORDS AND DIR SLOTS */
-
-	/* Set the slots to point to infimum and supremum. */
-
-	slot = page_dir_get_nth_slot(page, 0);
-	page_dir_slot_set_rec(slot, infimum_rec);
-
-	slot = page_dir_get_nth_slot(page, 1);
-	page_dir_slot_set_rec(slot, supremum_rec);
-
-	/* Set the next pointers in infimum and supremum */
-
-	rec_set_next_offs(infimum_rec, comp, (ulint)(supremum_rec - page));
-	rec_set_next_offs(supremum_rec, comp, 0);
-
-	return(page);
-}
-
-/*****************************************************************
-Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page. */
-
-void
-page_copy_rec_list_end_no_locks(
-/*============================*/
-	page_t*		new_page,	/* in: index page to copy to */
-	page_t*		page,		/* in: index page */
-	rec_t*		rec,		/* in: record on page */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	page_cur_t	cur1;
-	page_cur_t	cur2;
-	rec_t*		sup;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	page_cur_position(rec, &cur1);
-
-	if (page_cur_is_before_first(&cur1)) {
-
-		page_cur_move_to_next(&cur1);
-	}
-
-	ut_a((ibool)!!page_is_comp(new_page)
-	     == dict_table_is_comp(index->table));
-	ut_a(page_is_comp(new_page) == page_is_comp(page));
-	ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
-	     (page_is_comp(new_page)
-	      ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
-
-	page_cur_set_before_first(new_page, &cur2);
-
-	/* Copy records from the original page to the new page */
-
-	sup = page_get_supremum_rec(page);
-
-	for (;;) {
-		rec_t*	cur1_rec = page_cur_get_rec(&cur1);
-		if (cur1_rec == sup) {
-			break;
-		}
-		offsets = rec_get_offsets(cur1_rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		if (UNIV_UNLIKELY(!page_cur_rec_insert(&cur2, cur1_rec, index,
-						       offsets, mtr))) {
-			/* Track an assertion failure reported on the mailing
-			list on June 18th, 2003 */
-
-			buf_page_print(new_page);
-			buf_page_print(page);
-			ut_print_timestamp(stderr);
-
-			fprintf(stderr,
-				"InnoDB: rec offset %lu, cur1 offset %lu,"
-				" cur2 offset %lu\n",
-				(ulong)(rec - page),
-				(ulong)(page_cur_get_rec(&cur1) - page),
-				(ulong)(page_cur_get_rec(&cur2) - new_page));
-
-			ut_error;
-		}
-
-		page_cur_move_to_next(&cur1);
-		page_cur_move_to_next(&cur2);
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-}
-
-/*****************************************************************
-Copies records from page to new_page, from a given record onward,
-including that record. Infimum and supremum records are not copied.
-The records are copied to the start of the record list on new_page. */
-
-void
-page_copy_rec_list_end(
-/*===================*/
-	page_t*		new_page,	/* in: index page to copy to */
-	page_t*		page,		/* in: index page */
-	rec_t*		rec,		/* in: record on page */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	if (page_dir_get_n_heap(new_page) == 2) {
-		page_copy_rec_list_end_to_created_page(new_page, page, rec,
-						       index, mtr);
-	} else {
-		page_copy_rec_list_end_no_locks(new_page, page, rec,
-						index, mtr);
-	}
-
-	/* Update the lock table, MAX_TRX_ID, and possible hash index */
-
-	lock_move_rec_list_end(new_page, page, rec);
-
-	page_update_max_trx_id(new_page, page_get_max_trx_id(page));
-
-	btr_search_move_or_delete_hash_entries(new_page, page, index);
-}
-
-/*****************************************************************
-Copies records from page to new_page, up to the given record,
-NOT including that record. Infimum and supremum records are not copied.
-The records are copied to the end of the record list on new_page. */
-
-void
-page_copy_rec_list_start(
-/*=====================*/
-	page_t*		new_page,	/* in: index page to copy to */
-	page_t*		page,		/* in: index page */
-	rec_t*		rec,		/* in: record on page */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	page_cur_t	cur1;
-	page_cur_t	cur2;
-	rec_t*		old_end;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	page_cur_set_before_first(page, &cur1);
-
-	if (rec == page_cur_get_rec(&cur1)) {
-
-		return;
-	}
-
-	page_cur_move_to_next(&cur1);
-
-	page_cur_set_after_last(new_page, &cur2);
-	page_cur_move_to_prev(&cur2);
-	old_end = page_cur_get_rec(&cur2);
-
-	/* Copy records from the original page to the new page */
-
-	while (page_cur_get_rec(&cur1) != rec) {
-		rec_t*	ins_rec;
-		rec_t*	cur1_rec = page_cur_get_rec(&cur1);
-		offsets = rec_get_offsets(cur1_rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		ins_rec = page_cur_rec_insert(&cur2, cur1_rec, index,
-					      offsets, mtr);
-		ut_a(ins_rec);
-
-		page_cur_move_to_next(&cur1);
-		page_cur_move_to_next(&cur2);
-	}
-
-	/* Update the lock table, MAX_TRX_ID, and possible hash index */
-
-	lock_move_rec_list_start(new_page, page, rec, old_end);
-
-	page_update_max_trx_id(new_page, page_get_max_trx_id(page));
-
-	btr_search_move_or_delete_hash_entries(new_page, page, index);
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-}
-
-/**************************************************************
-Writes a log record of a record list end or start deletion. */
-UNIV_INLINE
-void
-page_delete_rec_list_write_log(
-/*===========================*/
-	rec_t*		rec,	/* in: record on page */
-	dict_index_t*	index,	/* in: record descriptor */
-	byte		type,	/* in: operation type:
-				MLOG_LIST_END_DELETE, ... */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	byte*	log_ptr;
-	ut_ad(type == MLOG_LIST_END_DELETE
-	      || type == MLOG_LIST_START_DELETE
-	      || type == MLOG_COMP_LIST_END_DELETE
-	      || type == MLOG_COMP_LIST_START_DELETE);
-
-	log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2);
-	if (log_ptr) {
-		/* Write the parameter as a 2-byte ulint */
-		mach_write_to_2(log_ptr, page_offset(rec));
-		mlog_close(mtr, log_ptr + 2);
-	}
-}
-
-/**************************************************************
-Parses a log record of a record list end or start deletion. */
-
-byte*
-page_parse_delete_rec_list(
-/*=======================*/
-				/* out: end of log record or NULL */
-	byte		type,	/* in: MLOG_LIST_END_DELETE,
-				MLOG_LIST_START_DELETE,
-				MLOG_COMP_LIST_END_DELETE or
-				MLOG_COMP_LIST_START_DELETE */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	dict_index_t*	index,	/* in: record descriptor */
-	page_t*		page,	/* in: page or NULL */
-	mtr_t*		mtr)	/* in: mtr or NULL */
-{
-	ulint	offset;
-
-	ut_ad(type == MLOG_LIST_END_DELETE
-	      || type == MLOG_LIST_START_DELETE
-	      || type == MLOG_COMP_LIST_END_DELETE
-	      || type == MLOG_COMP_LIST_START_DELETE);
-
-	/* Read the record offset as a 2-byte ulint */
-
-	if (end_ptr < ptr + 2) {
-
-		return(NULL);
-	}
-
-	offset = mach_read_from_2(ptr);
-	ptr += 2;
-
-	if (!page) {
-
-		return(ptr);
-	}
-
-	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
-	if (type == MLOG_LIST_END_DELETE
-	    || type == MLOG_COMP_LIST_END_DELETE) {
-		page_delete_rec_list_end(page, page + offset, index,
-					 ULINT_UNDEFINED,
-					 ULINT_UNDEFINED, mtr);
-	} else {
-		page_delete_rec_list_start(page, page + offset, index, mtr);
-	}
-
-	return(ptr);
-}
-
-/*****************************************************************
-Deletes records from a page from a given record onward, including that record.
-The infimum and supremum records are not deleted. */
-
-void
-page_delete_rec_list_end(
-/*=====================*/
-	page_t*		page,	/* in: index page */
-	rec_t*		rec,	/* in: record on page */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint		n_recs,	/* in: number of records to delete,
-				or ULINT_UNDEFINED if not known */
-	ulint		size,	/* in: the sum of the sizes of the
-				records in the end of the chain to
-				delete, or ULINT_UNDEFINED if not known */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_dir_slot_t* slot;
-	ulint	slot_index;
-	rec_t*	last_rec;
-	rec_t*	prev_rec;
-	rec_t*	free;
-	rec_t*	rec2;
-	ulint	count;
-	ulint	n_owned;
-	rec_t*	sup;
-	ulint	comp;
-
-	/* Reset the last insert info in the page header and increment
-	the modify clock for the frame */
-
-	ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
-	page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
-
-	/* The page gets invalid for optimistic searches: increment the
-	frame modify clock */
-
-	buf_frame_modify_clock_inc(page);
-
-	sup = page_get_supremum_rec(page);
-
-	comp = page_is_comp(page);
-	if (page_rec_is_infimum_low(rec - page)) {
-		rec = page_rec_get_next(rec);
-	}
-
-	page_delete_rec_list_write_log(rec, index, comp
-				       ? MLOG_COMP_LIST_END_DELETE
-				       : MLOG_LIST_END_DELETE, mtr);
-
-	if (rec == sup) {
-
-		return;
-	}
-
-	prev_rec = page_rec_get_prev(rec);
-
-	last_rec = page_rec_get_prev(sup);
-
-	if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) {
-		mem_heap_t*	heap		= NULL;
-		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-		ulint*		offsets		= offsets_;
-		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-		/* Calculate the sum of sizes and the number of records */
-		size = 0;
-		n_recs = 0;
-		rec2 = rec;
-
-		while (rec2 != sup) {
-			ulint	s;
-			offsets = rec_get_offsets(rec2, index, offsets,
-						  ULINT_UNDEFINED, &heap);
-			s = rec_offs_size(offsets);
-			ut_ad(rec2 - page + s - rec_offs_extra_size(offsets)
-			      < UNIV_PAGE_SIZE);
-			ut_ad(size + s < UNIV_PAGE_SIZE);
-			size += s;
-			n_recs++;
-
-			rec2 = page_rec_get_next(rec2);
-		}
-
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-	}
-
-	ut_ad(size < UNIV_PAGE_SIZE);
-
-	/* Update the page directory; there is no need to balance the number
-	of the records owned by the supremum record, as it is allowed to be
-	less than PAGE_DIR_SLOT_MIN_N_OWNED */
-
-	rec2 = rec;
-	count = 0;
-
-	while (rec_get_n_owned(rec2, comp) == 0) {
-		count++;
-
-		rec2 = page_rec_get_next(rec2);
-	}
-
-	ut_ad(rec_get_n_owned(rec2, comp) - count > 0);
-
-	n_owned = rec_get_n_owned(rec2, comp) - count;
-
-	slot_index = page_dir_find_owner_slot(rec2);
-	slot = page_dir_get_nth_slot(page, slot_index);
-
-	page_dir_slot_set_rec(slot, sup);
-	page_dir_slot_set_n_owned(slot, n_owned);
-
-	page_dir_set_n_slots(page, slot_index + 1);
-
-	/* Remove the record chain segment from the record chain */
-	page_rec_set_next(prev_rec, page_get_supremum_rec(page));
-
-	/* Catenate the deleted chain segment to the page free list */
-
-	free = page_header_get_ptr(page, PAGE_FREE);
-
-	page_rec_set_next(last_rec, free);
-	page_header_set_ptr(page, PAGE_FREE, rec);
-
-	page_header_set_field(page, PAGE_GARBAGE, size
-			      + page_header_get_field(page, PAGE_GARBAGE));
-
-	page_header_set_field(page, PAGE_N_RECS,
-			      (ulint)(page_get_n_recs(page) - n_recs));
-}
-
-/*****************************************************************
-Deletes records from page, up to the given record, NOT including
-that record. Infimum and supremum records are not deleted. */
-
-void
-page_delete_rec_list_start(
-/*=======================*/
-	page_t*		page,	/* in: index page */
-	rec_t*		rec,	/* in: record on page */
-	dict_index_t*	index,	/* in: record descriptor */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_cur_t	cur1;
-	ulint		log_mode;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	mem_heap_t*	heap		= NULL;
-	byte		type;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
-	if (page_is_comp(page)) {
-		type = MLOG_COMP_LIST_START_DELETE;
-	} else {
-		type = MLOG_LIST_START_DELETE;
-	}
-
-	page_delete_rec_list_write_log(rec, index, type, mtr);
-
-	page_cur_set_before_first(page, &cur1);
-
-	if (rec == page_cur_get_rec(&cur1)) {
-
-		return;
-	}
-
-	page_cur_move_to_next(&cur1);
-
-	/* Individual deletes are not logged */
-
-	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-
-	while (page_cur_get_rec(&cur1) != rec) {
-		offsets = rec_get_offsets(page_cur_get_rec(&cur1), index,
-					  offsets, ULINT_UNDEFINED, &heap);
-		page_cur_delete_rec(&cur1, index, offsets, mtr);
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	/* Restore log mode */
-
-	mtr_set_log_mode(mtr, log_mode);
-}
-
-/*****************************************************************
-Moves record list end to another page. Moved records include
-split_rec. */
-
-void
-page_move_rec_list_end(
-/*===================*/
-	page_t*		new_page,	/* in: index page where to move */
-	page_t*		page,		/* in: index page */
-	rec_t*		split_rec,	/* in: first record to move */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	ulint	old_data_size;
-	ulint	new_data_size;
-	ulint	old_n_recs;
-	ulint	new_n_recs;
-
-	old_data_size = page_get_data_size(new_page);
-	old_n_recs = page_get_n_recs(new_page);
-
-	page_copy_rec_list_end(new_page, page, split_rec, index, mtr);
-
-	new_data_size = page_get_data_size(new_page);
-	new_n_recs = page_get_n_recs(new_page);
-
-	ut_ad(new_data_size >= old_data_size);
-
-	page_delete_rec_list_end(page, split_rec, index,
-				 new_n_recs - old_n_recs,
-				 new_data_size - old_data_size, mtr);
-}
-
-/*****************************************************************
-Moves record list start to another page. Moved records do not include
-split_rec. */
-
-void
-page_move_rec_list_start(
-/*=====================*/
-	page_t*		new_page,	/* in: index page where to move */
-	page_t*		page,		/* in: index page */
-	rec_t*		split_rec,	/* in: first record not to move */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	page_copy_rec_list_start(new_page, page, split_rec, index, mtr);
-
-	page_delete_rec_list_start(page, split_rec, index, mtr);
-}
-
-/***************************************************************************
-This is a low-level operation which is used in a database index creation
-to update the page number of a created B-tree to a data dictionary record. */
-
-void
-page_rec_write_index_page_no(
-/*=========================*/
-	rec_t*	rec,	/* in: record to update */
-	ulint	i,	/* in: index of the field to update */
-	ulint	page_no,/* in: value to write */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	byte*	data;
-	ulint	len;
-
-	data = rec_get_nth_field_old(rec, i, &len);
-
-	ut_ad(len == 4);
-
-	mlog_write_ulint(data, page_no, MLOG_4BYTES, mtr);
-}
-
-/******************************************************************
-Used to delete n slots from the directory. This function updates
-also n_owned fields in the records, so that the first slot after
-the deleted ones inherits the records of the deleted slots. */
-UNIV_INLINE
-void
-page_dir_delete_slots(
-/*==================*/
-	page_t*	page,	/* in: the index page */
-	ulint	start,	/* in: first slot to be deleted */
-	ulint	n)	/* in: number of slots to delete (currently
-			only n == 1 allowed) */
-{
-	page_dir_slot_t*	slot;
-	ulint			i;
-	ulint			sum_owned = 0;
-	ulint			n_slots;
-	rec_t*			rec;
-
-	ut_ad(n == 1);
-	ut_ad(start > 0);
-	ut_ad(start + n < page_dir_get_n_slots(page));
-
-	n_slots = page_dir_get_n_slots(page);
-
-	/* 1. Reset the n_owned fields of the slots to be
-	deleted */
-	for (i = start; i < start + n; i++) {
-		slot = page_dir_get_nth_slot(page, i);
-		sum_owned += page_dir_slot_get_n_owned(slot);
-		page_dir_slot_set_n_owned(slot, 0);
-	}
-
-	/* 2. Update the n_owned value of the first non-deleted slot */
-
-	slot = page_dir_get_nth_slot(page, start + n);
-	page_dir_slot_set_n_owned(slot,
-				  sum_owned + page_dir_slot_get_n_owned(slot));
-
-	/* 3. Destroy start and other slots by copying slots */
-	for (i = start + n; i < n_slots; i++) {
-		slot = page_dir_get_nth_slot(page, i);
-		rec = page_dir_slot_get_rec(slot);
-
-		slot = page_dir_get_nth_slot(page, i - n);
-		page_dir_slot_set_rec(slot, rec);
-	}
-
-	/* 4. Update the page header */
-	page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots - n);
-}
-
-/******************************************************************
-Used to add n slots to the directory. Does not set the record pointers
-in the added slots or update n_owned values: this is the responsibility
-of the caller. */
-UNIV_INLINE
-void
-page_dir_add_slots(
-/*===============*/
-	page_t*	page,	/* in: the index page */
-	ulint	start,	/* in: the slot above which the new slots are added */
-	ulint	n)	/* in: number of slots to add (currently only n == 1
-			allowed) */
-{
-	page_dir_slot_t*	slot;
-	ulint			n_slots;
-	ulint			i;
-	rec_t*			rec;
-
-	ut_ad(n == 1);
-
-	n_slots = page_dir_get_n_slots(page);
-
-	ut_ad(start < n_slots - 1);
-
-	/* Update the page header */
-	page_dir_set_n_slots(page, n_slots + n);
-
-	/* Move slots up */
-
-	for (i = n_slots - 1; i > start; i--) {
-
-		slot = page_dir_get_nth_slot(page, i);
-		rec = page_dir_slot_get_rec(slot);
-
-		slot = page_dir_get_nth_slot(page, i + n);
-		page_dir_slot_set_rec(slot, rec);
-	}
-}
-
-/********************************************************************
-Splits a directory slot which owns too many records. */
-
-void
-page_dir_split_slot(
-/*================*/
-	page_t*	page,		/* in: the index page in question */
-	ulint	slot_no)	/* in: the directory slot */
-{
-	rec_t*			rec;
-	page_dir_slot_t*	new_slot;
-	page_dir_slot_t*	prev_slot;
-	page_dir_slot_t*	slot;
-	ulint			i;
-	ulint			n_owned;
-
-	ut_ad(page);
-	ut_ad(slot_no > 0);
-
-	slot = page_dir_get_nth_slot(page, slot_no);
-
-	n_owned = page_dir_slot_get_n_owned(slot);
-	ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1);
-
-	/* 1. We loop to find a record approximately in the middle of the
-	records owned by the slot. */
-
-	prev_slot = page_dir_get_nth_slot(page, slot_no - 1);
-	rec = page_dir_slot_get_rec(prev_slot);
-
-	for (i = 0; i < n_owned / 2; i++) {
-		rec = page_rec_get_next(rec);
-	}
-
-	ut_ad(n_owned / 2 >= PAGE_DIR_SLOT_MIN_N_OWNED);
-
-	/* 2. We add one directory slot immediately below the slot to be
-	split. */
-
-	page_dir_add_slots(page, slot_no - 1, 1);
-
-	/* The added slot is now number slot_no, and the old slot is
-	now number slot_no + 1 */
-
-	new_slot = page_dir_get_nth_slot(page, slot_no);
-	slot = page_dir_get_nth_slot(page, slot_no + 1);
-
-	/* 3. We store the appropriate values to the new slot. */
-
-	page_dir_slot_set_rec(new_slot, rec);
-	page_dir_slot_set_n_owned(new_slot, n_owned / 2);
-
-	/* 4. Finally, we update the number of records field of the
-	original slot */
-
-	page_dir_slot_set_n_owned(slot, n_owned - (n_owned / 2));
-}
-
-/*****************************************************************
-Tries to balance the given directory slot with too few records with the upper
-neighbor, so that there are at least the minimum number of records owned by
-the slot; this may result in the merging of two slots. */
-
-void
-page_dir_balance_slot(
-/*==================*/
-	page_t*	page,		/* in: index page */
-	ulint	slot_no)	/* in: the directory slot */
-{
-	page_dir_slot_t*	slot;
-	page_dir_slot_t*	up_slot;
-	ulint			n_owned;
-	ulint			up_n_owned;
-	rec_t*			old_rec;
-	rec_t*			new_rec;
-
-	ut_ad(page);
-	ut_ad(slot_no > 0);
-
-	slot = page_dir_get_nth_slot(page, slot_no);
-
-	/* The last directory slot cannot be balanced with the upper
-	neighbor, as there is none. */
-
-	if (slot_no == page_dir_get_n_slots(page) - 1) {
-
-		return;
-	}
-
-	up_slot = page_dir_get_nth_slot(page, slot_no + 1);
-
-	n_owned = page_dir_slot_get_n_owned(slot);
-	up_n_owned = page_dir_slot_get_n_owned(up_slot);
-
-	ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1);
-
-	/* If the upper slot has the minimum value of n_owned, we will merge
-	the two slots, therefore we assert: */
-	ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED);
-
-	if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) {
-
-		/* In this case we can just transfer one record owned
-		by the upper slot to the property of the lower slot */
-		old_rec = page_dir_slot_get_rec(slot);
-		new_rec = page_rec_get_next(old_rec);
-
-		rec_set_n_owned(old_rec, page_is_comp(page), 0);
-		rec_set_n_owned(new_rec, page_is_comp(page), n_owned + 1);
-
-		page_dir_slot_set_rec(slot, new_rec);
-
-		page_dir_slot_set_n_owned(up_slot, up_n_owned -1);
-	} else {
-		/* In this case we may merge the two slots */
-		page_dir_delete_slots(page, slot_no, 1);
-	}
-}
-
-/****************************************************************
-Returns the middle record of the record list. If there are an even number
-of records in the list, returns the first record of the upper half-list. */
-
-rec_t*
-page_get_middle_rec(
-/*================*/
-			/* out: middle record */
-	page_t*	page)	/* in: page */
-{
-	page_dir_slot_t*	slot;
-	ulint			middle;
-	ulint			i;
-	ulint			n_owned;
-	ulint			count;
-	rec_t*			rec;
-
-	/* This many records we must leave behind */
-	middle = (page_get_n_recs(page) + 2) / 2;
-
-	count = 0;
-
-	for (i = 0;; i++) {
-
-		slot = page_dir_get_nth_slot(page, i);
-		n_owned = page_dir_slot_get_n_owned(slot);
-
-		if (count + n_owned > middle) {
-			break;
-		} else {
-			count += n_owned;
-		}
-	}
-
-	ut_ad(i > 0);
-	slot = page_dir_get_nth_slot(page, i - 1);
-	rec = page_dir_slot_get_rec(slot);
-	rec = page_rec_get_next(rec);
-
-	/* There are now count records behind rec */
-
-	for (i = 0; i < middle - count; i++) {
-		rec = page_rec_get_next(rec);
-	}
-
-	return(rec);
-}
-
-/*******************************************************************
-Returns the number of records before the given record in chain.
-The number includes infimum and supremum records. */
-
-ulint
-page_rec_get_n_recs_before(
-/*=======================*/
-			/* out: number of records */
-	rec_t*	rec)	/* in: the physical record */
-{
-	page_dir_slot_t*	slot;
-	rec_t*			slot_rec;
-	page_t*			page;
-	ulint			i;
-	ulint			comp;
-	lint			n	= 0;
-
-	ut_ad(page_rec_check(rec));
-
-	page = buf_frame_align(rec);
-	comp = page_is_comp(page);
-
-	while (rec_get_n_owned(rec, comp) == 0) {
-
-		rec = page_rec_get_next(rec);
-		n--;
-	}
-
-	for (i = 0; ; i++) {
-		slot = page_dir_get_nth_slot(page, i);
-		slot_rec = page_dir_slot_get_rec(slot);
-
-		n += rec_get_n_owned(slot_rec, comp);
-
-		if (rec == slot_rec) {
-
-			break;
-		}
-	}
-
-	n--;
-
-	ut_ad(n >= 0);
-
-	return((ulint) n);
-}
-
-/****************************************************************
-Prints record contents including the data relevant only in
-the index page context. */
-
-void
-page_rec_print(
-/*===========*/
-	rec_t*		rec,	/* in: physical record */
-	const ulint*	offsets)/* in: record descriptor */
-{
-	ulint	comp	= page_is_comp(buf_frame_align(rec));
-
-	ut_a(!comp == !rec_offs_comp(offsets));
-	rec_print_new(stderr, rec, offsets);
-	fprintf(stderr,
-		"            n_owned: %lu; heap_no: %lu; next rec: %lu\n",
-		(ulong) rec_get_n_owned(rec, comp),
-		(ulong) rec_get_heap_no(rec, comp),
-		(ulong) rec_get_next_offs(rec, comp));
-
-	page_rec_check(rec);
-	rec_validate(rec, offsets);
-}
-
-/*******************************************************************
-This is used to print the contents of the directory for
-debugging purposes. */
-
-void
-page_dir_print(
-/*===========*/
-	page_t*	page,	/* in: index page */
-	ulint	pr_n)	/* in: print n first and n last entries */
-{
-	ulint			n;
-	ulint			i;
-	page_dir_slot_t*	slot;
-
-	n = page_dir_get_n_slots(page);
-
-	fprintf(stderr, "--------------------------------\n"
-		"PAGE DIRECTORY\n"
-		"Page address %p\n"
-		"Directory stack top at offs: %lu; number of slots: %lu\n",
-		page, (ulong)(page_dir_get_nth_slot(page, n - 1) - page),
-		(ulong) n);
-	for (i = 0; i < n; i++) {
-		slot = page_dir_get_nth_slot(page, i);
-		if ((i == pr_n) && (i < n - pr_n)) {
-			fputs("    ...   \n", stderr);
-		}
-		if ((i < pr_n) || (i >= n - pr_n)) {
-			fprintf(stderr,
-				"Contents of slot: %lu: n_owned: %lu,"
-				" rec offs: %lu\n",
-				(ulong) i,
-				(ulong) page_dir_slot_get_n_owned(slot),
-				(ulong)(page_dir_slot_get_rec(slot) - page));
-		}
-	}
-	fprintf(stderr, "Total of %lu records\n"
-		"--------------------------------\n",
-		(ulong) (2 + page_get_n_recs(page)));
-}
-
-/*******************************************************************
-This is used to print the contents of the page record list for
-debugging purposes. */
-
-void
-page_print_list(
-/*============*/
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index,	/* in: dictionary index of the page */
-	ulint		pr_n)	/* in: print n first and n last entries */
-{
-	page_cur_t	cur;
-	ulint		count;
-	ulint		n_recs;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
-
-	fprintf(stderr,
-		"--------------------------------\n"
-		"PAGE RECORD LIST\n"
-		"Page address %p\n", page);
-
-	n_recs = page_get_n_recs(page);
-
-	page_cur_set_before_first(page, &cur);
-	count = 0;
-	for (;;) {
-		offsets = rec_get_offsets(cur.rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		page_rec_print(cur.rec, offsets);
-
-		if (count == pr_n) {
-			break;
-		}
-		if (page_cur_is_after_last(&cur)) {
-			break;
-		}
-		page_cur_move_to_next(&cur);
-		count++;
-	}
-
-	if (n_recs > 2 * pr_n) {
-		fputs(" ... \n", stderr);
-	}
-
-	while (!page_cur_is_after_last(&cur)) {
-		page_cur_move_to_next(&cur);
-
-		if (count + pr_n >= n_recs) {
-			offsets = rec_get_offsets(cur.rec, index, offsets,
-						  ULINT_UNDEFINED, &heap);
-			page_rec_print(cur.rec, offsets);
-		}
-		count++;
-	}
-
-	fprintf(stderr,
-		"Total of %lu records \n"
-		"--------------------------------\n",
-		(ulong) (count + 1));
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-}
-
-/*******************************************************************
-Prints the info in a page header. */
-
-void
-page_header_print(
-/*==============*/
-	page_t*	page)
-{
-	fprintf(stderr,
-		"--------------------------------\n"
-		"PAGE HEADER INFO\n"
-		"Page address %p, n records %lu (%s)\n"
-		"n dir slots %lu, heap top %lu\n"
-		"Page n heap %lu, free %lu, garbage %lu\n"
-		"Page last insert %lu, direction %lu, n direction %lu\n",
-		page, (ulong) page_header_get_field(page, PAGE_N_RECS),
-		page_is_comp(page) ? "compact format" : "original format",
-		(ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS),
-		(ulong) page_header_get_field(page, PAGE_HEAP_TOP),
-		(ulong) page_dir_get_n_heap(page),
-		(ulong) page_header_get_field(page, PAGE_FREE),
-		(ulong) page_header_get_field(page, PAGE_GARBAGE),
-		(ulong) page_header_get_field(page, PAGE_LAST_INSERT),
-		(ulong) page_header_get_field(page, PAGE_DIRECTION),
-		(ulong) page_header_get_field(page, PAGE_N_DIRECTION));
-}
-
-/*******************************************************************
-This is used to print the contents of the page for
-debugging purposes. */
-
-void
-page_print(
-/*=======*/
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index,	/* in: dictionary index of the page */
-	ulint		dn,	/* in: print dn first and last entries
-				in directory */
-	ulint		rn)	/* in: print rn first and last records
-				in directory */
-{
-	page_header_print(page);
-	page_dir_print(page, dn);
-	page_print_list(page, index, rn);
-}
-
-/*******************************************************************
-The following is used to validate a record on a page. This function
-differs from rec_validate as it can also check the n_owned field and
-the heap_no field. */
-
-ibool
-page_rec_validate(
-/*==============*/
-				/* out: TRUE if ok */
-	rec_t*		rec,	/* in: physical record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	ulint	n_owned;
-	ulint	heap_no;
-	page_t*	page;
-	ulint	comp;
-
-	page = buf_frame_align(rec);
-	comp = page_is_comp(page);
-	ut_a(!comp == !rec_offs_comp(offsets));
-
-	page_rec_check(rec);
-	rec_validate(rec, offsets);
-
-	n_owned = rec_get_n_owned(rec, comp);
-	heap_no = rec_get_heap_no(rec, comp);
-
-	if (!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
-		fprintf(stderr,
-			"InnoDB: Dir slot of rec %lu, n owned too big %lu\n",
-			(ulong)(rec - page), (ulong) n_owned);
-		return(FALSE);
-	}
-
-	if (!(heap_no < page_dir_get_n_heap(page))) {
-		fprintf(stderr,
-			"InnoDB: Heap no of rec %lu too big %lu %lu\n",
-			(ulong)(rec - page), (ulong) heap_no,
-			(ulong) page_dir_get_n_heap(page));
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-
-/*******************************************************************
-Checks that the first directory slot points to the infimum record and
-the last to the supremum. This function is intended to track if the
-bug fixed in 4.0.14 has caused corruption to users' databases. */
-
-void
-page_check_dir(
-/*===========*/
-	page_t*	page)	/* in: index page */
-{
-	ulint	n_slots;
-
-	n_slots = page_dir_get_n_slots(page);
-
-	if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, 0))
-	    != page_get_infimum_rec(page)) {
-
-		fprintf(stderr,
-			"InnoDB: Page directory corruption:"
-			" infimum not pointed to\n");
-		buf_page_print(page);
-	}
-
-	if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, n_slots - 1))
-	    != page_get_supremum_rec(page)) {
-
-		fprintf(stderr,
-			"InnoDB: Page directory corruption:"
-			" supremum not pointed to\n");
-		buf_page_print(page);
-	}
-}
-
-/*******************************************************************
-This function checks the consistency of an index page when we do not
-know the index. This is also resilient so that this should never crash
-even if the page is total garbage. */
-
-ibool
-page_simple_validate(
-/*=================*/
-			/* out: TRUE if ok */
-	page_t*	page)	/* in: index page */
-{
-	page_cur_t	cur;
-	page_dir_slot_t* slot;
-	ulint		slot_no;
-	ulint		n_slots;
-	rec_t*		rec;
-	byte*		rec_heap_top;
-	ulint		count;
-	ulint		own_count;
-	ibool		ret	= FALSE;
-	ulint		comp	= page_is_comp(page);
-
-	/* Check first that the record heap and the directory do not
-	overlap. */
-
-	n_slots = page_dir_get_n_slots(page);
-
-	if (n_slots > UNIV_PAGE_SIZE / 4) {
-		fprintf(stderr,
-			"InnoDB: Nonsensical number %lu of page dir slots\n",
-			(ulong) n_slots);
-
-		goto func_exit;
-	}
-
-	rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
-
-	if (rec_heap_top > page_dir_get_nth_slot(page, n_slots - 1)) {
-
-		fprintf(stderr,
-			"InnoDB: Record heap and dir overlap on a page,"
-			" heap top %lu, dir %lu\n",
-			(ulong)
-			(page_header_get_ptr(page, PAGE_HEAP_TOP) - page),
-			(ulong)
-			(page_dir_get_nth_slot(page, n_slots - 1) - page));
-
-		goto func_exit;
-	}
-
-	/* Validate the record list in a loop checking also that it is
-	consistent with the page record directory. */
-
-	count = 0;
-	own_count = 1;
-	slot_no = 0;
-	slot = page_dir_get_nth_slot(page, slot_no);
-
-	page_cur_set_before_first(page, &cur);
-
-	for (;;) {
-		rec = (&cur)->rec;
-
-		if (rec > rec_heap_top) {
-			fprintf(stderr,
-				"InnoDB: Record %lu is above"
-				" rec heap top %lu\n",
-				(ulong)(rec - page),
-				(ulong)(rec_heap_top - page));
-
-			goto func_exit;
-		}
-
-		if (rec_get_n_owned(rec, comp) != 0) {
-			/* This is a record pointed to by a dir slot */
-			if (rec_get_n_owned(rec, comp) != own_count) {
-
-				fprintf(stderr,
-					"InnoDB: Wrong owned count %lu, %lu,"
-					" rec %lu\n",
-					(ulong) rec_get_n_owned(rec, comp),
-					(ulong) own_count,
-					(ulong)(rec - page));
-
-				goto func_exit;
-			}
-
-			if (page_dir_slot_get_rec(slot) != rec) {
-				fprintf(stderr,
-					"InnoDB: Dir slot does not point"
-					" to right rec %lu\n",
-					(ulong)(rec - page));
-
-				goto func_exit;
-			}
-
-			own_count = 0;
-
-			if (!page_cur_is_after_last(&cur)) {
-				slot_no++;
-				slot = page_dir_get_nth_slot(page, slot_no);
-			}
-		}
-
-		if (page_cur_is_after_last(&cur)) {
-
-			break;
-		}
-
-		if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA
-		    || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) {
-			fprintf(stderr,
-				"InnoDB: Next record offset"
-				" nonsensical %lu for rec %lu\n",
-				(ulong) rec_get_next_offs(rec, comp),
-				(ulong)(rec - page));
-
-			goto func_exit;
-		}
-
-		count++;
-
-		if (count > UNIV_PAGE_SIZE) {
-			fprintf(stderr,
-				"InnoDB: Page record list appears"
-				" to be circular %lu\n",
-				(ulong) count);
-			goto func_exit;
-		}
-
-		page_cur_move_to_next(&cur);
-		own_count++;
-	}
-
-	if (rec_get_n_owned(rec, comp) == 0) {
-		fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n");
-
-		goto func_exit;
-	}
-
-	if (slot_no != n_slots - 1) {
-		fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
-			(ulong) slot_no, (ulong) (n_slots - 1));
-		goto func_exit;
-	}
-
-	if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) {
-		fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
-			(ulong) page_header_get_field(page, PAGE_N_RECS) + 2,
-			(ulong) (count + 1));
-
-		goto func_exit;
-	}
-
-	/* Check then the free list */
-	rec = page_header_get_ptr(page, PAGE_FREE);
-
-	while (rec != NULL) {
-		if (rec < page + FIL_PAGE_DATA
-		    || rec >= page + UNIV_PAGE_SIZE) {
-			fprintf(stderr,
-				"InnoDB: Free list record has"
-				" a nonsensical offset %lu\n",
-				(ulong) (rec - page));
-
-			goto func_exit;
-		}
-
-		if (rec > rec_heap_top) {
-			fprintf(stderr,
-				"InnoDB: Free list record %lu"
-				" is above rec heap top %lu\n",
-				(ulong) (rec - page),
-				(ulong) (rec_heap_top - page));
-
-			goto func_exit;
-		}
-
-		count++;
-
-		if (count > UNIV_PAGE_SIZE) {
-			fprintf(stderr,
-				"InnoDB: Page free list appears"
-				" to be circular %lu\n",
-				(ulong) count);
-			goto func_exit;
-		}
-
-		rec = page_rec_get_next(rec);
-	}
-
-	if (page_dir_get_n_heap(page) != count + 1) {
-
-		fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
-			(ulong) page_dir_get_n_heap(page),
-			(ulong) (count + 1));
-
-		goto func_exit;
-	}
-
-	ret = TRUE;
-
-func_exit:
-	return(ret);
-}
-
-/*******************************************************************
-This function checks the consistency of an index page. */
-
-ibool
-page_validate(
-/*==========*/
-				/* out: TRUE if ok */
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index)	/* in: data dictionary index containing
-				the page record type definition */
-{
-	page_dir_slot_t* slot;
-	mem_heap_t*	heap;
-	page_cur_t	cur;
-	byte*		buf;
-	ulint		count;
-	ulint		own_count;
-	ulint		slot_no;
-	ulint		data_size;
-	rec_t*		rec;
-	rec_t*		old_rec		= NULL;
-	ulint		offs;
-	ulint		n_slots;
-	ibool		ret		= FALSE;
-	ulint		i;
-	ulint		comp		= page_is_comp(page);
-	ulint*		offsets		= NULL;
-	ulint*		old_offsets	= NULL;
-
-	if ((ibool)!!comp != dict_table_is_comp(index->table)) {
-		fputs("InnoDB: 'compact format' flag mismatch\n", stderr);
-		goto func_exit2;
-	}
-	if (!page_simple_validate(page)) {
-		goto func_exit2;
-	}
-
-	heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
-
-	/* The following buffer is used to check that the
-	records in the page record heap do not overlap */
-
-	buf = mem_heap_alloc(heap, UNIV_PAGE_SIZE);
-	memset(buf, 0, UNIV_PAGE_SIZE);
-
-	/* Check first that the record heap and the directory do not
-	overlap. */
-
-	n_slots = page_dir_get_n_slots(page);
-
-	if (!(page_header_get_ptr(page, PAGE_HEAP_TOP)
-	      <= page_dir_get_nth_slot(page, n_slots - 1))) {
-
-		fputs("InnoDB: Record heap and dir overlap on a page ",
-		      stderr);
-		dict_index_name_print(stderr, NULL, index);
-		fprintf(stderr, ", %p, %p\n",
-			page_header_get_ptr(page, PAGE_HEAP_TOP),
-			page_dir_get_nth_slot(page, n_slots - 1));
-
-		goto func_exit;
-	}
-
-	/* Validate the record list in a loop checking also that
-	it is consistent with the directory. */
-	count = 0;
-	data_size = 0;
-	own_count = 1;
-	slot_no = 0;
-	slot = page_dir_get_nth_slot(page, slot_no);
-
-	page_cur_set_before_first(page, &cur);
-
-	for (;;) {
-		rec = cur.rec;
-		offsets = rec_get_offsets(rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-
-		if (comp && page_rec_is_user_rec(rec)
-		    && rec_get_node_ptr_flag(rec)
-		    != (ibool)
-		    (btr_page_get_level_low(page) != 0)) {
-			fputs("InnoDB: node_ptr flag mismatch\n", stderr);
-			goto func_exit;
-		}
-
-		if (!page_rec_validate(rec, offsets)) {
-			goto func_exit;
-		}
-
-		/* Check that the records are in the ascending order */
-		if ((count >= 2) && (!page_cur_is_after_last(&cur))) {
-			if (!(1 == cmp_rec_rec(rec, old_rec,
-					       offsets, old_offsets, index))) {
-				fprintf(stderr,
-					"InnoDB: Records in wrong order"
-					" on page %lu ",
-					(ulong) buf_frame_get_page_no(page));
-				dict_index_name_print(stderr, NULL, index);
-				fputs("\nInnoDB: previous record ", stderr);
-				rec_print_new(stderr, old_rec, old_offsets);
-				fputs("\nInnoDB: record ", stderr);
-				rec_print_new(stderr, rec, offsets);
-				putc('\n', stderr);
-
-				goto func_exit;
-			}
-		}
-
-		if (page_rec_is_user_rec(rec)) {
-
-			data_size += rec_offs_size(offsets);
-		}
-
-		offs = rec_get_start(rec, offsets) - page;
-
-		for (i = 0; i < rec_offs_size(offsets); i++) {
-			if (!buf[offs + i] == 0) {
-				/* No other record may overlap this */
-
-				fputs("InnoDB: Record overlaps another\n",
-				      stderr);
-				goto func_exit;
-			}
-
-			buf[offs + i] = 1;
-		}
-
-		if (rec_get_n_owned(rec, comp) != 0) {
-			/* This is a record pointed to by a dir slot */
-			if (rec_get_n_owned(rec, comp) != own_count) {
-				fprintf(stderr,
-					"InnoDB: Wrong owned count %lu, %lu\n",
-					(ulong) rec_get_n_owned(rec, comp),
-					(ulong) own_count);
-				goto func_exit;
-			}
-
-			if (page_dir_slot_get_rec(slot) != rec) {
-				fputs("InnoDB: Dir slot does not"
-				      " point to right rec\n",
-				      stderr);
-				goto func_exit;
-			}
-
-			page_dir_slot_check(slot);
-
-			own_count = 0;
-			if (!page_cur_is_after_last(&cur)) {
-				slot_no++;
-				slot = page_dir_get_nth_slot(page, slot_no);
-			}
-		}
-
-		if (page_cur_is_after_last(&cur)) {
-			break;
-		}
-
-		if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA
-		    || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) {
-			fprintf(stderr,
-				"InnoDB: Next record offset wrong %lu\n",
-				(ulong) rec_get_next_offs(rec, comp));
-			goto func_exit;
-		}
-
-		count++;
-		page_cur_move_to_next(&cur);
-		own_count++;
-		old_rec = rec;
-		/* set old_offsets to offsets; recycle offsets */
-		{
-			ulint* offs = old_offsets;
-			old_offsets = offsets;
-			offsets = offs;
-		}
-	}
-
-	if (rec_get_n_owned(rec, comp) == 0) {
-		fputs("InnoDB: n owned is zero\n", stderr);
-		goto func_exit;
-	}
-
-	if (slot_no != n_slots - 1) {
-		fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n",
-			(ulong) slot_no, (ulong) (n_slots - 1));
-		goto func_exit;
-	}
-
-	if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) {
-		fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
-			(ulong) page_header_get_field(page, PAGE_N_RECS) + 2,
-			(ulong) (count + 1));
-		goto func_exit;
-	}
-
-	if (data_size != page_get_data_size(page)) {
-		fprintf(stderr,
-			"InnoDB: Summed data size %lu, returned by func %lu\n",
-			(ulong) data_size, (ulong) page_get_data_size(page));
-		goto func_exit;
-	}
-
-	/* Check then the free list */
-	rec = page_header_get_ptr(page, PAGE_FREE);
-
-	while (rec != NULL) {
-		offsets = rec_get_offsets(rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		if (!page_rec_validate(rec, offsets)) {
-
-			goto func_exit;
-		}
-
-		count++;
-		offs = rec_get_start(rec, offsets) - page;
-
-		for (i = 0; i < rec_offs_size(offsets); i++) {
-
-			if (buf[offs + i] != 0) {
-				fputs("InnoDB: Record overlaps another"
-				      " in free list\n", stderr);
-				goto func_exit;
-			}
-
-			buf[offs + i] = 1;
-		}
-
-		rec = page_rec_get_next(rec);
-	}
-
-	if (page_dir_get_n_heap(page) != count + 1) {
-		fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n",
-			(ulong) page_dir_get_n_heap(page),
-			(ulong) count + 1);
-		goto func_exit;
-	}
-
-	ret = TRUE;
-
-func_exit:
-	mem_heap_free(heap);
-
-	if (ret == FALSE) {
-func_exit2:
-		fprintf(stderr, "InnoDB: Apparent corruption in page %lu in ",
-			(ulong) buf_frame_get_page_no(page));
-		dict_index_name_print(stderr, NULL, index);
-		putc('\n', stderr);
-		buf_page_print(page);
-	}
-
-	return(ret);
-}
-
-/*******************************************************************
-Looks in the page record list for a record with the given heap number. */
-
-rec_t*
-page_find_rec_with_heap_no(
-/*=======================*/
-			/* out: record, NULL if not found */
-	page_t*	page,	/* in: index page */
-	ulint	heap_no)/* in: heap number */
-{
-	page_cur_t	cur;
-
-	page_cur_set_before_first(page, &cur);
-
-	for (;;) {
-		if (rec_get_heap_no(cur.rec, page_is_comp(page)) == heap_no) {
-
-			return(cur.rec);
-		}
-
-		if (page_cur_is_after_last(&cur)) {
-
-			return(NULL);
-		}
-
-		page_cur_move_to_next(&cur);
-	}
-}
diff --git a/storage/innobase/pars/make_bison.sh b/storage/innobase/pars/make_bison.sh
deleted file mode 100755
index c11456230c4..00000000000
--- a/storage/innobase/pars/make_bison.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-#
-# generate parser files from bison input files.
-
-set -eu
-
-bison -d pars0grm.y
-mv pars0grm.tab.c pars0grm.c
-mv pars0grm.tab.h pars0grm.h
-cp pars0grm.h ../include
diff --git a/storage/innobase/pars/make_flex.sh b/storage/innobase/pars/make_flex.sh
deleted file mode 100755
index c015327bf8c..00000000000
--- a/storage/innobase/pars/make_flex.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-#
-# generate lexer files from flex input files.
-
-set -eu
-
-TMPFILE=_flex_tmp.c
-OUTFILE=lexyy.c
-
-flex -o $TMPFILE pars0lex.l
-
-# AIX needs its includes done in a certain order, so include "univ.i" first
-# to be sure we get it right.
-echo '#include "univ.i"' > $OUTFILE
-
-# flex assigns a pointer to an int in one place without a cast, resulting in
-# a warning on Win64. this adds the cast.
-sed -e 's/int offset = (yy_c_buf_p) - (yytext_ptr);/int offset = (int)((yy_c_buf_p) - (yytext_ptr));/;' < $TMPFILE >> $OUTFILE
-
-rm $TMPFILE
diff --git a/storage/innobase/pars/pars0grm.h b/storage/innobase/pars/pars0grm.h
deleted file mode 100644
index 0062b8314ee..00000000000
--- a/storage/innobase/pars/pars0grm.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/* A Bison parser, made by GNU Bison 1.875d.  */
-
-/* Skeleton parser for Yacc-like parsing with Bison,
-   Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
-
-/* As a special exception, when this file is copied by Bison into a
-   Bison output file, you may use that output file without restriction.
-   This special exception was added by the Free Software Foundation
-   in version 1.24 of Bison.  */
-
-/* Tokens.  */
-#ifndef YYTOKENTYPE
-# define YYTOKENTYPE
-   /* Put the tokens into the symbol table, so that GDB and other debuggers
-      know about them.  */
-   enum yytokentype {
-     PARS_INT_LIT = 258,
-     PARS_FLOAT_LIT = 259,
-     PARS_STR_LIT = 260,
-     PARS_FIXBINARY_LIT = 261,
-     PARS_BLOB_LIT = 262,
-     PARS_NULL_LIT = 263,
-     PARS_ID_TOKEN = 264,
-     PARS_AND_TOKEN = 265,
-     PARS_OR_TOKEN = 266,
-     PARS_NOT_TOKEN = 267,
-     PARS_GE_TOKEN = 268,
-     PARS_LE_TOKEN = 269,
-     PARS_NE_TOKEN = 270,
-     PARS_PROCEDURE_TOKEN = 271,
-     PARS_IN_TOKEN = 272,
-     PARS_OUT_TOKEN = 273,
-     PARS_BINARY_TOKEN = 274,
-     PARS_BLOB_TOKEN = 275,
-     PARS_INT_TOKEN = 276,
-     PARS_INTEGER_TOKEN = 277,
-     PARS_FLOAT_TOKEN = 278,
-     PARS_CHAR_TOKEN = 279,
-     PARS_IS_TOKEN = 280,
-     PARS_BEGIN_TOKEN = 281,
-     PARS_END_TOKEN = 282,
-     PARS_IF_TOKEN = 283,
-     PARS_THEN_TOKEN = 284,
-     PARS_ELSE_TOKEN = 285,
-     PARS_ELSIF_TOKEN = 286,
-     PARS_LOOP_TOKEN = 287,
-     PARS_WHILE_TOKEN = 288,
-     PARS_RETURN_TOKEN = 289,
-     PARS_SELECT_TOKEN = 290,
-     PARS_SUM_TOKEN = 291,
-     PARS_COUNT_TOKEN = 292,
-     PARS_DISTINCT_TOKEN = 293,
-     PARS_FROM_TOKEN = 294,
-     PARS_WHERE_TOKEN = 295,
-     PARS_FOR_TOKEN = 296,
-     PARS_DDOT_TOKEN = 297,
-     PARS_READ_TOKEN = 298,
-     PARS_ORDER_TOKEN = 299,
-     PARS_BY_TOKEN = 300,
-     PARS_ASC_TOKEN = 301,
-     PARS_DESC_TOKEN = 302,
-     PARS_INSERT_TOKEN = 303,
-     PARS_INTO_TOKEN = 304,
-     PARS_VALUES_TOKEN = 305,
-     PARS_UPDATE_TOKEN = 306,
-     PARS_SET_TOKEN = 307,
-     PARS_DELETE_TOKEN = 308,
-     PARS_CURRENT_TOKEN = 309,
-     PARS_OF_TOKEN = 310,
-     PARS_CREATE_TOKEN = 311,
-     PARS_TABLE_TOKEN = 312,
-     PARS_INDEX_TOKEN = 313,
-     PARS_UNIQUE_TOKEN = 314,
-     PARS_CLUSTERED_TOKEN = 315,
-     PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316,
-     PARS_ON_TOKEN = 317,
-     PARS_ASSIGN_TOKEN = 318,
-     PARS_DECLARE_TOKEN = 319,
-     PARS_CURSOR_TOKEN = 320,
-     PARS_SQL_TOKEN = 321,
-     PARS_OPEN_TOKEN = 322,
-     PARS_FETCH_TOKEN = 323,
-     PARS_CLOSE_TOKEN = 324,
-     PARS_NOTFOUND_TOKEN = 325,
-     PARS_TO_CHAR_TOKEN = 326,
-     PARS_TO_NUMBER_TOKEN = 327,
-     PARS_TO_BINARY_TOKEN = 328,
-     PARS_BINARY_TO_NUMBER_TOKEN = 329,
-     PARS_SUBSTR_TOKEN = 330,
-     PARS_REPLSTR_TOKEN = 331,
-     PARS_CONCAT_TOKEN = 332,
-     PARS_INSTR_TOKEN = 333,
-     PARS_LENGTH_TOKEN = 334,
-     PARS_SYSDATE_TOKEN = 335,
-     PARS_PRINTF_TOKEN = 336,
-     PARS_ASSERT_TOKEN = 337,
-     PARS_RND_TOKEN = 338,
-     PARS_RND_STR_TOKEN = 339,
-     PARS_ROW_PRINTF_TOKEN = 340,
-     PARS_COMMIT_TOKEN = 341,
-     PARS_ROLLBACK_TOKEN = 342,
-     PARS_WORK_TOKEN = 343,
-     PARS_UNSIGNED_TOKEN = 344,
-     PARS_EXIT_TOKEN = 345,
-     PARS_FUNCTION_TOKEN = 346,
-     PARS_LOCK_TOKEN = 347,
-     PARS_SHARE_TOKEN = 348,
-     PARS_MODE_TOKEN = 349,
-     NEG = 350
-   };
-#endif
-#define PARS_INT_LIT 258
-#define PARS_FLOAT_LIT 259
-#define PARS_STR_LIT 260
-#define PARS_FIXBINARY_LIT 261
-#define PARS_BLOB_LIT 262
-#define PARS_NULL_LIT 263
-#define PARS_ID_TOKEN 264
-#define PARS_AND_TOKEN 265
-#define PARS_OR_TOKEN 266
-#define PARS_NOT_TOKEN 267
-#define PARS_GE_TOKEN 268
-#define PARS_LE_TOKEN 269
-#define PARS_NE_TOKEN 270
-#define PARS_PROCEDURE_TOKEN 271
-#define PARS_IN_TOKEN 272
-#define PARS_OUT_TOKEN 273
-#define PARS_BINARY_TOKEN 274
-#define PARS_BLOB_TOKEN 275
-#define PARS_INT_TOKEN 276
-#define PARS_INTEGER_TOKEN 277
-#define PARS_FLOAT_TOKEN 278
-#define PARS_CHAR_TOKEN 279
-#define PARS_IS_TOKEN 280
-#define PARS_BEGIN_TOKEN 281
-#define PARS_END_TOKEN 282
-#define PARS_IF_TOKEN 283
-#define PARS_THEN_TOKEN 284
-#define PARS_ELSE_TOKEN 285
-#define PARS_ELSIF_TOKEN 286
-#define PARS_LOOP_TOKEN 287
-#define PARS_WHILE_TOKEN 288
-#define PARS_RETURN_TOKEN 289
-#define PARS_SELECT_TOKEN 290
-#define PARS_SUM_TOKEN 291
-#define PARS_COUNT_TOKEN 292
-#define PARS_DISTINCT_TOKEN 293
-#define PARS_FROM_TOKEN 294
-#define PARS_WHERE_TOKEN 295
-#define PARS_FOR_TOKEN 296
-#define PARS_DDOT_TOKEN 297
-#define PARS_READ_TOKEN 298
-#define PARS_ORDER_TOKEN 299
-#define PARS_BY_TOKEN 300
-#define PARS_ASC_TOKEN 301
-#define PARS_DESC_TOKEN 302
-#define PARS_INSERT_TOKEN 303
-#define PARS_INTO_TOKEN 304
-#define PARS_VALUES_TOKEN 305
-#define PARS_UPDATE_TOKEN 306
-#define PARS_SET_TOKEN 307
-#define PARS_DELETE_TOKEN 308
-#define PARS_CURRENT_TOKEN 309
-#define PARS_OF_TOKEN 310
-#define PARS_CREATE_TOKEN 311
-#define PARS_TABLE_TOKEN 312
-#define PARS_INDEX_TOKEN 313
-#define PARS_UNIQUE_TOKEN 314
-#define PARS_CLUSTERED_TOKEN 315
-#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316
-#define PARS_ON_TOKEN 317
-#define PARS_ASSIGN_TOKEN 318
-#define PARS_DECLARE_TOKEN 319
-#define PARS_CURSOR_TOKEN 320
-#define PARS_SQL_TOKEN 321
-#define PARS_OPEN_TOKEN 322
-#define PARS_FETCH_TOKEN 323
-#define PARS_CLOSE_TOKEN 324
-#define PARS_NOTFOUND_TOKEN 325
-#define PARS_TO_CHAR_TOKEN 326
-#define PARS_TO_NUMBER_TOKEN 327
-#define PARS_TO_BINARY_TOKEN 328
-#define PARS_BINARY_TO_NUMBER_TOKEN 329
-#define PARS_SUBSTR_TOKEN 330
-#define PARS_REPLSTR_TOKEN 331
-#define PARS_CONCAT_TOKEN 332
-#define PARS_INSTR_TOKEN 333
-#define PARS_LENGTH_TOKEN 334
-#define PARS_SYSDATE_TOKEN 335
-#define PARS_PRINTF_TOKEN 336
-#define PARS_ASSERT_TOKEN 337
-#define PARS_RND_TOKEN 338
-#define PARS_RND_STR_TOKEN 339
-#define PARS_ROW_PRINTF_TOKEN 340
-#define PARS_COMMIT_TOKEN 341
-#define PARS_ROLLBACK_TOKEN 342
-#define PARS_WORK_TOKEN 343
-#define PARS_UNSIGNED_TOKEN 344
-#define PARS_EXIT_TOKEN 345
-#define PARS_FUNCTION_TOKEN 346
-#define PARS_LOCK_TOKEN 347
-#define PARS_SHARE_TOKEN 348
-#define PARS_MODE_TOKEN 349
-#define NEG 350
-
-
-
-
-#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
-typedef int YYSTYPE;
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
-# define YYSTYPE_IS_TRIVIAL 1
-#endif
-
-extern YYSTYPE yylval;
-
-
-
diff --git a/storage/innobase/plug.in b/storage/innobase/plug.in
deleted file mode 100644
index f7d2abed751..00000000000
--- a/storage/innobase/plug.in
+++ /dev/null
@@ -1,44 +0,0 @@
-MYSQL_STORAGE_ENGINE(innobase,  innodb, [InnoDB Storage Engine],
-        [Transactional Tables using InnoDB], [max,max-no-ndb])
-MYSQL_PLUGIN_DIRECTORY(innobase, [storage/innobase])
-MYSQL_PLUGIN_STATIC(innobase,   [libinnobase.a])
-MYSQL_PLUGIN_DYNAMIC(innobase,  [ha_innodb.la])
-MYSQL_PLUGIN_ACTIONS(innobase,  [
-  AC_CHECK_LIB(rt, aio_read, [innodb_system_libs="-lrt"])
-  AC_SUBST(innodb_system_libs)
-  AC_CHECK_HEADERS(aio.h sched.h)
-  AC_CHECK_SIZEOF(int, 4)
-  AC_CHECK_SIZEOF(long, 4)
-  AC_CHECK_SIZEOF(void*, 4)
-  AC_CHECK_FUNCS(sched_yield)
-  AC_CHECK_FUNCS(fdatasync)
-  AC_CHECK_FUNCS(localtime_r)
-  AC_C_BIGENDIAN
-  case "$target_os" in
-	lin*)
-		CFLAGS="$CFLAGS -DUNIV_LINUX";;
-	hpux10*)
-		CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX -DUNIV_HPUX10";;
-	hp*)
-		CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX";;
-	aix*)
-		CFLAGS="$CFLAGS -DUNIV_AIX";;
-	irix*|osf*|sysv5uw7*|openbsd*)
-		CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
-	*solaris*|*SunOS*)
-		CFLAGS="$CFLAGS -DUNIV_SOLARIS";;
-  esac
-  INNODB_DYNAMIC_CFLAGS="-DMYSQL_DYNAMIC_PLUGIN"
-  case "$target_cpu" in
-	x86_64)
-		# The AMD64 ABI forbids absolute addresses in shared libraries
-		;;
-	*86)
-		# Use absolute addresses on IA-32
-		INNODB_DYNAMIC_CFLAGS="$INNODB_DYNAMIC_CFLAGS -prefer-non-pic"
-		;;
-  esac
-  AC_SUBST(INNODB_DYNAMIC_CFLAGS)
-  ])
-
-# vim: set ft=config:
diff --git a/storage/innobase/row/row0row.c b/storage/innobase/row/row0row.c
deleted file mode 100644
index 08e50817db9..00000000000
--- a/storage/innobase/row/row0row.c
+++ /dev/null
@@ -1,726 +0,0 @@
-/******************************************************
-General row routines
-
-(c) 1996 Innobase Oy
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#include "row0row.h"
-
-#ifdef UNIV_NONINL
-#include "row0row.ic"
-#endif
-
-#include "dict0dict.h"
-#include "btr0btr.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "rem0cmp.h"
-#include "read0read.h"
-
-/*************************************************************************
-Reads the trx id or roll ptr field from a clustered index record: this function
-is slower than the specialized inline functions. */
-
-dulint
-row_get_rec_sys_field(
-/*==================*/
-				/* out: value of the field */
-	ulint		type,	/* in: DATA_TRX_ID or DATA_ROLL_PTR */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
-{
-	ulint		pos;
-	byte*		field;
-	ulint		len;
-
-	ut_ad(index->type & DICT_CLUSTERED);
-
-	pos = dict_index_get_sys_col_pos(index, type);
-
-	field = rec_get_nth_field(rec, offsets, pos, &len);
-
-	if (type == DATA_TRX_ID) {
-
-		return(trx_read_trx_id(field));
-	} else {
-		ut_ad(type == DATA_ROLL_PTR);
-
-		return(trx_read_roll_ptr(field));
-	}
-}
-
-/*************************************************************************
-Sets the trx id or roll ptr field in a clustered index record: this function
-is slower than the specialized inline functions. */
-
-void
-row_set_rec_sys_field(
-/*==================*/
-				/* out: value of the field */
-	ulint		type,	/* in: DATA_TRX_ID or DATA_ROLL_PTR */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	dulint		val)	/* in: value to set */
-{
-	ulint	pos;
-	byte*	field;
-	ulint	len;
-
-	ut_ad(index->type & DICT_CLUSTERED);
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	pos = dict_index_get_sys_col_pos(index, type);
-
-	field = rec_get_nth_field(rec, offsets, pos, &len);
-
-	if (type == DATA_TRX_ID) {
-
-		trx_write_trx_id(field, val);
-	} else {
-		ut_ad(type == DATA_ROLL_PTR);
-
-		trx_write_roll_ptr(field, val);
-	}
-}
-
-/*********************************************************************
-When an insert to a table is performed, this function builds the entry which
-has to be inserted to an index on the table. */
-
-dtuple_t*
-row_build_index_entry(
-/*==================*/
-				/* out: index entry which should be inserted */
-	dtuple_t*	row,	/* in: row which should be inserted to the
-				table */
-	dict_index_t*	index,	/* in: index on the table */
-	mem_heap_t*	heap)	/* in: memory heap from which the memory for
-				the index entry is allocated */
-{
-	dtuple_t*	entry;
-	ulint		entry_len;
-	dict_field_t*	ind_field;
-	dfield_t*	dfield;
-	dfield_t*	dfield2;
-	ulint		i;
-	ulint		storage_len;
-
-	ut_ad(row && index && heap);
-	ut_ad(dtuple_check_typed(row));
-
-	entry_len = dict_index_get_n_fields(index);
-	entry = dtuple_create(heap, entry_len);
-
-	if (index->type & DICT_UNIVERSAL) {
-		dtuple_set_n_fields_cmp(entry, entry_len);
-	} else {
-		dtuple_set_n_fields_cmp(
-			entry, dict_index_get_n_unique_in_tree(index));
-	}
-
-	for (i = 0; i < entry_len; i++) {
-		const dict_col_t*	col;
-		ind_field = dict_index_get_nth_field(index, i);
-		col = ind_field->col;
-
-		dfield = dtuple_get_nth_field(entry, i);
-
-		dfield2 = dtuple_get_nth_field(row, dict_col_get_no(col));
-
-		dfield_copy(dfield, dfield2);
-
-		/* If a column prefix index, take only the prefix */
-		if (ind_field->prefix_len > 0
-		    && dfield_get_len(dfield2) != UNIV_SQL_NULL) {
-
-			storage_len = dtype_get_at_most_n_mbchars(
-				col->prtype, col->mbminlen, col->mbmaxlen,
-				ind_field->prefix_len,
-				dfield_get_len(dfield2), dfield2->data);
-
-			dfield_set_len(dfield, storage_len);
-		}
-	}
-
-	ut_ad(dtuple_check_typed(entry));
-
-	return(entry);
-}
-
-/***********************************************************************
-An inverse function to dict_row_build_index_entry. Builds a row from a
-record in a clustered index. */
-
-dtuple_t*
-row_build(
-/*======*/
-				/* out, own: row built; see the NOTE below! */
-	ulint		type,	/* in: ROW_COPY_POINTERS or ROW_COPY_DATA;
-				the latter copies also the data fields to
-				heap while the first only places pointers to
-				data fields on the index page, and thus is
-				more efficient */
-	dict_index_t*	index,	/* in: clustered index */
-	rec_t*		rec,	/* in: record in the clustered index;
-				NOTE: in the case ROW_COPY_POINTERS
-				the data fields in the row will point
-				directly into this record, therefore,
-				the buffer page of this record must be
-				at least s-latched and the latch held
-				as long as the row dtuple is used! */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index)
-				or NULL, in which case this function
-				will invoke rec_get_offsets() */
-	mem_heap_t*	heap)	/* in: memory heap from which the memory
-				needed is allocated */
-{
-	dtuple_t*	row;
-	dict_table_t*	table;
-	dict_field_t*	ind_field;
-	dfield_t*	dfield;
-	ulint		n_fields;
-	byte*		field;
-	ulint		len;
-	ulint		row_len;
-	byte*		buf;
-	ulint		i;
-	mem_heap_t*	tmp_heap	= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	ut_ad(index && rec && heap);
-	ut_ad(index->type & DICT_CLUSTERED);
-
-	if (!offsets) {
-		offsets = rec_get_offsets(rec, index, offsets_,
-					  ULINT_UNDEFINED, &tmp_heap);
-	} else {
-		ut_ad(rec_offs_validate(rec, index, offsets));
-	}
-
-	if (type != ROW_COPY_POINTERS) {
-		/* Take a copy of rec to heap */
-		buf = mem_heap_alloc(heap, rec_offs_size(offsets));
-		rec = rec_copy(buf, rec, offsets);
-		/* Avoid a debug assertion in rec_offs_validate(). */
-		rec_offs_make_valid(rec, index, (ulint*) offsets);
-	}
-
-	table = index->table;
-	row_len = dict_table_get_n_cols(table);
-
-	row = dtuple_create(heap, row_len);
-
-	dtuple_set_info_bits(row, rec_get_info_bits(
-				     rec, dict_table_is_comp(table)));
-
-	n_fields = rec_offs_n_fields(offsets);
-
-	dict_table_copy_types(row, table);
-
-	for (i = 0; i < n_fields; i++) {
-		ind_field = dict_index_get_nth_field(index, i);
-
-		if (ind_field->prefix_len == 0) {
-
-			const dict_col_t*	col
-				= dict_field_get_col(ind_field);
-
-			dfield = dtuple_get_nth_field(row,
-						      dict_col_get_no(col));
-			field = rec_get_nth_field(rec, offsets, i, &len);
-
-			dfield_set_data(dfield, field, len);
-		}
-	}
-
-	ut_ad(dtuple_check_typed(row));
-
-	if (tmp_heap) {
-		mem_heap_free(tmp_heap);
-	}
-
-	return(row);
-}
-
-/***********************************************************************
-Converts an index record to a typed data tuple. NOTE that externally
-stored (often big) fields are NOT copied to heap. */
-
-dtuple_t*
-row_rec_to_index_entry(
-/*===================*/
-				/* out, own: index entry built; see the
-				NOTE below! */
-	ulint		type,	/* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
-				the former copies also the data fields to
-				heap as the latter only places pointers to
-				data fields on the index page */
-	dict_index_t*	index,	/* in: index */
-	rec_t*		rec,	/* in: record in the index;
-				NOTE: in the case ROW_COPY_POINTERS
-				the data fields in the row will point
-				directly into this record, therefore,
-				the buffer page of this record must be
-				at least s-latched and the latch held
-				as long as the dtuple is used! */
-	mem_heap_t*	heap)	/* in: memory heap from which the memory
-				needed is allocated */
-{
-	dtuple_t*	entry;
-	dfield_t*	dfield;
-	ulint		i;
-	byte*		field;
-	ulint		len;
-	ulint		rec_len;
-	byte*		buf;
-	mem_heap_t*	tmp_heap	= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	ut_ad(rec && heap && index);
-
-	offsets = rec_get_offsets(rec, index, offsets,
-				  ULINT_UNDEFINED, &tmp_heap);
-
-	if (type == ROW_COPY_DATA) {
-		/* Take a copy of rec to heap */
-		buf = mem_heap_alloc(heap, rec_offs_size(offsets));
-		rec = rec_copy(buf, rec, offsets);
-		/* Avoid a debug assertion in rec_offs_validate(). */
-		rec_offs_make_valid(rec, index, offsets);
-	}
-
-	rec_len = rec_offs_n_fields(offsets);
-
-	entry = dtuple_create(heap, rec_len);
-
-	dtuple_set_n_fields_cmp(entry,
-				dict_index_get_n_unique_in_tree(index));
-	ut_ad(rec_len == dict_index_get_n_fields(index));
-
-	dict_index_copy_types(entry, index, rec_len);
-
-	dtuple_set_info_bits(entry,
-			     rec_get_info_bits(rec, rec_offs_comp(offsets)));
-
-	for (i = 0; i < rec_len; i++) {
-
-		dfield = dtuple_get_nth_field(entry, i);
-		field = rec_get_nth_field(rec, offsets, i, &len);
-
-		dfield_set_data(dfield, field, len);
-	}
-
-	ut_ad(dtuple_check_typed(entry));
-	if (tmp_heap) {
-		mem_heap_free(tmp_heap);
-	}
-
-	return(entry);
-}
-
-/***********************************************************************
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-
-dtuple_t*
-row_build_row_ref(
-/*==============*/
-				/* out, own: row reference built; see the
-				NOTE below! */
-	ulint		type,	/* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
-				the former copies also the data fields to
-				heap, whereas the latter only places pointers
-				to data fields on the index page */
-	dict_index_t*	index,	/* in: index */
-	rec_t*		rec,	/* in: record in the index;
-				NOTE: in the case ROW_COPY_POINTERS
-				the data fields in the row will point
-				directly into this record, therefore,
-				the buffer page of this record must be
-				at least s-latched and the latch held
-				as long as the row reference is used! */
-	mem_heap_t*	heap)	/* in: memory heap from which the memory
-				needed is allocated */
-{
-	dict_table_t*	table;
-	dict_index_t*	clust_index;
-	dfield_t*	dfield;
-	dtuple_t*	ref;
-	byte*		field;
-	ulint		len;
-	ulint		ref_len;
-	ulint		pos;
-	byte*		buf;
-	ulint		clust_col_prefix_len;
-	ulint		i;
-	mem_heap_t*	tmp_heap	= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	ut_ad(index && rec && heap);
-
-	offsets = rec_get_offsets(rec, index, offsets,
-				  ULINT_UNDEFINED, &tmp_heap);
-
-	if (type == ROW_COPY_DATA) {
-		/* Take a copy of rec to heap */
-
-		buf = mem_heap_alloc(heap, rec_offs_size(offsets));
-
-		rec = rec_copy(buf, rec, offsets);
-		/* Avoid a debug assertion in rec_offs_validate(). */
-		rec_offs_make_valid(rec, index, offsets);
-	}
-
-	table = index->table;
-
-	clust_index = dict_table_get_first_index(table);
-
-	ref_len = dict_index_get_n_unique(clust_index);
-
-	ref = dtuple_create(heap, ref_len);
-
-	dict_index_copy_types(ref, clust_index, ref_len);
-
-	for (i = 0; i < ref_len; i++) {
-		dfield = dtuple_get_nth_field(ref, i);
-
-		pos = dict_index_get_nth_field_pos(index, clust_index, i);
-
-		ut_a(pos != ULINT_UNDEFINED);
-
-		field = rec_get_nth_field(rec, offsets, pos, &len);
-
-		dfield_set_data(dfield, field, len);
-
-		/* If the primary key contains a column prefix, then the
-		secondary index may contain a longer prefix of the same
-		column, or the full column, and we must adjust the length
-		accordingly. */
-
-		clust_col_prefix_len = dict_index_get_nth_field(
-			clust_index, i)->prefix_len;
-
-		if (clust_col_prefix_len > 0) {
-			if (len != UNIV_SQL_NULL) {
-
-				const dtype_t*	dtype
-					= dfield_get_type(dfield);
-
-				dfield_set_len(dfield,
-					       dtype_get_at_most_n_mbchars(
-						       dtype->prtype,
-						       dtype->mbminlen,
-						       dtype->mbmaxlen,
-						       clust_col_prefix_len,
-						       len, (char*) field));
-			}
-		}
-	}
-
-	ut_ad(dtuple_check_typed(ref));
-	if (tmp_heap) {
-		mem_heap_free(tmp_heap);
-	}
-
-	return(ref);
-}
-
-/***********************************************************************
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-
-void
-row_build_row_ref_in_tuple(
-/*=======================*/
-	dtuple_t*	ref,	/* in/out: row reference built; see the
-				NOTE below! */
-	dict_index_t*	index,	/* in: index */
-	rec_t*		rec,	/* in: record in the index;
-				NOTE: the data fields in ref will point
-				directly into this record, therefore,
-				the buffer page of this record must be
-				at least s-latched and the latch held
-				as long as the row reference is used! */
-	trx_t*		trx)	/* in: transaction */
-{
-	dict_index_t*	clust_index;
-	dfield_t*	dfield;
-	byte*		field;
-	ulint		len;
-	ulint		ref_len;
-	ulint		pos;
-	ulint		clust_col_prefix_len;
-	ulint		i;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	ut_a(ref);
-	ut_a(index);
-	ut_a(rec);
-
-	if (UNIV_UNLIKELY(!index->table)) {
-		fputs("InnoDB: table ", stderr);
-notfound:
-		ut_print_name(stderr, trx, TRUE, index->table_name);
-		fputs(" for index ", stderr);
-		ut_print_name(stderr, trx, FALSE, index->name);
-		fputs(" not found\n", stderr);
-		ut_error;
-	}
-
-	clust_index = dict_table_get_first_index(index->table);
-
-	if (!clust_index) {
-		fputs("InnoDB: clust index for table ", stderr);
-		goto notfound;
-	}
-
-	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
-	ref_len = dict_index_get_n_unique(clust_index);
-
-	ut_ad(ref_len == dtuple_get_n_fields(ref));
-
-	dict_index_copy_types(ref, clust_index, ref_len);
-
-	for (i = 0; i < ref_len; i++) {
-		dfield = dtuple_get_nth_field(ref, i);
-
-		pos = dict_index_get_nth_field_pos(index, clust_index, i);
-
-		ut_a(pos != ULINT_UNDEFINED);
-
-		field = rec_get_nth_field(rec, offsets, pos, &len);
-
-		dfield_set_data(dfield, field, len);
-
-		/* If the primary key contains a column prefix, then the
-		secondary index may contain a longer prefix of the same
-		column, or the full column, and we must adjust the length
-		accordingly. */
-
-		clust_col_prefix_len = dict_index_get_nth_field(
-			clust_index, i)->prefix_len;
-
-		if (clust_col_prefix_len > 0) {
-			if (len != UNIV_SQL_NULL) {
-
-				const dtype_t*	dtype
-					= dfield_get_type(dfield);
-
-				dfield_set_len(dfield,
-					       dtype_get_at_most_n_mbchars(
-						       dtype->prtype,
-						       dtype->mbminlen,
-						       dtype->mbmaxlen,
-						       clust_col_prefix_len,
-						       len, (char*) field));
-			}
-		}
-	}
-
-	ut_ad(dtuple_check_typed(ref));
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-}
-
-/***********************************************************************
-From a row build a row reference with which we can search the clustered
-index record. */
-
-void
-row_build_row_ref_from_row(
-/*=======================*/
-	dtuple_t*	ref,	/* in/out: row reference built; see the
-				NOTE below! ref must have the right number
-				of fields! */
-	dict_table_t*	table,	/* in: table */
-	dtuple_t*	row)	/* in: row
-				NOTE: the data fields in ref will point
-				directly into data of this row */
-{
-	dict_index_t*	clust_index;
-	ulint		ref_len;
-	ulint		i;
-
-	ut_ad(ref && table && row);
-
-	clust_index = dict_table_get_first_index(table);
-
-	ref_len = dict_index_get_n_unique(clust_index);
-
-	ut_ad(ref_len == dtuple_get_n_fields(ref));
-
-	for (i = 0; i < ref_len; i++) {
-		const dict_col_t*	col;
-		dict_field_t*		field;
-		dfield_t*		dfield;
-		dfield_t*		dfield2;
-
-		dfield = dtuple_get_nth_field(ref, i);
-
-		field = dict_index_get_nth_field(clust_index, i);
-
-		col = dict_field_get_col(field);
-
-		dfield2 = dtuple_get_nth_field(row, dict_col_get_no(col));
-
-		dfield_copy(dfield, dfield2);
-
-		if (field->prefix_len > 0
-		    && dfield->len != UNIV_SQL_NULL) {
-
-			dfield->len = dtype_get_at_most_n_mbchars(
-				col->prtype, col->mbminlen, col->mbmaxlen,
-				field->prefix_len, dfield->len, dfield->data);
-		}
-	}
-
-	ut_ad(dtuple_check_typed(ref));
-}
-
-/*******************************************************************
-Searches the clustered index record for a row, if we have the row reference. */
-
-ibool
-row_search_on_row_ref(
-/*==================*/
-				/* out: TRUE if found */
-	btr_pcur_t*	pcur,	/* in/out: persistent cursor, which must
-				be closed by the caller */
-	ulint		mode,	/* in: BTR_MODIFY_LEAF, ... */
-	dict_table_t*	table,	/* in: table */
-	dtuple_t*	ref,	/* in: row reference */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ulint		low_match;
-	rec_t*		rec;
-	dict_index_t*	index;
-
-	ut_ad(dtuple_check_typed(ref));
-
-	index = dict_table_get_first_index(table);
-
-	ut_a(dtuple_get_n_fields(ref) == dict_index_get_n_unique(index));
-
-	btr_pcur_open(index, ref, PAGE_CUR_LE, mode, pcur, mtr);
-
-	low_match = btr_pcur_get_low_match(pcur);
-
-	rec = btr_pcur_get_rec(pcur);
-
-	if (page_rec_is_infimum(rec)) {
-
-		return(FALSE);
-	}
-
-	if (low_match != dtuple_get_n_fields(ref)) {
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-
-/*************************************************************************
-Fetches the clustered index record for a secondary index record. The latches
-on the secondary index record are preserved. */
-
-rec_t*
-row_get_clust_rec(
-/*==============*/
-				/* out: record or NULL, if no record found */
-	ulint		mode,	/* in: BTR_MODIFY_LEAF, ... */
-	rec_t*		rec,	/* in: record in a secondary index */
-	dict_index_t*	index,	/* in: secondary index */
-	dict_index_t**	clust_index,/* out: clustered index */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	mem_heap_t*	heap;
-	dtuple_t*	ref;
-	dict_table_t*	table;
-	btr_pcur_t	pcur;
-	ibool		found;
-	rec_t*		clust_rec;
-
-	ut_ad((index->type & DICT_CLUSTERED) == 0);
-
-	table = index->table;
-
-	heap = mem_heap_create(256);
-
-	ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec, heap);
-
-	found = row_search_on_row_ref(&pcur, mode, table, ref, mtr);
-
-	clust_rec = found ? btr_pcur_get_rec(&pcur) : NULL;
-
-	mem_heap_free(heap);
-
-	btr_pcur_close(&pcur);
-
-	*clust_index = dict_table_get_first_index(table);
-
-	return(clust_rec);
-}
-
-/*******************************************************************
-Searches an index record. */
-
-ibool
-row_search_index_entry(
-/*===================*/
-				/* out: TRUE if found */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry */
-	ulint		mode,	/* in: BTR_MODIFY_LEAF, ... */
-	btr_pcur_t*	pcur,	/* in/out: persistent cursor, which must
-				be closed by the caller */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ulint	n_fields;
-	ulint	low_match;
-	rec_t*	rec;
-
-	ut_ad(dtuple_check_typed(entry));
-
-	btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr);
-	low_match = btr_pcur_get_low_match(pcur);
-
-	rec = btr_pcur_get_rec(pcur);
-
-	n_fields = dtuple_get_n_fields(entry);
-
-	if (page_rec_is_infimum(rec)) {
-
-		return(FALSE);
-	}
-
-	if (low_match != n_fields) {
-		/* Not found */
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
diff --git a/storage/innobase/srv/srv0que.c b/storage/innobase/srv/srv0que.c
deleted file mode 100644
index e2b4e217980..00000000000
--- a/storage/innobase/srv/srv0que.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/******************************************************
-Server query execution
-
-(c) 1996 Innobase Oy
-
-Created 6/5/1996 Heikki Tuuri
-*******************************************************/
-
-#include "srv0que.h"
-
-#include "srv0srv.h"
-#include "sync0sync.h"
-#include "os0thread.h"
-#include "usr0sess.h"
-#include "que0que.h"
-
-/**************************************************************************
-Checks if there is work to do in the server task queue. If there is, the
-thread starts processing a task. Before leaving, it again checks the task
-queue and picks a new task if any exists. This is called by a SRV_WORKER
-thread. */
-
-void
-srv_que_task_queue_check(void)
-/*==========================*/
-{
-	que_thr_t*	thr;
-
-	for (;;) {
-		mutex_enter(&kernel_mutex);
-
-		thr = UT_LIST_GET_FIRST(srv_sys->tasks);
-
-		if (thr == NULL) {
-			mutex_exit(&kernel_mutex);
-
-			return;
-		}
-
-		UT_LIST_REMOVE(queue, srv_sys->tasks, thr);
-
-		mutex_exit(&kernel_mutex);
-
-		que_run_threads(thr);
-	}
-}
-
-/**************************************************************************
-Performs round-robin on the server tasks. This is called by a SRV_WORKER
-thread every second or so. */
-
-que_thr_t*
-srv_que_round_robin(
-/*================*/
-				/* out: the new (may be == thr) query thread
-				to run */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	que_thr_t*	new_thr;
-
-	ut_ad(thr);
-	ut_ad(thr->state == QUE_THR_RUNNING);
-
-	mutex_enter(&kernel_mutex);
-
-	UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
-
-	new_thr = UT_LIST_GET_FIRST(srv_sys->tasks);
-
-	mutex_exit(&kernel_mutex);
-
-	return(new_thr);
-}
-
-/**************************************************************************
-Enqueues a task to server task queue and releases a worker thread, if there
-is a suspended one. */
-
-void
-srv_que_task_enqueue_low(
-/*=====================*/
-	que_thr_t*	thr)	/* in: query thread */
-{
-	ut_ad(thr);
-	ut_ad(mutex_own(&kernel_mutex));
-
-	UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
-
-	srv_release_threads(SRV_WORKER, 1);
-}
-
-/**************************************************************************
-Enqueues a task to server task queue and releases a worker thread, if there
-is a suspended one. */
-
-void
-srv_que_task_enqueue(
-/*=================*/
-	que_thr_t*	thr)	/* in: query thread */
-{
-	ut_ad(thr);
-
-	ut_a(0);	/* Under MySQL this is never called */
-
-	mutex_enter(&kernel_mutex);
-
-	srv_que_task_enqueue_low(thr);
-
-	mutex_exit(&kernel_mutex);
-}
diff --git a/storage/innobase/usr/usr0sess.c b/storage/innobase/usr/usr0sess.c
deleted file mode 100644
index 3740c05eaab..00000000000
--- a/storage/innobase/usr/usr0sess.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/******************************************************
-Sessions
-
-(c) 1996 Innobase Oy
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#include "usr0sess.h"
-
-#ifdef UNIV_NONINL
-#include "usr0sess.ic"
-#endif
-
-#include "trx0trx.h"
-
-/*************************************************************************
-Closes a session, freeing the memory occupied by it. */
-static
-void
-sess_close(
-/*=======*/
-	sess_t*		sess);	/* in, own: session object */
-
-/*************************************************************************
-Opens a session. */
-
-sess_t*
-sess_open(void)
-/*===========*/
-					/* out, own: session object */
-{
-	sess_t*	sess;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	sess = mem_alloc(sizeof(sess_t));
-
-	sess->state = SESS_ACTIVE;
-
-	sess->trx = trx_create(sess);
-
-	UT_LIST_INIT(sess->graphs);
-
-	return(sess);
-}
-
-/*************************************************************************
-Closes a session, freeing the memory occupied by it. */
-static
-void
-sess_close(
-/*=======*/
-	sess_t*	sess)	/* in, own: session object */
-{
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(sess->trx == NULL);
-
-	mem_free(sess);
-}
-
-/*************************************************************************
-Closes a session, freeing the memory occupied by it, if it is in a state
-where it should be closed. */
-
-ibool
-sess_try_close(
-/*===========*/
-			/* out: TRUE if closed */
-	sess_t*	sess)	/* in, own: session object */
-{
-	ut_ad(mutex_own(&kernel_mutex));
-
-	if (UT_LIST_GET_LEN(sess->graphs) == 0) {
-		sess_close(sess);
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
diff --git a/storage/innobase/ut/ut0byte.c b/storage/innobase/ut/ut0byte.c
deleted file mode 100644
index b5467fde601..00000000000
--- a/storage/innobase/ut/ut0byte.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*******************************************************************
-Byte utilities
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/11/1994 Heikki Tuuri
-********************************************************************/
-
-#include "ut0byte.h"
-
-#ifdef UNIV_NONINL
-#include "ut0byte.ic"
-#endif
-
-#include "ut0sort.h"
-
-/* Zero value for a dulint */
-dulint	ut_dulint_zero		= {0, 0};
-
-/* Maximum value for a dulint */
-dulint	ut_dulint_max		= {0xFFFFFFFFUL, 0xFFFFFFFFUL};
-
-/****************************************************************
-Sort function for dulint arrays. */
-void
-ut_dulint_sort(dulint* arr, dulint* aux_arr, ulint low, ulint high)
-/*===============================================================*/
-{
-	UT_SORT_FUNCTION_BODY(ut_dulint_sort, arr, aux_arr, low, high,
-			      ut_dulint_cmp);
-}
diff --git a/storage/innobase/ut/ut0dbg.c b/storage/innobase/ut/ut0dbg.c
deleted file mode 100644
index 8c4be190d77..00000000000
--- a/storage/innobase/ut/ut0dbg.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*********************************************************************
-Debug utilities for Innobase.
-
-(c) 1994, 1995 Innobase Oy
-
-Created 1/30/1994 Heikki Tuuri
-**********************************************************************/
-
-#include "univ.i"
-
-#if defined(__GNUC__) && (__GNUC__ > 2)
-#else
-/* This is used to eliminate compiler warnings */
-ulint	ut_dbg_zero	= 0;
-#endif
-
-#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/* If this is set to TRUE all threads will stop into the next assertion
-and assert */
-ibool	ut_dbg_stop_threads	= FALSE;
-#endif
-#ifdef __NETWARE__
-ibool panic_shutdown = FALSE;	/* This is set to TRUE when on NetWare there
-				happens an InnoDB assertion failure or other
-				fatal error condition that requires an
-				immediate shutdown. */
-#elif !defined(UT_DBG_USE_ABORT)
-/* Null pointer used to generate memory trap */
-
-ulint*	ut_dbg_null_ptr		= NULL;
-#endif
-
-/*****************************************************************
-Report a failed assertion. */
-
-void
-ut_dbg_assertion_failed(
-/*====================*/
-	const char* expr,	/* in: the failed assertion (optional) */
-	const char* file,	/* in: source file containing the assertion */
-	ulint line)		/* in: line number of the assertion */
-{
-	ut_print_timestamp(stderr);
-	fprintf(stderr,
-		"  InnoDB: Assertion failure in thread %lu"
-		" in file %s line %lu\n",
-		os_thread_pf(os_thread_get_curr_id()), file, line);
-	if (expr) {
-		fprintf(stderr,
-			"InnoDB: Failing assertion: %s\n", expr);
-	}
-
-	fputs("InnoDB: We intentionally generate a memory trap.\n"
-	      "InnoDB: Submit a detailed bug report"
-	      " to http://bugs.mysql.com.\n"
-	      "InnoDB: If you get repeated assertion failures"
-	      " or crashes, even\n"
-	      "InnoDB: immediately after the mysqld startup, there may be\n"
-	      "InnoDB: corruption in the InnoDB tablespace. Please refer to\n"
-	      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-	      "forcing-recovery.html\n"
-	      "InnoDB: about forcing recovery.\n", stderr);
-#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-	ut_dbg_stop_threads = TRUE;
-#endif
-}
-
-#ifdef __NETWARE__
-/*****************************************************************
-Shut down MySQL/InnoDB after assertion failure. */
-
-void
-ut_dbg_panic(void)
-/*==============*/
-{
-	if (!panic_shutdown) {
-		panic_shutdown = TRUE;
-		innobase_shutdown_for_mysql();
-	}
-	exit(1);
-}
-#else /* __NETWARE__ */
-# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/*****************************************************************
-Stop a thread after assertion failure. */
-
-void
-ut_dbg_stop_thread(
-/*===============*/
-	const char*	file,
-	ulint		line)
-{
-	fprintf(stderr, "InnoDB: Thread %lu stopped in file %s line %lu\n",
-		os_thread_pf(os_thread_get_curr_id()), file, line);
-	os_thread_sleep(1000000000);
-}
-# endif
-#endif /* __NETWARE__ */
diff --git a/storage/innobase/ut/ut0vec.c b/storage/innobase/ut/ut0vec.c
deleted file mode 100644
index e0d3e84d4a2..00000000000
--- a/storage/innobase/ut/ut0vec.c
+++ /dev/null
@@ -1,54 +0,0 @@
-#include "ut0vec.h"
-#ifdef UNIV_NONINL
-#include "ut0vec.ic"
-#endif
-#include <string.h>
-
-/********************************************************************
-Create a new vector with the given initial size. */
-
-ib_vector_t*
-ib_vector_create(
-/*=============*/
-				/* out: vector */
-	mem_heap_t*	heap,	/* in: heap */
-	ulint		size)	/* in: initial size */
-{
-	ib_vector_t*	vec;
-
-	ut_a(size > 0);
-
-	vec = mem_heap_alloc(heap, sizeof(*vec));
-
-	vec->heap = heap;
-	vec->data = mem_heap_alloc(heap, sizeof(void*) * size);
-	vec->used = 0;
-	vec->total = size;
-
-	return(vec);
-}
-
-/********************************************************************
-Push a new element to the vector, increasing its size if necessary. */
-
-void
-ib_vector_push(
-/*===========*/
-	ib_vector_t*	vec,	/* in: vector */
-	void*		elem)	/* in: data element */
-{
-	if (vec->used >= vec->total) {
-		void**	new_data;
-		ulint	new_total = vec->total * 2;
-
-		new_data = mem_heap_alloc(vec->heap,
-					  sizeof(void*) * new_total);
-		memcpy(new_data, vec->data, sizeof(void*) * vec->total);
-
-		vec->data = new_data;
-		vec->total = new_total;
-	}
-
-	vec->data[vec->used] = elem;
-	vec->used++;
-}
diff --git a/storage/innobase/ut/ut0wqueue.c b/storage/innobase/ut/ut0wqueue.c
deleted file mode 100644
index 7e090e89a4f..00000000000
--- a/storage/innobase/ut/ut0wqueue.c
+++ /dev/null
@@ -1,92 +0,0 @@
-#include "ut0wqueue.h"
-
-/********************************************************************
-Create a new work queue. */
-
-ib_wqueue_t*
-ib_wqueue_create(void)
-/*===================*/
-			/* out: work queue */
-{
-	ib_wqueue_t*	wq = mem_alloc(sizeof(ib_wqueue_t));
-
-	mutex_create(&wq->mutex, SYNC_WORK_QUEUE);
-
-	wq->items = ib_list_create();
-	wq->event = os_event_create(NULL);
-
-	return(wq);
-}
-
-/********************************************************************
-Free a work queue. */
-
-void
-ib_wqueue_free(
-/*===========*/
-	ib_wqueue_t*	wq)	/* in: work queue */
-{
-	ut_a(!ib_list_get_first(wq->items));
-
-	mutex_free(&wq->mutex);
-	ib_list_free(wq->items);
-	os_event_free(wq->event);
-
-	mem_free(wq);
-}
-
-/********************************************************************
-Add a work item to the queue. */
-
-void
-ib_wqueue_add(
-/*==========*/
-	ib_wqueue_t*	wq,	/* in: work queue */
-	void*		item,	/* in: work item */
-	mem_heap_t*	heap)	/* in: memory heap to use for allocating the
-				list node */
-{
-	mutex_enter(&wq->mutex);
-
-	ib_list_add_last(wq->items, item, heap);
-	os_event_set(wq->event);
-
-	mutex_exit(&wq->mutex);
-}
-
-/********************************************************************
-Wait for a work item to appear in the queue. */
-
-void*
-ib_wqueue_wait(
-				/* out: work item */
-	ib_wqueue_t*	wq)	/* in: work queue */
-{
-	ib_list_node_t*	node;
-
-	for (;;) {
-		os_event_wait(wq->event);
-
-		mutex_enter(&wq->mutex);
-
-		node = ib_list_get_first(wq->items);
-
-		if (node) {
-			ib_list_remove(wq->items, node);
-
-			if (!ib_list_get_first(wq->items)) {
-				/* We must reset the event when the list
-				gets emptied. */
-				os_event_reset(wq->event);
-			}
-
-			break;
-		}
-
-		mutex_exit(&wq->mutex);
-	}
-
-	mutex_exit(&wq->mutex);
-
-	return(node->data);
-}
diff --git a/storage/innobase/win_atomics32_test.c b/storage/innobase/win_atomics32_test.c
deleted file mode 100644
index fcb88d6b54e..00000000000
--- a/storage/innobase/win_atomics32_test.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright (C) 2009 Sun Microsystems AB
- 
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation; version 2 of the License.
- 
-  This program is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
- 
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
-
-#include <windows.h>
-
-int main()
-{
-  volatile long var32 = 0;
-  long add32 = 1;
-  long old32 = 0;
-  long exch32 = 1;
-  long ret_value;
-
-  ret_value = InterlockedExchangeAdd(&var32, add32);
-  ret_value = InterlockedCompareExchange(&var32, exch32, old32);
-  MemoryBarrier();
-  return EXIT_SUCCESS;
-}
diff --git a/storage/innobase/win_atomics64_test.c b/storage/innobase/win_atomics64_test.c
deleted file mode 100644
index 123cb6d98cf..00000000000
--- a/storage/innobase/win_atomics64_test.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright (C) 2009 Sun Microsystems AB
-  
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation; version 2 of the License.
-  
-  This program is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-  
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
-
-#include <windows.h>
-
-int main()
-{
-  volatile long long var64 = 0;
-  long long add64 = 1;
-  long long old64 = 0;
-  long long exch64 = 1;
-  long long ret_value;
-
-  ret_value = InterlockedExchangeAdd64(&var64, add64);
-  ret_value = InterlockedCompareExchange64(&var64, exch64, old64);
-  MemoryBarrier();
-  return EXIT_SUCCESS;
-}
diff --git a/storage/innodb_plugin/CMakeLists.txt b/storage/innodb_plugin/CMakeLists.txt
new file mode 100644
index 00000000000..7762ece9bcd
--- /dev/null
+++ b/storage/innodb_plugin/CMakeLists.txt
@@ -0,0 +1,78 @@
+# Copyright (C) 2009 Oracle/Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# This is the CMakeLists for InnoDB Plugin
+
+
+SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
+SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
+INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
+IF (CMAKE_SIZEOF_VOID_P MATCHES 8)
+	SET(WIN64 TRUE)
+ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8)
+
+# Include directories under innodb_plugin
+INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innodb_plugin/include
+		    ${CMAKE_SOURCE_DIR}/storage/innodfb_plugin/handler)
+
+# Include directories under mysql
+INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include
+		    ${CMAKE_SOURCE_DIR}/sql
+		    ${CMAKE_SOURCE_DIR}/regex
+		    ${CMAKE_SOURCE_DIR}/zlib
+		    ${CMAKE_SOURCE_DIR}/extra/yassl/include)
+
+# Removing compiler optimizations for innodb/mem/* files on 64-bit Windows
+# due to 64-bit compiler error, See MySQL Bug #19424, #36366, #34297
+IF(MSVC AND $(WIN64))
+	SET_SOURCE_FILES_PROPERTIES(mem/mem0mem.c mem/mem0pool.c
+				    PROPERTIES COMPILE_FLAGS -Od)
+ENDIF(MSVC AND $(WIN64))
+
+SET(INNODB_PLUGIN_SOURCES	btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
+			buf/buf0buddy.c buf/buf0buf.c buf/buf0flu.c buf/buf0lru.c buf/buf0rea.c
+			data/data0data.c data/data0type.c
+			dict/dict0boot.c dict/dict0crea.c dict/dict0dict.c dict/dict0load.c dict/dict0mem.c
+			dyn/dyn0dyn.c
+			eval/eval0eval.c eval/eval0proc.c
+			fil/fil0fil.c
+			fsp/fsp0fsp.c
+			fut/fut0fut.c fut/fut0lst.c
+			ha/ha0ha.c ha/hash0hash.c ha/ha0storage.c
+			ibuf/ibuf0ibuf.c
+			pars/lexyy.c pars/pars0grm.c pars/pars0opt.c pars/pars0pars.c pars/pars0sym.c
+			lock/lock0lock.c lock/lock0iter.c
+			log/log0log.c log/log0recv.c
+			mach/mach0data.c
+			mem/mem0mem.c mem/mem0pool.c
+			mtr/mtr0log.c mtr/mtr0mtr.c
+			os/os0file.c os/os0proc.c os/os0sync.c os/os0thread.c
+			page/page0cur.c page/page0page.c page/page0zip.c
+			que/que0que.c
+			handler/ha_innodb.cc handler/handler0alter.cc handler/i_s.cc handler/mysql_addons.cc
+			read/read0read.c
+			rem/rem0cmp.c rem/rem0rec.c
+			row/row0ext.c row/row0ins.c row/row0merge.c row/row0mysql.c row/row0purge.c row/row0row.c
+			row/row0sel.c row/row0uins.c row/row0umod.c row/row0undo.c row/row0upd.c row/row0vers.c
+			srv/srv0que.c srv/srv0srv.c srv/srv0start.c
+			sync/sync0arr.c sync/sync0rw.c sync/sync0sync.c
+			thr/thr0loc.c
+			trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c
+			trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
+			usr/usr0sess.c
+			ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
+			ut/ut0list.c ut/ut0wqueue.c)
+ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DIB_HAVE_PAUSE_INSTRUCTION)
+MYSQL_STORAGE_ENGINE(INNODB_PLUGIN)
\ No newline at end of file
diff --git a/storage/innodb_plugin/COPYING b/storage/innodb_plugin/COPYING
new file mode 100644
index 00000000000..6b106e18fdb
--- /dev/null
+++ b/storage/innodb_plugin/COPYING
@@ -0,0 +1,351 @@
+                      GNU GENERAL PUBLIC LICENSE
+                         Version 2, June 1991
+
+     Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+     59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
+
+     Everyone is permitted to copy and distribute verbatim copies
+     of this license document, but changing it is not allowed.
+
+Preamble
+========
+
+The licenses for most software are designed to take away your freedom
+to share and change it.  By contrast, the GNU General Public License is
+intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+When we speak of free software, we are referring to freedom, not price.
+Our General Public Licenses are designed to make sure that you have
+the freedom to distribute copies of free software (and charge for this
+service if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs; and that you know you can do these things.
+
+To protect your rights, we need to make restrictions that forbid anyone
+to deny you these rights or to ask you to surrender the rights.  These
+restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+For example, if you distribute copies of such a program, whether gratis
+or for a fee, you must give the recipients all the rights that you
+have.  You must make sure that they, too, receive or can get the source
+code.  And you must show them these terms so they know their rights.
+
+We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+Finally, any free program is threatened constantly by software patents.
+We wish to avoid the danger that redistributors of a free program will
+individually obtain patent licenses, in effect making the program
+proprietary.  To prevent this, we have made it clear that any patent
+must be licensed for everyone's free use or not licensed at all.
+
+The precise terms and conditions for copying, distribution and
+modification follow.
+
+                      GNU GENERAL PUBLIC LICENSE
+    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+  0. This License applies to any program or other work which contains a
+     notice placed by the copyright holder saying it may be distributed
+     under the terms of this General Public License.  The "Program",
+     below, refers to any such program or work, and a "work based on
+     the Program" means either the Program or any derivative work under
+     copyright law: that is to say, a work containing the Program or a
+     portion of it, either verbatim or with modifications and/or
+     translated into another language.  (Hereinafter, translation is
+     included without limitation in the term "modification".)  Each
+     licensee is addressed as "you".
+
+     Activities other than copying, distribution and modification are
+     not covered by this License; they are outside its scope.  The act
+     of running the Program is not restricted, and the output from the
+     Program is covered only if its contents constitute a work based on
+     the Program (independent of having been made by running the
+     Program).  Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+     source code as you receive it, in any medium, provided that you
+     conspicuously and appropriately publish on each copy an appropriate
+     copyright notice and disclaimer of warranty; keep intact all the
+     notices that refer to this License and to the absence of any
+     warranty; and give any other recipients of the Program a copy of
+     this License along with the Program.
+
+     You may charge a fee for the physical act of transferring a copy,
+     and you may at your option offer warranty protection in exchange
+     for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+     of it, thus forming a work based on the Program, and copy and
+     distribute such modifications or work under the terms of Section 1
+     above, provided that you also meet all of these conditions:
+
+       a. You must cause the modified files to carry prominent notices
+          stating that you changed the files and the date of any change.
+
+       b. You must cause any work that you distribute or publish, that
+          in whole or in part contains or is derived from the Program
+          or any part thereof, to be licensed as a whole at no charge
+          to all third parties under the terms of this License.
+
+       c. If the modified program normally reads commands interactively
+          when run, you must cause it, when started running for such
+          interactive use in the most ordinary way, to print or display
+          an announcement including an appropriate copyright notice and
+          a notice that there is no warranty (or else, saying that you
+          provide a warranty) and that users may redistribute the
+          program under these conditions, and telling the user how to
+          view a copy of this License.  (Exception: if the Program
+          itself is interactive but does not normally print such an
+          announcement, your work based on the Program is not required
+          to print an announcement.)
+
+     These requirements apply to the modified work as a whole.  If
+     identifiable sections of that work are not derived from the
+     Program, and can be reasonably considered independent and separate
+     works in themselves, then this License, and its terms, do not
+     apply to those sections when you distribute them as separate
+     works.  But when you distribute the same sections as part of a
+     whole which is a work based on the Program, the distribution of
+     the whole must be on the terms of this License, whose permissions
+     for other licensees extend to the entire whole, and thus to each
+     and every part regardless of who wrote it.
+
+     Thus, it is not the intent of this section to claim rights or
+     contest your rights to work written entirely by you; rather, the
+     intent is to exercise the right to control the distribution of
+     derivative or collective works based on the Program.
+
+     In addition, mere aggregation of another work not based on the
+     Program with the Program (or with a work based on the Program) on
+     a volume of a storage or distribution medium does not bring the
+     other work under the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+     under Section 2) in object code or executable form under the terms
+     of Sections 1 and 2 above provided that you also do one of the
+     following:
+
+       a. Accompany it with the complete corresponding machine-readable
+          source code, which must be distributed under the terms of
+          Sections 1 and 2 above on a medium customarily used for
+          software interchange; or,
+
+       b. Accompany it with a written offer, valid for at least three
+          years, to give any third-party, for a charge no more than your
+          cost of physically performing source distribution, a complete
+          machine-readable copy of the corresponding source code, to be
+          distributed under the terms of Sections 1 and 2 above on a
+          medium customarily used for software interchange; or,
+
+       c. Accompany it with the information you received as to the offer
+          to distribute corresponding source code.  (This alternative is
+          allowed only for noncommercial distribution and only if you
+          received the program in object code or executable form with
+          such an offer, in accord with Subsection b above.)
+
+     The source code for a work means the preferred form of the work for
+     making modifications to it.  For an executable work, complete
+     source code means all the source code for all modules it contains,
+     plus any associated interface definition files, plus the scripts
+     used to control compilation and installation of the executable.
+     However, as a special exception, the source code distributed need
+     not include anything that is normally distributed (in either
+     source or binary form) with the major components (compiler,
+     kernel, and so on) of the operating system on which the executable
+     runs, unless that component itself accompanies the executable.
+
+     If distribution of executable or object code is made by offering
+     access to copy from a designated place, then offering equivalent
+     access to copy the source code from the same place counts as
+     distribution of the source code, even though third parties are not
+     compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+     except as expressly provided under this License.  Any attempt
+     otherwise to copy, modify, sublicense or distribute the Program is
+     void, and will automatically terminate your rights under this
+     License.  However, parties who have received copies, or rights,
+     from you under this License will not have their licenses
+     terminated so long as such parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+     signed it.  However, nothing else grants you permission to modify
+     or distribute the Program or its derivative works.  These actions
+     are prohibited by law if you do not accept this License.
+     Therefore, by modifying or distributing the Program (or any work
+     based on the Program), you indicate your acceptance of this
+     License to do so, and all its terms and conditions for copying,
+     distributing or modifying the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+     Program), the recipient automatically receives a license from the
+     original licensor to copy, distribute or modify the Program
+     subject to these terms and conditions.  You may not impose any
+     further restrictions on the recipients' exercise of the rights
+     granted herein.  You are not responsible for enforcing compliance
+     by third parties to this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+     infringement or for any other reason (not limited to patent
+     issues), conditions are imposed on you (whether by court order,
+     agreement or otherwise) that contradict the conditions of this
+     License, they do not excuse you from the conditions of this
+     License.  If you cannot distribute so as to satisfy simultaneously
+     your obligations under this License and any other pertinent
+     obligations, then as a consequence you may not distribute the
+     Program at all.  For example, if a patent license would not permit
+     royalty-free redistribution of the Program by all those who
+     receive copies directly or indirectly through you, then the only
+     way you could satisfy both it and this License would be to refrain
+     entirely from distribution of the Program.
+
+     If any portion of this section is held invalid or unenforceable
+     under any particular circumstance, the balance of the section is
+     intended to apply and the section as a whole is intended to apply
+     in other circumstances.
+
+     It is not the purpose of this section to induce you to infringe any
+     patents or other property right claims or to contest validity of
+     any such claims; this section has the sole purpose of protecting
+     the integrity of the free software distribution system, which is
+     implemented by public license practices.  Many people have made
+     generous contributions to the wide range of software distributed
+     through that system in reliance on consistent application of that
+     system; it is up to the author/donor to decide if he or she is
+     willing to distribute software through any other system and a
+     licensee cannot impose that choice.
+
+     This section is intended to make thoroughly clear what is believed
+     to be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+     certain countries either by patents or by copyrighted interfaces,
+     the original copyright holder who places the Program under this
+     License may add an explicit geographical distribution limitation
+     excluding those countries, so that distribution is permitted only
+     in or among countries not thus excluded.  In such case, this
+     License incorporates the limitation as if written in the body of
+     this License.
+
+  9. The Free Software Foundation may publish revised and/or new
+     versions of the General Public License from time to time.  Such
+     new versions will be similar in spirit to the present version, but
+     may differ in detail to address new problems or concerns.
+
+     Each version is given a distinguishing version number.  If the
+     Program specifies a version number of this License which applies
+     to it and "any later version", you have the option of following
+     the terms and conditions either of that version or of any later
+     version published by the Free Software Foundation.  If the Program
+     does not specify a version number of this License, you may choose
+     any version ever published by the Free Software Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+     programs whose distribution conditions are different, write to the
+     author to ask for permission.  For software which is copyrighted
+     by the Free Software Foundation, write to the Free Software
+     Foundation; we sometimes make exceptions for this.  Our decision
+     will be guided by the two goals of preserving the free status of
+     all derivatives of our free software and of promoting the sharing
+     and reuse of software generally.
+
+                                NO WARRANTY
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
+     WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
+     LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+     HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT
+     WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT
+     NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+     FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS TO THE
+     QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+     PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY
+     SERVICING, REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+     WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY
+     MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE
+     LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
+     INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
+     INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+     DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU
+     OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY
+     OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN
+     ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+                      END OF TERMS AND CONDITIONS
+How to Apply These Terms to Your New Programs
+=============================================
+
+If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these
+terms.
+
+To do so, attach the following notices to the program.  It is safest to
+attach them to the start of each source file to most effectively convey
+the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+     ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES.
+     Copyright (C) YYYY  NAME OF AUTHOR
+
+     This program is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published by
+     the Free Software Foundation; either version 2 of the License, or
+     (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program; if not, write to the Free Software
+     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+     Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR
+     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+     This is free software, and you are welcome to redistribute it
+     under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the
+appropriate parts of the General Public License.  Of course, the
+commands you use may be called something other than `show w' and `show
+c'; they could even be mouse-clicks or menu items--whatever suits your
+program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+     Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+     `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+     SIGNATURE OF TY COON, 1 April 1989
+     Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library,
+you may consider it more useful to permit linking proprietary
+applications with the library.  If this is what you want to do, use the
+GNU Library General Public License instead of this License.
diff --git a/storage/innodb_plugin/COPYING.Google b/storage/innodb_plugin/COPYING.Google
new file mode 100644
index 00000000000..5ade2b0e381
--- /dev/null
+++ b/storage/innodb_plugin/COPYING.Google
@@ -0,0 +1,30 @@
+Portions of this software contain modifications contributed by Google, Inc.
+These contributions are used with the following license:
+
+Copyright (c) 2008, Google Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+      * Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+      * Redistributions in binary form must reproduce the above
+        copyright notice, this list of conditions and the following
+        disclaimer in the documentation and/or other materials
+        provided with the distribution.
+      * Neither the name of the Google Inc. nor the names of its
+        contributors may be used to endorse or promote products
+        derived from this software without specific prior written
+        permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/storage/innodb_plugin/COPYING.Percona b/storage/innodb_plugin/COPYING.Percona
new file mode 100644
index 00000000000..8c786811719
--- /dev/null
+++ b/storage/innodb_plugin/COPYING.Percona
@@ -0,0 +1,30 @@
+Portions of this software contain modifications contributed by Percona, Inc.
+These contributions are used with the following license:
+
+Copyright (c) 2008, 2009, Percona Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+      * Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+      * Redistributions in binary form must reproduce the above
+        copyright notice, this list of conditions and the following
+        disclaimer in the documentation and/or other materials
+        provided with the distribution.
+      * Neither the name of the Percona Inc. nor the names of its
+        contributors may be used to endorse or promote products
+        derived from this software without specific prior written
+        permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/storage/innodb_plugin/COPYING.Sun_Microsystems b/storage/innodb_plugin/COPYING.Sun_Microsystems
new file mode 100644
index 00000000000..5a77ef3ab73
--- /dev/null
+++ b/storage/innodb_plugin/COPYING.Sun_Microsystems
@@ -0,0 +1,31 @@
+Portions of this software contain modifications contributed by
+Sun Microsystems, Inc. These contributions are used with the following
+license:
+
+Copyright (c) 2009, Sun Microsystems, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+      * Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+      * Redistributions in binary form must reproduce the above
+        copyright notice, this list of conditions and the following
+        disclaimer in the documentation and/or other materials
+        provided with the distribution.
+      * Neither the name of Sun Microsystems, Inc. nor the names of its
+        contributors may be used to endorse or promote products derived
+        from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog
new file mode 100644
index 00000000000..2b04c06f0e8
--- /dev/null
+++ b/storage/innodb_plugin/ChangeLog
@@ -0,0 +1,1149 @@
+2009-07-20	The InnoDB Team
+
+	* buf/buf0rea.c, handler/ha_innodb.cc, include/srv0srv.h,
+	srv/srv0srv.c:
+	Change the read ahead parameter name to innodb_read_ahead_threshold.
+	Change the meaning of this parameter to signify the number of pages
+	that must be sequentially accessed for InnoDB to trigger a readahead
+	request.
+
+2009-07-20	The InnoDB Team
+
+	* handler/ha_innodb.cc:
+	Fix Bug#39802 On Windows, 32-bit time_t should be enforced
+
+2009-07-16	The InnoDB Team
+
+	* include/univ.i:
+	Support inlining of functions and prefetch with Sun Studio.
+	These changes are based on contribution from Sun Microsystems Inc.
+	under a BSD license.
+
+2009-07-14	The InnoDB Team
+
+	* fil/fil0fil.c:
+	Fix Bug#45814 URL reference in InnoDB server errors needs adjusting to
+	match documentation
+
+2009-07-14	The InnoDB Team
+
+	* handler/ha_innodb.cc, mysql-test/innodb_bug21704.result,
+	mysql-test/innodb_bug21704.test:
+	Fix Bug#21704 Renaming column does not update FK definition
+
+2009-07-10	The InnoDB Team
+
+	* handler/ha_innodb.cc, srv/srv0srv.c:
+	Change the defaults for
+	innodb_sync_spin_loops: 20 -> 30
+	innodb_spin_wait_delay: 5 -> 6
+
+2009-07-08	The InnoDB Team
+
+	* buf/buf0flu.c, handler/ha_innodb.cc, include/buf0flu.h,
+	include/log0log.h, include/log0log.ic, include/srv0srv.h,
+	srv/srv0srv.c:
+	Implement the adaptive flushing of dirty pages, which uses
+	a heuristics based flushing rate of dirty pages to avoid IO
+	bursts at checkpoint. Expose new configure knob
+	innodb_adaptive_flushing to control whether the new flushing
+	algorithm should be used.
+
+2009-07-07	The InnoDB Team
+
+	* handler/ha_innodb.cc, include/srv0srv.h, log/log0log.c,
+	srv/srv0srv.c:
+	Implement IO capacity tuning. Expose new configure knob
+	innodb_io_capacity to control the master threads IO rate. The
+	ibuf merge is also changed from synchronous to asynchronous.
+	These changes are based on contribution from Google Inc.
+	under a BSD license.
+
+2009-07-02	The InnoDB Team
+
+	* include/ut0ut.h, plug.in, ut/ut0ut.c:
+	Use the PAUSE instruction inside the spinloop if it is available,
+	Thanks to Mikael Ronstrom <mikael@mysql.com>.
+
+2009-06-29	The InnoDB Team
+
+	* handler/ha_innodb.cc, mysql-test/innodb_file_format.test,
+	mysql-test/innodb_file_format.result:
+	Do not crash on SET GLOBAL innodb_file_format=DEFAULT
+	or SET GLOBAL innodb_file_format_check=DEFAULT.
+
+2009-06-29	The InnoDB Team
+
+	* buf/buf0buf.c, buf/buf0rea.c, lock/lock0lock.c:
+	Tolerate missing tablespaces during crash recovery and when
+	printing information on locks.
+
+2009-06-29	The InnoDB Team
+
+	* buf/buf0buf.c:
+	Fix a race condition when reading buf_fix_count.
+	Currently, it is not being protected by the buffer pool mutex,
+	but by the block mutex.
+
+2009-06-29	The InnoDB Team
+
+	* handler/handler0alter.cc:
+	Start the user transaction prebuilt->trx if it was not started
+	before adding or dropping an index. Without this fix, the
+	table could be locked outside an active transaction.
+
+2009-06-25	The InnoDB Team
+
+	* handler/ha_innodb.cc, mysql-test/innodb_bug42101.test,
+	mysql-test/innodb_bug42101.result,
+	mysql-test/innodb_bug42101-nonzero.test,
+	mysql-test/innodb_bug42101-nonzero.result:
+	Fix Bug#45749 Race condition in SET GLOBAL
+	innodb_commit_concurrency=DEFAULT
+
+2009-06-25	The InnoDB Team
+
+	* dict/dict0dict.c:
+	When an index column cannot be found in the table during index
+	creation, display additional diagnostic before an assertion failure.
+	This does NOT fix Bug #44571 InnoDB Plugin crashes on ADD INDEX,
+	but it helps understand the reason of the crash.
+
+2009-06-17	The InnoDB Team
+
+	* row/row0merge.c:
+	Fix Bug#45426 UNIV_DEBUG build cause assertion error at CREATE INDEX
+
+2009-06-17	The InnoDB Team
+
+	* mysql-test/innodb_bug45357.result, mysql-test/innodb_bug45357.test,
+	row/row0mysql.c:
+	Fix Bug#45357 5.1.35 crashes with Failing assertion: index->type &
+	DICT_CLUSTERED
+
+2009-06-17	The InnoDB Team
+
+	* handler/ha_innodb.cc, mysql-test/innodb-autoinc.result,
+	mysql-test/innodb-autoinc.test:
+	Fix Bug#44030 Error: (1500) Couldn't read the MAX(ID) autoinc value
+	from the index (PRIMARY)
+
+2009-06-11	The InnoDB Team
+
+	* handler/ha_innodb.cc, mysql-test/innodb.result, srv/srv0srv.c:
+	Change the following defaults:
+	max_dirty_pages_pct: from 90 to 75, max allowed from 100 to 99
+	additional_mem_pool_size: from 1 to 8 MB
+	buffer_pool_size: from 8 to 128 MB
+	log_buffer_size: from 1 to 8 MB
+	read_io_threads/write_io_threads: from 1 to 4
+
+2009-06-09	The InnoDB Team
+
+	* handler/ha_innodb.cc, include/trx0trx.h, trx/trx0trx.c:
+	Enable Group Commit functionality that was broken in 5.0 when
+	distributed transactions were introduced.
+
+2009-06-05	The InnoDB Team
+
+	* handler/ha_innodb.cc, include/os0file.h, include/srv0srv.h,
+	os/os0file.c, srv/srv0srv.c, srv/srv0start.c:
+	Enable functionality to have multiple background IO helper threads.
+	Expose new configure knobs innodb_read_io_threads and
+	innodb_write_io_threads and deprecate innodb_file_io_threads (this
+	parameter was relevant only on windows). Internally this allows
+	multiple segments for read and write IO request arrays where one
+	thread works on one segment.
+
+2009-06-05	The InnoDB Team
+
+	* buf/buf0lru.c, buf/buf0rea.c, handler/ha_innodb.cc,
+	include/srv0srv.h, srv/srv0srv.c:
+	Fix a bug in linear read ahead:
+	  1) Take into account access pattern when deciding whether or not to
+	    do linear read ahead.
+	  2) Expose a knob innodb_read_ahead_factor = [0-64] default (8),
+	    dynamic, global to control linear read ahead behavior. This is the
+	    value of the number of pages that InnoDB will tolerate within a
+	    64 page extent even if they are accessed out of order or have
+	    not been accessed at all. This number (which varies from 0 to 64)
+	    is indicative of the slack that we have when deciding about linear
+	    readahead.
+	  3) Disable random read ahead. Keep the code for now.
+
+2009-06-03	The InnoDB Team
+
+	* dict/dict0dict.c, mysql-test/t/innodb_mysql.test,
+	mysql-test/r/innodb_mysql.result:
+	Fix Bug#39793 Foreign keys not constructed when column
+	has a '#' in a comment or default value
+
+2009-05-27	The InnoDB Team
+
+	* Doxyfile:
+	Allow the extraction of documentation from the code base with the
+	Doxygen tool. Convert and add many (but not yet all) comments to
+	Doxygen format.
+
+2009-05-19	The InnoDB Team
+
+	* btr/btr0btr.c, btr/btr0cur.c, lock/lock0lock.c,
+	include/page0page.ic, include/lock0lock.h, include/dict0dict.h,
+	include/page0page.h, include/dict0dict.ic, ibuf/ibuf0ibuf.c,
+	page/page0zip.c, page/page0page.c:
+	Write updates of PAGE_MAX_TRX_ID to the redo log and add debug
+	assertions for checking that PAGE_MAX_TRX_ID is valid on leaf
+	pages of secondary indexes and the insert buffer B-tree. This bug
+	could cause failures in secondary index lookups in consistent
+	reads right after crash recovery.
+
+2009-05-18	The InnoDB Team
+
+	* btr/btr0cur.c:
+	Correctly estimate the space needed on the compressed page when
+	performing an update by delete-and-insert.
+
+2009-05-14	The InnoDB Team
+
+	* handler/ha_innodb.cc, include/srv0srv.h,
+	mysql-test/innodb_bug42101-nonzero-master.opt,
+	mysql-test/innodb_bug42101-nonzero.result,
+	mysql-test/innodb_bug42101-nonzero.test,
+	mysql-test/innodb_bug42101.result, mysql-test/innodb_bug42101.test,
+	srv/srv0srv.c:
+	Fix Bug#42101 Race condition in innodb_commit_concurrency
+
+2009-05-13	The InnoDB Team
+
+	* dict/dict0dict.c:
+	Fix Bug#44320 InnoDB: missing DB_ROLL_PTR in Table Monitor COLUMNS
+	output
+
+2009-04-23	The InnoDB Team
+
+	* row/row0mysql.c:
+	When scanning indexes, report in the error log any error codes
+	returned by the search function. These error codes will still be
+	ignored in CHECK TABLE.
+
+2009-04-23	The InnoDB Team
+
+	* include/trx0types.h:
+	Define the logical type names trx_id_t, roll_ptr_t, and undo_no_t
+	and use them in place of dulint everywhere.
+
+2009-04-18	The InnoDB Team
+
+	* handler/ha_innodb.cc, include/pars0pars.h:
+	Fix Bug#29125 Windows Server X64: so many compiler warnings
+
+2009-04-16	The InnoDB Team
+
+	* include/univ.i:
+	Define REFMAN as the base URL of the MySQL Reference Manual and
+	use the macro in all diagnostic output.
+
+2009-04-16	The InnoDB Team
+
+	* CMakeLists.txt, include/os0sync.h, include/sync0sync.h,
+	include/sync0sync.ic, include/univ.i, srv/srv0start.c,
+	sync/sync0sync.c:
+	Use the Windows Interlocked functions for atomic memory
+	access.
+
+2009-04-15	The InnoDB Team
+
+	* mysql-test/innodb.result, mysql-test/innodb.test:
+	Fix Bug#43309 Test main.innodb can't be run twice
+
+2009-04-14	The InnoDB Team
+
+	* CMakeLists.txt, handler/win_delay_loader.cc,
+	win-plugin/win-plugin.diff:
+	Remove statically linked libraries from MySQL (zlib and strings).
+
+2009-04-11	The InnoDB Team
+
+	* CMakeLists.txt, win-plugin/README, win-plugin/win-plugin.diff:
+	Rewrite CMakeLists.txt.
+
+2009-04-07	The InnoDB Team
+
+	* include/os0sync.h, include/sync0rw.ic, include/sync0sync.h,
+	include/sync0sync.ic, include/univ.i, plug.in, srv/srv0srv.c,
+	srv/srv0start.c, sync/sync0arr.c, sync/sync0sync.c:
+	Enable atomics on Solaris (using the libc functions as defined in
+	atomic.h) if GCC atomic builtins are not present.
+
+2009-04-07	The InnoDB Team
+
+	* btr/btr0btr.c, dict/dict0dict.c, ibuf/ibuf0ibuf.c,
+	include/data0data.h, include/data0data.ic, include/data0type.h,
+	include/data0type.ic, include/dict0dict.h, include/dict0dict.ic,
+	include/rem0rec.ic, mysql-test/innodb.result, mysql-test/innodb.test,
+	pars/pars0pars.c, rem/rem0rec.c, row/row0upd.c:
+	Fix Bug#44032 In ROW_FORMAT=REDUNDANT, update UTF-8 CHAR
+	to/from NULL is not in-place
+
+2009-04-07	The InnoDB Team
+
+	* page/page0cur.c:
+	Fix Bug#43660 SHOW INDEXES/ANALYZE does NOT update cardinality for
+	indexes of InnoDB table
+
+2009-04-06	The InnoDB Team
+
+	* handler/ha_innodb.cc:
+	Make the parameter innodb_change_buffering settable by the
+	configuration file or mysqld command line options. Before this
+	fix, the initial value specified for this parameter was ignored.
+
+2009-04-06	The InnoDB Team
+
+	* sync/sync0rw.c:
+	Avoid a bogus failure in UNIV_SYNC_DEBUG diagnostics.
+
+2009-04-02	The InnoDB Team
+
+	* handler/ha_innodb.cc, include/srv0srv.h, srv/srv0srv.c:
+	Add new parameter innodb_spin_wait_delay to set the maximum delay
+	between polling for a spin lock.
+
+2009-04-02	The InnoDB Team
+
+	* dict/dict0crea.c, handler/ha_innodb.cc, handler/ha_innodb.h,
+	include/dict0mem.h, include/row0merge.h, include/row0mysql.h,
+	mysql-test/innodb-index.result, mysql-test/innodb-index.test,
+	row/row0merge.c, row/row0sel.c:
+	In consistent reads, refuse to use newly created indexes that may
+	lack history.
+
+2009-03-25	The InnoDB Team
+
+	* buf/buf0buf.c, handler/ha_innodb.cc, include/buf0buf.h:
+	In SHOW ENGINE INNODB MUTEX do not show the status of block->mutex,
+	block->lock, block->lock->mutex (if applicable) and all mutexes and
+	rw-locks for which number of os-waits are zero because this can
+	be overwhelming particularly when the buffer pool is very large.
+
+2009-03-20	The InnoDB Team
+
+	* buf/buf0buf.c, include/log0recv.h, log/log0recv.c:
+	Remove the compile-time constant parameters of
+	recv_recover_page(), recv_scan_log_recs(), and recv_sys_init().
+
+2009-03-20	The InnoDB Team
+
+	* data/data0type.c, handler/ha_innodb.cc, include/ha_prototypes.h:
+	Declare innobase_get_at_most_n_mbchars() in ha_prototypes.h.
+
+2009-03-20	The InnoDB Team
+
+	* fil/fil0fil.h, fil/fil0fil.c, srv/srv0start.c:
+	Add the parameter hash_size to fil_init().
+
+2009-03-20	The InnoDB Team
+
+	* fil/fil0fil.c:
+	Refer to fil_system directly, not via local variables.
+
+2009-03-20	The InnoDB Team
+
+	* page/page0page.c:
+	In page_validate(), always report the space id, page number and
+	the name of the index when corruption is noticed.
+
+2009-03-20	The InnoDB Team
+
+	* include/log0log.h, include/log0log.ic, log/log0log.c:
+	Add in/out comments or const qualifiers to some function
+	parameters as appropriate.
+
+2009-03-20	The InnoDB Team
+
+	* dict/dict0boot.c, dict/dict0dict.c, fsp/fsp0fsp.c,
+	include/dict0dict.h, include/srv0srv.h, srv/srv0srv.c,
+	page/page0page.c:
+	Replace srv_sys->dummy_ind1 and srv_sys->dummy_ind2 with
+	dict_ind_redundant and dict_ind_compact, which are
+	initialized by dict_init().
+
+2009-03-11	The InnoDB Team
+
+	InnoDB Plugin 1.0.3 released
+
+2009-03-05	The InnoDB Team
+
+	* handler/ha_innodb.cc, mysql-test/innodb-autoinc.result,
+	mysql-test/innodb-autoinc.test:
+	Fix Bug#43203 Overflow from auto incrementing causes server segv
+
+2009-02-25	The InnoDB Team
+
+	* handler/ha_innodb.cc, mysql-test/innodb-autoinc.result,
+	mysql-test/innodb-autoinc.test:
+	Fix Bug#42714 AUTO_INCREMENT errors in 5.1.31
+
+2009-02-23	The InnoDB Team
+
+	* btr/btr0cur.c:
+	Fix Bug#43043 Crash on BLOB delete operation
+
+2009-02-20	The InnoDB Team
+
+	* handler/ha_innodb.cc:
+	Make innodb_use_sys_malloc=ON the default.
+
+2009-02-20	The InnoDB Team
+
+	* handler/ha_innodb.cc, mysql-test/innodb-autoinc.result,
+	mysql-test/innodb-autoinc.test:
+	Fix Bug#42400 InnoDB autoinc code can't handle floating-point columns
+
+2009-02-18	The InnoDB Team
+
+	* include/ut0mem.h, os/os0proc.c, ut/ut0mem.c:
+	Protect ut_total_allocated_memory with ut_list_mutex in
+	os_mem_alloc_large() and os_mem_free_large(). The lack of this mutex
+	protection could cause an assertion failure during fast index
+	creation. Also, add UNIV_MEM_ALLOC and UNIV_MEM_FREE instrumentation
+	to os_mem_alloc_large() and os_mem_free_large(), so that Valgrind can
+	detect more errors.
+
+2009-02-11	The InnoDB Team
+
+	* handler/ha_innodb.cc:
+	Make innodb_thread_concurrency=0 the default. The old default value
+	was 8. A non-zero setting may be useful when InnoDB is showing severe
+	scalability problems under multiple concurrent connections.
+
+2009-02-10	The InnoDB Team
+
+	* handler/ha_innodb.cc, handler/ha_innodb.h:
+	Fix Bug#41676 Table names are case insensitive in locking
+
+2009-02-10	The InnoDB Team
+
+	* mem/mem0dbg.c, mem/mem0mem.c, mem/mem0pool.c:
+	When innodb_use_sys_malloc is set, ignore
+	innodb_additional_mem_pool_size, because nothing will be allocated
+	from mem_comm_pool.
+
+2009-02-10	The InnoDB Team
+
+	* ut/ut0mem.c:
+	Map ut_malloc_low(), ut_realloc(), and ut_free() directly to malloc(),
+	realloc(), and free() when innodb_use_sys_malloc is set. As a side
+	effect, ut_total_allocated_memory ("Total memory allocated" in the
+	"BUFFER POOL AND MEMORY" section of SHOW ENGINE INNODB STATUS) will
+	exclude any memory allocated by these functions when
+	innodb_use_sys_malloc is set.
+
+2009-02-10	The InnoDB Team
+
+	* btr/btr0cur.c, btr/btr0sea.c, buf/buf0buf.c, handler/ha_innodb.cc,
+	include/buf0buf.ic, include/os0sync.h, include/srv0srv.h,
+	include/sync0rw.h, include/sync0rw.ic, include/sync0sync.h,
+	include/sync0sync.ic, include/univ.i, row/row0sel.c, srv/srv0srv.c,
+	srv/srv0start.c, sync/sync0arr.c, sync/sync0rw.c, sync/sync0sync.c:
+	On those platforms that support it, implement the synchronization
+	primitives of InnoDB mutexes and read/write locks with GCC atomic
+	builtins instead of Pthreads mutexes and InnoDB mutexes. These changes
+	are based on a patch supplied by Mark Callaghan of Google under a BSD
+	license.
+
+2009-01-30	The InnoDB Team
+
+	* btr/btr0cur.c, btr/btr0sea.c, buf/buf0buf.c, handler/ha_innodb.cc,
+	include/btr0sea.h, include/buf0buf.h, include/sync0sync.h,
+	sync/sync0sync.c:
+	Make the configuration parameter innodb_adaptive_hash_index dynamic,
+	so that it can be changed at runtime.
+
+2009-01-29	The InnoDB Team
+
+	* handler/ha_innodb.cc, ibuf/ibuf0ibuf.c, include/ibuf0ibuf.h,
+	include/ibuf0ibuf.ic:
+	Implement the settable global variable innodb_change_buffering,
+	with the allowed values 'none' and 'inserts'. The default value
+	'inserts' enables the buffering of inserts to non-unique secondary
+	index trees when the B-tree leaf page is not in the buffer pool.
+
+2009-01-27	The InnoDB Team
+
+	* buf/buf0lru.c:
+	Fix a race condition in buf_LRU_invalidate_tablespace(): The
+	compressed page size (zip_size) was read while the block descriptor
+	was no longer protected by a mutex. This could lead to corruption
+	when a table is dropped on a busy system that contains compressed
+	tables.
+
+2009-01-26	The InnoDB Team
+
+	* btr/btr0sea.c, buf/buf0buf.c, include/buf0buf.h, include/buf0buf.ic,
+	include/mtr0log.ic, include/row0upd.ic, mtr/mtr0mtr.c:
+	Implement buf_block_align() with pointer arithmetics, as it is in the
+	built-in InnoDB distributed with MySQL. Do not acquire the buffer pool
+	mutex before buf_block_align(). This removes a scalability bottleneck
+	in the adaptive hash index lookup. In CHECK TABLE, check that
+	buf_pool->page_hash is consistent with buf_block_align().
+
+2009-01-23	The InnoDB Team
+
+	* btr/btr0sea.c:
+	Fix Bug#42279 Race condition in btr_search_drop_page_hash_when_freed()
+
+2009-01-23	The InnoDB Team
+
+	* buf/buf0buf.c, include/buf0buf.h:
+	Remove the unused mode BUF_GET_NOWAIT of buf_page_get_gen()
+
+2009-01-20	The InnoDB Team
+
+	* include/rem0rec.h, include/rem0rec.ic:
+	Fix Bug#41571 MySQL segfaults after innodb recovery
+
+2009-01-20	The InnoDB Team
+
+	* lock/lock0lock.c:
+	Fix Bug#42152 Race condition in lock_is_table_exclusive()
+
+2009-01-14	The InnoDB Team
+
+	* include/trx0roll.h, trx/trx0roll.c, trx/trx0trx.c:
+	Fix Bug#38187 Error 153 when creating savepoints
+
+2009-01-14	The InnoDB Team
+
+	* dict/dict0load.c:
+	Fix Bug#42075 dict_load_indexes failure in dict_load_table will
+	corrupt the dictionary cache
+
+2009-01-13	The InnoDB Team
+
+	* buf/buf0buddy.c, dict/dict0dict.c, dict/dict0mem.c, fil/fil0fil.c,
+	ha/ha0storage.c, handler/ha_innodb.cc, handler/win_delay_loader.cc,
+	include/buf0buf.ic, include/dict0dict.ic, include/hash0hash.h,
+	thr/thr0loc.c, trx/trx0i_s.c:
+	Add the parameter ASSERTION to HASH_SEARCH() macro, and use it for
+	light validation of the traversed items in hash table lookups when
+	UNIV_DEBUG is enabled.
+
+2009-01-09	The InnoDB Team
+
+	* buf/buf0flu.c, include/buf0flu.h, include/buf0flu.ic:
+	Remove unused code from the functions
+	buf_flush_insert_into_flush_list() and
+	buf_flush_insert_sorted_into_flush_list().
+
+2009-01-09	The InnoDB Team
+
+	* buf/buf0flu.c:
+	Simplify the functions buf_flush_try_page() and buf_flush_batch(). Add
+	debug assertions and an explanation to buf_flush_write_block_low().
+
+2009-01-07	The InnoDB Team
+
+	* row/row0merge.c:
+	Fix a bug in recovery when dropping temporary indexes.
+
+2009-01-07	The InnoDB Team
+
+	* handler/ha_innodb.cc, handler/ha_innodb.h, handler/handler0alter.cc:
+	Fix Bug#41680 calls to trx_allocate_for_mysql are not consistent
+
+2009-01-07	The InnoDB Team
+
+	* mysql-test/innodb_bug41904.result, mysql-test/innodb_bug41904.test,
+	row/row0merge.c:
+	Fix Bug#41904 create unique index problem
+
+2009-01-02	The InnoDB Team
+
+	* handler/ha_innodb.cc, include/srv0srv.h, mem/mem0pool.c,
+	mysql-test/innodb-use-sys-malloc-master.opt,
+	mysql-test/innodb-use-sys-malloc.result,
+	mysql-test/innodb-use-sys-malloc.test, srv/srv0srv.c, srv/srv0start.c:
+	Implement the configuration parameter innodb_use_sys_malloc (false by
+	default), for disabling InnoDB's internal memory allocator and using
+	system malloc/free instead. The "BUFFER POOL AND MEMORY" section of
+	SHOW ENGINE INNODB STATUS will report "in additional pool allocated
+	allocated 0" when innodb_use_sys_malloc is set.
+
+2008-12-30	The InnoDB Team
+
+	* btr/btr0btr.c:
+	When setting the PAGE_LEVEL of a compressed B-tree page from or to 0,
+	compress the page at the same time. This is necessary, because the
+	column information stored on the compressed page will differ between
+	leaf and non-leaf pages. Leaf pages are identified by PAGE_LEVEL=0.
+	This bug can make InnoDB crash when all rows of a compressed table are
+	deleted.
+
+2008-12-17	The InnoDB Team
+
+	* include/row0sel.h, include/row0upd.h, pars/pars0pars.c,
+	row/row0mysql.c, row/row0sel.c, row/row0upd.c:
+	Remove update-in-place select from the internal SQL interpreter. It
+	was only used for updating the InnoDB internal data dictionary when
+	renaming or dropping tables. It could have caused deadlocks when
+	acquiring latches on insert buffer bitmap pages.
+
+2008-12-17	The InnoDB Team
+
+	* btr/btr0sea.c, buf/buf0buf.c, buf/buf0lru.c, ha/ha0ha.c,
+	ha/hash0hash.c, include/buf0buf.h, include/ha0ha.h, include/ha0ha.ic,
+	include/hash0hash.h, include/univ.i:
+	Introduce the preprocessor symbol UNIV_AHI_DEBUG for enabling adaptive
+	hash index debugging independently of UNIV_DEBUG.
+
+2008-12-16	The InnoDB Team
+
+	* btr/btr0cur.c:
+	Do not update the free bits in the insert buffer bitmap when inserting
+	or deleting from the insert buffer B-tree. Assert that records in the
+	insert buffer B-tree are never updated.
+
+2008-12-12	The InnoDB Team
+
+	* buf/buf0buf.c, fil/fil0fil.c, fsp/fsp0fsp.c, ibuf/ibuf0ibuf.c,
+	include/fil0fil.h, include/ibuf0ibuf.h, include/ibuf0ibuf.ic,
+	include/ibuf0types.h:
+	Clean up the insert buffer subsystem so that only one insert
+	buffer B-tree exists.
+	Originally, there were provisions in InnoDB for multiple insert
+	buffer B-trees, apparently one for each tablespace.
+	When Heikki Tuuri implemented multiple InnoDB tablespaces in
+	MySQL/InnoDB 4.1, he made the insert buffer live only in the
+	system tablespace (space 0) but left the provisions in the code.
+
+2008-12-11	The InnoDB Team
+
+	* include/srv0srv.h, os/os0proc.c, srv/srv0srv.c:
+	Fix the issue that the InnoDB plugin fails if innodb_buffer_pool_size
+	is defined bigger than 4096M on 64-bit Windows. This bug should not
+	have affected other 64-bit systems.
+
+2008-12-09	The InnoDB Team
+
+	* handler/ha_innodb.cc:
+	Fix Bug#40386 Not flushing query cache after truncate.
+
+2008-12-09	The InnoDB Team
+
+	* handler/ha_innodb.cc, srv/srv0srv.c, trx/trx0trx.c:
+	Fix Bug#40760 "set global innodb_thread_concurrency = 0;" is not safe
+
+2008-12-04	The InnoDB Team
+
+	* handler/ha_innodb.cc, handler/mysql_addons.cc,
+	include/mysql_addons.h, trx/trx0i_s.c, win-plugin/win-plugin.diff:
+	Remove dependencies to MySQL internals (defining MYSQL_SERVER).
+
+2008-12-02	The InnoDB Team
+
+	* page/page0cur.c:
+	When allocating space for a record from the free list of previously
+	purged records, zero out the DB_TRX_ID and DB_ROLL_PTR of the purged
+	record if the new record would not overwrite these fields. This fixes
+	a harmless content mismatch reported by page_zip_validate().
+
+2008-12-02	The InnoDB Team
+
+	* row/row0merge.c:
+	Replace the WHILE 1 with WHILE 1=1 in the SQL procedure, so that the
+	loop will actually be entered and temporary indexes be dropped during
+	crash recovery.
+
+2008-12-01	The InnoDB Team
+
+	InnoDB Plugin 1.0.2 released
+
+2008-10-31	The InnoDB Team
+
+	* dict/dict0mem.c, include/dict0mem.h, include/lock0lock.h,
+	include/row0mysql.h, include/trx0trx.h, include/univ.i,
+	include/ut0vec.h, include/ut0vec.ic, lock/lock0lock.c,
+	row/row0mysql.c, trx/trx0trx.c:
+	Fix Bug#26316 Triggers create duplicate entries on auto-increment
+	columns
+
+2008-10-30	The InnoDB Team
+
+	* handler/ha_innodb.cc, handler/handler0vars.h,
+	handler/win_delay_loader.cc, mysql-test/innodb_bug40360.result,
+	mysql-test/innodb_bug40360.test:
+	Fix Bug#40360 Binlog related errors with binlog off
+
+2008-10-29	The InnoDB Team
+
+	* include/data0type.ic:
+	Fix Bug#40369 dtype_get_sql_null_size() returns 0 or 1, not the size
+
+2008-10-29	The InnoDB Team
+
+	* handler/ha_innodb.cc, include/srv0srv.h, srv/srv0srv.c:
+	Fix Bug#38189 innodb_stats_on_metadata missing
+
+2008-10-28	The InnoDB Team
+
+	* CMakeLists.txt, ha_innodb.def, handler/ha_innodb.cc,
+	handler/handler0alter.cc, handler/handler0vars.h, handler/i_s.cc,
+	handler/win_delay_loader.cc, win-plugin/*:
+	Implemented the delayloading of externals for the plugin on Windows.
+	This makes it possible to build a dynamic plugin (ha_innodb.dll) on
+	Windows.
+
+2008-10-27	The InnoDB Team
+
+	* CMakeLists.txt:
+	Fix Bug#19424 InnoDB: Possibly a memory overrun of the buffer being
+	freed (64-bit Visual C)
+
+2008-10-23	The InnoDB Team
+
+	* ibuf/ibuf0ibuf.c:
+	ibuf_delete_rec(): When the cursor to the insert buffer record
+	cannot be restored, do not complain if the tablespace does not
+	exist, because the insert buffer record may have been discarded by
+	some other thread. This bug has existed in MySQL/InnoDB since
+	version 4.1, when innodb_file_per_table was implemented.
+	This may fix Bug#27276 InnoDB Error: ibuf cursor restoration fails.
+
+2008-10-22	The InnoDB Team
+
+	* dict/dict0dict.c, dict/dict0mem.c, handler/ha_innodb.cc,
+	handler/ha_innodb.h, include/dict0dict.h, include/dict0mem.h,
+	row/row0mysql.c:
+	Fix Bug#39830 Table autoinc value not updated on first insert
+	Fix Bug#35498 Cannot get table test/table1 auto-inccounter value in
+	::info
+	Fix Bug#36411 "Failed to read auto-increment value from storage
+	engine" in 5.1.24 auto-inc
+
+2008-10-22	The InnoDB Team
+
+	* handler/ha_innodb.cc, include/row0mysql.h, row/row0mysql.c:
+	Fix Bug#40224 New AUTOINC changes mask reporting of deadlock/timeout
+	errors
+
+2008-10-16	The InnoDB Team
+
+	* dict/dict0dict.c, mysql-test/innodb-index.result,
+	mysql-test/innodb-index.test:
+	Skip the undo log size check when creating REDUNDANT and COMPACT
+	tables. In ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED, column
+	prefix indexes require that prefixes of externally stored columns
+	be written to the undo log. This may make the undo log record
+	bigger than the record on the B-tree page. The maximum size of an
+	undo log record is the page size. That must be checked for, in
+	dict_index_add_to_cache(). However, this restriction must not
+	be enforced on REDUNDANT or COMPACT tables.
+
+2008-10-15	The InnoDB Team
+
+	* btr/btr0cur.c, include/btr0cur.h, row/row0ext.c, row/row0sel.c,
+	row/row0upd.c:
+	When the server crashes while freeing an externally stored column
+	of a compressed table, the BTR_EXTERN_LEN field in the BLOB
+	pointer will be written as 0. Tolerate this in the functions that
+	deal with externally stored columns. This fixes problems after
+	crash recovery, in the rollback of incomplete transactions, and in
+	the purge of delete-marked records.
+
+2008-10-15	The InnoDB Team
+
+	* btr/btr0btr.c, include/page0zip.h, page/page0zip.c, include/univ.i:
+	When a B-tree node of a compressed table is split or merged, the
+	compression may fail. In this case, the entire compressed page
+	will be copied and the excess records will be deleted. However,
+	page_zip_copy(), now renamed to page_zip_copy_recs(), copied too
+	many fields in the page header, overwriting PAGE_BTR_SEG_LEAF and
+	PAGE_BTR_SEG_TOP when splitting the B-tree root. This caused
+	corruption of compressed tables. Furthermore, the lock table and
+	the adaptive hash index would be corrupted, because we forgot to
+	update them when invoking page_zip_copy_recs().
+
+	Introduce the symbol UNIV_ZIP_DEBUG for triggering the copying of
+	compressed pages more often, for debugging purposes.
+
+2008-10-10	The InnoDB Team
+
+	* handler/handler0alter.cc, include/row0merge.h, row/row0merge.c,
+	row/row0mysql.c:
+	Fix some locking issues, mainly in fast index creation. The
+	InnoDB data dictionary cache should be latched whenever a
+	transaction is holding locks on any data dictionary tables.
+	Otherwise, lock waits or deadlocks could occur. Furthermore, the
+	data dictionary transaction must be committed (and the locks
+	released) before the data dictionary latch is released.
+
+	ha_innobase::add_index(): Lock the data dictionary before renaming
+	or dropping the created indexes, because neither operation will
+	commit the data dictionary transaction.
+
+	ha_innobase::final_drop_index(): Commit the transactions before
+	unlocking the data dictionary.
+
+2008-10-09	The InnoDB Team
+
+	* buf/buf0lru.c:
+	Fix Bug#39939 DROP TABLE/DISCARD TABLESPACE takes long time in
+	buf_LRU_invalidate_tablespace()
+
+2008-10-08	The InnoDB Team
+
+	* dict/dict0crea.c, trx/trx0roll.c, include/row0mysql.h,
+	row/row0merge.c, row/row0mysql.c:
+	When dropping a table, hold the data dictionary latch until the
+	transaction has been committed. The data dictionary latch is
+	supposed to prevent lock waits and deadlocks in the data
+	dictionary tables. Due to this bug, DROP TABLE could cause a
+	deadlock or hang. Note that because of Bug#33650 and Bug#39833,
+	MySQL may also drop a (temporary) table when executing CREATE INDEX
+	or ALTER TABLE ... ADD INDEX.
+
+2008-10-04	The InnoDB Team
+
+	* handler/ha_innodb.cc, mysql-test/innodb_bug39438-master.opt,
+	mysql-test/innodb_bug39438.result, mysql-test/innodb_bug39438.test:
+	Fix Bug#39438 Testcase for Bug#39436 crashes on 5.1 in
+	fil_space_get_latch
+
+2008-10-04	The InnoDB Team
+
+	* include/lock0lock.h, lock/lock0lock.c,
+	mysql-test/innodb_bug38231.result, mysql-test/innodb_bug38231.test,
+	row/row0mysql.c:
+	Fix Bug#38231 Innodb crash in lock_reset_all_on_table() on TRUNCATE +
+	LOCK / UNLOCK
+
+2008-10-04	The InnoDB Team
+
+	* handler/ha_innodb.cc:
+	Fix Bug#35498 Cannot get table test/table1 auto-inccounter value in
+	::info
+
+2008-10-04	The InnoDB Team
+
+	* handler/ha_innodb.cc, handler/ha_innodb.h:
+	Fix Bug#37788 InnoDB Plugin: AUTO_INCREMENT wrong for compressed
+	tables
+
+2008-10-04	The InnoDB Team
+
+	* dict/dict0dict.c, handler/ha_innodb.cc, handler/ha_innodb.h,
+	include/dict0dict.h, include/dict0mem.h, row/row0mysql.c:
+	Fix Bug#39830 Table autoinc value not updated on first insert
+
+2008-10-03	The InnoDB Team
+
+	* mysql-test/innodb-index.test, mysql-test/innodb-index.result,
+	mysql-test/innodb-timeout.test, mysql-test/innodb-timeout.result,
+	srv/srv0srv.c, include/srv0srv.h, handler/ha_innodb.cc,
+	include/ha_prototypes.h:
+	Fix Bug#36285 innodb_lock_wait_timeout is not dynamic, not per session
+
+2008-09-19	The InnoDB Team
+
+	* os/os0proc.c:
+	Fix a memory leak on Windows. The memory leak was due to wrong
+	parameters passed into VirtualFree() call. As the result, the
+	call fails with Windows error 87.
+
+2008-09-17	The InnoDB Team
+
+	* mysql-test/innodb.result, mysql-test/innodb-zip.result,
+	mysql-test/innodb-zip.test, mysql-test/innodb.test, ibuf/ibuf0ibuf.c,
+	dict/dict0crea.c, dict/dict0load.c, dict/dict0boot.c,
+	include/dict0dict.h, include/trx0trx.h, dict/dict0dict.c,
+	trx/trx0trx.c, include/ha_prototypes.h, handler/ha_innodb.cc:
+	When creating an index in innodb_strict_mode, check that the
+	maximum record size will never exceed the B-tree page size limit.
+	For uncompressed tables, there should always be enough space for
+	two records in an empty B-tree page. For compressed tables, there
+	should be enough space for storing two node pointer records or one
+	data record in an empty page in uncompressed format.
+	The purpose of this check is to guarantee that INSERT or UPDATE
+	will never fail due to too big record size.
+
+2008-09-17	The InnoDB Team
+
+	* btr/btr0cur.c, data/data0data.c, include/page0zip.h,
+	include/page0zip.ic, page/page0zip.c, mysql-test/innodb_bug36172.test:
+	Prevent infinite B-tree page splits in compressed tables by
+	ensuring that there will always be enough space for two node
+	pointer records in an empty B-tree page. Also, require that at
+	least one data record will fit in an empty compressed page. This
+	will reduce the maximum size of records in compressed tables.
+
+2008-09-09	The InnoDB Team
+
+	* mysql-test/innodb.result:
+	Fix the failing innodb test by merging changes that MySQL made to
+	that file (r2646.12.1 in MySQL BZR repository)
+
+2008-09-09	The InnoDB Team
+
+	* handler/ha_innodb.cc, mysql-test/innodb-autoinc.result,
+	mysql-test/innodb-autoinc.test:
+	Fix Bug#38839 auto increment does not work properly with InnoDB after
+	update
+
+2008-09-09	The InnoDB Team
+
+	* dict/dict0dict.c, handler/handler0alter.cc, include/dict0dict.h,
+	mysql-test/innodb-index.result, mysql-test/innodb-index.test:
+	Fix Bug#38786 InnoDB plugin crashes on drop table/create table with FK
+
+2008-08-21	The InnoDB Team
+
+	* handler/ha_innodb.cc, include/ha_prototypes.h, row/row0sel.c:
+	Fix Bug#37885 row_search_for_mysql may gap lock unnecessarily with SQL
+	comments in query
+
+2008-08-21	The InnoDB Team
+
+	* handler/ha_innodb.cc:
+	Fix Bug#38185 ha_innobase::info can hold locks even when called with
+	HA_STATUS_NO_LOCK
+
+2008-08-18	The InnoDB Team
+
+	* buf/buf0buf.c, buf/buf0lru.c, include/buf0buf.ic, include/univ.i:
+	Introduce UNIV_LRU_DEBUG for debugging the LRU buffer pool cache
+
+2008-08-08	The InnoDB Team
+
+	* buf/buf0lru.c, include/buf0buf.h:
+	Fix two recovery bugs that could lead to a crash in debug builds with
+	small buffer size
+
+2008-08-07	The InnoDB Team
+
+	* btr/btr0cur.c, handler/ha_innodb.cc, include/srv0srv.h,
+	srv/srv0srv.c:
+	Add a parameter innodb_stats_sample_pages to allow users to control
+	the number of index dives when InnoDB estimates the cardinality of
+	an index (ANALYZE TABLE, SHOW TABLE STATUS etc)
+
+2008-08-07	The InnoDB Team
+
+	* trx/trx0i_s.c:
+	Fix a bug that would lead to a crash if a SELECT was issued from the
+	INFORMATION_SCHEMA tables and there are rolling back transactions at
+	the same time
+
+2008-08-06	The InnoDB Team
+
+	* btr/btr0btr.c, btr/btr0cur.c, ibuf/ibuf0ibuf.c, include/btr0cur.h,
+	include/trx0roll.h, include/trx0types.h, row/row0purge.c,
+	row/row0uins.c, row/row0umod.c, trx/trx0roll.c:
+	In the rollback of incomplete transactions after crash recovery,
+	tolerate clustered index records whose externally stored columns
+	have not been written.
+
+2008-07-30	The InnoDB Team
+
+	* trx/trx0trx.c:
+	Fixes a race in recovery where the recovery thread recovering a
+	PREPARED trx and the background rollback thread can both try
+	to free the trx after its status is set to COMMITTED_IN_MEMORY.
+
+2008-07-29	The InnoDB Team
+
+	* include/trx0rec.h, row/row0purge.c, row/row0vers.c, trx/trx0rec.c:
+	Fix a BLOB corruption bug
+
+2008-07-15	The InnoDB Team
+
+	* btr/btr0sea.c, dict/dict0dict.c, include/btr0sea.h:
+	Fixed a timing hole where a thread dropping an index can free the
+	in-memory index struct while another thread is still using that
+	structure to remove entries from adaptive hash index belonging
+	to one of the pages that belongs to the index being dropped.
+
+2008-07-04	The InnoDB Team
+
+	* mysql-test/innodb-index.result:
+	Fix the failing innodb-index test by adjusting the result to a new
+	MySQL behavior (the change occured in BZR-r2667)
+
+2008-07-03	The InnoDB Team
+
+	* mysql-test/innodb-zip.result, mysql-test/innodb-zip.test:
+	Remove the negative test cases that produce warnings
+
+2008-07-02	The InnoDB Team
+
+	* mysql-test/innodb-replace.result, mysql-test/innodb-index.test:
+	Disable part of innodb-index test because MySQL changed its behavior
+	and is not calling ::add_index() anymore when adding primary index on
+	non-NULL column
+
+2008-07-01	The InnoDB Team
+
+	* mysql-test/innodb-replace.result, mysql-test/innodb-replace.test:
+	Fix the failing innodb-replace test by merging changes that MySQL
+	made to that file (r2659 in MySQL BZR repository)
+
+2008-07-01	The InnoDB Team
+
+	* lock/lock0lock.c:
+	Fix Bug#36942 Performance problem in lock_get_n_rec_locks (SHOW INNODB
+	STATUS)
+
+2008-07-01	The InnoDB Team
+
+	* ha/ha0ha.c:
+	Fix Bug#36941 Performance problem in ha_print_info (SHOW INNODB
+	STATUS)
+
+2008-07-01	The InnoDB Team
+
+	* handler/ha_innodb.cc, mysql-test/innodb-autoinc.result,
+	mysql-test/innodb-autoinc.test:
+	Fix Bug#37531 After truncate, auto_increment behaves incorrectly for
+	InnoDB
+
+2008-06-19	The InnoDB Team
+
+	* handler/ha_innodb.cc:
+	Rewrite the function innodb_plugin_init() to support parameters in
+	different order (in static and dynamic InnoDB) and to support more
+	parameters in the static InnoDB
+
+2008-06-19	The InnoDB Team
+
+	* handler/handler0alter.cc:
+	Fix a bug in ::add_index() which set the transaction state to "active"
+	but never restored it to the original value. This bug caused warnings
+	to be printed by the rpl.rpl_ddl mysql-test.
+
+2008-06-19	The InnoDB Team
+
+	* mysql-test/patches:
+	Add a directory which contains patches, which need to be applied to
+	MySQL source in order to get some mysql-tests to succeed. The patches
+	cannot be committed in MySQL repository because they are specific to
+	the InnoDB plugin.
+
+2008-06-19	The InnoDB Team
+
+	* mysql-test/innodb-zip.result, mysql-test/innodb-zip.test,
+	row/row0row.c:
+	Fix an anomaly when updating a record with BLOB prefix
+
+2008-06-18	The InnoDB Team
+
+	* include/trx0sys.h, srv/srv0start.c, trx/trx0sys.c:
+	Fix a bug in recovery which was a side effect of the file_format_check
+	changes
+
+2008-06-09	The InnoDB Team
+
+	* mysql-test/innodb.result:
+	Fix the failing innodb test by merging changes that MySQL made to that
+	file
+
+2008-06-06	The InnoDB Team
+
+	* buf/buf0buf.c, handler/ha_innodb.cc, include/buf0buf.h,
+	include/srv0srv.h, srv/srv0srv.c:
+	Fix Bug#36600 SHOW STATUS takes a lot of CPU in
+	buf_get_latched_pages_number
+
+	* handler/ha_innodb.cc, os/os0file.c:
+	Fix Bug#11894 innodb_file_per_table crashes w/ Windows .sym symbolic
+	link hack
+
+	* include/ut0ut.h, srv/srv0srv.c, ut/ut0ut.c:
+	Fix Bug#36819 ut_usectime does not handle errors from gettimeofday
+
+	* handler/ha_innodb.cc:
+	Fix Bug#35602 Failed to read auto-increment value from storage engine
+
+	* srv/srv0start.c:
+	Fix Bug#36149 Read buffer overflow in srv0start.c found during "make
+	test"
+
+2008-05-08	The InnoDB Team
+
+	* btr/btr0btr.c, mysql-test/innodb_bug36172.result,
+	mysql-test/innodb_bug36172.test:
+	Fix Bug#36172 insert into compressed innodb table crashes
+
+2008-05-08	The InnoDB Team
+
+	InnoDB Plugin 1.0.1 released
+
+2008-05-06	The InnoDB Team
+
+	* handler/ha_innodb.cc, include/srv0srv.h, include/sync0sync.h,
+	include/trx0sys.h, mysql-test/innodb-zip.result,
+	mysql-test/innodb-zip.test, srv/srv0srv.c, srv/srv0start.c,
+	sync/sync0sync.c, trx/trx0sys.c:
+	Implement the system tablespace tagging
+
+	* handler/ha_innodb.cc, handler/i_s.cc, include/univ.i,
+	srv/srv0start.c:
+	Add InnoDB version in INFORMATION_SCHEMA.PLUGINS.PLUGIN_VERSION,
+	in the startup message and in a server variable innodb_version.
+
+	* sync/sync0sync.c:
+	Fix a bug in the sync debug code where a lock with level
+	SYNC_LEVEL_VARYING would cause an assertion failure when a thread
+	tried to release it.
+
+2008-04-30	The InnoDB Team
+
+	* Makefile.am:
+	Fix Bug#36434 ha_innodb.so is installed in the wrong directory
+
+	* handler/ha_innodb.cc:
+	Merge change from MySQL (Fix Bug#35406 5.1-opt crashes on select from
+	I_S.REFERENTIAL_CONSTRAINTS):
+	ChangeSet@1.2563, 2008-03-18 19:42:04+04:00, gluh@mysql.com +1 -0
+
+	* scripts/install_innodb_plugins.sql:
+	Added
+
+	* mysql-test/innodb.result:
+	Merge change from MySQL (this fixes the failing innodb test):
+	ChangeSet@1.1810.3601.4, 2008-02-07 02:33:21+04:00
+
+	* row/row0sel.c:
+	Fix Bug#35226 RBR event crashes slave
+
+	* handler/ha_innodb.cc:
+	Change the fix for Bug#32440 to show bytes instead of kilobytes in
+	INFORMATION_SCHEMA.TABLES.DATA_FREE
+
+	* handler/ha_innodb.cc, mysql-test/innodb.result,
+	mysql-test/innodb.test:
+	Fix Bug#29507 TRUNCATE shows to many rows effected
+
+	* handler/ha_innodb.cc, mysql-test/innodb.result,
+	mysql-test/innodb.test:
+	Fix Bug#35537 Innodb doesn't increment handler_update and
+	handler_delete
+
+2008-04-29	The InnoDB Team
+
+	* handler/i_s.cc, include/srv0start.h, srv/srv0start.c:
+	Fix Bug#36310 InnoDB plugin crash
+
+2008-04-23	The InnoDB Team
+
+	* mysql-test/innodb_bug36169.result, mysql-test/innodb_bug36169.test,
+	row/row0mysql.c:
+	Fix Bug#36169 create innodb compressed table with too large row size
+	crashed
+
+	* (outside the source tree):
+	Fix Bug#36222 New InnoDB plugin 1.0 has wrong MKDIR_P defined in
+	Makefile.in
+
+2008-04-15	The InnoDB Team
+
+	InnoDB Plugin 1.0.0 released
diff --git a/storage/innodb_plugin/Doxyfile b/storage/innodb_plugin/Doxyfile
new file mode 100644
index 00000000000..62aa7dd8abc
--- /dev/null
+++ b/storage/innodb_plugin/Doxyfile
@@ -0,0 +1,1419 @@
+# Doxyfile 1.5.6
+
+# Usage: SVNVERSION=-r$(svnversion) doxygen
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+#       TAG = value [value, ...]
+# For lists items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME           = "InnoDB Plugin"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER         = 1.0$(SVNVERSION)
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = dox
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek,
+# Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages),
+# Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish,
+# Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish,
+# and Ukrainian.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF       =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES        = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH        =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful is your file systems
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the DETAILS_AT_TOP tag is set to YES then Doxygen
+# will output the detailed description near the top, like JavaDoc.
+# If set to NO, the detailed description appears after the member
+# documentation.
+
+DETAILS_AT_TOP         = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE               = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C  = YES
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also make the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate getter
+# and setter methods for a property. Setting this option to YES (the default)
+# will make doxygen to replace the get and set methods by a property in the
+# documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING            = YES
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL            = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE        = YES
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC         = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespace are hidden.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES       = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES        = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES       = NO
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.  This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER    =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET                  = YES
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be abled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC       = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  = . include/univ.i
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          = *.c *.ic *.h
+
+# The RECURSIVE tag can be used to turn specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE              = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                = ut0auxconf_*
+
+# The EXCLUDE_SYMLINKS tag can be used select whether or not files or
+# directories that are symbolic links (a Unix filesystem feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS       =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.  If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis.  Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.  The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES    = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.  Otherwise they will link to the documentstion.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX     = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET        =
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS     = YES
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP      = NO
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+
+GENERATE_DOCSET        = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox
+# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE               =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION           =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI           = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING     =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND             = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX          = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to FRAME, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
+# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are
+# probably better off using the HTML help feature. Other possible values
+# for this tag are: HIERARCHIES, which will generate the Groups, Directories,
+# and Class Hiererachy pages using a tree view instead of an ordered list;
+# ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which
+# disables this behavior completely. For backwards compatibility with previous
+# releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE
+# respectively.
+
+GENERATE_TREEVIEW      = NONE
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH         = 250
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE       = 10
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE             = a4wide
+
+# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER           =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE        = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES     = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE    =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA             =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD                =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader.  This is useful
+# if you want to understand what is going on.  On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION        = YES
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF     = YES
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# in the INCLUDE_PATH (see below) will be search if a #include is found.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             = DOXYGEN UNIV_DEBUG UNIV_SYNC_DEBUG __attribute__()=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      = UT_LIST_BASE_NODE_T UT_LIST_NODE_T
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+#   TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+#   TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS        = NO
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option is superseded by the HAVE_DOT option below. This is only a
+# fallback. It is recommended to install and use dot, since it yields more
+# powerful graphs.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH            =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT               = YES
+
+# By default doxygen will write a font called FreeSans.ttf to the output
+# directory and reference it in all dot files that doxygen generates. This
+# font does not include all possible unicode characters however, so when you need
+# these (or just want a differently looking font) you can specify the font name
+# using DOT_FONTNAME. You need need to make sure dot is able to find the font,
+# which can be done by putting it in a standard location or by setting the
+# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
+# containing the font.
+
+DOT_FONTNAME           = FreeSans
+
+# By default doxygen will tell dot to use the output directory to look for the
+# FreeSans.ttf font (which doxygen will put there itself). If you specify a
+# different font using DOT_FONTNAME you can set the path where dot
+# can find it using this tag.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# the CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct groups dependencies
+
+GROUP_GRAPHS           = NO
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK               = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH          = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT       = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that doxygen if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lay further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH    = 3
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is enabled by default, which results in a transparent
+# background. Warning: Depending on the platform used, enabling this option
+# may lead to badly anti-aliased labels on the edges of a graph (i.e. they
+# become hard to read).
+
+DOT_TRANSPARENT        = YES
+
+# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP            = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine
+#---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be
+# used. If set to NO the values of all tags below this one will be ignored.
+
+SEARCHENGINE           = NO
diff --git a/storage/innodb_plugin/Makefile.am b/storage/innodb_plugin/Makefile.am
new file mode 100644
index 00000000000..50a3c1e6cab
--- /dev/null
+++ b/storage/innodb_plugin/Makefile.am
@@ -0,0 +1,343 @@
+# Copyright (C) 2001, 2004, 2006 MySQL AB & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# Process this file with automake to create Makefile.in
+
+MYSQLDATAdir=		$(localstatedir)
+MYSQLSHAREdir=		$(pkgdatadir)
+MYSQLBASEdir=		$(prefix)
+MYSQLLIBdir=		$(pkglibdir)
+pkgplugindir=		$(pkglibdir)/plugin
+INCLUDES=		-I$(top_srcdir)/include -I$(top_builddir)/include \
+			-I$(top_srcdir)/regex \
+			-I$(srcdir)/include \
+			-I$(top_srcdir)/sql \
+			-I$(srcdir) @ZLIB_INCLUDES@
+
+DEFS=			@DEFS@
+
+
+noinst_HEADERS=		\
+			handler/ha_innodb.h	\
+			handler/handler0vars.h	\
+			handler/i_s.h		\
+			include/btr0btr.h	\
+			include/btr0btr.ic	\
+			include/btr0cur.h	\
+			include/btr0cur.ic	\
+			include/btr0pcur.h	\
+			include/btr0pcur.ic	\
+			include/btr0sea.h	\
+			include/btr0sea.ic	\
+			include/btr0types.h	\
+			include/buf0buddy.h	\
+			include/buf0buddy.ic	\
+			include/buf0buf.h	\
+			include/buf0buf.ic	\
+			include/buf0flu.h	\
+			include/buf0flu.ic	\
+			include/buf0lru.h	\
+			include/buf0lru.ic	\
+			include/buf0rea.h	\
+			include/buf0types.h	\
+			include/data0data.h	\
+			include/data0data.ic	\
+			include/data0type.h	\
+			include/data0type.ic	\
+			include/data0types.h	\
+			include/db0err.h	\
+			include/dict0boot.h	\
+			include/dict0boot.ic	\
+			include/dict0crea.h	\
+			include/dict0crea.ic	\
+			include/dict0dict.h	\
+			include/dict0dict.ic	\
+			include/dict0load.h	\
+			include/dict0load.ic	\
+			include/dict0mem.h	\
+			include/dict0mem.ic	\
+			include/dict0types.h	\
+			include/dyn0dyn.h	\
+			include/dyn0dyn.ic	\
+			include/eval0eval.h	\
+			include/eval0eval.ic	\
+			include/eval0proc.h	\
+			include/eval0proc.ic	\
+			include/fil0fil.h	\
+			include/fsp0fsp.h	\
+			include/fsp0fsp.ic	\
+			include/fsp0types.h	\
+			include/fut0fut.h	\
+			include/fut0fut.ic	\
+			include/fut0lst.h	\
+			include/fut0lst.ic	\
+			include/ha0ha.h		\
+			include/ha0ha.ic	\
+			include/ha0storage.h	\
+			include/ha0storage.ic	\
+			include/ha_prototypes.h	\
+			include/handler0alter.h	\
+			include/hash0hash.h	\
+			include/hash0hash.ic	\
+			include/ibuf0ibuf.h	\
+			include/ibuf0ibuf.ic	\
+			include/ibuf0types.h	\
+			include/lock0iter.h	\
+			include/lock0lock.h	\
+			include/lock0lock.ic	\
+			include/lock0priv.h	\
+			include/lock0priv.ic	\
+			include/lock0types.h	\
+			include/log0log.h	\
+			include/log0log.ic	\
+			include/log0recv.h	\
+			include/log0recv.ic	\
+			include/mach0data.h	\
+			include/mach0data.ic	\
+			include/mem0dbg.h	\
+			include/mem0dbg.ic	\
+			include/mem0mem.h	\
+			include/mem0mem.ic	\
+			include/mem0pool.h	\
+			include/mem0pool.ic	\
+			include/mtr0log.h	\
+			include/mtr0log.ic	\
+			include/mtr0mtr.h	\
+			include/mtr0mtr.ic	\
+			include/mtr0types.h	\
+			include/mysql_addons.h	\
+			include/os0file.h	\
+			include/os0proc.h	\
+			include/os0proc.ic	\
+			include/os0sync.h	\
+			include/os0sync.ic	\
+			include/os0thread.h	\
+			include/os0thread.ic	\
+			include/page0cur.h	\
+			include/page0cur.ic	\
+			include/page0page.h	\
+			include/page0page.ic	\
+			include/page0types.h	\
+			include/page0zip.h	\
+			include/page0zip.ic	\
+			include/pars0grm.h	\
+			include/pars0opt.h	\
+			include/pars0opt.ic	\
+			include/pars0pars.h	\
+			include/pars0pars.ic	\
+			include/pars0sym.h	\
+			include/pars0sym.ic	\
+			include/pars0types.h	\
+			include/que0que.h	\
+			include/que0que.ic	\
+			include/que0types.h	\
+			include/read0read.h	\
+			include/read0read.ic	\
+			include/read0types.h	\
+			include/rem0cmp.h	\
+			include/rem0cmp.ic	\
+			include/rem0rec.h	\
+			include/rem0rec.ic	\
+			include/rem0types.h	\
+			include/row0ext.h	\
+			include/row0ext.ic	\
+			include/row0ins.h	\
+			include/row0ins.ic	\
+			include/row0merge.h	\
+			include/row0mysql.h	\
+			include/row0mysql.ic	\
+			include/row0purge.h	\
+			include/row0purge.ic	\
+			include/row0row.h	\
+			include/row0row.ic	\
+			include/row0sel.h	\
+			include/row0sel.ic	\
+			include/row0types.h	\
+			include/row0uins.h	\
+			include/row0uins.ic	\
+			include/row0umod.h	\
+			include/row0umod.ic	\
+			include/row0undo.h	\
+			include/row0undo.ic	\
+			include/row0upd.h	\
+			include/row0upd.ic	\
+			include/row0vers.h	\
+			include/row0vers.ic	\
+			include/srv0que.h	\
+			include/srv0srv.h	\
+			include/srv0srv.ic	\
+			include/srv0start.h	\
+			include/sync0arr.h	\
+			include/sync0arr.ic	\
+			include/sync0rw.h	\
+			include/sync0rw.ic	\
+			include/sync0sync.h	\
+			include/sync0sync.ic	\
+			include/sync0types.h	\
+			include/thr0loc.h	\
+			include/thr0loc.ic	\
+			include/trx0i_s.h	\
+			include/trx0purge.h	\
+			include/trx0purge.ic	\
+			include/trx0rec.h	\
+			include/trx0rec.ic	\
+			include/trx0roll.h	\
+			include/trx0roll.ic	\
+			include/trx0rseg.h	\
+			include/trx0rseg.ic	\
+			include/trx0sys.h	\
+			include/trx0sys.ic	\
+			include/trx0trx.h	\
+			include/trx0trx.ic	\
+			include/trx0types.h	\
+			include/trx0undo.h	\
+			include/trx0undo.ic	\
+			include/trx0xa.h	\
+			include/univ.i		\
+			include/usr0sess.h	\
+			include/usr0sess.ic	\
+			include/usr0types.h	\
+			include/ut0auxconf.h	\
+			include/ut0byte.h	\
+			include/ut0byte.ic	\
+			include/ut0dbg.h	\
+			include/ut0list.h	\
+			include/ut0list.ic	\
+			include/ut0lst.h	\
+			include/ut0mem.h	\
+			include/ut0mem.ic	\
+			include/ut0rnd.h	\
+			include/ut0rnd.ic	\
+			include/ut0sort.h	\
+			include/ut0ut.h		\
+			include/ut0ut.ic	\
+			include/ut0vec.h	\
+			include/ut0vec.ic	\
+			include/ut0wqueue.h	\
+			mem/mem0dbg.c
+
+EXTRA_LIBRARIES=	libinnobase.a
+noinst_LIBRARIES=	@plugin_innodb_plugin_static_target@
+libinnobase_a_SOURCES=	\
+			btr/btr0btr.c			\
+			btr/btr0cur.c			\
+			btr/btr0pcur.c			\
+			btr/btr0sea.c			\
+			buf/buf0buddy.c			\
+			buf/buf0buf.c			\
+			buf/buf0flu.c			\
+			buf/buf0lru.c			\
+			buf/buf0rea.c			\
+			data/data0data.c		\
+			data/data0type.c		\
+			dict/dict0boot.c		\
+			dict/dict0crea.c		\
+			dict/dict0dict.c		\
+			dict/dict0load.c		\
+			dict/dict0mem.c			\
+			dyn/dyn0dyn.c			\
+			eval/eval0eval.c		\
+			eval/eval0proc.c		\
+			fil/fil0fil.c			\
+			fsp/fsp0fsp.c			\
+			fut/fut0fut.c			\
+			fut/fut0lst.c			\
+			ha/ha0ha.c			\
+			ha/ha0storage.c			\
+			ha/hash0hash.c			\
+			handler/ha_innodb.cc		\
+			handler/handler0alter.cc	\
+			handler/i_s.cc			\
+			handler/mysql_addons.cc		\
+			ibuf/ibuf0ibuf.c		\
+			lock/lock0iter.c		\
+			lock/lock0lock.c		\
+			log/log0log.c			\
+			log/log0recv.c			\
+			mach/mach0data.c		\
+			mem/mem0mem.c			\
+			mem/mem0pool.c			\
+			mtr/mtr0log.c			\
+			mtr/mtr0mtr.c			\
+			os/os0file.c			\
+			os/os0proc.c			\
+			os/os0sync.c			\
+			os/os0thread.c			\
+			page/page0cur.c			\
+			page/page0page.c		\
+			page/page0zip.c			\
+			pars/lexyy.c			\
+			pars/pars0grm.c			\
+			pars/pars0opt.c			\
+			pars/pars0pars.c		\
+			pars/pars0sym.c			\
+			que/que0que.c			\
+			read/read0read.c		\
+			rem/rem0cmp.c			\
+			rem/rem0rec.c			\
+			row/row0ext.c			\
+			row/row0ins.c			\
+			row/row0merge.c			\
+			row/row0mysql.c			\
+			row/row0purge.c			\
+			row/row0row.c			\
+			row/row0sel.c			\
+			row/row0uins.c			\
+			row/row0umod.c			\
+			row/row0undo.c			\
+			row/row0upd.c			\
+			row/row0vers.c			\
+			srv/srv0que.c			\
+			srv/srv0srv.c			\
+			srv/srv0start.c			\
+			sync/sync0arr.c			\
+			sync/sync0rw.c			\
+			sync/sync0sync.c		\
+			thr/thr0loc.c			\
+			trx/trx0i_s.c			\
+			trx/trx0purge.c			\
+			trx/trx0rec.c			\
+			trx/trx0roll.c			\
+			trx/trx0rseg.c			\
+			trx/trx0sys.c			\
+			trx/trx0trx.c			\
+			trx/trx0undo.c			\
+			usr/usr0sess.c			\
+			ut/ut0byte.c			\
+			ut/ut0dbg.c			\
+			ut/ut0list.c			\
+			ut/ut0mem.c			\
+			ut/ut0rnd.c			\
+			ut/ut0ut.c			\
+			ut/ut0vec.c			\
+			ut/ut0wqueue.c
+
+libinnobase_a_CXXFLAGS=	$(AM_CFLAGS)
+libinnobase_a_CFLAGS=	$(AM_CFLAGS)
+
+EXTRA_LTLIBRARIES=	ha_innodb_plugin.la
+pkgplugin_LTLIBRARIES=	@plugin_innodb_plugin_shared_target@
+
+ha_innodb_plugin_la_LDFLAGS=	-module -rpath $(pkgplugindir)
+ha_innodb_plugin_la_CXXFLAGS=	$(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
+ha_innodb_plugin_la_CFLAGS=	$(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
+ha_innodb_plugin_la_SOURCES=	$(libinnobase_a_SOURCES)
+
+EXTRA_DIST=		CMakeLists.txt plug.in \
+			pars/make_bison.sh pars/make_flex.sh \
+			pars/pars0grm.y pars/pars0lex.l
+
+# Don't update the files from bitkeeper
+%::SCCS/s.%
diff --git a/storage/innodb_plugin/README b/storage/innodb_plugin/README
new file mode 100644
index 00000000000..56aa8058224
--- /dev/null
+++ b/storage/innodb_plugin/README
@@ -0,0 +1,29 @@
+This is the source of the InnoDB Plugin 1.0.4 for MySQL 5.1
+===========================================================
+
+Instructions for compiling the plugin:
+--------------------------------------
+
+1. Get the latest MySQL 5.1 sources from
+   http://dev.mysql.com/downloads/mysql/5.1.html#source
+
+2. Replace the contents of the mysql-5.1.N/storage/innobase/ directory
+   with the contents of this directory.
+
+3. Optional (only necessary if you are going to run tests from the
+   mysql-test suite): cd into the innobase directory and run ./setup.sh
+
+4. Compile MySQL as usual.
+
+5. Enjoy!
+
+See the online documentation for more detailed instructions:
+http://www.innodb.com/doc/innodb_plugin-1.0/innodb-plugin-installation.html
+
+For more information about InnoDB visit
+http://www.innodb.com
+
+Please report any problems or issues with the plugin in the InnoDB Forums
+http://forums.innodb.com/ or in the MySQL Bugs database http://bugs.mysql.com
+
+Thank you for using the InnoDB plugin!
diff --git a/storage/innodb_plugin/btr/btr0btr.c b/storage/innodb_plugin/btr/btr0btr.c
new file mode 100644
index 00000000000..6ba9b36207b
--- /dev/null
+++ b/storage/innodb_plugin/btr/btr0btr.c
@@ -0,0 +1,3693 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file btr/btr0btr.c
+The B-tree
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#include "btr0btr.h"
+
+#ifdef UNIV_NONINL
+#include "btr0btr.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "page0page.h"
+#include "page0zip.h"
+
+#ifndef UNIV_HOTBACKUP
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "btr0pcur.h"
+#include "rem0cmp.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+#include "trx0trx.h"
+
+/*
+Latching strategy of the InnoDB B-tree
+--------------------------------------
+A tree latch protects all non-leaf nodes of the tree. Each node of a tree
+also has a latch of its own.
+
+A B-tree operation normally first acquires an S-latch on the tree. It
+searches down the tree and releases the tree latch when it has the
+leaf node latch. To save CPU time we do not acquire any latch on
+non-leaf nodes of the tree during a search, those pages are only bufferfixed.
+
+If an operation needs to restructure the tree, it acquires an X-latch on
+the tree before searching to a leaf node. If it needs, for example, to
+split a leaf,
+(1) InnoDB decides the split point in the leaf,
+(2) allocates a new page,
+(3) inserts the appropriate node pointer to the first non-leaf level,
+(4) releases the tree X-latch,
+(5) and then moves records from the leaf to the new allocated page.
+
+Node pointers
+-------------
+Leaf pages of a B-tree contain the index records stored in the
+tree. On levels n > 0 we store 'node pointers' to pages on level
+n - 1. For each page there is exactly one node pointer stored:
+thus the our tree is an ordinary B-tree, not a B-link tree.
+
+A node pointer contains a prefix P of an index record. The prefix
+is long enough so that it determines an index record uniquely.
+The file page number of the child page is added as the last
+field. To the child page we can store node pointers or index records
+which are >= P in the alphabetical order, but < P1 if there is
+a next node pointer on the level, and P1 is its prefix.
+
+If a node pointer with a prefix P points to a non-leaf child,
+then the leftmost record in the child must have the same
+prefix P. If it points to a leaf node, the child is not required
+to contain any record with a prefix equal to P. The leaf case
+is decided this way to allow arbitrary deletions in a leaf node
+without touching upper levels of the tree.
+
+We have predefined a special minimum record which we
+define as the smallest record in any alphabetical order.
+A minimum record is denoted by setting a bit in the record
+header. A minimum record acts as the prefix of a node pointer
+which points to a leftmost node on any level of the tree.
+
+File page allocation
+--------------------
+In the root node of a B-tree there are two file segment headers.
+The leaf pages of a tree are allocated from one file segment, to
+make them consecutive on disk if possible. From the other file segment
+we allocate pages for the non-leaf levels of the tree.
+*/
+
+#ifdef UNIV_BTR_DEBUG
+/**************************************************************//**
+Checks a file segment header within a B-tree root page.
+@return	TRUE if valid */
+static
+ibool
+btr_root_fseg_validate(
+/*===================*/
+	const fseg_header_t*	seg_header,	/*!< in: segment header */
+	ulint			space)		/*!< in: tablespace identifier */
+{
+	ulint	offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
+
+	ut_a(mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space);
+	ut_a(offset >= FIL_PAGE_DATA);
+	ut_a(offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
+	return(TRUE);
+}
+#endif /* UNIV_BTR_DEBUG */
+
+/**************************************************************//**
+Gets the root node of a tree and x-latches it.
+@return	root page, x-latched */
+static
+buf_block_t*
+btr_root_block_get(
+/*===============*/
+	dict_index_t*	index,	/*!< in: index tree */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ulint		space;
+	ulint		zip_size;
+	ulint		root_page_no;
+	buf_block_t*	block;
+
+	space = dict_index_get_space(index);
+	zip_size = dict_table_zip_size(index->table);
+	root_page_no = dict_index_get_page(index);
+
+	block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
+	ut_a((ibool)!!page_is_comp(buf_block_get_frame(block))
+	     == dict_table_is_comp(index->table));
+#ifdef UNIV_BTR_DEBUG
+	if (!dict_index_is_ibuf(index)) {
+		const page_t*	root = buf_block_get_frame(block);
+
+		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+					    + root, space));
+		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+					    + root, space));
+	}
+#endif /* UNIV_BTR_DEBUG */
+
+	return(block);
+}
+
+/**************************************************************//**
+Gets the root node of a tree and x-latches it.
+@return	root page, x-latched */
+UNIV_INTERN
+page_t*
+btr_root_get(
+/*=========*/
+	dict_index_t*	index,	/*!< in: index tree */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	return(buf_block_get_frame(btr_root_block_get(index, mtr)));
+}
+
+/*************************************************************//**
+Gets pointer to the previous user record in the tree. It is assumed that
+the caller has appropriate latches on the page and its neighbor.
+@return	previous user record, NULL if there is none */
+UNIV_INTERN
+rec_t*
+btr_get_prev_user_rec(
+/*==================*/
+	rec_t*	rec,	/*!< in: record on leaf level */
+	mtr_t*	mtr)	/*!< in: mtr holding a latch on the page, and if
+			needed, also to the previous page */
+{
+	page_t*	page;
+	page_t*	prev_page;
+	ulint	prev_page_no;
+
+	if (!page_rec_is_infimum(rec)) {
+
+		rec_t*	prev_rec = page_rec_get_prev(rec);
+
+		if (!page_rec_is_infimum(prev_rec)) {
+
+			return(prev_rec);
+		}
+	}
+
+	page = page_align(rec);
+	prev_page_no = btr_page_get_prev(page, mtr);
+
+	if (prev_page_no != FIL_NULL) {
+
+		ulint		space;
+		ulint		zip_size;
+		buf_block_t*	prev_block;
+
+		space = page_get_space_id(page);
+		zip_size = fil_space_get_zip_size(space);
+
+		prev_block = buf_page_get_with_no_latch(space, zip_size,
+							prev_page_no, mtr);
+		prev_page = buf_block_get_frame(prev_block);
+		/* The caller must already have a latch to the brother */
+		ut_ad(mtr_memo_contains(mtr, prev_block,
+					MTR_MEMO_PAGE_S_FIX)
+		      || mtr_memo_contains(mtr, prev_block,
+					   MTR_MEMO_PAGE_X_FIX));
+#ifdef UNIV_BTR_DEBUG
+		ut_a(page_is_comp(prev_page) == page_is_comp(page));
+		ut_a(btr_page_get_next(prev_page, mtr)
+		     == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
+
+		return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
+	}
+
+	return(NULL);
+}
+
+/*************************************************************//**
+Gets pointer to the next user record in the tree. It is assumed that the
+caller has appropriate latches on the page and its neighbor.
+@return	next user record, NULL if there is none */
+UNIV_INTERN
+rec_t*
+btr_get_next_user_rec(
+/*==================*/
+	rec_t*	rec,	/*!< in: record on leaf level */
+	mtr_t*	mtr)	/*!< in: mtr holding a latch on the page, and if
+			needed, also to the next page */
+{
+	page_t*	page;
+	page_t*	next_page;
+	ulint	next_page_no;
+
+	if (!page_rec_is_supremum(rec)) {
+
+		rec_t*	next_rec = page_rec_get_next(rec);
+
+		if (!page_rec_is_supremum(next_rec)) {
+
+			return(next_rec);
+		}
+	}
+
+	page = page_align(rec);
+	next_page_no = btr_page_get_next(page, mtr);
+
+	if (next_page_no != FIL_NULL) {
+		ulint		space;
+		ulint		zip_size;
+		buf_block_t*	next_block;
+
+		space = page_get_space_id(page);
+		zip_size = fil_space_get_zip_size(space);
+
+		next_block = buf_page_get_with_no_latch(space, zip_size,
+							next_page_no, mtr);
+		next_page = buf_block_get_frame(next_block);
+		/* The caller must already have a latch to the brother */
+		ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX)
+		      || mtr_memo_contains(mtr, next_block,
+					   MTR_MEMO_PAGE_X_FIX));
+#ifdef UNIV_BTR_DEBUG
+		ut_a(page_is_comp(next_page) == page_is_comp(page));
+		ut_a(btr_page_get_prev(next_page, mtr)
+		     == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
+
+		return(page_rec_get_next(page_get_infimum_rec(next_page)));
+	}
+
+	return(NULL);
+}
+
+/**************************************************************//**
+Creates a new index page (not the root, and also not
+used in page reorganization).  @see btr_page_empty(). */
+static
+void
+btr_page_create(
+/*============*/
+	buf_block_t*	block,	/*!< in/out: page to be created */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	dict_index_t*	index,	/*!< in: index */
+	ulint		level,	/*!< in: the B-tree level of the page */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	page_t*		page = buf_block_get_frame(block);
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		page_create_zip(block, index, level, mtr);
+	} else {
+		page_create(block, mtr, dict_table_is_comp(index->table));
+		/* Set the level of the new index page */
+		btr_page_set_level(page, NULL, level, mtr);
+	}
+
+	block->check_index_page_at_flush = TRUE;
+
+	btr_page_set_index_id(page, page_zip, index->id, mtr);
+}
+
+/**************************************************************//**
+Allocates a new file page to be used in an ibuf tree. Takes the page from
+the free list of the tree, which must contain pages!
+@return	new allocated block, x-latched */
+static
+buf_block_t*
+btr_page_alloc_for_ibuf(
+/*====================*/
+	dict_index_t*	index,	/*!< in: index tree */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	fil_addr_t	node_addr;
+	page_t*		root;
+	page_t*		new_page;
+	buf_block_t*	new_block;
+
+	root = btr_root_get(index, mtr);
+
+	node_addr = flst_get_first(root + PAGE_HEADER
+				   + PAGE_BTR_IBUF_FREE_LIST, mtr);
+	ut_a(node_addr.page != FIL_NULL);
+
+	new_block = buf_page_get(dict_index_get_space(index),
+				 dict_table_zip_size(index->table),
+				 node_addr.page, RW_X_LATCH, mtr);
+	new_page = buf_block_get_frame(new_block);
+	buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
+
+	flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+		    new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
+		    mtr);
+	ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+			    mtr));
+
+	return(new_block);
+}
+
+/**************************************************************//**
+Allocates a new file page to be used in an index tree. NOTE: we assume
+that the caller has made the reservation for free extents!
+@return	new allocated block, x-latched; NULL if out of space */
+UNIV_INTERN
+buf_block_t*
+btr_page_alloc(
+/*===========*/
+	dict_index_t*	index,		/*!< in: index */
+	ulint		hint_page_no,	/*!< in: hint of a good page */
+	byte		file_direction,	/*!< in: direction where a possible
+					page split is made */
+	ulint		level,		/*!< in: level where the page is placed
+					in the tree */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	fseg_header_t*	seg_header;
+	page_t*		root;
+	buf_block_t*	new_block;
+	ulint		new_page_no;
+
+	if (dict_index_is_ibuf(index)) {
+
+		return(btr_page_alloc_for_ibuf(index, mtr));
+	}
+
+	root = btr_root_get(index, mtr);
+
+	if (level == 0) {
+		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+	} else {
+		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+	}
+
+	/* Parameter TRUE below states that the caller has made the
+	reservation for free extents, and thus we know that a page can
+	be allocated: */
+
+	new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
+						   file_direction, TRUE, mtr);
+	if (new_page_no == FIL_NULL) {
+
+		return(NULL);
+	}
+
+	new_block = buf_page_get(dict_index_get_space(index),
+				 dict_table_zip_size(index->table),
+				 new_page_no, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
+
+	return(new_block);
+}
+
+/**************************************************************//**
+Gets the number of pages in a B-tree.
+@return	number of pages */
+UNIV_INTERN
+ulint
+btr_get_size(
+/*=========*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		flag)	/*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
+{
+	fseg_header_t*	seg_header;
+	page_t*		root;
+	ulint		n;
+	ulint		dummy;
+	mtr_t		mtr;
+
+	mtr_start(&mtr);
+
+	mtr_s_lock(dict_index_get_lock(index), &mtr);
+
+	root = btr_root_get(index, &mtr);
+
+	if (flag == BTR_N_LEAF_PAGES) {
+		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+
+		fseg_n_reserved_pages(seg_header, &n, &mtr);
+
+	} else if (flag == BTR_TOTAL_SIZE) {
+		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+
+		n = fseg_n_reserved_pages(seg_header, &dummy, &mtr);
+
+		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+
+		n += fseg_n_reserved_pages(seg_header, &dummy, &mtr);
+	} else {
+		ut_error;
+	}
+
+	mtr_commit(&mtr);
+
+	return(n);
+}
+
+/**************************************************************//**
+Frees a page used in an ibuf tree. Puts the page to the free list of the
+ibuf tree. */
+static
+void
+btr_page_free_for_ibuf(
+/*===================*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	page_t*		root;
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	root = btr_root_get(index, mtr);
+
+	flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+		       buf_block_get_frame(block)
+		       + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
+
+	ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+			    mtr));
+}
+
+/**************************************************************//**
+Frees a file page used in an index tree. Can be used also to (BLOB)
+external storage pages, because the page level 0 can be given as an
+argument. */
+UNIV_INTERN
+void
+btr_page_free_low(
+/*==============*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
+	ulint		level,	/*!< in: page level */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	fseg_header_t*	seg_header;
+	page_t*		root;
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	/* The page gets invalid for optimistic searches: increment the frame
+	modify clock */
+
+	buf_block_modify_clock_inc(block);
+
+	if (dict_index_is_ibuf(index)) {
+
+		btr_page_free_for_ibuf(index, block, mtr);
+
+		return;
+	}
+
+	root = btr_root_get(index, mtr);
+
+	if (level == 0) {
+		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+	} else {
+		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+	}
+
+	fseg_free_page(seg_header,
+		       buf_block_get_space(block),
+		       buf_block_get_page_no(block), mtr);
+}
+
+/**************************************************************//**
+Frees a file page used in an index tree. NOTE: cannot free field external
+storage pages because the page must contain info on its level. */
+UNIV_INTERN
+void
+btr_page_free(
+/*==========*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ulint		level;
+
+	level = btr_page_get_level(buf_block_get_frame(block), mtr);
+
+	btr_page_free_low(index, block, level, mtr);
+}
+
+/**************************************************************//**
+Sets the child node file address in a node pointer. */
+UNIV_INLINE
+void
+btr_node_ptr_set_child_page_no(
+/*===========================*/
+	rec_t*		rec,	/*!< in: node pointer record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
+				part will be updated, or NULL */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		page_no,/*!< in: child node address */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	byte*	field;
+	ulint	len;
+
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(!page_is_leaf(page_align(rec)));
+	ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
+
+	/* The child address is in the last field */
+	field = rec_get_nth_field(rec, offsets,
+				  rec_offs_n_fields(offsets) - 1, &len);
+
+	ut_ad(len == REC_NODE_PTR_SIZE);
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		page_zip_write_node_ptr(page_zip, rec,
+					rec_offs_data_size(offsets),
+					page_no, mtr);
+	} else {
+		mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
+	}
+}
+
+/************************************************************//**
+Returns the child page of a node pointer and x-latches it.
+@return	child page, x-latched */
+static
+buf_block_t*
+btr_node_ptr_get_child(
+/*===================*/
+	const rec_t*	node_ptr,/*!< in: node pointer */
+	dict_index_t*	index,	/*!< in: index */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ulint	page_no;
+	ulint	space;
+
+	ut_ad(rec_offs_validate(node_ptr, index, offsets));
+	space = page_get_space_id(page_align(node_ptr));
+	page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
+
+	return(btr_block_get(space, dict_table_zip_size(index->table),
+			     page_no, RW_X_LATCH, mtr));
+}
+
+/************************************************************//**
+Returns the upper level node pointer to a page. It is assumed that mtr holds
+an x-latch on the tree.
+@return	rec_get_offsets() of the node pointer record */
+static
+ulint*
+btr_page_get_father_node_ptr(
+/*=========================*/
+	ulint*		offsets,/*!< in: work area for the return value */
+	mem_heap_t*	heap,	/*!< in: memory heap to use */
+	btr_cur_t*	cursor,	/*!< in: cursor pointing to user record,
+				out: cursor on node pointer record,
+				its page x-latched */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	dtuple_t*	tuple;
+	rec_t*		user_rec;
+	rec_t*		node_ptr;
+	ulint		level;
+	ulint		page_no;
+	dict_index_t*	index;
+
+	page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
+	index = btr_cur_get_index(cursor);
+
+	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+				MTR_MEMO_X_LOCK));
+
+	ut_ad(dict_index_get_page(index) != page_no);
+
+	level = btr_page_get_level(btr_cur_get_page(cursor), mtr);
+	user_rec = btr_cur_get_rec(cursor);
+	ut_a(page_rec_is_user_rec(user_rec));
+	tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);
+
+	btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
+				    BTR_CONT_MODIFY_TREE, cursor, 0, mtr);
+
+	node_ptr = btr_cur_get_rec(cursor);
+	ut_ad(!page_rec_is_comp(node_ptr)
+	      || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
+	offsets = rec_get_offsets(node_ptr, index, offsets,
+				  ULINT_UNDEFINED, &heap);
+
+	if (UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr, offsets)
+			  != page_no)) {
+		rec_t*	print_rec;
+		fputs("InnoDB: Dump of the child page:\n", stderr);
+		buf_page_print(page_align(user_rec), 0);
+		fputs("InnoDB: Dump of the parent page:\n", stderr);
+		buf_page_print(page_align(node_ptr), 0);
+
+		fputs("InnoDB: Corruption of an index tree: table ", stderr);
+		ut_print_name(stderr, NULL, TRUE, index->table_name);
+		fputs(", index ", stderr);
+		ut_print_name(stderr, NULL, FALSE, index->name);
+		fprintf(stderr, ",\n"
+			"InnoDB: father ptr page no %lu, child page no %lu\n",
+			(ulong)
+			btr_node_ptr_get_child_page_no(node_ptr, offsets),
+			(ulong) page_no);
+		print_rec = page_rec_get_next(
+			page_get_infimum_rec(page_align(user_rec)));
+		offsets = rec_get_offsets(print_rec, index,
+					  offsets, ULINT_UNDEFINED, &heap);
+		page_rec_print(print_rec, offsets);
+		offsets = rec_get_offsets(node_ptr, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		page_rec_print(node_ptr, offsets);
+
+		fputs("InnoDB: You should dump + drop + reimport the table"
+		      " to fix the\n"
+		      "InnoDB: corruption. If the crash happens at "
+		      "the database startup, see\n"
+		      "InnoDB: " REFMAN "forcing-recovery.html about\n"
+		      "InnoDB: forcing recovery. "
+		      "Then dump + drop + reimport.\n", stderr);
+
+		ut_error;
+	}
+
+	return(offsets);
+}
+
+/************************************************************//**
+Returns the upper level node pointer to a page. It is assumed that mtr holds
+an x-latch on the tree.
+@return	rec_get_offsets() of the node pointer record */
+static
+ulint*
+btr_page_get_father_block(
+/*======================*/
+	ulint*		offsets,/*!< in: work area for the return value */
+	mem_heap_t*	heap,	/*!< in: memory heap to use */
+	dict_index_t*	index,	/*!< in: b-tree index */
+	buf_block_t*	block,	/*!< in: child page in the index */
+	mtr_t*		mtr,	/*!< in: mtr */
+	btr_cur_t*	cursor)	/*!< out: cursor on node pointer record,
+				its page x-latched */
+{
+	rec_t*	rec
+		= page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
+								 block)));
+	btr_cur_position(index, rec, block, cursor);
+	return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
+}
+
+/************************************************************//**
+Seeks to the upper level node pointer to a page.
+It is assumed that mtr holds an x-latch on the tree. */
+static
+void
+btr_page_get_father(
+/*================*/
+	dict_index_t*	index,	/*!< in: b-tree index */
+	buf_block_t*	block,	/*!< in: child page in the index */
+	mtr_t*		mtr,	/*!< in: mtr */
+	btr_cur_t*	cursor)	/*!< out: cursor on node pointer record,
+				its page x-latched */
+{
+	mem_heap_t*	heap;
+	rec_t*		rec
+		= page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
+								 block)));
+	btr_cur_position(index, rec, block, cursor);
+
+	heap = mem_heap_create(100);
+	btr_page_get_father_node_ptr(NULL, heap, cursor, mtr);
+	mem_heap_free(heap);
+}
+
+/************************************************************//**
+Creates the root node for a new index tree.
+@return	page number of the created root, FIL_NULL if did not succeed */
+UNIV_INTERN
+ulint
+btr_create(
+/*=======*/
+	ulint		type,	/*!< in: type of the index */
+	ulint		space,	/*!< in: space where created */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	dulint		index_id,/*!< in: index id */
+	dict_index_t*	index,	/*!< in: index */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+{
+	ulint		page_no;
+	buf_block_t*	block;
+	buf_frame_t*	frame;
+	page_t*		page;
+	page_zip_des_t*	page_zip;
+
+	/* Create the two new segments (one, in the case of an ibuf tree) for
+	the index tree; the segment headers are put on the allocated root page
+	(for an ibuf tree, not in the root, but on a separate ibuf header
+	page) */
+
+	if (type & DICT_IBUF) {
+		/* Allocate first the ibuf header page */
+		buf_block_t*	ibuf_hdr_block = fseg_create(
+			space, 0,
+			IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
+
+		buf_block_dbg_add_level(ibuf_hdr_block, SYNC_TREE_NODE_NEW);
+
+		ut_ad(buf_block_get_page_no(ibuf_hdr_block)
+		      == IBUF_HEADER_PAGE_NO);
+		/* Allocate then the next page to the segment: it will be the
+		tree root page */
+
+		page_no = fseg_alloc_free_page(buf_block_get_frame(
+						       ibuf_hdr_block)
+					       + IBUF_HEADER
+					       + IBUF_TREE_SEG_HEADER,
+					       IBUF_TREE_ROOT_PAGE_NO,
+					       FSP_UP, mtr);
+		ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
+
+		block = buf_page_get(space, zip_size, page_no,
+				     RW_X_LATCH, mtr);
+	} else {
+		block = fseg_create(space, 0,
+				    PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
+	}
+
+	if (block == NULL) {
+
+		return(FIL_NULL);
+	}
+
+	page_no = buf_block_get_page_no(block);
+	frame = buf_block_get_frame(block);
+
+	buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
+
+	if (type & DICT_IBUF) {
+		/* It is an insert buffer tree: initialize the free list */
+
+		ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
+
+		flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
+	} else {
+		/* It is a non-ibuf tree: create a file segment for leaf
+		pages */
+		fseg_create(space, page_no,
+			    PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr);
+		/* The fseg create acquires a second latch on the page,
+		therefore we must declare it: */
+		buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
+	}
+
+	/* Create a new index page on the the allocated segment page */
+	page_zip = buf_block_get_page_zip(block);
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		page = page_create_zip(block, index, 0, mtr);
+	} else {
+		page = page_create(block, mtr,
+				   dict_table_is_comp(index->table));
+		/* Set the level of the new index page */
+		btr_page_set_level(page, NULL, 0, mtr);
+	}
+
+	block->check_index_page_at_flush = TRUE;
+
+	/* Set the index id of the page */
+	btr_page_set_index_id(page, page_zip, index_id, mtr);
+
+	/* Set the next node and previous node fields */
+	btr_page_set_next(page, page_zip, FIL_NULL, mtr);
+	btr_page_set_prev(page, page_zip, FIL_NULL, mtr);
+
+	/* We reset the free bits for the page to allow creation of several
+	trees in the same mtr, otherwise the latch on a bitmap page would
+	prevent it because of the latching order */
+
+	if (!(type & DICT_CLUSTERED)) {
+		ibuf_reset_free_bits(block);
+	}
+
+	/* In the following assertion we test that two records of maximum
+	allowed size fit on the root page: this fact is needed to ensure
+	correctness of split algorithms */
+
+	ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE);
+
+	return(page_no);
+}
+
+/************************************************************//**
+Frees a B-tree except the root page, which MUST be freed after this
+by calling btr_free_root. */
+UNIV_INTERN
+void
+btr_free_but_not_root(
+/*==================*/
+	ulint	space,		/*!< in: space where created */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	root_page_no)	/*!< in: root page number */
+{
+	ibool	finished;
+	page_t*	root;
+	mtr_t	mtr;
+
+leaf_loop:
+	mtr_start(&mtr);
+
+	root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
+#ifdef UNIV_BTR_DEBUG
+	ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+				    + root, space));
+	ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+				    + root, space));
+#endif /* UNIV_BTR_DEBUG */
+
+	/* NOTE: page hash indexes are dropped when a page is freed inside
+	fsp0fsp. */
+
+	finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF,
+				  &mtr);
+	mtr_commit(&mtr);
+
+	if (!finished) {
+
+		goto leaf_loop;
+	}
+top_loop:
+	mtr_start(&mtr);
+
+	root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
+#ifdef UNIV_BTR_DEBUG
+	ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+				    + root, space));
+#endif /* UNIV_BTR_DEBUG */
+
+	finished = fseg_free_step_not_header(
+		root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
+	mtr_commit(&mtr);
+
+	if (!finished) {
+
+		goto top_loop;
+	}
+}
+
+/************************************************************//**
+Frees the B-tree root page. Other tree MUST already have been freed. */
+UNIV_INTERN
+void
+btr_free_root(
+/*==========*/
+	ulint	space,		/*!< in: space where created */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	root_page_no,	/*!< in: root page number */
+	mtr_t*	mtr)		/*!< in: a mini-transaction which has already
+				been started */
+{
+	buf_block_t*	block;
+	fseg_header_t*	header;
+
+	block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
+
+	btr_search_drop_page_hash_index(block);
+
+	header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+#ifdef UNIV_BTR_DEBUG
+	ut_a(btr_root_fseg_validate(header, space));
+#endif /* UNIV_BTR_DEBUG */
+
+	while (!fseg_free_step(header, mtr));
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*************************************************************//**
+Reorganizes an index page. */
+static
+ibool
+btr_page_reorganize_low(
+/*====================*/
+	ibool		recovery,/*!< in: TRUE if called in recovery:
+				locks should not be updated, i.e.,
+				there cannot exist locks on the
+				page, and a hash index should not be
+				dropped: it cannot exist */
+	buf_block_t*	block,	/*!< in: page to be reorganized */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	page_t*		page		= buf_block_get_frame(block);
+	page_zip_des_t*	page_zip	= buf_block_get_page_zip(block);
+	buf_block_t*	temp_block;
+	page_t*		temp_page;
+	ulint		log_mode;
+	ulint		data_size1;
+	ulint		data_size2;
+	ulint		max_ins_size1;
+	ulint		max_ins_size2;
+	ibool		success		= FALSE;
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+	data_size1 = page_get_data_size(page);
+	max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
+
+#ifndef UNIV_HOTBACKUP
+	/* Write the log record */
+	mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
+				  ? MLOG_COMP_PAGE_REORGANIZE
+				  : MLOG_PAGE_REORGANIZE, 0);
+#endif /* !UNIV_HOTBACKUP */
+
+	/* Turn logging off */
+	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+
+#ifndef UNIV_HOTBACKUP
+	temp_block = buf_block_alloc(0);
+#else /* !UNIV_HOTBACKUP */
+	ut_ad(block == back_block1);
+	temp_block = back_block2;
+#endif /* !UNIV_HOTBACKUP */
+	temp_page = temp_block->frame;
+
+	/* Copy the old page to temporary space */
+	buf_frame_copy(temp_page, page);
+
+#ifndef UNIV_HOTBACKUP
+	if (UNIV_LIKELY(!recovery)) {
+		btr_search_drop_page_hash_index(block);
+	}
+
+	block->check_index_page_at_flush = TRUE;
+#endif /* !UNIV_HOTBACKUP */
+
+	/* Recreate the page: note that global data on page (possible
+	segment headers, next page-field, etc.) is preserved intact */
+
+	page_create(block, mtr, dict_table_is_comp(index->table));
+
+	/* Copy the records from the temporary space to the recreated page;
+	do not copy the lock bits yet */
+
+	page_copy_rec_list_end_no_locks(block, temp_block,
+					page_get_infimum_rec(temp_page),
+					index, mtr);
+
+	if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
+		/* Copy max trx id to recreated page */
+		trx_id_t	max_trx_id = page_get_max_trx_id(temp_page);
+		page_set_max_trx_id(block, NULL, max_trx_id, mtr);
+		/* In crash recovery, dict_index_is_sec_or_ibuf() always
+		returns TRUE, even for clustered indexes.  max_trx_id is
+		unused in clustered index pages. */
+		ut_ad(!ut_dulint_is_zero(max_trx_id) || recovery);
+	}
+
+	if (UNIV_LIKELY_NULL(page_zip)
+	    && UNIV_UNLIKELY
+	    (!page_zip_compress(page_zip, page, index, NULL))) {
+
+		/* Restore the old page and exit. */
+		buf_frame_copy(page, temp_page);
+
+		goto func_exit;
+	}
+
+#ifndef UNIV_HOTBACKUP
+	if (UNIV_LIKELY(!recovery)) {
+		/* Update the record lock bitmaps */
+		lock_move_reorganize_page(block, temp_block);
+	}
+#endif /* !UNIV_HOTBACKUP */
+
+	data_size2 = page_get_data_size(page);
+	max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
+
+	if (UNIV_UNLIKELY(data_size1 != data_size2)
+	    || UNIV_UNLIKELY(max_ins_size1 != max_ins_size2)) {
+		buf_page_print(page, 0);
+		buf_page_print(temp_page, 0);
+		fprintf(stderr,
+			"InnoDB: Error: page old data size %lu"
+			" new data size %lu\n"
+			"InnoDB: Error: page old max ins size %lu"
+			" new max ins size %lu\n"
+			"InnoDB: Submit a detailed bug report"
+			" to http://bugs.mysql.com\n",
+			(unsigned long) data_size1, (unsigned long) data_size2,
+			(unsigned long) max_ins_size1,
+			(unsigned long) max_ins_size2);
+	} else {
+		success = TRUE;
+	}
+
+func_exit:
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+#ifndef UNIV_HOTBACKUP
+	buf_block_free(temp_block);
+#endif /* !UNIV_HOTBACKUP */
+
+	/* Restore logging mode */
+	mtr_set_log_mode(mtr, log_mode);
+
+	return(success);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Reorganizes an index page.
+IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
+page of a non-clustered index, the caller must update the insert
+buffer free bits in the same mini-transaction in such a way that the
+modification will be redo-logged.
+@return	TRUE on success, FALSE on failure */
+UNIV_INTERN
+ibool
+btr_page_reorganize(
+/*================*/
+	buf_block_t*	block,	/*!< in: page to be reorganized */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	return(btr_page_reorganize_low(FALSE, block, index, mtr));
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Parses a redo log record of reorganizing a page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_parse_page_reorganize(
+/*======================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr __attribute__((unused)),
+				/*!< in: buffer end */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	buf_block_t*	block,	/*!< in: page to be reorganized, or NULL */
+	mtr_t*		mtr)	/*!< in: mtr or NULL */
+{
+	ut_ad(ptr && end_ptr);
+
+	/* The record is empty, except for the record initial part */
+
+	if (UNIV_LIKELY(block != NULL)) {
+		btr_page_reorganize_low(TRUE, block, index, mtr);
+	}
+
+	return(ptr);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Empties an index page.  @see btr_page_create(). */
+static
+void
+btr_page_empty(
+/*===========*/
+	buf_block_t*	block,	/*!< in: page to be emptied */
+	page_zip_des_t*	page_zip,/*!< out: compressed page, or NULL */
+	dict_index_t*	index,	/*!< in: index of the page */
+	ulint		level,	/*!< in: the B-tree level of the page */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	page_t*	page = buf_block_get_frame(block);
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(page_zip == buf_block_get_page_zip(block));
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+	btr_search_drop_page_hash_index(block);
+
+	/* Recreate the page: note that global data on page (possible
+	segment headers, next page-field, etc.) is preserved intact */
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		page_create_zip(block, index, level, mtr);
+	} else {
+		page_create(block, mtr, dict_table_is_comp(index->table));
+		btr_page_set_level(page, NULL, level, mtr);
+	}
+
+	block->check_index_page_at_flush = TRUE;
+}
+
+/*************************************************************//**
+Makes tree one level higher by splitting the root, and inserts
+the tuple. It is assumed that mtr contains an x-latch on the tree.
+NOTE that the operation of this function must always succeed,
+we cannot reverse it: therefore enough free disk space must be
+guaranteed to be available before this function is called.
+@return	inserted record */
+UNIV_INTERN
+rec_t*
+btr_root_raise_and_insert(
+/*======================*/
+	btr_cur_t*	cursor,	/*!< in: cursor at which to insert: must be
+				on the root page; when the function returns,
+				the cursor is positioned on the predecessor
+				of the inserted record */
+	const dtuple_t*	tuple,	/*!< in: tuple to insert */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	dict_index_t*	index;
+	page_t*		root;
+	page_t*		new_page;
+	ulint		new_page_no;
+	rec_t*		rec;
+	mem_heap_t*	heap;
+	dtuple_t*	node_ptr;
+	ulint		level;
+	rec_t*		node_ptr_rec;
+	page_cur_t*	page_cursor;
+	page_zip_des_t*	root_page_zip;
+	page_zip_des_t*	new_page_zip;
+	buf_block_t*	root_block;
+	buf_block_t*	new_block;
+
+	root = btr_cur_get_page(cursor);
+	root_block = btr_cur_get_block(cursor);
+	root_page_zip = buf_block_get_page_zip(root_block);
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!root_page_zip || page_zip_validate(root_page_zip, root));
+#endif /* UNIV_ZIP_DEBUG */
+	index = btr_cur_get_index(cursor);
+#ifdef UNIV_BTR_DEBUG
+	if (!dict_index_is_ibuf(index)) {
+		ulint	space = dict_index_get_space(index);
+
+		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+					    + root, space));
+		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+					    + root, space));
+	}
+
+	ut_a(dict_index_get_page(index) == page_get_page_no(root));
+#endif /* UNIV_BTR_DEBUG */
+	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));
+
+	/* Allocate a new page to the tree. Root splitting is done by first
+	moving the root records to the new page, emptying the root, putting
+	a node pointer to the new page, and then splitting the new page. */
+
+	level = btr_page_get_level(root, mtr);
+
+	new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr);
+	new_page = buf_block_get_frame(new_block);
+	new_page_zip = buf_block_get_page_zip(new_block);
+	ut_a(!new_page_zip == !root_page_zip);
+	ut_a(!new_page_zip
+	     || page_zip_get_size(new_page_zip)
+	     == page_zip_get_size(root_page_zip));
+
+	btr_page_create(new_block, new_page_zip, index, level, mtr);
+
+	/* Set the next node and previous node fields of new page */
+	btr_page_set_next(new_page, new_page_zip, FIL_NULL, mtr);
+	btr_page_set_prev(new_page, new_page_zip, FIL_NULL, mtr);
+
+	/* Copy the records from root to the new page one by one. */
+
+	if (0
+#ifdef UNIV_ZIP_COPY
+	    || new_page_zip
+#endif /* UNIV_ZIP_COPY */
+	    || UNIV_UNLIKELY
+	    (!page_copy_rec_list_end(new_block, root_block,
+				     page_get_infimum_rec(root),
+				     index, mtr))) {
+		ut_a(new_page_zip);
+
+		/* Copy the page byte for byte. */
+		page_zip_copy_recs(new_page_zip, new_page,
+				   root_page_zip, root, index, mtr);
+
+		/* Update the lock table and possible hash index. */
+
+		lock_move_rec_list_end(new_block, root_block,
+				       page_get_infimum_rec(root));
+
+		btr_search_move_or_delete_hash_entries(new_block, root_block,
+						       index);
+	}
+
+	/* If this is a pessimistic insert which is actually done to
+	perform a pessimistic update then we have stored the lock
+	information of the record to be inserted on the infimum of the
+	root page: we cannot discard the lock structs on the root page */
+
+	lock_update_root_raise(new_block, root_block);
+
+	/* Create a memory heap where the node pointer is stored */
+	heap = mem_heap_create(100);
+
+	rec = page_rec_get_next(page_get_infimum_rec(new_page));
+	new_page_no = buf_block_get_page_no(new_block);
+
+	/* Build the node pointer (= node key and page address) for the
+	child */
+
+	node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
+					     level);
+	/* The node pointer must be marked as the predefined minimum record,
+	as there is no lower alphabetical limit to records in the leftmost
+	node of a level: */
+	dtuple_set_info_bits(node_ptr,
+			     dtuple_get_info_bits(node_ptr)
+			     | REC_INFO_MIN_REC_FLAG);
+
+	/* Rebuild the root page to get free space */
+	btr_page_empty(root_block, root_page_zip, index, level + 1, mtr);
+
+	/* Set the next node and previous node fields, although
+	they should already have been set.  The previous node field
+	must be FIL_NULL if root_page_zip != NULL, because the
+	REC_INFO_MIN_REC_FLAG (of the first user record) will be
+	set if and only if btr_page_get_prev() == FIL_NULL. */
+	btr_page_set_next(root, root_page_zip, FIL_NULL, mtr);
+	btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr);
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	/* Insert node pointer to the root */
+
+	page_cur_set_before_first(root_block, page_cursor);
+
+	node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
+					     index, 0, mtr);
+
+	/* The root page should only contain the node pointer
+	to new_page at this point.  Thus, the data should fit. */
+	ut_a(node_ptr_rec);
+
+	/* Free the memory heap */
+	mem_heap_free(heap);
+
+	/* We play safe and reset the free bits for the new page */
+
+#if 0
+	fprintf(stderr, "Root raise new page no %lu\n", new_page_no);
+#endif
+
+	if (!dict_index_is_clust(index)) {
+		ibuf_reset_free_bits(new_block);
+	}
+
+	/* Reposition the cursor to the child node */
+	page_cur_search(new_block, index, tuple,
+			PAGE_CUR_LE, page_cursor);
+
+	/* Split the child and insert tuple */
+	return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
+}
+
+/*************************************************************//**
+Decides if the page should be split at the convergence point of inserts
+converging to the left.
+@return	TRUE if split recommended */
+UNIV_INTERN
+ibool
+btr_page_get_split_rec_to_left(
+/*===========================*/
+	btr_cur_t*	cursor,	/*!< in: cursor at which to insert */
+	rec_t**		split_rec) /*!< out: if split recommended,
+				the first record on upper half page,
+				or NULL if tuple to be inserted should
+				be first */
+{
+	page_t*	page;
+	rec_t*	insert_point;
+	rec_t*	infimum;
+
+	page = btr_cur_get_page(cursor);
+	insert_point = btr_cur_get_rec(cursor);
+
+	if (page_header_get_ptr(page, PAGE_LAST_INSERT)
+	    == page_rec_get_next(insert_point)) {
+
+		infimum = page_get_infimum_rec(page);
+
+		/* If the convergence is in the middle of a page, include also
+		the record immediately before the new insert to the upper
+		page. Otherwise, we could repeatedly move from page to page
+		lots of records smaller than the convergence point. */
+
+		if (infimum != insert_point
+		    && page_rec_get_next(infimum) != insert_point) {
+
+			*split_rec = insert_point;
+		} else {
+			*split_rec = page_rec_get_next(insert_point);
+		}
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*************************************************************//**
+Decides if the page should be split at the convergence point of inserts
+converging to the right.
+@return	TRUE if split recommended */
+UNIV_INTERN
+ibool
+btr_page_get_split_rec_to_right(
+/*============================*/
+	btr_cur_t*	cursor,	/*!< in: cursor at which to insert */
+	rec_t**		split_rec) /*!< out: if split recommended,
+				the first record on upper half page,
+				or NULL if tuple to be inserted should
+				be first */
+{
+	page_t*	page;
+	rec_t*	insert_point;
+
+	page = btr_cur_get_page(cursor);
+	insert_point = btr_cur_get_rec(cursor);
+
+	/* We use eager heuristics: if the new insert would be right after
+	the previous insert on the same page, we assume that there is a
+	pattern of sequential inserts here. */
+
+	if (UNIV_LIKELY(page_header_get_ptr(page, PAGE_LAST_INSERT)
+			== insert_point)) {
+
+		rec_t*	next_rec;
+
+		next_rec = page_rec_get_next(insert_point);
+
+		if (page_rec_is_supremum(next_rec)) {
+split_at_new:
+			/* Split at the new record to insert */
+			*split_rec = NULL;
+		} else {
+			rec_t*	next_next_rec = page_rec_get_next(next_rec);
+			if (page_rec_is_supremum(next_next_rec)) {
+
+				goto split_at_new;
+			}
+
+			/* If there are >= 2 user records up from the insert
+			point, split all but 1 off. We want to keep one because
+			then sequential inserts can use the adaptive hash
+			index, as they can do the necessary checks of the right
+			search position just by looking at the records on this
+			page. */
+
+			*split_rec = next_next_rec;
+		}
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*************************************************************//**
+Calculates a split record such that the tuple will certainly fit on
+its half-page when the split is performed. We assume in this function
+only that the cursor page has at least one user record.
+@return split record, or NULL if tuple will be the first record on
+upper half-page */
+static
+rec_t*
+btr_page_get_sure_split_rec(
+/*========================*/
+	btr_cur_t*	cursor,	/*!< in: cursor at which insert should be made */
+	const dtuple_t*	tuple,	/*!< in: tuple to insert */
+	ulint		n_ext)	/*!< in: number of externally stored columns */
+{
+	page_t*		page;
+	page_zip_des_t*	page_zip;
+	ulint		insert_size;
+	ulint		free_space;
+	ulint		total_data;
+	ulint		total_n_recs;
+	ulint		total_space;
+	ulint		incl_data;
+	rec_t*		ins_rec;
+	rec_t*		rec;
+	rec_t*		next_rec;
+	ulint		n;
+	mem_heap_t*	heap;
+	ulint*		offsets;
+
+	page = btr_cur_get_page(cursor);
+
+	insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
+	free_space  = page_get_free_space_of_empty(page_is_comp(page));
+
+	page_zip = btr_cur_get_page_zip(cursor);
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		/* Estimate the free space of an empty compressed page. */
+		ulint	free_space_zip = page_zip_empty_size(
+			cursor->index->n_fields,
+			page_zip_get_size(page_zip));
+
+		if (UNIV_LIKELY(free_space > (ulint) free_space_zip)) {
+			free_space = (ulint) free_space_zip;
+		}
+	}
+
+	/* free_space is now the free space of a created new page */
+
+	total_data   = page_get_data_size(page) + insert_size;
+	total_n_recs = page_get_n_recs(page) + 1;
+	ut_ad(total_n_recs >= 2);
+	total_space  = total_data + page_dir_calc_reserved_space(total_n_recs);
+
+	n = 0;
+	incl_data = 0;
+	ins_rec = btr_cur_get_rec(cursor);
+	rec = page_get_infimum_rec(page);
+
+	heap = NULL;
+	offsets = NULL;
+
+	/* We start to include records to the left half, and when the
+	space reserved by them exceeds half of total_space, then if
+	the included records fit on the left page, they will be put there
+	if something was left over also for the right page,
+	otherwise the last included record will be the first on the right
+	half page */
+
+	do {
+		/* Decide the next record to include */
+		if (rec == ins_rec) {
+			rec = NULL;	/* NULL denotes that tuple is
+					now included */
+		} else if (rec == NULL) {
+			rec = page_rec_get_next(ins_rec);
+		} else {
+			rec = page_rec_get_next(rec);
+		}
+
+		if (rec == NULL) {
+			/* Include tuple */
+			incl_data += insert_size;
+		} else {
+			offsets = rec_get_offsets(rec, cursor->index,
+						  offsets, ULINT_UNDEFINED,
+						  &heap);
+			incl_data += rec_offs_size(offsets);
+		}
+
+		n++;
+	} while (incl_data + page_dir_calc_reserved_space(n)
+		 < total_space / 2);
+
+	if (incl_data + page_dir_calc_reserved_space(n) <= free_space) {
+		/* The next record will be the first on
+		the right half page if it is not the
+		supremum record of page */
+
+		if (rec == ins_rec) {
+			rec = NULL;
+
+			goto func_exit;
+		} else if (rec == NULL) {
+			next_rec = page_rec_get_next(ins_rec);
+		} else {
+			next_rec = page_rec_get_next(rec);
+		}
+		ut_ad(next_rec);
+		if (!page_rec_is_supremum(next_rec)) {
+			rec = next_rec;
+		}
+	}
+
+func_exit:
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+	return(rec);
+}
+
+/*************************************************************//**
+Returns TRUE if the insert fits on the appropriate half-page with the
+chosen split_rec.
+@return	TRUE if fits */
+static
+ibool
+btr_page_insert_fits(
+/*=================*/
+	btr_cur_t*	cursor,	/*!< in: cursor at which insert
+				should be made */
+	const rec_t*	split_rec,/*!< in: suggestion for first record
+				on upper half-page, or NULL if
+				tuple to be inserted should be first */
+	const ulint*	offsets,/*!< in: rec_get_offsets(
+				split_rec, cursor->index) */
+	const dtuple_t*	tuple,	/*!< in: tuple to insert */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	mem_heap_t*	heap)	/*!< in: temporary memory heap */
+{
+	page_t*		page;
+	ulint		insert_size;
+	ulint		free_space;
+	ulint		total_data;
+	ulint		total_n_recs;
+	const rec_t*	rec;
+	const rec_t*	end_rec;
+	ulint*		offs;
+
+	page = btr_cur_get_page(cursor);
+
+	ut_ad(!split_rec == !offsets);
+	ut_ad(!offsets
+	      || !page_is_comp(page) == !rec_offs_comp(offsets));
+	ut_ad(!offsets
+	      || rec_offs_validate(split_rec, cursor->index, offsets));
+
+	insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
+	free_space  = page_get_free_space_of_empty(page_is_comp(page));
+
+	/* free_space is now the free space of a created new page */
+
+	total_data   = page_get_data_size(page) + insert_size;
+	total_n_recs = page_get_n_recs(page) + 1;
+
+	/* We determine which records (from rec to end_rec, not including
+	end_rec) will end up on the other half page from tuple when it is
+	inserted. */
+
+	if (split_rec == NULL) {
+		rec = page_rec_get_next(page_get_infimum_rec(page));
+		end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
+
+	} else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {
+
+		rec = page_rec_get_next(page_get_infimum_rec(page));
+		end_rec = split_rec;
+	} else {
+		rec = split_rec;
+		end_rec = page_get_supremum_rec(page);
+	}
+
+	if (total_data + page_dir_calc_reserved_space(total_n_recs)
+	    <= free_space) {
+
+		/* Ok, there will be enough available space on the
+		half page where the tuple is inserted */
+
+		return(TRUE);
+	}
+
+	offs = NULL;
+
+	while (rec != end_rec) {
+		/* In this loop we calculate the amount of reserved
+		space after rec is removed from page. */
+
+		offs = rec_get_offsets(rec, cursor->index, offs,
+				       ULINT_UNDEFINED, &heap);
+
+		total_data -= rec_offs_size(offs);
+		total_n_recs--;
+
+		if (total_data + page_dir_calc_reserved_space(total_n_recs)
+		    <= free_space) {
+
+			/* Ok, there will be enough available space on the
+			half page where the tuple is inserted */
+
+			return(TRUE);
+		}
+
+		rec = page_rec_get_next_const(rec);
+	}
+
+	return(FALSE);
+}
+
+/*******************************************************//**
+Inserts a data tuple to a tree on a non-leaf level. It is assumed
+that mtr holds an x-latch on the tree. */
+UNIV_INTERN
+void
+btr_insert_on_non_leaf_level(
+/*=========================*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		level,	/*!< in: level, must be > 0 */
+	dtuple_t*	tuple,	/*!< in: the record to be inserted */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	big_rec_t*	dummy_big_rec;
+	btr_cur_t	cursor;
+	ulint		err;
+	rec_t*		rec;
+
+	ut_ad(level > 0);
+
+	btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
+				    BTR_CONT_MODIFY_TREE,
+				    &cursor, 0, mtr);
+
+	err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
+					 | BTR_KEEP_SYS_FLAG
+					 | BTR_NO_UNDO_LOG_FLAG,
+					 &cursor, tuple, &rec,
+					 &dummy_big_rec, 0, NULL, mtr);
+	ut_a(err == DB_SUCCESS);
+}
+
+/**************************************************************//**
+Attaches the halves of an index page on the appropriate level in an
+index tree. */
+static
+void
+btr_attach_half_pages(
+/*==================*/
+	dict_index_t*	index,		/*!< in: the index tree */
+	buf_block_t*	block,		/*!< in/out: page to be split */
+	rec_t*		split_rec,	/*!< in: first record on upper
+					half page */
+	buf_block_t*	new_block,	/*!< in/out: the new half page */
+	ulint		direction,	/*!< in: FSP_UP or FSP_DOWN */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	ulint		space;
+	ulint		zip_size;
+	ulint		prev_page_no;
+	ulint		next_page_no;
+	ulint		level;
+	page_t*		page		= buf_block_get_frame(block);
+	page_t*		lower_page;
+	page_t*		upper_page;
+	ulint		lower_page_no;
+	ulint		upper_page_no;
+	page_zip_des_t*	lower_page_zip;
+	page_zip_des_t*	upper_page_zip;
+	dtuple_t*	node_ptr_upper;
+	mem_heap_t*	heap;
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX));
+
+	/* Create a memory heap where the data tuple is stored */
+	heap = mem_heap_create(1024);
+
+	/* Based on split direction, decide upper and lower pages */
+	if (direction == FSP_DOWN) {
+
+		btr_cur_t	cursor;
+		ulint*		offsets;
+
+		lower_page = buf_block_get_frame(new_block);
+		lower_page_no = buf_block_get_page_no(new_block);
+		lower_page_zip = buf_block_get_page_zip(new_block);
+		upper_page = buf_block_get_frame(block);
+		upper_page_no = buf_block_get_page_no(block);
+		upper_page_zip = buf_block_get_page_zip(block);
+
+		/* Look up the index for the node pointer to page */
+		offsets = btr_page_get_father_block(NULL, heap, index,
+						    block, mtr, &cursor);
+
+		/* Replace the address of the old child node (= page) with the
+		address of the new lower half */
+
+		btr_node_ptr_set_child_page_no(
+			btr_cur_get_rec(&cursor),
+			btr_cur_get_page_zip(&cursor),
+			offsets, lower_page_no, mtr);
+		mem_heap_empty(heap);
+	} else {
+		lower_page = buf_block_get_frame(block);
+		lower_page_no = buf_block_get_page_no(block);
+		lower_page_zip = buf_block_get_page_zip(block);
+		upper_page = buf_block_get_frame(new_block);
+		upper_page_no = buf_block_get_page_no(new_block);
+		upper_page_zip = buf_block_get_page_zip(new_block);
+	}
+
+	/* Get the level of the split pages */
+	level = btr_page_get_level(buf_block_get_frame(block), mtr);
+	ut_ad(level
+	      == btr_page_get_level(buf_block_get_frame(new_block), mtr));
+
+	/* Build the node pointer (= node key and page address) for the upper
+	half */
+
+	node_ptr_upper = dict_index_build_node_ptr(index, split_rec,
+						   upper_page_no, heap, level);
+
+	/* Insert it next to the pointer to the lower half. Note that this
+	may generate recursion leading to a split on the higher level. */
+
+	btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);
+
+	/* Free the memory heap */
+	mem_heap_free(heap);
+
+	/* Get the previous and next pages of page */
+
+	prev_page_no = btr_page_get_prev(page, mtr);
+	next_page_no = btr_page_get_next(page, mtr);
+	space = buf_block_get_space(block);
+	zip_size = buf_block_get_zip_size(block);
+
+	/* Update page links of the level */
+
+	if (prev_page_no != FIL_NULL) {
+		buf_block_t*	prev_block = btr_block_get(space, zip_size,
+							   prev_page_no,
+							   RW_X_LATCH, mtr);
+#ifdef UNIV_BTR_DEBUG
+		ut_a(page_is_comp(prev_block->frame) == page_is_comp(page));
+		ut_a(btr_page_get_next(prev_block->frame, mtr)
+		     == buf_block_get_page_no(block));
+#endif /* UNIV_BTR_DEBUG */
+
+		btr_page_set_next(buf_block_get_frame(prev_block),
+				  buf_block_get_page_zip(prev_block),
+				  lower_page_no, mtr);
+	}
+
+	if (next_page_no != FIL_NULL) {
+		buf_block_t*	next_block = btr_block_get(space, zip_size,
+							   next_page_no,
+							   RW_X_LATCH, mtr);
+#ifdef UNIV_BTR_DEBUG
+		ut_a(page_is_comp(next_block->frame) == page_is_comp(page));
+		ut_a(btr_page_get_prev(next_block->frame, mtr)
+		     == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
+
+		btr_page_set_prev(buf_block_get_frame(next_block),
+				  buf_block_get_page_zip(next_block),
+				  upper_page_no, mtr);
+	}
+
+	btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr);
+	btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);
+
+	btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr);
+	btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
+}
+
+/*************************************************************//**
+Splits an index page to halves and inserts the tuple. It is assumed
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
+released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore enough
+free disk space (2 pages) must be guaranteed to be available before
+this function is called.
+
+@return inserted record */
+UNIV_INTERN
+rec_t*
+btr_page_split_and_insert(
+/*======================*/
+	btr_cur_t*	cursor,	/*!< in: cursor at which to insert; when the
+				function returns, the cursor is positioned
+				on the predecessor of the inserted record */
+	const dtuple_t*	tuple,	/*!< in: tuple to insert */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	buf_block_t*	block;
+	page_t*		page;
+	page_zip_des_t*	page_zip;
+	ulint		page_no;
+	byte		direction;
+	ulint		hint_page_no;
+	buf_block_t*	new_block;
+	page_t*		new_page;
+	page_zip_des_t*	new_page_zip;
+	rec_t*		split_rec;
+	buf_block_t*	left_block;
+	buf_block_t*	right_block;
+	buf_block_t*	insert_block;
+	page_t*		insert_page;
+	page_cur_t*	page_cursor;
+	rec_t*		first_rec;
+	byte*		buf = 0; /* remove warning */
+	rec_t*		move_limit;
+	ibool		insert_will_fit;
+	ibool		insert_left;
+	ulint		n_iterations = 0;
+	rec_t*		rec;
+	mem_heap_t*	heap;
+	ulint		n_uniq;
+	ulint*		offsets;
+
+	heap = mem_heap_create(1024);
+	n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
+func_start:
+	mem_heap_empty(heap);
+	offsets = NULL;
+
+	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
+				MTR_MEMO_X_LOCK));
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	block = btr_cur_get_block(cursor);
+	page = buf_block_get_frame(block);
+	page_zip = buf_block_get_page_zip(block);
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(page_get_n_recs(page) >= 1);
+
+	page_no = buf_block_get_page_no(block);
+
+	/* 1. Decide the split record; split_rec == NULL means that the
+	tuple to be inserted should be the first record on the upper
+	half-page */
+
+	if (n_iterations > 0) {
+		direction = FSP_UP;
+		hint_page_no = page_no + 1;
+		split_rec = btr_page_get_sure_split_rec(cursor, tuple, n_ext);
+
+	} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
+		direction = FSP_UP;
+		hint_page_no = page_no + 1;
+
+	} else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
+		direction = FSP_DOWN;
+		hint_page_no = page_no - 1;
+	} else {
+		direction = FSP_UP;
+		hint_page_no = page_no + 1;
+
+		if (page_get_n_recs(page) == 1) {
+			page_cur_t	pcur;
+
+			/* There is only one record in the index page
+			therefore we can't split the node in the middle
+			by default. We need to determine whether the
+			new record will be inserted to the left or right. */
+
+			/* Read the first (and only) record in the page. */
+			page_cur_set_before_first(block, &pcur);
+			page_cur_move_to_next(&pcur);
+			first_rec = page_cur_get_rec(&pcur);
+
+			offsets = rec_get_offsets(
+				first_rec, cursor->index, offsets,
+				n_uniq, &heap);
+
+			/* If the new record is less than the existing record
+			the the split in the middle will copy the existing
+			record to the new node. */
+			if (cmp_dtuple_rec(tuple, first_rec, offsets) < 0) {
+				split_rec = page_get_middle_rec(page);
+			} else {
+				split_rec = NULL;
+			}
+		} else {
+			split_rec = page_get_middle_rec(page);
+		}
+	}
+
+	/* 2. Allocate a new page to the index */
+	new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
+				   btr_page_get_level(page, mtr), mtr);
+	new_page = buf_block_get_frame(new_block);
+	new_page_zip = buf_block_get_page_zip(new_block);
+	btr_page_create(new_block, new_page_zip, cursor->index,
+			btr_page_get_level(page, mtr), mtr);
+
+	/* 3. Calculate the first record on the upper half-page, and the
+	first record (move_limit) on original page which ends up on the
+	upper half */
+
+	if (split_rec) {
+		first_rec = move_limit = split_rec;
+
+		offsets = rec_get_offsets(split_rec, cursor->index, offsets,
+					  n_uniq, &heap);
+
+		insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;
+
+		if (UNIV_UNLIKELY(!insert_left && new_page_zip
+				  && n_iterations > 0)) {
+			/* If a compressed page has already been split,
+			avoid further splits by inserting the record
+			to an empty page. */
+			split_rec = NULL;
+			goto insert_right;
+		}
+	} else {
+insert_right:
+		insert_left = FALSE;
+		buf = mem_alloc(rec_get_converted_size(cursor->index,
+						       tuple, n_ext));
+
+		first_rec = rec_convert_dtuple_to_rec(buf, cursor->index,
+						      tuple, n_ext);
+		move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
+	}
+
+	/* 4. Do first the modifications in the tree structure */
+
+	btr_attach_half_pages(cursor->index, block,
+			      first_rec, new_block, direction, mtr);
+
+	/* If the split is made on the leaf level and the insert will fit
+	on the appropriate half-page, we may release the tree x-latch.
+	We can then move the records after releasing the tree latch,
+	thus reducing the tree latch contention. */
+
+	if (split_rec) {
+		insert_will_fit = !new_page_zip
+			&& btr_page_insert_fits(cursor, split_rec,
+						offsets, tuple, n_ext, heap);
+	} else {
+		mem_free(buf);
+		insert_will_fit = !new_page_zip
+			&& btr_page_insert_fits(cursor, NULL,
+						NULL, tuple, n_ext, heap);
+	}
+
+	if (insert_will_fit && page_is_leaf(page)) {
+
+		mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
+				 MTR_MEMO_X_LOCK);
+	}
+
+	/* 5. Move then the records to the new page */
+	if (direction == FSP_DOWN) {
+		/*		fputs("Split left\n", stderr); */
+
+		if (0
+#ifdef UNIV_ZIP_COPY
+		    || page_zip
+#endif /* UNIV_ZIP_COPY */
+		    || UNIV_UNLIKELY
+		    (!page_move_rec_list_start(new_block, block, move_limit,
+					       cursor->index, mtr))) {
+			/* For some reason, compressing new_page failed,
+			even though it should contain fewer records than
+			the original page.  Copy the page byte for byte
+			and then delete the records from both pages
+			as appropriate.  Deleting will always succeed. */
+			ut_a(new_page_zip);
+
+			page_zip_copy_recs(new_page_zip, new_page,
+					   page_zip, page, cursor->index, mtr);
+			page_delete_rec_list_end(move_limit - page + new_page,
+						 new_block, cursor->index,
+						 ULINT_UNDEFINED,
+						 ULINT_UNDEFINED, mtr);
+
+			/* Update the lock table and possible hash index. */
+
+			lock_move_rec_list_start(
+				new_block, block, move_limit,
+				new_page + PAGE_NEW_INFIMUM);
+
+			btr_search_move_or_delete_hash_entries(
+				new_block, block, cursor->index);
+
+			/* Delete the records from the source page. */
+
+			page_delete_rec_list_start(move_limit, block,
+						   cursor->index, mtr);
+		}
+
+		left_block = new_block;
+		right_block = block;
+
+		lock_update_split_left(right_block, left_block);
+	} else {
+		/*		fputs("Split right\n", stderr); */
+
+		if (0
+#ifdef UNIV_ZIP_COPY
+		    || page_zip
+#endif /* UNIV_ZIP_COPY */
+		    || UNIV_UNLIKELY
+		    (!page_move_rec_list_end(new_block, block, move_limit,
+					     cursor->index, mtr))) {
+			/* For some reason, compressing new_page failed,
+			even though it should contain fewer records than
+			the original page.  Copy the page byte for byte
+			and then delete the records from both pages
+			as appropriate.  Deleting will always succeed. */
+			ut_a(new_page_zip);
+
+			page_zip_copy_recs(new_page_zip, new_page,
+					   page_zip, page, cursor->index, mtr);
+			page_delete_rec_list_start(move_limit - page
+						   + new_page, new_block,
+						   cursor->index, mtr);
+
+			/* Update the lock table and possible hash index. */
+
+			lock_move_rec_list_end(new_block, block, move_limit);
+
+			btr_search_move_or_delete_hash_entries(
+				new_block, block, cursor->index);
+
+			/* Delete the records from the source page. */
+
+			page_delete_rec_list_end(move_limit, block,
+						 cursor->index,
+						 ULINT_UNDEFINED,
+						 ULINT_UNDEFINED, mtr);
+		}
+
+		left_block = block;
+		right_block = new_block;
+
+		lock_update_split_right(right_block, left_block);
+	}
+
+#ifdef UNIV_ZIP_DEBUG
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		ut_a(page_zip_validate(page_zip, page));
+		ut_a(page_zip_validate(new_page_zip, new_page));
+	}
+#endif /* UNIV_ZIP_DEBUG */
+
+	/* At this point, split_rec, move_limit and first_rec may point
+	to garbage on the old page. */
+
+	/* 6. The split and the tree modification is now completed. Decide the
+	page where the tuple should be inserted */
+
+	if (insert_left) {
+		insert_block = left_block;
+	} else {
+		insert_block = right_block;
+	}
+
+	insert_page = buf_block_get_frame(insert_block);
+
+	/* 7. Reposition the cursor for insert and try insertion */
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	page_cur_search(insert_block, cursor->index, tuple,
+			PAGE_CUR_LE, page_cursor);
+
+	rec = page_cur_tuple_insert(page_cursor, tuple,
+				    cursor->index, n_ext, mtr);
+
+#ifdef UNIV_ZIP_DEBUG
+	{
+		page_zip_des_t*	insert_page_zip
+			= buf_block_get_page_zip(insert_block);
+		ut_a(!insert_page_zip
+		     || page_zip_validate(insert_page_zip, insert_page));
+	}
+#endif /* UNIV_ZIP_DEBUG */
+
+	if (UNIV_LIKELY(rec != NULL)) {
+
+		goto func_exit;
+	}
+
+	/* 8. If insert did not fit, try page reorganization */
+
+	if (UNIV_UNLIKELY
+	    (!btr_page_reorganize(insert_block, cursor->index, mtr))) {
+
+		goto insert_failed;
+	}
+
+	page_cur_search(insert_block, cursor->index, tuple,
+			PAGE_CUR_LE, page_cursor);
+	rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
+				    n_ext, mtr);
+
+	if (UNIV_UNLIKELY(rec == NULL)) {
+		/* The insert did not fit on the page: loop back to the
+		start of the function for a new split */
+insert_failed:
+		/* We play safe and reset the free bits for new_page */
+		if (!dict_index_is_clust(cursor->index)) {
+			ibuf_reset_free_bits(new_block);
+		}
+
+		/* fprintf(stderr, "Split second round %lu\n",
+		page_get_page_no(page)); */
+		n_iterations++;
+		ut_ad(n_iterations < 2
+		      || buf_block_get_page_zip(insert_block));
+		ut_ad(!insert_will_fit);
+
+		goto func_start;
+	}
+
+func_exit:
+	/* Insert fit on the page: update the free bits for the
+	left and right pages in the same mtr */
+
+	if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) {
+		ibuf_update_free_bits_for_two_pages_low(
+			buf_block_get_zip_size(left_block),
+			left_block, right_block, mtr);
+	}
+
+#if 0
+	fprintf(stderr, "Split and insert done %lu %lu\n",
+		buf_block_get_page_no(left_block),
+		buf_block_get_page_no(right_block));
+#endif
+
+	ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
+	ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
+
+	mem_heap_free(heap);
+	return(rec);
+}
+
+/*************************************************************//**
+Removes a page from the level list of pages. */
+static
+void
+btr_level_list_remove(
+/*==================*/
+	ulint		space,	/*!< in: space where removed */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	page_t*		page,	/*!< in: page to remove */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ulint	prev_page_no;
+	ulint	next_page_no;
+
+	ut_ad(page && mtr);
+	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(space == page_get_space_id(page));
+	/* Get the previous and next page numbers of page */
+
+	prev_page_no = btr_page_get_prev(page, mtr);
+	next_page_no = btr_page_get_next(page, mtr);
+
+	/* Update page links of the level */
+
+	if (prev_page_no != FIL_NULL) {
+		buf_block_t*	prev_block
+			= btr_block_get(space, zip_size, prev_page_no,
+					RW_X_LATCH, mtr);
+		page_t*		prev_page
+			= buf_block_get_frame(prev_block);
+#ifdef UNIV_BTR_DEBUG
+		ut_a(page_is_comp(prev_page) == page_is_comp(page));
+		ut_a(btr_page_get_next(prev_page, mtr)
+		     == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
+
+		btr_page_set_next(prev_page,
+				  buf_block_get_page_zip(prev_block),
+				  next_page_no, mtr);
+	}
+
+	if (next_page_no != FIL_NULL) {
+		buf_block_t*	next_block
+			= btr_block_get(space, zip_size, next_page_no,
+					RW_X_LATCH, mtr);
+		page_t*		next_page
+			= buf_block_get_frame(next_block);
+#ifdef UNIV_BTR_DEBUG
+		ut_a(page_is_comp(next_page) == page_is_comp(page));
+		ut_a(btr_page_get_prev(next_page, mtr)
+		     == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
+
+		btr_page_set_prev(next_page,
+				  buf_block_get_page_zip(next_block),
+				  prev_page_no, mtr);
+	}
+}
+
+/****************************************************************//**
+Writes the redo log record for setting an index record as the predefined
+minimum record. */
+UNIV_INLINE
+void
+btr_set_min_rec_mark_log(
+/*=====================*/
+	rec_t*	rec,	/*!< in: record */
+	byte	type,	/*!< in: MLOG_COMP_REC_MIN_MARK or MLOG_REC_MIN_MARK */
+	mtr_t*	mtr)	/*!< in: mtr */
+{
+	mlog_write_initial_log_record(rec, type, mtr);
+
+	/* Write rec offset as a 2-byte ulint */
+	mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES);
+}
+#else /* !UNIV_HOTBACKUP */
+# define btr_set_min_rec_mark_log(rec,comp,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/****************************************************************//**
+Parses the redo log record for setting an index record as the predefined
+minimum record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_parse_set_min_rec_mark(
+/*=======================*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	ulint	comp,	/*!< in: nonzero=compact page format */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr)	/*!< in: mtr or NULL */
+{
+	rec_t*	rec;
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	if (page) {
+		ut_a(!page_is_comp(page) == !comp);
+
+		rec = page + mach_read_from_2(ptr);
+
+		btr_set_min_rec_mark(rec, mtr);
+	}
+
+	return(ptr + 2);
+}
+
+/****************************************************************//**
+Sets a record as the predefined minimum record. */
+UNIV_INTERN
+void
+btr_set_min_rec_mark(
+/*=================*/
+	rec_t*	rec,	/*!< in: record */
+	mtr_t*	mtr)	/*!< in: mtr */
+{
+	ulint	info_bits;
+
+	if (UNIV_LIKELY(page_rec_is_comp(rec))) {
+		info_bits = rec_get_info_bits(rec, TRUE);
+
+		rec_set_info_bits_new(rec, info_bits | REC_INFO_MIN_REC_FLAG);
+
+		btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
+	} else {
+		info_bits = rec_get_info_bits(rec, FALSE);
+
+		rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG);
+
+		btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr);
+	}
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Deletes on the upper level the node pointer to a page. */
+UNIV_INTERN
+void
+btr_node_ptr_delete(
+/*================*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: page whose node pointer is deleted */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	btr_cur_t	cursor;
+	ibool		compressed;
+	ulint		err;
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+
+	/* Delete node pointer on father page */
+	btr_page_get_father(index, block, mtr, &cursor);
+
+	compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
+						mtr);
+	ut_a(err == DB_SUCCESS);
+
+	if (!compressed) {
+		btr_cur_compress_if_useful(&cursor, mtr);
+	}
+}
+
+/*************************************************************//**
+If page is the only on its level, this function moves its records to the
+father page, thus reducing the tree height. */
+static
+void
+btr_lift_page_up(
+/*=============*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: page which is the only on its level;
+				must not be empty: use
+				btr_discard_only_page_on_level if the last
+				record from the page should be removed */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	buf_block_t*	father_block;
+	page_t*		father_page;
+	ulint		page_level;
+	page_zip_des_t*	father_page_zip;
+	page_t*		page		= buf_block_get_frame(block);
+	ulint		root_page_no;
+	buf_block_t*	blocks[BTR_MAX_LEVELS];
+	ulint		n_blocks;	/*!< last used index in blocks[] */
+	ulint		i;
+
+	ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
+	ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+
+	page_level = btr_page_get_level(page, mtr);
+	root_page_no = dict_index_get_page(index);
+
+	{
+		btr_cur_t	cursor;
+		mem_heap_t*	heap	= mem_heap_create(100);
+		ulint*		offsets;
+		buf_block_t*	b;
+
+		offsets = btr_page_get_father_block(NULL, heap, index,
+						    block, mtr, &cursor);
+		father_block = btr_cur_get_block(&cursor);
+		father_page_zip = buf_block_get_page_zip(father_block);
+		father_page = buf_block_get_frame(father_block);
+
+		n_blocks = 0;
+
+		/* Store all ancestor pages so we can reset their
+		levels later on.  We have to do all the searches on
+		the tree now because later on, after we've replaced
+		the first level, the tree is in an inconsistent state
+		and can not be searched. */
+		for (b = father_block;
+		     buf_block_get_page_no(b) != root_page_no; ) {
+			ut_a(n_blocks < BTR_MAX_LEVELS);
+
+			offsets = btr_page_get_father_block(offsets, heap,
+							    index, b,
+							    mtr, &cursor);
+
+			blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
+		}
+
+		mem_heap_free(heap);
+	}
+
+	btr_search_drop_page_hash_index(block);
+
+	/* Make the father empty */
+	btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
+
+	/* Copy the records to the father page one by one. */
+	if (0
+#ifdef UNIV_ZIP_COPY
+	    || father_page_zip
+#endif /* UNIV_ZIP_COPY */
+	    || UNIV_UNLIKELY
+	    (!page_copy_rec_list_end(father_block, block,
+				     page_get_infimum_rec(page),
+				     index, mtr))) {
+		const page_zip_des_t*	page_zip
+			= buf_block_get_page_zip(block);
+		ut_a(father_page_zip);
+		ut_a(page_zip);
+
+		/* Copy the page byte for byte. */
+		page_zip_copy_recs(father_page_zip, father_page,
+				   page_zip, page, index, mtr);
+
+		/* Update the lock table and possible hash index. */
+
+		lock_move_rec_list_end(father_block, block,
+				       page_get_infimum_rec(page));
+
+		btr_search_move_or_delete_hash_entries(father_block, block,
+						       index);
+	}
+
+	lock_update_copy_and_discard(father_block, block);
+
+	/* Go upward to root page, decrementing levels by one. */
+	for (i = 0; i < n_blocks; i++, page_level++) {
+		page_t*		page	= buf_block_get_frame(blocks[i]);
+		page_zip_des_t*	page_zip= buf_block_get_page_zip(blocks[i]);
+
+		ut_ad(btr_page_get_level(page, mtr) == page_level + 1);
+
+		btr_page_set_level(page, page_zip, page_level, mtr);
+#ifdef UNIV_ZIP_DEBUG
+		ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+	}
+
+	/* Free the file page */
+	btr_page_free(index, block, mtr);
+
+	/* We play it safe and reset the free bits for the father */
+	if (!dict_index_is_clust(index)) {
+		ibuf_reset_free_bits(father_block);
+	}
+	ut_ad(page_validate(father_page, index));
+	ut_ad(btr_check_node_ptr(index, father_block, mtr));
+}
+
+/*************************************************************//**
+Tries to merge the page first to the left immediate brother if such a
+brother exists, and the node pointers to the current page and to the brother
+reside on the same page. If the left brother does not satisfy these
+conditions, looks at the right brother. If the page is the only one on that
+level lifts the records of the page to the father page, thus reducing the
+tree height. It is assumed that mtr holds an x-latch on the tree and on the
+page. If cursor is on the leaf level, mtr must also hold x-latches to the
+brothers, if they exist.
+@return	TRUE on success */
+UNIV_INTERN
+ibool
+btr_compress(
+/*=========*/
+	btr_cur_t*	cursor,	/*!< in: cursor on the page to merge or lift;
+				the page must not be empty: in record delete
+				use btr_discard_page if the page would become
+				empty */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	dict_index_t*	index;
+	ulint		space;
+	ulint		zip_size;
+	ulint		left_page_no;
+	ulint		right_page_no;
+	buf_block_t*	merge_block;
+	page_t*		merge_page;
+	page_zip_des_t*	merge_page_zip;
+	ibool		is_left;
+	buf_block_t*	block;
+	page_t*		page;
+	btr_cur_t	father_cursor;
+	mem_heap_t*	heap;
+	ulint*		offsets;
+	ulint		data_size;
+	ulint		n_recs;
+	ulint		max_ins_size;
+	ulint		max_ins_size_reorg;
+	ulint		level;
+
+	block = btr_cur_get_block(cursor);
+	page = btr_cur_get_page(cursor);
+	index = btr_cur_get_index(cursor);
+	ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table));
+
+	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	level = btr_page_get_level(page, mtr);
+	space = dict_index_get_space(index);
+	zip_size = dict_table_zip_size(index->table);
+
+	left_page_no = btr_page_get_prev(page, mtr);
+	right_page_no = btr_page_get_next(page, mtr);
+
+#if 0
+	fprintf(stderr, "Merge left page %lu right %lu \n",
+		left_page_no, right_page_no);
+#endif
+
+	heap = mem_heap_create(100);
+	offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
+					    &father_cursor);
+
+	/* Decide the page to which we try to merge and which will inherit
+	the locks */
+
+	is_left = left_page_no != FIL_NULL;
+
+	if (is_left) {
+
+		merge_block = btr_block_get(space, zip_size, left_page_no,
+					    RW_X_LATCH, mtr);
+		merge_page = buf_block_get_frame(merge_block);
+#ifdef UNIV_BTR_DEBUG
+		ut_a(btr_page_get_next(merge_page, mtr)
+		     == buf_block_get_page_no(block));
+#endif /* UNIV_BTR_DEBUG */
+	} else if (right_page_no != FIL_NULL) {
+
+		merge_block = btr_block_get(space, zip_size, right_page_no,
+					    RW_X_LATCH, mtr);
+		merge_page = buf_block_get_frame(merge_block);
+#ifdef UNIV_BTR_DEBUG
+		ut_a(btr_page_get_prev(merge_page, mtr)
+		     == buf_block_get_page_no(block));
+#endif /* UNIV_BTR_DEBUG */
+	} else {
+		/* The page is the only one on the level, lift the records
+		to the father */
+		btr_lift_page_up(index, block, mtr);
+		mem_heap_free(heap);
+		return(TRUE);
+	}
+
+	n_recs = page_get_n_recs(page);
+	data_size = page_get_data_size(page);
+#ifdef UNIV_BTR_DEBUG
+	ut_a(page_is_comp(merge_page) == page_is_comp(page));
+#endif /* UNIV_BTR_DEBUG */
+
+	max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
+		merge_page, n_recs);
+	if (data_size > max_ins_size_reorg) {
+
+		/* No space for merge */
+err_exit:
+		/* We play it safe and reset the free bits. */
+		if (zip_size
+		    && page_is_leaf(merge_page)
+		    && !dict_index_is_clust(index)) {
+			ibuf_reset_free_bits(merge_block);
+		}
+
+		mem_heap_free(heap);
+		return(FALSE);
+	}
+
+	ut_ad(page_validate(merge_page, index));
+
+	max_ins_size = page_get_max_insert_size(merge_page, n_recs);
+
+	if (UNIV_UNLIKELY(data_size > max_ins_size)) {
+
+		/* We have to reorganize merge_page */
+
+		if (UNIV_UNLIKELY(!btr_page_reorganize(merge_block,
+						       index, mtr))) {
+
+			goto err_exit;
+		}
+
+		max_ins_size = page_get_max_insert_size(merge_page, n_recs);
+
+		ut_ad(page_validate(merge_page, index));
+		ut_ad(max_ins_size == max_ins_size_reorg);
+
+		if (UNIV_UNLIKELY(data_size > max_ins_size)) {
+
+			/* Add fault tolerance, though this should
+			never happen */
+
+			goto err_exit;
+		}
+	}
+
+	merge_page_zip = buf_block_get_page_zip(merge_block);
+#ifdef UNIV_ZIP_DEBUG
+	if (UNIV_LIKELY_NULL(merge_page_zip)) {
+		const page_zip_des_t*	page_zip
+			= buf_block_get_page_zip(block);
+		ut_a(page_zip);
+		ut_a(page_zip_validate(merge_page_zip, merge_page));
+		ut_a(page_zip_validate(page_zip, page));
+	}
+#endif /* UNIV_ZIP_DEBUG */
+
+	/* Move records to the merge page */
+	if (is_left) {
+		rec_t*	orig_pred = page_copy_rec_list_start(
+			merge_block, block, page_get_supremum_rec(page),
+			index, mtr);
+
+		if (UNIV_UNLIKELY(!orig_pred)) {
+			goto err_exit;
+		}
+
+		btr_search_drop_page_hash_index(block);
+
+		/* Remove the page from the level list */
+		btr_level_list_remove(space, zip_size, page, mtr);
+
+		btr_node_ptr_delete(index, block, mtr);
+		lock_update_merge_left(merge_block, orig_pred, block);
+	} else {
+		rec_t*		orig_succ;
+#ifdef UNIV_BTR_DEBUG
+		byte		fil_page_prev[4];
+#endif /* UNIV_BTR_DEBUG */
+
+		if (UNIV_LIKELY_NULL(merge_page_zip)) {
+			/* The function page_zip_compress(), which will be
+			invoked by page_copy_rec_list_end() below,
+			requires that FIL_PAGE_PREV be FIL_NULL.
+			Clear the field, but prepare to restore it. */
+#ifdef UNIV_BTR_DEBUG
+			memcpy(fil_page_prev, merge_page + FIL_PAGE_PREV, 4);
+#endif /* UNIV_BTR_DEBUG */
+#if FIL_NULL != 0xffffffff
+# error "FIL_NULL != 0xffffffff"
+#endif
+			memset(merge_page + FIL_PAGE_PREV, 0xff, 4);
+		}
+
+		orig_succ = page_copy_rec_list_end(merge_block, block,
+						   page_get_infimum_rec(page),
+						   cursor->index, mtr);
+
+		if (UNIV_UNLIKELY(!orig_succ)) {
+			ut_a(merge_page_zip);
+#ifdef UNIV_BTR_DEBUG
+			/* FIL_PAGE_PREV was restored from merge_page_zip. */
+			ut_a(!memcmp(fil_page_prev,
+				     merge_page + FIL_PAGE_PREV, 4));
+#endif /* UNIV_BTR_DEBUG */
+			goto err_exit;
+		}
+
+		btr_search_drop_page_hash_index(block);
+
+#ifdef UNIV_BTR_DEBUG
+		if (UNIV_LIKELY_NULL(merge_page_zip)) {
+			/* Restore FIL_PAGE_PREV in order to avoid an assertion
+			failure in btr_level_list_remove(), which will set
+			the field again to FIL_NULL.  Even though this makes
+			merge_page and merge_page_zip inconsistent for a
+			split second, it is harmless, because the pages
+			are X-latched. */
+			memcpy(merge_page + FIL_PAGE_PREV, fil_page_prev, 4);
+		}
+#endif /* UNIV_BTR_DEBUG */
+
+		/* Remove the page from the level list */
+		btr_level_list_remove(space, zip_size, page, mtr);
+
+		/* Replace the address of the old child node (= page) with the
+		address of the merge page to the right */
+
+		btr_node_ptr_set_child_page_no(
+			btr_cur_get_rec(&father_cursor),
+			btr_cur_get_page_zip(&father_cursor),
+			offsets, right_page_no, mtr);
+		btr_node_ptr_delete(index, merge_block, mtr);
+
+		lock_update_merge_right(merge_block, orig_succ, block);
+	}
+
+	mem_heap_free(heap);
+
+	if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
+		/* Update the free bits of the B-tree page in the
+		insert buffer bitmap.  This has to be done in a
+		separate mini-transaction that is committed before the
+		main mini-transaction.  We cannot update the insert
+		buffer bitmap in this mini-transaction, because
+		btr_compress() can be invoked recursively without
+		committing the mini-transaction in between.  Since
+		insert buffer bitmap pages have a lower rank than
+		B-tree pages, we must not access other pages in the
+		same mini-transaction after accessing an insert buffer
+		bitmap page. */
+
+		/* The free bits in the insert buffer bitmap must
+		never exceed the free space on a page.  It is safe to
+		decrement or reset the bits in the bitmap in a
+		mini-transaction that is committed before the
+		mini-transaction that affects the free space. */
+
+		/* It is unsafe to increment the bits in a separately
+		committed mini-transaction, because in crash recovery,
+		the free bits could momentarily be set too high. */
+
+		if (zip_size) {
+			/* Because the free bits may be incremented
+			and we cannot update the insert buffer bitmap
+			in the same mini-transaction, the only safe
+			thing we can do here is the pessimistic
+			approach: reset the free bits. */
+			ibuf_reset_free_bits(merge_block);
+		} else {
+			/* On uncompressed pages, the free bits will
+			never increase here.  Thus, it is safe to
+			write the bits accurately in a separate
+			mini-transaction. */
+			ibuf_update_free_bits_if_full(merge_block,
+						      UNIV_PAGE_SIZE,
+						      ULINT_UNDEFINED);
+		}
+	}
+
+	ut_ad(page_validate(merge_page, index));
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page));
+#endif /* UNIV_ZIP_DEBUG */
+
+	/* Free the file page */
+	btr_page_free(index, block, mtr);
+
+	ut_ad(btr_check_node_ptr(index, merge_block, mtr));
+	return(TRUE);
+}
+
+/*************************************************************//**
+Discards a page that is the only page on its level.  This will empty
+the whole B-tree, leaving just an empty root page.  This function
+should never be reached, because btr_compress(), which is invoked in
+delete operations, calls btr_lift_page_up() to flatten the B-tree. */
+static
+void
+btr_discard_only_page_on_level(
+/*===========================*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: page which is the only on its level */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ulint		page_level = 0;
+	trx_id_t	max_trx_id;
+
+	/* Save the PAGE_MAX_TRX_ID from the leaf page. */
+	max_trx_id = page_get_max_trx_id(buf_block_get_frame(block));
+
+	while (buf_block_get_page_no(block) != dict_index_get_page(index)) {
+		btr_cur_t	cursor;
+		buf_block_t*	father;
+		const page_t*	page	= buf_block_get_frame(block);
+
+		ut_a(page_get_n_recs(page) == 1);
+		ut_a(page_level == btr_page_get_level(page, mtr));
+		ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
+		ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
+
+		ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+		btr_search_drop_page_hash_index(block);
+
+		btr_page_get_father(index, block, mtr, &cursor);
+		father = btr_cur_get_block(&cursor);
+
+		lock_update_discard(father, PAGE_HEAP_NO_SUPREMUM, block);
+
+		/* Free the file page */
+		btr_page_free(index, block, mtr);
+
+		block = father;
+		page_level++;
+	}
+
+	/* block is the root page, which must be empty, except
+	for the node pointer to the (now discarded) block(s). */
+
+#ifdef UNIV_BTR_DEBUG
+	if (!dict_index_is_ibuf(index)) {
+		const page_t*	root	= buf_block_get_frame(block);
+		const ulint	space	= dict_index_get_space(index);
+		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+					    + root, space));
+		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+					    + root, space));
+	}
+#endif /* UNIV_BTR_DEBUG */
+
+	btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr);
+
+	if (!dict_index_is_clust(index)) {
+		/* We play it safe and reset the free bits for the root */
+		ibuf_reset_free_bits(block);
+
+		if (page_is_leaf(buf_block_get_frame(block))) {
+			ut_a(!ut_dulint_is_zero(max_trx_id));
+			page_set_max_trx_id(block,
+					    buf_block_get_page_zip(block),
+					    max_trx_id, mtr);
+		}
+	}
+}
+
+/*************************************************************//**
+Discards a page from a B-tree. This is used to remove the last record from
+a B-tree page: the whole page must be removed at the same time. This cannot
+be used for the root page, which is allowed to be empty. */
+UNIV_INTERN
+void
+btr_discard_page(
+/*=============*/
+	btr_cur_t*	cursor,	/*!< in: cursor on the page to discard: not on
+				the root page */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	dict_index_t*	index;
+	ulint		space;
+	ulint		zip_size;
+	ulint		left_page_no;
+	ulint		right_page_no;
+	buf_block_t*	merge_block;
+	page_t*		merge_page;
+	buf_block_t*	block;
+	page_t*		page;
+	rec_t*		node_ptr;
+
+	block = btr_cur_get_block(cursor);
+	index = btr_cur_get_index(cursor);
+
+	ut_ad(dict_index_get_page(index) != buf_block_get_page_no(block));
+	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	space = dict_index_get_space(index);
+	zip_size = dict_table_zip_size(index->table);
+
+	/* Decide the page which will inherit the locks */
+
+	left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr);
+	right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
+
+	if (left_page_no != FIL_NULL) {
+		merge_block = btr_block_get(space, zip_size, left_page_no,
+					    RW_X_LATCH, mtr);
+		merge_page = buf_block_get_frame(merge_block);
+#ifdef UNIV_BTR_DEBUG
+		ut_a(btr_page_get_next(merge_page, mtr)
+		     == buf_block_get_page_no(block));
+#endif /* UNIV_BTR_DEBUG */
+	} else if (right_page_no != FIL_NULL) {
+		merge_block = btr_block_get(space, zip_size, right_page_no,
+					    RW_X_LATCH, mtr);
+		merge_page = buf_block_get_frame(merge_block);
+#ifdef UNIV_BTR_DEBUG
+		ut_a(btr_page_get_prev(merge_page, mtr)
+		     == buf_block_get_page_no(block));
+#endif /* UNIV_BTR_DEBUG */
+	} else {
+		btr_discard_only_page_on_level(index, block, mtr);
+
+		return;
+	}
+
+	page = buf_block_get_frame(block);
+	ut_a(page_is_comp(merge_page) == page_is_comp(page));
+	btr_search_drop_page_hash_index(block);
+
+	if (left_page_no == FIL_NULL && !page_is_leaf(page)) {
+
+		/* We have to mark the leftmost node pointer on the right
+		side page as the predefined minimum record */
+		node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
+
+		ut_ad(page_rec_is_user_rec(node_ptr));
+
+		/* This will make page_zip_validate() fail on merge_page
+		until btr_level_list_remove() completes.  This is harmless,
+		because everything will take place within a single
+		mini-transaction and because writing to the redo log
+		is an atomic operation (performed by mtr_commit()). */
+		btr_set_min_rec_mark(node_ptr, mtr);
+	}
+
+	btr_node_ptr_delete(index, block, mtr);
+
+	/* Remove the page from the level list */
+	btr_level_list_remove(space, zip_size, page, mtr);
+#ifdef UNIV_ZIP_DEBUG
+	{
+		page_zip_des_t*	merge_page_zip
+			= buf_block_get_page_zip(merge_block);
+		ut_a(!merge_page_zip
+		     || page_zip_validate(merge_page_zip, merge_page));
+	}
+#endif /* UNIV_ZIP_DEBUG */
+
+	if (left_page_no != FIL_NULL) {
+		lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
+				    block);
+	} else {
+		lock_update_discard(merge_block,
+				    lock_get_min_heap_no(merge_block),
+				    block);
+	}
+
+	/* Free the file page */
+	btr_page_free(index, block, mtr);
+
+	ut_ad(btr_check_node_ptr(index, merge_block, mtr));
+}
+
+#ifdef UNIV_BTR_PRINT
+/*************************************************************//**
+Prints size info of a B-tree. */
+UNIV_INTERN
+void
+btr_print_size(
+/*===========*/
+	dict_index_t*	index)	/*!< in: index tree */
+{
+	page_t*		root;
+	fseg_header_t*	seg;
+	mtr_t		mtr;
+
+	if (dict_index_is_ibuf(index)) {
+		fputs("Sorry, cannot print info of an ibuf tree:"
+		      " use ibuf functions\n", stderr);
+
+		return;
+	}
+
+	mtr_start(&mtr);
+
+	root = btr_root_get(index, &mtr);
+
+	seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+
+	fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
+	fseg_print(seg, &mtr);
+
+	if (!(index->type & DICT_UNIVERSAL)) {
+
+		seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+
+		fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
+		fseg_print(seg, &mtr);
+	}
+
+	mtr_commit(&mtr);
+}
+
+/************************************************************//**
+Prints recursively index tree pages. */
+static
+void
+btr_print_recursive(
+/*================*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: index page */
+	ulint		width,	/*!< in: print this many entries from start
+				and end */
+	mem_heap_t**	heap,	/*!< in/out: heap for rec_get_offsets() */
+	ulint**		offsets,/*!< in/out: buffer for rec_get_offsets() */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	const page_t*	page	= buf_block_get_frame(block);
+	page_cur_t	cursor;
+	ulint		n_recs;
+	ulint		i	= 0;
+	mtr_t		mtr2;
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
+		(ulong) btr_page_get_level(page, mtr),
+		(ulong) buf_block_get_page_no(block));
+
+	page_print(block, index, width, width);
+
+	n_recs = page_get_n_recs(page);
+
+	page_cur_set_before_first(block, &cursor);
+	page_cur_move_to_next(&cursor);
+
+	while (!page_cur_is_after_last(&cursor)) {
+
+		if (page_is_leaf(page)) {
+
+			/* If this is the leaf level, do nothing */
+
+		} else if ((i <= width) || (i >= n_recs - width)) {
+
+			const rec_t*	node_ptr;
+
+			mtr_start(&mtr2);
+
+			node_ptr = page_cur_get_rec(&cursor);
+
+			*offsets = rec_get_offsets(node_ptr, index, *offsets,
+						   ULINT_UNDEFINED, heap);
+			btr_print_recursive(index,
+					    btr_node_ptr_get_child(node_ptr,
+								   index,
+								   *offsets,
+								   &mtr2),
+					    width, heap, offsets, &mtr2);
+			mtr_commit(&mtr2);
+		}
+
+		page_cur_move_to_next(&cursor);
+		i++;
+	}
+}
+
+/**************************************************************//**
+Prints directories and other info of all nodes in the tree. */
+UNIV_INTERN
+void
+btr_print_index(
+/*============*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		width)	/*!< in: print this many entries from start
+				and end */
+{
+	mtr_t		mtr;
+	buf_block_t*	root;
+	mem_heap_t*	heap	= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets	= offsets_;
+	rec_offs_init(offsets_);
+
+	fputs("--------------------------\n"
+	      "INDEX TREE PRINT\n", stderr);
+
+	mtr_start(&mtr);
+
+	root = btr_root_block_get(index, &mtr);
+
+	btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+
+	mtr_commit(&mtr);
+
+	btr_validate_index(index, NULL);
+}
+#endif /* UNIV_BTR_PRINT */
+
+#ifdef UNIV_DEBUG
+/************************************************************//**
+Checks that the node pointer to a page is appropriate.
+@return	TRUE */
+UNIV_INTERN
+ibool
+btr_check_node_ptr(
+/*===============*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: index page */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	mem_heap_t*	heap;
+	dtuple_t*	tuple;
+	ulint*		offsets;
+	btr_cur_t	cursor;
+	page_t*		page = buf_block_get_frame(block);
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	if (dict_index_get_page(index) == buf_block_get_page_no(block)) {
+
+		return(TRUE);
+	}
+
+	heap = mem_heap_create(256);
+	offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
+					    &cursor);
+
+	if (page_is_leaf(page)) {
+
+		goto func_exit;
+	}
+
+	tuple = dict_index_build_node_ptr(
+		index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
+		btr_page_get_level(page, mtr));
+
+	ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
+func_exit:
+	mem_heap_free(heap);
+
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
+/************************************************************//**
+Display identification information for a record. */
+static
+void
+btr_index_rec_validate_report(
+/*==========================*/
+	const page_t*		page,	/*!< in: index page */
+	const rec_t*		rec,	/*!< in: index record */
+	const dict_index_t*	index)	/*!< in: index */
+{
+	fputs("InnoDB: Record in ", stderr);
+	dict_index_name_print(stderr, NULL, index);
+	fprintf(stderr, ", page %lu, at offset %lu\n",
+		page_get_page_no(page), (ulint) page_offset(rec));
+}
+
+/************************************************************//**
+Checks the size and number of fields in a record based on the definition of
+the index.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+btr_index_rec_validate(
+/*===================*/
+	const rec_t*		rec,		/*!< in: index record */
+	const dict_index_t*	index,		/*!< in: index */
+	ibool			dump_on_error)	/*!< in: TRUE if the function
+						should print hex dump of record
+						and page on error */
+{
+	ulint		len;
+	ulint		n;
+	ulint		i;
+	const page_t*	page;
+	mem_heap_t*	heap	= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets	= offsets_;
+	rec_offs_init(offsets_);
+
+	page = page_align(rec);
+
+	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
+		/* The insert buffer index tree can contain records from any
+		other index: we cannot check the number of fields or
+		their length */
+
+		return(TRUE);
+	}
+
+	if (UNIV_UNLIKELY((ibool)!!page_is_comp(page)
+			  != dict_table_is_comp(index->table))) {
+		btr_index_rec_validate_report(page, rec, index);
+		fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n",
+			(ulong) !!page_is_comp(page),
+			(ulong) dict_table_is_comp(index->table));
+
+		return(FALSE);
+	}
+
+	n = dict_index_get_n_fields(index);
+
+	if (!page_is_comp(page)
+	    && UNIV_UNLIKELY(rec_get_n_fields_old(rec) != n)) {
+		btr_index_rec_validate_report(page, rec, index);
+		fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n",
+			(ulong) rec_get_n_fields_old(rec), (ulong) n);
+
+		if (dump_on_error) {
+			buf_page_print(page, 0);
+
+			fputs("InnoDB: corrupt record ", stderr);
+			rec_print_old(stderr, rec);
+			putc('\n', stderr);
+		}
+		return(FALSE);
+	}
+
+	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+
+	for (i = 0; i < n; i++) {
+		ulint	fixed_size = dict_col_get_fixed_size(
+			dict_index_get_nth_col(index, i), page_is_comp(page));
+
+		rec_get_nth_field_offs(offsets, i, &len);
+
+		/* Note that if fixed_size != 0, it equals the
+		length of a fixed-size column in the clustered index.
+		A prefix index of the column is of fixed, but different
+		length.  When fixed_size == 0, prefix_len is the maximum
+		length of the prefix index column. */
+
+		if ((dict_index_get_nth_field(index, i)->prefix_len == 0
+		     && len != UNIV_SQL_NULL && fixed_size
+		     && len != fixed_size)
+		    || (dict_index_get_nth_field(index, i)->prefix_len > 0
+			&& len != UNIV_SQL_NULL
+			&& len
+			> dict_index_get_nth_field(index, i)->prefix_len)) {
+
+			btr_index_rec_validate_report(page, rec, index);
+			fprintf(stderr,
+				"InnoDB: field %lu len is %lu,"
+				" should be %lu\n",
+				(ulong) i, (ulong) len, (ulong) fixed_size);
+
+			if (dump_on_error) {
+				buf_page_print(page, 0);
+
+				fputs("InnoDB: corrupt record ", stderr);
+				rec_print_new(stderr, rec, offsets);
+				putc('\n', stderr);
+			}
+			if (UNIV_LIKELY_NULL(heap)) {
+				mem_heap_free(heap);
+			}
+			return(FALSE);
+		}
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+	return(TRUE);
+}
+
+/************************************************************//**
+Checks the size and number of fields in records based on the definition of
+the index.
+@return	TRUE if ok */
+static
+ibool
+btr_index_page_validate(
+/*====================*/
+	buf_block_t*	block,	/*!< in: index page */
+	dict_index_t*	index)	/*!< in: index */
+{
+	page_cur_t	cur;
+	ibool		ret	= TRUE;
+
+	page_cur_set_before_first(block, &cur);
+	page_cur_move_to_next(&cur);
+
+	for (;;) {
+		if (page_cur_is_after_last(&cur)) {
+
+			break;
+		}
+
+		if (!btr_index_rec_validate(cur.rec, index, TRUE)) {
+
+			return(FALSE);
+		}
+
+		page_cur_move_to_next(&cur);
+	}
+
+	return(ret);
+}
+
+/************************************************************//**
+Report an error on one page of an index tree. */
+static
+void
+btr_validate_report1(
+/*=================*/
+	dict_index_t*		index,	/*!< in: index */
+	ulint			level,	/*!< in: B-tree level */
+	const buf_block_t*	block)	/*!< in: index page */
+{
+	fprintf(stderr, "InnoDB: Error in page %lu of ",
+		buf_block_get_page_no(block));
+	dict_index_name_print(stderr, NULL, index);
+	if (level) {
+		fprintf(stderr, ", index tree level %lu", level);
+	}
+	putc('\n', stderr);
+}
+
+/************************************************************//**
+Report an error on two pages of an index tree. */
+static
+void
+btr_validate_report2(
+/*=================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			level,	/*!< in: B-tree level */
+	const buf_block_t*	block1,	/*!< in: first index page */
+	const buf_block_t*	block2)	/*!< in: second index page */
+{
+	fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
+		buf_block_get_page_no(block1),
+		buf_block_get_page_no(block2));
+	dict_index_name_print(stderr, NULL, index);
+	if (level) {
+		fprintf(stderr, ", index tree level %lu", level);
+	}
+	putc('\n', stderr);
+}
+
+/************************************************************//**
+Validates index tree level.
+@return	TRUE if ok */
+static
+ibool
+btr_validate_level(
+/*===============*/
+	dict_index_t*	index,	/*!< in: index tree */
+	trx_t*		trx,	/*!< in: transaction or NULL */
+	ulint		level)	/*!< in: level number */
+{
+	ulint		space;
+	ulint		zip_size;
+	buf_block_t*	block;
+	page_t*		page;
+	buf_block_t*	right_block = 0; /* remove warning */
+	page_t*		right_page = 0; /* remove warning */
+	page_t*		father_page;
+	btr_cur_t	node_cur;
+	btr_cur_t	right_node_cur;
+	rec_t*		rec;
+	ulint		right_page_no;
+	ulint		left_page_no;
+	page_cur_t	cursor;
+	dtuple_t*	node_ptr_tuple;
+	ibool		ret	= TRUE;
+	mtr_t		mtr;
+	mem_heap_t*	heap	= mem_heap_create(256);
+	ulint*		offsets	= NULL;
+	ulint*		offsets2= NULL;
+#ifdef UNIV_ZIP_DEBUG
+	page_zip_des_t*	page_zip;
+#endif /* UNIV_ZIP_DEBUG */
+
+	mtr_start(&mtr);
+
+	mtr_x_lock(dict_index_get_lock(index), &mtr);
+
+	block = btr_root_block_get(index, &mtr);
+	page = buf_block_get_frame(block);
+
+	space = dict_index_get_space(index);
+	zip_size = dict_table_zip_size(index->table);
+
+	while (level != btr_page_get_level(page, &mtr)) {
+		const rec_t*	node_ptr;
+
+		ut_a(space == buf_block_get_space(block));
+		ut_a(space == page_get_space_id(page));
+#ifdef UNIV_ZIP_DEBUG
+		page_zip = buf_block_get_page_zip(block);
+		ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+		ut_a(!page_is_leaf(page));
+
+		page_cur_set_before_first(block, &cursor);
+		page_cur_move_to_next(&cursor);
+
+		node_ptr = page_cur_get_rec(&cursor);
+		offsets = rec_get_offsets(node_ptr, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr);
+		page = buf_block_get_frame(block);
+	}
+
+	/* Now we are on the desired level. Loop through the pages on that
+	level. */
+loop:
+	if (trx_is_interrupted(trx)) {
+		mtr_commit(&mtr);
+		mem_heap_free(heap);
+		return(ret);
+	}
+	mem_heap_empty(heap);
+	offsets = offsets2 = NULL;
+	mtr_x_lock(dict_index_get_lock(index), &mtr);
+
+#ifdef UNIV_ZIP_DEBUG
+	page_zip = buf_block_get_page_zip(block);
+	ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+	/* Check ordering etc. of records */
+
+	if (!page_validate(page, index)) {
+		btr_validate_report1(index, level, block);
+
+		ret = FALSE;
+	} else if (level == 0) {
+		/* We are on level 0. Check that the records have the right
+		number of fields, and field lengths are right. */
+
+		if (!btr_index_page_validate(block, index)) {
+
+			ret = FALSE;
+		}
+	}
+
+	ut_a(btr_page_get_level(page, &mtr) == level);
+
+	right_page_no = btr_page_get_next(page, &mtr);
+	left_page_no = btr_page_get_prev(page, &mtr);
+
+	ut_a(page_get_n_recs(page) > 0 || (level == 0
+					   && page_get_page_no(page)
+					   == dict_index_get_page(index)));
+
+	if (right_page_no != FIL_NULL) {
+		const rec_t*	right_rec;
+		right_block = btr_block_get(space, zip_size, right_page_no,
+					    RW_X_LATCH, &mtr);
+		right_page = buf_block_get_frame(right_block);
+		if (UNIV_UNLIKELY(btr_page_get_prev(right_page, &mtr)
+				  != page_get_page_no(page))) {
+			btr_validate_report2(index, level, block, right_block);
+			fputs("InnoDB: broken FIL_PAGE_NEXT"
+			      " or FIL_PAGE_PREV links\n", stderr);
+			buf_page_print(page, 0);
+			buf_page_print(right_page, 0);
+
+			ret = FALSE;
+		}
+
+		if (UNIV_UNLIKELY(page_is_comp(right_page)
+				  != page_is_comp(page))) {
+			btr_validate_report2(index, level, block, right_block);
+			fputs("InnoDB: 'compact' flag mismatch\n", stderr);
+			buf_page_print(page, 0);
+			buf_page_print(right_page, 0);
+
+			ret = FALSE;
+
+			goto node_ptr_fails;
+		}
+
+		rec = page_rec_get_prev(page_get_supremum_rec(page));
+		right_rec = page_rec_get_next(page_get_infimum_rec(
+						      right_page));
+		offsets = rec_get_offsets(rec, index,
+					  offsets, ULINT_UNDEFINED, &heap);
+		offsets2 = rec_get_offsets(right_rec, index,
+					   offsets2, ULINT_UNDEFINED, &heap);
+		if (UNIV_UNLIKELY(cmp_rec_rec(rec, right_rec,
+					      offsets, offsets2,
+					      index) >= 0)) {
+
+			btr_validate_report2(index, level, block, right_block);
+
+			fputs("InnoDB: records in wrong order"
+			      " on adjacent pages\n", stderr);
+
+			buf_page_print(page, 0);
+			buf_page_print(right_page, 0);
+
+			fputs("InnoDB: record ", stderr);
+			rec = page_rec_get_prev(page_get_supremum_rec(page));
+			rec_print(stderr, rec, index);
+			putc('\n', stderr);
+			fputs("InnoDB: record ", stderr);
+			rec = page_rec_get_next(
+				page_get_infimum_rec(right_page));
+			rec_print(stderr, rec, index);
+			putc('\n', stderr);
+
+			ret = FALSE;
+		}
+	}
+
+	if (level > 0 && left_page_no == FIL_NULL) {
+		ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+			     page_rec_get_next(page_get_infimum_rec(page)),
+			     page_is_comp(page)));
+	}
+
+	if (buf_block_get_page_no(block) != dict_index_get_page(index)) {
+
+		/* Check father node pointers */
+
+		rec_t*	node_ptr;
+
+		offsets = btr_page_get_father_block(offsets, heap, index,
+						    block, &mtr, &node_cur);
+		father_page = btr_cur_get_page(&node_cur);
+		node_ptr = btr_cur_get_rec(&node_cur);
+
+		btr_cur_position(
+			index, page_rec_get_prev(page_get_supremum_rec(page)),
+			block, &node_cur);
+		offsets = btr_page_get_father_node_ptr(offsets, heap,
+						       &node_cur, &mtr);
+
+		if (UNIV_UNLIKELY(node_ptr != btr_cur_get_rec(&node_cur))
+		    || UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr,
+								    offsets)
+				     != buf_block_get_page_no(block))) {
+
+			btr_validate_report1(index, level, block);
+
+			fputs("InnoDB: node pointer to the page is wrong\n",
+			      stderr);
+
+			buf_page_print(father_page, 0);
+			buf_page_print(page, 0);
+
+			fputs("InnoDB: node ptr ", stderr);
+			rec_print(stderr, node_ptr, index);
+
+			rec = btr_cur_get_rec(&node_cur);
+			fprintf(stderr, "\n"
+				"InnoDB: node ptr child page n:o %lu\n",
+				(ulong) btr_node_ptr_get_child_page_no(
+					rec, offsets));
+
+			fputs("InnoDB: record on page ", stderr);
+			rec_print_new(stderr, rec, offsets);
+			putc('\n', stderr);
+			ret = FALSE;
+
+			goto node_ptr_fails;
+		}
+
+		if (!page_is_leaf(page)) {
+			node_ptr_tuple = dict_index_build_node_ptr(
+				index,
+				page_rec_get_next(page_get_infimum_rec(page)),
+				0, heap, btr_page_get_level(page, &mtr));
+
+			if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
+					   offsets)) {
+				const rec_t* first_rec = page_rec_get_next(
+					page_get_infimum_rec(page));
+
+				btr_validate_report1(index, level, block);
+
+				buf_page_print(father_page, 0);
+				buf_page_print(page, 0);
+
+				fputs("InnoDB: Error: node ptrs differ"
+				      " on levels > 0\n"
+				      "InnoDB: node ptr ", stderr);
+				rec_print_new(stderr, node_ptr, offsets);
+				fputs("InnoDB: first rec ", stderr);
+				rec_print(stderr, first_rec, index);
+				putc('\n', stderr);
+				ret = FALSE;
+
+				goto node_ptr_fails;
+			}
+		}
+
+		if (left_page_no == FIL_NULL) {
+			ut_a(node_ptr == page_rec_get_next(
+				     page_get_infimum_rec(father_page)));
+			ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
+		}
+
+		if (right_page_no == FIL_NULL) {
+			ut_a(node_ptr == page_rec_get_prev(
+				     page_get_supremum_rec(father_page)));
+			ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
+		} else {
+			const rec_t*	right_node_ptr
+				= page_rec_get_next(node_ptr);
+
+			offsets = btr_page_get_father_block(
+				offsets, heap, index, right_block,
+				&mtr, &right_node_cur);
+			if (right_node_ptr
+			    != page_get_supremum_rec(father_page)) {
+
+				if (btr_cur_get_rec(&right_node_cur)
+				    != right_node_ptr) {
+					ret = FALSE;
+					fputs("InnoDB: node pointer to"
+					      " the right page is wrong\n",
+					      stderr);
+
+					btr_validate_report1(index, level,
+							     block);
+
+					buf_page_print(father_page, 0);
+					buf_page_print(page, 0);
+					buf_page_print(right_page, 0);
+				}
+			} else {
+				page_t*	right_father_page
+					= btr_cur_get_page(&right_node_cur);
+
+				if (btr_cur_get_rec(&right_node_cur)
+				    != page_rec_get_next(
+					    page_get_infimum_rec(
+						    right_father_page))) {
+					ret = FALSE;
+					fputs("InnoDB: node pointer 2 to"
+					      " the right page is wrong\n",
+					      stderr);
+
+					btr_validate_report1(index, level,
+							     block);
+
+					buf_page_print(father_page, 0);
+					buf_page_print(right_father_page, 0);
+					buf_page_print(page, 0);
+					buf_page_print(right_page, 0);
+				}
+
+				if (page_get_page_no(right_father_page)
+				    != btr_page_get_next(father_page, &mtr)) {
+
+					ret = FALSE;
+					fputs("InnoDB: node pointer 3 to"
+					      " the right page is wrong\n",
+					      stderr);
+
+					btr_validate_report1(index, level,
+							     block);
+
+					buf_page_print(father_page, 0);
+					buf_page_print(right_father_page, 0);
+					buf_page_print(page, 0);
+					buf_page_print(right_page, 0);
+				}
+			}
+		}
+	}
+
+node_ptr_fails:
+	/* Commit the mini-transaction to release the latch on 'page'.
+	Re-acquire the latch on right_page, which will become 'page'
+	on the next loop.  The page has already been checked. */
+	mtr_commit(&mtr);
+
+	if (right_page_no != FIL_NULL) {
+		mtr_start(&mtr);
+
+		block = btr_block_get(space, zip_size, right_page_no,
+				      RW_X_LATCH, &mtr);
+		page = buf_block_get_frame(block);
+
+		goto loop;
+	}
+
+	mem_heap_free(heap);
+	return(ret);
+}
+
+/**************************************************************//**
+Checks the consistency of an index tree.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+btr_validate_index(
+/*===============*/
+	dict_index_t*	index,	/*!< in: index */
+	trx_t*		trx)	/*!< in: transaction or NULL */
+{
+	mtr_t	mtr;
+	page_t*	root;
+	ulint	i;
+	ulint	n;
+
+	mtr_start(&mtr);
+	mtr_x_lock(dict_index_get_lock(index), &mtr);
+
+	root = btr_root_get(index, &mtr);
+	n = btr_page_get_level(root, &mtr);
+
+	for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
+		if (!btr_validate_level(index, trx, n - i)) {
+
+			mtr_commit(&mtr);
+
+			return(FALSE);
+		}
+	}
+
+	mtr_commit(&mtr);
+
+	return(TRUE);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c
new file mode 100644
index 00000000000..46dfb5d1a46
--- /dev/null
+++ b/storage/innodb_plugin/btr/btr0cur.c
@@ -0,0 +1,4847 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file btr/btr0cur.c
+The index tree cursor
+
+All changes that row operations make to a B-tree or the records
+there must go through this module! Undo log records are written here
+of every modify or insert of a clustered index record.
+
+			NOTE!!!
+To make sure we do not run out of disk space during a pessimistic
+insert or update, we have to reserve 2 x the height of the index tree
+many pages in the tablespace before we start the operation, because
+if leaf splitting has been started, it is difficult to undo, except
+by crashing the database and doing a roll-forward.
+
+Created 10/16/1994 Heikki Tuuri
+*******************************************************/
+
+#include "btr0cur.h"
+
+#ifdef UNIV_NONINL
+#include "btr0cur.ic"
+#endif
+
+#include "row0upd.h"
+#ifndef UNIV_HOTBACKUP
+#include "mtr0log.h"
+#include "page0page.h"
+#include "page0zip.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "buf0lru.h"
+#include "btr0btr.h"
+#include "btr0sea.h"
+#include "trx0rec.h"
+#include "trx0roll.h" /* trx_is_recv() */
+#include "que0que.h"
+#include "row0row.h"
+#include "srv0srv.h"
+#include "ibuf0ibuf.h"
+#include "lock0lock.h"
+#include "zlib.h"
+
+#ifdef UNIV_DEBUG
+/** If the following is set to TRUE, this module prints a lot of
+trace information of individual record operations */
+UNIV_INTERN ibool	btr_cur_print_record_ops = FALSE;
+#endif /* UNIV_DEBUG */
+
+/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
+UNIV_INTERN ulint	btr_cur_n_non_sea	= 0;
+/** Number of successful adaptive hash index lookups in
+btr_cur_search_to_nth_level(). */
+UNIV_INTERN ulint	btr_cur_n_sea		= 0;
+/** Old value of btr_cur_n_non_sea.  Copied by
+srv_refresh_innodb_monitor_stats().  Referenced by
+srv_printf_innodb_monitor(). */
+UNIV_INTERN ulint	btr_cur_n_non_sea_old	= 0;
+/** Old value of btr_cur_n_sea.  Copied by
+srv_refresh_innodb_monitor_stats().  Referenced by
+srv_printf_innodb_monitor(). */
+UNIV_INTERN ulint	btr_cur_n_sea_old	= 0;
+
+/** In the optimistic insert, if the insert does not fit, but this much space
+can be released by page reorganize, then it is reorganized */
+#define BTR_CUR_PAGE_REORGANIZE_LIMIT	(UNIV_PAGE_SIZE / 32)
+
+/** The structure of a BLOB part header */
+/* @{ */
+/*--------------------------------------*/
+#define BTR_BLOB_HDR_PART_LEN		0	/*!< BLOB part len on this
+						page */
+#define BTR_BLOB_HDR_NEXT_PAGE_NO	4	/*!< next BLOB part page no,
+						FIL_NULL if none */
+/*--------------------------------------*/
+#define BTR_BLOB_HDR_SIZE		8	/*!< Size of a BLOB
+						part header, in bytes */
+/* @} */
+#endif /* !UNIV_HOTBACKUP */
+
+/** A BLOB field reference full of zero, for use in assertions and tests.
+Initially, BLOB field references are set to zero, in
+dtuple_convert_big_rec(). */
+UNIV_INTERN const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
+
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Marks all extern fields in a record as owned by the record. This function
+should be called if the delete mark of a record is removed: a not delete
+marked record always owns all its extern fields. */
+static
+void
+btr_cur_unmark_extern_fields(
+/*=========================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
+				part will be updated, or NULL */
+	rec_t*		rec,	/*!< in/out: record in a clustered index */
+	dict_index_t*	index,	/*!< in: index of the page */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	mtr_t*		mtr);	/*!< in: mtr, or NULL if not logged */
+/*******************************************************************//**
+Adds path information to the cursor for the current page, for which
+the binary search has been performed. */
+static
+void
+btr_cur_add_path_info(
+/*==================*/
+	btr_cur_t*	cursor,		/*!< in: cursor positioned on a page */
+	ulint		height,		/*!< in: height of the page in tree;
+					0 means leaf node */
+	ulint		root_height);	/*!< in: root node height in tree */
+/***********************************************************//**
+Frees the externally stored fields for a record, if the field is mentioned
+in the update vector. */
+static
+void
+btr_rec_free_updated_extern_fields(
+/*===============================*/
+	dict_index_t*	index,	/*!< in: index of rec; the index tree MUST be
+				X-latched */
+	rec_t*		rec,	/*!< in: record */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	const upd_t*	update,	/*!< in: update vector */
+	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle which contains
+				an X-latch to record page and to the tree */
+/***********************************************************//**
+Frees the externally stored fields for a record. */
+static
+void
+btr_rec_free_externally_stored_fields(
+/*==================================*/
+	dict_index_t*	index,	/*!< in: index of the data, the index
+				tree MUST be X-latched */
+	rec_t*		rec,	/*!< in: record */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle which contains
+				an X-latch to record page and to the index
+				tree */
+/***********************************************************//**
+Gets the externally stored size of a record, in units of a database page.
+@return	externally stored part, in units of a database page */
+static
+ulint
+btr_rec_get_externally_stored_len(
+/*==============================*/
+	rec_t*		rec,	/*!< in: record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+#endif /* !UNIV_HOTBACKUP */
+
+/******************************************************//**
+The following function is used to set the deleted bit of a record. */
+UNIV_INLINE
+void
+btr_rec_set_deleted_flag(
+/*=====================*/
+	rec_t*		rec,	/*!< in/out: physical record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page (or NULL) */
+	ulint		flag)	/*!< in: nonzero if delete marked */
+{
+	if (page_rec_is_comp(rec)) {
+		rec_set_deleted_flag_new(rec, page_zip, flag);
+	} else {
+		ut_ad(!page_zip);
+		rec_set_deleted_flag_old(rec, flag);
+	}
+}
+
+#ifndef UNIV_HOTBACKUP
+/*==================== B-TREE SEARCH =========================*/
+
+/********************************************************************//**
+Latches the leaf page or pages requested. */
+static
+void
+btr_cur_latch_leaves(
+/*=================*/
+	page_t*		page,		/*!< in: leaf page where the search
+					converged */
+	ulint		space,		/*!< in: space id */
+	ulint		zip_size,	/*!< in: compressed page size in bytes
+					or 0 for uncompressed pages */
+	ulint		page_no,	/*!< in: page number of the leaf */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_cur_t*	cursor,		/*!< in: cursor */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	ulint		mode;
+	ulint		left_page_no;
+	ulint		right_page_no;
+	buf_block_t*	get_block;
+
+	ut_ad(page && mtr);
+
+	switch (latch_mode) {
+	case BTR_SEARCH_LEAF:
+	case BTR_MODIFY_LEAF:
+		mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
+		get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
+#ifdef UNIV_BTR_DEBUG
+		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
+#endif /* UNIV_BTR_DEBUG */
+		get_block->check_index_page_at_flush = TRUE;
+		return;
+	case BTR_MODIFY_TREE:
+		/* x-latch also brothers from left to right */
+		left_page_no = btr_page_get_prev(page, mtr);
+
+		if (left_page_no != FIL_NULL) {
+			get_block = btr_block_get(space, zip_size,
+						  left_page_no,
+						  RW_X_LATCH, mtr);
+#ifdef UNIV_BTR_DEBUG
+			ut_a(page_is_comp(get_block->frame)
+			     == page_is_comp(page));
+			ut_a(btr_page_get_next(get_block->frame, mtr)
+			     == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
+			get_block->check_index_page_at_flush = TRUE;
+		}
+
+		get_block = btr_block_get(space, zip_size, page_no,
+					  RW_X_LATCH, mtr);
+#ifdef UNIV_BTR_DEBUG
+		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
+#endif /* UNIV_BTR_DEBUG */
+		get_block->check_index_page_at_flush = TRUE;
+
+		right_page_no = btr_page_get_next(page, mtr);
+
+		if (right_page_no != FIL_NULL) {
+			get_block = btr_block_get(space, zip_size,
+						  right_page_no,
+						  RW_X_LATCH, mtr);
+#ifdef UNIV_BTR_DEBUG
+			ut_a(page_is_comp(get_block->frame)
+			     == page_is_comp(page));
+			ut_a(btr_page_get_prev(get_block->frame, mtr)
+			     == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
+			get_block->check_index_page_at_flush = TRUE;
+		}
+
+		return;
+
+	case BTR_SEARCH_PREV:
+	case BTR_MODIFY_PREV:
+		mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH;
+		/* latch also left brother */
+		left_page_no = btr_page_get_prev(page, mtr);
+
+		if (left_page_no != FIL_NULL) {
+			get_block = btr_block_get(space, zip_size,
+						  left_page_no, mode, mtr);
+			cursor->left_block = get_block;
+#ifdef UNIV_BTR_DEBUG
+			ut_a(page_is_comp(get_block->frame)
+			     == page_is_comp(page));
+			ut_a(btr_page_get_next(get_block->frame, mtr)
+			     == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
+			get_block->check_index_page_at_flush = TRUE;
+		}
+
+		get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
+#ifdef UNIV_BTR_DEBUG
+		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
+#endif /* UNIV_BTR_DEBUG */
+		get_block->check_index_page_at_flush = TRUE;
+		return;
+	}
+
+	ut_error;
+}
+
+/********************************************************************//**
+Searches an index tree and positions a tree cursor on a given level.
+NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
+to node pointer page number fields on the upper levels of the tree!
+Note that if mode is PAGE_CUR_LE, which is used in inserts, then
+cursor->up_match and cursor->low_match both will have sensible values.
+If mode is PAGE_CUR_GE, then up_match will a have a sensible value.
+
+If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the
+search tuple should be performed in the B-tree. InnoDB does an insert
+immediately after the cursor. Thus, the cursor may end up on a user record,
+or on a page infimum record. */
+UNIV_INTERN
+void
+btr_cur_search_to_nth_level(
+/*========================*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		level,	/*!< in: the tree level of search */
+	const dtuple_t*	tuple,	/*!< in: data tuple; NOTE: n_fields_cmp in
+				tuple must be set so that it cannot get
+				compared to the node ptr page number field! */
+	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
+				Inserts should always be made using
+				PAGE_CUR_LE to search the position! */
+	ulint		latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
+				BTR_INSERT and BTR_ESTIMATE;
+				cursor->left_block is used to store a pointer
+				to the left neighbor page, in the cases
+				BTR_SEARCH_PREV and BTR_MODIFY_PREV;
+				NOTE that if has_search_latch
+				is != 0, we maybe do not have a latch set
+				on the cursor page, we assume
+				the caller uses his search latch
+				to protect the record! */
+	btr_cur_t*	cursor, /*!< in/out: tree cursor; the cursor page is
+				s- or x-latched, but see also above! */
+	ulint		has_search_latch,/*!< in: info on the latch mode the
+				caller currently has on btr_search_latch:
+				RW_S_LATCH, or 0 */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	page_cur_t*	page_cursor;
+	page_t*		page;
+	buf_block_t*	guess;
+	rec_t*		node_ptr;
+	ulint		page_no;
+	ulint		space;
+	ulint		up_match;
+	ulint		up_bytes;
+	ulint		low_match;
+	ulint		low_bytes;
+	ulint		height;
+	ulint		savepoint;
+	ulint		page_mode;
+	ulint		insert_planned;
+	ulint		estimate;
+	ulint		ignore_sec_unique;
+	ulint		root_height = 0; /* remove warning */
+#ifdef BTR_CUR_ADAPT
+	btr_search_t*	info;
+#endif
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+	/* Currently, PAGE_CUR_LE is the only search mode used for searches
+	ending to upper levels */
+
+	ut_ad(level == 0 || mode == PAGE_CUR_LE);
+	ut_ad(dict_index_check_search_tuple(index, tuple));
+	ut_ad(!dict_index_is_ibuf(index) || ibuf_inside());
+	ut_ad(dtuple_check_typed(tuple));
+
+#ifdef UNIV_DEBUG
+	cursor->up_match = ULINT_UNDEFINED;
+	cursor->low_match = ULINT_UNDEFINED;
+#endif
+	insert_planned = latch_mode & BTR_INSERT;
+	estimate = latch_mode & BTR_ESTIMATE;
+	ignore_sec_unique = latch_mode & BTR_IGNORE_SEC_UNIQUE;
+	latch_mode = latch_mode & ~(BTR_INSERT | BTR_ESTIMATE
+				    | BTR_IGNORE_SEC_UNIQUE);
+
+	ut_ad(!insert_planned || (mode == PAGE_CUR_LE));
+
+	cursor->flag = BTR_CUR_BINARY;
+	cursor->index = index;
+
+#ifndef BTR_CUR_ADAPT
+	guess = NULL;
+#else
+	info = btr_search_get_info(index);
+
+	guess = info->root_guess;
+
+#ifdef BTR_CUR_HASH_ADAPT
+
+#ifdef UNIV_SEARCH_PERF_STAT
+	info->n_searches++;
+#endif
+	if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
+	    && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
+	    && !estimate
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+	    && mode != PAGE_CUR_LE_OR_EXTENDS
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+	    /* If !has_search_latch, we do a dirty read of
+	    btr_search_enabled below, and btr_search_guess_on_hash()
+	    will have to check it again. */
+	    && UNIV_LIKELY(btr_search_enabled)
+	    && btr_search_guess_on_hash(index, info, tuple, mode,
+					latch_mode, cursor,
+					has_search_latch, mtr)) {
+
+		/* Search using the hash index succeeded */
+
+		ut_ad(cursor->up_match != ULINT_UNDEFINED
+		      || mode != PAGE_CUR_GE);
+		ut_ad(cursor->up_match != ULINT_UNDEFINED
+		      || mode != PAGE_CUR_LE);
+		ut_ad(cursor->low_match != ULINT_UNDEFINED
+		      || mode != PAGE_CUR_LE);
+		btr_cur_n_sea++;
+
+		return;
+	}
+#endif /* BTR_CUR_HASH_ADAPT */
+#endif /* BTR_CUR_ADAPT */
+	btr_cur_n_non_sea++;
+
+	/* If the hash search did not succeed, do binary search down the
+	tree */
+
+	if (has_search_latch) {
+		/* Release possible search latch to obey latching order */
+		rw_lock_s_unlock(&btr_search_latch);
+	}
+
+	/* Store the position of the tree latch we push to mtr so that we
+	know how to release it when we have latched leaf node(s) */
+
+	savepoint = mtr_set_savepoint(mtr);
+
+	if (latch_mode == BTR_MODIFY_TREE) {
+		mtr_x_lock(dict_index_get_lock(index), mtr);
+
+	} else if (latch_mode == BTR_CONT_MODIFY_TREE) {
+		/* Do nothing */
+		ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+					MTR_MEMO_X_LOCK));
+	} else {
+		mtr_s_lock(dict_index_get_lock(index), mtr);
+	}
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	space = dict_index_get_space(index);
+	page_no = dict_index_get_page(index);
+
+	up_match = 0;
+	up_bytes = 0;
+	low_match = 0;
+	low_bytes = 0;
+
+	height = ULINT_UNDEFINED;
+
+	/* We use these modified search modes on non-leaf levels of the
+	B-tree. These let us end up in the right B-tree leaf. In that leaf
+	we use the original search mode. */
+
+	switch (mode) {
+	case PAGE_CUR_GE:
+		page_mode = PAGE_CUR_L;
+		break;
+	case PAGE_CUR_G:
+		page_mode = PAGE_CUR_LE;
+		break;
+	default:
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+		ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
+		      || mode == PAGE_CUR_LE_OR_EXTENDS);
+#else /* PAGE_CUR_LE_OR_EXTENDS */
+		ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE);
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+		page_mode = mode;
+		break;
+	}
+
+	/* Loop and search until we arrive at the desired level */
+
+	for (;;) {
+		ulint		zip_size;
+		buf_block_t*	block;
+		ulint		rw_latch;
+		ulint		buf_mode;
+
+		zip_size = dict_table_zip_size(index->table);
+		rw_latch = RW_NO_LATCH;
+		buf_mode = BUF_GET;
+
+		if (height == 0 && latch_mode <= BTR_MODIFY_LEAF) {
+
+			rw_latch = latch_mode;
+
+			if (insert_planned
+			    && ibuf_should_try(index, ignore_sec_unique)) {
+
+				/* Try insert to the insert buffer if the
+				page is not in the buffer pool */
+
+				buf_mode = BUF_GET_IF_IN_POOL;
+			}
+		}
+
+retry_page_get:
+		block = buf_page_get_gen(space, zip_size, page_no,
+					 rw_latch, guess, buf_mode,
+					 __FILE__, __LINE__, mtr);
+		if (block == NULL) {
+			/* This must be a search to perform an insert;
+			try insert to the insert buffer */
+
+			ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
+			ut_ad(insert_planned);
+			ut_ad(cursor->thr);
+
+			if (ibuf_insert(tuple, index, space, zip_size,
+					page_no, cursor->thr)) {
+				/* Insertion to the insert buffer succeeded */
+				cursor->flag = BTR_CUR_INSERT_TO_IBUF;
+				if (UNIV_LIKELY_NULL(heap)) {
+					mem_heap_free(heap);
+				}
+				goto func_exit;
+			}
+
+			/* Insert to the insert buffer did not succeed:
+			retry page get */
+
+			buf_mode = BUF_GET;
+
+			goto retry_page_get;
+		}
+
+		page = buf_block_get_frame(block);
+
+		block->check_index_page_at_flush = TRUE;
+
+		if (rw_latch != RW_NO_LATCH) {
+#ifdef UNIV_ZIP_DEBUG
+			const page_zip_des_t*	page_zip
+				= buf_block_get_page_zip(block);
+			ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+			buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+		}
+
+		ut_ad(0 == ut_dulint_cmp(index->id,
+					 btr_page_get_index_id(page)));
+
+		if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
+			/* We are in the root node */
+
+			height = btr_page_get_level(page, mtr);
+			root_height = height;
+			cursor->tree_height = root_height + 1;
+#ifdef BTR_CUR_ADAPT
+			if (block != guess) {
+				info->root_guess = block;
+			}
+#endif
+		}
+
+		if (height == 0) {
+			if (rw_latch == RW_NO_LATCH) {
+
+				btr_cur_latch_leaves(page, space, zip_size,
+						     page_no, latch_mode,
+						     cursor, mtr);
+			}
+
+			if ((latch_mode != BTR_MODIFY_TREE)
+			    && (latch_mode != BTR_CONT_MODIFY_TREE)) {
+
+				/* Release the tree s-latch */
+
+				mtr_release_s_latch_at_savepoint(
+					mtr, savepoint,
+					dict_index_get_lock(index));
+			}
+
+			page_mode = mode;
+		}
+
+		page_cur_search_with_match(block, index, tuple, page_mode,
+					   &up_match, &up_bytes,
+					   &low_match, &low_bytes,
+					   page_cursor);
+
+		if (estimate) {
+			btr_cur_add_path_info(cursor, height, root_height);
+		}
+
+		/* If this is the desired level, leave the loop */
+
+		ut_ad(height == btr_page_get_level(
+			      page_cur_get_page(page_cursor), mtr));
+
+		if (level == height) {
+
+			if (level > 0) {
+				/* x-latch the page */
+				page = btr_page_get(space, zip_size,
+						    page_no, RW_X_LATCH, mtr);
+				ut_a((ibool)!!page_is_comp(page)
+				     == dict_table_is_comp(index->table));
+			}
+
+			break;
+		}
+
+		ut_ad(height > 0);
+
+		height--;
+
+		guess = NULL;
+
+		node_ptr = page_cur_get_rec(page_cursor);
+		offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		/* Go to the child node */
+		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+
+	if (level == 0) {
+		cursor->low_match = low_match;
+		cursor->low_bytes = low_bytes;
+		cursor->up_match = up_match;
+		cursor->up_bytes = up_bytes;
+
+#ifdef BTR_CUR_ADAPT
+		/* We do a dirty read of btr_search_enabled here.  We
+		will properly check btr_search_enabled again in
+		btr_search_build_page_hash_index() before building a
+		page hash index, while holding btr_search_latch. */
+		if (UNIV_LIKELY(btr_search_enabled)) {
+
+			btr_search_info_update(index, cursor);
+		}
+#endif
+		ut_ad(cursor->up_match != ULINT_UNDEFINED
+		      || mode != PAGE_CUR_GE);
+		ut_ad(cursor->up_match != ULINT_UNDEFINED
+		      || mode != PAGE_CUR_LE);
+		ut_ad(cursor->low_match != ULINT_UNDEFINED
+		      || mode != PAGE_CUR_LE);
+	}
+
+func_exit:
+	if (has_search_latch) {
+
+		rw_lock_s_lock(&btr_search_latch);
+	}
+}
+
+/*****************************************************************//**
+Opens a cursor at either end of an index. */
+UNIV_INTERN
+void
+btr_cur_open_at_index_side(
+/*=======================*/
+	ibool		from_left,	/*!< in: TRUE if open to the low end,
+					FALSE if to the high end */
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: latch mode */
+	btr_cur_t*	cursor,		/*!< in: cursor */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	page_cur_t*	page_cursor;
+	ulint		page_no;
+	ulint		space;
+	ulint		zip_size;
+	ulint		height;
+	ulint		root_height = 0; /* remove warning */
+	rec_t*		node_ptr;
+	ulint		estimate;
+	ulint		savepoint;
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	estimate = latch_mode & BTR_ESTIMATE;
+	latch_mode = latch_mode & ~BTR_ESTIMATE;
+
+	/* Store the position of the tree latch we push to mtr so that we
+	know how to release it when we have latched the leaf node */
+
+	savepoint = mtr_set_savepoint(mtr);
+
+	if (latch_mode == BTR_MODIFY_TREE) {
+		mtr_x_lock(dict_index_get_lock(index), mtr);
+	} else {
+		mtr_s_lock(dict_index_get_lock(index), mtr);
+	}
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+	cursor->index = index;
+
+	space = dict_index_get_space(index);
+	zip_size = dict_table_zip_size(index->table);
+	page_no = dict_index_get_page(index);
+
+	height = ULINT_UNDEFINED;
+
+	for (;;) {
+		buf_block_t*	block;
+		page_t*		page;
+		block = buf_page_get_gen(space, zip_size, page_no,
+					 RW_NO_LATCH, NULL, BUF_GET,
+					 __FILE__, __LINE__, mtr);
+		page = buf_block_get_frame(block);
+		ut_ad(0 == ut_dulint_cmp(index->id,
+					 btr_page_get_index_id(page)));
+
+		block->check_index_page_at_flush = TRUE;
+
+		if (height == ULINT_UNDEFINED) {
+			/* We are in the root node */
+
+			height = btr_page_get_level(page, mtr);
+			root_height = height;
+		}
+
+		if (height == 0) {
+			btr_cur_latch_leaves(page, space, zip_size, page_no,
+					     latch_mode, cursor, mtr);
+
+			/* In versions <= 3.23.52 we had forgotten to
+			release the tree latch here. If in an index scan
+			we had to scan far to find a record visible to the
+			current transaction, that could starve others
+			waiting for the tree latch. */
+
+			if ((latch_mode != BTR_MODIFY_TREE)
+			    && (latch_mode != BTR_CONT_MODIFY_TREE)) {
+
+				/* Release the tree s-latch */
+
+				mtr_release_s_latch_at_savepoint(
+					mtr, savepoint,
+					dict_index_get_lock(index));
+			}
+		}
+
+		if (from_left) {
+			page_cur_set_before_first(block, page_cursor);
+		} else {
+			page_cur_set_after_last(block, page_cursor);
+		}
+
+		if (height == 0) {
+			if (estimate) {
+				btr_cur_add_path_info(cursor, height,
+						      root_height);
+			}
+
+			break;
+		}
+
+		ut_ad(height > 0);
+
+		if (from_left) {
+			page_cur_move_to_next(page_cursor);
+		} else {
+			page_cur_move_to_prev(page_cursor);
+		}
+
+		if (estimate) {
+			btr_cur_add_path_info(cursor, height, root_height);
+		}
+
+		height--;
+
+		node_ptr = page_cur_get_rec(page_cursor);
+		offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		/* Go to the child node */
+		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+}
+
+/**********************************************************************//**
+Positions a cursor at a randomly chosen position within a B-tree. */
+UNIV_INTERN
+void
+btr_cur_open_at_rnd_pos(
+/*====================*/
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_cur_t*	cursor,		/*!< in/out: B-tree cursor */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	page_cur_t*	page_cursor;
+	ulint		page_no;
+	ulint		space;
+	ulint		zip_size;
+	ulint		height;
+	rec_t*		node_ptr;
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	if (latch_mode == BTR_MODIFY_TREE) {
+		mtr_x_lock(dict_index_get_lock(index), mtr);
+	} else {
+		mtr_s_lock(dict_index_get_lock(index), mtr);
+	}
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+	cursor->index = index;
+
+	space = dict_index_get_space(index);
+	zip_size = dict_table_zip_size(index->table);
+	page_no = dict_index_get_page(index);
+
+	height = ULINT_UNDEFINED;
+
+	for (;;) {
+		buf_block_t*	block;
+		page_t*		page;
+
+		block = buf_page_get_gen(space, zip_size, page_no,
+					 RW_NO_LATCH, NULL, BUF_GET,
+					 __FILE__, __LINE__, mtr);
+		page = buf_block_get_frame(block);
+		ut_ad(0 == ut_dulint_cmp(index->id,
+					 btr_page_get_index_id(page)));
+
+		if (height == ULINT_UNDEFINED) {
+			/* We are in the root node */
+
+			height = btr_page_get_level(page, mtr);
+		}
+
+		if (height == 0) {
+			btr_cur_latch_leaves(page, space, zip_size, page_no,
+					     latch_mode, cursor, mtr);
+		}
+
+		page_cur_open_on_rnd_user_rec(block, page_cursor);
+
+		if (height == 0) {
+
+			break;
+		}
+
+		ut_ad(height > 0);
+
+		height--;
+
+		node_ptr = page_cur_get_rec(page_cursor);
+		offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		/* Go to the child node */
+		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+}
+
+/*==================== B-TREE INSERT =========================*/
+
+/*************************************************************//**
+Inserts a record if there is enough space, or if enough space can
+be freed by reorganizing. Differs from btr_cur_optimistic_insert because
+no heuristics is applied to whether it pays to use CPU time for
+reorganizing the page or not.
+@return	pointer to inserted record if succeed, else NULL */
+static
+rec_t*
+btr_cur_insert_if_possible(
+/*=======================*/
+	btr_cur_t*	cursor,	/*!< in: cursor on page after which to insert;
+				cursor stays valid */
+	const dtuple_t*	tuple,	/*!< in: tuple to insert; the size info need not
+				have been stored to tuple */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	page_cur_t*	page_cursor;
+	buf_block_t*	block;
+	rec_t*		rec;
+
+	ut_ad(dtuple_check_typed(tuple));
+
+	block = btr_cur_get_block(cursor);
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	/* Now, try the insert */
+	rec = page_cur_tuple_insert(page_cursor, tuple,
+				    cursor->index, n_ext, mtr);
+
+	if (UNIV_UNLIKELY(!rec)) {
+		/* If record did not fit, reorganize */
+
+		if (btr_page_reorganize(block, cursor->index, mtr)) {
+
+			page_cur_search(block, cursor->index, tuple,
+					PAGE_CUR_LE, page_cursor);
+
+			rec = page_cur_tuple_insert(page_cursor, tuple,
+						    cursor->index, n_ext, mtr);
+		}
+	}
+
+	return(rec);
+}
+
+/*************************************************************//**
+For an insert, checks the locks and does the undo logging if desired.
+@return	DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
+UNIV_INLINE
+ulint
+btr_cur_ins_lock_and_undo(
+/*======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags: if
+				not zero, the parameters index and thr
+				should be specified */
+	btr_cur_t*	cursor,	/*!< in: cursor on page after which to insert */
+	const dtuple_t*	entry,	/*!< in: entry to insert */
+	que_thr_t*	thr,	/*!< in: query thread or NULL */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction */
+	ibool*		inherit)/*!< out: TRUE if the inserted new record maybe
+				should inherit LOCK_GAP type locks from the
+				successor record */
+{
+	dict_index_t*	index;
+	ulint		err;
+	rec_t*		rec;
+	roll_ptr_t	roll_ptr;
+
+	/* Check if we have to wait for a lock: enqueue an explicit lock
+	request if yes */
+
+	rec = btr_cur_get_rec(cursor);
+	index = cursor->index;
+
+	err = lock_rec_insert_check_and_lock(flags, rec,
+					     btr_cur_get_block(cursor),
+					     index, thr, mtr, inherit);
+
+	if (err != DB_SUCCESS) {
+
+		return(err);
+	}
+
+	if (dict_index_is_clust(index) && !dict_index_is_ibuf(index)) {
+
+		err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP,
+						    thr, index, entry,
+						    NULL, 0, NULL,
+						    &roll_ptr);
+		if (err != DB_SUCCESS) {
+
+			return(err);
+		}
+
+		/* Now we can fill in the roll ptr field in entry */
+
+		if (!(flags & BTR_KEEP_SYS_FLAG)) {
+
+			row_upd_index_entry_sys_field(entry, index,
+						      DATA_ROLL_PTR, roll_ptr);
+		}
+	}
+
+	return(DB_SUCCESS);
+}
+
+#ifdef UNIV_DEBUG
+/*************************************************************//**
+Report information about a transaction. */
+static
+void
+btr_cur_trx_report(
+/*===============*/
+	trx_t*			trx,	/*!< in: transaction */
+	const dict_index_t*	index,	/*!< in: index */
+	const char*		op)	/*!< in: operation */
+{
+	fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ",
+		TRX_ID_PREP_PRINTF(trx->id));
+	fputs(op, stderr);
+	dict_index_name_print(stderr, trx, index);
+	putc('\n', stderr);
+}
+#endif /* UNIV_DEBUG */
+
+/*************************************************************//**
+Tries to perform an insert to a page in an index tree, next to cursor.
+It is assumed that mtr holds an x-latch on the page. The operation does
+not succeed if there is too little space on the page. If there is just
+one record on the page, the insert will always succeed; this is to
+prevent trying to split a page with just one record.
+@return	DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
+UNIV_INTERN
+ulint
+btr_cur_optimistic_insert(
+/*======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags: if not
+				zero, the parameters index and thr should be
+				specified */
+	btr_cur_t*	cursor,	/*!< in: cursor on page after which to insert;
+				cursor stays valid */
+	dtuple_t*	entry,	/*!< in/out: entry to insert */
+	rec_t**		rec,	/*!< out: pointer to inserted record if
+				succeed */
+	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
+				be stored externally by the caller, or
+				NULL */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	que_thr_t*	thr,	/*!< in: query thread or NULL */
+	mtr_t*		mtr)	/*!< in: mtr; if this function returns
+				DB_SUCCESS on a leaf page of a secondary
+				index in a compressed tablespace, the
+				mtr must be committed before latching
+				any further pages */
+{
+	big_rec_t*	big_rec_vec	= NULL;
+	dict_index_t*	index;
+	page_cur_t*	page_cursor;
+	buf_block_t*	block;
+	page_t*		page;
+	ulint		max_size;
+	rec_t*		dummy_rec;
+	ibool		leaf;
+	ibool		reorg;
+	ibool		inherit;
+	ulint		zip_size;
+	ulint		rec_size;
+	mem_heap_t*	heap		= NULL;
+	ulint		err;
+
+	*big_rec = NULL;
+
+	block = btr_cur_get_block(cursor);
+	page = buf_block_get_frame(block);
+	index = cursor->index;
+	zip_size = buf_block_get_zip_size(block);
+#ifdef UNIV_DEBUG_VALGRIND
+	if (zip_size) {
+		UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+		UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
+	}
+#endif /* UNIV_DEBUG_VALGRIND */
+
+	if (!dtuple_check_typed_no_assert(entry)) {
+		fputs("InnoDB: Error in a tuple to insert into ", stderr);
+		dict_index_name_print(stderr, thr_get_trx(thr), index);
+	}
+#ifdef UNIV_DEBUG
+	if (btr_cur_print_record_ops && thr) {
+		btr_cur_trx_report(thr_get_trx(thr), index, "insert into ");
+		dtuple_print(stderr, entry);
+	}
+#endif /* UNIV_DEBUG */
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	max_size = page_get_max_insert_size_after_reorganize(page, 1);
+	leaf = page_is_leaf(page);
+
+	/* Calculate the record size when entry is converted to a record */
+	rec_size = rec_get_converted_size(index, entry, n_ext);
+
+	if (page_zip_rec_needs_ext(rec_size, page_is_comp(page),
+				   dtuple_get_n_fields(entry), zip_size)) {
+
+		/* The record is so big that we have to store some fields
+		externally on separate database pages */
+		big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
+
+		if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
+
+			return(DB_TOO_BIG_RECORD);
+		}
+
+		rec_size = rec_get_converted_size(index, entry, n_ext);
+	}
+
+	if (UNIV_UNLIKELY(zip_size)) {
+		/* Estimate the free space of an empty compressed page.
+		Subtract one byte for the encoded heap_no in the
+		modification log. */
+		ulint	free_space_zip = page_zip_empty_size(
+			cursor->index->n_fields, zip_size) - 1;
+		ulint	n_uniq = dict_index_get_n_unique_in_tree(index);
+
+		ut_ad(dict_table_is_comp(index->table));
+
+		/* There should be enough room for two node pointer
+		records on an empty non-leaf page.  This prevents
+		infinite page splits. */
+
+		if (UNIV_LIKELY(entry->n_fields >= n_uniq)
+		    && UNIV_UNLIKELY(REC_NODE_PTR_SIZE
+				     + rec_get_converted_size_comp_prefix(
+					     index, entry->fields, n_uniq,
+					     NULL)
+				     /* On a compressed page, there is
+				     a two-byte entry in the dense
+				     page directory for every record.
+				     But there is no record header. */
+				     - (REC_N_NEW_EXTRA_BYTES - 2)
+				     > free_space_zip / 2)) {
+
+			if (big_rec_vec) {
+				dtuple_convert_back_big_rec(
+					index, entry, big_rec_vec);
+			}
+
+			if (heap) {
+				mem_heap_free(heap);
+			}
+
+			return(DB_TOO_BIG_RECORD);
+		}
+	}
+
+	/* If there have been many consecutive inserts, and we are on the leaf
+	level, check if we have to split the page to reserve enough free space
+	for future updates of records. */
+
+	if (dict_index_is_clust(index)
+	    && (page_get_n_recs(page) >= 2)
+	    && UNIV_LIKELY(leaf)
+	    && (dict_index_get_space_reserve() + rec_size > max_size)
+	    && (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
+		|| btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
+fail:
+		err = DB_FAIL;
+fail_err:
+
+		if (big_rec_vec) {
+			dtuple_convert_back_big_rec(index, entry, big_rec_vec);
+		}
+
+		if (UNIV_LIKELY_NULL(heap)) {
+			mem_heap_free(heap);
+		}
+
+		return(err);
+	}
+
+	if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT
+	     || max_size < rec_size)
+	    && UNIV_LIKELY(page_get_n_recs(page) > 1)
+	    && page_get_max_insert_size(page, 1) < rec_size) {
+
+		goto fail;
+	}
+
+	/* Check locks and write to the undo log, if specified */
+	err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
+					thr, mtr, &inherit);
+
+	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+
+		goto fail_err;
+	}
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	/* Now, try the insert */
+
+	{
+		const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
+		*rec = page_cur_tuple_insert(page_cursor, entry, index,
+					     n_ext, mtr);
+		reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
+
+		if (UNIV_UNLIKELY(reorg)) {
+			ut_a(zip_size);
+			ut_a(*rec);
+		}
+	}
+
+	if (UNIV_UNLIKELY(!*rec) && UNIV_LIKELY(!reorg)) {
+		/* If the record did not fit, reorganize */
+		if (UNIV_UNLIKELY(!btr_page_reorganize(block, index, mtr))) {
+			ut_a(zip_size);
+
+			goto fail;
+		}
+
+		ut_ad(zip_size
+		      || page_get_max_insert_size(page, 1) == max_size);
+
+		reorg = TRUE;
+
+		page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor);
+
+		*rec = page_cur_tuple_insert(page_cursor, entry, index,
+					     n_ext, mtr);
+
+		if (UNIV_UNLIKELY(!*rec)) {
+			if (UNIV_LIKELY(zip_size != 0)) {
+
+				goto fail;
+			}
+
+			fputs("InnoDB: Error: cannot insert tuple ", stderr);
+			dtuple_print(stderr, entry);
+			fputs(" into ", stderr);
+			dict_index_name_print(stderr, thr_get_trx(thr), index);
+			fprintf(stderr, "\nInnoDB: max insert size %lu\n",
+				(ulong) max_size);
+			ut_error;
+		}
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+
+#ifdef BTR_CUR_HASH_ADAPT
+	if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
+		btr_search_update_hash_node_on_insert(cursor);
+	} else {
+		btr_search_update_hash_on_insert(cursor);
+	}
+#endif
+
+	if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) {
+
+		lock_update_insert(block, *rec);
+	}
+
+#if 0
+	fprintf(stderr, "Insert into page %lu, max ins size %lu,"
+		" rec %lu ind type %lu\n",
+		buf_block_get_page_no(block), max_size,
+		rec_size + PAGE_DIR_SLOT_SIZE, index->type);
+#endif
+	if (leaf && !dict_index_is_clust(index)) {
+		/* Update the free bits of the B-tree page in the
+		insert buffer bitmap. */
+
+		/* The free bits in the insert buffer bitmap must
+		never exceed the free space on a page.  It is safe to
+		decrement or reset the bits in the bitmap in a
+		mini-transaction that is committed before the
+		mini-transaction that affects the free space. */
+
+		/* It is unsafe to increment the bits in a separately
+		committed mini-transaction, because in crash recovery,
+		the free bits could momentarily be set too high. */
+
+		if (zip_size) {
+			/* Update the bits in the same mini-transaction. */
+			ibuf_update_free_bits_zip(block, mtr);
+		} else {
+			/* Decrement the bits in a separate
+			mini-transaction. */
+			ibuf_update_free_bits_if_full(
+				block, max_size,
+				rec_size + PAGE_DIR_SLOT_SIZE);
+		}
+	}
+
+	*big_rec = big_rec_vec;
+
+	return(DB_SUCCESS);
+}
+
+/*************************************************************//**
+Performs an insert on a page of an index tree. It is assumed that mtr
+holds an x-latch on the tree and on the cursor page. If the insert is
+made on the leaf level, to avoid deadlocks, mtr must also own x-latches
+to brothers of page, if those brothers exist.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
+ulint
+btr_cur_pessimistic_insert(
+/*=======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags: if not
+				zero, the parameter thr should be
+				specified; if no undo logging is specified,
+				then the caller must have reserved enough
+				free extents in the file space so that the
+				insertion will certainly succeed */
+	btr_cur_t*	cursor,	/*!< in: cursor after which to insert;
+				cursor stays valid */
+	dtuple_t*	entry,	/*!< in/out: entry to insert */
+	rec_t**		rec,	/*!< out: pointer to inserted record if
+				succeed */
+	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
+				be stored externally by the caller, or
+				NULL */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	que_thr_t*	thr,	/*!< in: query thread or NULL */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	dict_index_t*	index		= cursor->index;
+	ulint		zip_size	= dict_table_zip_size(index->table);
+	big_rec_t*	big_rec_vec	= NULL;
+	mem_heap_t*	heap		= NULL;
+	ulint		err;
+	ibool		dummy_inh;
+	ibool		success;
+	ulint		n_extents	= 0;
+	ulint		n_reserved;
+
+	ut_ad(dtuple_check_typed(entry));
+
+	*big_rec = NULL;
+
+	ut_ad(mtr_memo_contains(mtr,
+				dict_index_get_lock(btr_cur_get_index(cursor)),
+				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+				MTR_MEMO_PAGE_X_FIX));
+
+	/* Try first an optimistic insert; reset the cursor flag: we do not
+	assume anything of how it was positioned */
+
+	cursor->flag = BTR_CUR_BINARY;
+
+	err = btr_cur_optimistic_insert(flags, cursor, entry, rec,
+					big_rec, n_ext, thr, mtr);
+	if (err != DB_FAIL) {
+
+		return(err);
+	}
+
+	/* Retry with a pessimistic insert. Check locks and write to undo log,
+	if specified */
+
+	err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
+					thr, mtr, &dummy_inh);
+
+	if (err != DB_SUCCESS) {
+
+		return(err);
+	}
+
+	if (!(flags & BTR_NO_UNDO_LOG_FLAG)) {
+		/* First reserve enough free space for the file segments
+		of the index tree, so that the insert will not fail because
+		of lack of space */
+
+		n_extents = cursor->tree_height / 16 + 3;
+
+		success = fsp_reserve_free_extents(&n_reserved, index->space,
+						   n_extents, FSP_NORMAL, mtr);
+		if (!success) {
+			return(DB_OUT_OF_FILE_SPACE);
+		}
+	}
+
+	if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext),
+				   dict_table_is_comp(index->table),
+				   dict_index_get_n_fields(index),
+				   zip_size)) {
+		/* The record is so big that we have to store some fields
+		externally on separate database pages */
+
+		if (UNIV_LIKELY_NULL(big_rec_vec)) {
+			/* This should never happen, but we handle
+			the situation in a robust manner. */
+			ut_ad(0);
+			dtuple_convert_back_big_rec(index, entry, big_rec_vec);
+		}
+
+		big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
+
+		if (big_rec_vec == NULL) {
+
+			if (n_extents > 0) {
+				fil_space_release_free_extents(index->space,
+							       n_reserved);
+			}
+			return(DB_TOO_BIG_RECORD);
+		}
+	}
+
+	if (dict_index_get_page(index)
+	    == buf_block_get_page_no(btr_cur_get_block(cursor))) {
+
+		/* The page is the root page */
+		*rec = btr_root_raise_and_insert(cursor, entry, n_ext, mtr);
+	} else {
+		*rec = btr_page_split_and_insert(cursor, entry, n_ext, mtr);
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+
+	ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);
+
+#ifdef BTR_CUR_ADAPT
+	btr_search_update_hash_on_insert(cursor);
+#endif
+	if (!(flags & BTR_NO_LOCKING_FLAG)) {
+
+		lock_update_insert(btr_cur_get_block(cursor), *rec);
+	}
+
+	if (n_extents > 0) {
+		fil_space_release_free_extents(index->space, n_reserved);
+	}
+
+	*big_rec = big_rec_vec;
+
+	return(DB_SUCCESS);
+}
+
+/*==================== B-TREE UPDATE =========================*/
+
+/*************************************************************//**
+For an update, checks the locks and does the undo logging.
+@return	DB_SUCCESS, DB_WAIT_LOCK, or error number */
+UNIV_INLINE
+ulint
+btr_cur_upd_lock_and_undo(
+/*======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/*!< in: cursor on record to update */
+	const upd_t*	update,	/*!< in: update vector */
+	ulint		cmpl_info,/*!< in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction */
+	roll_ptr_t*	roll_ptr)/*!< out: roll pointer */
+{
+	dict_index_t*	index;
+	rec_t*		rec;
+	ulint		err;
+
+	ut_ad(cursor && update && thr && roll_ptr);
+
+	rec = btr_cur_get_rec(cursor);
+	index = cursor->index;
+
+	if (!dict_index_is_clust(index)) {
+		/* We do undo logging only when we update a clustered index
+		record */
+		return(lock_sec_rec_modify_check_and_lock(
+			       flags, btr_cur_get_block(cursor), rec,
+			       index, thr, mtr));
+	}
+
+	/* Check if we have to wait for a lock: enqueue an explicit lock
+	request if yes */
+
+	err = DB_SUCCESS;
+
+	if (!(flags & BTR_NO_LOCKING_FLAG)) {
+		mem_heap_t*	heap		= NULL;
+		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+		rec_offs_init(offsets_);
+
+		err = lock_clust_rec_modify_check_and_lock(
+			flags, btr_cur_get_block(cursor), rec, index,
+			rec_get_offsets(rec, index, offsets_,
+					ULINT_UNDEFINED, &heap), thr);
+		if (UNIV_LIKELY_NULL(heap)) {
+			mem_heap_free(heap);
+		}
+		if (err != DB_SUCCESS) {
+
+			return(err);
+		}
+	}
+
+	/* Append the info about the update in the undo log */
+
+	err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
+					    index, NULL, update,
+					    cmpl_info, rec, roll_ptr);
+	return(err);
+}
+
+/***********************************************************//**
+Writes a redo log record of updating a record in-place. */
+UNIV_INLINE
+void
+btr_cur_update_in_place_log(
+/*========================*/
+	ulint		flags,		/*!< in: flags */
+	rec_t*		rec,		/*!< in: record */
+	dict_index_t*	index,		/*!< in: index where cursor positioned */
+	const upd_t*	update,		/*!< in: update vector */
+	trx_t*		trx,		/*!< in: transaction */
+	roll_ptr_t	roll_ptr,	/*!< in: roll ptr */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	byte*	log_ptr;
+	page_t*	page	= page_align(rec);
+	ut_ad(flags < 256);
+	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+
+	log_ptr = mlog_open_and_write_index(mtr, rec, index, page_is_comp(page)
+					    ? MLOG_COMP_REC_UPDATE_IN_PLACE
+					    : MLOG_REC_UPDATE_IN_PLACE,
+					    1 + DATA_ROLL_PTR_LEN + 14 + 2
+					    + MLOG_BUF_MARGIN);
+
+	if (!log_ptr) {
+		/* Logging in mtr is switched off during crash recovery */
+		return;
+	}
+
+	/* The code below assumes index is a clustered index: change index to
+	the clustered index if we are updating a secondary index record (or we
+	could as well skip writing the sys col values to the log in this case
+	because they are not needed for a secondary index record update) */
+
+	index = dict_table_get_first_index(index->table);
+
+	mach_write_to_1(log_ptr, flags);
+	log_ptr++;
+
+	log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
+						mtr);
+	mach_write_to_2(log_ptr, page_offset(rec));
+	log_ptr += 2;
+
+	row_upd_index_write_log(update, log_ptr, mtr);
+}
+#endif /* UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Parses a redo log record of updating a record in-place.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_cur_parse_update_in_place(
+/*==========================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	page_t*		page,	/*!< in/out: page or NULL */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	dict_index_t*	index)	/*!< in: index corresponding to page */
+{
+	ulint		flags;
+	rec_t*		rec;
+	upd_t*		update;
+	ulint		pos;
+	trx_id_t	trx_id;
+	roll_ptr_t	roll_ptr;
+	ulint		rec_offset;
+	mem_heap_t*	heap;
+	ulint*		offsets;
+
+	if (end_ptr < ptr + 1) {
+
+		return(NULL);
+	}
+
+	flags = mach_read_from_1(ptr);
+	ptr++;
+
+	ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);
+
+	if (ptr == NULL) {
+
+		return(NULL);
+	}
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	rec_offset = mach_read_from_2(ptr);
+	ptr += 2;
+
+	ut_a(rec_offset <= UNIV_PAGE_SIZE);
+
+	heap = mem_heap_create(256);
+
+	ptr = row_upd_index_parse(ptr, end_ptr, heap, &update);
+
+	if (!ptr || !page) {
+
+		goto func_exit;
+	}
+
+	ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
+	rec = page + rec_offset;
+
+	/* We do not need to reserve btr_search_latch, as the page is only
+	being recovered, and there cannot be a hash index to it. */
+
+	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+
+	if (!(flags & BTR_KEEP_SYS_FLAG)) {
+		row_upd_rec_sys_fields_in_recovery(rec, page_zip, offsets,
+						   pos, trx_id, roll_ptr);
+	}
+
+	row_upd_rec_in_place(rec, index, offsets, update, page_zip);
+
+func_exit:
+	mem_heap_free(heap);
+
+	return(ptr);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+See if there is enough place in the page modification log to log
+an update-in-place.
+@return	TRUE if enough place */
+static
+ibool
+btr_cur_update_alloc_zip(
+/*=====================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	buf_block_t*	block,	/*!< in/out: buffer page */
+	dict_index_t*	index,	/*!< in: the index corresponding to the block */
+	ulint		length,	/*!< in: size needed */
+	ibool		create,	/*!< in: TRUE=delete-and-insert,
+				FALSE=update-in-place */
+	mtr_t*		mtr)	/*!< in: mini-transaction */
+{
+	ut_a(page_zip == buf_block_get_page_zip(block));
+	ut_ad(page_zip);
+	ut_ad(!dict_index_is_ibuf(index));
+
+	if (page_zip_available(page_zip, dict_index_is_clust(index),
+			       length, create)) {
+		return(TRUE);
+	}
+
+	if (!page_zip->m_nonempty) {
+		/* The page has been freshly compressed, so
+		recompressing it will not help. */
+		return(FALSE);
+	}
+
+	if (!page_zip_compress(page_zip, buf_block_get_frame(block),
+			       index, mtr)) {
+		/* Unable to compress the page */
+		return(FALSE);
+	}
+
+	/* After recompressing a page, we must make sure that the free
+	bits in the insert buffer bitmap will not exceed the free
+	space on the page.  Because this function will not attempt
+	recompression unless page_zip_available() fails above, it is
+	safe to reset the free bits if page_zip_available() fails
+	again, below.  The free bits can safely be reset in a separate
+	mini-transaction.  If page_zip_available() succeeds below, we
+	can be sure that the page_zip_compress() above did not reduce
+	the free space available on the page. */
+
+	if (!page_zip_available(page_zip, dict_index_is_clust(index),
+				length, create)) {
+		/* Out of space: reset the free bits. */
+		if (!dict_index_is_clust(index)
+		    && page_is_leaf(buf_block_get_frame(block))) {
+			ibuf_reset_free_bits(block);
+		}
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/*************************************************************//**
+Updates a record when the update causes no size changes in its fields.
+We assume here that the ordering fields of the record do not change.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
+ulint
+btr_cur_update_in_place(
+/*====================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/*!< in: cursor on the record to update;
+				cursor stays valid and positioned on the
+				same record */
+	const upd_t*	update,	/*!< in: update vector */
+	ulint		cmpl_info,/*!< in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr)	/*!< in: mtr; must be committed before
+				latching any further pages */
+{
+	dict_index_t*	index;
+	buf_block_t*	block;
+	page_zip_des_t*	page_zip;
+	ulint		err;
+	rec_t*		rec;
+	roll_ptr_t	roll_ptr	= ut_dulint_zero;
+	trx_t*		trx;
+	ulint		was_delete_marked;
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	rec = btr_cur_get_rec(cursor);
+	index = cursor->index;
+	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+	/* The insert buffer tree should never be updated in place. */
+	ut_ad(!dict_index_is_ibuf(index));
+
+	trx = thr_get_trx(thr);
+	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+#ifdef UNIV_DEBUG
+	if (btr_cur_print_record_ops && thr) {
+		btr_cur_trx_report(trx, index, "update ");
+		rec_print_new(stderr, rec, offsets);
+	}
+#endif /* UNIV_DEBUG */
+
+	block = btr_cur_get_block(cursor);
+	page_zip = buf_block_get_page_zip(block);
+
+	/* Check that enough space is available on the compressed page. */
+	if (UNIV_LIKELY_NULL(page_zip)
+	    && !btr_cur_update_alloc_zip(page_zip, block, index,
+					 rec_offs_size(offsets), FALSE, mtr)) {
+		return(DB_ZIP_OVERFLOW);
+	}
+
+	/* Do lock checking and undo logging */
+	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+					thr, mtr, &roll_ptr);
+	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+
+		if (UNIV_LIKELY_NULL(heap)) {
+			mem_heap_free(heap);
+		}
+		return(err);
+	}
+
+	if (block->is_hashed) {
+		/* The function row_upd_changes_ord_field_binary works only
+		if the update vector was built for a clustered index, we must
+		NOT call it if index is secondary */
+
+		if (!dict_index_is_clust(index)
+		    || row_upd_changes_ord_field_binary(NULL, index, update)) {
+
+			/* Remove possible hash index pointer to this record */
+			btr_search_update_hash_on_delete(cursor);
+		}
+
+		rw_lock_x_lock(&btr_search_latch);
+	}
+
+	if (!(flags & BTR_KEEP_SYS_FLAG)) {
+		row_upd_rec_sys_fields(rec, NULL,
+				       index, offsets, trx, roll_ptr);
+	}
+
+	was_delete_marked = rec_get_deleted_flag(
+		rec, page_is_comp(buf_block_get_frame(block)));
+
+	row_upd_rec_in_place(rec, index, offsets, update, page_zip);
+
+	if (block->is_hashed) {
+		rw_lock_x_unlock(&btr_search_latch);
+	}
+
+	if (page_zip && !dict_index_is_clust(index)
+	    && page_is_leaf(buf_block_get_frame(block))) {
+		/* Update the free bits in the insert buffer. */
+		ibuf_update_free_bits_zip(block, mtr);
+	}
+
+	btr_cur_update_in_place_log(flags, rec, index, update,
+				    trx, roll_ptr, mtr);
+
+	if (was_delete_marked
+	    && !rec_get_deleted_flag(rec, page_is_comp(
+					     buf_block_get_frame(block)))) {
+		/* The new updated record owns its possible externally
+		stored fields */
+
+		btr_cur_unmark_extern_fields(page_zip,
+					     rec, index, offsets, mtr);
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+	return(DB_SUCCESS);
+}
+
+/*************************************************************//**
+Tries to update a record on a page in an index tree. It is assumed that mtr
+holds an x-latch on the page. The operation does not succeed if there is too
+little space on the page or if the update would result in too empty a page,
+so that tree compression is recommended. We assume here that the ordering
+fields of the record do not change.
+@return DB_SUCCESS, or DB_OVERFLOW if the updated record does not fit,
+DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
+there is not enough space left on the compressed page */
+UNIV_INTERN
+ulint
+btr_cur_optimistic_update(
+/*======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/*!< in: cursor on the record to update;
+				cursor stays valid and positioned on the
+				same record */
+	const upd_t*	update,	/*!< in: update vector; this must also
+				contain trx id and roll ptr fields */
+	ulint		cmpl_info,/*!< in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr)	/*!< in: mtr; must be committed before
+				latching any further pages */
+{
+	dict_index_t*	index;
+	page_cur_t*	page_cursor;
+	ulint		err;
+	buf_block_t*	block;
+	page_t*		page;
+	page_zip_des_t*	page_zip;
+	rec_t*		rec;
+	rec_t*		orig_rec;
+	ulint		max_size;
+	ulint		new_rec_size;
+	ulint		old_rec_size;
+	dtuple_t*	new_entry;
+	roll_ptr_t	roll_ptr;
+	trx_t*		trx;
+	mem_heap_t*	heap;
+	ulint		i;
+	ulint		n_ext;
+	ulint*		offsets;
+
+	block = btr_cur_get_block(cursor);
+	page = buf_block_get_frame(block);
+	orig_rec = rec = btr_cur_get_rec(cursor);
+	index = cursor->index;
+	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	/* The insert buffer tree should never be updated in place. */
+	ut_ad(!dict_index_is_ibuf(index));
+
+	heap = mem_heap_create(1024);
+	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+
+#ifdef UNIV_DEBUG
+	if (btr_cur_print_record_ops && thr) {
+		btr_cur_trx_report(thr_get_trx(thr), index, "update ");
+		rec_print_new(stderr, rec, offsets);
+	}
+#endif /* UNIV_DEBUG */
+
+	if (!row_upd_changes_field_size_or_external(index, offsets, update)) {
+
+		/* The simplest and the most common case: the update does not
+		change the size of any field and none of the updated fields is
+		externally stored in rec or update, and there is enough space
+		on the compressed page to log the update. */
+
+		mem_heap_free(heap);
+		return(btr_cur_update_in_place(flags, cursor, update,
+					       cmpl_info, thr, mtr));
+	}
+
+	if (rec_offs_any_extern(offsets)) {
+any_extern:
+		/* Externally stored fields are treated in pessimistic
+		update */
+
+		mem_heap_free(heap);
+		return(DB_OVERFLOW);
+	}
+
+	for (i = 0; i < upd_get_n_fields(update); i++) {
+		if (dfield_is_ext(&upd_get_nth_field(update, i)->new_val)) {
+
+			goto any_extern;
+		}
+	}
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
+					   &n_ext, heap);
+	/* We checked above that there are no externally stored fields. */
+	ut_a(!n_ext);
+
+	/* The page containing the clustered index record
+	corresponding to new_entry is latched in mtr.
+	Thus the following call is safe. */
+	row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
+						     FALSE, heap);
+	old_rec_size = rec_offs_size(offsets);
+	new_rec_size = rec_get_converted_size(index, new_entry, 0);
+
+	page_zip = buf_block_get_page_zip(block);
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+	if (UNIV_LIKELY_NULL(page_zip)
+	    && !btr_cur_update_alloc_zip(page_zip, block, index,
+					 new_rec_size, TRUE, mtr)) {
+		err = DB_ZIP_OVERFLOW;
+		goto err_exit;
+	}
+
+	if (UNIV_UNLIKELY(new_rec_size
+			  >= (page_get_free_space_of_empty(page_is_comp(page))
+			      / 2))) {
+
+		err = DB_OVERFLOW;
+		goto err_exit;
+	}
+
+	if (UNIV_UNLIKELY(page_get_data_size(page)
+			  - old_rec_size + new_rec_size
+			  < BTR_CUR_PAGE_COMPRESS_LIMIT)) {
+
+		/* The page would become too empty */
+
+		err = DB_UNDERFLOW;
+		goto err_exit;
+	}
+
+	max_size = old_rec_size
+		+ page_get_max_insert_size_after_reorganize(page, 1);
+
+	if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
+	       && (max_size >= new_rec_size))
+	      || (page_get_n_recs(page) <= 1))) {
+
+		/* There was not enough space, or it did not pay to
+		reorganize: for simplicity, we decide what to do assuming a
+		reorganization is needed, though it might not be necessary */
+
+		err = DB_OVERFLOW;
+		goto err_exit;
+	}
+
+	/* Do lock checking and undo logging */
+	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+					thr, mtr, &roll_ptr);
+	if (err != DB_SUCCESS) {
+err_exit:
+		mem_heap_free(heap);
+		return(err);
+	}
+
+	/* Ok, we may do the replacement. Store on the page infimum the
+	explicit locks on rec, before deleting rec (see the comment in
+	btr_cur_pessimistic_update). */
+
+	lock_rec_store_on_page_infimum(block, rec);
+
+	btr_search_update_hash_on_delete(cursor);
+
+	/* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
+	invokes rec_offs_make_valid() to point to the copied record that
+	the fields of new_entry point to.  We have to undo it here. */
+	ut_ad(rec_offs_validate(NULL, index, offsets));
+	rec_offs_make_valid(page_cur_get_rec(page_cursor), index, offsets);
+
+	page_cur_delete_rec(page_cursor, index, offsets, mtr);
+
+	page_cur_move_to_prev(page_cursor);
+
+	trx = thr_get_trx(thr);
+
+	if (!(flags & BTR_KEEP_SYS_FLAG)) {
+		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
+					      roll_ptr);
+		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
+					      trx->id);
+	}
+
+	/* There are no externally stored columns in new_entry */
+	rec = btr_cur_insert_if_possible(cursor, new_entry, 0/*n_ext*/, mtr);
+	ut_a(rec); /* <- We calculated above the insert would fit */
+
+	if (page_zip && !dict_index_is_clust(index)
+	    && page_is_leaf(page)) {
+		/* Update the free bits in the insert buffer. */
+		ibuf_update_free_bits_zip(block, mtr);
+	}
+
+	/* Restore the old explicit lock state on the record */
+
+	lock_rec_restore_from_page_infimum(block, rec, block);
+
+	page_cur_move_to_next(page_cursor);
+
+	mem_heap_free(heap);
+
+	return(DB_SUCCESS);
+}
+
+/*************************************************************//**
+If, in a split, a new supremum record was created as the predecessor of the
+updated record, the supremum record must inherit exactly the locks on the
+updated record. In the split it may have inherited locks from the successor
+of the updated record, which is not correct. This function restores the
+right locks for the new supremum. */
+static
+void
+btr_cur_pess_upd_restore_supremum(
+/*==============================*/
+	buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*	rec,	/*!< in: updated record */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	page_t*		page;
+	buf_block_t*	prev_block;
+	ulint		space;
+	ulint		zip_size;
+	ulint		prev_page_no;
+
+	page = buf_block_get_frame(block);
+
+	if (page_rec_get_next(page_get_infimum_rec(page)) != rec) {
+		/* Updated record is not the first user record on its page */
+
+		return;
+	}
+
+	space = buf_block_get_space(block);
+	zip_size = buf_block_get_zip_size(block);
+	prev_page_no = btr_page_get_prev(page, mtr);
+
+	ut_ad(prev_page_no != FIL_NULL);
+	prev_block = buf_page_get_with_no_latch(space, zip_size,
+						prev_page_no, mtr);
+#ifdef UNIV_BTR_DEBUG
+	ut_a(btr_page_get_next(prev_block->frame, mtr)
+	     == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
+
+	/* We must already have an x-latch on prev_block! */
+	ut_ad(mtr_memo_contains(mtr, prev_block, MTR_MEMO_PAGE_X_FIX));
+
+	lock_rec_reset_and_inherit_gap_locks(prev_block, block,
+					     PAGE_HEAP_NO_SUPREMUM,
+					     page_rec_get_heap_no(rec));
+}
+
+/*************************************************************//**
+Performs an update of a record on a page of a tree. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. If the
+update is made on the leaf level, to avoid deadlocks, mtr must also
+own x-latches to brothers of page, if those brothers exist. We assume
+here that the ordering fields of the record do not change.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+btr_cur_pessimistic_update(
+/*=======================*/
+	ulint		flags,	/*!< in: undo logging, locking, and rollback
+				flags */
+	btr_cur_t*	cursor,	/*!< in: cursor on the record to update */
+	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
+	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
+				be stored externally by the caller, or NULL */
+	const upd_t*	update,	/*!< in: update vector; this is allowed also
+				contain trx id and roll ptr fields, but
+				the values in update vector have no effect */
+	ulint		cmpl_info,/*!< in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr)	/*!< in: mtr; must be committed before
+				latching any further pages */
+{
+	big_rec_t*	big_rec_vec	= NULL;
+	big_rec_t*	dummy_big_rec;
+	dict_index_t*	index;
+	buf_block_t*	block;
+	page_t*		page;
+	page_zip_des_t*	page_zip;
+	rec_t*		rec;
+	page_cur_t*	page_cursor;
+	dtuple_t*	new_entry;
+	ulint		err;
+	ulint		optim_err;
+	roll_ptr_t	roll_ptr;
+	trx_t*		trx;
+	ibool		was_first;
+	ulint		n_extents	= 0;
+	ulint		n_reserved;
+	ulint		n_ext;
+	ulint*		offsets		= NULL;
+
+	*big_rec = NULL;
+
+	block = btr_cur_get_block(cursor);
+	page = buf_block_get_frame(block);
+	page_zip = buf_block_get_page_zip(block);
+	rec = btr_cur_get_rec(cursor);
+	index = cursor->index;
+
+	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+	/* The insert buffer tree should never be updated in place. */
+	ut_ad(!dict_index_is_ibuf(index));
+
+	optim_err = btr_cur_optimistic_update(flags, cursor, update,
+					      cmpl_info, thr, mtr);
+
+	switch (optim_err) {
+	case DB_UNDERFLOW:
+	case DB_OVERFLOW:
+	case DB_ZIP_OVERFLOW:
+		break;
+	default:
+		return(optim_err);
+	}
+
+	/* Do lock checking and undo logging */
+	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+					thr, mtr, &roll_ptr);
+	if (err != DB_SUCCESS) {
+
+		return(err);
+	}
+
+	if (optim_err == DB_OVERFLOW) {
+		ulint	reserve_flag;
+
+		/* First reserve enough free space for the file segments
+		of the index tree, so that the update will not fail because
+		of lack of space */
+
+		n_extents = cursor->tree_height / 16 + 3;
+
+		if (flags & BTR_NO_UNDO_LOG_FLAG) {
+			reserve_flag = FSP_CLEANING;
+		} else {
+			reserve_flag = FSP_NORMAL;
+		}
+
+		if (!fsp_reserve_free_extents(&n_reserved, index->space,
+					      n_extents, reserve_flag, mtr)) {
+			return(DB_OUT_OF_FILE_SPACE);
+		}
+	}
+
+	if (!*heap) {
+		*heap = mem_heap_create(1024);
+	}
+	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, heap);
+
+	trx = thr_get_trx(thr);
+
+	new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
+					   &n_ext, *heap);
+	/* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
+	invokes rec_offs_make_valid() to point to the copied record that
+	the fields of new_entry point to.  We have to undo it here. */
+	ut_ad(rec_offs_validate(NULL, index, offsets));
+	rec_offs_make_valid(rec, index, offsets);
+
+	/* The page containing the clustered index record
+	corresponding to new_entry is latched in mtr.  If the
+	clustered index record is delete-marked, then its externally
+	stored fields cannot have been purged yet, because then the
+	purge would also have removed the clustered index record
+	itself.  Thus the following call is safe. */
+	row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
+						     FALSE, *heap);
+	if (!(flags & BTR_KEEP_SYS_FLAG)) {
+		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
+					      roll_ptr);
+		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
+					      trx->id);
+	}
+
+	if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(offsets)) {
+		/* We are in a transaction rollback undoing a row
+		update: we must free possible externally stored fields
+		which got new values in the update, if they are not
+		inherited values. They can be inherited if we have
+		updated the primary key to another value, and then
+		update it back again. */
+
+		ut_ad(big_rec_vec == NULL);
+
+		btr_rec_free_updated_extern_fields(
+			index, rec, page_zip, offsets, update,
+			trx_is_recv(trx) ? RB_RECOVERY : RB_NORMAL, mtr);
+	}
+
+	/* We have to set appropriate extern storage bits in the new
+	record to be inserted: we have to remember which fields were such */
+
+	ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
+	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap);
+	n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		ut_ad(page_is_comp(page));
+		if (page_zip_rec_needs_ext(
+			    rec_get_converted_size(index, new_entry, n_ext),
+			    TRUE,
+			    dict_index_get_n_fields(index),
+			    page_zip_get_size(page_zip))) {
+
+			goto make_external;
+		}
+	} else if (page_zip_rec_needs_ext(
+			   rec_get_converted_size(index, new_entry, n_ext),
+			   page_is_comp(page), 0, 0)) {
+make_external:
+		big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext);
+		if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
+
+			err = DB_TOO_BIG_RECORD;
+			goto return_after_reservations;
+		}
+	}
+
+	/* Store state of explicit locks on rec on the page infimum record,
+	before deleting rec. The page infimum acts as a dummy carrier of the
+	locks, taking care also of lock releases, before we can move the locks
+	back on the actual record. There is a special case: if we are
+	inserting on the root page and the insert causes a call of
+	btr_root_raise_and_insert. Therefore we cannot in the lock system
+	delete the lock structs set on the root page even if the root
+	page carries just node pointers. */
+
+	lock_rec_store_on_page_infimum(block, rec);
+
+	btr_search_update_hash_on_delete(cursor);
+
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	page_cur_delete_rec(page_cursor, index, offsets, mtr);
+
+	page_cur_move_to_prev(page_cursor);
+
+	rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
+
+	if (rec) {
+		lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
+						   rec, block);
+
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, heap);
+
+		if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
+			/* The new inserted record owns its possible externally
+			stored fields */
+			btr_cur_unmark_extern_fields(page_zip,
+						     rec, index, offsets, mtr);
+		}
+
+		btr_cur_compress_if_useful(cursor, mtr);
+
+		if (page_zip && !dict_index_is_clust(index)
+		    && page_is_leaf(page)) {
+			/* Update the free bits in the insert buffer. */
+			ibuf_update_free_bits_zip(block, mtr);
+		}
+
+		err = DB_SUCCESS;
+		goto return_after_reservations;
+	} else {
+		ut_a(optim_err != DB_UNDERFLOW);
+
+		/* Out of space: reset the free bits. */
+		if (!dict_index_is_clust(index)
+		    && page_is_leaf(page)) {
+			ibuf_reset_free_bits(block);
+		}
+	}
+
+	/* Was the record to be updated positioned as the first user
+	record on its page? */
+	was_first = page_cur_is_before_first(page_cursor);
+
+	/* The first parameter means that no lock checking and undo logging
+	is made in the insert */
+
+	err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG
+					 | BTR_NO_LOCKING_FLAG
+					 | BTR_KEEP_SYS_FLAG,
+					 cursor, new_entry, &rec,
+					 &dummy_big_rec, n_ext, NULL, mtr);
+	ut_a(rec);
+	ut_a(err == DB_SUCCESS);
+	ut_a(dummy_big_rec == NULL);
+
+	if (dict_index_is_sec_or_ibuf(index)) {
+		/* Update PAGE_MAX_TRX_ID in the index page header.
+		It was not updated by btr_cur_pessimistic_insert()
+		because of BTR_NO_LOCKING_FLAG. */
+		buf_block_t*	rec_block;
+
+		rec_block = btr_cur_get_block(cursor);
+
+		page_update_max_trx_id(rec_block,
+				       buf_block_get_page_zip(rec_block),
+				       trx->id, mtr);
+	}
+
+	if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
+		/* The new inserted record owns its possible externally
+		stored fields */
+		buf_block_t*	rec_block = btr_cur_get_block(cursor);
+
+#ifdef UNIV_ZIP_DEBUG
+		ut_a(!page_zip || page_zip_validate(page_zip, page));
+		page = buf_block_get_frame(rec_block);
+#endif /* UNIV_ZIP_DEBUG */
+		page_zip = buf_block_get_page_zip(rec_block);
+
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, heap);
+		btr_cur_unmark_extern_fields(page_zip,
+					     rec, index, offsets, mtr);
+	}
+
+	lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
+					   rec, block);
+
+	/* If necessary, restore also the correct lock state for a new,
+	preceding supremum record created in a page split. While the old
+	record was nonexistent, the supremum might have inherited its locks
+	from a wrong record. */
+
+	if (!was_first) {
+		btr_cur_pess_upd_restore_supremum(btr_cur_get_block(cursor),
+						  rec, mtr);
+	}
+
+return_after_reservations:
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+	if (n_extents > 0) {
+		fil_space_release_free_extents(index->space, n_reserved);
+	}
+
+	*big_rec = big_rec_vec;
+
+	return(err);
+}
+
+/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
+
+/****************************************************************//**
+Writes the redo log record for delete marking or unmarking of an index
+record. */
+UNIV_INLINE
+void
+btr_cur_del_mark_set_clust_rec_log(
+/*===============================*/
+	ulint		flags,	/*!< in: flags */
+	rec_t*		rec,	/*!< in: record */
+	dict_index_t*	index,	/*!< in: index of the record */
+	ibool		val,	/*!< in: value to set */
+	trx_t*		trx,	/*!< in: deleting transaction */
+	roll_ptr_t	roll_ptr,/*!< in: roll ptr to the undo log record */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	byte*	log_ptr;
+	ut_ad(flags < 256);
+	ut_ad(val <= 1);
+
+	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+
+	log_ptr = mlog_open_and_write_index(mtr, rec, index,
+					    page_rec_is_comp(rec)
+					    ? MLOG_COMP_REC_CLUST_DELETE_MARK
+					    : MLOG_REC_CLUST_DELETE_MARK,
+					    1 + 1 + DATA_ROLL_PTR_LEN
+					    + 14 + 2);
+
+	if (!log_ptr) {
+		/* Logging in mtr is switched off during crash recovery */
+		return;
+	}
+
+	mach_write_to_1(log_ptr, flags);
+	log_ptr++;
+	mach_write_to_1(log_ptr, val);
+	log_ptr++;
+
+	log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
+						mtr);
+	mach_write_to_2(log_ptr, page_offset(rec));
+	log_ptr += 2;
+
+	mlog_close(mtr, log_ptr);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/****************************************************************//**
+Parses the redo log record for delete marking or unmarking of a clustered
+index record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_cur_parse_del_mark_set_clust_rec(
+/*=================================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	page_t*		page,	/*!< in/out: page or NULL */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	dict_index_t*	index)	/*!< in: index corresponding to page */
+{
+	ulint		flags;
+	ulint		val;
+	ulint		pos;
+	trx_id_t	trx_id;
+	roll_ptr_t	roll_ptr;
+	ulint		offset;
+	rec_t*		rec;
+
+	ut_ad(!page
+	      || !!page_is_comp(page) == dict_table_is_comp(index->table));
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	flags = mach_read_from_1(ptr);
+	ptr++;
+	val = mach_read_from_1(ptr);
+	ptr++;
+
+	ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);
+
+	if (ptr == NULL) {
+
+		return(NULL);
+	}
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	offset = mach_read_from_2(ptr);
+	ptr += 2;
+
+	ut_a(offset <= UNIV_PAGE_SIZE);
+
+	if (page) {
+		rec = page + offset;
+
+		/* We do not need to reserve btr_search_latch, as the page
+		is only being recovered, and there cannot be a hash index to
+		it. */
+
+		btr_rec_set_deleted_flag(rec, page_zip, val);
+
+		if (!(flags & BTR_KEEP_SYS_FLAG)) {
+			mem_heap_t*	heap		= NULL;
+			ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+			rec_offs_init(offsets_);
+
+			row_upd_rec_sys_fields_in_recovery(
+				rec, page_zip,
+				rec_get_offsets(rec, index, offsets_,
+						ULINT_UNDEFINED, &heap),
+				pos, trx_id, roll_ptr);
+			if (UNIV_LIKELY_NULL(heap)) {
+				mem_heap_free(heap);
+			}
+		}
+	}
+
+	return(ptr);
+}
+
+#ifndef UNIV_HOTBACKUP
+/***********************************************************//**
+Marks a clustered index record deleted. Writes an undo log record to
+undo log on this delete marking. Writes in the trx id field the id
+of the deleting transaction, and in the roll ptr field pointer to the
+undo log record created.
+@return	DB_SUCCESS, DB_LOCK_WAIT, or error number */
+UNIV_INTERN
+ulint
+btr_cur_del_mark_set_clust_rec(
+/*===========================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/*!< in: cursor */
+	ibool		val,	/*!< in: value to set */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	dict_index_t*	index;
+	buf_block_t*	block;
+	roll_ptr_t	roll_ptr;
+	ulint		err;
+	rec_t*		rec;
+	page_zip_des_t*	page_zip;
+	trx_t*		trx;
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	rec = btr_cur_get_rec(cursor);
+	index = cursor->index;
+	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+
+#ifdef UNIV_DEBUG
+	if (btr_cur_print_record_ops && thr) {
+		btr_cur_trx_report(thr_get_trx(thr), index, "del mark ");
+		rec_print_new(stderr, rec, offsets);
+	}
+#endif /* UNIV_DEBUG */
+
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
+
+	err = lock_clust_rec_modify_check_and_lock(flags,
+						   btr_cur_get_block(cursor),
+						   rec, index, offsets, thr);
+
+	if (err != DB_SUCCESS) {
+
+		goto func_exit;
+	}
+
+	err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
+					    index, NULL, NULL, 0, rec,
+					    &roll_ptr);
+	if (err != DB_SUCCESS) {
+
+		goto func_exit;
+	}
+
+	block = btr_cur_get_block(cursor);
+
+	if (block->is_hashed) {
+		rw_lock_x_lock(&btr_search_latch);
+	}
+
+	page_zip = buf_block_get_page_zip(block);
+
+	btr_rec_set_deleted_flag(rec, page_zip, val);
+
+	trx = thr_get_trx(thr);
+
+	if (!(flags & BTR_KEEP_SYS_FLAG)) {
+		row_upd_rec_sys_fields(rec, page_zip,
+				       index, offsets, trx, roll_ptr);
+	}
+
+	if (block->is_hashed) {
+		rw_lock_x_unlock(&btr_search_latch);
+	}
+
+	btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
+					   roll_ptr, mtr);
+
+func_exit:
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+	return(err);
+}
+
+/****************************************************************//**
+Writes the redo log record for a delete mark setting of a secondary
+index record. */
+UNIV_INLINE
+void
+btr_cur_del_mark_set_sec_rec_log(
+/*=============================*/
+	rec_t*		rec,	/*!< in: record */
+	ibool		val,	/*!< in: value to set */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	byte*	log_ptr;
+	ut_ad(val <= 1);
+
+	log_ptr = mlog_open(mtr, 11 + 1 + 2);
+
+	if (!log_ptr) {
+		/* Logging in mtr is switched off during crash recovery:
+		in that case mlog_open returns NULL */
+		return;
+	}
+
+	log_ptr = mlog_write_initial_log_record_fast(
+		rec, MLOG_REC_SEC_DELETE_MARK, log_ptr, mtr);
+	mach_write_to_1(log_ptr, val);
+	log_ptr++;
+
+	mach_write_to_2(log_ptr, page_offset(rec));
+	log_ptr += 2;
+
+	mlog_close(mtr, log_ptr);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/****************************************************************//**
+Parses the redo log record for delete marking or unmarking of a secondary
+index record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_cur_parse_del_mark_set_sec_rec(
+/*===============================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	page_t*		page,	/*!< in/out: page or NULL */
+	page_zip_des_t*	page_zip)/*!< in/out: compressed page, or NULL */
+{
+	ulint	val;
+	ulint	offset;
+	rec_t*	rec;
+
+	if (end_ptr < ptr + 3) {
+
+		return(NULL);
+	}
+
+	val = mach_read_from_1(ptr);
+	ptr++;
+
+	offset = mach_read_from_2(ptr);
+	ptr += 2;
+
+	ut_a(offset <= UNIV_PAGE_SIZE);
+
+	if (page) {
+		rec = page + offset;
+
+		/* We do not need to reserve btr_search_latch, as the page
+		is only being recovered, and there cannot be a hash index to
+		it. */
+
+		btr_rec_set_deleted_flag(rec, page_zip, val);
+	}
+
+	return(ptr);
+}
+
+#ifndef UNIV_HOTBACKUP
+/***********************************************************//**
+Sets a secondary index record delete mark to TRUE or FALSE.
+@return	DB_SUCCESS, DB_LOCK_WAIT, or error number */
+UNIV_INTERN
+ulint
+btr_cur_del_mark_set_sec_rec(
+/*=========================*/
+	ulint		flags,	/*!< in: locking flag */
+	btr_cur_t*	cursor,	/*!< in: cursor */
+	ibool		val,	/*!< in: value to set */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	buf_block_t*	block;
+	rec_t*		rec;
+	ulint		err;
+
+	block = btr_cur_get_block(cursor);
+	rec = btr_cur_get_rec(cursor);
+
+#ifdef UNIV_DEBUG
+	if (btr_cur_print_record_ops && thr) {
+		btr_cur_trx_report(thr_get_trx(thr), cursor->index,
+				   "del mark ");
+		rec_print(stderr, rec, cursor->index);
+	}
+#endif /* UNIV_DEBUG */
+
+	err = lock_sec_rec_modify_check_and_lock(flags,
+						 btr_cur_get_block(cursor),
+						 rec, cursor->index, thr, mtr);
+	if (err != DB_SUCCESS) {
+
+		return(err);
+	}
+
+	ut_ad(!!page_rec_is_comp(rec)
+	      == dict_table_is_comp(cursor->index->table));
+
+	if (block->is_hashed) {
+		rw_lock_x_lock(&btr_search_latch);
+	}
+
+	btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val);
+
+	if (block->is_hashed) {
+		rw_lock_x_unlock(&btr_search_latch);
+	}
+
+	btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
+
+	return(DB_SUCCESS);
+}
+
+/***********************************************************//**
+Clear a secondary index record's delete mark.  This function is only
+used by the insert buffer insert merge mechanism. */
+UNIV_INTERN
+void
+btr_cur_del_unmark_for_ibuf(
+/*========================*/
+	rec_t*		rec,		/*!< in/out: record to delete unmark */
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page
+					corresponding to rec, or NULL
+					when the tablespace is
+					uncompressed */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	/* We do not need to reserve btr_search_latch, as the page has just
+	been read to the buffer pool and there cannot be a hash index to it. */
+
+	btr_rec_set_deleted_flag(rec, page_zip, FALSE);
+
+	btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
+}
+
+/*==================== B-TREE RECORD REMOVE =========================*/
+
+/*************************************************************//**
+Tries to compress a page of the tree if it seems useful. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. To avoid
+deadlocks, mtr must also own x-latches to brothers of page, if those
+brothers exist. NOTE: it is assumed that the caller has reserved enough
+free extents so that the compression will always succeed if done!
+@return	TRUE if compression occurred */
+UNIV_INTERN
+ibool
+btr_cur_compress_if_useful(
+/*=======================*/
+	btr_cur_t*	cursor,	/*!< in: cursor on the page to compress;
+				cursor does not stay valid if compression
+				occurs */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ut_ad(mtr_memo_contains(mtr,
+				dict_index_get_lock(btr_cur_get_index(cursor)),
+				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+				MTR_MEMO_PAGE_X_FIX));
+
+	return(btr_cur_compress_recommendation(cursor, mtr)
+	       && btr_compress(cursor, mtr));
+}
+
+/*******************************************************//**
+Removes the record on which the tree cursor is positioned on a leaf page.
+It is assumed that the mtr has an x-latch on the page where the cursor is
+positioned, but no latch on the whole tree.
+@return	TRUE if success, i.e., the page did not become too empty */
+UNIV_INTERN
+ibool
+btr_cur_optimistic_delete(
+/*======================*/
+	btr_cur_t*	cursor,	/*!< in: cursor on leaf page, on the record to
+				delete; cursor stays valid: if deletion
+				succeeds, on function exit it points to the
+				successor of the deleted record */
+	mtr_t*		mtr)	/*!< in: mtr; if this function returns
+				TRUE on a leaf page of a secondary
+				index, the mtr must be committed
+				before latching any further pages */
+{
+	buf_block_t*	block;
+	rec_t*		rec;
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	ibool		no_compress_needed;
+	rec_offs_init(offsets_);
+
+	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+				MTR_MEMO_PAGE_X_FIX));
+	/* This is intended only for leaf page deletions */
+
+	block = btr_cur_get_block(cursor);
+
+	ut_ad(page_is_leaf(buf_block_get_frame(block)));
+
+	rec = btr_cur_get_rec(cursor);
+	offsets = rec_get_offsets(rec, cursor->index, offsets,
+				  ULINT_UNDEFINED, &heap);
+
+	no_compress_needed = !rec_offs_any_extern(offsets)
+		&& btr_cur_can_delete_without_compress(
+			cursor, rec_offs_size(offsets), mtr);
+
+	if (no_compress_needed) {
+
+		page_t*		page	= buf_block_get_frame(block);
+		page_zip_des_t*	page_zip= buf_block_get_page_zip(block);
+		ulint		max_ins	= 0;
+
+		lock_update_delete(block, rec);
+
+		btr_search_update_hash_on_delete(cursor);
+
+		if (!page_zip) {
+			max_ins = page_get_max_insert_size_after_reorganize(
+				page, 1);
+		}
+#ifdef UNIV_ZIP_DEBUG
+		ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+		page_cur_delete_rec(btr_cur_get_page_cur(cursor),
+				    cursor->index, offsets, mtr);
+#ifdef UNIV_ZIP_DEBUG
+		ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+		if (dict_index_is_clust(cursor->index)
+		    || dict_index_is_ibuf(cursor->index)
+		    || !page_is_leaf(page)) {
+			/* The insert buffer does not handle
+			inserts to clustered indexes, to
+			non-leaf pages of secondary index B-trees,
+			or to the insert buffer. */
+		} else if (page_zip) {
+			ibuf_update_free_bits_zip(block, mtr);
+		} else {
+			ibuf_update_free_bits_low(block, max_ins, mtr);
+		}
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+
+	return(no_compress_needed);
+}
+
+/*************************************************************//**
+Removes the record on which the tree cursor is positioned. Tries
+to compress the page if its fillfactor drops below a threshold
+or if it is the only page on the level. It is assumed that mtr holds
+an x-latch on the tree and on the cursor page. To avoid deadlocks,
+mtr must also own x-latches to brothers of page, if those brothers
+exist.
+@return	TRUE if compression occurred */
+UNIV_INTERN
+ibool
+btr_cur_pessimistic_delete(
+/*=======================*/
+	ulint*		err,	/*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+				the latter may occur because we may have
+				to update node pointers on upper levels,
+				and in the case of variable length keys
+				these may actually grow in size */
+	ibool		has_reserved_extents, /*!< in: TRUE if the
+				caller has already reserved enough free
+				extents so that he knows that the operation
+				will succeed */
+	btr_cur_t*	cursor,	/*!< in: cursor on the record to delete;
+				if compression does not occur, the cursor
+				stays valid: it points to successor of
+				deleted record on function exit */
+	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	buf_block_t*	block;
+	page_t*		page;
+	page_zip_des_t*	page_zip;
+	dict_index_t*	index;
+	rec_t*		rec;
+	dtuple_t*	node_ptr;
+	ulint		n_extents	= 0;
+	ulint		n_reserved;
+	ibool		success;
+	ibool		ret		= FALSE;
+	ulint		level;
+	mem_heap_t*	heap;
+	ulint*		offsets;
+
+	block = btr_cur_get_block(cursor);
+	page = buf_block_get_frame(block);
+	index = btr_cur_get_index(cursor);
+
+	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	if (!has_reserved_extents) {
+		/* First reserve enough free space for the file segments
+		of the index tree, so that the node pointer updates will
+		not fail because of lack of space */
+
+		n_extents = cursor->tree_height / 32 + 1;
+
+		success = fsp_reserve_free_extents(&n_reserved,
+						   index->space,
+						   n_extents,
+						   FSP_CLEANING, mtr);
+		if (!success) {
+			*err = DB_OUT_OF_FILE_SPACE;
+
+			return(FALSE);
+		}
+	}
+
+	heap = mem_heap_create(1024);
+	rec = btr_cur_get_rec(cursor);
+	page_zip = buf_block_get_page_zip(block);
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+
+	if (rec_offs_any_extern(offsets)) {
+		btr_rec_free_externally_stored_fields(index,
+						      rec, offsets, page_zip,
+						      rb_ctx, mtr);
+#ifdef UNIV_ZIP_DEBUG
+		ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+	}
+
+	if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
+	    && UNIV_UNLIKELY(dict_index_get_page(index)
+			     != buf_block_get_page_no(block))) {
+
+		/* If there is only one record, drop the whole page in
+		btr_discard_page, if this is not the root page */
+
+		btr_discard_page(cursor, mtr);
+
+		*err = DB_SUCCESS;
+		ret = TRUE;
+
+		goto return_after_reservations;
+	}
+
+	lock_update_delete(block, rec);
+	level = btr_page_get_level(page, mtr);
+
+	if (level > 0
+	    && UNIV_UNLIKELY(rec == page_rec_get_next(
+				     page_get_infimum_rec(page)))) {
+
+		rec_t*	next_rec = page_rec_get_next(rec);
+
+		if (btr_page_get_prev(page, mtr) == FIL_NULL) {
+
+			/* If we delete the leftmost node pointer on a
+			non-leaf level, we must mark the new leftmost node
+			pointer as the predefined minimum record */
+
+			/* This will make page_zip_validate() fail until
+			page_cur_delete_rec() completes.  This is harmless,
+			because everything will take place within a single
+			mini-transaction and because writing to the redo log
+			is an atomic operation (performed by mtr_commit()). */
+			btr_set_min_rec_mark(next_rec, mtr);
+		} else {
+			/* Otherwise, if we delete the leftmost node pointer
+			on a page, we have to change the father node pointer
+			so that it is equal to the new leftmost node pointer
+			on the page */
+
+			btr_node_ptr_delete(index, block, mtr);
+
+			node_ptr = dict_index_build_node_ptr(
+				index, next_rec, buf_block_get_page_no(block),
+				heap, level);
+
+			btr_insert_on_non_leaf_level(index,
+						     level + 1, node_ptr, mtr);
+		}
+	}
+
+	btr_search_update_hash_on_delete(cursor);
+
+	page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+	ut_ad(btr_check_node_ptr(index, block, mtr));
+
+	*err = DB_SUCCESS;
+
+return_after_reservations:
+	mem_heap_free(heap);
+
+	if (ret == FALSE) {
+		ret = btr_cur_compress_if_useful(cursor, mtr);
+	}
+
+	if (n_extents > 0) {
+		fil_space_release_free_extents(index->space, n_reserved);
+	}
+
+	return(ret);
+}
+
+/*******************************************************************//**
+Adds path information to the cursor for the current page, for which
+the binary search has been performed. */
+static
+void
+btr_cur_add_path_info(
+/*==================*/
+	btr_cur_t*	cursor,		/*!< in: cursor positioned on a page */
+	ulint		height,		/*!< in: height of the page in tree;
+					0 means leaf node */
+	ulint		root_height)	/*!< in: root node height in tree */
+{
+	btr_path_t*	slot;
+	rec_t*		rec;
+
+	ut_a(cursor->path_arr);
+
+	if (root_height >= BTR_PATH_ARRAY_N_SLOTS - 1) {
+		/* Do nothing; return empty path */
+
+		slot = cursor->path_arr;
+		slot->nth_rec = ULINT_UNDEFINED;
+
+		return;
+	}
+
+	if (height == 0) {
+		/* Mark end of slots for path */
+		slot = cursor->path_arr + root_height + 1;
+		slot->nth_rec = ULINT_UNDEFINED;
+	}
+
+	rec = btr_cur_get_rec(cursor);
+
+	slot = cursor->path_arr + (root_height - height);
+
+	slot->nth_rec = page_rec_get_n_recs_before(rec);
+	slot->n_recs = page_get_n_recs(page_align(rec));
+}
+
+/*******************************************************************//**
+Estimates the number of rows in a given index range.
+@return	estimated number of rows */
+UNIV_INTERN
+ib_int64_t
+btr_estimate_n_rows_in_range(
+/*=========================*/
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	tuple1,	/*!< in: range start, may also be empty tuple */
+	ulint		mode1,	/*!< in: search mode for range start */
+	const dtuple_t*	tuple2,	/*!< in: range end, may also be empty tuple */
+	ulint		mode2)	/*!< in: search mode for range end */
+{
+	btr_path_t	path1[BTR_PATH_ARRAY_N_SLOTS];
+	btr_path_t	path2[BTR_PATH_ARRAY_N_SLOTS];
+	btr_cur_t	cursor;
+	btr_path_t*	slot1;
+	btr_path_t*	slot2;
+	ibool		diverged;
+	ibool		diverged_lot;
+	ulint		divergence_level;
+	ib_int64_t	n_rows;
+	ulint		i;
+	mtr_t		mtr;
+
+	mtr_start(&mtr);
+
+	cursor.path_arr = path1;
+
+	if (dtuple_get_n_fields(tuple1) > 0) {
+
+		btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
+					    BTR_SEARCH_LEAF | BTR_ESTIMATE,
+					    &cursor, 0, &mtr);
+	} else {
+		btr_cur_open_at_index_side(TRUE, index,
+					   BTR_SEARCH_LEAF | BTR_ESTIMATE,
+					   &cursor, &mtr);
+	}
+
+	mtr_commit(&mtr);
+
+	mtr_start(&mtr);
+
+	cursor.path_arr = path2;
+
+	if (dtuple_get_n_fields(tuple2) > 0) {
+
+		btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
+					    BTR_SEARCH_LEAF | BTR_ESTIMATE,
+					    &cursor, 0, &mtr);
+	} else {
+		btr_cur_open_at_index_side(FALSE, index,
+					   BTR_SEARCH_LEAF | BTR_ESTIMATE,
+					   &cursor, &mtr);
+	}
+
+	mtr_commit(&mtr);
+
+	/* We have the path information for the range in path1 and path2 */
+
+	n_rows = 1;
+	diverged = FALSE;	    /* This becomes true when the path is not
+				    the same any more */
+	diverged_lot = FALSE;	    /* This becomes true when the paths are
+				    not the same or adjacent any more */
+	divergence_level = 1000000; /* This is the level where paths diverged
+				    a lot */
+	for (i = 0; ; i++) {
+		ut_ad(i < BTR_PATH_ARRAY_N_SLOTS);
+
+		slot1 = path1 + i;
+		slot2 = path2 + i;
+
+		if (slot1->nth_rec == ULINT_UNDEFINED
+		    || slot2->nth_rec == ULINT_UNDEFINED) {
+
+			if (i > divergence_level + 1) {
+				/* In trees whose height is > 1 our algorithm
+				tends to underestimate: multiply the estimate
+				by 2: */
+
+				n_rows = n_rows * 2;
+			}
+
+			/* Do not estimate the number of rows in the range
+			to over 1 / 2 of the estimated rows in the whole
+			table */
+
+			if (n_rows > index->table->stat_n_rows / 2) {
+				n_rows = index->table->stat_n_rows / 2;
+
+				/* If there are just 0 or 1 rows in the table,
+				then we estimate all rows are in the range */
+
+				if (n_rows == 0) {
+					n_rows = index->table->stat_n_rows;
+				}
+			}
+
+			return(n_rows);
+		}
+
+		if (!diverged && slot1->nth_rec != slot2->nth_rec) {
+
+			diverged = TRUE;
+
+			if (slot1->nth_rec < slot2->nth_rec) {
+				n_rows = slot2->nth_rec - slot1->nth_rec;
+
+				if (n_rows > 1) {
+					diverged_lot = TRUE;
+					divergence_level = i;
+				}
+			} else {
+				/* Maybe the tree has changed between
+				searches */
+
+				return(10);
+			}
+
+		} else if (diverged && !diverged_lot) {
+
+			if (slot1->nth_rec < slot1->n_recs
+			    || slot2->nth_rec > 1) {
+
+				diverged_lot = TRUE;
+				divergence_level = i;
+
+				n_rows = 0;
+
+				if (slot1->nth_rec < slot1->n_recs) {
+					n_rows += slot1->n_recs
+						- slot1->nth_rec;
+				}
+
+				if (slot2->nth_rec > 1) {
+					n_rows += slot2->nth_rec - 1;
+				}
+			}
+		} else if (diverged_lot) {
+
+			n_rows = (n_rows * (slot1->n_recs + slot2->n_recs))
+				/ 2;
+		}
+	}
+}
+
+/*******************************************************************//**
+Estimates the number of different key values in a given index, for
+each n-column prefix of the index where n <= dict_index_get_n_unique(index).
+The estimates are stored in the array index->stat_n_diff_key_vals. */
+UNIV_INTERN
+void
+btr_estimate_number_of_different_key_vals(
+/*======================================*/
+	dict_index_t*	index)	/*!< in: index */
+{
+	btr_cur_t	cursor;
+	page_t*		page;
+	rec_t*		rec;
+	ulint		n_cols;
+	ulint		matched_fields;
+	ulint		matched_bytes;
+	ib_int64_t*	n_diff;
+	ullint		n_sample_pages; /* number of pages to sample */
+	ulint		not_empty_flag	= 0;
+	ulint		total_external_size = 0;
+	ulint		i;
+	ulint		j;
+	ullint		add_on;
+	mtr_t		mtr;
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_rec_[REC_OFFS_NORMAL_SIZE];
+	ulint		offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets_rec	= offsets_rec_;
+	ulint*		offsets_next_rec= offsets_next_rec_;
+	rec_offs_init(offsets_rec_);
+	rec_offs_init(offsets_next_rec_);
+
+	n_cols = dict_index_get_n_unique(index);
+
+	n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
+
+	/* It makes no sense to test more pages than are contained
+	in the index, thus we lower the number if it is too high */
+	if (srv_stats_sample_pages > index->stat_index_size) {
+		if (index->stat_index_size > 0) {
+			n_sample_pages = index->stat_index_size;
+		} else {
+			n_sample_pages = 1;
+		}
+	} else {
+		n_sample_pages = srv_stats_sample_pages;
+	}
+
+	/* We sample some pages in the index to get an estimate */
+
+	for (i = 0; i < n_sample_pages; i++) {
+		rec_t*	supremum;
+		mtr_start(&mtr);
+
+		btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
+
+		/* Count the number of different key values for each prefix of
+		the key on this index page. If the prefix does not determine
+		the index record uniquely in the B-tree, then we subtract one
+		because otherwise our algorithm would give a wrong estimate
+		for an index where there is just one key value. */
+
+		page = btr_cur_get_page(&cursor);
+
+		supremum = page_get_supremum_rec(page);
+		rec = page_rec_get_next(page_get_infimum_rec(page));
+
+		if (rec != supremum) {
+			not_empty_flag = 1;
+			offsets_rec = rec_get_offsets(rec, index, offsets_rec,
+						      ULINT_UNDEFINED, &heap);
+		}
+
+		while (rec != supremum) {
+			rec_t*	next_rec = page_rec_get_next(rec);
+			if (next_rec == supremum) {
+				break;
+			}
+
+			matched_fields = 0;
+			matched_bytes = 0;
+			offsets_next_rec = rec_get_offsets(next_rec, index,
+							   offsets_next_rec,
+							   n_cols, &heap);
+
+			cmp_rec_rec_with_match(rec, next_rec,
+					       offsets_rec, offsets_next_rec,
+					       index, &matched_fields,
+					       &matched_bytes);
+
+			for (j = matched_fields + 1; j <= n_cols; j++) {
+				/* We add one if this index record has
+				a different prefix from the previous */
+
+				n_diff[j]++;
+			}
+
+			total_external_size
+				+= btr_rec_get_externally_stored_len(
+					rec, offsets_rec);
+
+			rec = next_rec;
+			/* Initialize offsets_rec for the next round
+			and assign the old offsets_rec buffer to
+			offsets_next_rec. */
+			{
+				ulint*	offsets_tmp = offsets_rec;
+				offsets_rec = offsets_next_rec;
+				offsets_next_rec = offsets_tmp;
+			}
+		}
+
+
+		if (n_cols == dict_index_get_n_unique_in_tree(index)) {
+
+			/* If there is more than one leaf page in the tree,
+			we add one because we know that the first record
+			on the page certainly had a different prefix than the
+			last record on the previous index page in the
+			alphabetical order. Before this fix, if there was
+			just one big record on each clustered index page, the
+			algorithm grossly underestimated the number of rows
+			in the table. */
+
+			if (btr_page_get_prev(page, &mtr) != FIL_NULL
+			    || btr_page_get_next(page, &mtr) != FIL_NULL) {
+
+				n_diff[n_cols]++;
+			}
+		}
+
+		offsets_rec = rec_get_offsets(rec, index, offsets_rec,
+					      ULINT_UNDEFINED, &heap);
+		total_external_size += btr_rec_get_externally_stored_len(
+			rec, offsets_rec);
+		mtr_commit(&mtr);
+	}
+
+	/* If we saw k borders between different key values on
+	n_sample_pages leaf pages, we can estimate how many
+	there will be in index->stat_n_leaf_pages */
+
+	/* We must take into account that our sample actually represents
+	also the pages used for external storage of fields (those pages are
+	included in index->stat_n_leaf_pages) */
+
+	for (j = 0; j <= n_cols; j++) {
+		index->stat_n_diff_key_vals[j]
+			= ((n_diff[j]
+			    * (ib_int64_t)index->stat_n_leaf_pages
+			    + n_sample_pages - 1
+			    + total_external_size
+			    + not_empty_flag)
+			   / (n_sample_pages
+			      + total_external_size));
+
+		/* If the tree is small, smaller than
+		10 * n_sample_pages + total_external_size, then
+		the above estimate is ok. For bigger trees it is common that we
+		do not see any borders between key values in the few pages
+		we pick. But still there may be n_sample_pages
+		different key values, or even more. Let us try to approximate
+		that: */
+
+		add_on = index->stat_n_leaf_pages
+			/ (10 * (n_sample_pages
+				 + total_external_size));
+
+		if (add_on > n_sample_pages) {
+			add_on = n_sample_pages;
+		}
+
+		index->stat_n_diff_key_vals[j] += add_on;
+	}
+
+	mem_free(n_diff);
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+}
+
+/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
+
+/***********************************************************//**
+Gets the externally stored size of a record, in units of a database page.
+@return	externally stored part, in units of a database page */
+static
+ulint
+btr_rec_get_externally_stored_len(
+/*==============================*/
+	rec_t*		rec,	/*!< in: record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	ulint	n_fields;
+	byte*	data;
+	ulint	local_len;
+	ulint	extern_len;
+	ulint	total_extern_len = 0;
+	ulint	i;
+
+	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
+	n_fields = rec_offs_n_fields(offsets);
+
+	for (i = 0; i < n_fields; i++) {
+		if (rec_offs_nth_extern(offsets, i)) {
+
+			data = rec_get_nth_field(rec, offsets, i, &local_len);
+
+			local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+
+			extern_len = mach_read_from_4(data + local_len
+						      + BTR_EXTERN_LEN + 4);
+
+			total_extern_len += ut_calc_align(extern_len,
+							  UNIV_PAGE_SIZE);
+		}
+	}
+
+	return(total_extern_len / UNIV_PAGE_SIZE);
+}
+
+/*******************************************************************//**
+Sets the ownership bit of an externally stored field in a record. */
+static
+void
+btr_cur_set_ownership_of_extern_field(
+/*==================================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
+				part will be updated, or NULL */
+	rec_t*		rec,	/*!< in/out: clustered index record */
+	dict_index_t*	index,	/*!< in: index of the page */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		i,	/*!< in: field number */
+	ibool		val,	/*!< in: value to set */
+	mtr_t*		mtr)	/*!< in: mtr, or NULL if not logged */
+{
+	byte*	data;
+	ulint	local_len;
+	ulint	byte_val;
+
+	data = rec_get_nth_field(rec, offsets, i, &local_len);
+
+	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+
+	byte_val = mach_read_from_1(data + local_len + BTR_EXTERN_LEN);
+
+	if (val) {
+		byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
+	} else {
+		byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
+	}
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
+		page_zip_write_blob_ptr(page_zip, rec, index, offsets, i, mtr);
+	} else if (UNIV_LIKELY(mtr != NULL)) {
+
+		mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
+				 MLOG_1BYTE, mtr);
+	} else {
+		mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
+	}
+}
+
+/*******************************************************************//**
+Marks not updated extern fields as not-owned by this record. The ownership
+is transferred to the updated record which is inserted elsewhere in the
+index tree. In purge only the owner of externally stored field is allowed
+to free the field. */
+UNIV_INTERN
+void
+btr_cur_mark_extern_inherited_fields(
+/*=================================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
+				part will be updated, or NULL */
+	rec_t*		rec,	/*!< in/out: record in a clustered index */
+	dict_index_t*	index,	/*!< in: index of the page */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	const upd_t*	update,	/*!< in: update vector */
+	mtr_t*		mtr)	/*!< in: mtr, or NULL if not logged */
+{
+	ulint	n;
+	ulint	j;
+	ulint	i;
+
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
+
+	if (!rec_offs_any_extern(offsets)) {
+
+		return;
+	}
+
+	n = rec_offs_n_fields(offsets);
+
+	for (i = 0; i < n; i++) {
+		if (rec_offs_nth_extern(offsets, i)) {
+
+			/* Check it is not in updated fields */
+
+			if (update) {
+				for (j = 0; j < upd_get_n_fields(update);
+				     j++) {
+					if (upd_get_nth_field(update, j)
+					    ->field_no == i) {
+
+						goto updated;
+					}
+				}
+			}
+
+			btr_cur_set_ownership_of_extern_field(
+				page_zip, rec, index, offsets, i, FALSE, mtr);
+updated:
+			;
+		}
+	}
+}
+
+/*******************************************************************//**
+The complement of the previous function: in an update entry may inherit
+some externally stored fields from a record. We must mark them as inherited
+in entry, so that they are not freed in a rollback. */
+UNIV_INTERN
+void
+btr_cur_mark_dtuple_inherited_extern(
+/*=================================*/
+	dtuple_t*	entry,		/*!< in/out: updated entry to be
+					inserted to clustered index */
+	const upd_t*	update)		/*!< in: update vector */
+{
+	ulint		i;
+
+	for (i = 0; i < dtuple_get_n_fields(entry); i++) {
+
+		dfield_t*	dfield = dtuple_get_nth_field(entry, i);
+		byte*		data;
+		ulint		len;
+		ulint		j;
+
+		if (!dfield_is_ext(dfield)) {
+			continue;
+		}
+
+		/* Check if it is in updated fields */
+
+		for (j = 0; j < upd_get_n_fields(update); j++) {
+			if (upd_get_nth_field(update, j)->field_no == i) {
+
+				goto is_updated;
+			}
+		}
+
+		data = dfield_get_data(dfield);
+		len = dfield_get_len(dfield);
+		data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
+			|= BTR_EXTERN_INHERITED_FLAG;
+
+is_updated:
+		;
+	}
+}
+
+/*******************************************************************//**
+Marks all extern fields in a record as owned by the record. This function
+should be called if the delete mark of a record is removed: a not delete
+marked record always owns all its extern fields. */
+static
+void
+btr_cur_unmark_extern_fields(
+/*=========================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
+				part will be updated, or NULL */
+	rec_t*		rec,	/*!< in/out: record in a clustered index */
+	dict_index_t*	index,	/*!< in: index of the page */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	mtr_t*		mtr)	/*!< in: mtr, or NULL if not logged */
+{
+	ulint	n;
+	ulint	i;
+
+	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
+	n = rec_offs_n_fields(offsets);
+
+	if (!rec_offs_any_extern(offsets)) {
+
+		return;
+	}
+
+	for (i = 0; i < n; i++) {
+		if (rec_offs_nth_extern(offsets, i)) {
+
+			btr_cur_set_ownership_of_extern_field(
+				page_zip, rec, index, offsets, i, TRUE, mtr);
+		}
+	}
+}
+
+/*******************************************************************//**
+Marks all extern fields in a dtuple as owned by the record. */
+UNIV_INTERN
+void
+btr_cur_unmark_dtuple_extern_fields(
+/*================================*/
+	dtuple_t*	entry)		/*!< in/out: clustered index entry */
+{
+	ulint	i;
+
+	for (i = 0; i < dtuple_get_n_fields(entry); i++) {
+		dfield_t* dfield = dtuple_get_nth_field(entry, i);
+
+		if (dfield_is_ext(dfield)) {
+			byte*	data = dfield_get_data(dfield);
+			ulint	len = dfield_get_len(dfield);
+
+			data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
+				&= ~BTR_EXTERN_OWNER_FLAG;
+		}
+	}
+}
+
+/*******************************************************************//**
+Flags the data tuple fields that are marked as extern storage in the
+update vector.  We use this function to remember which fields we must
+mark as extern storage in a record inserted for an update.
+@return	number of flagged external columns */
+UNIV_INTERN
+ulint
+btr_push_update_extern_fields(
+/*==========================*/
+	dtuple_t*	tuple,	/*!< in/out: data tuple */
+	const upd_t*	update,	/*!< in: update vector */
+	mem_heap_t*	heap)	/*!< in: memory heap */
+{
+	ulint			n_pushed	= 0;
+	ulint			n;
+	const upd_field_t*	uf;
+
+	ut_ad(tuple);
+	ut_ad(update);
+
+	uf = update->fields;
+	n = upd_get_n_fields(update);
+
+	for (; n--; uf++) {
+		if (dfield_is_ext(&uf->new_val)) {
+			dfield_t*	field
+				= dtuple_get_nth_field(tuple, uf->field_no);
+
+			if (!dfield_is_ext(field)) {
+				dfield_set_ext(field);
+				n_pushed++;
+			}
+
+			switch (uf->orig_len) {
+				byte*	data;
+				ulint	len;
+				byte*	buf;
+			case 0:
+				break;
+			case BTR_EXTERN_FIELD_REF_SIZE:
+				/* Restore the original locally stored
+				part of the column.  In the undo log,
+				InnoDB writes a longer prefix of externally
+				stored columns, so that column prefixes
+				in secondary indexes can be reconstructed. */
+				dfield_set_data(field, (byte*) dfield_get_data(field)
+						+ dfield_get_len(field)
+						- BTR_EXTERN_FIELD_REF_SIZE,
+						BTR_EXTERN_FIELD_REF_SIZE);
+				dfield_set_ext(field);
+				break;
+			default:
+				/* Reconstruct the original locally
+				stored part of the column.  The data
+				will have to be copied. */
+				ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
+
+				data = dfield_get_data(field);
+				len = dfield_get_len(field);
+
+				buf = mem_heap_alloc(heap, uf->orig_len);
+				/* Copy the locally stored prefix. */
+				memcpy(buf, data,
+				       uf->orig_len
+				       - BTR_EXTERN_FIELD_REF_SIZE);
+				/* Copy the BLOB pointer. */
+				memcpy(buf + uf->orig_len
+				       - BTR_EXTERN_FIELD_REF_SIZE,
+				       data + len - BTR_EXTERN_FIELD_REF_SIZE,
+				       BTR_EXTERN_FIELD_REF_SIZE);
+
+				dfield_set_data(field, buf, uf->orig_len);
+				dfield_set_ext(field);
+			}
+		}
+	}
+
+	return(n_pushed);
+}
+
+/*******************************************************************//**
+Returns the length of a BLOB part stored on the header page.
+@return	part length */
+static
+ulint
+btr_blob_get_part_len(
+/*==================*/
+	const byte*	blob_header)	/*!< in: blob header */
+{
+	return(mach_read_from_4(blob_header + BTR_BLOB_HDR_PART_LEN));
+}
+
+/*******************************************************************//**
+Returns the page number where the next BLOB part is stored.
+@return	page number or FIL_NULL if no more pages */
+static
+ulint
+btr_blob_get_next_page_no(
+/*======================*/
+	const byte*	blob_header)	/*!< in: blob header */
+{
+	return(mach_read_from_4(blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO));
+}
+
+/*******************************************************************//**
+Deallocate a buffer block that was reserved for a BLOB part. */
+static
+void
+btr_blob_free(
+/*==========*/
+	buf_block_t*	block,	/*!< in: buffer block */
+	ibool		all,	/*!< in: TRUE=remove also the compressed page
+				if there is one */
+	mtr_t*		mtr)	/*!< in: mini-transaction to commit */
+{
+	ulint	space	= buf_block_get_space(block);
+	ulint	page_no	= buf_block_get_page_no(block);
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+
+	mtr_commit(mtr);
+
+	buf_pool_mutex_enter();
+	mutex_enter(&block->mutex);
+
+	/* Only free the block if it is still allocated to
+	the same file page. */
+
+	if (buf_block_get_state(block)
+	    == BUF_BLOCK_FILE_PAGE
+	    && buf_block_get_space(block) == space
+	    && buf_block_get_page_no(block) == page_no) {
+
+		if (buf_LRU_free_block(&block->page, all, NULL)
+		    != BUF_LRU_FREED
+		    && all && block->page.zip.data) {
+			/* Attempt to deallocate the uncompressed page
+			if the whole block cannot be deallocted. */
+
+			buf_LRU_free_block(&block->page, FALSE, NULL);
+		}
+	}
+
+	buf_pool_mutex_exit();
+	mutex_exit(&block->mutex);
+}
+
+/*******************************************************************//**
+Stores the fields in big_rec_vec to the tablespace and puts pointers to
+them in rec.  The extern flags in rec will have to be set beforehand.
+The fields are stored on pages allocated from leaf node
+file segment of the index tree.
+@return	DB_SUCCESS or error */
+UNIV_INTERN
+ulint
+btr_store_big_rec_extern_fields(
+/*============================*/
+	dict_index_t*	index,		/*!< in: index of rec; the index tree
+					MUST be X-latched */
+	buf_block_t*	rec_block,	/*!< in/out: block containing rec */
+	rec_t*		rec,		/*!< in/out: record */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index);
+					the "external storage" flags in offsets
+					will not correspond to rec when
+					this function returns */
+	big_rec_t*	big_rec_vec,	/*!< in: vector containing fields
+					to be stored externally */
+	mtr_t*		local_mtr __attribute__((unused))) /*!< in: mtr
+					containing the latch to rec and to the
+					tree */
+{
+	ulint	rec_page_no;
+	byte*	field_ref;
+	ulint	extern_len;
+	ulint	store_len;
+	ulint	page_no;
+	ulint	space_id;
+	ulint	zip_size;
+	ulint	prev_page_no;
+	ulint	hint_page_no;
+	ulint	i;
+	mtr_t	mtr;
+	mem_heap_t* heap = NULL;
+	page_zip_des_t*	page_zip;
+	z_stream c_stream;
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
+				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
+	ut_a(dict_index_is_clust(index));
+
+	page_zip = buf_block_get_page_zip(rec_block);
+	ut_a(dict_table_zip_size(index->table)
+	     == buf_block_get_zip_size(rec_block));
+
+	space_id = buf_block_get_space(rec_block);
+	zip_size = buf_block_get_zip_size(rec_block);
+	rec_page_no = buf_block_get_page_no(rec_block);
+	ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		int	err;
+
+		/* Zlib deflate needs 128 kilobytes for the default
+		window size, plus 512 << memLevel, plus a few
+		kilobytes for small objects.  We use reduced memLevel
+		to limit the memory consumption, and preallocate the
+		heap, hoping to avoid memory fragmentation. */
+		heap = mem_heap_create(250000);
+		page_zip_set_alloc(&c_stream, heap);
+
+		err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
+				   Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
+		ut_a(err == Z_OK);
+	}
+
+	/* We have to create a file segment to the tablespace
+	for each field and put the pointer to the field in rec */
+
+	for (i = 0; i < big_rec_vec->n_fields; i++) {
+		ut_ad(rec_offs_nth_extern(offsets,
+					  big_rec_vec->fields[i].field_no));
+		{
+			ulint	local_len;
+			field_ref = rec_get_nth_field(
+				rec, offsets, big_rec_vec->fields[i].field_no,
+				&local_len);
+			ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+			local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+			field_ref += local_len;
+		}
+		extern_len = big_rec_vec->fields[i].len;
+
+		ut_a(extern_len > 0);
+
+		prev_page_no = FIL_NULL;
+
+		if (UNIV_LIKELY_NULL(page_zip)) {
+			int	err = deflateReset(&c_stream);
+			ut_a(err == Z_OK);
+
+			c_stream.next_in = (void*) big_rec_vec->fields[i].data;
+			c_stream.avail_in = extern_len;
+		}
+
+		for (;;) {
+			buf_block_t*	block;
+			page_t*		page;
+
+			mtr_start(&mtr);
+
+			if (prev_page_no == FIL_NULL) {
+				hint_page_no = 1 + rec_page_no;
+			} else {
+				hint_page_no = prev_page_no + 1;
+			}
+
+			block = btr_page_alloc(index, hint_page_no,
+					       FSP_NO_DIR, 0, &mtr);
+			if (UNIV_UNLIKELY(block == NULL)) {
+
+				mtr_commit(&mtr);
+
+				if (UNIV_LIKELY_NULL(page_zip)) {
+					deflateEnd(&c_stream);
+					mem_heap_free(heap);
+				}
+
+				return(DB_OUT_OF_FILE_SPACE);
+			}
+
+			page_no = buf_block_get_page_no(block);
+			page = buf_block_get_frame(block);
+
+			if (prev_page_no != FIL_NULL) {
+				buf_block_t*	prev_block;
+				page_t*		prev_page;
+
+				prev_block = buf_page_get(space_id, zip_size,
+							  prev_page_no,
+							  RW_X_LATCH, &mtr);
+				buf_block_dbg_add_level(prev_block,
+							SYNC_EXTERN_STORAGE);
+				prev_page = buf_block_get_frame(prev_block);
+
+				if (UNIV_LIKELY_NULL(page_zip)) {
+					mlog_write_ulint(
+						prev_page + FIL_PAGE_NEXT,
+						page_no, MLOG_4BYTES, &mtr);
+					memcpy(buf_block_get_page_zip(
+						       prev_block)
+					       ->data + FIL_PAGE_NEXT,
+					       prev_page + FIL_PAGE_NEXT, 4);
+				} else {
+					mlog_write_ulint(
+						prev_page + FIL_PAGE_DATA
+						+ BTR_BLOB_HDR_NEXT_PAGE_NO,
+						page_no, MLOG_4BYTES, &mtr);
+				}
+
+			}
+
+			if (UNIV_LIKELY_NULL(page_zip)) {
+				int		err;
+				page_zip_des_t*	blob_page_zip;
+
+				/* Write FIL_PAGE_TYPE to the redo log
+				separately, before logging any other
+				changes to the page, so that the debug
+				assertions in
+				recv_parse_or_apply_log_rec_body() can
+				be made simpler.  Before InnoDB Plugin
+				1.0.4, the initialization of
+				FIL_PAGE_TYPE was logged as part of
+				the mlog_log_string() below. */
+
+				mlog_write_ulint(page + FIL_PAGE_TYPE,
+						 prev_page_no == FIL_NULL
+						 ? FIL_PAGE_TYPE_ZBLOB
+						 : FIL_PAGE_TYPE_ZBLOB2,
+						 MLOG_2BYTES, &mtr);
+
+				c_stream.next_out = page
+					+ FIL_PAGE_DATA;
+				c_stream.avail_out
+					= page_zip_get_size(page_zip)
+					- FIL_PAGE_DATA;
+
+				err = deflate(&c_stream, Z_FINISH);
+				ut_a(err == Z_OK || err == Z_STREAM_END);
+				ut_a(err == Z_STREAM_END
+				     || c_stream.avail_out == 0);
+
+				/* Write the "next BLOB page" pointer */
+				mlog_write_ulint(page + FIL_PAGE_NEXT,
+						 FIL_NULL, MLOG_4BYTES, &mtr);
+				/* Initialize the unused "prev page" pointer */
+				mlog_write_ulint(page + FIL_PAGE_PREV,
+						 FIL_NULL, MLOG_4BYTES, &mtr);
+				/* Write a back pointer to the record
+				into the otherwise unused area.  This
+				information could be useful in
+				debugging.  Later, we might want to
+				implement the possibility to relocate
+				BLOB pages.  Then, we would need to be
+				able to adjust the BLOB pointer in the
+				record.  We do not store the heap
+				number of the record, because it can
+				change in page_zip_reorganize() or
+				btr_page_reorganize().  However, also
+				the page number of the record may
+				change when B-tree nodes are split or
+				merged. */
+				mlog_write_ulint(page
+						 + FIL_PAGE_FILE_FLUSH_LSN,
+						 space_id,
+						 MLOG_4BYTES, &mtr);
+				mlog_write_ulint(page
+						 + FIL_PAGE_FILE_FLUSH_LSN + 4,
+						 rec_page_no,
+						 MLOG_4BYTES, &mtr);
+
+				/* Zero out the unused part of the page. */
+				memset(page + page_zip_get_size(page_zip)
+				       - c_stream.avail_out,
+				       0, c_stream.avail_out);
+				mlog_log_string(page + FIL_PAGE_FILE_FLUSH_LSN,
+						page_zip_get_size(page_zip)
+						- FIL_PAGE_FILE_FLUSH_LSN,
+						&mtr);
+				/* Copy the page to compressed storage,
+				because it will be flushed to disk
+				from there. */
+				blob_page_zip = buf_block_get_page_zip(block);
+				ut_ad(blob_page_zip);
+				ut_ad(page_zip_get_size(blob_page_zip)
+				      == page_zip_get_size(page_zip));
+				memcpy(blob_page_zip->data, page,
+				       page_zip_get_size(page_zip));
+
+				if (err == Z_OK && prev_page_no != FIL_NULL) {
+
+					goto next_zip_page;
+				}
+
+				rec_block = buf_page_get(space_id, zip_size,
+							 rec_page_no,
+							 RW_X_LATCH, &mtr);
+				buf_block_dbg_add_level(rec_block,
+							SYNC_NO_ORDER_CHECK);
+
+				if (err == Z_STREAM_END) {
+					mach_write_to_4(field_ref
+							+ BTR_EXTERN_LEN, 0);
+					mach_write_to_4(field_ref
+							+ BTR_EXTERN_LEN + 4,
+							c_stream.total_in);
+				} else {
+					memset(field_ref + BTR_EXTERN_LEN,
+					       0, 8);
+				}
+
+				if (prev_page_no == FIL_NULL) {
+					mach_write_to_4(field_ref
+							+ BTR_EXTERN_SPACE_ID,
+							space_id);
+
+					mach_write_to_4(field_ref
+							+ BTR_EXTERN_PAGE_NO,
+							page_no);
+
+					mach_write_to_4(field_ref
+							+ BTR_EXTERN_OFFSET,
+							FIL_PAGE_NEXT);
+				}
+
+				page_zip_write_blob_ptr(
+					page_zip, rec, index, offsets,
+					big_rec_vec->fields[i].field_no, &mtr);
+
+next_zip_page:
+				prev_page_no = page_no;
+
+				/* Commit mtr and release the
+				uncompressed page frame to save memory. */
+				btr_blob_free(block, FALSE, &mtr);
+
+				if (err == Z_STREAM_END) {
+					break;
+				}
+			} else {
+				mlog_write_ulint(page + FIL_PAGE_TYPE,
+						 FIL_PAGE_TYPE_BLOB,
+						 MLOG_2BYTES, &mtr);
+
+				if (extern_len > (UNIV_PAGE_SIZE
+						  - FIL_PAGE_DATA
+						  - BTR_BLOB_HDR_SIZE
+						  - FIL_PAGE_DATA_END)) {
+					store_len = UNIV_PAGE_SIZE
+						- FIL_PAGE_DATA
+						- BTR_BLOB_HDR_SIZE
+						- FIL_PAGE_DATA_END;
+				} else {
+					store_len = extern_len;
+				}
+
+				mlog_write_string(page + FIL_PAGE_DATA
+						  + BTR_BLOB_HDR_SIZE,
+						  (const byte*)
+						  big_rec_vec->fields[i].data
+						  + big_rec_vec->fields[i].len
+						  - extern_len,
+						  store_len, &mtr);
+				mlog_write_ulint(page + FIL_PAGE_DATA
+						 + BTR_BLOB_HDR_PART_LEN,
+						 store_len, MLOG_4BYTES, &mtr);
+				mlog_write_ulint(page + FIL_PAGE_DATA
+						 + BTR_BLOB_HDR_NEXT_PAGE_NO,
+						 FIL_NULL, MLOG_4BYTES, &mtr);
+
+				extern_len -= store_len;
+
+				rec_block = buf_page_get(space_id, zip_size,
+							 rec_page_no,
+							 RW_X_LATCH, &mtr);
+				buf_block_dbg_add_level(rec_block,
+							SYNC_NO_ORDER_CHECK);
+
+				mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
+						 MLOG_4BYTES, &mtr);
+				mlog_write_ulint(field_ref
+						 + BTR_EXTERN_LEN + 4,
+						 big_rec_vec->fields[i].len
+						 - extern_len,
+						 MLOG_4BYTES, &mtr);
+
+				if (prev_page_no == FIL_NULL) {
+					mlog_write_ulint(field_ref
+							 + BTR_EXTERN_SPACE_ID,
+							 space_id,
+							 MLOG_4BYTES, &mtr);
+
+					mlog_write_ulint(field_ref
+							 + BTR_EXTERN_PAGE_NO,
+							 page_no,
+							 MLOG_4BYTES, &mtr);
+
+					mlog_write_ulint(field_ref
+							 + BTR_EXTERN_OFFSET,
+							 FIL_PAGE_DATA,
+							 MLOG_4BYTES, &mtr);
+				}
+
+				prev_page_no = page_no;
+
+				mtr_commit(&mtr);
+
+				if (extern_len == 0) {
+					break;
+				}
+			}
+		}
+	}
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		deflateEnd(&c_stream);
+		mem_heap_free(heap);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/*******************************************************************//**
+Check the FIL_PAGE_TYPE on an uncompressed BLOB page. */
+static
+void
+btr_check_blob_fil_page_type(
+/*=========================*/
+	ulint		space_id,	/*!< in: space id */
+	ulint		page_no,	/*!< in: page number */
+	const page_t*	page,		/*!< in: page */
+	ibool		read)		/*!< in: TRUE=read, FALSE=purge */
+{
+	ulint	type = fil_page_get_type(page);
+
+	ut_a(space_id == page_get_space_id(page));
+	ut_a(page_no == page_get_page_no(page));
+
+	if (UNIV_UNLIKELY(type != FIL_PAGE_TYPE_BLOB)) {
+		ulint	flags = fil_space_get_flags(space_id);
+
+		if (UNIV_LIKELY
+		    ((flags & DICT_TF_FORMAT_MASK) == DICT_TF_FORMAT_51)) {
+			/* Old versions of InnoDB did not initialize
+			FIL_PAGE_TYPE on BLOB pages.  Do not print
+			anything about the type mismatch when reading
+			a BLOB page that is in Antelope format.*/
+			return;
+		}
+
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			"  InnoDB: FIL_PAGE_TYPE=%lu"
+			" on BLOB %s space %lu page %lu flags %lx\n",
+			(ulong) type, read ? "read" : "purge",
+			(ulong) space_id, (ulong) page_no, (ulong) flags);
+		ut_error;
+	}
+}
+
+/*******************************************************************//**
+Frees the space in an externally stored field to the file space
+management if the field in data is owned by the externally stored field,
+in a rollback we may have the additional condition that the field must
+not be inherited. */
+UNIV_INTERN
+void
+btr_free_externally_stored_field(
+/*=============================*/
+	dict_index_t*	index,		/*!< in: index of the data, the index
+					tree MUST be X-latched; if the tree
+					height is 1, then also the root page
+					must be X-latched! (this is relevant
+					in the case this function is called
+					from purge where 'data' is located on
+					an undo log page, not an index
+					page) */
+	byte*		field_ref,	/*!< in/out: field reference */
+	const rec_t*	rec,		/*!< in: record containing field_ref, for
+					page_zip_write_blob_ptr(), or NULL */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index),
+					or NULL */
+	page_zip_des_t*	page_zip,	/*!< in: compressed page corresponding
+					to rec, or NULL if rec == NULL */
+	ulint		i,		/*!< in: field number of field_ref;
+					ignored if rec == NULL */
+	enum trx_rb_ctx	rb_ctx,		/*!< in: rollback context */
+	mtr_t*		local_mtr __attribute__((unused))) /*!< in: mtr
+					containing the latch to data an an
+					X-latch to the index tree */
+{
+	page_t*		page;
+	ulint		space_id;
+	ulint		rec_zip_size = dict_table_zip_size(index->table);
+	ulint		ext_zip_size;
+	ulint		page_no;
+	ulint		next_page_no;
+	mtr_t		mtr;
+#ifdef UNIV_DEBUG
+	ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
+				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
+				     MTR_MEMO_PAGE_X_FIX));
+	ut_ad(!rec || rec_offs_validate(rec, index, offsets));
+
+	if (rec) {
+		ulint	local_len;
+		const byte*	f = rec_get_nth_field(rec, offsets,
+						      i, &local_len);
+		ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+		local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+		f += local_len;
+		ut_ad(f == field_ref);
+	}
+#endif /* UNIV_DEBUG */
+
+	if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero,
+				  BTR_EXTERN_FIELD_REF_SIZE))) {
+		/* In the rollback of uncommitted transactions, we may
+		encounter a clustered index record whose BLOBs have
+		not been written.  There is nothing to free then. */
+		ut_a(rb_ctx == RB_RECOVERY);
+		return;
+	}
+
+	space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID);
+
+	if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
+		ext_zip_size = fil_space_get_zip_size(space_id);
+		/* This must be an undo log record in the system tablespace,
+		that is, in row_purge_upd_exist_or_extern().
+		Currently, externally stored records are stored in the
+		same tablespace as the referring records. */
+		ut_ad(!page_get_space_id(page_align(field_ref)));
+		ut_ad(!rec);
+		ut_ad(!page_zip);
+	} else {
+		ext_zip_size = rec_zip_size;
+	}
+
+	if (!rec) {
+		/* This is a call from row_purge_upd_exist_or_extern(). */
+		ut_ad(!page_zip);
+		rec_zip_size = 0;
+	}
+
+	for (;;) {
+		buf_block_t*	rec_block;
+		buf_block_t*	ext_block;
+
+		mtr_start(&mtr);
+
+		rec_block = buf_page_get(page_get_space_id(
+						 page_align(field_ref)),
+					 rec_zip_size,
+					 page_get_page_no(
+						 page_align(field_ref)),
+					 RW_X_LATCH, &mtr);
+		buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
+		page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
+
+		if (/* There is no external storage data */
+		    page_no == FIL_NULL
+		    /* This field does not own the externally stored field */
+		    || (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
+			& BTR_EXTERN_OWNER_FLAG)
+		    /* Rollback and inherited field */
+		    || (rb_ctx != RB_NONE
+			&& (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
+			    & BTR_EXTERN_INHERITED_FLAG))) {
+
+			/* Do not free */
+			mtr_commit(&mtr);
+
+			return;
+		}
+
+		ext_block = buf_page_get(space_id, ext_zip_size, page_no,
+					 RW_X_LATCH, &mtr);
+		buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
+		page = buf_block_get_frame(ext_block);
+
+		if (ext_zip_size) {
+			/* Note that page_zip will be NULL
+			in row_purge_upd_exist_or_extern(). */
+			switch (fil_page_get_type(page)) {
+			case FIL_PAGE_TYPE_ZBLOB:
+			case FIL_PAGE_TYPE_ZBLOB2:
+				break;
+			default:
+				ut_error;
+			}
+			next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
+
+			btr_page_free_low(index, ext_block, 0, &mtr);
+
+			if (UNIV_LIKELY(page_zip != NULL)) {
+				mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
+						next_page_no);
+				mach_write_to_4(field_ref + BTR_EXTERN_LEN + 4,
+						0);
+				page_zip_write_blob_ptr(page_zip, rec, index,
+							offsets, i, &mtr);
+			} else {
+				mlog_write_ulint(field_ref
+						 + BTR_EXTERN_PAGE_NO,
+						 next_page_no,
+						 MLOG_4BYTES, &mtr);
+				mlog_write_ulint(field_ref
+						 + BTR_EXTERN_LEN + 4, 0,
+						 MLOG_4BYTES, &mtr);
+			}
+		} else {
+			ut_a(!page_zip);
+			btr_check_blob_fil_page_type(space_id, page_no, page,
+						     FALSE);
+
+			next_page_no = mach_read_from_4(
+				page + FIL_PAGE_DATA
+				+ BTR_BLOB_HDR_NEXT_PAGE_NO);
+
+			/* We must supply the page level (= 0) as an argument
+			because we did not store it on the page (we save the
+			space overhead from an index page header. */
+
+			btr_page_free_low(index, ext_block, 0, &mtr);
+
+			mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
+					 next_page_no,
+					 MLOG_4BYTES, &mtr);
+			/* Zero out the BLOB length.  If the server
+			crashes during the execution of this function,
+			trx_rollback_or_clean_all_recovered() could
+			dereference the half-deleted BLOB, fetching a
+			wrong prefix for the BLOB. */
+			mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
+					 0,
+					 MLOG_4BYTES, &mtr);
+		}
+
+		/* Commit mtr and release the BLOB block to save memory. */
+		btr_blob_free(ext_block, TRUE, &mtr);
+	}
+}
+
+/***********************************************************//**
+Frees the externally stored fields for a record. */
+static
+void
+btr_rec_free_externally_stored_fields(
+/*==================================*/
+	dict_index_t*	index,	/*!< in: index of the data, the index
+				tree MUST be X-latched */
+	rec_t*		rec,	/*!< in/out: record */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle which contains
+				an X-latch to record page and to the index
+				tree */
+{
+	ulint	n_fields;
+	ulint	i;
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
+	/* Free possible externally stored fields in the record */
+
+	ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));
+	n_fields = rec_offs_n_fields(offsets);
+
+	for (i = 0; i < n_fields; i++) {
+		if (rec_offs_nth_extern(offsets, i)) {
+			ulint	len;
+			byte*	data
+				= rec_get_nth_field(rec, offsets, i, &len);
+			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+			btr_free_externally_stored_field(
+				index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
+				rec, offsets, page_zip, i, rb_ctx, mtr);
+		}
+	}
+}
+
+/***********************************************************//**
+Frees the externally stored fields for a record, if the field is mentioned
+in the update vector. */
+static
+void
+btr_rec_free_updated_extern_fields(
+/*===============================*/
+	dict_index_t*	index,	/*!< in: index of rec; the index tree MUST be
+				X-latched */
+	rec_t*		rec,	/*!< in/out: record */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	const upd_t*	update,	/*!< in: update vector */
+	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle which contains
+				an X-latch to record page and to the tree */
+{
+	ulint	n_fields;
+	ulint	i;
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
+
+	/* Free possible externally stored fields in the record */
+
+	n_fields = upd_get_n_fields(update);
+
+	for (i = 0; i < n_fields; i++) {
+		const upd_field_t* ufield = upd_get_nth_field(update, i);
+
+		if (rec_offs_nth_extern(offsets, ufield->field_no)) {
+			ulint	len;
+			byte*	data = rec_get_nth_field(
+				rec, offsets, ufield->field_no, &len);
+			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+			btr_free_externally_stored_field(
+				index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
+				rec, offsets, page_zip,
+				ufield->field_no, rb_ctx, mtr);
+		}
+	}
+}
+
+/*******************************************************************//**
+Copies the prefix of an uncompressed BLOB.  The clustered index record
+that points to this BLOB must be protected by a lock or a page latch.
+@return	number of bytes written to buf */
+static
+ulint
+btr_copy_blob_prefix(
+/*=================*/
+	byte*		buf,	/*!< out: the externally stored part of
+				the field, or a prefix of it */
+	ulint		len,	/*!< in: length of buf, in bytes */
+	ulint		space_id,/*!< in: space id of the BLOB pages */
+	ulint		page_no,/*!< in: page number of the first BLOB page */
+	ulint		offset)	/*!< in: offset on the first BLOB page */
+{
+	ulint	copied_len	= 0;
+
+	for (;;) {
+		mtr_t		mtr;
+		buf_block_t*	block;
+		const page_t*	page;
+		const byte*	blob_header;
+		ulint		part_len;
+		ulint		copy_len;
+
+		mtr_start(&mtr);
+
+		block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr);
+		buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
+		page = buf_block_get_frame(block);
+
+		btr_check_blob_fil_page_type(space_id, page_no, page, TRUE);
+
+		blob_header = page + offset;
+		part_len = btr_blob_get_part_len(blob_header);
+		copy_len = ut_min(part_len, len - copied_len);
+
+		memcpy(buf + copied_len,
+		       blob_header + BTR_BLOB_HDR_SIZE, copy_len);
+		copied_len += copy_len;
+
+		page_no = btr_blob_get_next_page_no(blob_header);
+
+		mtr_commit(&mtr);
+
+		if (page_no == FIL_NULL || copy_len != part_len) {
+			return(copied_len);
+		}
+
+		/* On other BLOB pages except the first the BLOB header
+		always is at the page data start: */
+
+		offset = FIL_PAGE_DATA;
+
+		ut_ad(copied_len <= len);
+	}
+}
+
+/*******************************************************************//**
+Copies the prefix of a compressed BLOB.  The clustered index record
+that points to this BLOB must be protected by a lock or a page latch. */
+static
+void
+btr_copy_zblob_prefix(
+/*==================*/
+	z_stream*	d_stream,/*!< in/out: the decompressing stream */
+	ulint		zip_size,/*!< in: compressed BLOB page size */
+	ulint		space_id,/*!< in: space id of the BLOB pages */
+	ulint		page_no,/*!< in: page number of the first BLOB page */
+	ulint		offset)	/*!< in: offset on the first BLOB page */
+{
+	ulint	page_type = FIL_PAGE_TYPE_ZBLOB;
+
+	ut_ad(ut_is_2pow(zip_size));
+	ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE);
+	ut_ad(zip_size <= UNIV_PAGE_SIZE);
+	ut_ad(space_id);
+
+	for (;;) {
+		buf_page_t*	bpage;
+		int		err;
+		ulint		next_page_no;
+
+		/* There is no latch on bpage directly.  Instead,
+		bpage is protected by the B-tree page latch that
+		is being held on the clustered index record, or,
+		in row_merge_copy_blobs(), by an exclusive table lock. */
+		bpage = buf_page_get_zip(space_id, zip_size, page_no);
+
+		if (UNIV_UNLIKELY(!bpage)) {
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: Cannot load"
+				" compressed BLOB"
+				" page %lu space %lu\n",
+				(ulong) page_no, (ulong) space_id);
+			return;
+		}
+
+		if (UNIV_UNLIKELY
+		    (fil_page_get_type(bpage->zip.data) != page_type)) {
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: Unexpected type %lu of"
+				" compressed BLOB"
+				" page %lu space %lu\n",
+				(ulong) fil_page_get_type(bpage->zip.data),
+				(ulong) page_no, (ulong) space_id);
+			goto end_of_blob;
+		}
+
+		next_page_no = mach_read_from_4(bpage->zip.data + offset);
+
+		if (UNIV_LIKELY(offset == FIL_PAGE_NEXT)) {
+			/* When the BLOB begins at page header,
+			the compressed data payload does not
+			immediately follow the next page pointer. */
+			offset = FIL_PAGE_DATA;
+		} else {
+			offset += 4;
+		}
+
+		d_stream->next_in = bpage->zip.data + offset;
+		d_stream->avail_in = zip_size - offset;
+
+		err = inflate(d_stream, Z_NO_FLUSH);
+		switch (err) {
+		case Z_OK:
+			if (!d_stream->avail_out) {
+				goto end_of_blob;
+			}
+			break;
+		case Z_STREAM_END:
+			if (next_page_no == FIL_NULL) {
+				goto end_of_blob;
+			}
+			/* fall through */
+		default:
+inflate_error:
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: inflate() of"
+				" compressed BLOB"
+				" page %lu space %lu returned %d (%s)\n",
+				(ulong) page_no, (ulong) space_id,
+				err, d_stream->msg);
+		case Z_BUF_ERROR:
+			goto end_of_blob;
+		}
+
+		if (next_page_no == FIL_NULL) {
+			if (!d_stream->avail_in) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+					"  InnoDB: unexpected end of"
+					" compressed BLOB"
+					" page %lu space %lu\n",
+					(ulong) page_no,
+					(ulong) space_id);
+			} else {
+				err = inflate(d_stream, Z_FINISH);
+				switch (err) {
+				case Z_STREAM_END:
+				case Z_BUF_ERROR:
+					break;
+				default:
+					goto inflate_error;
+				}
+			}
+
+end_of_blob:
+			buf_page_release_zip(bpage);
+			return;
+		}
+
+		buf_page_release_zip(bpage);
+
+		/* On other BLOB pages except the first
+		the BLOB header always is at the page header: */
+
+		page_no = next_page_no;
+		offset = FIL_PAGE_NEXT;
+		page_type = FIL_PAGE_TYPE_ZBLOB2;
+	}
+}
+
+/*******************************************************************//**
+Copies the prefix of an externally stored field of a record.  The
+clustered index record that points to this BLOB must be protected by a
+lock or a page latch.
+@return	number of bytes written to buf */
+static
+ulint
+btr_copy_externally_stored_field_prefix_low(
+/*========================================*/
+	byte*		buf,	/*!< out: the externally stored part of
+				the field, or a prefix of it */
+	ulint		len,	/*!< in: length of buf, in bytes */
+	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
+				zero for uncompressed BLOBs */
+	ulint		space_id,/*!< in: space id of the first BLOB page */
+	ulint		page_no,/*!< in: page number of the first BLOB page */
+	ulint		offset)	/*!< in: offset on the first BLOB page */
+{
+	if (UNIV_UNLIKELY(len == 0)) {
+		return(0);
+	}
+
+	if (UNIV_UNLIKELY(zip_size)) {
+		int		err;
+		z_stream	d_stream;
+		mem_heap_t*	heap;
+
+		/* Zlib inflate needs 32 kilobytes for the default
+		window size, plus a few kilobytes for small objects. */
+		heap = mem_heap_create(40000);
+		page_zip_set_alloc(&d_stream, heap);
+
+		err = inflateInit(&d_stream);
+		ut_a(err == Z_OK);
+
+		d_stream.next_out = buf;
+		d_stream.avail_out = len;
+		d_stream.avail_in = 0;
+
+		btr_copy_zblob_prefix(&d_stream, zip_size,
+				      space_id, page_no, offset);
+		inflateEnd(&d_stream);
+		mem_heap_free(heap);
+		return(d_stream.total_out);
+	} else {
+		return(btr_copy_blob_prefix(buf, len, space_id,
+					    page_no, offset));
+	}
+}
+
+/*******************************************************************//**
+Copies the prefix of an externally stored field of a record.  The
+clustered index record must be protected by a lock or a page latch.
+@return the length of the copied field, or 0 if the column was being
+or has been deleted */
+UNIV_INTERN
+ulint
+btr_copy_externally_stored_field_prefix(
+/*====================================*/
+	byte*		buf,	/*!< out: the field, or a prefix of it */
+	ulint		len,	/*!< in: length of buf, in bytes */
+	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
+				zero for uncompressed BLOBs */
+	const byte*	data,	/*!< in: 'internally' stored part of the
+				field containing also the reference to
+				the external part; must be protected by
+				a lock or a page latch */
+	ulint		local_len)/*!< in: length of data, in bytes */
+{
+	ulint	space_id;
+	ulint	page_no;
+	ulint	offset;
+
+	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+
+	if (UNIV_UNLIKELY(local_len >= len)) {
+		memcpy(buf, data, len);
+		return(len);
+	}
+
+	memcpy(buf, data, local_len);
+	data += local_len;
+
+	ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
+
+	if (!mach_read_from_4(data + BTR_EXTERN_LEN + 4)) {
+		/* The externally stored part of the column has been
+		(partially) deleted.  Signal the half-deleted BLOB
+		to the caller. */
+
+		return(0);
+	}
+
+	space_id = mach_read_from_4(data + BTR_EXTERN_SPACE_ID);
+
+	page_no = mach_read_from_4(data + BTR_EXTERN_PAGE_NO);
+
+	offset = mach_read_from_4(data + BTR_EXTERN_OFFSET);
+
+	return(local_len
+	       + btr_copy_externally_stored_field_prefix_low(buf + local_len,
+							     len - local_len,
+							     zip_size,
+							     space_id, page_no,
+							     offset));
+}
+
+/*******************************************************************//**
+Copies an externally stored field of a record to mem heap.  The
+clustered index record must be protected by a lock or a page latch.
+@return	the whole field copied to heap */
+static
+byte*
+btr_copy_externally_stored_field(
+/*=============================*/
+	ulint*		len,	/*!< out: length of the whole field */
+	const byte*	data,	/*!< in: 'internally' stored part of the
+				field containing also the reference to
+				the external part; must be protected by
+				a lock or a page latch */
+	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
+				zero for uncompressed BLOBs */
+	ulint		local_len,/*!< in: length of data */
+	mem_heap_t*	heap)	/*!< in: mem heap */
+{
+	ulint	space_id;
+	ulint	page_no;
+	ulint	offset;
+	ulint	extern_len;
+	byte*	buf;
+
+	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+
+	space_id = mach_read_from_4(data + local_len + BTR_EXTERN_SPACE_ID);
+
+	page_no = mach_read_from_4(data + local_len + BTR_EXTERN_PAGE_NO);
+
+	offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);
+
+	/* Currently a BLOB cannot be bigger than 4 GB; we
+	leave the 4 upper bytes in the length field unused */
+
+	extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);
+
+	buf = mem_heap_alloc(heap, local_len + extern_len);
+
+	memcpy(buf, data, local_len);
+	*len = local_len
+		+ btr_copy_externally_stored_field_prefix_low(buf + local_len,
+							      extern_len,
+							      zip_size,
+							      space_id,
+							      page_no, offset);
+
+	return(buf);
+}
+
+/*******************************************************************//**
+Copies an externally stored field of a record to mem heap.
+@return	the field copied to heap */
+UNIV_INTERN
+byte*
+btr_rec_copy_externally_stored_field(
+/*=================================*/
+	const rec_t*	rec,	/*!< in: record in a clustered index;
+				must be protected by a lock or a page latch */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
+				zero for uncompressed BLOBs */
+	ulint		no,	/*!< in: field number */
+	ulint*		len,	/*!< out: length of the field */
+	mem_heap_t*	heap)	/*!< in: mem heap */
+{
+	ulint		local_len;
+	const byte*	data;
+
+	ut_a(rec_offs_nth_extern(offsets, no));
+
+	/* An externally stored field can contain some initial
+	data from the field, and in the last 20 bytes it has the
+	space id, page number, and offset where the rest of the
+	field data is stored, and the data length in addition to
+	the data stored locally. We may need to store some data
+	locally to get the local record length above the 128 byte
+	limit so that field offsets are stored in two bytes, and
+	the extern bit is available in those two bytes. */
+
+	data = rec_get_nth_field(rec, offsets, no, &local_len);
+
+	return(btr_copy_externally_stored_field(len, data,
+						zip_size, local_len, heap));
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/btr/btr0pcur.c b/storage/innodb_plugin/btr/btr0pcur.c
similarity index 68%
rename from storage/innobase/btr/btr0pcur.c
rename to storage/innodb_plugin/btr/btr0pcur.c
index 65b3c90c809..ec98692c35b 100644
--- a/storage/innobase/btr/btr0pcur.c
+++ b/storage/innodb_plugin/btr/btr0pcur.c
@@ -1,7 +1,24 @@
-/******************************************************
-The index tree persistent cursor
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file btr/btr0pcur.c
+The index tree persistent cursor
 
 Created 2/23/1996 Heikki Tuuri
 *******************************************************/
@@ -16,13 +33,13 @@ Created 2/23/1996 Heikki Tuuri
 #include "rem0cmp.h"
 #include "trx0trx.h"
 
-/******************************************************************
-Allocates memory for a persistent cursor object and initializes the cursor. */
-
+/**************************************************************//**
+Allocates memory for a persistent cursor object and initializes the cursor.
+@return	own: persistent cursor */
+UNIV_INTERN
 btr_pcur_t*
 btr_pcur_create_for_mysql(void)
 /*============================*/
-				/* out, own: persistent cursor */
 {
 	btr_pcur_t*	pcur;
 
@@ -34,13 +51,13 @@ btr_pcur_create_for_mysql(void)
 	return(pcur);
 }
 
-/******************************************************************
+/**************************************************************//**
 Frees the memory for a persistent cursor object. */
-
+UNIV_INTERN
 void
 btr_pcur_free_for_mysql(
 /*====================*/
-	btr_pcur_t*	cursor)	/* in, own: persistent cursor */
+	btr_pcur_t*	cursor)	/*!< in, own: persistent cursor */
 {
 	if (cursor->old_rec_buf != NULL) {
 
@@ -60,21 +77,22 @@ btr_pcur_free_for_mysql(
 	mem_free(cursor);
 }
 
-/******************************************************************
+/**************************************************************//**
 The position of the cursor is stored by taking an initial segment of the
 record the cursor is positioned on, before, or after, and copying it to the
 cursor data structure, or just setting a flag if the cursor id before the
 first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
 page where the cursor is positioned must not be empty if the index tree is
 not totally empty! */
-
+UNIV_INTERN
 void
 btr_pcur_store_position(
 /*====================*/
-	btr_pcur_t*	cursor, /* in: persistent cursor */
-	mtr_t*		mtr)	/* in: mtr */
+	btr_pcur_t*	cursor, /*!< in: persistent cursor */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	page_cur_t*	page_cursor;
+	buf_block_t*	block;
 	rec_t*		rec;
 	dict_index_t*	index;
 	page_t*		page;
@@ -83,6 +101,7 @@ btr_pcur_store_position(
 	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
+	block = btr_pcur_get_block(cursor);
 	index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
 
 	page_cursor = btr_pcur_get_page_cur(cursor);
@@ -91,10 +110,8 @@ btr_pcur_store_position(
 	page = page_align(rec);
 	offs = page_offset(rec);
 
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_S_FIX)
-	      || mtr_memo_contains(mtr, buf_block_align(page),
-				   MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX)
+	      || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 	ut_a(cursor->latch_mode != BTR_NO_LATCHES);
 
 	if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) {
@@ -137,20 +154,19 @@ btr_pcur_store_position(
 		index, rec, &cursor->old_n_fields,
 		&cursor->old_rec_buf, &cursor->buf_size);
 
-	cursor->block_when_stored = buf_block_align(page);
-	cursor->modify_clock = buf_block_get_modify_clock(
-		cursor->block_when_stored);
+	cursor->block_when_stored = block;
+	cursor->modify_clock = buf_block_get_modify_clock(block);
 }
 
-/******************************************************************
+/**************************************************************//**
 Copies the stored position of a pcur to another pcur. */
-
+UNIV_INTERN
 void
 btr_pcur_copy_stored_position(
 /*==========================*/
-	btr_pcur_t*	pcur_receive,	/* in: pcur which will receive the
+	btr_pcur_t*	pcur_receive,	/*!< in: pcur which will receive the
 					position info */
-	btr_pcur_t*	pcur_donate)	/* in: pcur from which the info is
+	btr_pcur_t*	pcur_donate)	/*!< in: pcur from which the info is
 					copied */
 {
 	if (pcur_receive->old_rec_buf) {
@@ -172,7 +188,7 @@ btr_pcur_copy_stored_position(
 	pcur_receive->old_n_fields = pcur_donate->old_n_fields;
 }
 
-/******************************************************************
+/**************************************************************//**
 Restores the stored position of a persistent cursor bufferfixing the page and
 obtaining the specified latches. If the cursor position was saved when the
 (1) cursor was positioned on a user record: this function restores the position
@@ -183,22 +199,19 @@ infimum;
 (3) cursor was positioned on the page supremum: restores to the first record
 GREATER than the user record which was the predecessor of the supremum.
 (4) cursor was positioned before the first or after the last in an empty tree:
-restores to before first or after the last in the tree. */
-
+restores to before first or after the last in the tree.
+@return TRUE if the cursor position was stored when it was on a user
+record and it can be restored on a user record whose ordering fields
+are identical to the ones of the original user record */
+UNIV_INTERN
 ibool
 btr_pcur_restore_position(
 /*======================*/
-					/* out: TRUE if the cursor position
-					was stored when it was on a user record
-					and it can be restored on a user record
-					whose ordering fields are identical to
-					the ones of the original user record */
-	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
-	btr_pcur_t*	cursor,		/* in: detached persistent cursor */
-	mtr_t*		mtr)		/* in: mtr */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor,		/*!< in: detached persistent cursor */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	dict_index_t*	index;
-	page_t*		page;
 	dtuple_t*	tuple;
 	ulint		mode;
 	ulint		old_mode;
@@ -210,6 +223,7 @@ btr_pcur_restore_position(
 	    || UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED
 			     && cursor->pos_state != BTR_PCUR_IS_POSITIONED)) {
 		ut_print_buf(stderr, cursor, sizeof(btr_pcur_t));
+		putc('\n', stderr);
 		if (cursor->trx_if_known) {
 			trx_print(stderr, cursor->trx_if_known, 0);
 		}
@@ -217,9 +231,9 @@ btr_pcur_restore_position(
 		ut_error;
 	}
 
-	if (UNIV_UNLIKELY(
-		    cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
-		    || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
+	if (UNIV_UNLIKELY
+	    (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
+	     || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
 
 		/* In these cases we do not try an optimistic restoration,
 		but always do a search */
@@ -228,8 +242,7 @@ btr_pcur_restore_position(
 			cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
 			index, latch_mode, btr_pcur_get_btr_cur(cursor), mtr);
 
-		cursor->block_when_stored
-			= buf_block_align(btr_pcur_get_page(cursor));
+		cursor->block_when_stored = btr_pcur_get_block(cursor);
 
 		return(FALSE);
 	}
@@ -237,25 +250,24 @@ btr_pcur_restore_position(
 	ut_a(cursor->old_rec);
 	ut_a(cursor->old_n_fields);
 
-	page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor));
-
 	if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
 	    || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
 		/* Try optimistic restoration */
 
 		if (UNIV_LIKELY(buf_page_optimistic_get(
 					latch_mode,
-					cursor->block_when_stored, page,
+					cursor->block_when_stored,
 					cursor->modify_clock, mtr))) {
 			cursor->pos_state = BTR_PCUR_IS_POSITIONED;
-#ifdef UNIV_SYNC_DEBUG
-			buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
+
+			buf_block_dbg_add_level(btr_pcur_get_block(cursor),
+						SYNC_TREE_NODE);
+
 			if (cursor->rel_pos == BTR_PCUR_ON) {
 #ifdef UNIV_DEBUG
-				rec_t*		rec;
-				ulint*		offsets1;
-				ulint*		offsets2;
+				const rec_t*	rec;
+				const ulint*	offsets1;
+				const ulint*	offsets2;
 #endif /* UNIV_DEBUG */
 				cursor->latch_mode = latch_mode;
 #ifdef UNIV_DEBUG
@@ -307,7 +319,7 @@ btr_pcur_restore_position(
 	cursor->search_mode = old_mode;
 
 	if (cursor->rel_pos == BTR_PCUR_ON
-	    && btr_pcur_is_on_user_rec(cursor, mtr)
+	    && btr_pcur_is_on_user_rec(cursor)
 	    && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
 				   rec_get_offsets(
 					   btr_pcur_get_rec(cursor), index,
@@ -317,8 +329,7 @@ btr_pcur_restore_position(
 		the cursor can now be on a different page! But we can retain
 		the value of old_rec */
 
-		cursor->block_when_stored = buf_block_align(
-			btr_pcur_get_page(cursor));
+		cursor->block_when_stored = btr_pcur_get_block(cursor);
 		cursor->modify_clock = buf_block_get_modify_clock(
 			cursor->block_when_stored);
 		cursor->old_stored = BTR_PCUR_OLD_STORED;
@@ -339,79 +350,85 @@ btr_pcur_restore_position(
 	return(FALSE);
 }
 
-/******************************************************************
+/**************************************************************//**
 If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
 releases the page latch and bufferfix reserved by the cursor.
 NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes
 made by the current mini-transaction to the data protected by the
 cursor latch, as then the latch must not be released until mtr_commit. */
-
+UNIV_INTERN
 void
 btr_pcur_release_leaf(
 /*==================*/
-	btr_pcur_t*	cursor, /* in: persistent cursor */
-	mtr_t*		mtr)	/* in: mtr */
+	btr_pcur_t*	cursor, /*!< in: persistent cursor */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
-	page_t*	page;
+	buf_block_t*	block;
 
 	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
-	page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor));
+	block = btr_pcur_get_block(cursor);
 
-	btr_leaf_page_release(page, cursor->latch_mode, mtr);
+	btr_leaf_page_release(block, cursor->latch_mode, mtr);
 
 	cursor->latch_mode = BTR_NO_LATCHES;
 
 	cursor->pos_state = BTR_PCUR_WAS_POSITIONED;
 }
 
-/*************************************************************
+/*********************************************************//**
 Moves the persistent cursor to the first record on the next page. Releases the
 latch on the current page, and bufferunfixes it. Note that there must not be
 modifications on the current page, as then the x-latch can be released only in
 mtr_commit. */
-
+UNIV_INTERN
 void
 btr_pcur_move_to_next_page(
 /*=======================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor; must be on the
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; must be on the
 				last record of the current page */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
-	ulint	next_page_no;
-	ulint	space;
-	page_t*	page;
-	page_t*	next_page;
+	ulint		next_page_no;
+	ulint		space;
+	ulint		zip_size;
+	page_t*		page;
+	buf_block_t*	next_block;
+	page_t*		next_page;
 
 	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-	ut_ad(btr_pcur_is_after_last_on_page(cursor, mtr));
+	ut_ad(btr_pcur_is_after_last_on_page(cursor));
 
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 
 	page = btr_pcur_get_page(cursor);
-
 	next_page_no = btr_page_get_next(page, mtr);
-	space = buf_frame_get_space_id(page);
+	space = buf_block_get_space(btr_pcur_get_block(cursor));
+	zip_size = buf_block_get_zip_size(btr_pcur_get_block(cursor));
 
 	ut_ad(next_page_no != FIL_NULL);
 
-	next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr);
+	next_block = btr_block_get(space, zip_size, next_page_no,
+				   cursor->latch_mode, mtr);
+	next_page = buf_block_get_frame(next_block);
 #ifdef UNIV_BTR_DEBUG
-	ut_a(btr_page_get_prev(next_page, mtr) == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
 	ut_a(page_is_comp(next_page) == page_is_comp(page));
-	buf_block_align(next_page)->check_index_page_at_flush = TRUE;
+	ut_a(btr_page_get_prev(next_page, mtr)
+	     == buf_block_get_page_no(btr_pcur_get_block(cursor)));
+#endif /* UNIV_BTR_DEBUG */
+	next_block->check_index_page_at_flush = TRUE;
 
-	btr_leaf_page_release(page, cursor->latch_mode, mtr);
+	btr_leaf_page_release(btr_pcur_get_block(cursor),
+			      cursor->latch_mode, mtr);
 
-	page_cur_set_before_first(next_page, btr_pcur_get_page_cur(cursor));
+	page_cur_set_before_first(next_block, btr_pcur_get_page_cur(cursor));
 
 	page_check_dir(next_page);
 }
 
-/*************************************************************
+/*********************************************************//**
 Moves the persistent cursor backward if it is on the first record of the page.
 Commits mtr. Note that to prevent a possible deadlock, the operation
 first stores the position of the cursor, commits mtr, acquires the necessary
@@ -420,24 +437,24 @@ alphabetical position of the cursor is guaranteed to be sensible on
 return, but it may happen that the cursor is not positioned on the last
 record of any page, because the structure of the tree may have changed
 during the time when the cursor had no latches. */
-
+UNIV_INTERN
 void
 btr_pcur_move_backward_from_page(
 /*=============================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor, must be on the first
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor, must be on the first
 				record of the current page */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
-	ulint	prev_page_no;
-	ulint	space;
-	page_t*	page;
-	page_t*	prev_page;
-	ulint	latch_mode;
-	ulint	latch_mode2;
+	ulint		prev_page_no;
+	ulint		space;
+	page_t*		page;
+	buf_block_t*	prev_block;
+	ulint		latch_mode;
+	ulint		latch_mode2;
 
 	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-	ut_ad(btr_pcur_is_before_first_on_page(cursor, mtr));
+	ut_ad(btr_pcur_is_before_first_on_page(cursor));
 	ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));
 
 	latch_mode = cursor->latch_mode;
@@ -465,26 +482,27 @@ btr_pcur_move_backward_from_page(
 	page = btr_pcur_get_page(cursor);
 
 	prev_page_no = btr_page_get_prev(page, mtr);
-	space = buf_frame_get_space_id(page);
+	space = buf_block_get_space(btr_pcur_get_block(cursor));
 
-	if (btr_pcur_is_before_first_on_page(cursor, mtr)
-	    && (prev_page_no != FIL_NULL)) {
+	if (prev_page_no == FIL_NULL) {
+	} else if (btr_pcur_is_before_first_on_page(cursor)) {
 
-		prev_page = btr_pcur_get_btr_cur(cursor)->left_page;
+		prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
 
-		btr_leaf_page_release(page, latch_mode, mtr);
+		btr_leaf_page_release(btr_pcur_get_block(cursor),
+				      latch_mode, mtr);
 
-		page_cur_set_after_last(prev_page,
+		page_cur_set_after_last(prev_block,
 					btr_pcur_get_page_cur(cursor));
-	} else if (prev_page_no != FIL_NULL) {
+	} else {
 
 		/* The repositioned cursor did not end on an infimum record on
 		a page. Cursor repositioning acquired a latch also on the
 		previous page, but we do not need the latch: release it. */
 
-		prev_page = btr_pcur_get_btr_cur(cursor)->left_page;
+		prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
 
-		btr_leaf_page_release(prev_page, latch_mode, mtr);
+		btr_leaf_page_release(prev_block, latch_mode, mtr);
 	}
 
 	cursor->latch_mode = latch_mode;
@@ -492,25 +510,24 @@ btr_pcur_move_backward_from_page(
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 }
 
-/*************************************************************
+/*********************************************************//**
 Moves the persistent cursor to the previous record in the tree. If no records
-are left, the cursor stays 'before first in tree'. */
-
+are left, the cursor stays 'before first in tree'.
+@return	TRUE if the cursor was not before first in tree */
+UNIV_INTERN
 ibool
 btr_pcur_move_to_prev(
 /*==================*/
-				/* out: TRUE if the cursor was not before first
-				in tree */
-	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
 				function may release the page latch */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 
-	if (btr_pcur_is_before_first_on_page(cursor, mtr)) {
+	if (btr_pcur_is_before_first_on_page(cursor)) {
 
 		if (btr_pcur_is_before_first_in_tree(cursor, mtr)) {
 
@@ -522,36 +539,36 @@ btr_pcur_move_to_prev(
 		return(TRUE);
 	}
 
-	btr_pcur_move_to_prev_on_page(cursor, mtr);
+	btr_pcur_move_to_prev_on_page(cursor);
 
 	return(TRUE);
 }
 
-/******************************************************************
+/**************************************************************//**
 If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
 user record satisfying the search condition, in the case PAGE_CUR_L or
 PAGE_CUR_LE, on the last user record. If no such user record exists, then
 in the first case sets the cursor after last in tree, and in the latter case
 before first in tree. The latching mode must be BTR_SEARCH_LEAF or
 BTR_MODIFY_LEAF. */
-
+UNIV_INTERN
 void
 btr_pcur_open_on_user_rec(
 /*======================*/
-	dict_index_t*	index,		/* in: index */
-	dtuple_t*	tuple,		/* in: tuple on which search done */
-	ulint		mode,		/* in: PAGE_CUR_L, ... */
-	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF or
+	dict_index_t*	index,		/*!< in: index */
+	const dtuple_t*	tuple,		/*!< in: tuple on which search done */
+	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF or
 					BTR_MODIFY_LEAF */
-	btr_pcur_t*	cursor,		/* in: memory buffer for persistent
+	btr_pcur_t*	cursor,		/*!< in: memory buffer for persistent
 					cursor */
-	mtr_t*		mtr)		/* in: mtr */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr);
 
 	if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) {
 
-		if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+		if (btr_pcur_is_after_last_on_page(cursor)) {
 
 			btr_pcur_move_to_next_user_rec(cursor, mtr);
 		}
diff --git a/storage/innobase/btr/btr0sea.c b/storage/innodb_plugin/btr/btr0sea.c
similarity index 75%
rename from storage/innobase/btr/btr0sea.c
rename to storage/innodb_plugin/btr/btr0sea.c
index 8d296fdd061..faa1c13897e 100644
--- a/storage/innobase/btr/btr0sea.c
+++ b/storage/innodb_plugin/btr/btr0sea.c
@@ -1,7 +1,31 @@
-/************************************************************************
-The index tree adaptive search
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file btr/btr0sea.c
+The index tree adaptive search
 
 Created 2/17/1996 Heikki Tuuri
 *************************************************************************/
@@ -19,47 +43,55 @@ Created 2/17/1996 Heikki Tuuri
 #include "btr0btr.h"
 #include "ha0ha.h"
 
-ulint	btr_search_this_is_zero = 0;	/* A dummy variable to fool the
-					compiler */
+/** Flag: has the search system been enabled?
+Protected by btr_search_latch and btr_search_enabled_mutex. */
+UNIV_INTERN char		btr_search_enabled	= TRUE;
+
+/** Mutex protecting btr_search_enabled */
+static mutex_t			btr_search_enabled_mutex;
+
+/** A dummy variable to fool the compiler */
+UNIV_INTERN ulint		btr_search_this_is_zero = 0;
 
 #ifdef UNIV_SEARCH_PERF_STAT
-ulint	btr_search_n_succ	= 0;
-ulint	btr_search_n_hash_fail	= 0;
+/** Number of successful adaptive hash index lookups */
+UNIV_INTERN ulint		btr_search_n_succ	= 0;
+/** Number of failed adaptive hash index lookups */
+UNIV_INTERN ulint		btr_search_n_hash_fail	= 0;
 #endif /* UNIV_SEARCH_PERF_STAT */
 
-byte	btr_sea_pad1[64];	/* padding to prevent other memory update
-				hotspots from residing on the same memory
-				cache line as btr_search_latch */
+/** padding to prevent other memory update
+hotspots from residing on the same memory
+cache line as btr_search_latch */
+UNIV_INTERN byte		btr_sea_pad1[64];
 
-/* The latch protecting the adaptive search system: this latch protects the
+/** The latch protecting the adaptive search system: this latch protects the
 (1) positions of records on those pages where a hash index has been built.
 NOTE: It does not protect values of non-ordering fields within a record from
 being updated in-place! We can use fact (1) to perform unique searches to
 indexes. */
 
-rw_lock_t*	btr_search_latch_temp; /* We will allocate the latch from
-					dynamic memory to get it to the
-					same DRAM page as other hotspot
-					semaphores */
+/* We will allocate the latch from dynamic memory to get it to the
+same DRAM page as other hotspot semaphores */
+UNIV_INTERN rw_lock_t*		btr_search_latch_temp;
 
-byte	btr_sea_pad2[64];	/* padding to prevent other memory update
-				hotspots from residing on the same memory
-				cache line */
+/** padding to prevent other memory update hotspots from residing on
+the same memory cache line */
+UNIV_INTERN byte		btr_sea_pad2[64];
 
-btr_search_sys_t*	btr_search_sys;
+/** The adaptive hash index */
+UNIV_INTERN btr_search_sys_t*	btr_search_sys;
 
-/* If the number of records on the page divided by this parameter
+/** If the number of records on the page divided by this parameter
 would have been successfully accessed using a hash index, the index
 is then built on the page, assuming the global limit has been reached */
-
 #define BTR_SEARCH_PAGE_BUILD_LIMIT	16
 
-/* The global limit for consecutive potentially successful hash searches,
+/** The global limit for consecutive potentially successful hash searches,
 before hash index building is started */
-
 #define BTR_SEARCH_BUILD_LIMIT		100
 
-/************************************************************************
+/********************************************************************//**
 Builds a hash index on a page with the given parameters. If the page already
 has a hash index with different parameters, the old hash index is removed.
 If index is non-NULL, this function checks if n_fields and n_bytes are
@@ -68,15 +100,15 @@ static
 void
 btr_search_build_page_hash_index(
 /*=============================*/
-	dict_index_t*	index,	/* in: index for which to build, or NULL if
+	dict_index_t*	index,	/*!< in: index for which to build, or NULL if
 				not known */
-	page_t*		page,	/* in: index page, s- or x-latched */
-	ulint		n_fields,/* in: hash this many full fields */
-	ulint		n_bytes,/* in: hash this many bytes from the next
+	buf_block_t*	block,	/*!< in: index page, s- or x-latched */
+	ulint		n_fields,/*!< in: hash this many full fields */
+	ulint		n_bytes,/*!< in: hash this many bytes from the next
 				field */
-	ibool		left_side);/* in: hash for searches from left side? */
+	ibool		left_side);/*!< in: hash for searches from left side? */
 
-/*********************************************************************
+/*****************************************************************//**
 This function should be called before reserving any btr search mutex, if
 the intended operation might add nodes to the search system hash table.
 Because of the latching order, once we have reserved the btr search system
@@ -91,7 +123,6 @@ void
 btr_search_check_free_space_in_heap(void)
 /*=====================================*/
 {
-	buf_frame_t*	frame;
 	hash_table_t*	table;
 	mem_heap_t*	heap;
 
@@ -109,27 +140,27 @@ btr_search_check_free_space_in_heap(void)
 	be enough free space in the hash table. */
 
 	if (heap->free_block == NULL) {
-		frame = buf_frame_alloc();
+		buf_block_t*	block = buf_block_alloc(0);
 
 		rw_lock_x_lock(&btr_search_latch);
 
 		if (heap->free_block == NULL) {
-			heap->free_block = frame;
+			heap->free_block = block;
 		} else {
-			buf_frame_free(frame);
+			buf_block_free(block);
 		}
 
 		rw_lock_x_unlock(&btr_search_latch);
 	}
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Creates and initializes the adaptive search system at a database start. */
-
+UNIV_INTERN
 void
 btr_search_sys_create(
 /*==================*/
-	ulint	hash_size)	/* in: hash index hash table size */
+	ulint	hash_size)	/*!< in: hash index hash table size */
 {
 	/* We allocate the search latch from dynamic memory:
 	see above at the global variable definition */
@@ -137,21 +168,60 @@ btr_search_sys_create(
 	btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t));
 
 	rw_lock_create(&btr_search_latch, SYNC_SEARCH_SYS);
+	mutex_create(&btr_search_enabled_mutex, SYNC_SEARCH_SYS_CONF);
 
 	btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));
 
-	btr_search_sys->hash_index = ha_create(TRUE, hash_size, 0, 0);
-
+	btr_search_sys->hash_index = ha_create(hash_size, 0, 0);
 }
 
-/*********************************************************************
-Creates and initializes a search info struct. */
+/********************************************************************//**
+Disable the adaptive hash search system and empty the index. */
+UNIV_INTERN
+void
+btr_search_disable(void)
+/*====================*/
+{
+	mutex_enter(&btr_search_enabled_mutex);
+	rw_lock_x_lock(&btr_search_latch);
 
+	btr_search_enabled = FALSE;
+
+	/* Clear all block->is_hashed flags and remove all entries
+	from btr_search_sys->hash_index. */
+	buf_pool_drop_hash_index();
+
+	/* btr_search_enabled_mutex should guarantee this. */
+	ut_ad(!btr_search_enabled);
+
+	rw_lock_x_unlock(&btr_search_latch);
+	mutex_exit(&btr_search_enabled_mutex);
+}
+
+/********************************************************************//**
+Enable the adaptive hash search system. */
+UNIV_INTERN
+void
+btr_search_enable(void)
+/*====================*/
+{
+	mutex_enter(&btr_search_enabled_mutex);
+	rw_lock_x_lock(&btr_search_latch);
+
+	btr_search_enabled = TRUE;
+
+	rw_lock_x_unlock(&btr_search_latch);
+	mutex_exit(&btr_search_enabled_mutex);
+}
+
+/*****************************************************************//**
+Creates and initializes a search info struct.
+@return	own: search info struct */
+UNIV_INTERN
 btr_search_t*
 btr_search_info_create(
 /*===================*/
-				/* out, own: search info struct */
-	mem_heap_t*	heap)	/* in: heap where created */
+	mem_heap_t*	heap)	/*!< in: heap where created */
 {
 	btr_search_t*	info;
 
@@ -185,14 +255,15 @@ btr_search_info_create(
 	return(info);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Returns the value of ref_count. The value is protected by
-btr_search_latch. */
+btr_search_latch.
+@return	ref_count value. */
+UNIV_INTERN
 ulint
 btr_search_info_get_ref_count(
 /*==========================*/
-				/* out: ref_count value. */
-	btr_search_t*   info)	/* in: search info. */
+	btr_search_t*   info)	/*!< in: search info. */
 {
 	ulint ret;
 
@@ -210,7 +281,7 @@ btr_search_info_get_ref_count(
 	return(ret);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Updates the search info of an index about hash successes. NOTE that info
 is NOT protected by any semaphore, to save CPU time! Do not assume its fields
 are consistent. */
@@ -218,8 +289,8 @@ static
 void
 btr_search_info_update_hash(
 /*========================*/
-	btr_search_t*	info,	/* in/out: search info */
-	btr_cur_t*	cursor)	/* in: cursor which was just positioned */
+	btr_search_t*	info,	/*!< in/out: search info */
+	btr_cur_t*	cursor)	/*!< in: cursor which was just positioned */
 {
 	dict_index_t*	index;
 	ulint		n_unique;
@@ -232,7 +303,7 @@ btr_search_info_update_hash(
 
 	index = cursor->index;
 
-	if (index->type & DICT_IBUF) {
+	if (dict_index_is_ibuf(index)) {
 		/* So many deletes are performed on an insert buffer tree
 		that we do not consider a hash index useful on it: */
 
@@ -330,32 +401,31 @@ set_new_recomm:
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Updates the block search info on hash successes. NOTE that info and
 block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any
-semaphore, to save CPU time! Do not assume the fields are consistent. */
+semaphore, to save CPU time! Do not assume the fields are consistent.
+@return	TRUE if building a (new) hash index on the block is recommended */
 static
 ibool
 btr_search_update_block_hash_info(
 /*==============================*/
-				/* out: TRUE if building a (new) hash index on
-				the block is recommended */
-	btr_search_t*	info,	/* in: search info */
-	buf_block_t*	block,	/* in: buffer block */
+	btr_search_t*	info,	/*!< in: search info */
+	buf_block_t*	block,	/*!< in: buffer block */
 	btr_cur_t*	cursor __attribute__((unused)))
-				/* in: cursor */
+				/*!< in: cursor */
 {
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
 	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-	ut_ad(rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_SHARED)
-	      || rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_EX));
+	ut_ad(rw_lock_own(&block->lock, RW_LOCK_SHARED)
+	      || rw_lock_own(&block->lock, RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
 	ut_ad(cursor);
 
 	info->last_hash_succ = FALSE;
 
-	ut_a(block->magic_n == BUF_BLOCK_MAGIC_N);
+	ut_a(buf_block_state_valid(block));
 	ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N);
 
 	if ((block->n_hash_helps > 0)
@@ -409,7 +479,7 @@ btr_search_update_block_hash_info(
 	return(FALSE);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Updates a hash node reference when it has been unsuccessfully used in a
 search which could have succeeded with the used hash parameters. This can
 happen because when building a hash index for a page, we do not check
@@ -421,9 +491,9 @@ static
 void
 btr_search_update_hash_ref(
 /*=======================*/
-	btr_search_t*	info,	/* in: search info */
-	buf_block_t*	block,	/* in: buffer block where cursor positioned */
-	btr_cur_t*	cursor)	/* in: cursor */
+	btr_search_t*	info,	/*!< in: search info */
+	buf_block_t*	block,	/*!< in: buffer block where cursor positioned */
+	btr_cur_t*	cursor)	/*!< in: cursor */
 {
 	ulint	fold;
 	rec_t*	rec;
@@ -435,17 +505,24 @@ btr_search_update_hash_ref(
 	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
 	      || rw_lock_own(&(block->lock), RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
-	ut_ad(buf_block_align(btr_cur_get_rec(cursor)) == block);
-	ut_a(!block->is_hashed || block->index == cursor->index);
+	ut_ad(page_align(btr_cur_get_rec(cursor))
+	      == buf_block_get_frame(block));
 
-	if (block->is_hashed
-	    && (info->n_hash_potential > 0)
+	if (!block->is_hashed) {
+
+		return;
+	}
+
+	ut_a(block->index == cursor->index);
+	ut_a(!dict_index_is_ibuf(cursor->index));
+
+	if ((info->n_hash_potential > 0)
 	    && (block->curr_n_fields == info->n_fields)
 	    && (block->curr_n_bytes == info->n_bytes)
 	    && (block->curr_left_side == info->left_side)) {
 		mem_heap_t*	heap		= NULL;
 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+		rec_offs_init(offsets_);
 
 		rec = btr_cur_get_rec(cursor);
 
@@ -467,18 +544,19 @@ btr_search_update_hash_ref(
 		ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
 
-		ha_insert_for_fold(btr_search_sys->hash_index, fold, rec);
+		ha_insert_for_fold(btr_search_sys->hash_index, fold,
+				   block, rec);
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Updates the search info. */
-
+UNIV_INTERN
 void
 btr_search_info_update_slow(
 /*========================*/
-	btr_search_t*	info,	/* in/out: search info */
-	btr_cur_t*	cursor)	/* in: cursor which was just positioned */
+	btr_search_t*	info,	/*!< in/out: search info */
+	btr_cur_t*	cursor)	/*!< in: cursor which was just positioned */
 {
 	buf_block_t*	block;
 	ibool		build_index;
@@ -490,7 +568,7 @@ btr_search_info_update_slow(
 	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
 
-	block = buf_block_align(btr_cur_get_rec(cursor));
+	block = btr_cur_get_block(cursor);
 
 	/* NOTE that the following two function calls do NOT protect
 	info or block->n_fields etc. with any semaphore, to save CPU time!
@@ -540,7 +618,7 @@ btr_search_info_update_slow(
 		params2 = params + btr_search_this_is_zero;
 
 		btr_search_build_page_hash_index(cursor->index,
-						 block->frame,
+						 block,
 						 params2[0],
 						 params2[1],
 						 params2[2]);
@@ -548,28 +626,28 @@ btr_search_info_update_slow(
 	}
 }
 
-/**********************************************************************
+/******************************************************************//**
 Checks if a guessed position for a tree cursor is right. Note that if
 mode is PAGE_CUR_LE, which is used in inserts, and the function returns
-TRUE, then cursor->up_match and cursor->low_match both have sensible values. */
+TRUE, then cursor->up_match and cursor->low_match both have sensible values.
+@return	TRUE if success */
 static
 ibool
 btr_search_check_guess(
 /*===================*/
-				/* out: TRUE if success */
-	btr_cur_t*	cursor,	/* in: guessed cursor position */
+	btr_cur_t*	cursor,	/*!< in: guessed cursor position */
 	ibool		can_only_compare_to_cursor_rec,
-				/* in: if we do not have a latch on the page
+				/*!< in: if we do not have a latch on the page
 				of cursor, but only a latch on
 				btr_search_latch, then ONLY the columns
 				of the record UNDER the cursor are
 				protected, not the next or previous record
 				in the chain: we cannot look at the next or
 				previous record to check our guess! */
-	dtuple_t*	tuple,	/* in: data tuple */
-	ulint		mode,	/* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
+	const dtuple_t*	tuple,	/*!< in: data tuple */
+	ulint		mode,	/*!< in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
 				or PAGE_CUR_GE */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	rec_t*		rec;
 	ulint		n_unique;
@@ -580,7 +658,7 @@ btr_search_check_guess(
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
 	ibool		success		= FALSE;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	n_unique = dict_index_get_n_unique_in_tree(cursor->index);
 
@@ -641,8 +719,8 @@ btr_search_check_guess(
 		prev_rec = page_rec_get_prev(rec);
 
 		if (page_rec_is_infimum(prev_rec)) {
-			success = btr_page_get_prev(
-				buf_frame_align(prev_rec), mtr) == FIL_NULL;
+			success = btr_page_get_prev(page_align(prev_rec), mtr)
+				== FIL_NULL;
 
 			goto exit_func;
 		}
@@ -666,8 +744,7 @@ btr_search_check_guess(
 		next_rec = page_rec_get_next(rec);
 
 		if (page_rec_is_supremum(next_rec)) {
-			if (btr_page_get_next(
-				    buf_frame_align(next_rec), mtr)
+			if (btr_page_get_next(page_align(next_rec), mtr)
 			    == FIL_NULL) {
 
 				cursor->up_match = 0;
@@ -695,39 +772,36 @@ exit_func:
 	return(success);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Tries to guess the right search position based on the hash search info
 of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
 and the function returns TRUE, then cursor->up_match and cursor->low_match
-both have sensible values. */
-
+both have sensible values.
+@return	TRUE if succeeded */
+UNIV_INTERN
 ibool
 btr_search_guess_on_hash(
 /*=====================*/
-					/* out: TRUE if succeeded */
-	dict_index_t*	index,		/* in: index */
-	btr_search_t*	info,		/* in: index search info */
-	dtuple_t*	tuple,		/* in: logical record */
-	ulint		mode,		/* in: PAGE_CUR_L, ... */
-	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ...;
+	dict_index_t*	index,		/*!< in: index */
+	btr_search_t*	info,		/*!< in: index search info */
+	const dtuple_t*	tuple,		/*!< in: logical record */
+	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ...;
 					NOTE that only if has_search_latch
 					is 0, we will have a latch set on
 					the cursor page, otherwise we assume
 					the caller uses his search latch
 					to protect the record! */
-	btr_cur_t*	cursor,		/* out: tree cursor */
-	ulint		has_search_latch,/* in: latch mode the caller
+	btr_cur_t*	cursor,		/*!< out: tree cursor */
+	ulint		has_search_latch,/*!< in: latch mode the caller
 					currently has on btr_search_latch:
 					RW_S_LATCH, RW_X_LATCH, or 0 */
-	mtr_t*		mtr)		/* in: mtr */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	buf_block_t*	block;
 	rec_t*		rec;
-	page_t*		page;
 	ulint		fold;
-	ulint		tuple_n_fields;
 	dulint		index_id;
-	ibool		can_only_compare_to_cursor_rec = TRUE;
 #ifdef notdefined
 	btr_cur_t	cursor2;
 	btr_pcur_t	pcur;
@@ -747,15 +821,8 @@ btr_search_guess_on_hash(
 	cursor->n_fields = info->n_fields;
 	cursor->n_bytes = info->n_bytes;
 
-	tuple_n_fields = dtuple_get_n_fields(tuple);
-
-	if (UNIV_UNLIKELY(tuple_n_fields < cursor->n_fields)) {
-
-		return(FALSE);
-	}
-
-	if (UNIV_UNLIKELY(tuple_n_fields == cursor->n_fields)
-	    && (cursor->n_bytes > 0)) {
+	if (UNIV_UNLIKELY(dtuple_get_n_fields(tuple)
+			  < cursor->n_fields + (cursor->n_bytes > 0))) {
 
 		return(FALSE);
 	}
@@ -772,6 +839,10 @@ btr_search_guess_on_hash(
 
 	if (UNIV_LIKELY(!has_search_latch)) {
 		rw_lock_s_lock(&btr_search_latch);
+
+		if (UNIV_UNLIKELY(!btr_search_enabled)) {
+			goto failure_unlock;
+		}
 	}
 
 	ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX);
@@ -783,12 +854,12 @@ btr_search_guess_on_hash(
 		goto failure_unlock;
 	}
 
-	page = buf_frame_align(rec);
+	block = buf_block_align(rec);
 
 	if (UNIV_LIKELY(!has_search_latch)) {
 
 		if (UNIV_UNLIKELY(
-			    !buf_page_get_known_nowait(latch_mode, page,
+			    !buf_page_get_known_nowait(latch_mode, block,
 						       BUF_MAKE_YOUNG,
 						       __FILE__, __LINE__,
 						       mtr))) {
@@ -796,28 +867,24 @@ btr_search_guess_on_hash(
 		}
 
 		rw_lock_s_unlock(&btr_search_latch);
-		can_only_compare_to_cursor_rec = FALSE;
 
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
-#endif /* UNIV_SYNC_DEBUG */
+		buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
 	}
 
-	block = buf_block_align(page);
+	if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
+		ut_ad(buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH);
 
-	if (UNIV_UNLIKELY(block->state == BUF_BLOCK_REMOVE_HASH)) {
 		if (UNIV_LIKELY(!has_search_latch)) {
 
-			btr_leaf_page_release(page, latch_mode, mtr);
+			btr_leaf_page_release(block, latch_mode, mtr);
 		}
 
 		goto failure;
 	}
 
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
 	ut_ad(page_rec_is_user_rec(rec));
 
-	btr_cur_position(index, rec, cursor);
+	btr_cur_position(index, rec, block, cursor);
 
 	/* Check the validity of the guess within the page */
 
@@ -826,13 +893,13 @@ btr_search_guess_on_hash(
 	is positioned on. We cannot look at the next of the previous
 	record to determine if our guess for the cursor position is
 	right. */
-	if (UNIV_EXPECT(
-		    ut_dulint_cmp(index_id, btr_page_get_index_id(page)), 0)
+	if (UNIV_EXPECT
+	    (ut_dulint_cmp(index_id, btr_page_get_index_id(block->frame)), 0)
 	    || !btr_search_check_guess(cursor,
-				       can_only_compare_to_cursor_rec,
+				       has_search_latch,
 				       tuple, mode, mtr)) {
 		if (UNIV_LIKELY(!has_search_latch)) {
-			btr_leaf_page_release(page, latch_mode, mtr);
+			btr_leaf_page_release(block, latch_mode, mtr);
 		}
 
 		goto failure;
@@ -852,7 +919,7 @@ btr_search_guess_on_hash(
 	/* Currently, does not work if the following fails: */
 	ut_ad(!has_search_latch);
 
-	btr_leaf_page_release(page, latch_mode, mtr);
+	btr_leaf_page_release(block, latch_mode, mtr);
 
 	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
 				    &cursor2, 0, mtr);
@@ -882,9 +949,9 @@ btr_search_guess_on_hash(
 	btr_search_n_succ++;
 #endif
 	if (UNIV_LIKELY(!has_search_latch)
-	    && buf_block_peek_if_too_old(block)) {
+	    && buf_page_peek_if_too_old(&block->page)) {
 
-		buf_page_make_young(page);
+		buf_page_make_young(&block->page);
 	}
 
 	/* Increment the page get statistics though we did not really
@@ -914,39 +981,41 @@ failure:
 	return(FALSE);
 }
 
-/************************************************************************
+/********************************************************************//**
 Drops a page hash index. */
-
+UNIV_INTERN
 void
 btr_search_drop_page_hash_index(
 /*============================*/
-	page_t*	page)	/* in: index page, s- or x-latched, or an index page
-			for which we know that block->buf_fix_count == 0 */
+	buf_block_t*	block)	/*!< in: block containing index page,
+				s- or x-latched, or an index page
+				for which we know that
+				block->buf_fix_count == 0 */
 {
-	hash_table_t*	table;
-	buf_block_t*	block;
-	ulint		n_fields;
-	ulint		n_bytes;
-	rec_t*		rec;
-	ulint		fold;
-	ulint		prev_fold;
-	dulint		index_id;
-	ulint		n_cached;
-	ulint		n_recs;
-	ulint*		folds;
-	ulint		i;
-	mem_heap_t*	heap;
-	dict_index_t*	index;
-	ulint*		offsets;
+	hash_table_t*		table;
+	ulint			n_fields;
+	ulint			n_bytes;
+	const page_t*		page;
+	const rec_t*		rec;
+	ulint			fold;
+	ulint			prev_fold;
+	dulint			index_id;
+	ulint			n_cached;
+	ulint			n_recs;
+	ulint*			folds;
+	ulint			i;
+	mem_heap_t*		heap;
+	const dict_index_t*	index;
+	ulint*			offsets;
 
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
 	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
+
 retry:
 	rw_lock_s_lock(&btr_search_latch);
-
-	block = buf_block_align(page);
+	page = block->frame;
 
 	if (UNIV_LIKELY(!block->is_hashed)) {
 
@@ -960,12 +1029,13 @@ retry:
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
 	      || rw_lock_own(&(block->lock), RW_LOCK_EX)
-	      || (block->buf_fix_count == 0));
+	      || (block->page.buf_fix_count == 0));
 #endif /* UNIV_SYNC_DEBUG */
 
 	n_fields = block->curr_n_fields;
 	n_bytes = block->curr_n_bytes;
 	index = block->index;
+	ut_a(!dict_index_is_ibuf(index));
 
 	/* NOTE: The fields of block must not be accessed after
 	releasing btr_search_latch, as the index page might only
@@ -985,7 +1055,7 @@ retry:
 	n_cached = 0;
 
 	rec = page_get_infimum_rec(page);
-	rec = page_rec_get_next(rec);
+	rec = page_rec_get_next_low(rec, page_is_comp(page));
 
 	index_id = btr_page_get_index_id(page);
 
@@ -1013,7 +1083,7 @@ retry:
 		folds[n_cached] = fold;
 		n_cached++;
 next_rec:
-		rec = page_rec_get_next(rec);
+		rec = page_rec_get_next_low(rec, page_rec_is_comp(rec));
 		prev_fold = fold;
 	}
 
@@ -1055,6 +1125,7 @@ next_rec:
 	block->index = NULL;
 	
 cleanup:
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 	if (UNIV_UNLIKELY(block->n_pointers)) {
 		/* Corruption */
 		ut_print_timestamp(stderr);
@@ -1070,27 +1141,29 @@ cleanup:
 	} else {
 		rw_lock_x_unlock(&btr_search_latch);
 	}
+#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	rw_lock_x_unlock(&btr_search_latch);
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
 
 	mem_free(folds);
 }
 
-/************************************************************************
+/********************************************************************//**
 Drops a page hash index when a page is freed from a fseg to the file system.
 Drops possible hash index if the page happens to be in the buffer pool. */
-
+UNIV_INTERN
 void
 btr_search_drop_page_hash_when_freed(
 /*=================================*/
-	ulint	space,		/* in: space id */
-	ulint	page_no)	/* in: page number */
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no)	/*!< in: page number */
 {
-	ibool	is_hashed;
-	page_t*	page;
-	mtr_t	mtr;
+	buf_block_t*	block;
+	mtr_t		mtr;
 
-	is_hashed = buf_page_peek_if_search_hashed(space, page_no);
-
-	if (!is_hashed) {
+	if (!buf_page_peek_if_search_hashed(space, page_no)) {
 
 		return;
 	}
@@ -1102,7 +1175,7 @@ btr_search_drop_page_hash_when_freed(
 	get here. Therefore we can acquire the s-latch to the page without
 	having to fear a deadlock. */
 
-	page = buf_page_get_gen(space, page_no, RW_S_LATCH, NULL,
+	block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL,
 				BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
 				&mtr);
 	/* Because the buffer pool mutex was released by
@@ -1111,19 +1184,17 @@ btr_search_drop_page_hash_when_freed(
 	before buf_page_get_gen() got a chance to acquire the buffer
 	pool mutex again.  Thus, we must check for a NULL return. */
 
-	if (UNIV_LIKELY(page != NULL)) {
+	if (UNIV_LIKELY(block != NULL)) {
 
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
-#endif /* UNIV_SYNC_DEBUG */
+		buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
 
-		btr_search_drop_page_hash_index(page);
+		btr_search_drop_page_hash_index(block);
 	}
 
 	mtr_commit(&mtr);
 }
 
-/************************************************************************
+/********************************************************************//**
 Builds a hash index on a page with the given parameters. If the page already
 has a hash index with different parameters, the old hash index is removed.
 If index is non-NULL, this function checks if n_fields and n_bytes are
@@ -1132,15 +1203,15 @@ static
 void
 btr_search_build_page_hash_index(
 /*=============================*/
-	dict_index_t*	index,	/* in: index for which to build */
-	page_t*		page,	/* in: index page, s- or x-latched */
-	ulint		n_fields,/* in: hash this many full fields */
-	ulint		n_bytes,/* in: hash this many bytes from the next
+	dict_index_t*	index,	/*!< in: index for which to build */
+	buf_block_t*	block,	/*!< in: index page, s- or x-latched */
+	ulint		n_fields,/*!< in: hash this many full fields */
+	ulint		n_bytes,/*!< in: hash this many bytes from the next
 				field */
-	ibool		left_side)/* in: hash for searches from left side? */
+	ibool		left_side)/*!< in: hash for searches from left side? */
 {
 	hash_table_t*	table;
-	buf_block_t*	block;
+	page_t*		page;
 	rec_t*		rec;
 	rec_t*		next_rec;
 	ulint		fold;
@@ -1154,12 +1225,13 @@ btr_search_build_page_hash_index(
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	ut_ad(index);
+	ut_a(!dict_index_is_ibuf(index));
 
-	block = buf_block_align(page);
 	table = btr_search_sys->hash_index;
+	page = buf_block_get_frame(block);
 
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
@@ -1175,7 +1247,7 @@ btr_search_build_page_hash_index(
 
 		rw_lock_s_unlock(&btr_search_latch);
 
-		btr_search_drop_page_hash_index(page);
+		btr_search_drop_page_hash_index(block);
 	} else {
 		rw_lock_s_unlock(&btr_search_latch);
 	}
@@ -1210,8 +1282,7 @@ btr_search_build_page_hash_index(
 
 	index_id = btr_page_get_index_id(page);
 
-	rec = page_get_infimum_rec(page);
-	rec = page_rec_get_next(rec);
+	rec = page_rec_get_next(page_get_infimum_rec(page));
 
 	offsets = rec_get_offsets(rec, index, offsets,
 				  n_fields + (n_bytes > 0), &heap);
@@ -1276,6 +1347,10 @@ btr_search_build_page_hash_index(
 
 	rw_lock_x_lock(&btr_search_latch);
 
+	if (UNIV_UNLIKELY(!btr_search_enabled)) {
+		goto exit_func;
+	}
+
 	if (block->is_hashed && ((block->curr_n_fields != n_fields)
 				 || (block->curr_n_bytes != n_bytes)
 				 || (block->curr_left_side != left_side))) {
@@ -1301,7 +1376,7 @@ btr_search_build_page_hash_index(
 
 	for (i = 0; i < n_cached; i++) {
 
-		ha_insert_for_fold(table, folds[i], recs[i]);
+		ha_insert_for_fold(table, folds[i], block, recs[i]);
 	}
 
 exit_func:
@@ -1314,32 +1389,26 @@ exit_func:
 	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Moves or deletes hash entries for moved records. If new_page is already hashed,
 then the hash index for page, if any, is dropped. If new_page is not hashed,
 and page is hashed, then a new hash index is built to new_page with the same
 parameters as page (this often happens when a page is split). */
-
+UNIV_INTERN
 void
 btr_search_move_or_delete_hash_entries(
 /*===================================*/
-	page_t*		new_page,	/* in: records are copied
+	buf_block_t*	new_block,	/*!< in: records are copied
 					to this page */
-	page_t*		page,		/* in: index page from which
+	buf_block_t*	block,		/*!< in: index page from which
 					records were copied, and the
 					copied records will be deleted
 					from this page */
-	dict_index_t*	index)		/* in: record descriptor */
+	dict_index_t*	index)		/*!< in: record descriptor */
 {
-	buf_block_t*	block;
-	buf_block_t*	new_block;
-	ulint		n_fields;
-	ulint		n_bytes;
-	ibool		left_side;
-
-	block = buf_block_align(page);
-	new_block = buf_block_align(new_page);
-	ut_a(page_is_comp(page) == page_is_comp(new_page));
+	ulint	n_fields;
+	ulint	n_bytes;
+	ibool	left_side;
 
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
@@ -1347,6 +1416,8 @@ btr_search_move_or_delete_hash_entries(
 #endif /* UNIV_SYNC_DEBUG */
 	ut_a(!new_block->is_hashed || new_block->index == index);
 	ut_a(!block->is_hashed || block->index == index);
+	ut_a(!(new_block->is_hashed || block->is_hashed)
+	     || !dict_index_is_ibuf(index));
 
 	rw_lock_s_lock(&btr_search_latch);
 
@@ -1354,7 +1425,7 @@ btr_search_move_or_delete_hash_entries(
 
 		rw_lock_s_unlock(&btr_search_latch);
 
-		btr_search_drop_page_hash_index(page);
+		btr_search_drop_page_hash_index(block);
 
 		return;
 	}
@@ -1373,26 +1444,24 @@ btr_search_move_or_delete_hash_entries(
 
 		ut_a(n_fields + n_bytes > 0);
 
-		btr_search_build_page_hash_index(index, new_page, n_fields,
+		btr_search_build_page_hash_index(index, new_block, n_fields,
 						 n_bytes, left_side);
-#if 1 /* TODO: safe to remove? */
-		ut_a(n_fields == block->curr_n_fields);
-		ut_a(n_bytes == block->curr_n_bytes);
-		ut_a(left_side == block->curr_left_side);
-#endif
+		ut_ad(n_fields == block->curr_n_fields);
+		ut_ad(n_bytes == block->curr_n_bytes);
+		ut_ad(left_side == block->curr_left_side);
 		return;
 	}
 
 	rw_lock_s_unlock(&btr_search_latch);
 }
 
-/************************************************************************
+/********************************************************************//**
 Updates the page hash index when a single record is deleted from a page. */
-
+UNIV_INTERN
 void
 btr_search_update_hash_on_delete(
 /*=============================*/
-	btr_cur_t*	cursor)	/* in: cursor which was positioned on the
+	btr_cur_t*	cursor)	/*!< in: cursor which was positioned on the
 				record to delete using btr_cur_search_...,
 				the record is not yet deleted */
 {
@@ -1404,11 +1473,11 @@ btr_search_update_hash_on_delete(
 	ibool		found;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	mem_heap_t*	heap		= NULL;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	rec = btr_cur_get_rec(cursor);
 
-	block = buf_block_align(rec);
+	block = btr_cur_get_block(cursor);
 
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
@@ -1421,6 +1490,7 @@ btr_search_update_hash_on_delete(
 
 	ut_a(block->index == cursor->index);
 	ut_a(block->curr_n_fields + block->curr_n_bytes > 0);
+	ut_a(!dict_index_is_ibuf(cursor->index));
 
 	table = btr_search_sys->hash_index;
 
@@ -1438,13 +1508,13 @@ btr_search_update_hash_on_delete(
 	rw_lock_x_unlock(&btr_search_latch);
 }
 
-/************************************************************************
+/********************************************************************//**
 Updates the page hash index when a single record is inserted on a page. */
-
+UNIV_INTERN
 void
 btr_search_update_hash_node_on_insert(
 /*==================================*/
-	btr_cur_t*	cursor)	/* in: cursor which was positioned to the
+	btr_cur_t*	cursor)	/*!< in: cursor which was positioned to the
 				place to insert using btr_cur_search_...,
 				and the new record has been inserted next
 				to the cursor */
@@ -1455,7 +1525,7 @@ btr_search_update_hash_node_on_insert(
 
 	rec = btr_cur_get_rec(cursor);
 
-	block = buf_block_align(rec);
+	block = btr_cur_get_block(cursor);
 
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
@@ -1467,6 +1537,7 @@ btr_search_update_hash_node_on_insert(
 	}
 
 	ut_a(block->index == cursor->index);
+	ut_a(!dict_index_is_ibuf(cursor->index));
 
 	rw_lock_x_lock(&btr_search_latch);
 
@@ -1478,7 +1549,7 @@ btr_search_update_hash_node_on_insert(
 		table = btr_search_sys->hash_index;
 
 		ha_search_and_update_if_found(table, cursor->fold, rec,
-					      page_rec_get_next(rec));
+					      block, page_rec_get_next(rec));
 
 		rw_lock_x_unlock(&btr_search_latch);
 	} else {
@@ -1488,13 +1559,13 @@ btr_search_update_hash_node_on_insert(
 	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Updates the page hash index when a single record is inserted on a page. */
-
+UNIV_INTERN
 void
 btr_search_update_hash_on_insert(
 /*=============================*/
-	btr_cur_t*	cursor)	/* in: cursor which was positioned to the
+	btr_cur_t*	cursor)	/*!< in: cursor which was positioned to the
 				place to insert using btr_cur_search_...,
 				and the new record has been inserted next
 				to the cursor */
@@ -1515,7 +1586,7 @@ btr_search_update_hash_on_insert(
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	table = btr_search_sys->hash_index;
 
@@ -1523,7 +1594,7 @@ btr_search_update_hash_on_insert(
 
 	rec = btr_cur_get_rec(cursor);
 
-	block = buf_block_align(rec);
+	block = btr_cur_get_block(cursor);
 
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
@@ -1535,6 +1606,7 @@ btr_search_update_hash_on_insert(
 	}
 
 	ut_a(block->index == cursor->index);
+	ut_a(!dict_index_is_ibuf(cursor->index));
 
 	index_id = cursor->index->id;
 
@@ -1567,7 +1639,7 @@ btr_search_update_hash_on_insert(
 
 			locked = TRUE;
 
-			ha_insert_for_fold(table, ins_fold, ins_rec);
+			ha_insert_for_fold(table, ins_fold, block, ins_rec);
 		}
 
 		goto check_next_rec;
@@ -1583,9 +1655,9 @@ btr_search_update_hash_on_insert(
 		}
 
 		if (!left_side) {
-			ha_insert_for_fold(table, fold, rec);
+			ha_insert_for_fold(table, fold, block, rec);
 		} else {
-			ha_insert_for_fold(table, ins_fold, ins_rec);
+			ha_insert_for_fold(table, ins_fold, block, ins_rec);
 		}
 	}
 
@@ -1600,7 +1672,7 @@ check_next_rec:
 				locked = TRUE;
 			}
 
-			ha_insert_for_fold(table, ins_fold, ins_rec);
+			ha_insert_for_fold(table, ins_fold, block, ins_rec);
 		}
 
 		goto function_exit;
@@ -1617,14 +1689,14 @@ check_next_rec:
 
 		if (!left_side) {
 
-			ha_insert_for_fold(table, ins_fold, ins_rec);
+			ha_insert_for_fold(table, ins_fold, block, ins_rec);
 			/*
 			fputs("Hash insert for ", stderr);
 			dict_index_name_print(stderr, cursor->index);
 			fprintf(stderr, " fold %lu\n", ins_fold);
 			*/
 		} else {
-			ha_insert_for_fold(table, next_fold, next_rec);
+			ha_insert_for_fold(table, next_fold, block, next_rec);
 		}
 	}
 
@@ -1637,16 +1709,14 @@ function_exit:
 	}
 }
 
-/************************************************************************
-Validates the search system. */
-
+/********************************************************************//**
+Validates the search system.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 btr_search_validate(void)
 /*=====================*/
-				/* out: TRUE if ok */
 {
-	buf_block_t*	block;
-	page_t*		page;
 	ha_node_t*	node;
 	ulint		n_page_dumps	= 0;
 	ibool		ok		= TRUE;
@@ -1660,9 +1730,10 @@ btr_search_validate(void)
 	btr_search_latch. */
 	ulint		chunk_size = 10000;
 
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	rw_lock_x_lock(&btr_search_latch);
+	buf_pool_mutex_enter();
 
 	cell_count = hash_get_n_cells(btr_search_sys->hash_index);
 
@@ -1670,17 +1741,55 @@ btr_search_validate(void)
 		/* We release btr_search_latch every once in a while to
 		give other queries a chance to run. */
 		if ((i != 0) && ((i % chunk_size) == 0)) {
+			buf_pool_mutex_exit();
 			rw_lock_x_unlock(&btr_search_latch);
 			os_thread_yield();
 			rw_lock_x_lock(&btr_search_latch);
+			buf_pool_mutex_enter();
 		}
 
 		node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
 
-		while (node != NULL) {
-			block = buf_block_align(node->data);
-			page = buf_frame_align(node->data);
-			offsets = rec_get_offsets((rec_t*) node->data,
+		for (; node != NULL; node = node->next) {
+			const buf_block_t*	block
+				= buf_block_align(node->data);
+			const buf_block_t*	hash_block;
+
+			if (UNIV_LIKELY(buf_block_get_state(block)
+					== BUF_BLOCK_FILE_PAGE)) {
+
+				/* The space and offset are only valid
+				for file blocks.  It is possible that
+				the block is being freed
+				(BUF_BLOCK_REMOVE_HASH, see the
+				assertion and the comment below) */
+				hash_block = buf_block_hash_get(
+					buf_block_get_space(block),
+					buf_block_get_page_no(block));
+			} else {
+				hash_block = NULL;
+			}
+
+			if (hash_block) {
+				ut_a(hash_block == block);
+			} else {
+				/* When a block is being freed,
+				buf_LRU_search_and_free_block() first
+				removes the block from
+				buf_pool->page_hash by calling
+				buf_LRU_block_remove_hashed_page().
+				After that, it invokes
+				btr_search_drop_page_hash_index() to
+				remove the block from
+				btr_search_sys->hash_index. */
+
+				ut_a(buf_block_get_state(block)
+				     == BUF_BLOCK_REMOVE_HASH);
+			}
+
+			ut_a(!dict_index_is_ibuf(block->index));
+
+			offsets = rec_get_offsets((const rec_t*) node->data,
 						  block->index, offsets,
 						  block->curr_n_fields
 						  + (block->curr_n_bytes > 0),
@@ -1691,7 +1800,9 @@ btr_search_validate(void)
 					offsets,
 					block->curr_n_fields,
 					block->curr_n_bytes,
-					btr_page_get_index_id(page))) {
+					btr_page_get_index_id(block->frame))) {
+				const page_t*	page = block->frame;
+
 				ok = FALSE;
 				ut_print_timestamp(stderr);
 
@@ -1701,7 +1812,7 @@ btr_search_validate(void)
 					"InnoDB: ptr mem address %p"
 					" index id %lu %lu,"
 					" node fold %lu, rec fold %lu\n",
-					(ulong) buf_frame_get_page_no(page),
+					(ulong) page_get_page_no(page),
 					node->data,
 					(ulong) ut_dulint_get_high(
 						btr_page_get_index_id(page)),
@@ -1728,12 +1839,10 @@ btr_search_validate(void)
 					(ulong) block->curr_left_side);
 
 				if (n_page_dumps < 20) {
-					buf_page_print(page);
+					buf_page_print(page, 0);
 					n_page_dumps++;
 				}
 			}
-
-			node = node->next;
 		}
 	}
 
@@ -1743,9 +1852,11 @@ btr_search_validate(void)
 		/* We release btr_search_latch every once in a while to
 		give other queries a chance to run. */
 		if (i != 0) {
+			buf_pool_mutex_exit();
 			rw_lock_x_unlock(&btr_search_latch);
 			os_thread_yield();
 			rw_lock_x_lock(&btr_search_latch);
+			buf_pool_mutex_enter();
 		}
 
 		if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
@@ -1753,6 +1864,7 @@ btr_search_validate(void)
 		}
 	}
 
+	buf_pool_mutex_exit();
 	rw_lock_x_unlock(&btr_search_latch);
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
diff --git a/storage/innodb_plugin/buf/buf0buddy.c b/storage/innodb_plugin/buf/buf0buddy.c
new file mode 100644
index 00000000000..f0e1395c307
--- /dev/null
+++ b/storage/innodb_plugin/buf/buf0buddy.c
@@ -0,0 +1,692 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file buf/buf0buddy.c
+Binary buddy allocator for compressed pages
+
+Created December 2006 by Marko Makela
+*******************************************************/
+
+#define THIS_MODULE
+#include "buf0buddy.h"
+#ifdef UNIV_NONINL
+# include "buf0buddy.ic"
+#endif
+#undef THIS_MODULE
+#include "buf0buf.h"
+#include "buf0lru.h"
+#include "buf0flu.h"
+#include "page0zip.h"
+
+/* Statistic counters */
+
+#ifdef UNIV_DEBUG
+/** Number of frames allocated from the buffer pool to the buddy system.
+Protected by buf_pool_mutex. */
+static ulint buf_buddy_n_frames;
+#endif /* UNIV_DEBUG */
+/** Statistics of the buddy system, indexed by block size.
+Protected by buf_pool_mutex. */
+UNIV_INTERN buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];
+
+/**********************************************************************//**
+Get the offset of the buddy of a compressed page frame.
+@return	the buddy relative of page */
+UNIV_INLINE
+byte*
+buf_buddy_get(
+/*==========*/
+	byte*	page,	/*!< in: compressed page */
+	ulint	size)	/*!< in: page size in bytes */
+{
+	ut_ad(ut_is_2pow(size));
+	ut_ad(size >= BUF_BUDDY_LOW);
+	ut_ad(size < BUF_BUDDY_HIGH);
+	ut_ad(!ut_align_offset(page, size));
+
+	if (((ulint) page) & size) {
+		return(page - size);
+	} else {
+		return(page + size);
+	}
+}
+
+/**********************************************************************//**
+Add a block to the head of the appropriate buddy free list. */
+UNIV_INLINE
+void
+buf_buddy_add_to_free(
+/*==================*/
+	buf_page_t*	bpage,	/*!< in,own: block to be freed */
+	ulint		i)	/*!< in: index of buf_pool->zip_free[] */
+{
+#ifdef UNIV_DEBUG_VALGRIND
+	buf_page_t*	b  = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
+
+	if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
+#endif /* UNIV_DEBUG_VALGRIND */
+
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+	ut_ad(buf_pool->zip_free[i].start != bpage);
+	UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
+
+#ifdef UNIV_DEBUG_VALGRIND
+	if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
+	UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
+#endif /* UNIV_DEBUG_VALGRIND */
+}
+
+/**********************************************************************//**
+Remove a block from the appropriate buddy free list. */
+UNIV_INLINE
+void
+buf_buddy_remove_from_free(
+/*=======================*/
+	buf_page_t*	bpage,	/*!< in: block to be removed */
+	ulint		i)	/*!< in: index of buf_pool->zip_free[] */
+{
+#ifdef UNIV_DEBUG_VALGRIND
+	buf_page_t*	prev = UT_LIST_GET_PREV(list, bpage);
+	buf_page_t*	next = UT_LIST_GET_NEXT(list, bpage);
+
+	if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
+	if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
+
+	ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
+	ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
+#endif /* UNIV_DEBUG_VALGRIND */
+
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+	UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
+
+#ifdef UNIV_DEBUG_VALGRIND
+	if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
+	if (next) UNIV_MEM_FREE(next, BUF_BUDDY_LOW << i);
+#endif /* UNIV_DEBUG_VALGRIND */
+}
+
+/**********************************************************************//**
+Try to allocate a block from buf_pool->zip_free[].
+@return	allocated block, or NULL if buf_pool->zip_free[] was empty */
+static
+void*
+buf_buddy_alloc_zip(
+/*================*/
+	ulint	i)	/*!< in: index of buf_pool->zip_free[] */
+{
+	buf_page_t*	bpage;
+
+	ut_ad(buf_pool_mutex_own());
+	ut_a(i < BUF_BUDDY_SIZES);
+
+#ifndef UNIV_DEBUG_VALGRIND
+	/* Valgrind would complain about accessing free memory. */
+	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
+			      ut_ad(buf_page_get_state(ut_list_node_313)
+				    == BUF_BLOCK_ZIP_FREE)));
+#endif /* !UNIV_DEBUG_VALGRIND */
+	bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
+
+	if (bpage) {
+		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+
+		buf_buddy_remove_from_free(bpage, i);
+	} else if (i + 1 < BUF_BUDDY_SIZES) {
+		/* Attempt to split. */
+		bpage = buf_buddy_alloc_zip(i + 1);
+
+		if (bpage) {
+			buf_page_t*	buddy = (buf_page_t*)
+				(((char*) bpage) + (BUF_BUDDY_LOW << i));
+
+			ut_ad(!buf_pool_contains_zip(buddy));
+			ut_d(memset(buddy, i, BUF_BUDDY_LOW << i));
+			buddy->state = BUF_BLOCK_ZIP_FREE;
+			buf_buddy_add_to_free(buddy, i);
+		}
+	}
+
+#ifdef UNIV_DEBUG
+	if (bpage) {
+		memset(bpage, ~i, BUF_BUDDY_LOW << i);
+	}
+#endif /* UNIV_DEBUG */
+
+	UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);
+
+	return(bpage);
+}
+
+/**********************************************************************//**
+Deallocate a buffer frame of UNIV_PAGE_SIZE. */
+static
+void
+buf_buddy_block_free(
+/*=================*/
+	void*	buf)	/*!< in: buffer frame to deallocate */
+{
+	const ulint	fold	= BUF_POOL_ZIP_FOLD_PTR(buf);
+	buf_page_t*	bpage;
+	buf_block_t*	block;
+
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(!mutex_own(&buf_pool_zip_mutex));
+	ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
+
+	HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
+		    ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
+			  && bpage->in_zip_hash && !bpage->in_page_hash),
+		    ((buf_block_t*) bpage)->frame == buf);
+	ut_a(bpage);
+	ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY);
+	ut_ad(!bpage->in_page_hash);
+	ut_ad(bpage->in_zip_hash);
+	ut_d(bpage->in_zip_hash = FALSE);
+	HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
+
+	ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
+	UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
+
+	block = (buf_block_t*) bpage;
+	mutex_enter(&block->mutex);
+	buf_LRU_block_free_non_file_page(block);
+	mutex_exit(&block->mutex);
+
+	ut_ad(buf_buddy_n_frames > 0);
+	ut_d(buf_buddy_n_frames--);
+}
+
+/**********************************************************************//**
+Allocate a buffer block to the buddy allocator. */
+static
+void
+buf_buddy_block_register(
+/*=====================*/
+	buf_block_t*	block)	/*!< in: buffer frame to allocate */
+{
+	const ulint	fold = BUF_POOL_ZIP_FOLD(block);
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(!mutex_own(&buf_pool_zip_mutex));
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
+
+	buf_block_set_state(block, BUF_BLOCK_MEMORY);
+
+	ut_a(block->frame);
+	ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE));
+
+	ut_ad(!block->page.in_page_hash);
+	ut_ad(!block->page.in_zip_hash);
+	ut_d(block->page.in_zip_hash = TRUE);
+	HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
+
+	ut_d(buf_buddy_n_frames++);
+}
+
+/**********************************************************************//**
+Allocate a block from a bigger object.
+@return	allocated block */
+static
+void*
+buf_buddy_alloc_from(
+/*=================*/
+	void*		buf,	/*!< in: a block that is free to use */
+	ulint		i,	/*!< in: index of buf_pool->zip_free[] */
+	ulint		j)	/*!< in: size of buf as an index
+				of buf_pool->zip_free[] */
+{
+	ulint	offs	= BUF_BUDDY_LOW << j;
+	ut_ad(j <= BUF_BUDDY_SIZES);
+	ut_ad(j >= i);
+	ut_ad(!ut_align_offset(buf, offs));
+
+	/* Add the unused parts of the block to the free lists. */
+	while (j > i) {
+		buf_page_t*	bpage;
+
+		offs >>= 1;
+		j--;
+
+		bpage = (buf_page_t*) ((byte*) buf + offs);
+		ut_d(memset(bpage, j, BUF_BUDDY_LOW << j));
+		bpage->state = BUF_BLOCK_ZIP_FREE;
+#ifndef UNIV_DEBUG_VALGRIND
+		/* Valgrind would complain about accessing free memory. */
+		ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
+				      ut_ad(buf_page_get_state(
+						    ut_list_node_313)
+					    == BUF_BLOCK_ZIP_FREE)));
+#endif /* !UNIV_DEBUG_VALGRIND */
+		buf_buddy_add_to_free(bpage, j);
+	}
+
+	return(buf);
+}
+
+/**********************************************************************//**
+Allocate a block.  The thread calling this function must hold
+buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
+The buf_pool_mutex may only be released and reacquired if lru != NULL.
+@return	allocated block, possibly NULL if lru==NULL */
+UNIV_INTERN
+void*
+buf_buddy_alloc_low(
+/*================*/
+	ulint	i,	/*!< in: index of buf_pool->zip_free[],
+			or BUF_BUDDY_SIZES */
+	ibool*	lru)	/*!< in: pointer to a variable that will be assigned
+			TRUE if storage was allocated from the LRU list
+			and buf_pool_mutex was temporarily released,
+			or NULL if the LRU list should not be used */
+{
+	buf_block_t*	block;
+
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(!mutex_own(&buf_pool_zip_mutex));
+
+	if (i < BUF_BUDDY_SIZES) {
+		/* Try to allocate from the buddy system. */
+		block = buf_buddy_alloc_zip(i);
+
+		if (block) {
+
+			goto func_exit;
+		}
+	}
+
+	/* Try allocating from the buf_pool->free list. */
+	block = buf_LRU_get_free_only();
+
+	if (block) {
+
+		goto alloc_big;
+	}
+
+	if (!lru) {
+
+		return(NULL);
+	}
+
+	/* Try replacing an uncompressed page in the buffer pool. */
+	buf_pool_mutex_exit();
+	block = buf_LRU_get_free_block(0);
+	*lru = TRUE;
+	buf_pool_mutex_enter();
+
+alloc_big:
+	buf_buddy_block_register(block);
+
+	block = buf_buddy_alloc_from(block->frame, i, BUF_BUDDY_SIZES);
+
+func_exit:
+	buf_buddy_stat[i].used++;
+	return(block);
+}
+
+/**********************************************************************//**
+Try to relocate the control block of a compressed page.
+@return	TRUE if relocated */
+static
+ibool
+buf_buddy_relocate_block(
+/*=====================*/
+	buf_page_t*	bpage,	/*!< in: block to relocate */
+	buf_page_t*	dpage)	/*!< in: free block to relocate to */
+{
+	buf_page_t*	b;
+
+	ut_ad(buf_pool_mutex_own());
+
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_ZIP_FREE:
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_FILE_PAGE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		ut_error;
+	case BUF_BLOCK_ZIP_DIRTY:
+		/* Cannot relocate dirty pages. */
+		return(FALSE);
+
+	case BUF_BLOCK_ZIP_PAGE:
+		break;
+	}
+
+	mutex_enter(&buf_pool_zip_mutex);
+
+	if (!buf_page_can_relocate(bpage)) {
+		mutex_exit(&buf_pool_zip_mutex);
+		return(FALSE);
+	}
+
+	buf_relocate(bpage, dpage);
+	ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
+
+	/* relocate buf_pool->zip_clean */
+	b = UT_LIST_GET_PREV(list, dpage);
+	UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
+
+	if (b) {
+		UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
+	} else {
+		UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
+	}
+
+	mutex_exit(&buf_pool_zip_mutex);
+	return(TRUE);
+}
+
+/**********************************************************************//**
+Try to relocate a block.
+@return	TRUE if relocated */
+static
+ibool
+buf_buddy_relocate(
+/*===============*/
+	void*	src,	/*!< in: block to relocate */
+	void*	dst,	/*!< in: free block to relocate to */
+	ulint	i)	/*!< in: index of buf_pool->zip_free[] */
+{
+	buf_page_t*	bpage;
+	const ulint	size	= BUF_BUDDY_LOW << i;
+	ullint		usec	= ut_time_us(NULL);
+
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(!mutex_own(&buf_pool_zip_mutex));
+	ut_ad(!ut_align_offset(src, size));
+	ut_ad(!ut_align_offset(dst, size));
+	UNIV_MEM_ASSERT_W(dst, size);
+
+	/* We assume that all memory from buf_buddy_alloc()
+	is used for either compressed pages or buf_page_t
+	objects covering compressed pages. */
+
+	/* We look inside the allocated objects returned by
+	buf_buddy_alloc() and assume that anything of
+	PAGE_ZIP_MIN_SIZE or larger is a compressed page that contains
+	a valid space_id and page_no in the page header.  Should the
+	fields be invalid, we will be unable to relocate the block.
+	We also assume that anything that fits sizeof(buf_page_t)
+	actually is a properly initialized buf_page_t object. */
+
+	if (size >= PAGE_ZIP_MIN_SIZE) {
+		/* This is a compressed page. */
+		mutex_t*	mutex;
+
+		/* The src block may be split into smaller blocks,
+		some of which may be free.  Thus, the
+		mach_read_from_4() calls below may attempt to read
+		from free memory.  The memory is "owned" by the buddy
+		allocator (and it has been allocated from the buffer
+		pool), so there is nothing wrong about this.  The
+		mach_read_from_4() calls here will only trigger bogus
+		Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
+		bpage = buf_page_hash_get(
+			mach_read_from_4((const byte*) src
+					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID),
+			mach_read_from_4((const byte*) src
+					 + FIL_PAGE_OFFSET));
+
+		if (!bpage || bpage->zip.data != src) {
+			/* The block has probably been freshly
+			allocated by buf_LRU_get_free_block() but not
+			added to buf_pool->page_hash yet.  Obviously,
+			it cannot be relocated. */
+
+			return(FALSE);
+		}
+
+		if (page_zip_get_size(&bpage->zip) != size) {
+			/* The block is of different size.  We would
+			have to relocate all blocks covered by src.
+			For the sake of simplicity, give up. */
+			ut_ad(page_zip_get_size(&bpage->zip) < size);
+
+			return(FALSE);
+		}
+
+		/* The block must have been allocated, but it may
+		contain uninitialized data. */
+		UNIV_MEM_ASSERT_W(src, size);
+
+		mutex = buf_page_get_mutex(bpage);
+
+		mutex_enter(mutex);
+
+		if (buf_page_can_relocate(bpage)) {
+			/* Relocate the compressed page. */
+			ut_a(bpage->zip.data == src);
+			memcpy(dst, src, size);
+			bpage->zip.data = dst;
+			mutex_exit(mutex);
+success:
+			UNIV_MEM_INVALID(src, size);
+			{
+				buf_buddy_stat_t*	buddy_stat
+					= &buf_buddy_stat[i];
+				buddy_stat->relocated++;
+				buddy_stat->relocated_usec
+					+= ut_time_us(NULL) - usec;
+			}
+			return(TRUE);
+		}
+
+		mutex_exit(mutex);
+	} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
+		/* This must be a buf_page_t object. */
+		UNIV_MEM_ASSERT_RW(src, size);
+		if (buf_buddy_relocate_block(src, dst)) {
+
+			goto success;
+		}
+	}
+
+	return(FALSE);
+}
+
+/**********************************************************************//**
+Deallocate a block. */
+UNIV_INTERN
+void
+buf_buddy_free_low(
+/*===============*/
+	void*	buf,	/*!< in: block to be freed, must not be
+			pointed to by the buffer pool */
+	ulint	i)	/*!< in: index of buf_pool->zip_free[],
+			or BUF_BUDDY_SIZES */
+{
+	buf_page_t*	bpage;
+	buf_page_t*	buddy;
+
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(!mutex_own(&buf_pool_zip_mutex));
+	ut_ad(i <= BUF_BUDDY_SIZES);
+	ut_ad(buf_buddy_stat[i].used > 0);
+
+	buf_buddy_stat[i].used--;
+recombine:
+	UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
+	ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
+
+	if (i == BUF_BUDDY_SIZES) {
+		buf_buddy_block_free(buf);
+		return;
+	}
+
+	ut_ad(i < BUF_BUDDY_SIZES);
+	ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
+	ut_ad(!buf_pool_contains_zip(buf));
+
+	/* Try to combine adjacent blocks. */
+
+	buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);
+
+#ifndef UNIV_DEBUG_VALGRIND
+	/* Valgrind would complain about accessing free memory. */
+
+	if (buddy->state != BUF_BLOCK_ZIP_FREE) {
+
+		goto buddy_nonfree;
+	}
+
+	/* The field buddy->state can only be trusted for free blocks.
+	If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
+	it is in the free list. */
+#endif /* !UNIV_DEBUG_VALGRIND */
+
+	for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
+		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+		ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+
+		if (bpage == buddy) {
+buddy_free:
+			/* The buddy is free: recombine */
+			buf_buddy_remove_from_free(bpage, i);
+buddy_free2:
+			ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
+			ut_ad(!buf_pool_contains_zip(buddy));
+			i++;
+			buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
+
+			goto recombine;
+		}
+
+		ut_a(bpage != buf);
+
+		{
+			buf_page_t*	next = UT_LIST_GET_NEXT(list, bpage);
+			UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
+			bpage = next;
+		}
+	}
+
+#ifndef UNIV_DEBUG_VALGRIND
+buddy_nonfree:
+	/* Valgrind would complain about accessing free memory. */
+	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
+			      ut_ad(buf_page_get_state(ut_list_node_313)
+				    == BUF_BLOCK_ZIP_FREE)));
+#endif /* UNIV_DEBUG_VALGRIND */
+
+	/* The buddy is not free. Is there a free block of this size? */
+	bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
+
+	if (bpage) {
+		/* Remove the block from the free list, because a successful
+		buf_buddy_relocate() will overwrite bpage->list. */
+
+		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+		buf_buddy_remove_from_free(bpage, i);
+
+		/* Try to relocate the buddy of buf to the free block. */
+		if (buf_buddy_relocate(buddy, bpage, i)) {
+
+			ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
+			goto buddy_free2;
+		}
+
+		buf_buddy_add_to_free(bpage, i);
+
+		/* Try to relocate the buddy of the free block to buf. */
+		buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
+						    BUF_BUDDY_LOW << i);
+
+#ifndef UNIV_DEBUG_VALGRIND
+		/* Valgrind would complain about accessing free memory. */
+
+		/* The buddy must not be (completely) free, because we
+		always recombine adjacent free blocks.
+
+		(Parts of the buddy can be free in
+		buf_pool->zip_free[j] with j < i.) */
+		ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
+				      ut_ad(buf_page_get_state(
+						    ut_list_node_313)
+					    == BUF_BLOCK_ZIP_FREE
+					    && ut_list_node_313 != buddy)));
+#endif /* !UNIV_DEBUG_VALGRIND */
+
+		if (buf_buddy_relocate(buddy, buf, i)) {
+
+			buf = bpage;
+			UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+			ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
+			goto buddy_free;
+		}
+	}
+
+	/* Free the block to the buddy list. */
+	bpage = buf;
+#ifdef UNIV_DEBUG
+	if (i < buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)) {
+		/* This area has most likely been allocated for at
+		least one compressed-only block descriptor.  Check
+		that there are no live objects in the area.  This is
+		not a complete check: it may yield false positives as
+		well as false negatives.  Also, due to buddy blocks
+		being recombined, it is possible (although unlikely)
+		that this branch is never reached. */
+
+		char* c;
+
+# ifndef UNIV_DEBUG_VALGRIND
+		/* Valgrind would complain about accessing
+		uninitialized memory.  Besides, Valgrind performs a
+		more exhaustive check, at every memory access. */
+		const buf_page_t* b = buf;
+		const buf_page_t* const b_end = (buf_page_t*)
+			((char*) b + (BUF_BUDDY_LOW << i));
+
+		for (; b < b_end; b++) {
+			/* Avoid false positives (and cause false
+			negatives) by checking for b->space < 1000. */
+
+			if ((b->state == BUF_BLOCK_ZIP_PAGE
+			     || b->state == BUF_BLOCK_ZIP_DIRTY)
+			    && b->space > 0 && b->space < 1000) {
+				fprintf(stderr,
+					"buddy dirty %p %u (%u,%u) %p,%lu\n",
+					(void*) b,
+					b->state, b->space, b->offset,
+					buf, i);
+			}
+		}
+# endif /* !UNIV_DEBUG_VALGRIND */
+
+		/* Scramble the block.  This should make any pointers
+		invalid and trigger a segmentation violation.  Because
+		the scrambling can be reversed, it may be possible to
+		track down the object pointing to the freed data by
+		dereferencing the unscrambled bpage->LRU or
+		bpage->list pointers. */
+		for (c = (char*) buf + (BUF_BUDDY_LOW << i);
+		     c-- > (char*) buf; ) {
+			*c = ~*c ^ i;
+		}
+	} else {
+		/* Fill large blocks with a constant pattern. */
+		memset(bpage, i, BUF_BUDDY_LOW << i);
+	}
+#endif /* UNIV_DEBUG */
+	bpage->state = BUF_BLOCK_ZIP_FREE;
+	buf_buddy_add_to_free(bpage, i);
+}
diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c
new file mode 100644
index 00000000000..0008fcb1271
--- /dev/null
+++ b/storage/innodb_plugin/buf/buf0buf.c
@@ -0,0 +1,3942 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file buf/buf0buf.c
+The database buffer buf_pool
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0buf.h"
+
+#ifdef UNIV_NONINL
+#include "buf0buf.ic"
+#endif
+
+#include "mem0mem.h"
+#include "btr0btr.h"
+#include "fil0fil.h"
+#ifndef UNIV_HOTBACKUP
+#include "buf0buddy.h"
+#include "lock0lock.h"
+#include "btr0sea.h"
+#include "ibuf0ibuf.h"
+#include "trx0undo.h"
+#include "log0log.h"
+#endif /* !UNIV_HOTBACKUP */
+#include "srv0srv.h"
+#include "dict0dict.h"
+#include "log0recv.h"
+#include "page0zip.h"
+
+/*
+		IMPLEMENTATION OF THE BUFFER POOL
+		=================================
+
+Performance improvement:
+------------------------
+Thread scheduling in NT may be so slow that the OS wait mechanism should
+not be used even in waiting for disk reads to complete.
+Rather, we should put waiting query threads to the queue of
+waiting jobs, and let the OS thread do something useful while the i/o
+is processed. In this way we could remove most OS thread switches in
+an i/o-intensive benchmark like TPC-C.
+
+A possibility is to put a user space thread library between the database
+and NT. User space thread libraries might be very fast.
+
+SQL Server 7.0 can be configured to use 'fibers' which are lightweight
+threads in NT. These should be studied.
+
+		Buffer frames and blocks
+		------------------------
+Following the terminology of Gray and Reuter, we call the memory
+blocks where file pages are loaded buffer frames. For each buffer
+frame there is a control block, or shortly, a block, in the buffer
+control array. The control info which does not need to be stored
+in the file along with the file page, resides in the control block.
+
+		Buffer pool struct
+		------------------
+The buffer buf_pool contains a single mutex which protects all the
+control data structures of the buf_pool. The content of a buffer frame is
+protected by a separate read-write lock in its control block, though.
+These locks can be locked and unlocked without owning the buf_pool mutex.
+The OS events in the buf_pool struct can be waited for without owning the
+buf_pool mutex.
+
+The buf_pool mutex is a hot-spot in main memory, causing a lot of
+memory bus traffic on multiprocessor systems when processors
+alternately access the mutex. On our Pentium, the mutex is accessed
+maybe every 10 microseconds. We gave up the solution to have mutexes
+for each control block, for instance, because it seemed to be
+complicated.
+
+A solution to reduce mutex contention of the buf_pool mutex is to
+create a separate mutex for the page hash table. On Pentium,
+accessing the hash table takes 2 microseconds, about half
+of the total buf_pool mutex hold time.
+
+		Control blocks
+		--------------
+
+The control block contains, for instance, the bufferfix count
+which is incremented when a thread wants a file page to be fixed
+in a buffer frame. The bufferfix operation does not lock the
+contents of the frame, however. For this purpose, the control
+block contains a read-write lock.
+
+The buffer frames have to be aligned so that the start memory
+address of a frame is divisible by the universal page size, which
+is a power of two.
+
+We intend to make the buffer buf_pool size on-line reconfigurable,
+that is, the buf_pool size can be changed without closing the database.
+Then the database administarator may adjust it to be bigger
+at night, for example. The control block array must
+contain enough control blocks for the maximum buffer buf_pool size
+which is used in the particular database.
+If the buf_pool size is cut, we exploit the virtual memory mechanism of
+the OS, and just refrain from using frames at high addresses. Then the OS
+can swap them to disk.
+
+The control blocks containing file pages are put to a hash table
+according to the file address of the page.
+We could speed up the access to an individual page by using
+"pointer swizzling": we could replace the page references on
+non-leaf index pages by direct pointers to the page, if it exists
+in the buf_pool. We could make a separate hash table where we could
+chain all the page references in non-leaf pages residing in the buf_pool,
+using the page reference as the hash key,
+and at the time of reading of a page update the pointers accordingly.
+Drawbacks of this solution are added complexity and,
+possibly, extra space required on non-leaf pages for memory pointers.
+A simpler solution is just to speed up the hash table mechanism
+in the database, using tables whose size is a power of 2.
+
+		Lists of blocks
+		---------------
+
+There are several lists of control blocks.
+
+The free list (buf_pool->free) contains blocks which are currently not
+used.
+
+The common LRU list contains all the blocks holding a file page
+except those for which the bufferfix count is non-zero.
+The pages are in the LRU list roughly in the order of the last
+access to the page, so that the oldest pages are at the end of the
+list. We also keep a pointer to near the end of the LRU list,
+which we can use when we want to artificially age a page in the
+buf_pool. This is used if we know that some page is not needed
+again for some time: we insert the block right after the pointer,
+causing it to be replaced sooner than would noramlly be the case.
+Currently this aging mechanism is used for read-ahead mechanism
+of pages, and it can also be used when there is a scan of a full
+table which cannot fit in the memory. Putting the pages near the
+of the LRU list, we make sure that most of the buf_pool stays in the
+main memory, undisturbed.
+
+The unzip_LRU list contains a subset of the common LRU list.  The
+blocks on the unzip_LRU list hold a compressed file page and the
+corresponding uncompressed page frame.  A block is in unzip_LRU if and
+only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
+holds.  The blocks in unzip_LRU will be in same order as they are in
+the common LRU list.  That is, each manipulation of the common LRU
+list will result in the same manipulation of the unzip_LRU list.
+
+The chain of modified blocks (buf_pool->flush_list) contains the blocks
+holding file pages that have been modified in the memory
+but not written to disk yet. The block with the oldest modification
+which has not yet been written to disk is at the end of the chain.
+
+The chain of unmodified compressed blocks (buf_pool->zip_clean)
+contains the control blocks (buf_page_t) of those compressed pages
+that are not in buf_pool->flush_list and for which no uncompressed
+page has been allocated in the buffer pool.  The control blocks for
+uncompressed pages are accessible via buf_block_t objects that are
+reachable via buf_pool->chunks[].
+
+The chains of free memory blocks (buf_pool->zip_free[]) are used by
+the buddy allocator (buf0buddy.c) to keep track of currently unused
+memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2.  These
+blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
+BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
+pool.  The buddy allocator is solely used for allocating control
+blocks for compressed pages (buf_page_t) and compressed page frames.
+
+		Loading a file page
+		-------------------
+
+First, a victim block for replacement has to be found in the
+buf_pool. It is taken from the free list or searched for from the
+end of the LRU-list. An exclusive lock is reserved for the frame,
+the io_fix field is set in the block fixing the block in buf_pool,
+and the io-operation for loading the page is queued. The io-handler thread
+releases the X-lock on the frame and resets the io_fix field
+when the io operation completes.
+
+A thread may request the above operation using the function
+buf_page_get(). It may then continue to request a lock on the frame.
+The lock is granted when the io-handler releases the x-lock.
+
+		Read-ahead
+		----------
+
+The read-ahead mechanism is intended to be intelligent and
+isolated from the semantically higher levels of the database
+index management. From the higher level we only need the
+information if a file page has a natural successor or
+predecessor page. On the leaf level of a B-tree index,
+these are the next and previous pages in the natural
+order of the pages.
+
+Let us first explain the read-ahead mechanism when the leafs
+of a B-tree are scanned in an ascending or descending order.
+When a read page is the first time referenced in the buf_pool,
+the buffer manager checks if it is at the border of a so-called
+linear read-ahead area. The tablespace is divided into these
+areas of size 64 blocks, for example. So if the page is at the
+border of such an area, the read-ahead mechanism checks if
+all the other blocks in the area have been accessed in an
+ascending or descending order. If this is the case, the system
+looks at the natural successor or predecessor of the page,
+checks if that is at the border of another area, and in this case
+issues read-requests for all the pages in that area. Maybe
+we could relax the condition that all the pages in the area
+have to be accessed: if data is deleted from a table, there may
+appear holes of unused pages in the area.
+
+A different read-ahead mechanism is used when there appears
+to be a random access pattern to a file.
+If a new page is referenced in the buf_pool, and several pages
+of its random access area (for instance, 32 consecutive pages
+in a tablespace) have recently been referenced, we may predict
+that the whole area may be needed in the near future, and issue
+the read requests for the whole area.
+*/
+
+#ifndef UNIV_HOTBACKUP
+/** Value in microseconds */
+static const int WAIT_FOR_READ	= 5000;
+
+/** The buffer buf_pool of the database */
+UNIV_INTERN buf_pool_t*	buf_pool = NULL;
+
+/** mutex protecting the buffer pool struct and control blocks, except the
+read-write lock in them */
+UNIV_INTERN mutex_t		buf_pool_mutex;
+/** mutex protecting the control blocks of compressed-only pages
+(of type buf_page_t, not buf_block_t) */
+UNIV_INTERN mutex_t		buf_pool_zip_mutex;
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+static ulint	buf_dbg_counter	= 0; /*!< This is used to insert validation
+					operations in excution in the
+					debug version */
+/** Flag to forbid the release of the buffer pool mutex.
+Protected by buf_pool_mutex. */
+UNIV_INTERN ulint		buf_pool_mutex_exit_forbidden = 0;
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#ifdef UNIV_DEBUG
+/** If this is set TRUE, the program prints info whenever
+read-ahead or flush occurs */
+UNIV_INTERN ibool		buf_debug_prints = FALSE;
+#endif /* UNIV_DEBUG */
+
+/** A chunk of buffers.  The buffer pool is allocated in chunks. */
+struct buf_chunk_struct{
+	ulint		mem_size;	/*!< allocated size of the chunk */
+	ulint		size;		/*!< size of frames[] and blocks[] */
+	void*		mem;		/*!< pointer to the memory area which
+					was allocated for the frames */
+	buf_block_t*	blocks;		/*!< array of buffer control blocks */
+};
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************************//**
+Calculates a page checksum which is stored to the page when it is written
+to a file. Note that we must be careful to calculate the same value on
+32-bit and 64-bit architectures.
+@return	checksum */
+UNIV_INTERN
+ulint
+buf_calc_page_new_checksum(
+/*=======================*/
+	const byte*	page)	/*!< in: buffer page */
+{
+	ulint checksum;
+
+	/* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
+	..._ARCH_LOG_NO, are written outside the buffer pool to the first
+	pages of data files, we have to skip them in the page checksum
+	calculation.
+	We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
+	checksum is stored, and also the last 8 bytes of page because
+	there we store the old formula checksum. */
+
+	checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
+				  FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
+		+ ut_fold_binary(page + FIL_PAGE_DATA,
+				 UNIV_PAGE_SIZE - FIL_PAGE_DATA
+				 - FIL_PAGE_END_LSN_OLD_CHKSUM);
+	checksum = checksum & 0xFFFFFFFFUL;
+
+	return(checksum);
+}
+
+/********************************************************************//**
+In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
+looked at the first few bytes of the page. This calculates that old
+checksum.
+NOTE: we must first store the new formula checksum to
+FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
+because this takes that field as an input!
+@return	checksum */
+UNIV_INTERN
+ulint
+buf_calc_page_old_checksum(
+/*=======================*/
+	const byte*	page)	/*!< in: buffer page */
+{
+	ulint checksum;
+
+	checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
+
+	checksum = checksum & 0xFFFFFFFFUL;
+
+	return(checksum);
+}
+
+/********************************************************************//**
+Checks if a page is corrupt.
+@return	TRUE if corrupted */
+UNIV_INTERN
+ibool
+buf_page_is_corrupted(
+/*==================*/
+	const byte*	read_buf,	/*!< in: a database page */
+	ulint		zip_size)	/*!< in: size of compressed page;
+					0 for uncompressed pages */
+{
+	ulint		checksum_field;
+	ulint		old_checksum_field;
+
+	if (UNIV_LIKELY(!zip_size)
+	    && memcmp(read_buf + FIL_PAGE_LSN + 4,
+		      read_buf + UNIV_PAGE_SIZE
+		      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
+
+		/* Stored log sequence numbers at the start and the end
+		of page do not match */
+
+		return(TRUE);
+	}
+
+#ifndef UNIV_HOTBACKUP
+	if (recv_lsn_checks_on) {
+		ib_uint64_t	current_lsn;
+
+		if (log_peek_lsn(&current_lsn)
+		    && current_lsn < mach_read_ull(read_buf + FIL_PAGE_LSN)) {
+			ut_print_timestamp(stderr);
+
+			fprintf(stderr,
+				"  InnoDB: Error: page %lu log sequence number"
+				" %llu\n"
+				"InnoDB: is in the future! Current system "
+				"log sequence number %llu.\n"
+				"InnoDB: Your database may be corrupt or "
+				"you may have copied the InnoDB\n"
+				"InnoDB: tablespace but not the InnoDB "
+				"log files. See\n"
+				"InnoDB: " REFMAN "forcing-recovery.html\n"
+				"InnoDB: for more information.\n",
+				(ulong) mach_read_from_4(read_buf
+							 + FIL_PAGE_OFFSET),
+				mach_read_ull(read_buf + FIL_PAGE_LSN),
+				current_lsn);
+		}
+	}
+#endif
+
+	/* If we use checksums validation, make additional check before
+	returning TRUE to ensure that the checksum is not equal to
+	BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
+	disabled. Otherwise, skip checksum calculation and return FALSE */
+
+	if (UNIV_LIKELY(srv_use_checksums)) {
+		checksum_field = mach_read_from_4(read_buf
+						  + FIL_PAGE_SPACE_OR_CHKSUM);
+
+		if (UNIV_UNLIKELY(zip_size)) {
+			return(checksum_field != BUF_NO_CHECKSUM_MAGIC
+			       && checksum_field
+			       != page_zip_calc_checksum(read_buf, zip_size));
+		}
+
+		old_checksum_field = mach_read_from_4(
+			read_buf + UNIV_PAGE_SIZE
+			- FIL_PAGE_END_LSN_OLD_CHKSUM);
+
+		/* There are 2 valid formulas for old_checksum_field:
+
+		1. Very old versions of InnoDB only stored 8 byte lsn to the
+		start and the end of the page.
+
+		2. Newer InnoDB versions store the old formula checksum
+		there. */
+
+		if (old_checksum_field != mach_read_from_4(read_buf
+							   + FIL_PAGE_LSN)
+		    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
+		    && old_checksum_field
+		    != buf_calc_page_old_checksum(read_buf)) {
+
+			return(TRUE);
+		}
+
+		/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
+		(always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
+
+		if (checksum_field != 0
+		    && checksum_field != BUF_NO_CHECKSUM_MAGIC
+		    && checksum_field
+		    != buf_calc_page_new_checksum(read_buf)) {
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/********************************************************************//**
+Prints a page to stderr. */
+UNIV_INTERN
+void
+buf_page_print(
+/*===========*/
+	const byte*	read_buf,	/*!< in: a database page */
+	ulint		zip_size)	/*!< in: compressed page size, or
+				0 for uncompressed pages */
+{
+#ifndef UNIV_HOTBACKUP
+	dict_index_t*	index;
+#endif /* !UNIV_HOTBACKUP */
+	ulint		checksum;
+	ulint		old_checksum;
+	ulint		size	= zip_size;
+
+	if (!size) {
+		size = UNIV_PAGE_SIZE;
+	}
+
+	ut_print_timestamp(stderr);
+	fprintf(stderr, "  InnoDB: Page dump in ascii and hex (%lu bytes):\n",
+		(ulong) size);
+	ut_print_buf(stderr, read_buf, size);
+	fputs("\nInnoDB: End of page dump\n", stderr);
+
+	if (zip_size) {
+		/* Print compressed page. */
+
+		switch (fil_page_get_type(read_buf)) {
+		case FIL_PAGE_TYPE_ZBLOB:
+		case FIL_PAGE_TYPE_ZBLOB2:
+			checksum = srv_use_checksums
+				? page_zip_calc_checksum(read_buf, zip_size)
+				: BUF_NO_CHECKSUM_MAGIC;
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: Compressed BLOB page"
+				" checksum %lu, stored %lu\n"
+				"InnoDB: Page lsn %lu %lu\n"
+				"InnoDB: Page number (if stored"
+				" to page already) %lu,\n"
+				"InnoDB: space id (if stored"
+				" to page already) %lu\n",
+				(ulong) checksum,
+				(ulong) mach_read_from_4(
+					read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
+				(ulong) mach_read_from_4(
+					read_buf + FIL_PAGE_LSN),
+				(ulong) mach_read_from_4(
+					read_buf + (FIL_PAGE_LSN + 4)),
+				(ulong) mach_read_from_4(
+					read_buf + FIL_PAGE_OFFSET),
+				(ulong) mach_read_from_4(
+					read_buf
+					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+			return;
+		default:
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: unknown page type %lu,"
+				" assuming FIL_PAGE_INDEX\n",
+				fil_page_get_type(read_buf));
+			/* fall through */
+		case FIL_PAGE_INDEX:
+			checksum = srv_use_checksums
+				? page_zip_calc_checksum(read_buf, zip_size)
+				: BUF_NO_CHECKSUM_MAGIC;
+
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: Compressed page checksum %lu,"
+				" stored %lu\n"
+				"InnoDB: Page lsn %lu %lu\n"
+				"InnoDB: Page number (if stored"
+				" to page already) %lu,\n"
+				"InnoDB: space id (if stored"
+				" to page already) %lu\n",
+				(ulong) checksum,
+				(ulong) mach_read_from_4(
+					read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
+				(ulong) mach_read_from_4(
+					read_buf + FIL_PAGE_LSN),
+				(ulong) mach_read_from_4(
+					read_buf + (FIL_PAGE_LSN + 4)),
+				(ulong) mach_read_from_4(
+					read_buf + FIL_PAGE_OFFSET),
+				(ulong) mach_read_from_4(
+					read_buf
+					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+			return;
+		case FIL_PAGE_TYPE_XDES:
+			/* This is an uncompressed page. */
+			break;
+		}
+	}
+
+	checksum = srv_use_checksums
+		? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
+	old_checksum = srv_use_checksums
+		? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
+
+	ut_print_timestamp(stderr);
+	fprintf(stderr,
+		"  InnoDB: Page checksum %lu, prior-to-4.0.14-form"
+		" checksum %lu\n"
+		"InnoDB: stored checksum %lu, prior-to-4.0.14-form"
+		" stored checksum %lu\n"
+		"InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
+		" at page end %lu\n"
+		"InnoDB: Page number (if stored to page already) %lu,\n"
+		"InnoDB: space id (if created with >= MySQL-4.1.1"
+		" and stored already) %lu\n",
+		(ulong) checksum, (ulong) old_checksum,
+		(ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
+		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
+					 - FIL_PAGE_END_LSN_OLD_CHKSUM),
+		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
+		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
+		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
+					 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
+		(ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
+		(ulong) mach_read_from_4(read_buf
+					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+
+#ifndef UNIV_HOTBACKUP
+	if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
+	    == TRX_UNDO_INSERT) {
+		fprintf(stderr,
+			"InnoDB: Page may be an insert undo log page\n");
+	} else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
+				    + TRX_UNDO_PAGE_TYPE)
+		   == TRX_UNDO_UPDATE) {
+		fprintf(stderr,
+			"InnoDB: Page may be an update undo log page\n");
+	}
+#endif /* !UNIV_HOTBACKUP */
+
+	switch (fil_page_get_type(read_buf)) {
+	case FIL_PAGE_INDEX:
+		fprintf(stderr,
+			"InnoDB: Page may be an index page where"
+			" index id is %lu %lu\n",
+			(ulong) ut_dulint_get_high(
+				btr_page_get_index_id(read_buf)),
+			(ulong) ut_dulint_get_low(
+				btr_page_get_index_id(read_buf)));
+#ifndef UNIV_HOTBACKUP
+		index = dict_index_find_on_id_low(
+			btr_page_get_index_id(read_buf));
+		if (index) {
+			fputs("InnoDB: (", stderr);
+			dict_index_name_print(stderr, NULL, index);
+			fputs(")\n", stderr);
+		}
+#endif /* !UNIV_HOTBACKUP */
+		break;
+	case FIL_PAGE_INODE:
+		fputs("InnoDB: Page may be an 'inode' page\n", stderr);
+		break;
+	case FIL_PAGE_IBUF_FREE_LIST:
+		fputs("InnoDB: Page may be an insert buffer free list page\n",
+		      stderr);
+		break;
+	case FIL_PAGE_TYPE_ALLOCATED:
+		fputs("InnoDB: Page may be a freshly allocated page\n",
+		      stderr);
+		break;
+	case FIL_PAGE_IBUF_BITMAP:
+		fputs("InnoDB: Page may be an insert buffer bitmap page\n",
+		      stderr);
+		break;
+	case FIL_PAGE_TYPE_SYS:
+		fputs("InnoDB: Page may be a system page\n",
+		      stderr);
+		break;
+	case FIL_PAGE_TYPE_TRX_SYS:
+		fputs("InnoDB: Page may be a transaction system page\n",
+		      stderr);
+		break;
+	case FIL_PAGE_TYPE_FSP_HDR:
+		fputs("InnoDB: Page may be a file space header page\n",
+		      stderr);
+		break;
+	case FIL_PAGE_TYPE_XDES:
+		fputs("InnoDB: Page may be an extent descriptor page\n",
+		      stderr);
+		break;
+	case FIL_PAGE_TYPE_BLOB:
+		fputs("InnoDB: Page may be a BLOB page\n",
+		      stderr);
+		break;
+	case FIL_PAGE_TYPE_ZBLOB:
+	case FIL_PAGE_TYPE_ZBLOB2:
+		fputs("InnoDB: Page may be a compressed BLOB page\n",
+		      stderr);
+		break;
+	}
+}
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Initializes a buffer control block when the buf_pool is created. */
+static
+void
+buf_block_init(
+/*===========*/
+	buf_block_t*	block,	/*!< in: pointer to control block */
+	byte*		frame)	/*!< in: pointer to buffer frame */
+{
+	UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);
+
+	block->frame = frame;
+
+	block->page.state = BUF_BLOCK_NOT_USED;
+	block->page.buf_fix_count = 0;
+	block->page.io_fix = BUF_IO_NONE;
+
+	block->modify_clock = 0;
+
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+	block->page.file_page_was_freed = FALSE;
+#endif /* UNIV_DEBUG_FILE_ACCESSES */
+
+	block->check_index_page_at_flush = FALSE;
+	block->index = NULL;
+
+#ifdef UNIV_DEBUG
+	block->page.in_page_hash = FALSE;
+	block->page.in_zip_hash = FALSE;
+	block->page.in_flush_list = FALSE;
+	block->page.in_free_list = FALSE;
+	block->page.in_LRU_list = FALSE;
+	block->in_unzip_LRU_list = FALSE;
+#endif /* UNIV_DEBUG */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	block->n_pointers = 0;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	page_zip_des_init(&block->page.zip);
+
+	mutex_create(&block->mutex, SYNC_BUF_BLOCK);
+
+	rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
+	ut_ad(rw_lock_validate(&(block->lock)));
+
+#ifdef UNIV_SYNC_DEBUG
+	rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
+#endif /* UNIV_SYNC_DEBUG */
+}
+
+/********************************************************************//**
+Allocates a chunk of buffer frames.
+@return	chunk, or NULL on failure */
+static
+buf_chunk_t*
+buf_chunk_init(
+/*===========*/
+	buf_chunk_t*	chunk,		/*!< out: chunk of buffers */
+	ulint		mem_size)	/*!< in: requested size in bytes */
+{
+	buf_block_t*	block;
+	byte*		frame;
+	ulint		i;
+
+	/* Round down to a multiple of page size,
+	although it already should be. */
+	mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
+	/* Reserve space for the block descriptors. */
+	mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
+				  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
+
+	chunk->mem_size = mem_size;
+	chunk->mem = os_mem_alloc_large(&chunk->mem_size);
+
+	if (UNIV_UNLIKELY(chunk->mem == NULL)) {
+
+		return(NULL);
+	}
+
+	/* Allocate the block descriptors from
+	the start of the memory block. */
+	chunk->blocks = chunk->mem;
+
+	/* Align a pointer to the first frame.  Note that when
+	os_large_page_size is smaller than UNIV_PAGE_SIZE,
+	we may allocate one fewer block than requested.  When
+	it is bigger, we may allocate more blocks than requested. */
+
+	frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
+	chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
+		- (frame != chunk->mem);
+
+	/* Subtract the space needed for block descriptors. */
+	{
+		ulint	size = chunk->size;
+
+		while (frame < (byte*) (chunk->blocks + size)) {
+			frame += UNIV_PAGE_SIZE;
+			size--;
+		}
+
+		chunk->size = size;
+	}
+
+	/* Init block structs and assign frames for them. Then we
+	assign the frames to the first blocks (we already mapped the
+	memory above). */
+
+	block = chunk->blocks;
+
+	for (i = chunk->size; i--; ) {
+
+		buf_block_init(block, frame);
+
+#ifdef HAVE_purify
+		/* Wipe contents of frame to eliminate a Purify warning */
+		memset(block->frame, '\0', UNIV_PAGE_SIZE);
+#endif
+		/* Add the block to the free list */
+		UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
+		ut_d(block->page.in_free_list = TRUE);
+
+		block++;
+		frame += UNIV_PAGE_SIZE;
+	}
+
+	return(chunk);
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Finds a block in the given buffer chunk that points to a
+given compressed page.
+@return	buffer block pointing to the compressed page, or NULL */
+static
+buf_block_t*
+buf_chunk_contains_zip(
+/*===================*/
+	buf_chunk_t*	chunk,	/*!< in: chunk being checked */
+	const void*	data)	/*!< in: pointer to compressed page */
+{
+	buf_block_t*	block;
+	ulint		i;
+
+	ut_ad(buf_pool);
+	ut_ad(buf_pool_mutex_own());
+
+	block = chunk->blocks;
+
+	for (i = chunk->size; i--; block++) {
+		if (block->page.zip.data == data) {
+
+			return(block);
+		}
+	}
+
+	return(NULL);
+}
+
+/*********************************************************************//**
+Finds a block in the buffer pool that points to a
+given compressed page.
+@return	buffer block pointing to the compressed page, or NULL */
+UNIV_INTERN
+buf_block_t*
+buf_pool_contains_zip(
+/*==================*/
+	const void*	data)	/*!< in: pointer to compressed page */
+{
+	ulint		n;
+	buf_chunk_t*	chunk = buf_pool->chunks;
+
+	for (n = buf_pool->n_chunks; n--; chunk++) {
+		buf_block_t* block = buf_chunk_contains_zip(chunk, data);
+
+		if (block) {
+			return(block);
+		}
+	}
+
+	return(NULL);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Checks that all file pages in the buffer chunk are in a replaceable state.
+@return	address of a non-free block, or NULL if all freed */
+static
+const buf_block_t*
+buf_chunk_not_freed(
+/*================*/
+	buf_chunk_t*	chunk)	/*!< in: chunk being checked */
+{
+	buf_block_t*	block;
+	ulint		i;
+
+	ut_ad(buf_pool);
+	ut_ad(buf_pool_mutex_own());
+
+	block = chunk->blocks;
+
+	for (i = chunk->size; i--; block++) {
+		mutex_enter(&block->mutex);
+
+		if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
+		    && !buf_flush_ready_for_replace(&block->page)) {
+
+			mutex_exit(&block->mutex);
+			return(block);
+		}
+
+		mutex_exit(&block->mutex);
+	}
+
+	return(NULL);
+}
+
+/*********************************************************************//**
+Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state.
+@return	TRUE if all freed */
+static
+ibool
+buf_chunk_all_free(
+/*===============*/
+	const buf_chunk_t*	chunk)	/*!< in: chunk being checked */
+{
+	const buf_block_t*	block;
+	ulint			i;
+
+	ut_ad(buf_pool);
+	ut_ad(buf_pool_mutex_own());
+
+	block = chunk->blocks;
+
+	for (i = chunk->size; i--; block++) {
+
+		if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
+
+			return(FALSE);
+		}
+	}
+
+	return(TRUE);
+}
+
+/********************************************************************//**
+Frees a chunk of buffer frames. */
+static
+void
+buf_chunk_free(
+/*===========*/
+	buf_chunk_t*	chunk)		/*!< out: chunk of buffers */
+{
+	buf_block_t*		block;
+	const buf_block_t*	block_end;
+
+	ut_ad(buf_pool_mutex_own());
+
+	block_end = chunk->blocks + chunk->size;
+
+	for (block = chunk->blocks; block < block_end; block++) {
+		ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
+		ut_a(!block->page.zip.data);
+
+		ut_ad(!block->page.in_LRU_list);
+		ut_ad(!block->in_unzip_LRU_list);
+		ut_ad(!block->page.in_flush_list);
+		/* Remove the block from the free list. */
+		ut_ad(block->page.in_free_list);
+		UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
+
+		/* Free the latches. */
+		mutex_free(&block->mutex);
+		rw_lock_free(&block->lock);
+#ifdef UNIV_SYNC_DEBUG
+		rw_lock_free(&block->debug_latch);
+#endif /* UNIV_SYNC_DEBUG */
+		UNIV_MEM_UNDESC(block);
+	}
+
+	os_mem_free_large(chunk->mem, chunk->mem_size);
+}
+
+/********************************************************************//**
+Creates the buffer pool.
+@return	own: buf_pool object, NULL if not enough memory or error */
+UNIV_INTERN
+buf_pool_t*
+buf_pool_init(void)
+/*===============*/
+{
+	buf_chunk_t*	chunk;
+	ulint		i;
+
+	buf_pool = mem_zalloc(sizeof(buf_pool_t));
+
+	/* 1. Initialize general fields
+	------------------------------- */
+	mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
+	mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
+
+	buf_pool_mutex_enter();
+
+	buf_pool->n_chunks = 1;
+	buf_pool->chunks = chunk = mem_alloc(sizeof *chunk);
+
+	UT_LIST_INIT(buf_pool->free);
+
+	if (!buf_chunk_init(chunk, srv_buf_pool_size)) {
+		mem_free(chunk);
+		mem_free(buf_pool);
+		buf_pool = NULL;
+		return(NULL);
+	}
+
+	srv_buf_pool_old_size = srv_buf_pool_size;
+	buf_pool->curr_size = chunk->size;
+	srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
+
+	buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
+	buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
+
+	buf_pool->last_printout_time = time(NULL);
+
+	/* 2. Initialize flushing fields
+	-------------------------------- */
+
+	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
+		buf_pool->no_flush[i] = os_event_create(NULL);
+	}
+
+	buf_pool->ulint_clock = 1;
+
+	/* 3. Initialize LRU fields
+	--------------------------- */
+	/* All fields are initialized by mem_zalloc(). */
+
+	buf_pool_mutex_exit();
+
+	btr_search_sys_create(buf_pool->curr_size
+			      * UNIV_PAGE_SIZE / sizeof(void*) / 64);
+
+	/* 4. Initialize the buddy allocator fields */
+	/* All fields are initialized by mem_zalloc(). */
+
+	return(buf_pool);
+}
+
+/********************************************************************//**
+Frees the buffer pool at shutdown.  This must not be invoked before
+freeing all mutexes. */
+UNIV_INTERN
+void
+buf_pool_free(void)
+/*===============*/
+{
+	buf_chunk_t*	chunk;
+	buf_chunk_t*	chunks;
+
+	chunks = buf_pool->chunks;
+	chunk = chunks + buf_pool->n_chunks;
+
+	while (--chunk >= chunks) {
+		/* Bypass the checks of buf_chunk_free(), since they
+		would fail at shutdown. */
+		os_mem_free_large(chunk->mem, chunk->mem_size);
+	}
+
+	buf_pool->n_chunks = 0;
+}
+
+/********************************************************************//**
+Drops the adaptive hash index.  To prevent a livelock, this function
+is only to be called while holding btr_search_latch and while
+btr_search_enabled == FALSE. */
+UNIV_INTERN
+void
+buf_pool_drop_hash_index(void)
+/*==========================*/
+{
+	ibool		released_search_latch;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(!btr_search_enabled);
+
+	do {
+		buf_chunk_t*	chunks	= buf_pool->chunks;
+		buf_chunk_t*	chunk	= chunks + buf_pool->n_chunks;
+
+		released_search_latch = FALSE;
+
+		while (--chunk >= chunks) {
+			buf_block_t*	block	= chunk->blocks;
+			ulint		i	= chunk->size;
+
+			for (; i--; block++) {
+				/* block->is_hashed cannot be modified
+				when we have an x-latch on btr_search_latch;
+				see the comment in buf0buf.h */
+
+				if (!block->is_hashed) {
+					continue;
+				}
+
+				/* To follow the latching order, we
+				have to release btr_search_latch
+				before acquiring block->latch. */
+				rw_lock_x_unlock(&btr_search_latch);
+				/* When we release the search latch,
+				we must rescan all blocks, because
+				some may become hashed again. */
+				released_search_latch = TRUE;
+
+				rw_lock_x_lock(&block->lock);
+
+				/* This should be guaranteed by the
+				callers, which will be holding
+				btr_search_enabled_mutex. */
+				ut_ad(!btr_search_enabled);
+
+				/* Because we did not buffer-fix the
+				block by calling buf_block_get_gen(),
+				it is possible that the block has been
+				allocated for some other use after
+				btr_search_latch was released above.
+				We do not care which file page the
+				block is mapped to.  All we want to do
+				is to drop any hash entries referring
+				to the page. */
+
+				/* It is possible that
+				block->page.state != BUF_FILE_PAGE.
+				Even that does not matter, because
+				btr_search_drop_page_hash_index() will
+				check block->is_hashed before doing
+				anything.  block->is_hashed can only
+				be set on uncompressed file pages. */
+
+				btr_search_drop_page_hash_index(block);
+
+				rw_lock_x_unlock(&block->lock);
+
+				rw_lock_x_lock(&btr_search_latch);
+
+				ut_ad(!btr_search_enabled);
+			}
+		}
+	} while (released_search_latch);
+}
+
+/********************************************************************//**
+Relocate a buffer control block.  Relocates the block on the LRU list
+and in buf_pool->page_hash.  Does not relocate bpage->list.
+The caller must take care of relocating bpage->list. */
+UNIV_INTERN
+void
+buf_relocate(
+/*=========*/
+	buf_page_t*	bpage,	/*!< in/out: control block being relocated;
+				buf_page_get_state(bpage) must be
+				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
+	buf_page_t*	dpage)	/*!< in/out: destination control block */
+{
+	buf_page_t*	b;
+	ulint		fold;
+
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+	ut_a(bpage->buf_fix_count == 0);
+	ut_ad(bpage->in_LRU_list);
+	ut_ad(!bpage->in_zip_hash);
+	ut_ad(bpage->in_page_hash);
+	ut_ad(bpage == buf_page_hash_get(bpage->space, bpage->offset));
+#ifdef UNIV_DEBUG
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_ZIP_FREE:
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_FILE_PAGE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		ut_error;
+	case BUF_BLOCK_ZIP_DIRTY:
+	case BUF_BLOCK_ZIP_PAGE:
+		break;
+	}
+#endif /* UNIV_DEBUG */
+
+	memcpy(dpage, bpage, sizeof *dpage);
+
+	ut_d(bpage->in_LRU_list = FALSE);
+	ut_d(bpage->in_page_hash = FALSE);
+
+	/* relocate buf_pool->LRU */
+	b = UT_LIST_GET_PREV(LRU, bpage);
+	UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
+
+	if (b) {
+		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
+	} else {
+		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
+	}
+
+	if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
+		buf_pool->LRU_old = dpage;
+#ifdef UNIV_LRU_DEBUG
+		/* buf_pool->LRU_old must be the first item in the LRU list
+		whose "old" flag is set. */
+		ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
+		     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
+		ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
+		     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
+#endif /* UNIV_LRU_DEBUG */
+	}
+
+	ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
+			      ut_ad(ut_list_node_313->in_LRU_list)));
+
+	/* relocate buf_pool->page_hash */
+	fold = buf_page_address_fold(bpage->space, bpage->offset);
+
+	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
+	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
+
+	UNIV_MEM_INVALID(bpage, sizeof *bpage);
+}
+
+/********************************************************************//**
+Shrinks the buffer pool. */
+static
+void
+buf_pool_shrink(
+/*============*/
+	ulint	chunk_size)	/*!< in: number of pages to remove */
+{
+	buf_chunk_t*	chunks;
+	buf_chunk_t*	chunk;
+	ulint		max_size;
+	ulint		max_free_size;
+	buf_chunk_t*	max_chunk;
+	buf_chunk_t*	max_free_chunk;
+
+	ut_ad(!buf_pool_mutex_own());
+
+try_again:
+	btr_search_disable(); /* Empty the adaptive hash index again */
+	buf_pool_mutex_enter();
+
+shrink_again:
+	if (buf_pool->n_chunks <= 1) {
+
+		/* Cannot shrink if there is only one chunk */
+		goto func_done;
+	}
+
+	/* Search for the largest free chunk
+	not larger than the size difference */
+	chunks = buf_pool->chunks;
+	chunk = chunks + buf_pool->n_chunks;
+	max_size = max_free_size = 0;
+	max_chunk = max_free_chunk = NULL;
+
+	while (--chunk >= chunks) {
+		if (chunk->size <= chunk_size
+		    && chunk->size > max_free_size) {
+			if (chunk->size > max_size) {
+				max_size = chunk->size;
+				max_chunk = chunk;
+			}
+
+			if (buf_chunk_all_free(chunk)) {
+				max_free_size = chunk->size;
+				max_free_chunk = chunk;
+			}
+		}
+	}
+
+	if (!max_free_size) {
+
+		ulint		dirty	= 0;
+		ulint		nonfree	= 0;
+		buf_block_t*	block;
+		buf_block_t*	bend;
+
+		/* Cannot shrink: try again later
+		(do not assign srv_buf_pool_old_size) */
+		if (!max_chunk) {
+
+			goto func_exit;
+		}
+
+		block = max_chunk->blocks;
+		bend = block + max_chunk->size;
+
+		/* Move the blocks of chunk to the end of the
+		LRU list and try to flush them. */
+		for (; block < bend; block++) {
+			switch (buf_block_get_state(block)) {
+			case BUF_BLOCK_NOT_USED:
+				continue;
+			case BUF_BLOCK_FILE_PAGE:
+				break;
+			default:
+				nonfree++;
+				continue;
+			}
+
+			mutex_enter(&block->mutex);
+			/* The following calls will temporarily
+			release block->mutex and buf_pool_mutex.
+			Therefore, we have to always retry,
+			even if !dirty && !nonfree. */
+
+			if (!buf_flush_ready_for_replace(&block->page)) {
+
+				buf_LRU_make_block_old(&block->page);
+				dirty++;
+			} else if (buf_LRU_free_block(&block->page, TRUE, NULL)
+				   != BUF_LRU_FREED) {
+				nonfree++;
+			}
+
+			mutex_exit(&block->mutex);
+		}
+
+		buf_pool_mutex_exit();
+
+		/* Request for a flush of the chunk if it helps.
+		Do not flush if there are non-free blocks, since
+		flushing will not make the chunk freeable. */
+		if (nonfree) {
+			/* Avoid busy-waiting. */
+			os_thread_sleep(100000);
+		} else if (dirty
+			   && buf_flush_batch(BUF_FLUSH_LRU, dirty, 0)
+			   == ULINT_UNDEFINED) {
+
+			buf_flush_wait_batch_end(BUF_FLUSH_LRU);
+		}
+
+		goto try_again;
+	}
+
+	max_size = max_free_size;
+	max_chunk = max_free_chunk;
+
+	srv_buf_pool_old_size = srv_buf_pool_size;
+
+	/* Rewrite buf_pool->chunks.  Copy everything but max_chunk. */
+	chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
+	memcpy(chunks, buf_pool->chunks,
+	       (max_chunk - buf_pool->chunks) * sizeof *chunks);
+	memcpy(chunks + (max_chunk - buf_pool->chunks),
+	       max_chunk + 1,
+	       buf_pool->chunks + buf_pool->n_chunks
+	       - (max_chunk + 1));
+	ut_a(buf_pool->curr_size > max_chunk->size);
+	buf_pool->curr_size -= max_chunk->size;
+	srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
+	chunk_size -= max_chunk->size;
+	buf_chunk_free(max_chunk);
+	mem_free(buf_pool->chunks);
+	buf_pool->chunks = chunks;
+	buf_pool->n_chunks--;
+
+	/* Allow a slack of one megabyte. */
+	if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
+
+		goto shrink_again;
+	}
+
+func_done:
+	srv_buf_pool_old_size = srv_buf_pool_size;
+func_exit:
+	buf_pool_mutex_exit();
+	btr_search_enable();
+}
+
+/********************************************************************//**
+Rebuild buf_pool->page_hash. */
+static
+void
+buf_pool_page_hash_rebuild(void)
+/*============================*/
+{
+	ulint		i;
+	ulint		n_chunks;
+	buf_chunk_t*	chunk;
+	hash_table_t*	page_hash;
+	hash_table_t*	zip_hash;
+	buf_page_t*	b;
+
+	buf_pool_mutex_enter();
+
+	/* Free, create, and populate the hash table. */
+	hash_table_free(buf_pool->page_hash);
+	buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
+	zip_hash = hash_create(2 * buf_pool->curr_size);
+
+	HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
+		     BUF_POOL_ZIP_FOLD_BPAGE);
+
+	hash_table_free(buf_pool->zip_hash);
+	buf_pool->zip_hash = zip_hash;
+
+	/* Insert the uncompressed file pages to buf_pool->page_hash. */
+
+	chunk = buf_pool->chunks;
+	n_chunks = buf_pool->n_chunks;
+
+	for (i = 0; i < n_chunks; i++, chunk++) {
+		ulint		j;
+		buf_block_t*	block = chunk->blocks;
+
+		for (j = 0; j < chunk->size; j++, block++) {
+			if (buf_block_get_state(block)
+			    == BUF_BLOCK_FILE_PAGE) {
+				ut_ad(!block->page.in_zip_hash);
+				ut_ad(block->page.in_page_hash);
+
+				HASH_INSERT(buf_page_t, hash, page_hash,
+					    buf_page_address_fold(
+						    block->page.space,
+						    block->page.offset),
+					    &block->page);
+			}
+		}
+	}
+
+	/* Insert the compressed-only pages to buf_pool->page_hash.
+	All such blocks are either in buf_pool->zip_clean or
+	in buf_pool->flush_list. */
+
+	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+	     b = UT_LIST_GET_NEXT(list, b)) {
+		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+		ut_ad(!b->in_flush_list);
+		ut_ad(b->in_LRU_list);
+		ut_ad(b->in_page_hash);
+		ut_ad(!b->in_zip_hash);
+
+		HASH_INSERT(buf_page_t, hash, page_hash,
+			    buf_page_address_fold(b->space, b->offset), b);
+	}
+
+	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+	     b = UT_LIST_GET_NEXT(list, b)) {
+		ut_ad(b->in_flush_list);
+		ut_ad(b->in_LRU_list);
+		ut_ad(b->in_page_hash);
+		ut_ad(!b->in_zip_hash);
+
+		switch (buf_page_get_state(b)) {
+		case BUF_BLOCK_ZIP_DIRTY:
+			HASH_INSERT(buf_page_t, hash, page_hash,
+				    buf_page_address_fold(b->space,
+							  b->offset), b);
+			break;
+		case BUF_BLOCK_FILE_PAGE:
+			/* uncompressed page */
+			break;
+		case BUF_BLOCK_ZIP_FREE:
+		case BUF_BLOCK_ZIP_PAGE:
+		case BUF_BLOCK_NOT_USED:
+		case BUF_BLOCK_READY_FOR_USE:
+		case BUF_BLOCK_MEMORY:
+		case BUF_BLOCK_REMOVE_HASH:
+			ut_error;
+			break;
+		}
+	}
+
+	buf_pool_mutex_exit();
+}
+
+/********************************************************************//**
+Resizes the buffer pool. */
+UNIV_INTERN
+void
+buf_pool_resize(void)
+/*=================*/
+{
+	buf_pool_mutex_enter();
+
+	if (srv_buf_pool_old_size == srv_buf_pool_size) {
+
+		buf_pool_mutex_exit();
+		return;
+	}
+
+	if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
+
+		buf_pool_mutex_exit();
+
+		/* Disable adaptive hash indexes and empty the index
+		in order to free up memory in the buffer pool chunks. */
+		buf_pool_shrink((srv_buf_pool_curr_size - srv_buf_pool_size)
+				/ UNIV_PAGE_SIZE);
+	} else if (srv_buf_pool_curr_size + 1048576 < srv_buf_pool_size) {
+
+		/* Enlarge the buffer pool by at least one megabyte */
+
+		ulint		mem_size
+			= srv_buf_pool_size - srv_buf_pool_curr_size;
+		buf_chunk_t*	chunks;
+		buf_chunk_t*	chunk;
+
+		chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
+
+		memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks
+		       * sizeof *chunks);
+
+		chunk = &chunks[buf_pool->n_chunks];
+
+		if (!buf_chunk_init(chunk, mem_size)) {
+			mem_free(chunks);
+		} else {
+			buf_pool->curr_size += chunk->size;
+			srv_buf_pool_curr_size = buf_pool->curr_size
+				* UNIV_PAGE_SIZE;
+			mem_free(buf_pool->chunks);
+			buf_pool->chunks = chunks;
+			buf_pool->n_chunks++;
+		}
+
+		srv_buf_pool_old_size = srv_buf_pool_size;
+		buf_pool_mutex_exit();
+	}
+
+	buf_pool_page_hash_rebuild();
+}
+
+/********************************************************************//**
+Moves the block to the start of the LRU list if there is a danger
+that the block would drift out of the buffer pool. */
+UNIV_INLINE
+void
+buf_block_make_young(
+/*=================*/
+	buf_page_t*	bpage)	/*!< in: block to make younger */
+{
+	ut_ad(!buf_pool_mutex_own());
+
+	/* Note that we read freed_page_clock's without holding any mutex:
+	this is allowed since the result is used only in heuristics */
+
+	if (buf_page_peek_if_too_old(bpage)) {
+
+		buf_pool_mutex_enter();
+		/* There has been freeing activity in the LRU list:
+		best to move to the head of the LRU list */
+
+		buf_LRU_make_block_young(bpage);
+		buf_pool_mutex_exit();
+	}
+}
+
+/********************************************************************//**
+Moves a page to the start of the buffer pool LRU list. This high-level
+function can be used to prevent an important page from from slipping out of
+the buffer pool. */
+UNIV_INTERN
+void
+buf_page_make_young(
+/*================*/
+	buf_page_t*	bpage)	/*!< in: buffer block of a file page */
+{
+	buf_pool_mutex_enter();
+
+	ut_a(buf_page_in_file(bpage));
+
+	buf_LRU_make_block_young(bpage);
+
+	buf_pool_mutex_exit();
+}
+
+/********************************************************************//**
+Resets the check_index_page_at_flush field of a page if found in the buffer
+pool. */
+UNIV_INTERN
+void
+buf_reset_check_index_page_at_flush(
+/*================================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: page number */
+{
+	buf_block_t*	block;
+
+	buf_pool_mutex_enter();
+
+	block = (buf_block_t*) buf_page_hash_get(space, offset);
+
+	if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
+		block->check_index_page_at_flush = FALSE;
+	}
+
+	buf_pool_mutex_exit();
+}
+
+/********************************************************************//**
+Returns the current state of is_hashed of a page. FALSE if the page is
+not in the pool. NOTE that this operation does not fix the page in the
+pool if it is found there.
+@return	TRUE if page hash index is built in search system */
+UNIV_INTERN
+ibool
+buf_page_peek_if_search_hashed(
+/*===========================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: page number */
+{
+	buf_block_t*	block;
+	ibool		is_hashed;
+
+	buf_pool_mutex_enter();
+
+	block = (buf_block_t*) buf_page_hash_get(space, offset);
+
+	if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+		is_hashed = FALSE;
+	} else {
+		is_hashed = block->is_hashed;
+	}
+
+	buf_pool_mutex_exit();
+
+	return(is_hashed);
+}
+
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+/********************************************************************//**
+Sets file_page_was_freed TRUE if the page is found in the buffer pool.
+This function should be called when we free a file page and want the
+debug version to check that it is not accessed any more unless
+reallocated.
+@return	control block if found in page hash table, otherwise NULL */
+UNIV_INTERN
+buf_page_t*
+buf_page_set_file_page_was_freed(
+/*=============================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: page number */
+{
+	buf_page_t*	bpage;
+
+	buf_pool_mutex_enter();
+
+	bpage = buf_page_hash_get(space, offset);
+
+	if (bpage) {
+		bpage->file_page_was_freed = TRUE;
+	}
+
+	buf_pool_mutex_exit();
+
+	return(bpage);
+}
+
+/********************************************************************//**
+Sets file_page_was_freed FALSE if the page is found in the buffer pool.
+This function should be called when we free a file page and want the
+debug version to check that it is not accessed any more unless
+reallocated.
+@return	control block if found in page hash table, otherwise NULL */
+UNIV_INTERN
+buf_page_t*
+buf_page_reset_file_page_was_freed(
+/*===============================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: page number */
+{
+	buf_page_t*	bpage;
+
+	buf_pool_mutex_enter();
+
+	bpage = buf_page_hash_get(space, offset);
+
+	if (bpage) {
+		bpage->file_page_was_freed = FALSE;
+	}
+
+	buf_pool_mutex_exit();
+
+	return(bpage);
+}
+#endif /* UNIV_DEBUG_FILE_ACCESSES */
+
+/********************************************************************//**
+Get read access to a compressed page (usually of type
+FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
+The page must be released with buf_page_release_zip().
+NOTE: the page is not protected by any latch.  Mutual exclusion has to
+be implemented at a higher level.  In other words, all possible
+accesses to a given page through this function must be protected by
+the same set of mutexes or latches.
+@return	pointer to the block */
+UNIV_INTERN
+buf_page_t*
+buf_page_get_zip(
+/*=============*/
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size */
+	ulint		offset)	/*!< in: page number */
+{
+	buf_page_t*	bpage;
+	mutex_t*	block_mutex;
+	ibool		must_read;
+
+#ifndef UNIV_LOG_DEBUG
+	ut_ad(!ibuf_inside());
+#endif
+	buf_pool->n_page_gets++;
+
+	for (;;) {
+		buf_pool_mutex_enter();
+lookup:
+		bpage = buf_page_hash_get(space, offset);
+		if (bpage) {
+			break;
+		}
+
+		/* Page not in buf_pool: needs to be read from file */
+
+		buf_pool_mutex_exit();
+
+		buf_read_page(space, zip_size, offset);
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+		ut_a(++buf_dbg_counter % 37 || buf_validate());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+	}
+
+	if (UNIV_UNLIKELY(!bpage->zip.data)) {
+		/* There is no compressed page. */
+err_exit:
+		buf_pool_mutex_exit();
+		return(NULL);
+	}
+
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+	case BUF_BLOCK_ZIP_FREE:
+		break;
+	case BUF_BLOCK_ZIP_PAGE:
+	case BUF_BLOCK_ZIP_DIRTY:
+		block_mutex = &buf_pool_zip_mutex;
+		mutex_enter(block_mutex);
+		bpage->buf_fix_count++;
+		goto got_block;
+	case BUF_BLOCK_FILE_PAGE:
+		block_mutex = &((buf_block_t*) bpage)->mutex;
+		mutex_enter(block_mutex);
+
+		/* Discard the uncompressed page frame if possible. */
+		if (buf_LRU_free_block(bpage, FALSE, NULL)
+		    == BUF_LRU_FREED) {
+
+			mutex_exit(block_mutex);
+			goto lookup;
+		}
+
+		buf_block_buf_fix_inc((buf_block_t*) bpage,
+				      __FILE__, __LINE__);
+		goto got_block;
+	}
+
+	ut_error;
+	goto err_exit;
+
+got_block:
+	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
+
+	buf_pool_mutex_exit();
+
+	buf_page_set_accessed(bpage, TRUE);
+
+	mutex_exit(block_mutex);
+
+	buf_block_make_young(bpage);
+
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+	ut_a(!bpage->file_page_was_freed);
+#endif
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	ut_a(++buf_dbg_counter % 5771 || buf_validate());
+	ut_a(bpage->buf_fix_count > 0);
+	ut_a(buf_page_in_file(bpage));
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+	if (must_read) {
+		/* Let us wait until the read operation
+		completes */
+
+		for (;;) {
+			enum buf_io_fix	io_fix;
+
+			mutex_enter(block_mutex);
+			io_fix = buf_page_get_io_fix(bpage);
+			mutex_exit(block_mutex);
+
+			if (io_fix == BUF_IO_READ) {
+
+				os_thread_sleep(WAIT_FOR_READ);
+			} else {
+				break;
+			}
+		}
+	}
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+	ut_a(ibuf_count_get(buf_page_get_space(bpage),
+			    buf_page_get_page_no(bpage)) == 0);
+#endif
+	return(bpage);
+}
+
+/********************************************************************//**
+Initialize some fields of a control block. */
+UNIV_INLINE
+void
+buf_block_init_low(
+/*===============*/
+	buf_block_t*	block)	/*!< in: block to init */
+{
+	block->check_index_page_at_flush = FALSE;
+	block->index		= NULL;
+
+	block->n_hash_helps	= 0;
+	block->is_hashed	= FALSE;
+	block->n_fields		= 1;
+	block->n_bytes		= 0;
+	block->left_side	= TRUE;
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************************//**
+Decompress a block.
+@return	TRUE if successful */
+UNIV_INTERN
+ibool
+buf_zip_decompress(
+/*===============*/
+	buf_block_t*	block,	/*!< in/out: block */
+	ibool		check)	/*!< in: TRUE=verify the page checksum */
+{
+	const byte* frame = block->page.zip.data;
+
+	ut_ad(buf_block_get_zip_size(block));
+	ut_a(buf_block_get_space(block) != 0);
+
+	if (UNIV_LIKELY(check)) {
+		ulint	stamp_checksum	= mach_read_from_4(
+			frame + FIL_PAGE_SPACE_OR_CHKSUM);
+		ulint	calc_checksum	= page_zip_calc_checksum(
+			frame, page_zip_get_size(&block->page.zip));
+
+		if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: compressed page checksum mismatch"
+				" (space %u page %u): %lu != %lu\n",
+				block->page.space, block->page.offset,
+				stamp_checksum, calc_checksum);
+			return(FALSE);
+		}
+	}
+
+	switch (fil_page_get_type(frame)) {
+	case FIL_PAGE_INDEX:
+		if (page_zip_decompress(&block->page.zip,
+					block->frame)) {
+			return(TRUE);
+		}
+
+		fprintf(stderr,
+			"InnoDB: unable to decompress space %lu page %lu\n",
+			(ulong) block->page.space,
+			(ulong) block->page.offset);
+		return(FALSE);
+
+	case FIL_PAGE_TYPE_ALLOCATED:
+	case FIL_PAGE_INODE:
+	case FIL_PAGE_IBUF_BITMAP:
+	case FIL_PAGE_TYPE_FSP_HDR:
+	case FIL_PAGE_TYPE_XDES:
+	case FIL_PAGE_TYPE_ZBLOB:
+	case FIL_PAGE_TYPE_ZBLOB2:
+		/* Copy to uncompressed storage. */
+		memcpy(block->frame, frame,
+		       buf_block_get_zip_size(block));
+		return(TRUE);
+	}
+
+	ut_print_timestamp(stderr);
+	fprintf(stderr,
+		"  InnoDB: unknown compressed page"
+		" type %lu\n",
+		fil_page_get_type(frame));
+	return(FALSE);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Gets the block to whose frame the pointer is pointing to.
+@return	pointer to block, never NULL */
+UNIV_INTERN
+buf_block_t*
+buf_block_align(
+/*============*/
+	const byte*	ptr)	/*!< in: pointer to a frame */
+{
+	buf_chunk_t*	chunk;
+	ulint		i;
+
+	/* TODO: protect buf_pool->chunks with a mutex (it will
+	currently remain constant after buf_pool_init()) */
+	for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
+		lint	offs = ptr - chunk->blocks->frame;
+
+		if (UNIV_UNLIKELY(offs < 0)) {
+
+			continue;
+		}
+
+		offs >>= UNIV_PAGE_SIZE_SHIFT;
+
+		if (UNIV_LIKELY((ulint) offs < chunk->size)) {
+			buf_block_t*	block = &chunk->blocks[offs];
+
+			/* The function buf_chunk_init() invokes
+			buf_block_init() so that block[n].frame ==
+			block->frame + n * UNIV_PAGE_SIZE.  Check it. */
+			ut_ad(block->frame == page_align(ptr));
+#ifdef UNIV_DEBUG
+			/* A thread that updates these fields must
+			hold buf_pool_mutex and block->mutex.  Acquire
+			only the latter. */
+			mutex_enter(&block->mutex);
+
+			switch (buf_block_get_state(block)) {
+			case BUF_BLOCK_ZIP_FREE:
+			case BUF_BLOCK_ZIP_PAGE:
+			case BUF_BLOCK_ZIP_DIRTY:
+				/* These types should only be used in
+				the compressed buffer pool, whose
+				memory is allocated from
+				buf_pool->chunks, in UNIV_PAGE_SIZE
+				blocks flagged as BUF_BLOCK_MEMORY. */
+				ut_error;
+				break;
+			case BUF_BLOCK_NOT_USED:
+			case BUF_BLOCK_READY_FOR_USE:
+			case BUF_BLOCK_MEMORY:
+				/* Some data structures contain
+				"guess" pointers to file pages.  The
+				file pages may have been freed and
+				reused.  Do not complain. */
+				break;
+			case BUF_BLOCK_REMOVE_HASH:
+				/* buf_LRU_block_remove_hashed_page()
+				will overwrite the FIL_PAGE_OFFSET and
+				FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
+				0xff and set the state to
+				BUF_BLOCK_REMOVE_HASH. */
+				ut_ad(page_get_space_id(page_align(ptr))
+				      == 0xffffffff);
+				ut_ad(page_get_page_no(page_align(ptr))
+				      == 0xffffffff);
+				break;
+			case BUF_BLOCK_FILE_PAGE:
+				ut_ad(block->page.space
+				      == page_get_space_id(page_align(ptr)));
+				ut_ad(block->page.offset
+				      == page_get_page_no(page_align(ptr)));
+				break;
+			}
+
+			mutex_exit(&block->mutex);
+#endif /* UNIV_DEBUG */
+
+			return(block);
+		}
+	}
+
+	/* The block should always be found. */
+	ut_error;
+	return(NULL);
+}
+
+/********************************************************************//**
+Find out if a pointer belongs to a buf_block_t. It can be a pointer to
+the buf_block_t itself or a member of it
+@return	TRUE if ptr belongs to a buf_block_t struct */
+UNIV_INTERN
+ibool
+buf_pointer_is_block_field(
+/*=======================*/
+	const void*		ptr)	/*!< in: pointer not
+					dereferenced */
+{
+	const buf_chunk_t*		chunk	= buf_pool->chunks;
+	const buf_chunk_t* const	echunk	= chunk + buf_pool->n_chunks;
+
+	/* TODO: protect buf_pool->chunks with a mutex (it will
+	currently remain constant after buf_pool_init()) */
+	while (chunk < echunk) {
+		if (ptr >= (void *)chunk->blocks
+		    && ptr < (void *)(chunk->blocks + chunk->size)) {
+
+			return(TRUE);
+		}
+
+		chunk++;
+	}
+
+	return(FALSE);
+}
+
+/********************************************************************//**
+Find out if a buffer block was created by buf_chunk_init().
+@return	TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
+static
+ibool
+buf_block_is_uncompressed(
+/*======================*/
+	const buf_block_t*	block)	/*!< in: pointer to block,
+					not dereferenced */
+{
+	ut_ad(buf_pool_mutex_own());
+
+	if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
+		/* The pointer should be aligned. */
+		return(FALSE);
+	}
+
+	return(buf_pointer_is_block_field((void *)block));
+}
+
+/********************************************************************//**
+This is the general function used to get access to a database page.
+@return	pointer to the block or NULL */
+UNIV_INTERN
+buf_block_t*
+buf_page_get_gen(
+/*=============*/
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint		offset,	/*!< in: page number */
+	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
+	buf_block_t*	guess,	/*!< in: guessed block or NULL */
+	ulint		mode,	/*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
+				BUF_GET_NO_LATCH */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr)	/*!< in: mini-transaction */
+{
+	buf_block_t*	block;
+	ibool		accessed;
+	ulint		fix_type;
+	ibool		must_read;
+
+	ut_ad(mtr);
+	ut_ad((rw_latch == RW_S_LATCH)
+	      || (rw_latch == RW_X_LATCH)
+	      || (rw_latch == RW_NO_LATCH));
+	ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
+	ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
+	      || (mode == BUF_GET_NO_LATCH));
+	ut_ad(zip_size == fil_space_get_zip_size(space));
+	ut_ad(ut_is_2pow(zip_size));
+#ifndef UNIV_LOG_DEBUG
+	ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
+#endif
+	buf_pool->n_page_gets++;
+loop:
+	block = guess;
+	buf_pool_mutex_enter();
+
+	if (block) {
+		/* If the guess is a compressed page descriptor that
+		has been allocated by buf_buddy_alloc(), it may have
+		been invalidated by buf_buddy_relocate().  In that
+		case, block could point to something that happens to
+		contain the expected bits in block->page.  Similarly,
+		the guess may be pointing to a buffer pool chunk that
+		has been released when resizing the buffer pool. */
+
+		if (!buf_block_is_uncompressed(block)
+		    || offset != block->page.offset
+		    || space != block->page.space
+		    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+
+			block = guess = NULL;
+		} else {
+			ut_ad(!block->page.in_zip_hash);
+			ut_ad(block->page.in_page_hash);
+		}
+	}
+
+	if (block == NULL) {
+		block = (buf_block_t*) buf_page_hash_get(space, offset);
+	}
+
+loop2:
+	if (block == NULL) {
+		/* Page not in buf_pool: needs to be read from file */
+
+		buf_pool_mutex_exit();
+
+		if (mode == BUF_GET_IF_IN_POOL) {
+
+			return(NULL);
+		}
+
+		buf_read_page(space, zip_size, offset);
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+		ut_a(++buf_dbg_counter % 37 || buf_validate());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+		goto loop;
+	}
+
+	ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
+
+	must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
+
+	if (must_read && mode == BUF_GET_IF_IN_POOL) {
+		/* The page is only being read to buffer */
+		buf_pool_mutex_exit();
+
+		return(NULL);
+	}
+
+	switch (buf_block_get_state(block)) {
+		buf_page_t*	bpage;
+		ibool		success;
+
+	case BUF_BLOCK_FILE_PAGE:
+		break;
+
+	case BUF_BLOCK_ZIP_PAGE:
+	case BUF_BLOCK_ZIP_DIRTY:
+		bpage = &block->page;
+		/* Protect bpage->buf_fix_count. */
+		mutex_enter(&buf_pool_zip_mutex);
+
+		if (bpage->buf_fix_count
+		    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+			/* This condition often occurs when the buffer
+			is not buffer-fixed, but I/O-fixed by
+			buf_page_init_for_read(). */
+			mutex_exit(&buf_pool_zip_mutex);
+wait_until_unfixed:
+			/* The block is buffer-fixed or I/O-fixed.
+			Try again later. */
+			buf_pool_mutex_exit();
+			os_thread_sleep(WAIT_FOR_READ);
+
+			goto loop;
+		}
+
+		/* Allocate an uncompressed page. */
+		buf_pool_mutex_exit();
+		mutex_exit(&buf_pool_zip_mutex);
+
+		block = buf_LRU_get_free_block(0);
+		ut_a(block);
+
+		buf_pool_mutex_enter();
+		mutex_enter(&block->mutex);
+
+		{
+			buf_page_t*	hash_bpage
+				= buf_page_hash_get(space, offset);
+
+			if (UNIV_UNLIKELY(bpage != hash_bpage)) {
+				/* The buf_pool->page_hash was modified
+				while buf_pool_mutex was released.
+				Free the block that was allocated. */
+
+				buf_LRU_block_free_non_file_page(block);
+				mutex_exit(&block->mutex);
+
+				block = (buf_block_t*) hash_bpage;
+				goto loop2;
+			}
+		}
+
+		if (UNIV_UNLIKELY
+		    (bpage->buf_fix_count
+		     || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
+
+			/* The block was buffer-fixed or I/O-fixed
+			while buf_pool_mutex was not held by this thread.
+			Free the block that was allocated and try again.
+			This should be extremely unlikely. */
+
+			buf_LRU_block_free_non_file_page(block);
+			mutex_exit(&block->mutex);
+
+			goto wait_until_unfixed;
+		}
+
+		/* Move the compressed page from bpage to block,
+		and uncompress it. */
+
+		mutex_enter(&buf_pool_zip_mutex);
+
+		buf_relocate(bpage, &block->page);
+		buf_block_init_low(block);
+		block->lock_hash_val = lock_rec_hash(space, offset);
+
+		UNIV_MEM_DESC(&block->page.zip.data,
+			      page_zip_get_size(&block->page.zip), block);
+
+		if (buf_page_get_state(&block->page)
+		    == BUF_BLOCK_ZIP_PAGE) {
+			UT_LIST_REMOVE(list, buf_pool->zip_clean,
+				       &block->page);
+			ut_ad(!block->page.in_flush_list);
+		} else {
+			/* Relocate buf_pool->flush_list. */
+			buf_page_t*	b;
+
+			b = UT_LIST_GET_PREV(list, &block->page);
+			ut_ad(block->page.in_flush_list);
+			UT_LIST_REMOVE(list, buf_pool->flush_list,
+				       &block->page);
+
+			if (b) {
+				UT_LIST_INSERT_AFTER(
+					list, buf_pool->flush_list, b,
+					&block->page);
+			} else {
+				UT_LIST_ADD_FIRST(
+					list, buf_pool->flush_list,
+					&block->page);
+			}
+		}
+
+		/* Buffer-fix, I/O-fix, and X-latch the block
+		for the duration of the decompression.
+		Also add the block to the unzip_LRU list. */
+		block->page.state = BUF_BLOCK_FILE_PAGE;
+
+		/* Insert at the front of unzip_LRU list */
+		buf_unzip_LRU_add_block(block, FALSE);
+
+		block->page.buf_fix_count = 1;
+		buf_block_set_io_fix(block, BUF_IO_READ);
+		rw_lock_x_lock(&block->lock);
+		mutex_exit(&block->mutex);
+		mutex_exit(&buf_pool_zip_mutex);
+		buf_pool->n_pend_unzip++;
+
+		buf_buddy_free(bpage, sizeof *bpage);
+
+		buf_pool_mutex_exit();
+
+		/* Decompress the page and apply buffered operations
+		while not holding buf_pool_mutex or block->mutex. */
+		success = buf_zip_decompress(block, srv_use_checksums);
+
+		if (UNIV_LIKELY(success)) {
+			ibuf_merge_or_delete_for_page(block, space, offset,
+						      zip_size, TRUE);
+		}
+
+		/* Unfix and unlatch the block. */
+		buf_pool_mutex_enter();
+		mutex_enter(&block->mutex);
+		block->page.buf_fix_count--;
+		buf_block_set_io_fix(block, BUF_IO_NONE);
+		mutex_exit(&block->mutex);
+		buf_pool->n_pend_unzip--;
+		rw_lock_x_unlock(&block->lock);
+
+		if (UNIV_UNLIKELY(!success)) {
+
+			buf_pool_mutex_exit();
+			return(NULL);
+		}
+
+		break;
+
+	case BUF_BLOCK_ZIP_FREE:
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		ut_error;
+		break;
+	}
+
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+	mutex_enter(&block->mutex);
+	UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
+
+	buf_block_buf_fix_inc(block, file, line);
+	buf_pool_mutex_exit();
+
+	/* Check if this is the first access to the page */
+
+	accessed = buf_page_is_accessed(&block->page);
+
+	buf_page_set_accessed(&block->page, TRUE);
+
+	mutex_exit(&block->mutex);
+
+	buf_block_make_young(&block->page);
+
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+	ut_a(!block->page.file_page_was_freed);
+#endif
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	ut_a(++buf_dbg_counter % 5771 || buf_validate());
+	ut_a(block->page.buf_fix_count > 0);
+	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+	switch (rw_latch) {
+	case RW_NO_LATCH:
+		if (must_read) {
+			/* Let us wait until the read operation
+			completes */
+
+			for (;;) {
+				enum buf_io_fix	io_fix;
+
+				mutex_enter(&block->mutex);
+				io_fix = buf_block_get_io_fix(block);
+				mutex_exit(&block->mutex);
+
+				if (io_fix == BUF_IO_READ) {
+
+					os_thread_sleep(WAIT_FOR_READ);
+				} else {
+					break;
+				}
+			}
+		}
+
+		fix_type = MTR_MEMO_BUF_FIX;
+		break;
+
+	case RW_S_LATCH:
+		rw_lock_s_lock_func(&(block->lock), 0, file, line);
+
+		fix_type = MTR_MEMO_PAGE_S_FIX;
+		break;
+
+	default:
+		ut_ad(rw_latch == RW_X_LATCH);
+		rw_lock_x_lock_func(&(block->lock), 0, file, line);
+
+		fix_type = MTR_MEMO_PAGE_X_FIX;
+		break;
+	}
+
+	mtr_memo_push(mtr, block, fix_type);
+
+	if (!accessed) {
+		/* In the case of a first access, try to apply linear
+		read-ahead */
+
+		buf_read_ahead_linear(space, zip_size, offset);
+	}
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+	ut_a(ibuf_count_get(buf_block_get_space(block),
+			    buf_block_get_page_no(block)) == 0);
+#endif
+	return(block);
+}
+
+/********************************************************************//**
+This is the general function used to get optimistic access to a database
+page.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+buf_page_optimistic_get_func(
+/*=========================*/
+	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
+	buf_block_t*	block,	/*!< in: guessed buffer block */
+	ib_uint64_t	modify_clock,/*!< in: modify clock value if mode is
+				..._GUESS_ON_CLOCK */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr)	/*!< in: mini-transaction */
+{
+	ibool		accessed;
+	ibool		success;
+	ulint		fix_type;
+
+	ut_ad(mtr && block);
+	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+
+	mutex_enter(&block->mutex);
+
+	if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
+
+		mutex_exit(&block->mutex);
+
+		return(FALSE);
+	}
+
+	buf_block_buf_fix_inc(block, file, line);
+	accessed = buf_page_is_accessed(&block->page);
+	buf_page_set_accessed(&block->page, TRUE);
+
+	mutex_exit(&block->mutex);
+
+	buf_block_make_young(&block->page);
+
+	/* Check if this is the first access to the page */
+
+	ut_ad(!ibuf_inside()
+	      || ibuf_page(buf_block_get_space(block),
+			   buf_block_get_zip_size(block),
+			   buf_block_get_page_no(block), NULL));
+
+	if (rw_latch == RW_S_LATCH) {
+		success = rw_lock_s_lock_nowait(&(block->lock),
+						file, line);
+		fix_type = MTR_MEMO_PAGE_S_FIX;
+	} else {
+		success = rw_lock_x_lock_func_nowait(&(block->lock),
+						     file, line);
+		fix_type = MTR_MEMO_PAGE_X_FIX;
+	}
+
+	if (UNIV_UNLIKELY(!success)) {
+		mutex_enter(&block->mutex);
+		buf_block_buf_fix_dec(block);
+		mutex_exit(&block->mutex);
+
+		return(FALSE);
+	}
+
+	if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
+		buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+
+		if (rw_latch == RW_S_LATCH) {
+			rw_lock_s_unlock(&(block->lock));
+		} else {
+			rw_lock_x_unlock(&(block->lock));
+		}
+
+		mutex_enter(&block->mutex);
+		buf_block_buf_fix_dec(block);
+		mutex_exit(&block->mutex);
+
+		return(FALSE);
+	}
+
+	mtr_memo_push(mtr, block, fix_type);
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	ut_a(++buf_dbg_counter % 5771 || buf_validate());
+	ut_a(block->page.buf_fix_count > 0);
+	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+	ut_a(block->page.file_page_was_freed == FALSE);
+#endif
+	if (UNIV_UNLIKELY(!accessed)) {
+		/* In the case of a first access, try to apply linear
+		read-ahead */
+
+		buf_read_ahead_linear(buf_block_get_space(block),
+				      buf_block_get_zip_size(block),
+				      buf_block_get_page_no(block));
+	}
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+	ut_a(ibuf_count_get(buf_block_get_space(block),
+			    buf_block_get_page_no(block)) == 0);
+#endif
+	buf_pool->n_page_gets++;
+
+	return(TRUE);
+}
+
+/********************************************************************//**
+This is used to get access to a known database page, when no waiting can be
+done. For example, if a search in an adaptive hash index leads us to this
+frame.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+buf_page_get_known_nowait(
+/*======================*/
+	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
+	buf_block_t*	block,	/*!< in: the known page */
+	ulint		mode,	/*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr)	/*!< in: mini-transaction */
+{
+	ibool		success;
+	ulint		fix_type;
+
+	ut_ad(mtr);
+	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+
+	mutex_enter(&block->mutex);
+
+	if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
+		/* Another thread is just freeing the block from the LRU list
+		of the buffer pool: do not try to access this page; this
+		attempt to access the page can only come through the hash
+		index because when the buffer block state is ..._REMOVE_HASH,
+		we have already removed it from the page address hash table
+		of the buffer pool. */
+
+		mutex_exit(&block->mutex);
+
+		return(FALSE);
+	}
+
+	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+	buf_block_buf_fix_inc(block, file, line);
+
+	mutex_exit(&block->mutex);
+
+	if (mode == BUF_MAKE_YOUNG) {
+		buf_block_make_young(&block->page);
+	}
+
+	ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
+
+	if (rw_latch == RW_S_LATCH) {
+		success = rw_lock_s_lock_nowait(&(block->lock),
+						file, line);
+		fix_type = MTR_MEMO_PAGE_S_FIX;
+	} else {
+		success = rw_lock_x_lock_func_nowait(&(block->lock),
+						     file, line);
+		fix_type = MTR_MEMO_PAGE_X_FIX;
+	}
+
+	if (!success) {
+		mutex_enter(&block->mutex);
+		buf_block_buf_fix_dec(block);
+		mutex_exit(&block->mutex);
+
+		return(FALSE);
+	}
+
+	mtr_memo_push(mtr, block, fix_type);
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	ut_a(++buf_dbg_counter % 5771 || buf_validate());
+	ut_a(block->page.buf_fix_count > 0);
+	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+	ut_a(block->page.file_page_was_freed == FALSE);
+#endif
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+	ut_a((mode == BUF_KEEP_OLD)
+	     || (ibuf_count_get(buf_block_get_space(block),
+				buf_block_get_page_no(block)) == 0));
+#endif
+	buf_pool->n_page_gets++;
+
+	return(TRUE);
+}
+
+/*******************************************************************//**
+Given a tablespace id and page number tries to get that page. If the
+page is not in the buffer pool it is not loaded and NULL is returned.
+Suitable for using when holding the kernel mutex.
+@return	pointer to a page or NULL */
+UNIV_INTERN
+const buf_block_t*
+buf_page_try_get_func(
+/*==================*/
+	ulint		space_id,/*!< in: tablespace id */
+	ulint		page_no,/*!< in: page number */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr)	/*!< in: mini-transaction */
+{
+	buf_block_t*	block;
+	ibool		success;
+	ulint		fix_type;
+
+	buf_pool_mutex_enter();
+	block = buf_block_hash_get(space_id, page_no);
+
+	if (!block) {
+		buf_pool_mutex_exit();
+		return(NULL);
+	}
+
+	mutex_enter(&block->mutex);
+	buf_pool_mutex_exit();
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+	ut_a(buf_block_get_space(block) == space_id);
+	ut_a(buf_block_get_page_no(block) == page_no);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+	buf_block_buf_fix_inc(block, file, line);
+	mutex_exit(&block->mutex);
+
+	fix_type = MTR_MEMO_PAGE_S_FIX;
+	success = rw_lock_s_lock_nowait(&block->lock, file, line);
+
+	if (!success) {
+		/* Let us try to get an X-latch. If the current thread
+		is holding an X-latch on the page, we cannot get an
+		S-latch. */
+
+		fix_type = MTR_MEMO_PAGE_X_FIX;
+		success = rw_lock_x_lock_func_nowait(&block->lock,
+						     file, line);
+	}
+
+	if (!success) {
+		mutex_enter(&block->mutex);
+		buf_block_buf_fix_dec(block);
+		mutex_exit(&block->mutex);
+
+		return(NULL);
+	}
+
+	mtr_memo_push(mtr, block, fix_type);
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	ut_a(++buf_dbg_counter % 5771 || buf_validate());
+	ut_a(block->page.buf_fix_count > 0);
+	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+	ut_a(block->page.file_page_was_freed == FALSE);
+#endif /* UNIV_DEBUG_FILE_ACCESSES */
+	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+
+	buf_pool->n_page_gets++;
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+	ut_a(ibuf_count_get(buf_block_get_space(block),
+			    buf_block_get_page_no(block)) == 0);
+#endif
+
+	return(block);
+}
+
+/********************************************************************//**
+Initialize some fields of a control block. */
+UNIV_INLINE
+void
+buf_page_init_low(
+/*==============*/
+	buf_page_t*	bpage)	/*!< in: block to init */
+{
+	bpage->flush_type = BUF_FLUSH_LRU;
+	bpage->accessed = FALSE;
+	bpage->io_fix = BUF_IO_NONE;
+	bpage->buf_fix_count = 0;
+	bpage->freed_page_clock = 0;
+	bpage->newest_modification = 0;
+	bpage->oldest_modification = 0;
+	HASH_INVALIDATE(bpage, hash);
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+	bpage->file_page_was_freed = FALSE;
+#endif /* UNIV_DEBUG_FILE_ACCESSES */
+}
+
+/********************************************************************//**
+Inits a page to the buffer buf_pool. */
+static
+void
+buf_page_init(
+/*==========*/
+	ulint		space,	/*!< in: space id */
+	ulint		offset,	/*!< in: offset of the page within space
+				in units of a page */
+	buf_block_t*	block)	/*!< in: block to init */
+{
+	buf_page_t*	hash_page;
+
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&(block->mutex)));
+	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+
+	/* Set the state of the block */
+	buf_block_set_file_page(block, space, offset);
+
+#ifdef UNIV_DEBUG_VALGRIND
+	if (!space) {
+		/* Silence valid Valgrind warnings about uninitialized
+		data being written to data files.  There are some unused
+		bytes on some pages that InnoDB does not initialize. */
+		UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
+	}
+#endif /* UNIV_DEBUG_VALGRIND */
+
+	buf_block_init_low(block);
+
+	block->lock_hash_val	= lock_rec_hash(space, offset);
+
+	/* Insert into the hash table of file pages */
+
+	hash_page = buf_page_hash_get(space, offset);
+
+	if (UNIV_LIKELY_NULL(hash_page)) {
+		fprintf(stderr,
+			"InnoDB: Error: page %lu %lu already found"
+			" in the hash table: %p, %p\n",
+			(ulong) space,
+			(ulong) offset,
+			(const void*) hash_page, (const void*) block);
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+		mutex_exit(&block->mutex);
+		buf_pool_mutex_exit();
+		buf_print();
+		buf_LRU_print();
+		buf_validate();
+		buf_LRU_validate();
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+		ut_error;
+	}
+
+	buf_page_init_low(&block->page);
+
+	ut_ad(!block->page.in_zip_hash);
+	ut_ad(!block->page.in_page_hash);
+	ut_d(block->page.in_page_hash = TRUE);
+	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+		    buf_page_address_fold(space, offset), &block->page);
+}
+
+/********************************************************************//**
+Function which inits a page for read to the buffer buf_pool. If the page is
+(1) already in buf_pool, or
+(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
+(3) if the space is deleted or being deleted,
+then this function does nothing.
+Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
+on the buffer frame. The io-handler must take care that the flag is cleared
+and the lock released later.
+@return	pointer to the block or NULL */
+UNIV_INTERN
+buf_page_t*
+buf_page_init_for_read(
+/*===================*/
+	ulint*		err,	/*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
+	ulint		mode,	/*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size, or 0 */
+	ibool		unzip,	/*!< in: TRUE=request uncompressed page */
+	ib_int64_t	tablespace_version,/*!< in: prevents reading from a wrong
+				version of the tablespace in case we have done
+				DISCARD + IMPORT */
+	ulint		offset)	/*!< in: page number */
+{
+	buf_block_t*	block;
+	buf_page_t*	bpage;
+	mtr_t		mtr;
+	ibool		lru	= FALSE;
+	void*		data;
+
+	ut_ad(buf_pool);
+
+	*err = DB_SUCCESS;
+
+	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
+		/* It is a read-ahead within an ibuf routine */
+
+		ut_ad(!ibuf_bitmap_page(zip_size, offset));
+		ut_ad(ibuf_inside());
+
+		mtr_start(&mtr);
+
+		if (!recv_no_ibuf_operations
+		    && !ibuf_page(space, zip_size, offset, &mtr)) {
+
+			mtr_commit(&mtr);
+
+			return(NULL);
+		}
+	} else {
+		ut_ad(mode == BUF_READ_ANY_PAGE);
+	}
+
+	if (zip_size && UNIV_LIKELY(!unzip)
+	    && UNIV_LIKELY(!recv_recovery_is_on())) {
+		block = NULL;
+	} else {
+		block = buf_LRU_get_free_block(0);
+		ut_ad(block);
+	}
+
+	buf_pool_mutex_enter();
+
+	if (buf_page_hash_get(space, offset)) {
+		/* The page is already in the buffer pool. */
+err_exit:
+		if (block) {
+			mutex_enter(&block->mutex);
+			buf_LRU_block_free_non_file_page(block);
+			mutex_exit(&block->mutex);
+		}
+
+		bpage = NULL;
+		goto func_exit;
+	}
+
+	if (fil_tablespace_deleted_or_being_deleted_in_mem(
+		    space, tablespace_version)) {
+		/* The page belongs to a space which has been
+		deleted or is being deleted. */
+		*err = DB_TABLESPACE_DELETED;
+
+		goto err_exit;
+	}
+
+	if (block) {
+		bpage = &block->page;
+		mutex_enter(&block->mutex);
+		buf_page_init(space, offset, block);
+
+		/* The block must be put to the LRU list, to the old blocks */
+		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+
+		/* We set a pass-type x-lock on the frame because then
+		the same thread which called for the read operation
+		(and is running now at this point of code) can wait
+		for the read to complete by waiting for the x-lock on
+		the frame; if the x-lock were recursive, the same
+		thread would illegally get the x-lock before the page
+		read is completed.  The x-lock is cleared by the
+		io-handler thread. */
+
+		rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
+		buf_page_set_io_fix(bpage, BUF_IO_READ);
+
+		if (UNIV_UNLIKELY(zip_size)) {
+			page_zip_set_size(&block->page.zip, zip_size);
+
+			/* buf_pool_mutex may be released and
+			reacquired by buf_buddy_alloc().  Thus, we
+			must release block->mutex in order not to
+			break the latching order in the reacquisition
+			of buf_pool_mutex.  We also must defer this
+			operation until after the block descriptor has
+			been added to buf_pool->LRU and
+			buf_pool->page_hash. */
+			mutex_exit(&block->mutex);
+			data = buf_buddy_alloc(zip_size, &lru);
+			mutex_enter(&block->mutex);
+			block->page.zip.data = data;
+
+			/* To maintain the invariant
+			block->in_unzip_LRU_list
+			== buf_page_belongs_to_unzip_LRU(&block->page)
+			we have to add this block to unzip_LRU
+			after block->page.zip.data is set. */
+			ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
+			buf_unzip_LRU_add_block(block, TRUE);
+		}
+
+		mutex_exit(&block->mutex);
+	} else {
+		/* Defer buf_buddy_alloc() until after the block has
+		been found not to exist.  The buf_buddy_alloc() and
+		buf_buddy_free() calls may be expensive because of
+		buf_buddy_relocate(). */
+
+		/* The compressed page must be allocated before the
+		control block (bpage), in order to avoid the
+		invocation of buf_buddy_relocate_block() on
+		uninitialized data. */
+		data = buf_buddy_alloc(zip_size, &lru);
+		bpage = buf_buddy_alloc(sizeof *bpage, &lru);
+
+		/* If buf_buddy_alloc() allocated storage from the LRU list,
+		it released and reacquired buf_pool_mutex.  Thus, we must
+		check the page_hash again, as it may have been modified. */
+		if (UNIV_UNLIKELY(lru)
+		    && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
+
+			/* The block was added by some other thread. */
+			buf_buddy_free(bpage, sizeof *bpage);
+			buf_buddy_free(data, zip_size);
+
+			bpage = NULL;
+			goto func_exit;
+		}
+
+		page_zip_des_init(&bpage->zip);
+		page_zip_set_size(&bpage->zip, zip_size);
+		bpage->zip.data = data;
+
+		mutex_enter(&buf_pool_zip_mutex);
+		UNIV_MEM_DESC(bpage->zip.data,
+			      page_zip_get_size(&bpage->zip), bpage);
+		buf_page_init_low(bpage);
+		bpage->state	= BUF_BLOCK_ZIP_PAGE;
+		bpage->space	= space;
+		bpage->offset	= offset;
+
+#ifdef UNIV_DEBUG
+		bpage->in_page_hash = FALSE;
+		bpage->in_zip_hash = FALSE;
+		bpage->in_flush_list = FALSE;
+		bpage->in_free_list = FALSE;
+		bpage->in_LRU_list = FALSE;
+#endif /* UNIV_DEBUG */
+
+		ut_d(bpage->in_page_hash = TRUE);
+		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+			    buf_page_address_fold(space, offset), bpage);
+
+		/* The block must be put to the LRU list, to the old blocks */
+		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+		buf_LRU_insert_zip_clean(bpage);
+
+		buf_page_set_io_fix(bpage, BUF_IO_READ);
+
+		mutex_exit(&buf_pool_zip_mutex);
+	}
+
+	buf_pool->n_pend_reads++;
+func_exit:
+	buf_pool_mutex_exit();
+
+	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
+
+		mtr_commit(&mtr);
+	}
+
+	ut_ad(!bpage || buf_page_in_file(bpage));
+	return(bpage);
+}
+
+/********************************************************************//**
+Initializes a page to the buffer buf_pool. The page is usually not read
+from a file even if it cannot be found in the buffer buf_pool. This is one
+of the functions which perform to a block a state transition NOT_USED =>
+FILE_PAGE (the other is buf_page_get_gen).
+@return	pointer to the block, page bufferfixed */
+UNIV_INTERN
+buf_block_t*
+buf_page_create(
+/*============*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset,	/*!< in: offset of the page within space in units of
+			a page */
+	ulint	zip_size,/*!< in: compressed page size, or 0 */
+	mtr_t*	mtr)	/*!< in: mini-transaction handle */
+{
+	buf_frame_t*	frame;
+	buf_block_t*	block;
+	buf_block_t*	free_block	= NULL;
+
+	ut_ad(mtr);
+	ut_ad(space || !zip_size);
+
+	free_block = buf_LRU_get_free_block(0);
+
+	buf_pool_mutex_enter();
+
+	block = (buf_block_t*) buf_page_hash_get(space, offset);
+
+	if (block && buf_page_in_file(&block->page)) {
+#ifdef UNIV_IBUF_COUNT_DEBUG
+		ut_a(ibuf_count_get(space, offset) == 0);
+#endif
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+		block->page.file_page_was_freed = FALSE;
+#endif /* UNIV_DEBUG_FILE_ACCESSES */
+
+		/* Page can be found in buf_pool */
+		buf_pool_mutex_exit();
+
+		buf_block_free(free_block);
+
+		return(buf_page_get_with_no_latch(space, zip_size,
+						  offset, mtr));
+	}
+
+	/* If we get here, the page was not in buf_pool: init it there */
+
+#ifdef UNIV_DEBUG
+	if (buf_debug_prints) {
+		fprintf(stderr, "Creating space %lu page %lu to buffer\n",
+			(ulong) space, (ulong) offset);
+	}
+#endif /* UNIV_DEBUG */
+
+	block = free_block;
+
+	mutex_enter(&block->mutex);
+
+	buf_page_init(space, offset, block);
+
+	/* The block must be put to the LRU list */
+	buf_LRU_add_block(&block->page, FALSE);
+
+	buf_block_buf_fix_inc(block, __FILE__, __LINE__);
+	buf_pool->n_pages_created++;
+
+	if (zip_size) {
+		void*	data;
+		ibool	lru;
+
+		/* Prevent race conditions during buf_buddy_alloc(),
+		which may release and reacquire buf_pool_mutex,
+		by IO-fixing and X-latching the block. */
+
+		buf_page_set_io_fix(&block->page, BUF_IO_READ);
+		rw_lock_x_lock(&block->lock);
+
+		page_zip_set_size(&block->page.zip, zip_size);
+		mutex_exit(&block->mutex);
+		/* buf_pool_mutex may be released and reacquired by
+		buf_buddy_alloc().  Thus, we must release block->mutex
+		in order not to break the latching order in
+		the reacquisition of buf_pool_mutex.  We also must
+		defer this operation until after the block descriptor
+		has been added to buf_pool->LRU and buf_pool->page_hash. */
+		data = buf_buddy_alloc(zip_size, &lru);
+		mutex_enter(&block->mutex);
+		block->page.zip.data = data;
+
+		/* To maintain the invariant
+		block->in_unzip_LRU_list
+		== buf_page_belongs_to_unzip_LRU(&block->page)
+		we have to add this block to unzip_LRU after
+		block->page.zip.data is set. */
+		ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
+		buf_unzip_LRU_add_block(block, FALSE);
+
+		buf_page_set_io_fix(&block->page, BUF_IO_NONE);
+		rw_lock_x_unlock(&block->lock);
+	}
+
+	buf_pool_mutex_exit();
+
+	mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+
+	buf_page_set_accessed(&block->page, TRUE);
+
+	mutex_exit(&block->mutex);
+
+	/* Delete possible entries for the page from the insert buffer:
+	such can exist if the page belonged to an index which was dropped */
+
+	ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
+
+	/* Flush pages from the end of the LRU list if necessary */
+	buf_flush_free_margin();
+
+	frame = block->frame;
+
+	memset(frame + FIL_PAGE_PREV, 0xff, 4);
+	memset(frame + FIL_PAGE_NEXT, 0xff, 4);
+	mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
+
+	/* Reset to zero the file flush lsn field in the page; if the first
+	page of an ibdata file is 'created' in this function into the buffer
+	pool then we lose the original contents of the file flush lsn stamp.
+	Then InnoDB could in a crash recovery print a big, false, corruption
+	warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
+
+	memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	ut_a(++buf_dbg_counter % 357 || buf_validate());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#ifdef UNIV_IBUF_COUNT_DEBUG
+	ut_a(ibuf_count_get(buf_block_get_space(block),
+			    buf_block_get_page_no(block)) == 0);
+#endif
+	return(block);
+}
+
+/********************************************************************//**
+Completes an asynchronous read or write request of a file page to or from
+the buffer pool. */
+UNIV_INTERN
+void
+buf_page_io_complete(
+/*=================*/
+	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
+{
+	enum buf_io_fix	io_type;
+	const ibool	uncompressed = (buf_page_get_state(bpage)
+					== BUF_BLOCK_FILE_PAGE);
+
+	ut_a(buf_page_in_file(bpage));
+
+	/* We do not need protect io_fix here by mutex to read
+	it because this is the only function where we can change the value
+	from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
+	ensures that this is the only thread that handles the i/o for this
+	block. */
+
+	io_type = buf_page_get_io_fix(bpage);
+	ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
+
+	if (io_type == BUF_IO_READ) {
+		ulint	read_page_no;
+		ulint	read_space_id;
+		byte*	frame;
+
+		if (buf_page_get_zip_size(bpage)) {
+			frame = bpage->zip.data;
+			buf_pool->n_pend_unzip++;
+			if (uncompressed
+			    && !buf_zip_decompress((buf_block_t*) bpage,
+						   FALSE)) {
+
+				buf_pool->n_pend_unzip--;
+				goto corrupt;
+			}
+			buf_pool->n_pend_unzip--;
+		} else {
+			ut_a(uncompressed);
+			frame = ((buf_block_t*) bpage)->frame;
+		}
+
+		/* If this page is not uninitialized and not in the
+		doublewrite buffer, then the page number and space id
+		should be the same as in block. */
+		read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
+		read_space_id = mach_read_from_4(
+			frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+
+		if (bpage->space == TRX_SYS_SPACE
+		    && trx_doublewrite_page_inside(bpage->offset)) {
+
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: Error: reading page %lu\n"
+				"InnoDB: which is in the"
+				" doublewrite buffer!\n",
+				(ulong) bpage->offset);
+		} else if (!read_space_id && !read_page_no) {
+			/* This is likely an uninitialized page. */
+		} else if ((bpage->space
+			    && bpage->space != read_space_id)
+			   || bpage->offset != read_page_no) {
+			/* We did not compare space_id to read_space_id
+			if bpage->space == 0, because the field on the
+			page may contain garbage in MySQL < 4.1.1,
+			which only supported bpage->space == 0. */
+
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: Error: space id and page n:o"
+				" stored in the page\n"
+				"InnoDB: read in are %lu:%lu,"
+				" should be %lu:%lu!\n",
+				(ulong) read_space_id, (ulong) read_page_no,
+				(ulong) bpage->space,
+				(ulong) bpage->offset);
+		}
+
+		/* From version 3.23.38 up we store the page checksum
+		to the 4 first bytes of the page end lsn field */
+
+		if (buf_page_is_corrupted(frame,
+					  buf_page_get_zip_size(bpage))) {
+corrupt:
+			fprintf(stderr,
+				"InnoDB: Database page corruption on disk"
+				" or a failed\n"
+				"InnoDB: file read of page %lu.\n"
+				"InnoDB: You may have to recover"
+				" from a backup.\n",
+				(ulong) bpage->offset);
+			buf_page_print(frame, buf_page_get_zip_size(bpage));
+			fprintf(stderr,
+				"InnoDB: Database page corruption on disk"
+				" or a failed\n"
+				"InnoDB: file read of page %lu.\n"
+				"InnoDB: You may have to recover"
+				" from a backup.\n",
+				(ulong) bpage->offset);
+			fputs("InnoDB: It is also possible that"
+			      " your operating\n"
+			      "InnoDB: system has corrupted its"
+			      " own file cache\n"
+			      "InnoDB: and rebooting your computer"
+			      " removes the\n"
+			      "InnoDB: error.\n"
+			      "InnoDB: If the corrupt page is an index page\n"
+			      "InnoDB: you can also try to"
+			      " fix the corruption\n"
+			      "InnoDB: by dumping, dropping,"
+			      " and reimporting\n"
+			      "InnoDB: the corrupt table."
+			      " You can use CHECK\n"
+			      "InnoDB: TABLE to scan your"
+			      " table for corruption.\n"
+			      "InnoDB: See also "
+			      REFMAN "forcing-recovery.html\n"
+			      "InnoDB: about forcing recovery.\n", stderr);
+
+			if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
+				fputs("InnoDB: Ending processing because of"
+				      " a corrupt database page.\n",
+				      stderr);
+				exit(1);
+			}
+		}
+
+		if (recv_recovery_is_on()) {
+			/* Pages must be uncompressed for crash recovery. */
+			ut_a(uncompressed);
+			recv_recover_page(TRUE, (buf_block_t*) bpage);
+		}
+
+		if (uncompressed && !recv_no_ibuf_operations) {
+			ibuf_merge_or_delete_for_page(
+				(buf_block_t*) bpage, bpage->space,
+				bpage->offset, buf_page_get_zip_size(bpage),
+				TRUE);
+		}
+	}
+
+	buf_pool_mutex_enter();
+	mutex_enter(buf_page_get_mutex(bpage));
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+	if (io_type == BUF_IO_WRITE || uncompressed) {
+		/* For BUF_IO_READ of compressed-only blocks, the
+		buffered operations will be merged by buf_page_get_gen()
+		after the block has been uncompressed. */
+		ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
+	}
+#endif
+	/* Because this thread which does the unlocking is not the same that
+	did the locking, we use a pass value != 0 in unlock, which simply
+	removes the newest lock debug record, without checking the thread
+	id. */
+
+	buf_page_set_io_fix(bpage, BUF_IO_NONE);
+
+	switch (io_type) {
+	case BUF_IO_READ:
+		/* NOTE that the call to ibuf may have moved the ownership of
+		the x-latch to this OS thread: do not let this confuse you in
+		debugging! */
+
+		ut_ad(buf_pool->n_pend_reads > 0);
+		buf_pool->n_pend_reads--;
+		buf_pool->n_pages_read++;
+
+		if (uncompressed) {
+			rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
+					     BUF_IO_READ);
+		}
+
+		break;
+
+	case BUF_IO_WRITE:
+		/* Write means a flush operation: call the completion
+		routine in the flush system */
+
+		buf_flush_write_complete(bpage);
+
+		if (uncompressed) {
+			rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
+					     BUF_IO_WRITE);
+		}
+
+		buf_pool->n_pages_written++;
+
+		break;
+
+	default:
+		ut_error;
+	}
+
+#ifdef UNIV_DEBUG
+	if (buf_debug_prints) {
+		fprintf(stderr, "Has %s page space %lu page no %lu\n",
+			io_type == BUF_IO_READ ? "read" : "written",
+			(ulong) buf_page_get_space(bpage),
+			(ulong) buf_page_get_page_no(bpage));
+	}
+#endif /* UNIV_DEBUG */
+
+	mutex_exit(buf_page_get_mutex(bpage));
+	buf_pool_mutex_exit();
+}
+
+/*********************************************************************//**
+Invalidates the file pages in the buffer pool when an archive recovery is
+completed. All the file pages buffered must be in a replaceable state when
+this function is called: not latched and not modified. */
+UNIV_INTERN
+void
+buf_pool_invalidate(void)
+/*=====================*/
+{
+	ibool	freed;
+
+	ut_ad(buf_all_freed());
+
+	freed = TRUE;
+
+	while (freed) {
+		freed = buf_LRU_search_and_free_block(100);
+	}
+
+	buf_pool_mutex_enter();
+
+	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
+	ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
+
+	buf_pool_mutex_exit();
+}
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/*********************************************************************//**
+Validates the buffer buf_pool data structure.
+@return	TRUE */
+UNIV_INTERN
+ibool
+buf_validate(void)
+/*==============*/
+{
+	buf_page_t*	b;
+	buf_chunk_t*	chunk;
+	ulint		i;
+	ulint		n_single_flush	= 0;
+	ulint		n_lru_flush	= 0;
+	ulint		n_list_flush	= 0;
+	ulint		n_lru		= 0;
+	ulint		n_flush		= 0;
+	ulint		n_free		= 0;
+	ulint		n_zip		= 0;
+
+	ut_ad(buf_pool);
+
+	buf_pool_mutex_enter();
+
+	chunk = buf_pool->chunks;
+
+	/* Check the uncompressed blocks. */
+
+	for (i = buf_pool->n_chunks; i--; chunk++) {
+
+		ulint		j;
+		buf_block_t*	block = chunk->blocks;
+
+		for (j = chunk->size; j--; block++) {
+
+			mutex_enter(&block->mutex);
+
+			switch (buf_block_get_state(block)) {
+			case BUF_BLOCK_ZIP_FREE:
+			case BUF_BLOCK_ZIP_PAGE:
+			case BUF_BLOCK_ZIP_DIRTY:
+				/* These should only occur on
+				zip_clean, zip_free[], or flush_list. */
+				ut_error;
+				break;
+
+			case BUF_BLOCK_FILE_PAGE:
+				ut_a(buf_page_hash_get(buf_block_get_space(
+							       block),
+						       buf_block_get_page_no(
+							       block))
+				     == &block->page);
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+				ut_a(buf_page_get_io_fix(&block->page)
+				     == BUF_IO_READ
+				     || !ibuf_count_get(buf_block_get_space(
+								block),
+							buf_block_get_page_no(
+								block)));
+#endif
+				switch (buf_page_get_io_fix(&block->page)) {
+				case BUF_IO_NONE:
+					break;
+
+				case BUF_IO_WRITE:
+					switch (buf_page_get_flush_type(
+							&block->page)) {
+					case BUF_FLUSH_LRU:
+						n_lru_flush++;
+						ut_a(rw_lock_is_locked(
+							     &block->lock,
+							     RW_LOCK_SHARED));
+						break;
+					case BUF_FLUSH_LIST:
+						n_list_flush++;
+						break;
+					case BUF_FLUSH_SINGLE_PAGE:
+						n_single_flush++;
+						break;
+					default:
+						ut_error;
+					}
+
+					break;
+
+				case BUF_IO_READ:
+
+					ut_a(rw_lock_is_locked(&block->lock,
+							       RW_LOCK_EX));
+					break;
+				}
+
+				n_lru++;
+
+				if (block->page.oldest_modification > 0) {
+					n_flush++;
+				}
+
+				break;
+
+			case BUF_BLOCK_NOT_USED:
+				n_free++;
+				break;
+
+			case BUF_BLOCK_READY_FOR_USE:
+			case BUF_BLOCK_MEMORY:
+			case BUF_BLOCK_REMOVE_HASH:
+				/* do nothing */
+				break;
+			}
+
+			mutex_exit(&block->mutex);
+		}
+	}
+
+	mutex_enter(&buf_pool_zip_mutex);
+
+	/* Check clean compressed-only blocks. */
+
+	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+	     b = UT_LIST_GET_NEXT(list, b)) {
+		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+		switch (buf_page_get_io_fix(b)) {
+		case BUF_IO_NONE:
+			/* All clean blocks should be I/O-unfixed. */
+			break;
+		case BUF_IO_READ:
+			/* In buf_LRU_free_block(), we temporarily set
+			b->io_fix = BUF_IO_READ for a newly allocated
+			control block in order to prevent
+			buf_page_get_gen() from decompressing the block. */
+			break;
+		default:
+			ut_error;
+			break;
+		}
+		ut_a(!b->oldest_modification);
+		ut_a(buf_page_hash_get(b->space, b->offset) == b);
+
+		n_lru++;
+		n_zip++;
+	}
+
+	/* Check dirty compressed-only blocks. */
+
+	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+	     b = UT_LIST_GET_NEXT(list, b)) {
+		ut_ad(b->in_flush_list);
+
+		switch (buf_page_get_state(b)) {
+		case BUF_BLOCK_ZIP_DIRTY:
+			ut_a(b->oldest_modification);
+			n_lru++;
+			n_flush++;
+			n_zip++;
+			switch (buf_page_get_io_fix(b)) {
+			case BUF_IO_NONE:
+			case BUF_IO_READ:
+				break;
+
+			case BUF_IO_WRITE:
+				switch (buf_page_get_flush_type(b)) {
+				case BUF_FLUSH_LRU:
+					n_lru_flush++;
+					break;
+				case BUF_FLUSH_LIST:
+					n_list_flush++;
+					break;
+				case BUF_FLUSH_SINGLE_PAGE:
+					n_single_flush++;
+					break;
+				default:
+					ut_error;
+				}
+				break;
+			}
+			break;
+		case BUF_BLOCK_FILE_PAGE:
+			/* uncompressed page */
+			break;
+		case BUF_BLOCK_ZIP_FREE:
+		case BUF_BLOCK_ZIP_PAGE:
+		case BUF_BLOCK_NOT_USED:
+		case BUF_BLOCK_READY_FOR_USE:
+		case BUF_BLOCK_MEMORY:
+		case BUF_BLOCK_REMOVE_HASH:
+			ut_error;
+			break;
+		}
+		ut_a(buf_page_hash_get(b->space, b->offset) == b);
+	}
+
+	mutex_exit(&buf_pool_zip_mutex);
+
+	if (n_lru + n_free > buf_pool->curr_size + n_zip) {
+		fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
+			(ulong) n_lru, (ulong) n_free,
+			(ulong) buf_pool->curr_size, (ulong) n_zip);
+		ut_error;
+	}
+
+	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
+	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
+		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
+			(ulong) UT_LIST_GET_LEN(buf_pool->free),
+			(ulong) n_free);
+		ut_error;
+	}
+	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
+
+	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
+	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
+	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
+
+	buf_pool_mutex_exit();
+
+	ut_a(buf_LRU_validate());
+	ut_a(buf_flush_validate());
+
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/*********************************************************************//**
+Prints info of the buffer buf_pool data structure. */
+UNIV_INTERN
+void
+buf_print(void)
+/*===========*/
+{
+	dulint*		index_ids;
+	ulint*		counts;
+	ulint		size;
+	ulint		i;
+	ulint		j;
+	dulint		id;
+	ulint		n_found;
+	buf_chunk_t*	chunk;
+	dict_index_t*	index;
+
+	ut_ad(buf_pool);
+
+	size = buf_pool->curr_size;
+
+	index_ids = mem_alloc(sizeof(dulint) * size);
+	counts = mem_alloc(sizeof(ulint) * size);
+
+	buf_pool_mutex_enter();
+
+	fprintf(stderr,
+		"buf_pool size %lu\n"
+		"database pages %lu\n"
+		"free pages %lu\n"
+		"modified database pages %lu\n"
+		"n pending decompressions %lu\n"
+		"n pending reads %lu\n"
+		"n pending flush LRU %lu list %lu single page %lu\n"
+		"pages read %lu, created %lu, written %lu\n",
+		(ulong) size,
+		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
+		(ulong) UT_LIST_GET_LEN(buf_pool->free),
+		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
+		(ulong) buf_pool->n_pend_unzip,
+		(ulong) buf_pool->n_pend_reads,
+		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
+		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
+		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
+		(ulong) buf_pool->n_pages_read, buf_pool->n_pages_created,
+		(ulong) buf_pool->n_pages_written);
+
+	/* Count the number of blocks belonging to each index in the buffer */
+
+	n_found = 0;
+
+	chunk = buf_pool->chunks;
+
+	for (i = buf_pool->n_chunks; i--; chunk++) {
+		buf_block_t*	block		= chunk->blocks;
+		ulint		n_blocks	= chunk->size;
+
+		for (; n_blocks--; block++) {
+			const buf_frame_t* frame = block->frame;
+
+			if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
+
+				id = btr_page_get_index_id(frame);
+
+				/* Look for the id in the index_ids array */
+				j = 0;
+
+				while (j < n_found) {
+
+					if (ut_dulint_cmp(index_ids[j],
+							  id) == 0) {
+						counts[j]++;
+
+						break;
+					}
+					j++;
+				}
+
+				if (j == n_found) {
+					n_found++;
+					index_ids[j] = id;
+					counts[j] = 1;
+				}
+			}
+		}
+	}
+
+	buf_pool_mutex_exit();
+
+	for (i = 0; i < n_found; i++) {
+		index = dict_index_get_if_in_cache(index_ids[i]);
+
+		fprintf(stderr,
+			"Block count for index %lu in buffer is about %lu",
+			(ulong) ut_dulint_get_low(index_ids[i]),
+			(ulong) counts[i]);
+
+		if (index) {
+			putc(' ', stderr);
+			dict_index_name_print(stderr, NULL, index);
+		}
+
+		putc('\n', stderr);
+	}
+
+	mem_free(index_ids);
+	mem_free(counts);
+
+	ut_a(buf_validate());
+}
+#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Returns the number of latched pages in the buffer pool.
+@return	number of latched pages */
+UNIV_INTERN
+ulint
+buf_get_latched_pages_number(void)
+/*==============================*/
+{
+	buf_chunk_t*	chunk;
+	buf_page_t*	b;
+	ulint		i;
+	ulint		fixed_pages_number = 0;
+
+	buf_pool_mutex_enter();
+
+	chunk = buf_pool->chunks;
+
+	for (i = buf_pool->n_chunks; i--; chunk++) {
+		buf_block_t*	block;
+		ulint		j;
+
+		block = chunk->blocks;
+
+		for (j = chunk->size; j--; block++) {
+			if (buf_block_get_state(block)
+			    != BUF_BLOCK_FILE_PAGE) {
+
+				continue;
+			}
+
+			mutex_enter(&block->mutex);
+
+			if (block->page.buf_fix_count != 0
+			    || buf_page_get_io_fix(&block->page)
+			    != BUF_IO_NONE) {
+				fixed_pages_number++;
+			}
+
+			mutex_exit(&block->mutex);
+		}
+	}
+
+	mutex_enter(&buf_pool_zip_mutex);
+
+	/* Traverse the lists of clean and dirty compressed-only blocks. */
+
+	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+	     b = UT_LIST_GET_NEXT(list, b)) {
+		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+		ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
+
+		if (b->buf_fix_count != 0
+		    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
+			fixed_pages_number++;
+		}
+	}
+
+	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+	     b = UT_LIST_GET_NEXT(list, b)) {
+		ut_ad(b->in_flush_list);
+
+		switch (buf_page_get_state(b)) {
+		case BUF_BLOCK_ZIP_DIRTY:
+			if (b->buf_fix_count != 0
+			    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
+				fixed_pages_number++;
+			}
+			break;
+		case BUF_BLOCK_FILE_PAGE:
+			/* uncompressed page */
+			break;
+		case BUF_BLOCK_ZIP_FREE:
+		case BUF_BLOCK_ZIP_PAGE:
+		case BUF_BLOCK_NOT_USED:
+		case BUF_BLOCK_READY_FOR_USE:
+		case BUF_BLOCK_MEMORY:
+		case BUF_BLOCK_REMOVE_HASH:
+			ut_error;
+			break;
+		}
+	}
+
+	mutex_exit(&buf_pool_zip_mutex);
+	buf_pool_mutex_exit();
+
+	return(fixed_pages_number);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Returns the number of pending buf pool ios.
+@return	number of pending I/O operations */
+UNIV_INTERN
+ulint
+buf_get_n_pending_ios(void)
+/*=======================*/
+{
+	return(buf_pool->n_pend_reads
+	       + buf_pool->n_flush[BUF_FLUSH_LRU]
+	       + buf_pool->n_flush[BUF_FLUSH_LIST]
+	       + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
+}
+
+/*********************************************************************//**
+Returns the ratio in percents of modified pages in the buffer pool /
+database pages in the buffer pool.
+@return	modified page percentage ratio */
+UNIV_INTERN
+ulint
+buf_get_modified_ratio_pct(void)
+/*============================*/
+{
+	ulint	ratio;
+
+	buf_pool_mutex_enter();
+
+	ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
+		/ (1 + UT_LIST_GET_LEN(buf_pool->LRU)
+		   + UT_LIST_GET_LEN(buf_pool->free));
+
+	/* 1 + is there to avoid division by zero */
+
+	buf_pool_mutex_exit();
+
+	return(ratio);
+}
+
+/*********************************************************************//**
+Prints info of the buffer i/o. */
+UNIV_INTERN
+void
+buf_print_io(
+/*=========*/
+	FILE*	file)	/*!< in/out: buffer where to print */
+{
+	time_t	current_time;
+	double	time_elapsed;
+	ulint	size;
+
+	ut_ad(buf_pool);
+	size = buf_pool->curr_size;
+
+	buf_pool_mutex_enter();
+
+	fprintf(file,
+		"Buffer pool size   %lu\n"
+		"Free buffers       %lu\n"
+		"Database pages     %lu\n"
+		"Modified db pages  %lu\n"
+		"Pending reads %lu\n"
+		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
+		(ulong) size,
+		(ulong) UT_LIST_GET_LEN(buf_pool->free),
+		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
+		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
+		(ulong) buf_pool->n_pend_reads,
+		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
+		+ buf_pool->init_flush[BUF_FLUSH_LRU],
+		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
+		+ buf_pool->init_flush[BUF_FLUSH_LIST],
+		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
+
+	current_time = time(NULL);
+	time_elapsed = 0.001 + difftime(current_time,
+					buf_pool->last_printout_time);
+	buf_pool->last_printout_time = current_time;
+
+	fprintf(file,
+		"Pages read %lu, created %lu, written %lu\n"
+		"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
+		(ulong) buf_pool->n_pages_read,
+		(ulong) buf_pool->n_pages_created,
+		(ulong) buf_pool->n_pages_written,
+		(buf_pool->n_pages_read - buf_pool->n_pages_read_old)
+		/ time_elapsed,
+		(buf_pool->n_pages_created - buf_pool->n_pages_created_old)
+		/ time_elapsed,
+		(buf_pool->n_pages_written - buf_pool->n_pages_written_old)
+		/ time_elapsed);
+
+	if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
+		fprintf(file, "Buffer pool hit rate %lu / 1000\n",
+			(ulong)
+			(1000 - ((1000 * (buf_pool->n_pages_read
+					  - buf_pool->n_pages_read_old))
+				 / (buf_pool->n_page_gets
+				    - buf_pool->n_page_gets_old))));
+	} else {
+		fputs("No buffer pool page gets since the last printout\n",
+		      file);
+	}
+
+	buf_pool->n_page_gets_old = buf_pool->n_page_gets;
+	buf_pool->n_pages_read_old = buf_pool->n_pages_read;
+	buf_pool->n_pages_created_old = buf_pool->n_pages_created;
+	buf_pool->n_pages_written_old = buf_pool->n_pages_written;
+
+	/* Print some values to help us with visualizing what is
+	happening with LRU eviction. */
+	fprintf(file,
+		"LRU len: %lu, unzip_LRU len: %lu\n"
+		"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
+		UT_LIST_GET_LEN(buf_pool->LRU),
+		UT_LIST_GET_LEN(buf_pool->unzip_LRU),
+		buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
+		buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
+
+	buf_pool_mutex_exit();
+}
+
+/**********************************************************************//**
+Refreshes the statistics used to print per-second averages. */
+UNIV_INTERN
+void
+buf_refresh_io_stats(void)
+/*======================*/
+{
+	buf_pool->last_printout_time = time(NULL);
+	buf_pool->n_page_gets_old = buf_pool->n_page_gets;
+	buf_pool->n_pages_read_old = buf_pool->n_pages_read;
+	buf_pool->n_pages_created_old = buf_pool->n_pages_created;
+	buf_pool->n_pages_written_old = buf_pool->n_pages_written;
+}
+
+/*********************************************************************//**
+Asserts that all file pages in the buffer are in a replaceable state.
+@return	TRUE */
+UNIV_INTERN
+ibool
+buf_all_freed(void)
+/*===============*/
+{
+	buf_chunk_t*	chunk;
+	ulint		i;
+
+	ut_ad(buf_pool);
+
+	buf_pool_mutex_enter();
+
+	chunk = buf_pool->chunks;
+
+	for (i = buf_pool->n_chunks; i--; chunk++) {
+
+		const buf_block_t* block = buf_chunk_not_freed(chunk);
+
+		if (UNIV_LIKELY_NULL(block)) {
+			fprintf(stderr,
+				"Page %lu %lu still fixed or dirty\n",
+				(ulong) block->page.space,
+				(ulong) block->page.offset);
+			ut_error;
+		}
+	}
+
+	buf_pool_mutex_exit();
+
+	return(TRUE);
+}
+
+/*********************************************************************//**
+Checks that there currently are no pending i/o-operations for the buffer
+pool.
+@return	TRUE if there is no pending i/o */
+UNIV_INTERN
+ibool
+buf_pool_check_no_pending_io(void)
+/*==============================*/
+{
+	ibool	ret;
+
+	buf_pool_mutex_enter();
+
+	if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
+	    + buf_pool->n_flush[BUF_FLUSH_LIST]
+	    + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
+		ret = FALSE;
+	} else {
+		ret = TRUE;
+	}
+
+	buf_pool_mutex_exit();
+
+	return(ret);
+}
+
+/*********************************************************************//**
+Gets the current length of the free list of buffer blocks.
+@return	length of the free list */
+UNIV_INTERN
+ulint
+buf_get_free_list_len(void)
+/*=======================*/
+{
+	ulint	len;
+
+	buf_pool_mutex_enter();
+
+	len = UT_LIST_GET_LEN(buf_pool->free);
+
+	buf_pool_mutex_exit();
+
+	return(len);
+}
+#else /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
+UNIV_INTERN
+void
+buf_page_init_for_backup_restore(
+/*=============================*/
+	ulint		space,	/*!< in: space id */
+	ulint		offset,	/*!< in: offset of the page within space
+				in units of a page */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	buf_block_t*	block)	/*!< in: block to init */
+{
+	block->page.state	= BUF_BLOCK_FILE_PAGE;
+	block->page.space	= space;
+	block->page.offset	= offset;
+
+	page_zip_des_init(&block->page.zip);
+
+	/* We assume that block->page.data has been allocated
+	with zip_size == UNIV_PAGE_SIZE. */
+	ut_ad(zip_size <= UNIV_PAGE_SIZE);
+	ut_ad(ut_is_2pow(zip_size));
+	page_zip_set_size(&block->page.zip, zip_size);
+	if (zip_size) {
+		block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
+	}
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/buf/buf0flu.c b/storage/innodb_plugin/buf/buf0flu.c
new file mode 100644
index 00000000000..74dd0c07ca6
--- /dev/null
+++ b/storage/innodb_plugin/buf/buf0flu.c
@@ -0,0 +1,1400 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file buf/buf0flu.c
+The database buffer buf_pool flush algorithm
+
+Created 11/11/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0flu.h"
+
+#ifdef UNIV_NONINL
+#include "buf0flu.ic"
+#endif
+
+#include "buf0buf.h"
+#include "srv0srv.h"
+#include "page0zip.h"
+#ifndef UNIV_HOTBACKUP
+#include "ut0byte.h"
+#include "ut0lst.h"
+#include "page0page.h"
+#include "fil0fil.h"
+#include "buf0lru.h"
+#include "buf0rea.h"
+#include "ibuf0ibuf.h"
+#include "log0log.h"
+#include "os0file.h"
+#include "trx0sys.h"
+
+/**********************************************************************
+These statistics are generated for heuristics used in estimating the
+rate at which we should flush the dirty blocks to avoid bursty IO
+activity. Note that the rate of flushing not only depends on how many
+dirty pages we have in the buffer pool but it is also a fucntion of
+how much redo the workload is generating and at what rate. */
+/* @{ */
+
+/** Number of intervals for which we keep the history of these stats.
+Each interval is 1 second, defined by the rate at which
+srv_error_monitor_thread() calls buf_flush_stat_update(). */
+#define BUF_FLUSH_STAT_N_INTERVAL 20
+
+/** Sampled values buf_flush_stat_cur.
+Not protected by any mutex.  Updated by buf_flush_stat_update(). */
+static buf_flush_stat_t	buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];
+
+/** Cursor to buf_flush_stat_arr[]. Updated in a round-robin fashion. */
+static ulint		buf_flush_stat_arr_ind;
+
+/** Values at start of the current interval. Reset by
+buf_flush_stat_update(). */
+static buf_flush_stat_t	buf_flush_stat_cur;
+
+/** Running sum of past values of buf_flush_stat_cur.
+Updated by buf_flush_stat_update(). Not protected by any mutex. */
+static buf_flush_stat_t	buf_flush_stat_sum;
+
+/** Number of pages flushed through non flush_list flushes. */
+static ulint buf_lru_flush_page_count = 0;
+
+/* @} */
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/******************************************************************//**
+Validates the flush list.
+@return	TRUE if ok */
+static
+ibool
+buf_flush_validate_low(void);
+/*========================*/
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+/********************************************************************//**
+Inserts a modified block into the flush list. */
+UNIV_INTERN
+void
+buf_flush_insert_into_flush_list(
+/*=============================*/
+	buf_block_t*	block)	/*!< in/out: block which is modified */
+{
+	ut_ad(buf_pool_mutex_own());
+	ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
+	      || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
+		  <= block->page.oldest_modification));
+
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+	ut_ad(block->page.in_LRU_list);
+	ut_ad(block->page.in_page_hash);
+	ut_ad(!block->page.in_zip_hash);
+	ut_ad(!block->page.in_flush_list);
+	ut_d(block->page.in_flush_list = TRUE);
+	UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	ut_a(buf_flush_validate_low());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+}
+
+/********************************************************************//**
+Inserts a modified block into the flush list in the right sorted position.
+This function is used by recovery, because there the modifications do not
+necessarily come in the order of lsn's. */
+UNIV_INTERN
+void
+buf_flush_insert_sorted_into_flush_list(
+/*====================================*/
+	buf_block_t*	block)	/*!< in/out: block which is modified */
+{
+	buf_page_t*	prev_b;
+	buf_page_t*	b;
+
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+	ut_ad(block->page.in_LRU_list);
+	ut_ad(block->page.in_page_hash);
+	ut_ad(!block->page.in_zip_hash);
+	ut_ad(!block->page.in_flush_list);
+	ut_d(block->page.in_flush_list = TRUE);
+
+	prev_b = NULL;
+	b = UT_LIST_GET_FIRST(buf_pool->flush_list);
+
+	while (b && b->oldest_modification > block->page.oldest_modification) {
+		ut_ad(b->in_flush_list);
+		prev_b = b;
+		b = UT_LIST_GET_NEXT(list, b);
+	}
+
+	if (prev_b == NULL) {
+		UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
+	} else {
+		UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
+				     prev_b, &block->page);
+	}
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	ut_a(buf_flush_validate_low());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+}
+
+/********************************************************************//**
+Returns TRUE if the file page block is immediately suitable for replacement,
+i.e., the transition FILE_PAGE => NOT_USED allowed.
+@return	TRUE if can replace immediately */
+UNIV_INTERN
+ibool
+buf_flush_ready_for_replace(
+/*========================*/
+	buf_page_t*	bpage)	/*!< in: buffer control block, must be
+				buf_page_in_file(bpage) and in the LRU list */
+{
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+	ut_ad(bpage->in_LRU_list);
+
+	if (UNIV_LIKELY(buf_page_in_file(bpage))) {
+
+		return(bpage->oldest_modification == 0
+		       && buf_page_get_io_fix(bpage) == BUF_IO_NONE
+		       && bpage->buf_fix_count == 0);
+	}
+
+	ut_print_timestamp(stderr);
+	fprintf(stderr,
+		"  InnoDB: Error: buffer block state %lu"
+		" in the LRU list!\n",
+		(ulong) buf_page_get_state(bpage));
+	ut_print_buf(stderr, bpage, sizeof(buf_page_t));
+	putc('\n', stderr);
+
+	return(FALSE);
+}
+
+/********************************************************************//**
+Returns TRUE if the block is modified and ready for flushing.
+@return	TRUE if can flush immediately */
+UNIV_INLINE
+ibool
+buf_flush_ready_for_flush(
+/*======================*/
+	buf_page_t*	bpage,	/*!< in: buffer control block, must be
+				buf_page_in_file(bpage) */
+	enum buf_flush	flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+{
+	ut_a(buf_page_in_file(bpage));
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+	ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
+
+	if (bpage->oldest_modification != 0
+	    && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
+		ut_ad(bpage->in_flush_list);
+
+		if (flush_type != BUF_FLUSH_LRU) {
+
+			return(TRUE);
+
+		} else if (bpage->buf_fix_count == 0) {
+
+			/* If we are flushing the LRU list, to avoid deadlocks
+			we require the block not to be bufferfixed, and hence
+			not latched. */
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/********************************************************************//**
+Remove a block from the flush list of modified blocks. */
+UNIV_INTERN
+void
+buf_flush_remove(
+/*=============*/
+	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
+{
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+	ut_ad(bpage->in_flush_list);
+	ut_d(bpage->in_flush_list = FALSE);
+
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_ZIP_PAGE:
+		/* clean compressed pages should not be on the flush list */
+	case BUF_BLOCK_ZIP_FREE:
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		ut_error;
+		return;
+	case BUF_BLOCK_ZIP_DIRTY:
+		buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
+		UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+		buf_LRU_insert_zip_clean(bpage);
+		break;
+	case BUF_BLOCK_FILE_PAGE:
+		UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+		break;
+	}
+
+	bpage->oldest_modification = 0;
+
+	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
+			      ut_ad(ut_list_node_313->in_flush_list)));
+}
+
+/********************************************************************//**
+Updates the flush system data structures when a write is completed. */
+UNIV_INTERN
+void
+buf_flush_write_complete(
+/*=====================*/
+	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
+{
+	enum buf_flush	flush_type;
+
+	ut_ad(bpage);
+
+	buf_flush_remove(bpage);
+
+	flush_type = buf_page_get_flush_type(bpage);
+	buf_pool->n_flush[flush_type]--;
+
+	if (flush_type == BUF_FLUSH_LRU) {
+		/* Put the block to the end of the LRU list to wait to be
+		moved to the free list */
+
+		buf_LRU_make_block_old(bpage);
+
+		buf_pool->LRU_flush_ended++;
+	}
+
+	/* fprintf(stderr, "n pending flush %lu\n",
+	buf_pool->n_flush[flush_type]); */
+
+	if ((buf_pool->n_flush[flush_type] == 0)
+	    && (buf_pool->init_flush[flush_type] == FALSE)) {
+
+		/* The running flush batch has ended */
+
+		os_event_set(buf_pool->no_flush[flush_type]);
+	}
+}
+
+/********************************************************************//**
+Flushes possible buffered writes from the doublewrite memory buffer to disk,
+and also wakes up the aio thread if simulated aio is used. It is very
+important to call this function after a batch of writes has been posted,
+and also when we may have to wait for a page latch! Otherwise a deadlock
+of threads can occur. */
+static
+void
+buf_flush_buffered_writes(void)
+/*===========================*/
+{
+	byte*		write_buf;
+	ulint		len;
+	ulint		len2;
+	ulint		i;
+
+	if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
+		os_aio_simulated_wake_handler_threads();
+
+		return;
+	}
+
+	mutex_enter(&(trx_doublewrite->mutex));
+
+	/* Write first to doublewrite buffer blocks. We use synchronous
+	aio and thus know that file write has been completed when the
+	control returns. */
+
+	if (trx_doublewrite->first_free == 0) {
+
+		mutex_exit(&(trx_doublewrite->mutex));
+
+		return;
+	}
+
+	for (i = 0; i < trx_doublewrite->first_free; i++) {
+
+		const buf_block_t*	block;
+
+		block = (buf_block_t*) trx_doublewrite->buf_block_arr[i];
+
+		if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
+		    || block->page.zip.data) {
+			/* No simple validate for compressed pages exists. */
+			continue;
+		}
+
+		if (UNIV_UNLIKELY
+		    (memcmp(block->frame + (FIL_PAGE_LSN + 4),
+			    block->frame + (UNIV_PAGE_SIZE
+					    - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
+			    4))) {
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: ERROR: The page to be written"
+				" seems corrupt!\n"
+				"InnoDB: The lsn fields do not match!"
+				" Noticed in the buffer pool\n"
+				"InnoDB: before posting to the"
+				" doublewrite buffer.\n");
+		}
+
+		if (!block->check_index_page_at_flush) {
+		} else if (page_is_comp(block->frame)) {
+			if (UNIV_UNLIKELY
+			    (!page_simple_validate_new(block->frame))) {
+corrupted_page:
+				buf_page_print(block->frame, 0);
+
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+					"  InnoDB: Apparent corruption of an"
+					" index page n:o %lu in space %lu\n"
+					"InnoDB: to be written to data file."
+					" We intentionally crash server\n"
+					"InnoDB: to prevent corrupt data"
+					" from ending up in data\n"
+					"InnoDB: files.\n",
+					(ulong) buf_block_get_page_no(block),
+					(ulong) buf_block_get_space(block));
+
+				ut_error;
+			}
+		} else if (UNIV_UNLIKELY
+			   (!page_simple_validate_old(block->frame))) {
+
+			goto corrupted_page;
+		}
+	}
+
+	/* increment the doublewrite flushed pages counter */
+	srv_dblwr_pages_written+= trx_doublewrite->first_free;
+	srv_dblwr_writes++;
+
+	len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
+		     trx_doublewrite->first_free) * UNIV_PAGE_SIZE;
+
+	write_buf = trx_doublewrite->write_buf;
+	i = 0;
+
+	fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+	       trx_doublewrite->block1, 0, len,
+	       (void*) write_buf, NULL);
+
+	for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
+	     len2 += UNIV_PAGE_SIZE, i++) {
+		const buf_block_t* block = (buf_block_t*)
+			trx_doublewrite->buf_block_arr[i];
+
+		if (UNIV_LIKELY(!block->page.zip.data)
+		    && UNIV_LIKELY(buf_block_get_state(block)
+				   == BUF_BLOCK_FILE_PAGE)
+		    && UNIV_UNLIKELY
+		    (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
+			    write_buf + len2
+			    + (UNIV_PAGE_SIZE
+			       - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: ERROR: The page to be written"
+				" seems corrupt!\n"
+				"InnoDB: The lsn fields do not match!"
+				" Noticed in the doublewrite block1.\n");
+		}
+	}
+
+	if (trx_doublewrite->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
+		goto flush;
+	}
+
+	len = (trx_doublewrite->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
+		* UNIV_PAGE_SIZE;
+
+	write_buf = trx_doublewrite->write_buf
+		+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
+	ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);
+
+	fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+	       trx_doublewrite->block2, 0, len,
+	       (void*) write_buf, NULL);
+
+	for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
+	     len2 += UNIV_PAGE_SIZE, i++) {
+		const buf_block_t* block = (buf_block_t*)
+			trx_doublewrite->buf_block_arr[i];
+
+		if (UNIV_LIKELY(!block->page.zip.data)
+		    && UNIV_LIKELY(buf_block_get_state(block)
+				   == BUF_BLOCK_FILE_PAGE)
+		    && UNIV_UNLIKELY
+		    (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
+			    write_buf + len2
+			    + (UNIV_PAGE_SIZE
+			       - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: ERROR: The page to be"
+				" written seems corrupt!\n"
+				"InnoDB: The lsn fields do not match!"
+				" Noticed in"
+				" the doublewrite block2.\n");
+		}
+	}
+
+flush:
+	/* Now flush the doublewrite buffer data to disk */
+
+	fil_flush(TRX_SYS_SPACE);
+
+	/* We know that the writes have been flushed to disk now
+	and in recovery we will find them in the doublewrite buffer
+	blocks. Next do the writes to the intended positions. */
+
+	for (i = 0; i < trx_doublewrite->first_free; i++) {
+		const buf_block_t* block = (buf_block_t*)
+			trx_doublewrite->buf_block_arr[i];
+
+		ut_a(buf_page_in_file(&block->page));
+		if (UNIV_LIKELY_NULL(block->page.zip.data)) {
+			fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
+			       FALSE, buf_page_get_space(&block->page),
+			       buf_page_get_zip_size(&block->page),
+			       buf_page_get_page_no(&block->page), 0,
+			       buf_page_get_zip_size(&block->page),
+			       (void*)block->page.zip.data,
+			       (void*)block);
+
+			/* Increment the counter of I/O operations used
+			for selecting LRU policy. */
+			buf_LRU_stat_inc_io();
+
+			continue;
+		}
+
+		ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+		if (UNIV_UNLIKELY(memcmp(block->frame + (FIL_PAGE_LSN + 4),
+					 block->frame
+					 + (UNIV_PAGE_SIZE
+					    - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
+					 4))) {
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: ERROR: The page to be written"
+				" seems corrupt!\n"
+				"InnoDB: The lsn fields do not match!"
+				" Noticed in the buffer pool\n"
+				"InnoDB: after posting and flushing"
+				" the doublewrite buffer.\n"
+				"InnoDB: Page buf fix count %lu,"
+				" io fix %lu, state %lu\n",
+				(ulong)block->page.buf_fix_count,
+				(ulong)buf_block_get_io_fix(block),
+				(ulong)buf_block_get_state(block));
+		}
+
+		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
+		       FALSE, buf_block_get_space(block), 0,
+		       buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
+		       (void*)block->frame, (void*)block);
+
+		/* Increment the counter of I/O operations used
+		for selecting LRU policy. */
+		buf_LRU_stat_inc_io();
+	}
+
+	/* Wake possible simulated aio thread to actually post the
+	writes to the operating system */
+
+	os_aio_simulated_wake_handler_threads();
+
+	/* Wait that all async writes to tablespaces have been posted to
+	the OS */
+
+	os_aio_wait_until_no_pending_writes();
+
+	/* Now we flush the data to disk (for example, with fsync) */
+
+	fil_flush_file_spaces(FIL_TABLESPACE);
+
+	/* We can now reuse the doublewrite memory buffer: */
+
+	trx_doublewrite->first_free = 0;
+
+	mutex_exit(&(trx_doublewrite->mutex));
+}
+
+/********************************************************************//**
+Posts a buffer page for writing. If the doublewrite memory buffer is
+full, calls buf_flush_buffered_writes and waits for for free space to
+appear. */
+static
+void
+buf_flush_post_to_doublewrite_buf(
+/*==============================*/
+	buf_page_t*	bpage)	/*!< in: buffer block to write */
+{
+	ulint	zip_size;
+try_again:
+	mutex_enter(&(trx_doublewrite->mutex));
+
+	ut_a(buf_page_in_file(bpage));
+
+	if (trx_doublewrite->first_free
+	    >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
+		mutex_exit(&(trx_doublewrite->mutex));
+
+		buf_flush_buffered_writes();
+
+		goto try_again;
+	}
+
+	zip_size = buf_page_get_zip_size(bpage);
+
+	if (UNIV_UNLIKELY(zip_size)) {
+		/* Copy the compressed page and clear the rest. */
+		memcpy(trx_doublewrite->write_buf
+		       + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
+		       bpage->zip.data, zip_size);
+		memset(trx_doublewrite->write_buf
+		       + UNIV_PAGE_SIZE * trx_doublewrite->first_free
+		       + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
+	} else {
+		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+
+		memcpy(trx_doublewrite->write_buf
+		       + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
+		       ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE);
+	}
+
+	trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = bpage;
+
+	trx_doublewrite->first_free++;
+
+	if (trx_doublewrite->first_free
+	    >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
+		mutex_exit(&(trx_doublewrite->mutex));
+
+		buf_flush_buffered_writes();
+
+		return;
+	}
+
+	mutex_exit(&(trx_doublewrite->mutex));
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************************//**
+Initializes a page for writing to the tablespace. */
+UNIV_INTERN
+void
+buf_flush_init_for_writing(
+/*=======================*/
+	byte*		page,		/*!< in/out: page */
+	void*		page_zip_,	/*!< in/out: compressed page, or NULL */
+	ib_uint64_t	newest_lsn)	/*!< in: newest modification lsn
+					to the page */
+{
+	ut_ad(page);
+
+	if (page_zip_) {
+		page_zip_des_t*	page_zip = page_zip_;
+		ulint		zip_size = page_zip_get_size(page_zip);
+		ut_ad(zip_size);
+		ut_ad(ut_is_2pow(zip_size));
+		ut_ad(zip_size <= UNIV_PAGE_SIZE);
+
+		switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) {
+		case FIL_PAGE_TYPE_ALLOCATED:
+		case FIL_PAGE_INODE:
+		case FIL_PAGE_IBUF_BITMAP:
+		case FIL_PAGE_TYPE_FSP_HDR:
+		case FIL_PAGE_TYPE_XDES:
+			/* These are essentially uncompressed pages. */
+			memcpy(page_zip->data, page, zip_size);
+			/* fall through */
+		case FIL_PAGE_TYPE_ZBLOB:
+		case FIL_PAGE_TYPE_ZBLOB2:
+		case FIL_PAGE_INDEX:
+			mach_write_ull(page_zip->data
+				       + FIL_PAGE_LSN, newest_lsn);
+			memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
+			mach_write_to_4(page_zip->data
+					+ FIL_PAGE_SPACE_OR_CHKSUM,
+					srv_use_checksums
+					? page_zip_calc_checksum(
+						page_zip->data, zip_size)
+					: BUF_NO_CHECKSUM_MAGIC);
+			return;
+		}
+
+		ut_print_timestamp(stderr);
+		fputs("  InnoDB: ERROR: The compressed page to be written"
+		      " seems corrupt:", stderr);
+		ut_print_buf(stderr, page, zip_size);
+		fputs("\nInnoDB: Possibly older version of the page:", stderr);
+		ut_print_buf(stderr, page_zip->data, zip_size);
+		putc('\n', stderr);
+		ut_error;
+	}
+
+	/* Write the newest modification lsn to the page header and trailer */
+	mach_write_ull(page + FIL_PAGE_LSN, newest_lsn);
+
+	mach_write_ull(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+		       newest_lsn);
+
+	/* Store the new formula checksum */
+
+	mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+			srv_use_checksums
+			? buf_calc_page_new_checksum(page)
+			: BUF_NO_CHECKSUM_MAGIC);
+
+	/* We overwrite the first 4 bytes of the end lsn field to store
+	the old formula checksum. Since it depends also on the field
+	FIL_PAGE_SPACE_OR_CHKSUM, it has to be calculated after storing the
+	new formula checksum. */
+
+	mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+			srv_use_checksums
+			? buf_calc_page_old_checksum(page)
+			: BUF_NO_CHECKSUM_MAGIC);
+}
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Does an asynchronous write of a buffer page. NOTE: in simulated aio and
+also when the doublewrite buffer is used, we must call
+buf_flush_buffered_writes after we have posted a batch of writes! */
+static
+void
+buf_flush_write_block_low(
+/*======================*/
+	buf_page_t*	bpage)	/*!< in: buffer block to write */
+{
+	ulint	zip_size	= buf_page_get_zip_size(bpage);
+	page_t*	frame		= NULL;
+#ifdef UNIV_LOG_DEBUG
+	static ibool univ_log_debug_warned;
+#endif /* UNIV_LOG_DEBUG */
+
+	ut_ad(buf_page_in_file(bpage));
+
+	/* We are not holding buf_pool_mutex or block_mutex here.
+	Nevertheless, it is safe to access bpage, because it is
+	io_fixed and oldest_modification != 0.  Thus, it cannot be
+	relocated in the buffer pool or removed from flush_list or
+	LRU_list. */
+	ut_ad(!buf_pool_mutex_own());
+	ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
+	ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
+	ut_ad(bpage->oldest_modification != 0);
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+	ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
+#endif
+	ut_ad(bpage->newest_modification != 0);
+
+#ifdef UNIV_LOG_DEBUG
+	if (!univ_log_debug_warned) {
+		univ_log_debug_warned = TRUE;
+		fputs("Warning: cannot force log to disk if"
+		      " UNIV_LOG_DEBUG is defined!\n"
+		      "Crash recovery will not work!\n",
+		      stderr);
+	}
+#else
+	/* Force the log to the disk before writing the modified block */
+	log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
+#endif
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_ZIP_FREE:
+	case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		ut_error;
+		break;
+	case BUF_BLOCK_ZIP_DIRTY:
+		frame = bpage->zip.data;
+		if (UNIV_LIKELY(srv_use_checksums)) {
+			ut_a(mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
+			     == page_zip_calc_checksum(frame, zip_size));
+		}
+		mach_write_ull(frame + FIL_PAGE_LSN,
+			       bpage->newest_modification);
+		memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
+		break;
+	case BUF_BLOCK_FILE_PAGE:
+		frame = bpage->zip.data;
+		if (!frame) {
+			frame = ((buf_block_t*) bpage)->frame;
+		}
+
+		buf_flush_init_for_writing(((buf_block_t*) bpage)->frame,
+					   bpage->zip.data
+					   ? &bpage->zip : NULL,
+					   bpage->newest_modification);
+		break;
+	}
+
+	if (!srv_use_doublewrite_buf || !trx_doublewrite) {
+		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
+		       FALSE, buf_page_get_space(bpage), zip_size,
+		       buf_page_get_page_no(bpage), 0,
+		       zip_size ? zip_size : UNIV_PAGE_SIZE,
+		       frame, bpage);
+	} else {
+		buf_flush_post_to_doublewrite_buf(bpage);
+	}
+}
+
+/********************************************************************//**
+Writes a flushable page asynchronously from the buffer pool to a file.
+NOTE: in simulated aio we must call
+os_aio_simulated_wake_handler_threads after we have posted a batch of
+writes! NOTE: buf_pool_mutex and buf_page_get_mutex(bpage) must be
+held upon entering this function, and they will be released by this
+function. */
+static
+void
+buf_flush_page(
+/*===========*/
+	buf_page_t*	bpage,		/*!< in: buffer control block */
+	enum buf_flush	flush_type)	/*!< in: BUF_FLUSH_LRU
+					or BUF_FLUSH_LIST */
+{
+	mutex_t*	block_mutex;
+	ibool		is_uncompressed;
+
+	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(buf_page_in_file(bpage));
+
+	block_mutex = buf_page_get_mutex(bpage);
+	ut_ad(mutex_own(block_mutex));
+
+	ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
+
+	buf_page_set_io_fix(bpage, BUF_IO_WRITE);
+
+	buf_page_set_flush_type(bpage, flush_type);
+
+	if (buf_pool->n_flush[flush_type] == 0) {
+
+		os_event_reset(buf_pool->no_flush[flush_type]);
+	}
+
+	buf_pool->n_flush[flush_type]++;
+
+	is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+	ut_ad(is_uncompressed == (block_mutex != &buf_pool_zip_mutex));
+
+	switch (flush_type) {
+		ibool	is_s_latched;
+	case BUF_FLUSH_LIST:
+		/* If the simulated aio thread is not running, we must
+		not wait for any latch, as we may end up in a deadlock:
+		if buf_fix_count == 0, then we know we need not wait */
+
+		is_s_latched = (bpage->buf_fix_count == 0);
+		if (is_s_latched && is_uncompressed) {
+			rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
+					   BUF_IO_WRITE);
+		}
+
+		mutex_exit(block_mutex);
+		buf_pool_mutex_exit();
+
+		/* Even though bpage is not protected by any mutex at
+		this point, it is safe to access bpage, because it is
+		io_fixed and oldest_modification != 0.  Thus, it
+		cannot be relocated in the buffer pool or removed from
+		flush_list or LRU_list. */
+
+		if (!is_s_latched) {
+			buf_flush_buffered_writes();
+
+			if (is_uncompressed) {
+				rw_lock_s_lock_gen(&((buf_block_t*) bpage)
+						   ->lock, BUF_IO_WRITE);
+			}
+		}
+
+		break;
+
+	case BUF_FLUSH_LRU:
+		/* VERY IMPORTANT:
+		Because any thread may call the LRU flush, even when owning
+		locks on pages, to avoid deadlocks, we must make sure that the
+		s-lock is acquired on the page without waiting: this is
+		accomplished because buf_flush_ready_for_flush() must hold,
+		and that requires the page not to be bufferfixed. */
+
+		if (is_uncompressed) {
+			rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
+					   BUF_IO_WRITE);
+		}
+
+		/* Note that the s-latch is acquired before releasing the
+		buf_pool mutex: this ensures that the latch is acquired
+		immediately. */
+
+		mutex_exit(block_mutex);
+		buf_pool_mutex_exit();
+		break;
+
+	default:
+		ut_error;
+	}
+
+	/* Even though bpage is not protected by any mutex at this
+	point, it is safe to access bpage, because it is io_fixed and
+	oldest_modification != 0.  Thus, it cannot be relocated in the
+	buffer pool or removed from flush_list or LRU_list. */
+
+#ifdef UNIV_DEBUG
+	if (buf_debug_prints) {
+		fprintf(stderr,
+			"Flushing %u space %u page %u\n",
+			flush_type, bpage->space, bpage->offset);
+	}
+#endif /* UNIV_DEBUG */
+	buf_flush_write_block_low(bpage);
+}
+
+/***********************************************************//**
+Flushes to disk all flushable pages within the flush area.
+@return	number of pages flushed */
+static
+ulint
+buf_flush_try_neighbors(
+/*====================*/
+	ulint		space,		/*!< in: space id */
+	ulint		offset,		/*!< in: page offset */
+	enum buf_flush	flush_type)	/*!< in: BUF_FLUSH_LRU or
+					BUF_FLUSH_LIST */
+{
+	buf_page_t*	bpage;
+	ulint		low, high;
+	ulint		count		= 0;
+	ulint		i;
+
+	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+
+	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
+		/* If there is little space, it is better not to flush any
+		block except from the end of the LRU list */
+
+		low = offset;
+		high = offset + 1;
+	} else {
+		/* When flushed, dirty blocks are searched in neighborhoods of
+		this size, and flushed along with the original page. */
+
+		ulint	buf_flush_area	= ut_min(BUF_READ_AHEAD_AREA,
+						 buf_pool->curr_size / 16);
+
+		low = (offset / buf_flush_area) * buf_flush_area;
+		high = (offset / buf_flush_area + 1) * buf_flush_area;
+	}
+
+	/* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
+
+	if (high > fil_space_get_size(space)) {
+		high = fil_space_get_size(space);
+	}
+
+	buf_pool_mutex_enter();
+
+	for (i = low; i < high; i++) {
+
+		bpage = buf_page_hash_get(space, i);
+
+		if (!bpage) {
+
+			continue;
+		}
+
+		ut_a(buf_page_in_file(bpage));
+
+		/* We avoid flushing 'non-old' blocks in an LRU flush,
+		because the flushed blocks are soon freed */
+
+		if (flush_type != BUF_FLUSH_LRU
+		    || i == offset
+		    || buf_page_is_old(bpage)) {
+			mutex_t* block_mutex = buf_page_get_mutex(bpage);
+
+			mutex_enter(block_mutex);
+
+			if (buf_flush_ready_for_flush(bpage, flush_type)
+			    && (i == offset || !bpage->buf_fix_count)) {
+				/* We only try to flush those
+				neighbors != offset where the buf fix count is
+				zero, as we then know that we probably can
+				latch the page without a semaphore wait.
+				Semaphore waits are expensive because we must
+				flush the doublewrite buffer before we start
+				waiting. */
+
+				buf_flush_page(bpage, flush_type);
+				ut_ad(!mutex_own(block_mutex));
+				count++;
+
+				buf_pool_mutex_enter();
+			} else {
+				mutex_exit(block_mutex);
+			}
+		}
+	}
+
+	buf_pool_mutex_exit();
+
+	return(count);
+}
+
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the LRU list or flush_list.
+NOTE 1: in the case of an LRU flush the calling thread may own latches to
+pages: to avoid deadlocks, this function must be written so that it cannot
+end up waiting for these latches! NOTE 2: in the case of a flush list flush,
+the calling thread is not allowed to own any latches on pages!
+@return number of blocks for which the write request was queued;
+ULINT_UNDEFINED if there was a flush of the same type already running */
+UNIV_INTERN
+ulint
+buf_flush_batch(
+/*============*/
+	enum buf_flush	flush_type,	/*!< in: BUF_FLUSH_LRU or
+					BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
+					then the caller must not own any
+					latches on pages */
+	ulint		min_n,		/*!< in: wished minimum mumber of blocks
+					flushed (it is not guaranteed that the
+					actual number is that big, though) */
+	ib_uint64_t	lsn_limit)	/*!< in the case BUF_FLUSH_LIST all
+					blocks whose oldest_modification is
+					smaller than this should be flushed
+					(if their number does not exceed
+					min_n), otherwise ignored */
+{
+	buf_page_t*	bpage;
+	ulint		page_count	= 0;
+	ulint		old_page_count;
+	ulint		space;
+	ulint		offset;
+
+	ut_ad((flush_type == BUF_FLUSH_LRU)
+	      || (flush_type == BUF_FLUSH_LIST));
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad((flush_type != BUF_FLUSH_LIST)
+	      || sync_thread_levels_empty_gen(TRUE));
+#endif /* UNIV_SYNC_DEBUG */
+	buf_pool_mutex_enter();
+
+	if ((buf_pool->n_flush[flush_type] > 0)
+	    || (buf_pool->init_flush[flush_type] == TRUE)) {
+
+		/* There is already a flush batch of the same type running */
+
+		buf_pool_mutex_exit();
+
+		return(ULINT_UNDEFINED);
+	}
+
+	buf_pool->init_flush[flush_type] = TRUE;
+
+	for (;;) {
+flush_next:
+		/* If we have flushed enough, leave the loop */
+		if (page_count >= min_n) {
+
+			break;
+		}
+
+		/* Start from the end of the list looking for a suitable
+		block to be flushed. */
+
+		if (flush_type == BUF_FLUSH_LRU) {
+			bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+		} else {
+			ut_ad(flush_type == BUF_FLUSH_LIST);
+
+			bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+			if (!bpage
+			    || bpage->oldest_modification >= lsn_limit) {
+				/* We have flushed enough */
+
+				break;
+			}
+			ut_ad(bpage->in_flush_list);
+		}
+
+		/* Note that after finding a single flushable page, we try to
+		flush also all its neighbors, and after that start from the
+		END of the LRU list or flush list again: the list may change
+		during the flushing and we cannot safely preserve within this
+		function a pointer to a block in the list! */
+
+		do {
+			mutex_t*block_mutex = buf_page_get_mutex(bpage);
+			ibool	ready;
+
+			ut_a(buf_page_in_file(bpage));
+
+			mutex_enter(block_mutex);
+			ready = buf_flush_ready_for_flush(bpage, flush_type);
+			mutex_exit(block_mutex);
+
+			if (ready) {
+				space = buf_page_get_space(bpage);
+				offset = buf_page_get_page_no(bpage);
+
+				buf_pool_mutex_exit();
+
+				old_page_count = page_count;
+
+				/* Try to flush also all the neighbors */
+				page_count += buf_flush_try_neighbors(
+					space, offset, flush_type);
+				/* fprintf(stderr,
+				"Flush type %lu, page no %lu, neighb %lu\n",
+				flush_type, offset,
+				page_count - old_page_count); */
+
+				buf_pool_mutex_enter();
+				goto flush_next;
+
+			} else if (flush_type == BUF_FLUSH_LRU) {
+				bpage = UT_LIST_GET_PREV(LRU, bpage);
+			} else {
+				ut_ad(flush_type == BUF_FLUSH_LIST);
+
+				bpage = UT_LIST_GET_PREV(list, bpage);
+				ut_ad(!bpage || bpage->in_flush_list);
+			}
+		} while (bpage != NULL);
+
+		/* If we could not find anything to flush, leave the loop */
+
+		break;
+	}
+
+	buf_pool->init_flush[flush_type] = FALSE;
+
+	if (buf_pool->n_flush[flush_type] == 0) {
+
+		/* The running flush batch has ended */
+
+		os_event_set(buf_pool->no_flush[flush_type]);
+	}
+
+	buf_pool_mutex_exit();
+
+	buf_flush_buffered_writes();
+
+#ifdef UNIV_DEBUG
+	if (buf_debug_prints && page_count > 0) {
+		ut_a(flush_type == BUF_FLUSH_LRU
+		     || flush_type == BUF_FLUSH_LIST);
+		fprintf(stderr, flush_type == BUF_FLUSH_LRU
+			? "Flushed %lu pages in LRU flush\n"
+			: "Flushed %lu pages in flush list flush\n",
+			(ulong) page_count);
+	}
+#endif /* UNIV_DEBUG */
+
+	srv_buf_pool_flushed += page_count;
+
+	/* We keep track of all flushes happening as part of LRU
+	flush. When estimating the desired rate at which flush_list
+	should be flushed we factor in this value. */
+	if (flush_type == BUF_FLUSH_LRU) {
+		buf_lru_flush_page_count += page_count;
+	}
+
+	return(page_count);
+}
+
+/******************************************************************//**
+Waits until a flush batch of the given type ends */
+UNIV_INTERN
+void
+buf_flush_wait_batch_end(
+/*=====================*/
+	enum buf_flush	type)	/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+{
+	ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
+
+	os_event_wait(buf_pool->no_flush[type]);
+}
+
+/******************************************************************//**
+Gives a recommendation of how many blocks should be flushed to establish
+a big enough margin of replaceable blocks near the end of the LRU list
+and in the free list.
+@return number of blocks which should be flushed from the end of the
+LRU list */
+static
+ulint
+buf_flush_LRU_recommendation(void)
+/*==============================*/
+{
+	buf_page_t*	bpage;
+	ulint		n_replaceable;
+	ulint		distance	= 0;
+
+	buf_pool_mutex_enter();
+
+	n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
+
+	bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+
+	while ((bpage != NULL)
+	       && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
+		   + BUF_FLUSH_EXTRA_MARGIN)
+	       && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
+
+		mutex_t* block_mutex = buf_page_get_mutex(bpage);
+
+		mutex_enter(block_mutex);
+
+		if (buf_flush_ready_for_replace(bpage)) {
+			n_replaceable++;
+		}
+
+		mutex_exit(block_mutex);
+
+		distance++;
+
+		bpage = UT_LIST_GET_PREV(LRU, bpage);
+	}
+
+	buf_pool_mutex_exit();
+
+	if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
+
+		return(0);
+	}
+
+	return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN
+	       - n_replaceable);
+}
+
+/*********************************************************************//**
+Flushes pages from the end of the LRU list if there is too small a margin
+of replaceable pages there or in the free list. VERY IMPORTANT: this function
+is called also by threads which have locks on pages. To avoid deadlocks, we
+flush only pages such that the s-lock required for flushing can be acquired
+immediately, without waiting. */
+UNIV_INTERN
+void
+buf_flush_free_margin(void)
+/*=======================*/
+{
+	ulint	n_to_flush;
+	ulint	n_flushed;
+
+	n_to_flush = buf_flush_LRU_recommendation();
+
+	if (n_to_flush > 0) {
+		n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0);
+		if (n_flushed == ULINT_UNDEFINED) {
+			/* There was an LRU type flush batch already running;
+			let us wait for it to end */
+
+			buf_flush_wait_batch_end(BUF_FLUSH_LRU);
+		}
+	}
+}
+
+/*********************************************************************
+Update the historical stats that we are collecting for flush rate
+heuristics at the end of each interval.
+Flush rate heuristic depends on (a) rate of redo log generation and
+(b) the rate at which LRU flush is happening. */
+UNIV_INTERN
+void
+buf_flush_stat_update(void)
+/*=======================*/
+{
+	buf_flush_stat_t*	item;
+	ib_uint64_t		lsn_diff;
+	ib_uint64_t		lsn;
+	ulint			n_flushed;
+
+	lsn = log_get_lsn();
+	if (buf_flush_stat_cur.redo == 0) {
+		/* First time around. Just update the current LSN
+		and return. */
+		buf_flush_stat_cur.redo = lsn;
+		return;
+	}
+
+	item = &buf_flush_stat_arr[buf_flush_stat_arr_ind];
+
+	/* values for this interval */
+	lsn_diff = lsn - buf_flush_stat_cur.redo;
+	n_flushed = buf_lru_flush_page_count
+		    - buf_flush_stat_cur.n_flushed;
+
+	/* add the current value and subtract the obsolete entry. */
+	buf_flush_stat_sum.redo += lsn_diff - item->redo;
+	buf_flush_stat_sum.n_flushed += n_flushed - item->n_flushed;
+
+	/* put current entry in the array. */
+	item->redo = lsn_diff;
+	item->n_flushed = n_flushed;
+
+	/* update the index */
+	buf_flush_stat_arr_ind++;
+	buf_flush_stat_arr_ind %= BUF_FLUSH_STAT_N_INTERVAL;
+
+	/* reset the current entry. */
+	buf_flush_stat_cur.redo = lsn;
+	buf_flush_stat_cur.n_flushed = buf_lru_flush_page_count;
+}
+
+/*********************************************************************
+Determines the fraction of dirty pages that need to be flushed based
+on the speed at which we generate redo log. Note that if redo log
+is generated at a significant rate without corresponding increase
+in the number of dirty pages (for example, an in-memory workload)
+it can cause IO bursts of flushing. This function implements heuristics
+to avoid this burstiness.
+@return	number of dirty pages to be flushed / second */
+UNIV_INTERN
+ulint
+buf_flush_get_desired_flush_rate(void)
+/*==================================*/
+{
+	ulint			redo_avg;
+	ulint			lru_flush_avg;
+	ulint			n_dirty;
+	ulint			n_flush_req;
+	lint			rate;
+	ib_uint64_t		lsn = log_get_lsn();
+	ulint			log_capacity = log_get_capacity();
+
+	/* log_capacity should never be zero after the initialization
+	of log subsystem. */
+	ut_ad(log_capacity != 0);
+
+	/* Get total number of dirty pages. It is OK to access
+	flush_list without holding any mtex as we are using this
+	only for heuristics. */
+	n_dirty = UT_LIST_GET_LEN(buf_pool->flush_list);
+
+	/* An overflow can happen if we generate more than 2^32 bytes
+	of redo in this interval i.e.: 4G of redo in 1 second. We can
+	safely consider this as infinity because if we ever come close
+	to 4G we'll start a synchronous flush of dirty pages. */
+	/* redo_avg below is average at which redo is generated in
+	past BUF_FLUSH_STAT_N_INTERVAL + redo generated in the current
+	interval. */
+	redo_avg = (ulint) (buf_flush_stat_sum.redo
+			    / BUF_FLUSH_STAT_N_INTERVAL
+			    + (lsn - buf_flush_stat_cur.redo));
+
+	/* An overflow can happen possibly if we flush more than 2^32
+	pages in BUF_FLUSH_STAT_N_INTERVAL. This is a very very
+	unlikely scenario. Even when this happens it means that our
+	flush rate will be off the mark. It won't affect correctness
+	of any subsystem. */
+	/* lru_flush_avg below is rate at which pages are flushed as
+	part of LRU flush in past BUF_FLUSH_STAT_N_INTERVAL + the
+	number of pages flushed in the current interval. */
+	lru_flush_avg = buf_flush_stat_sum.n_flushed
+			/ BUF_FLUSH_STAT_N_INTERVAL
+			+ (buf_lru_flush_page_count
+			   - buf_flush_stat_cur.n_flushed);
+
+	n_flush_req = (n_dirty * redo_avg) / log_capacity;
+
+	/* The number of pages that we want to flush from the flush
+	list is the difference between the required rate and the
+	number of pages that we are historically flushing from the
+	LRU list */
+	rate = n_flush_req - lru_flush_avg;
+	return(rate > 0 ? (ulint) rate : 0);
+}
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/******************************************************************//**
+Validates the flush list.
+@return	TRUE if ok */
+static
+ibool
+buf_flush_validate_low(void)
+/*========================*/
+{
+	buf_page_t*	bpage;
+
+	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
+			 ut_ad(ut_list_node_313->in_flush_list));
+
+	bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
+
+	while (bpage != NULL) {
+		const ib_uint64_t om = bpage->oldest_modification;
+		ut_ad(bpage->in_flush_list);
+		ut_a(buf_page_in_file(bpage));
+		ut_a(om > 0);
+
+		bpage = UT_LIST_GET_NEXT(list, bpage);
+
+		ut_a(!bpage || om >= bpage->oldest_modification);
+	}
+
+	return(TRUE);
+}
+
+/******************************************************************//**
+Validates the flush list.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+buf_flush_validate(void)
+/*====================*/
+{
+	ibool	ret;
+
+	buf_pool_mutex_enter();
+
+	ret = buf_flush_validate_low();
+
+	buf_pool_mutex_exit();
+
+	return(ret);
+}
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/buf/buf0lru.c b/storage/innodb_plugin/buf/buf0lru.c
new file mode 100644
index 00000000000..be53a5f5d9d
--- /dev/null
+++ b/storage/innodb_plugin/buf/buf0lru.c
@@ -0,0 +1,2066 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file buf/buf0lru.c
+The database buffer replacement algorithm
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0lru.h"
+
+#ifdef UNIV_NONINL
+#include "buf0lru.ic"
+#endif
+
+#include "ut0byte.h"
+#include "ut0lst.h"
+#include "ut0rnd.h"
+#include "sync0sync.h"
+#include "sync0rw.h"
+#include "hash0hash.h"
+#include "os0sync.h"
+#include "fil0fil.h"
+#include "btr0btr.h"
+#include "buf0buddy.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "buf0rea.h"
+#include "btr0sea.h"
+#include "ibuf0ibuf.h"
+#include "os0file.h"
+#include "page0zip.h"
+#include "log0recv.h"
+#include "srv0srv.h"
+
+/** The number of blocks from the LRU_old pointer onward, including the block
+pointed to, must be 3/8 of the whole LRU list length, except that the
+tolerance defined below is allowed. Note that the tolerance must be small
+enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the
+LRU_old pointer is not allowed to point to either end of the LRU list. */
+
+#define BUF_LRU_OLD_TOLERANCE	20
+
+/** The whole LRU list length is divided by this number to determine an
+initial segment in buf_LRU_get_recent_limit */
+
+#define BUF_LRU_INITIAL_RATIO	8
+
+/** When dropping the search hash index entries before deleting an ibd
+file, we build a local array of pages belonging to that tablespace
+in the buffer pool. Following is the size of that array. */
+#define BUF_LRU_DROP_SEARCH_HASH_SIZE	1024
+
+/** If we switch on the InnoDB monitor because there are too few available
+frames in the buffer pool, we set this to TRUE */
+static ibool	buf_lru_switched_on_innodb_mon	= FALSE;
+
+/******************************************************************//**
+These statistics are not 'of' LRU but 'for' LRU.  We keep count of I/O
+and page_zip_decompress() operations.  Based on the statistics,
+buf_LRU_evict_from_unzip_LRU() decides if we want to evict from
+unzip_LRU or the regular LRU.  From unzip_LRU, we will only evict the
+uncompressed frame (meaning we can evict dirty blocks as well).  From
+the regular LRU, we will evict the entire block (i.e.: both the
+uncompressed and compressed data), which must be clean. */
+
+/* @{ */
+
+/** Number of intervals for which we keep the history of these stats.
+Each interval is 1 second, defined by the rate at which
+srv_error_monitor_thread() calls buf_LRU_stat_update(). */
+#define BUF_LRU_STAT_N_INTERVAL 50
+
+/** Co-efficient with which we multiply I/O operations to equate them
+with page_zip_decompress() operations. */
+#define BUF_LRU_IO_TO_UNZIP_FACTOR 50
+
+/** Sampled values buf_LRU_stat_cur.
+Protected by buf_pool_mutex.  Updated by buf_LRU_stat_update(). */
+static buf_LRU_stat_t		buf_LRU_stat_arr[BUF_LRU_STAT_N_INTERVAL];
+/** Cursor to buf_LRU_stat_arr[] that is updated in a round-robin fashion. */
+static ulint			buf_LRU_stat_arr_ind;
+
+/** Current operation counters.  Not protected by any mutex.  Cleared
+by buf_LRU_stat_update(). */
+UNIV_INTERN buf_LRU_stat_t	buf_LRU_stat_cur;
+
+/** Running sum of past values of buf_LRU_stat_cur.
+Updated by buf_LRU_stat_update().  Protected by buf_pool_mutex. */
+UNIV_INTERN buf_LRU_stat_t	buf_LRU_stat_sum;
+
+/* @} */
+
+/******************************************************************//**
+Takes a block out of the LRU list and page hash table.
+If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
+the object will be freed and buf_pool_zip_mutex will be released.
+
+If a compressed page or a compressed-only block descriptor is freed,
+other compressed pages or compressed-only block descriptors may be
+relocated.
+@return the new state of the block (BUF_BLOCK_ZIP_FREE if the state
+was BUF_BLOCK_ZIP_PAGE, or BUF_BLOCK_REMOVE_HASH otherwise) */
+static
+enum buf_page_state
+buf_LRU_block_remove_hashed_page(
+/*=============================*/
+	buf_page_t*	bpage,	/*!< in: block, must contain a file page and
+				be in a state where it can be freed; there
+				may or may not be a hash index to the page */
+	ibool		zip);	/*!< in: TRUE if should remove also the
+				compressed page of an uncompressed page */
+/******************************************************************//**
+Puts a file page whose has no hash index to the free list. */
+static
+void
+buf_LRU_block_free_hashed_page(
+/*===========================*/
+	buf_block_t*	block);	/*!< in: block, must contain a file page and
+				be in a state where it can be freed */
+
+/******************************************************************//**
+Determines if the unzip_LRU list should be used for evicting a victim
+instead of the general LRU list.
+@return	TRUE if should use unzip_LRU */
+UNIV_INLINE
+ibool
+buf_LRU_evict_from_unzip_LRU(void)
+/*==============================*/
+{
+	ulint	io_avg;
+	ulint	unzip_avg;
+
+	ut_ad(buf_pool_mutex_own());
+
+	/* If the unzip_LRU list is empty, we can only use the LRU. */
+	if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
+		return(FALSE);
+	}
+
+	/* If unzip_LRU is at most 10% of the size of the LRU list,
+	then use the LRU.  This slack allows us to keep hot
+	decompressed pages in the buffer pool. */
+	if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
+	    <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
+		return(FALSE);
+	}
+
+	/* If eviction hasn't started yet, we assume by default
+	that a workload is disk bound. */
+	if (buf_pool->freed_page_clock == 0) {
+		return(TRUE);
+	}
+
+	/* Calculate the average over past intervals, and add the values
+	of the current interval. */
+	io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL
+		+ buf_LRU_stat_cur.io;
+	unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL
+		+ buf_LRU_stat_cur.unzip;
+
+	/* Decide based on our formula.  If the load is I/O bound
+	(unzip_avg is smaller than the weighted io_avg), evict an
+	uncompressed frame from unzip_LRU.  Otherwise we assume that
+	the load is CPU bound and evict from the regular LRU. */
+	return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR);
+}
+
+/******************************************************************//**
+Attempts to drop page hash index on a batch of pages belonging to a
+particular space id. */
+static
+void
+buf_LRU_drop_page_hash_batch(
+/*=========================*/
+	ulint		space_id,	/*!< in: space id */
+	ulint		zip_size,	/*!< in: compressed page size in bytes
+					or 0 for uncompressed pages */
+	const ulint*	arr,		/*!< in: array of page_no */
+	ulint		count)		/*!< in: number of entries in array */
+{
+	ulint	i;
+
+	ut_ad(arr != NULL);
+	ut_ad(count <= BUF_LRU_DROP_SEARCH_HASH_SIZE);
+
+	for (i = 0; i < count; ++i) {
+		btr_search_drop_page_hash_when_freed(space_id, zip_size,
+						     arr[i]);
+	}
+}
+
+/******************************************************************//**
+When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
+hash index entries belonging to that table. This function tries to
+do that in batch. Note that this is a 'best effort' attempt and does
+not guarantee that ALL hash entries will be removed. */
+static
+void
+buf_LRU_drop_page_hash_for_tablespace(
+/*==================================*/
+	ulint	id)	/*!< in: space id */
+{
+	buf_page_t*	bpage;
+	ulint*		page_arr;
+	ulint		num_entries;
+	ulint		zip_size;
+
+	zip_size = fil_space_get_zip_size(id);
+
+	if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+		/* Somehow, the tablespace does not exist.  Nothing to drop. */
+		ut_ad(0);
+		return;
+	}
+
+	page_arr = ut_malloc(sizeof(ulint)
+			     * BUF_LRU_DROP_SEARCH_HASH_SIZE);
+	buf_pool_mutex_enter();
+
+scan_again:
+	num_entries = 0;
+	bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+
+	while (bpage != NULL) {
+		mutex_t*	block_mutex = buf_page_get_mutex(bpage);
+		buf_page_t*	prev_bpage;
+
+		mutex_enter(block_mutex);
+		prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
+
+		ut_a(buf_page_in_file(bpage));
+
+		if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
+		    || bpage->space != id
+		    || bpage->buf_fix_count > 0
+		    || bpage->io_fix != BUF_IO_NONE) {
+			/* We leave the fixed pages as is in this scan.
+			To be dealt with later in the final scan. */
+			mutex_exit(block_mutex);
+			goto next_page;
+		}
+
+		if (((buf_block_t*) bpage)->is_hashed) {
+
+			/* Store the offset(i.e.: page_no) in the array
+			so that we can drop hash index in a batch
+			later. */
+			page_arr[num_entries] = bpage->offset;
+			mutex_exit(block_mutex);
+			ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
+			++num_entries;
+
+			if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
+				goto next_page;
+			}
+			/* Array full. We release the buf_pool_mutex to
+			obey the latching order. */
+			buf_pool_mutex_exit();
+
+			buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
+						     num_entries);
+			num_entries = 0;
+			buf_pool_mutex_enter();
+		} else {
+			mutex_exit(block_mutex);
+		}
+
+next_page:
+		/* Note that we may have released the buf_pool mutex
+		above after reading the prev_bpage during processing
+		of a page_hash_batch (i.e.: when the array was full).
+		This means that prev_bpage can change in LRU list.
+		This is OK because this function is a 'best effort'
+		to drop as many search hash entries as possible and
+		it does not guarantee that ALL such entries will be
+		dropped. */
+		bpage = prev_bpage;
+
+		/* If, however, bpage has been removed from LRU list
+		to the free list then we should restart the scan.
+		bpage->state is protected by buf_pool mutex. */
+		if (bpage && !buf_page_in_file(bpage)) {
+			ut_a(num_entries == 0);
+			goto scan_again;
+		}
+	}
+
+	buf_pool_mutex_exit();
+
+	/* Drop any remaining batch of search hashed pages. */
+	buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
+	ut_free(page_arr);
+}
+
+/******************************************************************//**
+Invalidates all pages belonging to a given tablespace when we are deleting
+the data file(s) of that tablespace. */
+UNIV_INTERN
+void
+buf_LRU_invalidate_tablespace(
+/*==========================*/
+	ulint	id)	/*!< in: space id */
+{
+	buf_page_t*	bpage;
+	ibool		all_freed;
+
+	/* Before we attempt to drop pages one by one we first
+	attempt to drop page hash index entries in batches to make
+	it more efficient. The batching attempt is a best effort
+	attempt and does not guarantee that all pages hash entries
+	will be dropped. We get rid of remaining page hash entries
+	one by one below. */
+	buf_LRU_drop_page_hash_for_tablespace(id);
+
+scan_again:
+	buf_pool_mutex_enter();
+
+	all_freed = TRUE;
+
+	bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+
+	while (bpage != NULL) {
+		mutex_t*	block_mutex = buf_page_get_mutex(bpage);
+		buf_page_t*	prev_bpage;
+
+		ut_a(buf_page_in_file(bpage));
+
+		mutex_enter(block_mutex);
+		prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
+
+		if (buf_page_get_space(bpage) == id) {
+			if (bpage->buf_fix_count > 0
+			    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+
+				/* We cannot remove this page during
+				this scan yet; maybe the system is
+				currently reading it in, or flushing
+				the modifications to the file */
+
+				all_freed = FALSE;
+
+				goto next_page;
+			}
+
+#ifdef UNIV_DEBUG
+			if (buf_debug_prints) {
+				fprintf(stderr,
+					"Dropping space %lu page %lu\n",
+					(ulong) buf_page_get_space(bpage),
+					(ulong) buf_page_get_page_no(bpage));
+			}
+#endif
+			if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE
+			    && ((buf_block_t*) bpage)->is_hashed) {
+				ulint	page_no;
+				ulint	zip_size;
+
+				buf_pool_mutex_exit();
+
+				zip_size = buf_page_get_zip_size(bpage);
+				page_no = buf_page_get_page_no(bpage);
+
+				mutex_exit(block_mutex);
+
+				/* Note that the following call will acquire
+				an S-latch on the page */
+
+				btr_search_drop_page_hash_when_freed(
+					id, zip_size, page_no);
+				goto scan_again;
+			}
+
+			if (bpage->oldest_modification != 0) {
+
+				buf_flush_remove(bpage);
+			}
+
+			/* Remove from the LRU list */
+			if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
+			    != BUF_BLOCK_ZIP_FREE) {
+				buf_LRU_block_free_hashed_page((buf_block_t*)
+							       bpage);
+			} else {
+				/* The block_mutex should have been
+				released by buf_LRU_block_remove_hashed_page()
+				when it returns BUF_BLOCK_ZIP_FREE. */
+				ut_ad(block_mutex == &buf_pool_zip_mutex);
+				ut_ad(!mutex_own(block_mutex));
+
+				/* The compressed block descriptor
+				(bpage) has been deallocated and
+				block_mutex released.  Also,
+				buf_buddy_free() may have relocated
+				prev_bpage.  Rescan the LRU list. */
+
+				bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+				continue;
+			}
+		}
+next_page:
+		mutex_exit(block_mutex);
+		bpage = prev_bpage;
+	}
+
+	buf_pool_mutex_exit();
+
+	if (!all_freed) {
+		os_thread_sleep(20000);
+
+		goto scan_again;
+	}
+}
+
+/******************************************************************//**
+Gets the minimum LRU_position field for the blocks in an initial segment
+(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
+guaranteed to be precise, because the ulint_clock may wrap around.
+@return	the limit; zero if could not determine it */
+UNIV_INTERN
+ulint
+buf_LRU_get_recent_limit(void)
+/*==========================*/
+{
+	const buf_page_t*	bpage;
+	ulint			len;
+	ulint			limit;
+
+	buf_pool_mutex_enter();
+
+	len = UT_LIST_GET_LEN(buf_pool->LRU);
+
+	if (len < BUF_LRU_OLD_MIN_LEN) {
+		/* The LRU list is too short to do read-ahead */
+
+		buf_pool_mutex_exit();
+
+		return(0);
+	}
+
+	bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+	limit = buf_page_get_LRU_position(bpage);
+	len /= BUF_LRU_INITIAL_RATIO;
+
+	buf_pool_mutex_exit();
+
+	return(limit > len ? (limit - len) : 0);
+}
+
+/********************************************************************//**
+Insert a compressed block into buf_pool->zip_clean in the LRU order. */
+UNIV_INTERN
+void
+buf_LRU_insert_zip_clean(
+/*=====================*/
+	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
+{
+	buf_page_t*	b;
+
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
+
+	/* Find the first successor of bpage in the LRU list
+	that is in the zip_clean list. */
+	b = bpage;
+	do {
+		b = UT_LIST_GET_NEXT(LRU, b);
+	} while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
+
+	/* Insert bpage before b, i.e., after the predecessor of b. */
+	if (b) {
+		b = UT_LIST_GET_PREV(list, b);
+	}
+
+	if (b) {
+		UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
+	} else {
+		UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
+	}
+}
+
+/******************************************************************//**
+Try to free an uncompressed page of a compressed block from the unzip
+LRU list.  The compressed page is preserved, and it need not be clean.
+@return	TRUE if freed */
+UNIV_INLINE
+ibool
+buf_LRU_free_from_unzip_LRU_list(
+/*=============================*/
+	ulint	n_iterations)	/*!< in: how many times this has been called
+				repeatedly without result: a high value means
+				that we should search farther; we will search
+				n_iterations / 5 of the unzip_LRU list,
+				or nothing if n_iterations >= 5 */
+{
+	buf_block_t*	block;
+	ulint		distance;
+
+	ut_ad(buf_pool_mutex_own());
+
+	/* Theoratically it should be much easier to find a victim
+	from unzip_LRU as we can choose even a dirty block (as we'll
+	be evicting only the uncompressed frame).  In a very unlikely
+	eventuality that we are unable to find a victim from
+	unzip_LRU, we fall back to the regular LRU list.  We do this
+	if we have done five iterations so far. */
+
+	if (UNIV_UNLIKELY(n_iterations >= 5)
+	    || !buf_LRU_evict_from_unzip_LRU()) {
+
+		return(FALSE);
+	}
+
+	distance = 100 + (n_iterations
+			  * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
+
+	for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
+	     UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
+	     block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
+
+		enum buf_lru_free_block_status	freed;
+
+		ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+		ut_ad(block->in_unzip_LRU_list);
+		ut_ad(block->page.in_LRU_list);
+
+		mutex_enter(&block->mutex);
+		freed = buf_LRU_free_block(&block->page, FALSE, NULL);
+		mutex_exit(&block->mutex);
+
+		switch (freed) {
+		case BUF_LRU_FREED:
+			return(TRUE);
+
+		case BUF_LRU_CANNOT_RELOCATE:
+			/* If we failed to relocate, try
+			regular LRU eviction. */
+			return(FALSE);
+
+		case BUF_LRU_NOT_FREED:
+			/* The block was buffer-fixed or I/O-fixed.
+			Keep looking. */
+			continue;
+		}
+
+		/* inappropriate return value from
+		buf_LRU_free_block() */
+		ut_error;
+	}
+
+	return(FALSE);
+}
+
+/******************************************************************//**
+Try to free a clean page from the common LRU list.
+@return	TRUE if freed */
+UNIV_INLINE
+ibool
+buf_LRU_free_from_common_LRU_list(
+/*==============================*/
+	ulint	n_iterations)	/*!< in: how many times this has been called
+				repeatedly without result: a high value means
+				that we should search farther; if
+				n_iterations < 10, then we search
+				n_iterations / 10 * buf_pool->curr_size
+				pages from the end of the LRU list */
+{
+	buf_page_t*	bpage;
+	ulint		distance;
+
+	ut_ad(buf_pool_mutex_own());
+
+	distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
+
+	for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+	     UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
+	     bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
+
+		enum buf_lru_free_block_status	freed;
+		mutex_t*			block_mutex
+			= buf_page_get_mutex(bpage);
+
+		ut_ad(buf_page_in_file(bpage));
+		ut_ad(bpage->in_LRU_list);
+
+		mutex_enter(block_mutex);
+		freed = buf_LRU_free_block(bpage, TRUE, NULL);
+		mutex_exit(block_mutex);
+
+		switch (freed) {
+		case BUF_LRU_FREED:
+			return(TRUE);
+
+		case BUF_LRU_NOT_FREED:
+			/* The block was dirty, buffer-fixed, or I/O-fixed.
+			Keep looking. */
+			continue;
+
+		case BUF_LRU_CANNOT_RELOCATE:
+			/* This should never occur, because we
+			want to discard the compressed page too. */
+			break;
+		}
+
+		/* inappropriate return value from
+		buf_LRU_free_block() */
+		ut_error;
+	}
+
+	return(FALSE);
+}
+
+/******************************************************************//**
+Try to free a replaceable block.
+@return	TRUE if found and freed */
+UNIV_INTERN
+ibool
+buf_LRU_search_and_free_block(
+/*==========================*/
+	ulint	n_iterations)	/*!< in: how many times this has been called
+				repeatedly without result: a high value means
+				that we should search farther; if
+				n_iterations < 10, then we search
+				n_iterations / 10 * buf_pool->curr_size
+				pages from the end of the LRU list; if
+				n_iterations < 5, then we will also search
+				n_iterations / 5 of the unzip_LRU list. */
+{
+	ibool	freed = FALSE;
+
+	buf_pool_mutex_enter();
+
+	freed = buf_LRU_free_from_unzip_LRU_list(n_iterations);
+
+	if (!freed) {
+		freed = buf_LRU_free_from_common_LRU_list(n_iterations);
+	}
+
+	if (!freed) {
+		buf_pool->LRU_flush_ended = 0;
+	} else if (buf_pool->LRU_flush_ended > 0) {
+		buf_pool->LRU_flush_ended--;
+	}
+
+	buf_pool_mutex_exit();
+
+	return(freed);
+}
+
+/******************************************************************//**
+Tries to remove LRU flushed blocks from the end of the LRU list and put them
+to the free list. This is beneficial for the efficiency of the insert buffer
+operation, as flushed pages from non-unique non-clustered indexes are here
+taken out of the buffer pool, and their inserts redirected to the insert
+buffer. Otherwise, the flushed blocks could get modified again before read
+operations need new buffer blocks, and the i/o work done in flushing would be
+wasted. */
+UNIV_INTERN
+void
+buf_LRU_try_free_flushed_blocks(void)
+/*=================================*/
+{
+	buf_pool_mutex_enter();
+
+	while (buf_pool->LRU_flush_ended > 0) {
+
+		buf_pool_mutex_exit();
+
+		buf_LRU_search_and_free_block(1);
+
+		buf_pool_mutex_enter();
+	}
+
+	buf_pool_mutex_exit();
+}
+
+/******************************************************************//**
+Returns TRUE if less than 25 % of the buffer pool is available. This can be
+used in heuristics to prevent huge transactions eating up the whole buffer
+pool for their locks.
+@return	TRUE if less than 25 % of buffer pool left */
+UNIV_INTERN
+ibool
+buf_LRU_buf_pool_running_out(void)
+/*==============================*/
+{
+	ibool	ret	= FALSE;
+
+	buf_pool_mutex_enter();
+
+	if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+	    + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 4) {
+
+		ret = TRUE;
+	}
+
+	buf_pool_mutex_exit();
+
+	return(ret);
+}
+
+/******************************************************************//**
+Returns a free block from the buf_pool.  The block is taken off the
+free list.  If it is empty, returns NULL.
+@return	a free control block, or NULL if the buf_block->free list is empty */
+UNIV_INTERN
+buf_block_t*
+buf_LRU_get_free_only(void)
+/*=======================*/
+{
+	buf_block_t*	block;
+
+	ut_ad(buf_pool_mutex_own());
+
+	block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
+
+	if (block) {
+		ut_ad(block->page.in_free_list);
+		ut_d(block->page.in_free_list = FALSE);
+		ut_ad(!block->page.in_flush_list);
+		ut_ad(!block->page.in_LRU_list);
+		ut_a(!buf_page_in_file(&block->page));
+		UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
+
+		mutex_enter(&block->mutex);
+
+		buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
+		UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
+
+		mutex_exit(&block->mutex);
+	}
+
+	return(block);
+}
+
+/******************************************************************//**
+Returns a free block from the buf_pool. The block is taken off the
+free list. If it is empty, blocks are moved from the end of the
+LRU list to the free list.
+@return	the free control block, in state BUF_BLOCK_READY_FOR_USE */
+UNIV_INTERN
+buf_block_t*
+buf_LRU_get_free_block(
+/*===================*/
+	ulint	zip_size)	/*!< in: compressed page size in bytes,
+				or 0 if uncompressed tablespace */
+{
+	buf_block_t*	block		= NULL;
+	ibool		freed;
+	ulint		n_iterations	= 1;
+	ibool		mon_value_was	= FALSE;
+	ibool		started_monitor	= FALSE;
+loop:
+	buf_pool_mutex_enter();
+
+	if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+	    + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
+		ut_print_timestamp(stderr);
+
+		fprintf(stderr,
+			"  InnoDB: ERROR: over 95 percent of the buffer pool"
+			" is occupied by\n"
+			"InnoDB: lock heaps or the adaptive hash index!"
+			" Check that your\n"
+			"InnoDB: transactions do not set too many row locks.\n"
+			"InnoDB: Your buffer pool size is %lu MB."
+			" Maybe you should make\n"
+			"InnoDB: the buffer pool bigger?\n"
+			"InnoDB: We intentionally generate a seg fault"
+			" to print a stack trace\n"
+			"InnoDB: on Linux!\n",
+			(ulong) (buf_pool->curr_size
+				 / (1024 * 1024 / UNIV_PAGE_SIZE)));
+
+		ut_error;
+
+	} else if (!recv_recovery_on
+		   && (UT_LIST_GET_LEN(buf_pool->free)
+		       + UT_LIST_GET_LEN(buf_pool->LRU))
+		   < buf_pool->curr_size / 3) {
+
+		if (!buf_lru_switched_on_innodb_mon) {
+
+			/* Over 67 % of the buffer pool is occupied by lock
+			heaps or the adaptive hash index. This may be a memory
+			leak! */
+
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: WARNING: over 67 percent of"
+				" the buffer pool is occupied by\n"
+				"InnoDB: lock heaps or the adaptive"
+				" hash index! Check that your\n"
+				"InnoDB: transactions do not set too many"
+				" row locks.\n"
+				"InnoDB: Your buffer pool size is %lu MB."
+				" Maybe you should make\n"
+				"InnoDB: the buffer pool bigger?\n"
+				"InnoDB: Starting the InnoDB Monitor to print"
+				" diagnostics, including\n"
+				"InnoDB: lock heap and hash index sizes.\n",
+				(ulong) (buf_pool->curr_size
+					 / (1024 * 1024 / UNIV_PAGE_SIZE)));
+
+			buf_lru_switched_on_innodb_mon = TRUE;
+			srv_print_innodb_monitor = TRUE;
+			os_event_set(srv_lock_timeout_thread_event);
+		}
+	} else if (buf_lru_switched_on_innodb_mon) {
+
+		/* Switch off the InnoDB Monitor; this is a simple way
+		to stop the monitor if the situation becomes less urgent,
+		but may also surprise users if the user also switched on the
+		monitor! */
+
+		buf_lru_switched_on_innodb_mon = FALSE;
+		srv_print_innodb_monitor = FALSE;
+	}
+
+	/* If there is a block in the free list, take it */
+	block = buf_LRU_get_free_only();
+	if (block) {
+
+#ifdef UNIV_DEBUG
+		block->page.zip.m_start =
+#endif /* UNIV_DEBUG */
+			block->page.zip.m_end =
+			block->page.zip.m_nonempty =
+			block->page.zip.n_blobs = 0;
+
+		if (UNIV_UNLIKELY(zip_size)) {
+			ibool	lru;
+			page_zip_set_size(&block->page.zip, zip_size);
+			block->page.zip.data = buf_buddy_alloc(zip_size, &lru);
+			UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
+		} else {
+			page_zip_set_size(&block->page.zip, 0);
+			block->page.zip.data = NULL;
+		}
+
+		buf_pool_mutex_exit();
+
+		if (started_monitor) {
+			srv_print_innodb_monitor = mon_value_was;
+		}
+
+		return(block);
+	}
+
+	/* If no block was in the free list, search from the end of the LRU
+	list and try to free a block there */
+
+	buf_pool_mutex_exit();
+
+	freed = buf_LRU_search_and_free_block(n_iterations);
+
+	if (freed > 0) {
+		goto loop;
+	}
+
+	if (n_iterations > 30) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			"  InnoDB: Warning: difficult to find free blocks in\n"
+			"InnoDB: the buffer pool (%lu search iterations)!"
+			" Consider\n"
+			"InnoDB: increasing the buffer pool size.\n"
+			"InnoDB: It is also possible that"
+			" in your Unix version\n"
+			"InnoDB: fsync is very slow, or"
+			" completely frozen inside\n"
+			"InnoDB: the OS kernel. Then upgrading to"
+			" a newer version\n"
+			"InnoDB: of your operating system may help."
+			" Look at the\n"
+			"InnoDB: number of fsyncs in diagnostic info below.\n"
+			"InnoDB: Pending flushes (fsync) log: %lu;"
+			" buffer pool: %lu\n"
+			"InnoDB: %lu OS file reads, %lu OS file writes,"
+			" %lu OS fsyncs\n"
+			"InnoDB: Starting InnoDB Monitor to print further\n"
+			"InnoDB: diagnostics to the standard output.\n",
+			(ulong) n_iterations,
+			(ulong) fil_n_pending_log_flushes,
+			(ulong) fil_n_pending_tablespace_flushes,
+			(ulong) os_n_file_reads, (ulong) os_n_file_writes,
+			(ulong) os_n_fsyncs);
+
+		mon_value_was = srv_print_innodb_monitor;
+		started_monitor = TRUE;
+		srv_print_innodb_monitor = TRUE;
+		os_event_set(srv_lock_timeout_thread_event);
+	}
+
+	/* No free block was found: try to flush the LRU list */
+
+	buf_flush_free_margin();
+	++srv_buf_pool_wait_free;
+
+	os_aio_simulated_wake_handler_threads();
+
+	buf_pool_mutex_enter();
+
+	if (buf_pool->LRU_flush_ended > 0) {
+		/* We have written pages in an LRU flush. To make the insert
+		buffer more efficient, we try to move these pages to the free
+		list. */
+
+		buf_pool_mutex_exit();
+
+		buf_LRU_try_free_flushed_blocks();
+	} else {
+		buf_pool_mutex_exit();
+	}
+
+	if (n_iterations > 10) {
+
+		os_thread_sleep(500000);
+	}
+
+	n_iterations++;
+
+	goto loop;
+}
+
+/*******************************************************************//**
+Moves the LRU_old pointer so that the length of the old blocks list
+is inside the allowed limits. */
+UNIV_INLINE
+void
+buf_LRU_old_adjust_len(void)
+/*========================*/
+{
+	ulint	old_len;
+	ulint	new_len;
+
+	ut_a(buf_pool->LRU_old);
+	ut_ad(buf_pool_mutex_own());
+#if 3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5
+# error "3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5"
+#endif
+#ifdef UNIV_LRU_DEBUG
+	/* buf_pool->LRU_old must be the first item in the LRU list
+	whose "old" flag is set. */
+	ut_a(buf_pool->LRU_old->old);
+	ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
+	     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
+	ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
+	     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
+#endif /* UNIV_LRU_DEBUG */
+
+	for (;;) {
+		old_len = buf_pool->LRU_old_len;
+		new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
+
+		ut_ad(buf_pool->LRU_old->in_LRU_list);
+		ut_a(buf_pool->LRU_old);
+#ifdef UNIV_LRU_DEBUG
+		ut_a(buf_pool->LRU_old->old);
+#endif /* UNIV_LRU_DEBUG */
+
+		/* Update the LRU_old pointer if necessary */
+
+		if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) {
+
+			buf_pool->LRU_old = UT_LIST_GET_PREV(
+				LRU, buf_pool->LRU_old);
+#ifdef UNIV_LRU_DEBUG
+			ut_a(!buf_pool->LRU_old->old);
+#endif /* UNIV_LRU_DEBUG */
+			buf_page_set_old(buf_pool->LRU_old, TRUE);
+			buf_pool->LRU_old_len++;
+
+		} else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
+
+			buf_page_set_old(buf_pool->LRU_old, FALSE);
+			buf_pool->LRU_old = UT_LIST_GET_NEXT(
+				LRU, buf_pool->LRU_old);
+			buf_pool->LRU_old_len--;
+		} else {
+			return;
+		}
+	}
+}
+
+/*******************************************************************//**
+Initializes the old blocks pointer in the LRU list. This function should be
+called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */
+static
+void
+buf_LRU_old_init(void)
+/*==================*/
+{
+	buf_page_t*	bpage;
+
+	ut_ad(buf_pool_mutex_own());
+	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
+
+	/* We first initialize all blocks in the LRU list as old and then use
+	the adjust function to move the LRU_old pointer to the right
+	position */
+
+	bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+	while (bpage != NULL) {
+		ut_ad(bpage->in_LRU_list);
+		buf_page_set_old(bpage, TRUE);
+		bpage = UT_LIST_GET_NEXT(LRU, bpage);
+	}
+
+	buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU);
+	buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU);
+
+	buf_LRU_old_adjust_len();
+}
+
+/******************************************************************//**
+Remove a block from the unzip_LRU list if it belonged to the list. */
+static
+void
+buf_unzip_LRU_remove_block_if_needed(
+/*=================================*/
+	buf_page_t*	bpage)	/*!< in/out: control block */
+{
+	ut_ad(buf_pool);
+	ut_ad(bpage);
+	ut_ad(buf_page_in_file(bpage));
+	ut_ad(buf_pool_mutex_own());
+
+	if (buf_page_belongs_to_unzip_LRU(bpage)) {
+		buf_block_t*	block = (buf_block_t*) bpage;
+
+		ut_ad(block->in_unzip_LRU_list);
+		ut_d(block->in_unzip_LRU_list = FALSE);
+
+		UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
+	}
+}
+
+/******************************************************************//**
+Removes a block from the LRU list. */
+UNIV_INLINE
+void
+buf_LRU_remove_block(
+/*=================*/
+	buf_page_t*	bpage)	/*!< in: control block */
+{
+	ut_ad(buf_pool);
+	ut_ad(bpage);
+	ut_ad(buf_pool_mutex_own());
+
+	ut_a(buf_page_in_file(bpage));
+
+	ut_ad(bpage->in_LRU_list);
+
+	/* If the LRU_old pointer is defined and points to just this block,
+	move it backward one step */
+
+	if (UNIV_UNLIKELY(bpage == buf_pool->LRU_old)) {
+
+		/* Below: the previous block is guaranteed to exist, because
+		the LRU_old pointer is only allowed to differ by the
+		tolerance value from strict 3/8 of the LRU list length. */
+
+		buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, bpage);
+		ut_a(buf_pool->LRU_old);
+#ifdef UNIV_LRU_DEBUG
+		ut_a(!buf_pool->LRU_old->old);
+#endif /* UNIV_LRU_DEBUG */
+		buf_page_set_old(buf_pool->LRU_old, TRUE);
+
+		buf_pool->LRU_old_len++;
+	}
+
+	/* Remove the block from the LRU list */
+	UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
+	ut_d(bpage->in_LRU_list = FALSE);
+
+	buf_unzip_LRU_remove_block_if_needed(bpage);
+
+	/* If the LRU list is so short that LRU_old not defined, return */
+	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
+
+		buf_pool->LRU_old = NULL;
+
+		return;
+	}
+
+	ut_ad(buf_pool->LRU_old);
+
+	/* Update the LRU_old_len field if necessary */
+	if (buf_page_is_old(bpage)) {
+
+		buf_pool->LRU_old_len--;
+	}
+
+	/* Adjust the length of the old block list if necessary */
+	buf_LRU_old_adjust_len();
+}
+
+/******************************************************************//**
+Adds a block to the LRU list of decompressed zip pages. */
+UNIV_INTERN
+void
+buf_unzip_LRU_add_block(
+/*====================*/
+	buf_block_t*	block,	/*!< in: control block */
+	ibool		old)	/*!< in: TRUE if should be put to the end
+				of the list, else put to the start */
+{
+	ut_ad(buf_pool);
+	ut_ad(block);
+	ut_ad(buf_pool_mutex_own());
+
+	ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+
+	ut_ad(!block->in_unzip_LRU_list);
+	ut_d(block->in_unzip_LRU_list = TRUE);
+
+	if (old) {
+		UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
+	} else {
+		UT_LIST_ADD_FIRST(unzip_LRU, buf_pool->unzip_LRU, block);
+	}
+}
+
+/******************************************************************//**
+Adds a block to the LRU list end. */
+UNIV_INLINE
+void
+buf_LRU_add_block_to_end_low(
+/*=========================*/
+	buf_page_t*	bpage)	/*!< in: control block */
+{
+	buf_page_t*	last_bpage;
+
+	ut_ad(buf_pool);
+	ut_ad(bpage);
+	ut_ad(buf_pool_mutex_own());
+
+	ut_a(buf_page_in_file(bpage));
+
+	last_bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+
+	if (last_bpage) {
+		bpage->LRU_position = last_bpage->LRU_position;
+	} else {
+		bpage->LRU_position = buf_pool_clock_tic();
+	}
+
+	ut_ad(!bpage->in_LRU_list);
+	UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
+	ut_d(bpage->in_LRU_list = TRUE);
+
+	buf_page_set_old(bpage, TRUE);
+
+	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+
+		buf_pool->LRU_old_len++;
+	}
+
+	if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
+
+		ut_ad(buf_pool->LRU_old);
+
+		/* Adjust the length of the old block list if necessary */
+
+		buf_LRU_old_adjust_len();
+
+	} else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
+
+		/* The LRU list is now long enough for LRU_old to become
+		defined: init it */
+
+		buf_LRU_old_init();
+	}
+
+	/* If this is a zipped block with decompressed frame as well
+	then put it on the unzip_LRU list */
+	if (buf_page_belongs_to_unzip_LRU(bpage)) {
+		buf_unzip_LRU_add_block((buf_block_t*) bpage, TRUE);
+	}
+}
+
+/******************************************************************//**
+Adds a block to the LRU list. */
+UNIV_INLINE
+void
+buf_LRU_add_block_low(
+/*==================*/
+	buf_page_t*	bpage,	/*!< in: control block */
+	ibool		old)	/*!< in: TRUE if should be put to the old blocks
+				in the LRU list, else put to the start; if the
+				LRU list is very short, the block is added to
+				the start, regardless of this parameter */
+{
+	ut_ad(buf_pool);
+	ut_ad(bpage);
+	ut_ad(buf_pool_mutex_own());
+
+	ut_a(buf_page_in_file(bpage));
+	ut_ad(!bpage->in_LRU_list);
+
+	if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
+
+		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage);
+
+		bpage->LRU_position = buf_pool_clock_tic();
+		bpage->freed_page_clock = buf_pool->freed_page_clock;
+	} else {
+#ifdef UNIV_LRU_DEBUG
+		/* buf_pool->LRU_old must be the first item in the LRU list
+		whose "old" flag is set. */
+		ut_a(buf_pool->LRU_old->old);
+		ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
+		     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
+		ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
+		     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
+#endif /* UNIV_LRU_DEBUG */
+		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
+				     bpage);
+		buf_pool->LRU_old_len++;
+
+		/* We copy the LRU position field of the previous block
+		to the new block */
+
+		bpage->LRU_position = (buf_pool->LRU_old)->LRU_position;
+	}
+
+	ut_d(bpage->in_LRU_list = TRUE);
+
+	buf_page_set_old(bpage, old);
+
+	if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
+
+		ut_ad(buf_pool->LRU_old);
+
+		/* Adjust the length of the old block list if necessary */
+
+		buf_LRU_old_adjust_len();
+
+	} else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
+
+		/* The LRU list is now long enough for LRU_old to become
+		defined: init it */
+
+		buf_LRU_old_init();
+	}
+
+	/* If this is a zipped block with decompressed frame as well
+	then put it on the unzip_LRU list */
+	if (buf_page_belongs_to_unzip_LRU(bpage)) {
+		buf_unzip_LRU_add_block((buf_block_t*) bpage, old);
+	}
+}
+
+/******************************************************************//**
+Adds a block to the LRU list. */
+UNIV_INTERN
+void
+buf_LRU_add_block(
+/*==============*/
+	buf_page_t*	bpage,	/*!< in: control block */
+	ibool		old)	/*!< in: TRUE if should be put to the old
+				blocks in the LRU list, else put to the start;
+				if the LRU list is very short, the block is
+				added to the start, regardless of this
+				parameter */
+{
+	buf_LRU_add_block_low(bpage, old);
+}
+
+/******************************************************************//**
+Moves a block to the start of the LRU list. */
+UNIV_INTERN
+void
+buf_LRU_make_block_young(
+/*=====================*/
+	buf_page_t*	bpage)	/*!< in: control block */
+{
+	buf_LRU_remove_block(bpage);
+	buf_LRU_add_block_low(bpage, FALSE);
+}
+
+/******************************************************************//**
+Moves a block to the end of the LRU list. */
+UNIV_INTERN
+void
+buf_LRU_make_block_old(
+/*===================*/
+	buf_page_t*	bpage)	/*!< in: control block */
+{
+	buf_LRU_remove_block(bpage);
+	buf_LRU_add_block_to_end_low(bpage);
+}
+
+/******************************************************************//**
+Try to free a block.  If bpage is a descriptor of a compressed-only
+page, the descriptor object will be freed as well.
+
+NOTE: If this function returns BUF_LRU_FREED, it will not temporarily
+release buf_pool_mutex.  Furthermore, the page frame will no longer be
+accessible via bpage.
+
+The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
+release these two mutexes after the call.  No other
+buf_page_get_mutex() may be held when calling this function.
+@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
+BUF_LRU_NOT_FREED otherwise. */
+UNIV_INTERN
+enum buf_lru_free_block_status
+buf_LRU_free_block(
+/*===============*/
+	buf_page_t*	bpage,	/*!< in: block to be freed */
+	ibool		zip,	/*!< in: TRUE if should remove also the
+				compressed page of an uncompressed page */
+	ibool*		buf_pool_mutex_released)
+				/*!< in: pointer to a variable that will
+				be assigned TRUE if buf_pool_mutex
+				was temporarily released, or NULL */
+{
+	buf_page_t*	b = NULL;
+	mutex_t*	block_mutex = buf_page_get_mutex(bpage);
+
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(block_mutex));
+	ut_ad(buf_page_in_file(bpage));
+	ut_ad(bpage->in_LRU_list);
+	ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
+	UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+
+	if (!buf_page_can_relocate(bpage)) {
+
+		/* Do not free buffer-fixed or I/O-fixed blocks. */
+		return(BUF_LRU_NOT_FREED);
+	}
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+	ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
+#endif /* UNIV_IBUF_COUNT_DEBUG */
+
+	if (zip || !bpage->zip.data) {
+		/* This would completely free the block. */
+		/* Do not completely free dirty blocks. */
+
+		if (bpage->oldest_modification) {
+			return(BUF_LRU_NOT_FREED);
+		}
+	} else if (bpage->oldest_modification) {
+		/* Do not completely free dirty blocks. */
+
+		if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
+			ut_ad(buf_page_get_state(bpage)
+			      == BUF_BLOCK_ZIP_DIRTY);
+			return(BUF_LRU_NOT_FREED);
+		}
+
+		goto alloc;
+	} else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
+		/* Allocate the control block for the compressed page.
+		If it cannot be allocated (without freeing a block
+		from the LRU list), refuse to free bpage. */
+alloc:
+		buf_pool_mutex_exit_forbid();
+		b = buf_buddy_alloc(sizeof *b, NULL);
+		buf_pool_mutex_exit_allow();
+
+		if (UNIV_UNLIKELY(!b)) {
+			return(BUF_LRU_CANNOT_RELOCATE);
+		}
+
+		memcpy(b, bpage, sizeof *b);
+	}
+
+#ifdef UNIV_DEBUG
+	if (buf_debug_prints) {
+		fprintf(stderr, "Putting space %lu page %lu to free list\n",
+			(ulong) buf_page_get_space(bpage),
+			(ulong) buf_page_get_page_no(bpage));
+	}
+#endif /* UNIV_DEBUG */
+
+	if (buf_LRU_block_remove_hashed_page(bpage, zip)
+	    != BUF_BLOCK_ZIP_FREE) {
+		ut_a(bpage->buf_fix_count == 0);
+
+		if (b) {
+			buf_page_t*	prev_b	= UT_LIST_GET_PREV(LRU, b);
+			const ulint	fold	= buf_page_address_fold(
+				bpage->space, bpage->offset);
+
+			ut_a(!buf_page_hash_get(bpage->space, bpage->offset));
+
+			b->state = b->oldest_modification
+				? BUF_BLOCK_ZIP_DIRTY
+				: BUF_BLOCK_ZIP_PAGE;
+			UNIV_MEM_DESC(b->zip.data,
+				      page_zip_get_size(&b->zip), b);
+
+			/* The fields in_page_hash and in_LRU_list of
+			the to-be-freed block descriptor should have
+			been cleared in
+			buf_LRU_block_remove_hashed_page(), which
+			invokes buf_LRU_remove_block(). */
+			ut_ad(!bpage->in_page_hash);
+			ut_ad(!bpage->in_LRU_list);
+			/* bpage->state was BUF_BLOCK_FILE_PAGE because
+			b != NULL. The type cast below is thus valid. */
+			ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list);
+
+			/* The fields of bpage were copied to b before
+			buf_LRU_block_remove_hashed_page() was invoked. */
+			ut_ad(!b->in_zip_hash);
+			ut_ad(b->in_page_hash);
+			ut_ad(b->in_LRU_list);
+
+			HASH_INSERT(buf_page_t, hash,
+				    buf_pool->page_hash, fold, b);
+
+			/* Insert b where bpage was in the LRU list. */
+			if (UNIV_LIKELY(prev_b != NULL)) {
+				ulint	lru_len;
+
+				ut_ad(prev_b->in_LRU_list);
+				ut_ad(buf_page_in_file(prev_b));
+				UNIV_MEM_ASSERT_RW(prev_b, sizeof *prev_b);
+
+				UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
+						     prev_b, b);
+
+				if (buf_page_is_old(b)) {
+					buf_pool->LRU_old_len++;
+					if (UNIV_UNLIKELY
+					    (buf_pool->LRU_old
+					     == UT_LIST_GET_NEXT(LRU, b))) {
+
+						buf_pool->LRU_old = b;
+					}
+#ifdef UNIV_LRU_DEBUG
+					ut_a(prev_b->old
+					     || !UT_LIST_GET_NEXT(LRU, b)
+					     || UT_LIST_GET_NEXT(LRU, b)->old);
+				} else {
+					ut_a(!prev_b->old
+					     || !UT_LIST_GET_NEXT(LRU, b)
+					     || !UT_LIST_GET_NEXT(LRU, b)->old);
+#endif /* UNIV_LRU_DEBUG */
+				}
+
+				lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
+
+				if (lru_len > BUF_LRU_OLD_MIN_LEN) {
+					ut_ad(buf_pool->LRU_old);
+					/* Adjust the length of the
+					old block list if necessary */
+					buf_LRU_old_adjust_len();
+				} else if (lru_len == BUF_LRU_OLD_MIN_LEN) {
+					/* The LRU list is now long
+					enough for LRU_old to become
+					defined: init it */
+					buf_LRU_old_init();
+				}
+			} else {
+				ut_d(b->in_LRU_list = FALSE);
+				buf_LRU_add_block_low(b, buf_page_is_old(b));
+			}
+
+			if (b->state == BUF_BLOCK_ZIP_PAGE) {
+				buf_LRU_insert_zip_clean(b);
+			} else {
+				buf_page_t* prev;
+
+				ut_ad(b->in_flush_list);
+				ut_d(bpage->in_flush_list = FALSE);
+
+				prev = UT_LIST_GET_PREV(list, b);
+				UT_LIST_REMOVE(list, buf_pool->flush_list, b);
+
+				if (prev) {
+					ut_ad(prev->in_flush_list);
+					UT_LIST_INSERT_AFTER(
+						list,
+						buf_pool->flush_list,
+						prev, b);
+				} else {
+					UT_LIST_ADD_FIRST(
+						list,
+						buf_pool->flush_list,
+						b);
+				}
+			}
+
+			bpage->zip.data = NULL;
+			page_zip_set_size(&bpage->zip, 0);
+
+			/* Prevent buf_page_get_gen() from
+			decompressing the block while we release
+			buf_pool_mutex and block_mutex. */
+			b->buf_fix_count++;
+			b->io_fix = BUF_IO_READ;
+		}
+
+		if (buf_pool_mutex_released) {
+			*buf_pool_mutex_released = TRUE;
+		}
+
+		buf_pool_mutex_exit();
+		mutex_exit(block_mutex);
+
+		/* Remove possible adaptive hash index on the page.
+		The page was declared uninitialized by
+		buf_LRU_block_remove_hashed_page().  We need to flag
+		the contents of the page valid (which it still is) in
+		order to avoid bogus Valgrind warnings.*/
+
+		UNIV_MEM_VALID(((buf_block_t*) bpage)->frame,
+			       UNIV_PAGE_SIZE);
+		btr_search_drop_page_hash_index((buf_block_t*) bpage);
+		UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
+				 UNIV_PAGE_SIZE);
+
+		if (b) {
+			/* Compute and stamp the compressed page
+			checksum while not holding any mutex.  The
+			block is already half-freed
+			(BUF_BLOCK_REMOVE_HASH) and removed from
+			buf_pool->page_hash, thus inaccessible by any
+			other thread. */
+
+			mach_write_to_4(
+				b->zip.data + FIL_PAGE_SPACE_OR_CHKSUM,
+				UNIV_LIKELY(srv_use_checksums)
+				? page_zip_calc_checksum(
+					b->zip.data,
+					page_zip_get_size(&b->zip))
+				: BUF_NO_CHECKSUM_MAGIC);
+		}
+
+		buf_pool_mutex_enter();
+		mutex_enter(block_mutex);
+
+		if (b) {
+			mutex_enter(&buf_pool_zip_mutex);
+			b->buf_fix_count--;
+			buf_page_set_io_fix(b, BUF_IO_NONE);
+			mutex_exit(&buf_pool_zip_mutex);
+		}
+
+		buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
+	} else {
+		/* The block_mutex should have been released by
+		buf_LRU_block_remove_hashed_page() when it returns
+		BUF_BLOCK_ZIP_FREE. */
+		ut_ad(block_mutex == &buf_pool_zip_mutex);
+		mutex_enter(block_mutex);
+	}
+
+	return(BUF_LRU_FREED);
+}
+
+/******************************************************************//**
+Puts a block back to the free list. */
+UNIV_INTERN
+void
+buf_LRU_block_free_non_file_page(
+/*=============================*/
+	buf_block_t*	block)	/*!< in: block, must not contain a file page */
+{
+	void*	data;
+
+	ut_ad(block);
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&block->mutex));
+
+	switch (buf_block_get_state(block)) {
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_READY_FOR_USE:
+		break;
+	default:
+		ut_error;
+	}
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	ut_a(block->n_pointers == 0);
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	ut_ad(!block->page.in_free_list);
+	ut_ad(!block->page.in_flush_list);
+	ut_ad(!block->page.in_LRU_list);
+
+	buf_block_set_state(block, BUF_BLOCK_NOT_USED);
+
+	UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
+#ifdef UNIV_DEBUG
+	/* Wipe contents of page to reveal possible stale pointers to it */
+	memset(block->frame, '\0', UNIV_PAGE_SIZE);
+#else
+	/* Wipe page_no and space_id */
+	memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
+	memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4);
+#endif
+	data = block->page.zip.data;
+
+	if (data) {
+		block->page.zip.data = NULL;
+		mutex_exit(&block->mutex);
+		buf_pool_mutex_exit_forbid();
+		buf_buddy_free(data, page_zip_get_size(&block->page.zip));
+		buf_pool_mutex_exit_allow();
+		mutex_enter(&block->mutex);
+		page_zip_set_size(&block->page.zip, 0);
+	}
+
+	UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
+	ut_d(block->page.in_free_list = TRUE);
+
+	UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
+}
+
+/******************************************************************//**
+Takes a block out of the LRU list and page hash table.
+If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
+the object will be freed and buf_pool_zip_mutex will be released.
+
+If a compressed page or a compressed-only block descriptor is freed,
+other compressed pages or compressed-only block descriptors may be
+relocated.
+@return the new state of the block (BUF_BLOCK_ZIP_FREE if the state
+was BUF_BLOCK_ZIP_PAGE, or BUF_BLOCK_REMOVE_HASH otherwise) */
+static
+enum buf_page_state
+buf_LRU_block_remove_hashed_page(
+/*=============================*/
+	buf_page_t*	bpage,	/*!< in: block, must contain a file page and
+				be in a state where it can be freed; there
+				may or may not be a hash index to the page */
+	ibool		zip)	/*!< in: TRUE if should remove also the
+				compressed page of an uncompressed page */
+{
+	const buf_page_t*	hashed_bpage;
+	ut_ad(bpage);
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+	ut_a(bpage->buf_fix_count == 0);
+
+	UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+
+	buf_LRU_remove_block(bpage);
+
+	buf_pool->freed_page_clock += 1;
+
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_FILE_PAGE:
+		UNIV_MEM_ASSERT_W(bpage, sizeof(buf_block_t));
+		UNIV_MEM_ASSERT_W(((buf_block_t*) bpage)->frame,
+				  UNIV_PAGE_SIZE);
+		buf_block_modify_clock_inc((buf_block_t*) bpage);
+		if (bpage->zip.data) {
+			const page_t*	page = ((buf_block_t*) bpage)->frame;
+			const ulint	zip_size
+				= page_zip_get_size(&bpage->zip);
+
+			ut_a(!zip || bpage->oldest_modification == 0);
+
+			switch (UNIV_EXPECT(fil_page_get_type(page),
+					    FIL_PAGE_INDEX)) {
+			case FIL_PAGE_TYPE_ALLOCATED:
+			case FIL_PAGE_INODE:
+			case FIL_PAGE_IBUF_BITMAP:
+			case FIL_PAGE_TYPE_FSP_HDR:
+			case FIL_PAGE_TYPE_XDES:
+				/* These are essentially uncompressed pages. */
+				if (!zip) {
+					/* InnoDB writes the data to the
+					uncompressed page frame.  Copy it
+					to the compressed page, which will
+					be preserved. */
+					memcpy(bpage->zip.data, page,
+					       zip_size);
+				}
+				break;
+			case FIL_PAGE_TYPE_ZBLOB:
+			case FIL_PAGE_TYPE_ZBLOB2:
+				break;
+			case FIL_PAGE_INDEX:
+#ifdef UNIV_ZIP_DEBUG
+				ut_a(page_zip_validate(&bpage->zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+				break;
+			default:
+				ut_print_timestamp(stderr);
+				fputs("  InnoDB: ERROR: The compressed page"
+				      " to be evicted seems corrupt:", stderr);
+				ut_print_buf(stderr, page, zip_size);
+				fputs("\nInnoDB: Possibly older version"
+				      " of the page:", stderr);
+				ut_print_buf(stderr, bpage->zip.data,
+					     zip_size);
+				putc('\n', stderr);
+				ut_error;
+			}
+
+			break;
+		}
+		/* fall through */
+	case BUF_BLOCK_ZIP_PAGE:
+		ut_a(bpage->oldest_modification == 0);
+		UNIV_MEM_ASSERT_W(bpage->zip.data,
+				  page_zip_get_size(&bpage->zip));
+		break;
+	case BUF_BLOCK_ZIP_FREE:
+	case BUF_BLOCK_ZIP_DIRTY:
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		ut_error;
+		break;
+	}
+
+	hashed_bpage = buf_page_hash_get(bpage->space, bpage->offset);
+
+	if (UNIV_UNLIKELY(bpage != hashed_bpage)) {
+		fprintf(stderr,
+			"InnoDB: Error: page %lu %lu not found"
+			" in the hash table\n",
+			(ulong) bpage->space,
+			(ulong) bpage->offset);
+		if (hashed_bpage) {
+			fprintf(stderr,
+				"InnoDB: In hash table we find block"
+				" %p of %lu %lu which is not %p\n",
+				(const void*) hashed_bpage,
+				(ulong) hashed_bpage->space,
+				(ulong) hashed_bpage->offset,
+				(const void*) bpage);
+		}
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+		mutex_exit(buf_page_get_mutex(bpage));
+		buf_pool_mutex_exit();
+		buf_print();
+		buf_LRU_print();
+		buf_validate();
+		buf_LRU_validate();
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+		ut_error;
+	}
+
+	ut_ad(!bpage->in_zip_hash);
+	ut_ad(bpage->in_page_hash);
+	ut_d(bpage->in_page_hash = FALSE);
+	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash,
+		    buf_page_address_fold(bpage->space, bpage->offset),
+		    bpage);
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_ZIP_PAGE:
+		ut_ad(!bpage->in_free_list);
+		ut_ad(!bpage->in_flush_list);
+		ut_ad(!bpage->in_LRU_list);
+		ut_a(bpage->zip.data);
+		ut_a(buf_page_get_zip_size(bpage));
+
+		UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
+
+		mutex_exit(&buf_pool_zip_mutex);
+		buf_pool_mutex_exit_forbid();
+		buf_buddy_free(bpage->zip.data,
+			       page_zip_get_size(&bpage->zip));
+		buf_buddy_free(bpage, sizeof(*bpage));
+		buf_pool_mutex_exit_allow();
+		UNIV_MEM_UNDESC(bpage);
+		return(BUF_BLOCK_ZIP_FREE);
+
+	case BUF_BLOCK_FILE_PAGE:
+		memset(((buf_block_t*) bpage)->frame
+		       + FIL_PAGE_OFFSET, 0xff, 4);
+		memset(((buf_block_t*) bpage)->frame
+		       + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
+		UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
+				 UNIV_PAGE_SIZE);
+		buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
+
+		if (zip && bpage->zip.data) {
+			/* Free the compressed page. */
+			void*	data = bpage->zip.data;
+			bpage->zip.data = NULL;
+
+			ut_ad(!bpage->in_free_list);
+			ut_ad(!bpage->in_flush_list);
+			ut_ad(!bpage->in_LRU_list);
+			mutex_exit(&((buf_block_t*) bpage)->mutex);
+			buf_pool_mutex_exit_forbid();
+			buf_buddy_free(data, page_zip_get_size(&bpage->zip));
+			buf_pool_mutex_exit_allow();
+			mutex_enter(&((buf_block_t*) bpage)->mutex);
+			page_zip_set_size(&bpage->zip, 0);
+		}
+
+		return(BUF_BLOCK_REMOVE_HASH);
+
+	case BUF_BLOCK_ZIP_FREE:
+	case BUF_BLOCK_ZIP_DIRTY:
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		break;
+	}
+
+	ut_error;
+	return(BUF_BLOCK_ZIP_FREE);
+}
+
+/******************************************************************//**
+Puts a file page whose has no hash index to the free list. */
+static
+void
+buf_LRU_block_free_hashed_page(
+/*===========================*/
+	buf_block_t*	block)	/*!< in: block, must contain a file page and
+				be in a state where it can be freed */
+{
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&block->mutex));
+
+	buf_block_set_state(block, BUF_BLOCK_MEMORY);
+
+	buf_LRU_block_free_non_file_page(block);
+}
+
+/********************************************************************//**
+Update the historical stats that we are collecting for LRU eviction
+policy at the end of each interval. */
+UNIV_INTERN
+void
+buf_LRU_stat_update(void)
+/*=====================*/
+{
+	buf_LRU_stat_t*	item;
+
+	/* If we haven't started eviction yet then don't update stats. */
+	if (buf_pool->freed_page_clock == 0) {
+		goto func_exit;
+	}
+
+	buf_pool_mutex_enter();
+
+	/* Update the index. */
+	item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
+	buf_LRU_stat_arr_ind++;
+	buf_LRU_stat_arr_ind %= BUF_LRU_STAT_N_INTERVAL;
+
+	/* Add the current value and subtract the obsolete entry. */
+	buf_LRU_stat_sum.io += buf_LRU_stat_cur.io - item->io;
+	buf_LRU_stat_sum.unzip += buf_LRU_stat_cur.unzip - item->unzip;
+
+	/* Put current entry in the array. */
+	memcpy(item, &buf_LRU_stat_cur, sizeof *item);
+
+	buf_pool_mutex_exit();
+
+func_exit:
+	/* Clear the current entry. */
+	memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
+}
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
+Validates the LRU list.
+@return	TRUE */
+UNIV_INTERN
+ibool
+buf_LRU_validate(void)
+/*==================*/
+{
+	buf_page_t*	bpage;
+	buf_block_t*	block;
+	ulint		old_len;
+	ulint		new_len;
+	ulint		LRU_pos;
+
+	ut_ad(buf_pool);
+	buf_pool_mutex_enter();
+
+	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+
+		ut_a(buf_pool->LRU_old);
+		old_len = buf_pool->LRU_old_len;
+		new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
+		ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
+		ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
+	}
+
+	UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
+			 ut_ad(ut_list_node_313->in_LRU_list));
+
+	bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+	old_len = 0;
+
+	while (bpage != NULL) {
+
+		switch (buf_page_get_state(bpage)) {
+		case BUF_BLOCK_ZIP_FREE:
+		case BUF_BLOCK_NOT_USED:
+		case BUF_BLOCK_READY_FOR_USE:
+		case BUF_BLOCK_MEMORY:
+		case BUF_BLOCK_REMOVE_HASH:
+			ut_error;
+			break;
+		case BUF_BLOCK_FILE_PAGE:
+			ut_ad(((buf_block_t*) bpage)->in_unzip_LRU_list
+			      == buf_page_belongs_to_unzip_LRU(bpage));
+		case BUF_BLOCK_ZIP_PAGE:
+		case BUF_BLOCK_ZIP_DIRTY:
+			break;
+		}
+
+		if (buf_page_is_old(bpage)) {
+			old_len++;
+		}
+
+		if (buf_pool->LRU_old && (old_len == 1)) {
+			ut_a(buf_pool->LRU_old == bpage);
+		}
+
+		LRU_pos	= buf_page_get_LRU_position(bpage);
+
+		bpage = UT_LIST_GET_NEXT(LRU, bpage);
+
+		if (bpage) {
+			/* If the following assert fails, it may
+			not be an error: just the buf_pool clock
+			has wrapped around */
+			ut_a(LRU_pos >= buf_page_get_LRU_position(bpage));
+		}
+	}
+
+	if (buf_pool->LRU_old) {
+		ut_a(buf_pool->LRU_old_len == old_len);
+	}
+
+	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
+			 ut_ad(ut_list_node_313->in_free_list));
+
+	for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
+	     bpage != NULL;
+	     bpage = UT_LIST_GET_NEXT(list, bpage)) {
+
+		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
+	}
+
+	UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
+			 ut_ad(ut_list_node_313->in_unzip_LRU_list
+			       && ut_list_node_313->page.in_LRU_list));
+
+	for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
+	     block;
+	     block = UT_LIST_GET_NEXT(unzip_LRU, block)) {
+
+		ut_ad(block->in_unzip_LRU_list);
+		ut_ad(block->page.in_LRU_list);
+		ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+	}
+
+	buf_pool_mutex_exit();
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
+Prints the LRU list. */
+UNIV_INTERN
+void
+buf_LRU_print(void)
+/*===============*/
+{
+	const buf_page_t*	bpage;
+
+	ut_ad(buf_pool);
+	buf_pool_mutex_enter();
+
+	fprintf(stderr, "Pool ulint clock %lu\n",
+		(ulong) buf_pool->ulint_clock);
+
+	bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+	while (bpage != NULL) {
+
+		fprintf(stderr, "BLOCK space %lu page %lu ",
+			(ulong) buf_page_get_space(bpage),
+			(ulong) buf_page_get_page_no(bpage));
+
+		if (buf_page_is_old(bpage)) {
+			fputs("old ", stderr);
+		}
+
+		if (bpage->buf_fix_count) {
+			fprintf(stderr, "buffix count %lu ",
+				(ulong) bpage->buf_fix_count);
+		}
+
+		if (buf_page_get_io_fix(bpage)) {
+			fprintf(stderr, "io_fix %lu ",
+				(ulong) buf_page_get_io_fix(bpage));
+		}
+
+		if (bpage->oldest_modification) {
+			fputs("modif. ", stderr);
+		}
+
+		switch (buf_page_get_state(bpage)) {
+			const byte*	frame;
+		case BUF_BLOCK_FILE_PAGE:
+			frame = buf_block_get_frame((buf_block_t*) bpage);
+			fprintf(stderr, "\nLRU pos %lu type %lu"
+				" index id %lu\n",
+				(ulong) buf_page_get_LRU_position(bpage),
+				(ulong) fil_page_get_type(frame),
+				(ulong) ut_dulint_get_low(
+					btr_page_get_index_id(frame)));
+			break;
+		case BUF_BLOCK_ZIP_PAGE:
+			frame = bpage->zip.data;
+			fprintf(stderr, "\nLRU pos %lu type %lu size %lu"
+				" index id %lu\n",
+				(ulong) buf_page_get_LRU_position(bpage),
+				(ulong) fil_page_get_type(frame),
+				(ulong) buf_page_get_zip_size(bpage),
+				(ulong) ut_dulint_get_low(
+					btr_page_get_index_id(frame)));
+			break;
+
+		default:
+			fprintf(stderr, "\nLRU pos %lu !state %lu!\n",
+				(ulong) buf_page_get_LRU_position(bpage),
+				(ulong) buf_page_get_state(bpage));
+			break;
+		}
+
+		bpage = UT_LIST_GET_NEXT(LRU, bpage);
+	}
+
+	buf_pool_mutex_exit();
+}
+#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
diff --git a/storage/innobase/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0rea.c
similarity index 63%
rename from storage/innobase/buf/buf0rea.c
rename to storage/innodb_plugin/buf/buf0rea.c
index fdec0206990..319d6b2a522 100644
--- a/storage/innobase/buf/buf0rea.c
+++ b/storage/innodb_plugin/buf/buf0rea.c
@@ -1,7 +1,24 @@
-/******************************************************
-The database buffer read
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file buf/buf0rea.c
+The database buffer read
 
 Created 11/5/1995 Heikki Tuuri
 *******************************************************/
@@ -19,60 +36,55 @@ Created 11/5/1995 Heikki Tuuri
 #include "trx0sys.h"
 #include "os0file.h"
 #include "srv0start.h"
+#include "srv0srv.h"
 
-extern ulint srv_read_ahead_rnd;
-extern ulint srv_read_ahead_seq;
-extern ulint srv_buf_pool_reads;
-
-/* The size in blocks of the area where the random read-ahead algorithm counts
+/** The size in blocks of the area where the random read-ahead algorithm counts
 the accessed pages when deciding whether to read-ahead */
 #define	BUF_READ_AHEAD_RANDOM_AREA	BUF_READ_AHEAD_AREA
 
-/* There must be at least this many pages in buf_pool in the area to start
+/** There must be at least this many pages in buf_pool in the area to start
 a random read-ahead */
-#define BUF_READ_AHEAD_RANDOM_THRESHOLD	(5 + BUF_READ_AHEAD_RANDOM_AREA / 8)
+#define BUF_READ_AHEAD_RANDOM_THRESHOLD	(1 + BUF_READ_AHEAD_RANDOM_AREA / 2)
 
-/* The linear read-ahead area size */
+/** The linear read-ahead area size */
 #define	BUF_READ_AHEAD_LINEAR_AREA	BUF_READ_AHEAD_AREA
 
-/* The linear read-ahead threshold */
-#define BUF_READ_AHEAD_LINEAR_THRESHOLD	(3 * BUF_READ_AHEAD_LINEAR_AREA / 8)
-
-/* If there are buf_pool->curr_size per the number below pending reads, then
+/** If there are buf_pool->curr_size per the number below pending reads, then
 read-ahead is not done: this is to prevent flooding the buffer pool with
 i/o-fixed buffer blocks */
 #define BUF_READ_AHEAD_PEND_LIMIT	2
 
-/************************************************************************
+/********************************************************************//**
 Low-level function which reads a page asynchronously from a file to the
 buffer buf_pool if it is not already there, in which case does nothing.
 Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
-flag is cleared and the x-lock released by an i/o-handler thread. */
+flag is cleared and the x-lock released by an i/o-handler thread.
+@return 1 if a read request was queued, 0 if the page already resided
+in buf_pool, or if the page is in the doublewrite buffer blocks in
+which case it is never read into the pool, or if the tablespace does
+not exist or is being dropped */
 static
 ulint
 buf_read_page_low(
 /*==============*/
-			/* out: 1 if a read request was queued, 0 if the page
-			already resided in buf_pool, or if the page is in
-			the doublewrite buffer blocks in which case it is never
-			read into the pool, or if the tablespace does not
-			exist or is being dropped */
-	ulint*	err,	/* out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
+	ulint*	err,	/*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
 			trying to read from a non-existent tablespace, or a
 			tablespace which is just now being dropped */
-	ibool	sync,	/* in: TRUE if synchronous aio is desired */
-	ulint	mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ...,
+	ibool	sync,	/*!< in: TRUE if synchronous aio is desired */
+	ulint	mode,	/*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
 			ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
 			at read-ahead functions) */
-	ulint	space,	/* in: space id */
-	ib_longlong tablespace_version, /* in: if the space memory object has
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size, or 0 */
+	ibool	unzip,	/*!< in: TRUE=request uncompressed page */
+	ib_int64_t tablespace_version, /*!< in: if the space memory object has
 			this timestamp different from what we are giving here,
 			treat the tablespace as dropped; this is a timestamp we
 			use to stop dangling page reads from a tablespace
 			which we have DISCARDed + IMPORTed back */
-	ulint	offset)	/* in: page number */
+	ulint	offset)	/*!< in: page number */
 {
-	buf_block_t*	block;
+	buf_page_t*	bpage;
 	ulint		wake_later;
 
 	*err = DB_SUCCESS;
@@ -96,7 +108,8 @@ buf_read_page_low(
 		return(0);
 	}
 
-	if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
+	if (ibuf_bitmap_page(zip_size, offset)
+	    || trx_sys_hdr_page(space, offset)) {
 
 		/* Trx sys header is so low in the latching order that we play
 		safe and do not leave the i/o-completion to an asynchronous
@@ -111,9 +124,9 @@ buf_read_page_low(
 	or is being dropped; if we succeed in initing the page in the buffer
 	pool for read, then DISCARD cannot proceed until the read has
 	completed */
-	block = buf_page_init_for_read(err, mode, space, tablespace_version,
-				       offset);
-	if (block == NULL) {
+	bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
+				       tablespace_version, offset);
+	if (bpage == NULL) {
 
 		return(0);
 	}
@@ -127,24 +140,31 @@ buf_read_page_low(
 	}
 #endif
 
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_ad(buf_page_in_file(bpage));
 
-	*err = fil_io(OS_FILE_READ | wake_later,
-		      sync, space,
-		      offset, 0, UNIV_PAGE_SIZE,
-		      (void*)block->frame, (void*)block);
+	if (zip_size) {
+		*err = fil_io(OS_FILE_READ | wake_later,
+			      sync, space, zip_size, offset, 0, zip_size,
+			      bpage->zip.data, bpage);
+	} else {
+		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+
+		*err = fil_io(OS_FILE_READ | wake_later,
+			      sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
+			      ((buf_block_t*) bpage)->frame, bpage);
+	}
 	ut_a(*err == DB_SUCCESS);
 
 	if (sync) {
 		/* The i/o is already completed when we arrive from
 		fil_read */
-		buf_page_io_complete(block);
+		buf_page_io_complete(bpage);
 	}
 
 	return(1);
 }
 
-/************************************************************************
+/********************************************************************//**
 Applies a random read-ahead in buf_pool if there are at least a threshold
 value of accessed pages from the random read-ahead area. Does not read any
 page, not even the one at the position (space, offset), if the read-ahead
@@ -153,21 +173,20 @@ pages: to avoid deadlocks this function must be written such that it cannot
 end up waiting for these latches! NOTE 2: the calling thread must want
 access to the page given: this rule is set to prevent unintended read-aheads
 performed by ibuf routines, a situation which could result in a deadlock if
-the OS does not support asynchronous i/o. */
+the OS does not support asynchronous i/o.
+@return number of page read requests issued; NOTE that if we read ibuf
+pages, it may happen that the page at the given page number does not
+get read even if we return a positive value! */
 static
 ulint
 buf_read_ahead_random(
 /*==================*/
-			/* out: number of page read requests issued; NOTE
-			that if we read ibuf pages, it may happen that
-			the page at the given page number does not get
-			read even if we return a value > 0! */
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number of a page which the current thread
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint	offset)	/*!< in: page number of a page which the current thread
 			wants to access */
 {
-	ib_longlong	tablespace_version;
-	buf_block_t*	block;
+	ib_int64_t	tablespace_version;
 	ulint		recent_blocks	= 0;
 	ulint		count;
 	ulint		LRU_recent_limit;
@@ -175,13 +194,18 @@ buf_read_ahead_random(
 	ulint		low, high;
 	ulint		err;
 	ulint		i;
+	ulint		buf_read_ahead_random_area;
+
+	/* We have currently disabled random readahead */
+	return(0);
 
 	if (srv_startup_is_before_trx_rollback_phase) {
 		/* No read-ahead to avoid thread deadlocks */
 		return(0);
 	}
 
-	if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
+	if (ibuf_bitmap_page(zip_size, offset)
+	    || trx_sys_hdr_page(space, offset)) {
 
 		/* If it is an ibuf bitmap page or trx sys hdr, we do
 		no read-ahead, as that could break the ibuf page access
@@ -196,10 +220,12 @@ buf_read_ahead_random(
 
 	tablespace_version = fil_space_get_version(space);
 
-	low  = (offset / BUF_READ_AHEAD_RANDOM_AREA)
-		* BUF_READ_AHEAD_RANDOM_AREA;
-	high = (offset / BUF_READ_AHEAD_RANDOM_AREA + 1)
-		* BUF_READ_AHEAD_RANDOM_AREA;
+	buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA;
+
+	low  = (offset / buf_read_ahead_random_area)
+		* buf_read_ahead_random_area;
+	high = (offset / buf_read_ahead_random_area + 1)
+		* buf_read_ahead_random_area;
 	if (high > fil_space_get_size(space)) {
 
 		high = fil_space_get_size(space);
@@ -211,11 +237,11 @@ buf_read_ahead_random(
 
 	LRU_recent_limit = buf_LRU_get_recent_limit();
 
-	mutex_enter(&(buf_pool->mutex));
+	buf_pool_mutex_enter();
 
 	if (buf_pool->n_pend_reads
 	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
-		mutex_exit(&(buf_pool->mutex));
+		buf_pool_mutex_exit();
 
 		return(0);
 	}
@@ -224,24 +250,27 @@ buf_read_ahead_random(
 	that is, reside near the start of the LRU list. */
 
 	for (i = low; i < high; i++) {
-		block = buf_page_hash_get(space, i);
+		const buf_page_t*	bpage = buf_page_hash_get(space, i);
 
-		if ((block)
-		    && (block->LRU_position > LRU_recent_limit)
-		    && block->accessed) {
+		if (bpage
+		    && buf_page_is_accessed(bpage)
+		    && (buf_page_get_LRU_position(bpage) > LRU_recent_limit)) {
 
 			recent_blocks++;
+
+			if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
+
+				buf_pool_mutex_exit();
+				goto read_ahead;
+			}
 		}
 	}
 
-	mutex_exit(&(buf_pool->mutex));
-
-	if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
-		/* Do nothing */
-
-		return(0);
-	}
+	buf_pool_mutex_exit();
+	/* Do nothing */
+	return(0);
 
+read_ahead:
 	/* Read all the suitable blocks within the area */
 
 	if (ibuf_inside()) {
@@ -256,11 +285,12 @@ buf_read_ahead_random(
 		/* It is only sensible to do read-ahead in the non-sync aio
 		mode: hence FALSE as the first parameter */
 
-		if (!ibuf_bitmap_page(i)) {
+		if (!ibuf_bitmap_page(zip_size, i)) {
 			count += buf_read_page_low(
 				&err, FALSE,
 				ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
-				space, tablespace_version, i);
+				space, zip_size, FALSE,
+				tablespace_version, i);
 			if (err == DB_TABLESPACE_DELETED) {
 				ut_print_timestamp(stderr);
 				fprintf(stderr,
@@ -293,34 +323,36 @@ buf_read_ahead_random(
 	return(count);
 }
 
-/************************************************************************
+/********************************************************************//**
 High-level function which reads a page asynchronously from a file to the
 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread. Does a random read-ahead if it seems
-sensible. */
-
+sensible.
+@return number of page read requests issued: this can be greater than
+1 if read-ahead occurred */
+UNIV_INTERN
 ulint
 buf_read_page(
 /*==========*/
-			/* out: number of page read requests issued: this can
-			be > 1 if read-ahead occurred */
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number */
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint	offset)	/*!< in: page number */
 {
-	ib_longlong	tablespace_version;
+	ib_int64_t	tablespace_version;
 	ulint		count;
 	ulint		count2;
 	ulint		err;
 
 	tablespace_version = fil_space_get_version(space);
 
-	count = buf_read_ahead_random(space, offset);
+	count = buf_read_ahead_random(space, zip_size, offset);
 
 	/* We do the i/o in the synchronous aio mode to save thread
 	switches: hence TRUE */
 
 	count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+				   zip_size, FALSE,
 				   tablespace_version, offset);
 	srv_buf_pool_reads+= count2;
 	if (err == DB_TABLESPACE_DELETED) {
@@ -336,10 +368,13 @@ buf_read_page(
 	/* Flush pages from the end of the LRU list if necessary */
 	buf_flush_free_margin();
 
+	/* Increment number of I/O operations used for LRU policy. */
+	buf_LRU_stat_inc_io();
+
 	return(count + count2);
 }
 
-/************************************************************************
+/********************************************************************//**
 Applies linear read-ahead if in the buf_pool the page is a border page of
 a linear read-ahead area and all the pages in the area have been accessed.
 Does not read any page if the read-ahead mechanism is not activated. Note
@@ -361,20 +396,21 @@ function must be written such that it cannot end up waiting for these
 latches!
 NOTE 3: the calling thread must want access to the page given: this rule is
 set to prevent unintended read-aheads performed by ibuf routines, a situation
-which could result in a deadlock if the OS does not support asynchronous io. */
-
+which could result in a deadlock if the OS does not support asynchronous io.
+@return	number of page read requests issued */
+UNIV_INTERN
 ulint
 buf_read_ahead_linear(
 /*==================*/
-			/* out: number of page read requests issued */
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number of a page; NOTE: the current thread
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint	offset)	/*!< in: page number of a page; NOTE: the current thread
 			must want access to this page (see NOTE 3 above) */
 {
-	ib_longlong	tablespace_version;
-	buf_block_t*	block;
+	ib_int64_t	tablespace_version;
+	buf_page_t*	bpage;
 	buf_frame_t*	frame;
-	buf_block_t*	pred_block	= NULL;
+	buf_page_t*	pred_bpage	= NULL;
 	ulint		pred_offset;
 	ulint		succ_offset;
 	ulint		count;
@@ -385,28 +421,32 @@ buf_read_ahead_linear(
 	ulint		low, high;
 	ulint		err;
 	ulint		i;
+	const ulint	buf_read_ahead_linear_area
+		= BUF_READ_AHEAD_LINEAR_AREA;
+	ulint		threshold;
 
-	if (srv_startup_is_before_trx_rollback_phase) {
+	if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
 		/* No read-ahead to avoid thread deadlocks */
 		return(0);
 	}
 
-	if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
+	low  = (offset / buf_read_ahead_linear_area)
+		* buf_read_ahead_linear_area;
+	high = (offset / buf_read_ahead_linear_area + 1)
+		* buf_read_ahead_linear_area;
 
-		/* If it is an ibuf bitmap page or trx sys hdr, we do
-		no read-ahead, as that could break the ibuf page access
-		order */
+	if ((offset != low) && (offset != high - 1)) {
+		/* This is not a border page of the area: return */
 
 		return(0);
 	}
 
-	low  = (offset / BUF_READ_AHEAD_LINEAR_AREA)
-		* BUF_READ_AHEAD_LINEAR_AREA;
-	high = (offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
-		* BUF_READ_AHEAD_LINEAR_AREA;
+	if (ibuf_bitmap_page(zip_size, offset)
+	    || trx_sys_hdr_page(space, offset)) {
 
-	if ((offset != low) && (offset != high - 1)) {
-		/* This is not a border page of the area: return */
+		/* If it is an ibuf bitmap page or trx sys hdr, we do
+		no read-ahead, as that could break the ibuf page access
+		order */
 
 		return(0);
 	}
@@ -417,10 +457,10 @@ buf_read_ahead_linear(
 
 	tablespace_version = fil_space_get_version(space);
 
-	mutex_enter(&(buf_pool->mutex));
+	buf_pool_mutex_enter();
 
 	if (high > fil_space_get_size(space)) {
-		mutex_exit(&(buf_pool->mutex));
+		buf_pool_mutex_exit();
 		/* The area is not whole, return */
 
 		return(0);
@@ -428,7 +468,7 @@ buf_read_ahead_linear(
 
 	if (buf_pool->n_pend_reads
 	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
-		mutex_exit(&(buf_pool->mutex));
+		buf_pool_mutex_exit();
 
 		return(0);
 	}
@@ -443,47 +483,63 @@ buf_read_ahead_linear(
 		asc_or_desc = -1;
 	}
 
+	/* How many out of order accessed pages can we ignore
+	when working out the access pattern for linear readahead */
+	threshold = ut_min((64 - srv_read_ahead_threshold),
+			   BUF_READ_AHEAD_AREA);
+
 	fail_count = 0;
 
 	for (i = low; i < high; i++) {
-		block = buf_page_hash_get(space, i);
+		bpage = buf_page_hash_get(space, i);
 
-		if ((block == NULL) || !block->accessed) {
+		if ((bpage == NULL) || !buf_page_is_accessed(bpage)) {
 			/* Not accessed */
 			fail_count++;
 
-		} else if (pred_block
-			   && (ut_ulint_cmp(block->LRU_position,
-					    pred_block->LRU_position)
-			       != asc_or_desc)) {
+		} else if (pred_bpage) {
+			int res = (ut_ulint_cmp(
+				       buf_page_get_LRU_position(bpage),
+				       buf_page_get_LRU_position(pred_bpage)));
 			/* Accesses not in the right order */
-
-			fail_count++;
-			pred_block = block;
+			if (res != 0 && res != asc_or_desc) {
+				fail_count++;
+			}
 		}
-	}
 
-	if (fail_count > BUF_READ_AHEAD_LINEAR_AREA
-	    - BUF_READ_AHEAD_LINEAR_THRESHOLD) {
-		/* Too many failures: return */
+		if (fail_count > threshold) {
+			/* Too many failures: return */
+			buf_pool_mutex_exit();
+			return(0);
+		}
 
-		mutex_exit(&(buf_pool->mutex));
-
-		return(0);
+		if (bpage && buf_page_is_accessed(bpage)) {
+			pred_bpage = bpage;
+		}
 	}
 
 	/* If we got this far, we know that enough pages in the area have
 	been accessed in the right order: linear read-ahead can be sensible */
 
-	block = buf_page_hash_get(space, offset);
+	bpage = buf_page_hash_get(space, offset);
 
-	if (block == NULL) {
-		mutex_exit(&(buf_pool->mutex));
+	if (bpage == NULL) {
+		buf_pool_mutex_exit();
 
 		return(0);
 	}
 
-	frame = block->frame;
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_ZIP_PAGE:
+		frame = bpage->zip.data;
+		break;
+	case BUF_BLOCK_FILE_PAGE:
+		frame = ((buf_block_t*) bpage)->frame;
+		break;
+	default:
+		ut_error;
+		break;
+	}
 
 	/* Read the natural predecessor and successor page addresses from
 	the page; NOTE that because the calling thread may have an x-latch
@@ -494,7 +550,7 @@ buf_read_ahead_linear(
 	pred_offset = fil_page_get_prev(frame);
 	succ_offset = fil_page_get_next(frame);
 
-	mutex_exit(&(buf_pool->mutex));
+	buf_pool_mutex_exit();
 
 	if ((offset == low) && (succ_offset == offset + 1)) {
 
@@ -511,10 +567,10 @@ buf_read_ahead_linear(
 		return(0);
 	}
 
-	low  = (new_offset / BUF_READ_AHEAD_LINEAR_AREA)
-		* BUF_READ_AHEAD_LINEAR_AREA;
-	high = (new_offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
-		* BUF_READ_AHEAD_LINEAR_AREA;
+	low  = (new_offset / buf_read_ahead_linear_area)
+		* buf_read_ahead_linear_area;
+	high = (new_offset / buf_read_ahead_linear_area + 1)
+		* buf_read_ahead_linear_area;
 
 	if ((new_offset != low) && (new_offset != high - 1)) {
 		/* This is not a border page of the area: return */
@@ -548,11 +604,11 @@ buf_read_ahead_linear(
 		/* It is only sensible to do read-ahead in the non-sync
 		aio mode: hence FALSE as the first parameter */
 
-		if (!ibuf_bitmap_page(i)) {
+		if (!ibuf_bitmap_page(zip_size, i)) {
 			count += buf_read_page_low(
 				&err, FALSE,
 				ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
-				space, tablespace_version, i);
+				space, zip_size, FALSE, tablespace_version, i);
 			if (err == DB_TABLESPACE_DELETED) {
 				ut_print_timestamp(stderr);
 				fprintf(stderr,
@@ -583,32 +639,42 @@ buf_read_ahead_linear(
 	}
 #endif /* UNIV_DEBUG */
 
+	/* Read ahead is considered one I/O operation for the purpose of
+	LRU policy decision. */
+	buf_LRU_stat_inc_io();
+
 	++srv_read_ahead_seq;
 	return(count);
 }
 
-/************************************************************************
+/********************************************************************//**
 Issues read requests for pages which the ibuf module wants to read in, in
 order to contract the insert buffer tree. Technically, this function is like
 a read-ahead function. */
-
+UNIV_INTERN
 void
 buf_read_ibuf_merge_pages(
 /*======================*/
-	ibool	sync,		/* in: TRUE if the caller wants this function
-				to wait for the highest address page to get
-				read in, before this function returns */
-	ulint*	space_ids,	/* in: array of space ids */
-	ib_longlong* space_versions,/* in: the spaces must have this version
-				number (timestamp), otherwise we discard the
-				read; we use this to cancel reads if
-				DISCARD + IMPORT may have changed the
-				tablespace size */
-	ulint*	page_nos,	/* in: array of page numbers to read, with the
-				highest page number the last in the array */
-	ulint	n_stored)	/* in: number of page numbers in the array */
+	ibool		sync,		/*!< in: TRUE if the caller
+					wants this function to wait
+					for the highest address page
+					to get read in, before this
+					function returns */
+	const ulint*	space_ids,	/*!< in: array of space ids */
+	const ib_int64_t* space_versions,/*!< in: the spaces must have
+					this version number
+					(timestamp), otherwise we
+					discard the read; we use this
+					to cancel reads if DISCARD +
+					IMPORT may have changed the
+					tablespace size */
+	const ulint*	page_nos,	/*!< in: array of page numbers
+					to read, with the highest page
+					number the last in the
+					array */
+	ulint		n_stored)	/*!< in: number of elements
+					in the arrays */
 {
-	ulint	err;
 	ulint	i;
 
 	ut_ad(!ibuf_inside());
@@ -621,18 +687,27 @@ buf_read_ibuf_merge_pages(
 	}
 
 	for (i = 0; i < n_stored; i++) {
-		buf_read_page_low(&err,
-				  (i + 1 == n_stored) && sync,
-				  BUF_READ_ANY_PAGE,
-				  space_ids[i], space_versions[i],
+		ulint	zip_size = fil_space_get_zip_size(space_ids[i]);
+		ulint	err;
+
+		if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+
+			goto tablespace_deleted;
+		}
+
+		buf_read_page_low(&err, sync && (i + 1 == n_stored),
+				  BUF_READ_ANY_PAGE, space_ids[i],
+				  zip_size, TRUE, space_versions[i],
 				  page_nos[i]);
 
-		if (err == DB_TABLESPACE_DELETED) {
+		if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
+tablespace_deleted:
 			/* We have deleted or are deleting the single-table
 			tablespace: remove the entries for that page */
 
 			ibuf_merge_or_delete_for_page(NULL, space_ids[i],
-						      page_nos[i], FALSE);
+						      page_nos[i],
+						      zip_size, FALSE);
 		}
 	}
 
@@ -650,25 +725,41 @@ buf_read_ibuf_merge_pages(
 #endif /* UNIV_DEBUG */
 }
 
-/************************************************************************
+/********************************************************************//**
 Issues read requests for pages which recovery wants to read in. */
-
+UNIV_INTERN
 void
 buf_read_recv_pages(
 /*================*/
-	ibool	sync,		/* in: TRUE if the caller wants this function
-				to wait for the highest address page to get
-				read in, before this function returns */
-	ulint	space,		/* in: space id */
-	ulint*	page_nos,	/* in: array of page numbers to read, with the
-				highest page number the last in the array */
-	ulint	n_stored)	/* in: number of page numbers in the array */
+	ibool		sync,		/*!< in: TRUE if the caller
+					wants this function to wait
+					for the highest address page
+					to get read in, before this
+					function returns */
+	ulint		space,		/*!< in: space id */
+	ulint		zip_size,	/*!< in: compressed page size in
+					bytes, or 0 */
+	const ulint*	page_nos,	/*!< in: array of page numbers
+					to read, with the highest page
+					number the last in the
+					array */
+	ulint		n_stored)	/*!< in: number of page numbers
+					in the array */
 {
-	ib_longlong	tablespace_version;
+	ib_int64_t	tablespace_version;
 	ulint		count;
 	ulint		err;
 	ulint		i;
 
+	zip_size = fil_space_get_zip_size(space);
+
+	if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+		/* It is a single table tablespace and the .ibd file is
+		missing: do nothing */
+
+		return;
+	}
+
 	tablespace_version = fil_space_get_version(space);
 
 	for (i = 0; i < n_stored; i++) {
@@ -702,14 +793,14 @@ buf_read_recv_pages(
 		os_aio_print_debug = FALSE;
 
 		if ((i + 1 == n_stored) && sync) {
-			buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE,
-					  space, tablespace_version,
+			buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+					  zip_size, TRUE, tablespace_version,
 					  page_nos[i]);
 		} else {
 			buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
 					  | OS_AIO_SIMULATED_WAKE_LATER,
-					  space, tablespace_version,
-					  page_nos[i]);
+					  space, zip_size, TRUE,
+					  tablespace_version, page_nos[i]);
 		}
 	}
 
diff --git a/storage/innodb_plugin/compile-innodb b/storage/innodb_plugin/compile-innodb
new file mode 100755
index 00000000000..82601f03ae9
--- /dev/null
+++ b/storage/innodb_plugin/compile-innodb
@@ -0,0 +1,24 @@
+#! /bin/sh
+#
+# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+# 
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+path=`dirname $0`
+. "$path/SETUP.sh"
+
+extra_flags="$pentium_cflags $fast_cflags -g"
+extra_configs="$pentium_configs $static_link --with-plugins=innobase"
+
+. "$path/FINISH.sh"
diff --git a/storage/innodb_plugin/compile-innodb-debug b/storage/innodb_plugin/compile-innodb-debug
new file mode 100755
index 00000000000..efb4abf88d5
--- /dev/null
+++ b/storage/innodb_plugin/compile-innodb-debug
@@ -0,0 +1,24 @@
+#! /bin/sh
+#
+# Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+# 
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+path=`dirname $0`
+. "$path/SETUP.sh" $@ --with-debug=full
+
+extra_flags="$pentium_cflags $debug_cflags"
+extra_configs="$pentium_configs $debug_configs --with-plugins=innobase"
+
+. "$path/FINISH.sh"
diff --git a/storage/innodb_plugin/data/data0data.c b/storage/innodb_plugin/data/data0data.c
new file mode 100644
index 00000000000..e3c1f1b4f23
--- /dev/null
+++ b/storage/innodb_plugin/data/data0data.c
@@ -0,0 +1,764 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file data/data0data.c
+SQL data field and tuple
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "data0data.h"
+
+#ifdef UNIV_NONINL
+#include "data0data.ic"
+#endif
+
+#ifndef UNIV_HOTBACKUP
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "page0page.h"
+#include "page0zip.h"
+#include "dict0dict.h"
+#include "btr0cur.h"
+
+#include <ctype.h>
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+/** Dummy variable to catch access to uninitialized fields.  In the
+debug version, dtuple_create() will make all fields of dtuple_t point
+to data_error. */
+UNIV_INTERN byte	data_error;
+
+# ifndef UNIV_DEBUG_VALGRIND
+/** this is used to fool the compiler in dtuple_validate */
+UNIV_INTERN ulint	data_dummy;
+# endif /* !UNIV_DEBUG_VALGRIND */
+#endif /* UNIV_DEBUG */
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Tests if dfield data length and content is equal to the given.
+@return	TRUE if equal */
+UNIV_INTERN
+ibool
+dfield_data_is_binary_equal(
+/*========================*/
+	const dfield_t*	field,	/*!< in: field */
+	ulint		len,	/*!< in: data length or UNIV_SQL_NULL */
+	const byte*	data)	/*!< in: data */
+{
+	if (len != dfield_get_len(field)) {
+
+		return(FALSE);
+	}
+
+	if (len == UNIV_SQL_NULL) {
+
+		return(TRUE);
+	}
+
+	if (0 != memcmp(dfield_get_data(field), data, len)) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/************************************************************//**
+Compare two data tuples, respecting the collation of character fields.
+@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively,
+than tuple2 */
+UNIV_INTERN
+int
+dtuple_coll_cmp(
+/*============*/
+	const dtuple_t*	tuple1,	/*!< in: tuple 1 */
+	const dtuple_t*	tuple2)	/*!< in: tuple 2 */
+{
+	ulint	n_fields;
+	ulint	i;
+
+	ut_ad(tuple1 && tuple2);
+	ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
+	ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
+	ut_ad(dtuple_check_typed(tuple1));
+	ut_ad(dtuple_check_typed(tuple2));
+
+	n_fields = dtuple_get_n_fields(tuple1);
+
+	if (n_fields != dtuple_get_n_fields(tuple2)) {
+
+		return(n_fields < dtuple_get_n_fields(tuple2) ? -1 : 1);
+	}
+
+	for (i = 0; i < n_fields; i++) {
+		int		cmp;
+		const dfield_t*	field1	= dtuple_get_nth_field(tuple1, i);
+		const dfield_t*	field2	= dtuple_get_nth_field(tuple2, i);
+
+		cmp = cmp_dfield_dfield(field1, field2);
+
+		if (cmp) {
+			return(cmp);
+		}
+	}
+
+	return(0);
+}
+
+/*********************************************************************//**
+Sets number of fields used in a tuple. Normally this is set in
+dtuple_create, but if you want later to set it smaller, you can use this. */
+UNIV_INTERN
+void
+dtuple_set_n_fields(
+/*================*/
+	dtuple_t*	tuple,		/*!< in: tuple */
+	ulint		n_fields)	/*!< in: number of fields */
+{
+	ut_ad(tuple);
+
+	tuple->n_fields = n_fields;
+	tuple->n_fields_cmp = n_fields;
+}
+
+/**********************************************************//**
+Checks that a data field is typed.
+@return	TRUE if ok */
+static
+ibool
+dfield_check_typed_no_assert(
+/*=========================*/
+	const dfield_t*	field)	/*!< in: data field */
+{
+	if (dfield_get_type(field)->mtype > DATA_MYSQL
+	    || dfield_get_type(field)->mtype < DATA_VARCHAR) {
+
+		fprintf(stderr,
+			"InnoDB: Error: data field type %lu, len %lu\n",
+			(ulong) dfield_get_type(field)->mtype,
+			(ulong) dfield_get_len(field));
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/**********************************************************//**
+Checks that a data tuple is typed.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dtuple_check_typed_no_assert(
+/*=========================*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+{
+	const dfield_t*	field;
+	ulint		i;
+
+	if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) {
+		fprintf(stderr,
+			"InnoDB: Error: index entry has %lu fields\n",
+			(ulong) dtuple_get_n_fields(tuple));
+dump:
+		fputs("InnoDB: Tuple contents: ", stderr);
+		dtuple_print(stderr, tuple);
+		putc('\n', stderr);
+
+		return(FALSE);
+	}
+
+	for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
+
+		field = dtuple_get_nth_field(tuple, i);
+
+		if (!dfield_check_typed_no_assert(field)) {
+			goto dump;
+		}
+	}
+
+	return(TRUE);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+/**********************************************************//**
+Checks that a data field is typed. Asserts an error if not.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dfield_check_typed(
+/*===============*/
+	const dfield_t*	field)	/*!< in: data field */
+{
+	if (dfield_get_type(field)->mtype > DATA_MYSQL
+	    || dfield_get_type(field)->mtype < DATA_VARCHAR) {
+
+		fprintf(stderr,
+			"InnoDB: Error: data field type %lu, len %lu\n",
+			(ulong) dfield_get_type(field)->mtype,
+			(ulong) dfield_get_len(field));
+
+		ut_error;
+	}
+
+	return(TRUE);
+}
+
+/**********************************************************//**
+Checks that a data tuple is typed. Asserts an error if not.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dtuple_check_typed(
+/*===============*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+{
+	const dfield_t*	field;
+	ulint		i;
+
+	for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
+
+		field = dtuple_get_nth_field(tuple, i);
+
+		ut_a(dfield_check_typed(field));
+	}
+
+	return(TRUE);
+}
+
+/**********************************************************//**
+Validates the consistency of a tuple which must be complete, i.e,
+all fields must have been set.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dtuple_validate(
+/*============*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+{
+	const dfield_t*	field;
+	ulint		n_fields;
+	ulint		len;
+	ulint		i;
+
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+
+	n_fields = dtuple_get_n_fields(tuple);
+
+	/* We dereference all the data of each field to test
+	for memory traps */
+
+	for (i = 0; i < n_fields; i++) {
+
+		field = dtuple_get_nth_field(tuple, i);
+		len = dfield_get_len(field);
+
+		if (!dfield_is_null(field)) {
+
+			const byte*	data = dfield_get_data(field);
+#ifndef UNIV_DEBUG_VALGRIND
+			ulint		j;
+
+			for (j = 0; j < len; j++) {
+
+				data_dummy  += *data; /* fool the compiler not
+						      to optimize out this
+						      code */
+				data++;
+			}
+#endif /* !UNIV_DEBUG_VALGRIND */
+
+			UNIV_MEM_ASSERT_RW(data, len);
+		}
+	}
+
+	ut_a(dtuple_check_typed(tuple));
+
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Pretty prints a dfield value according to its data type. */
+UNIV_INTERN
+void
+dfield_print(
+/*=========*/
+	const dfield_t*	dfield)	/*!< in: dfield */
+{
+	const byte*	data;
+	ulint		len;
+	ulint		i;
+
+	len = dfield_get_len(dfield);
+	data = dfield_get_data(dfield);
+
+	if (dfield_is_null(dfield)) {
+		fputs("NULL", stderr);
+
+		return;
+	}
+
+	switch (dtype_get_mtype(dfield_get_type(dfield))) {
+	case DATA_CHAR:
+	case DATA_VARCHAR:
+		for (i = 0; i < len; i++) {
+			int	c = *data++;
+			putc(isprint(c) ? c : ' ', stderr);
+		}
+
+		if (dfield_is_ext(dfield)) {
+			fputs("(external)", stderr);
+		}
+		break;
+	case DATA_INT:
+		ut_a(len == 4); /* only works for 32-bit integers */
+		fprintf(stderr, "%d", (int)mach_read_from_4(data));
+		break;
+	default:
+		ut_error;
+	}
+}
+
+/*************************************************************//**
+Pretty prints a dfield value according to its data type. Also the hex string
+is printed if a string contains non-printable characters. */
+UNIV_INTERN
+void
+dfield_print_also_hex(
+/*==================*/
+	const dfield_t*	dfield)	/*!< in: dfield */
+{
+	const byte*	data;
+	ulint		len;
+	ulint		prtype;
+	ulint		i;
+	ibool		print_also_hex;
+
+	len = dfield_get_len(dfield);
+	data = dfield_get_data(dfield);
+
+	if (dfield_is_null(dfield)) {
+		fputs("NULL", stderr);
+
+		return;
+	}
+
+	prtype = dtype_get_prtype(dfield_get_type(dfield));
+
+	switch (dtype_get_mtype(dfield_get_type(dfield))) {
+		dulint	id;
+	case DATA_INT:
+		switch (len) {
+			ulint	val;
+		case 1:
+			val = mach_read_from_1(data);
+
+			if (!(prtype & DATA_UNSIGNED)) {
+				val &= ~0x80;
+				fprintf(stderr, "%ld", (long) val);
+			} else {
+				fprintf(stderr, "%lu", (ulong) val);
+			}
+			break;
+
+		case 2:
+			val = mach_read_from_2(data);
+
+			if (!(prtype & DATA_UNSIGNED)) {
+				val &= ~0x8000;
+				fprintf(stderr, "%ld", (long) val);
+			} else {
+				fprintf(stderr, "%lu", (ulong) val);
+			}
+			break;
+
+		case 3:
+			val = mach_read_from_3(data);
+
+			if (!(prtype & DATA_UNSIGNED)) {
+				val &= ~0x800000;
+				fprintf(stderr, "%ld", (long) val);
+			} else {
+				fprintf(stderr, "%lu", (ulong) val);
+			}
+			break;
+
+		case 4:
+			val = mach_read_from_4(data);
+
+			if (!(prtype & DATA_UNSIGNED)) {
+				val &= ~0x80000000;
+				fprintf(stderr, "%ld", (long) val);
+			} else {
+				fprintf(stderr, "%lu", (ulong) val);
+			}
+			break;
+
+		case 6:
+			id = mach_read_from_6(data);
+			fprintf(stderr, "{%lu %lu}",
+				ut_dulint_get_high(id),
+				ut_dulint_get_low(id));
+			break;
+
+		case 7:
+			id = mach_read_from_7(data);
+			fprintf(stderr, "{%lu %lu}",
+				ut_dulint_get_high(id),
+				ut_dulint_get_low(id));
+			break;
+		case 8:
+			id = mach_read_from_8(data);
+			fprintf(stderr, "{%lu %lu}",
+				ut_dulint_get_high(id),
+				ut_dulint_get_low(id));
+			break;
+		default:
+			goto print_hex;
+		}
+		break;
+
+	case DATA_SYS:
+		switch (prtype & DATA_SYS_PRTYPE_MASK) {
+		case DATA_TRX_ID:
+			id = mach_read_from_6(data);
+
+			fprintf(stderr, "trx_id " TRX_ID_FMT,
+				TRX_ID_PREP_PRINTF(id));
+			break;
+
+		case DATA_ROLL_PTR:
+			id = mach_read_from_7(data);
+
+			fprintf(stderr, "roll_ptr {%lu %lu}",
+				ut_dulint_get_high(id), ut_dulint_get_low(id));
+			break;
+
+		case DATA_ROW_ID:
+			id = mach_read_from_6(data);
+
+			fprintf(stderr, "row_id {%lu %lu}",
+				ut_dulint_get_high(id), ut_dulint_get_low(id));
+			break;
+
+		default:
+			id = mach_dulint_read_compressed(data);
+
+			fprintf(stderr, "mix_id {%lu %lu}",
+				ut_dulint_get_high(id), ut_dulint_get_low(id));
+		}
+		break;
+
+	case DATA_CHAR:
+	case DATA_VARCHAR:
+		print_also_hex = FALSE;
+
+		for (i = 0; i < len; i++) {
+			int c = *data++;
+
+			if (!isprint(c)) {
+				print_also_hex = TRUE;
+
+				fprintf(stderr, "\\x%02x", (unsigned char) c);
+			} else {
+				putc(c, stderr);
+			}
+		}
+
+		if (dfield_is_ext(dfield)) {
+			fputs("(external)", stderr);
+		}
+
+		if (!print_also_hex) {
+			break;
+		}
+
+		data = dfield_get_data(dfield);
+		/* fall through */
+
+	case DATA_BINARY:
+	default:
+print_hex:
+		fputs(" Hex: ",stderr);
+
+		for (i = 0; i < len; i++) {
+			fprintf(stderr, "%02lx", (ulint) *data++);
+		}
+
+		if (dfield_is_ext(dfield)) {
+			fputs("(external)", stderr);
+		}
+	}
+}
+
+/*************************************************************//**
+Print a dfield value using ut_print_buf. */
+static
+void
+dfield_print_raw(
+/*=============*/
+	FILE*		f,		/*!< in: output stream */
+	const dfield_t*	dfield)		/*!< in: dfield */
+{
+	ulint	len	= dfield_get_len(dfield);
+	if (!dfield_is_null(dfield)) {
+		ulint	print_len = ut_min(len, 1000);
+		ut_print_buf(f, dfield_get_data(dfield), print_len);
+		if (len != print_len) {
+			fprintf(f, "(total %lu bytes%s)",
+				(ulong) len,
+				dfield_is_ext(dfield) ? ", external" : "");
+		}
+	} else {
+		fputs(" SQL NULL", f);
+	}
+}
+
+/**********************************************************//**
+The following function prints the contents of a tuple. */
+UNIV_INTERN
+void
+dtuple_print(
+/*=========*/
+	FILE*		f,	/*!< in: output stream */
+	const dtuple_t*	tuple)	/*!< in: tuple */
+{
+	ulint		n_fields;
+	ulint		i;
+
+	n_fields = dtuple_get_n_fields(tuple);
+
+	fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields);
+
+	for (i = 0; i < n_fields; i++) {
+		fprintf(f, " %lu:", (ulong) i);
+
+		dfield_print_raw(f, dtuple_get_nth_field(tuple, i));
+
+		putc(';', f);
+		putc('\n', f);
+	}
+
+	ut_ad(dtuple_validate(tuple));
+}
+
+/**************************************************************//**
+Moves parts of long fields in entry to the big record vector so that
+the size of tuple drops below the maximum record size allowed in the
+database. Moves data only from those fields which are not necessary
+to determine uniquely the insertion place of the tuple in the index.
+@return own: created big record vector, NULL if we are not able to
+shorten the entry enough, i.e., if there are too many fixed-length or
+short fields in entry or the index is clustered */
+UNIV_INTERN
+big_rec_t*
+dtuple_convert_big_rec(
+/*===================*/
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry,	/*!< in/out: index entry */
+	ulint*		n_ext)	/*!< in/out: number of
+				externally stored columns */
+{
+	mem_heap_t*	heap;
+	big_rec_t*	vector;
+	dfield_t*	dfield;
+	dict_field_t*	ifield;
+	ulint		size;
+	ulint		n_fields;
+	ulint		local_len;
+	ulint		local_prefix_len;
+
+	if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
+		return(NULL);
+	}
+
+	if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) {
+		/* up to MySQL 5.1: store a 768-byte prefix locally */
+		local_len = BTR_EXTERN_FIELD_REF_SIZE + DICT_MAX_INDEX_COL_LEN;
+	} else {
+		/* new-format table: do not store any BLOB prefix locally */
+		local_len = BTR_EXTERN_FIELD_REF_SIZE;
+	}
+
+	ut_a(dtuple_check_typed_no_assert(entry));
+
+	size = rec_get_converted_size(index, entry, *n_ext);
+
+	if (UNIV_UNLIKELY(size > 1000000000)) {
+		fprintf(stderr,
+			"InnoDB: Warning: tuple size very big: %lu\n",
+			(ulong) size);
+		fputs("InnoDB: Tuple contents: ", stderr);
+		dtuple_print(stderr, entry);
+		putc('\n', stderr);
+	}
+
+	heap = mem_heap_create(size + dtuple_get_n_fields(entry)
+			       * sizeof(big_rec_field_t) + 1000);
+
+	vector = mem_heap_alloc(heap, sizeof(big_rec_t));
+
+	vector->heap = heap;
+	vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
+					* sizeof(big_rec_field_t));
+
+	/* Decide which fields to shorten: the algorithm is to look for
+	a variable-length field that yields the biggest savings when
+	stored externally */
+
+	n_fields = 0;
+
+	while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry,
+							     *n_ext),
+				      dict_table_is_comp(index->table),
+				      dict_index_get_n_fields(index),
+				      dict_table_zip_size(index->table))) {
+		ulint			i;
+		ulint			longest		= 0;
+		ulint			longest_i	= ULINT_MAX;
+		byte*			data;
+		big_rec_field_t*	b;
+
+		for (i = dict_index_get_n_unique_in_tree(index);
+		     i < dtuple_get_n_fields(entry); i++) {
+			ulint	savings;
+
+			dfield = dtuple_get_nth_field(entry, i);
+			ifield = dict_index_get_nth_field(index, i);
+
+			/* Skip fixed-length, NULL, externally stored,
+			or short columns */
+
+			if (ifield->fixed_len
+			    || dfield_is_null(dfield)
+			    || dfield_is_ext(dfield)
+			    || dfield_get_len(dfield) <= local_len
+			    || dfield_get_len(dfield)
+			    <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
+				goto skip_field;
+			}
+
+			savings = dfield_get_len(dfield) - local_len;
+
+			/* Check that there would be savings */
+			if (longest >= savings) {
+				goto skip_field;
+			}
+
+			longest_i = i;
+			longest = savings;
+
+skip_field:
+			continue;
+		}
+
+		if (!longest) {
+			/* Cannot shorten more */
+
+			mem_heap_free(heap);
+
+			return(NULL);
+		}
+
+		/* Move data from field longest_i to big rec vector.
+
+		We store the first bytes locally to the record. Then
+		we can calculate all ordering fields in all indexes
+		from locally stored data. */
+
+		dfield = dtuple_get_nth_field(entry, longest_i);
+		ifield = dict_index_get_nth_field(index, longest_i);
+		local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;
+
+		b = &vector->fields[n_fields];
+		b->field_no = longest_i;
+		b->len = dfield_get_len(dfield) - local_prefix_len;
+		b->data = (char*) dfield_get_data(dfield) + local_prefix_len;
+
+		/* Allocate the locally stored part of the column. */
+		data = mem_heap_alloc(heap, local_len);
+
+		/* Copy the local prefix. */
+		memcpy(data, dfield_get_data(dfield), local_prefix_len);
+		/* Clear the extern field reference (BLOB pointer). */
+		memset(data + local_prefix_len, 0, BTR_EXTERN_FIELD_REF_SIZE);
+#if 0
+		/* The following would fail the Valgrind checks in
+		page_cur_insert_rec_low() and page_cur_insert_rec_zip().
+		The BLOB pointers in the record will be initialized after
+		the record and the BLOBs have been written. */
+		UNIV_MEM_ALLOC(data + local_prefix_len,
+			       BTR_EXTERN_FIELD_REF_SIZE);
+#endif
+
+		dfield_set_data(dfield, data, local_len);
+		dfield_set_ext(dfield);
+
+		n_fields++;
+		(*n_ext)++;
+		ut_ad(n_fields < dtuple_get_n_fields(entry));
+	}
+
+	vector->n_fields = n_fields;
+	return(vector);
+}
+
+/**************************************************************//**
+Puts back to entry the data stored in vector. Note that to ensure the
+fields in entry can accommodate the data, vector must have been created
+from entry with dtuple_convert_big_rec. */
+UNIV_INTERN
+void
+dtuple_convert_back_big_rec(
+/*========================*/
+	dict_index_t*	index __attribute__((unused)),	/*!< in: index */
+	dtuple_t*	entry,	/*!< in: entry whose data was put to vector */
+	big_rec_t*	vector)	/*!< in, own: big rec vector; it is
+				freed in this function */
+{
+	big_rec_field_t*		b	= vector->fields;
+	const big_rec_field_t* const	end	= b + vector->n_fields;
+
+	for (; b < end; b++) {
+		dfield_t*	dfield;
+		ulint		local_len;
+
+		dfield = dtuple_get_nth_field(entry, b->field_no);
+		local_len = dfield_get_len(dfield);
+
+		ut_ad(dfield_is_ext(dfield));
+		ut_ad(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+		local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+
+		ut_ad(local_len <= DICT_MAX_INDEX_COL_LEN);
+
+		dfield_set_data(dfield,
+				(char*) b->data - local_len,
+				b->len + local_len);
+	}
+
+	mem_heap_free(vector->heap);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/data/data0type.c b/storage/innodb_plugin/data/data0type.c
similarity index 65%
rename from storage/innobase/data/data0type.c
rename to storage/innodb_plugin/data/data0type.c
index 305000d7c0a..8429775e7d8 100644
--- a/storage/innobase/data/data0type.c
+++ b/storage/innodb_plugin/data/data0type.c
@@ -1,7 +1,24 @@
-/******************************************************
-Data types
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file data/data0type.c
+Data types
 
 Created 1/16/1996 Heikki Tuuri
 *******************************************************/
@@ -12,57 +29,37 @@ Created 1/16/1996 Heikki Tuuri
 #include "data0type.ic"
 #endif
 
-/**********************************************************************
-This function is used to find the storage length in bytes of the first n
-characters for prefix indexes using a multibyte character set. The function
-finds charset information and returns length of prefix_len characters in the
-index field in bytes.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-
-ulint
-innobase_get_at_most_n_mbchars(
-/*===========================*/
-				/* out: number of bytes occupied by the first
-				n characters */
-	ulint charset_id,	/* in: character set id */
-	ulint prefix_len,	/* in: prefix length in bytes of the index
-				(this has to be divided by mbmaxlen to get the
-				number of CHARACTERS n in the prefix) */
-	ulint data_len,		/* in: length of the string in bytes */
-	const char* str);	/* in: character string */
+#ifndef UNIV_HOTBACKUP
+# include "ha_prototypes.h"
 
 /* At the database startup we store the default-charset collation number of
 this MySQL installation to this global variable. If we have < 4.1.2 format
 column definitions, or records in the insert buffer, we use this
 charset-collation code for them. */
 
-ulint	data_mysql_default_charset_coll		= 99999999;
+UNIV_INTERN ulint	data_mysql_default_charset_coll;
 
-/*************************************************************************
+/*********************************************************************//**
 Determine how many bytes the first n characters of the given string occupy.
 If the string is shorter than n characters, returns the number of bytes
-the characters in the string occupy. */
-
+the characters in the string occupy.
+@return	length of the prefix, in bytes */
+UNIV_INTERN
 ulint
 dtype_get_at_most_n_mbchars(
 /*========================*/
-					/* out: length of the prefix,
-					in bytes */
-	ulint		prtype,		/* in: precise type */
-	ulint		mbminlen,	/* in: minimum length of a
+	ulint		prtype,		/*!< in: precise type */
+	ulint		mbminlen,	/*!< in: minimum length of a
 					multi-byte character */
-	ulint		mbmaxlen,	/* in: maximum length of a
+	ulint		mbmaxlen,	/*!< in: maximum length of a
 					multi-byte character */
-	ulint		prefix_len,	/* in: length of the requested
+	ulint		prefix_len,	/*!< in: length of the requested
 					prefix, in characters, multiplied by
 					dtype_get_mbmaxlen(dtype) */
-	ulint		data_len,	/* in: length of str (in bytes) */
-	const char*	str)		/* in: the string whose prefix
+	ulint		data_len,	/*!< in: length of str (in bytes) */
+	const char*	str)		/*!< in: the string whose prefix
 					length is being determined */
 {
-#ifndef UNIV_HOTBACKUP
 	ut_a(data_len != UNIV_SQL_NULL);
 	ut_ad(!mbmaxlen || !(prefix_len % mbmaxlen));
 
@@ -80,23 +77,18 @@ dtype_get_at_most_n_mbchars(
 	}
 
 	return(data_len);
-#else /* UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds.  Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-#endif /* UNIV_HOTBACKUP */
 }
+#endif /* UNIV_HOTBACKUP */
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if a data main type is a string type. Also a BLOB is considered a
-string type. */
-
+string type.
+@return	TRUE if string type */
+UNIV_INTERN
 ibool
 dtype_is_string_type(
 /*=================*/
-			/* out: TRUE if string type */
-	ulint	mtype)	/* in: InnoDB main data type code: DATA_CHAR, ... */
+	ulint	mtype)	/*!< in: InnoDB main data type code: DATA_CHAR, ... */
 {
 	if (mtype <= DATA_BLOB
 	    || mtype == DATA_MYSQL
@@ -108,17 +100,17 @@ dtype_is_string_type(
 	return(FALSE);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if a type is a binary string type. Note that for tables created with
 < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
-those DATA_BLOB columns this function currently returns FALSE. */
-
+those DATA_BLOB columns this function currently returns FALSE.
+@return	TRUE if binary string type */
+UNIV_INTERN
 ibool
 dtype_is_binary_string_type(
 /*========================*/
-			/* out: TRUE if binary string type */
-	ulint	mtype,	/* in: main data type */
-	ulint	prtype)	/* in: precise type */
+	ulint	mtype,	/*!< in: main data type */
+	ulint	prtype)	/*!< in: precise type */
 {
 	if ((mtype == DATA_FIXBINARY)
 	    || (mtype == DATA_BINARY)
@@ -130,18 +122,18 @@ dtype_is_binary_string_type(
 	return(FALSE);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if a type is a non-binary string type. That is, dtype_is_string_type is
 TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
 with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
-For those DATA_BLOB columns this function currently returns TRUE. */
-
+For those DATA_BLOB columns this function currently returns TRUE.
+@return	TRUE if non-binary string type */
+UNIV_INTERN
 ibool
 dtype_is_non_binary_string_type(
 /*============================*/
-			/* out: TRUE if non-binary string type */
-	ulint	mtype,	/* in: main data type */
-	ulint	prtype)	/* in: precise type */
+	ulint	mtype,	/*!< in: main data type */
+	ulint	prtype)	/*!< in: precise type */
 {
 	if (dtype_is_string_type(mtype) == TRUE
 	    && dtype_is_binary_string_type(mtype, prtype) == FALSE) {
@@ -152,27 +144,17 @@ dtype_is_non_binary_string_type(
 	return(FALSE);
 }
 
-/*************************************************************************
-Gets the MySQL charset-collation code for MySQL string types. */
-
-ulint
-dtype_get_charset_coll_noninline(
-/*=============================*/
-	ulint	prtype)	/* in: precise data type */
-{
-	return(dtype_get_charset_coll(prtype));
-}
-
-/*************************************************************************
+/*********************************************************************//**
 Forms a precise type from the < 4.1.2 format precise type plus the
-charset-collation code. */
-
+charset-collation code.
+@return precise type, including the charset-collation code */
+UNIV_INTERN
 ulint
 dtype_form_prtype(
 /*==============*/
-	ulint	old_prtype,	/* in: the MySQL type code and the flags
+	ulint	old_prtype,	/*!< in: the MySQL type code and the flags
 				DATA_BINARY_TYPE etc. */
-	ulint	charset_coll)	/* in: MySQL charset-collation code */
+	ulint	charset_coll)	/*!< in: MySQL charset-collation code */
 {
 	ut_a(old_prtype < 256 * 256);
 	ut_a(charset_coll < 256);
@@ -180,14 +162,14 @@ dtype_form_prtype(
 	return(old_prtype + (charset_coll << 16));
 }
 
-/*************************************************************************
-Validates a data type structure. */
-
+/*********************************************************************//**
+Validates a data type structure.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 dtype_validate(
 /*===========*/
-				/* out: TRUE if ok */
-	dtype_t*	type)	/* in: type struct to validate */
+	const dtype_t*	type)	/*!< in: type struct to validate */
 {
 	ut_a(type);
 	ut_a(type->mtype >= DATA_VARCHAR);
@@ -197,18 +179,21 @@ dtype_validate(
 		ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS);
 	}
 
+#ifndef UNIV_HOTBACKUP
 	ut_a(type->mbminlen <= type->mbmaxlen);
+#endif /* !UNIV_HOTBACKUP */
 
 	return(TRUE);
 }
 
-/*************************************************************************
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
 Prints a data type structure. */
-
+UNIV_INTERN
 void
 dtype_print(
 /*========*/
-	dtype_t*	type)	/* in: type */
+	const dtype_t*	type)	/*!< in: type */
 {
 	ulint	mtype;
 	ulint	prtype;
@@ -293,3 +278,4 @@ dtype_print(
 
 	fprintf(stderr, " len %lu", (ulong) len);
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/dict/dict0boot.c b/storage/innodb_plugin/dict/dict0boot.c
similarity index 75%
rename from storage/innobase/dict/dict0boot.c
rename to storage/innodb_plugin/dict/dict0boot.c
index 5f9aaf71e18..e55de30481b 100644
--- a/storage/innobase/dict/dict0boot.c
+++ b/storage/innodb_plugin/dict/dict0boot.c
@@ -1,7 +1,24 @@
-/******************************************************
-Data dictionary creation and booting
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0boot.c
+Data dictionary creation and booting
 
 Created 4/18/1996 Heikki Tuuri
 *******************************************************/
@@ -23,36 +40,35 @@ Created 4/18/1996 Heikki Tuuri
 #include "log0recv.h"
 #include "os0file.h"
 
-/**************************************************************************
-Gets a pointer to the dictionary header and x-latches its page. */
-
+/**********************************************************************//**
+Gets a pointer to the dictionary header and x-latches its page.
+@return	pointer to the dictionary header, page x-latched */
+UNIV_INTERN
 dict_hdr_t*
 dict_hdr_get(
 /*=========*/
-			/* out: pointer to the dictionary header,
-			page x-latched */
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
+	buf_block_t*	block;
 	dict_hdr_t*	header;
 
-	ut_ad(mtr);
+	block = buf_page_get(DICT_HDR_SPACE, 0, DICT_HDR_PAGE_NO,
+			     RW_X_LATCH, mtr);
+	header = DICT_HDR + buf_block_get_frame(block);
+
+	buf_block_dbg_add_level(block, SYNC_DICT_HEADER);
 
-	header = DICT_HDR + buf_page_get(DICT_HDR_SPACE, DICT_HDR_PAGE_NO,
-					 RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(header, SYNC_DICT_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
 	return(header);
 }
 
-/**************************************************************************
-Returns a new table, index, or tree id. */
-
+/**********************************************************************//**
+Returns a new table, index, or tree id.
+@return	the new id */
+UNIV_INTERN
 dulint
 dict_hdr_get_new_id(
 /*================*/
-			/* out: the new id */
-	ulint	type)	/* in: DICT_HDR_ROW_ID, ... */
+	ulint	type)	/*!< in: DICT_HDR_ROW_ID, ... */
 {
 	dict_hdr_t*	dict_hdr;
 	dulint		id;
@@ -74,10 +90,10 @@ dict_hdr_get_new_id(
 	return(id);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Writes the current value of the row id counter to the dictionary header file
 page. */
-
+UNIV_INTERN
 void
 dict_hdr_flush_row_id(void)
 /*=======================*/
@@ -99,31 +115,28 @@ dict_hdr_flush_row_id(void)
 	mtr_commit(&mtr);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Creates the file page for the dictionary header. This function is
-called only at the database creation. */
+called only at the database creation.
+@return	TRUE if succeed */
 static
 ibool
 dict_hdr_create(
 /*============*/
-			/* out: TRUE if succeed */
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
+	buf_block_t*	block;
 	dict_hdr_t*	dict_header;
-	ulint		hdr_page_no;
 	ulint		root_page_no;
-	page_t*		page;
 
 	ut_ad(mtr);
 
 	/* Create the dictionary header file block in a new, allocated file
 	segment in the system tablespace */
-	page = fseg_create(DICT_HDR_SPACE, 0,
-			   DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
+	block = fseg_create(DICT_HDR_SPACE, 0,
+			    DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
 
-	hdr_page_no = buf_frame_get_page_no(page);
-
-	ut_a(DICT_HDR_PAGE_NO == hdr_page_no);
+	ut_a(DICT_HDR_PAGE_NO == buf_block_get_page_no(block));
 
 	dict_header = dict_hdr_get(mtr);
 
@@ -147,7 +160,8 @@ dict_hdr_create(
 
 	/*--------------------------*/
 	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
-				  DICT_HDR_SPACE, DICT_TABLES_ID, FALSE, mtr);
+				  DICT_HDR_SPACE, 0, DICT_TABLES_ID,
+				  dict_ind_redundant, mtr);
 	if (root_page_no == FIL_NULL) {
 
 		return(FALSE);
@@ -156,8 +170,9 @@ dict_hdr_create(
 	mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
 			 MLOG_4BYTES, mtr);
 	/*--------------------------*/
-	root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE,
-				  DICT_TABLE_IDS_ID, FALSE, mtr);
+	root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, 0,
+				  DICT_TABLE_IDS_ID,
+				  dict_ind_redundant, mtr);
 	if (root_page_no == FIL_NULL) {
 
 		return(FALSE);
@@ -167,7 +182,8 @@ dict_hdr_create(
 			 MLOG_4BYTES, mtr);
 	/*--------------------------*/
 	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
-				  DICT_HDR_SPACE, DICT_COLUMNS_ID, FALSE, mtr);
+				  DICT_HDR_SPACE, 0, DICT_COLUMNS_ID,
+				  dict_ind_redundant, mtr);
 	if (root_page_no == FIL_NULL) {
 
 		return(FALSE);
@@ -177,7 +193,8 @@ dict_hdr_create(
 			 MLOG_4BYTES, mtr);
 	/*--------------------------*/
 	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
-				  DICT_HDR_SPACE, DICT_INDEXES_ID, FALSE, mtr);
+				  DICT_HDR_SPACE, 0, DICT_INDEXES_ID,
+				  dict_ind_redundant, mtr);
 	if (root_page_no == FIL_NULL) {
 
 		return(FALSE);
@@ -187,7 +204,8 @@ dict_hdr_create(
 			 MLOG_4BYTES, mtr);
 	/*--------------------------*/
 	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
-				  DICT_HDR_SPACE, DICT_FIELDS_ID, FALSE, mtr);
+				  DICT_HDR_SPACE, 0, DICT_FIELDS_ID,
+				  dict_ind_redundant, mtr);
 	if (root_page_no == FIL_NULL) {
 
 		return(FALSE);
@@ -200,10 +218,10 @@ dict_hdr_create(
 	return(TRUE);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Initializes the data dictionary memory structures when the database is
 started. This function is also called when the data dictionary is created. */
-
+UNIV_INTERN
 void
 dict_boot(void)
 /*===========*/
@@ -213,6 +231,7 @@ dict_boot(void)
 	dict_hdr_t*	dict_hdr;
 	mem_heap_t*	heap;
 	mtr_t		mtr;
+	ulint		error;
 
 	mtr_start(&mtr);
 
@@ -249,7 +268,10 @@ dict_boot(void)
 
 	dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
+	/* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
 	dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
+	/* TYPE is either DICT_TABLE_ORDINARY, or (TYPE & DICT_TF_COMPACT)
+	and (TYPE & DICT_TF_FORMAT_MASK) are nonzero and TYPE = table->flags */
 	dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
@@ -270,9 +292,12 @@ dict_boot(void)
 
 	index->id = DICT_TABLES_ID;
 
-	dict_index_add_to_cache(table, index,
-				mtr_read_ulint(dict_hdr + DICT_HDR_TABLES,
-					       MLOG_4BYTES, &mtr));
+	error = dict_index_add_to_cache(table, index,
+					mtr_read_ulint(dict_hdr
+						       + DICT_HDR_TABLES,
+						       MLOG_4BYTES, &mtr),
+					FALSE);
+	ut_a(error == DB_SUCCESS);
 
 	/*-------------------------*/
 	index = dict_mem_index_create("SYS_TABLES", "ID_IND",
@@ -280,9 +305,12 @@ dict_boot(void)
 	dict_mem_index_add_field(index, "ID", 0);
 
 	index->id = DICT_TABLE_IDS_ID;
-	dict_index_add_to_cache(table, index,
-				mtr_read_ulint(dict_hdr + DICT_HDR_TABLE_IDS,
-					       MLOG_4BYTES, &mtr));
+	error = dict_index_add_to_cache(table, index,
+					mtr_read_ulint(dict_hdr
+						       + DICT_HDR_TABLE_IDS,
+						       MLOG_4BYTES, &mtr),
+					FALSE);
+	ut_a(error == DB_SUCCESS);
 
 	/*-------------------------*/
 	table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0);
@@ -309,9 +337,12 @@ dict_boot(void)
 	dict_mem_index_add_field(index, "POS", 0);
 
 	index->id = DICT_COLUMNS_ID;
-	dict_index_add_to_cache(table, index,
-				mtr_read_ulint(dict_hdr + DICT_HDR_COLUMNS,
-					       MLOG_4BYTES, &mtr));
+	error = dict_index_add_to_cache(table, index,
+					mtr_read_ulint(dict_hdr
+						       + DICT_HDR_COLUMNS,
+						       MLOG_4BYTES, &mtr),
+					FALSE);
+	ut_a(error == DB_SUCCESS);
 
 	/*-------------------------*/
 	table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0);
@@ -348,9 +379,12 @@ dict_boot(void)
 	dict_mem_index_add_field(index, "ID", 0);
 
 	index->id = DICT_INDEXES_ID;
-	dict_index_add_to_cache(table, index,
-				mtr_read_ulint(dict_hdr + DICT_HDR_INDEXES,
-					       MLOG_4BYTES, &mtr));
+	error = dict_index_add_to_cache(table, index,
+					mtr_read_ulint(dict_hdr
+						       + DICT_HDR_INDEXES,
+						       MLOG_4BYTES, &mtr),
+					FALSE);
+	ut_a(error == DB_SUCCESS);
 
 	/*-------------------------*/
 	table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0);
@@ -372,9 +406,12 @@ dict_boot(void)
 	dict_mem_index_add_field(index, "POS", 0);
 
 	index->id = DICT_FIELDS_ID;
-	dict_index_add_to_cache(table, index,
-				mtr_read_ulint(dict_hdr + DICT_HDR_FIELDS,
-					       MLOG_4BYTES, &mtr));
+	error = dict_index_add_to_cache(table, index,
+					mtr_read_ulint(dict_hdr
+						       + DICT_HDR_FIELDS,
+						       MLOG_4BYTES, &mtr),
+					FALSE);
+	ut_a(error == DB_SUCCESS);
 
 	mtr_commit(&mtr);
 	/*-------------------------*/
@@ -393,7 +430,7 @@ dict_boot(void)
 	mutex_exit(&(dict_sys->mutex));
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Inserts the basic system table data into themselves in the database
 creation. */
 static
@@ -404,9 +441,9 @@ dict_insert_initial_data(void)
 	/* Does nothing yet */
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Creates and initializes the data dictionary at the database creation. */
-
+UNIV_INTERN
 void
 dict_create(void)
 /*=============*/
diff --git a/storage/innobase/dict/dict0crea.c b/storage/innodb_plugin/dict/dict0crea.c
similarity index 79%
rename from storage/innobase/dict/dict0crea.c
rename to storage/innodb_plugin/dict/dict0crea.c
index 4116230347d..7bad4d2057e 100644
--- a/storage/innobase/dict/dict0crea.c
+++ b/storage/innodb_plugin/dict/dict0crea.c
@@ -1,7 +1,24 @@
-/******************************************************
-Database object creation
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0crea.c
+Database object creation
 
 Created 1/8/1996 Heikki Tuuri
 *******************************************************/
@@ -26,16 +43,16 @@ Created 1/8/1996 Heikki Tuuri
 #include "usr0sess.h"
 #include "ut0vec.h"
 
-/*********************************************************************
+/*****************************************************************//**
 Based on a table object, this function builds the entry to be inserted
-in the SYS_TABLES system table. */
+in the SYS_TABLES system table.
+@return	the tuple which should be inserted */
 static
 dtuple_t*
 dict_create_sys_tables_tuple(
 /*=========================*/
-				/* out: the tuple which should be inserted */
-	dict_table_t*	table,	/* in: table */
-	mem_heap_t*	heap)	/* in: memory heap from which the memory for
+	dict_table_t*	table,	/*!< in: table */
+	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
 				the built tuple is allocated */
 {
 	dict_table_t*	sys_tables;
@@ -49,6 +66,8 @@ dict_create_sys_tables_tuple(
 
 	entry = dtuple_create(heap, 8 + DATA_N_SYS_COLS);
 
+	dict_table_copy_types(entry, sys_tables);
+
 	/* 0: NAME -----------------------------*/
 	dfield = dtuple_get_nth_field(entry, 0);
 
@@ -75,27 +94,34 @@ dict_create_sys_tables_tuple(
 	dfield = dtuple_get_nth_field(entry, 3);
 
 	ptr = mem_heap_alloc(heap, 4);
-	mach_write_to_4(ptr, DICT_TABLE_ORDINARY);
+	if (table->flags & ~DICT_TF_COMPACT) {
+		ut_a(table->flags & DICT_TF_COMPACT);
+		ut_a(dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
+		ut_a((table->flags & DICT_TF_ZSSIZE_MASK)
+		     <= (DICT_TF_ZSSIZE_MAX << DICT_TF_ZSSIZE_SHIFT));
+		ut_a(!(table->flags & (~0 << DICT_TF_BITS)));
+		mach_write_to_4(ptr, table->flags);
+	} else {
+		mach_write_to_4(ptr, DICT_TABLE_ORDINARY);
+	}
 
 	dfield_set_data(dfield, ptr, 4);
 	/* 6: MIX_ID (obsolete) ---------------------------*/
 	dfield = dtuple_get_nth_field(entry, 4);
 
-	ptr = mem_heap_alloc(heap, 8);
-	memset(ptr, 0, 8);
+	ptr = mem_heap_zalloc(heap, 8);
 
 	dfield_set_data(dfield, ptr, 8);
 	/* 7: MIX_LEN (obsolete) --------------------------*/
 
 	dfield = dtuple_get_nth_field(entry, 5);
 
-	ptr = mem_heap_alloc(heap, 4);
-	memset(ptr, 0, 4);
+	ptr = mem_heap_zalloc(heap, 4);
 
 	dfield_set_data(dfield, ptr, 4);
 	/* 8: CLUSTER_NAME ---------------------*/
 	dfield = dtuple_get_nth_field(entry, 6);
-	dfield_set_data(dfield, NULL, UNIV_SQL_NULL); /* not supported */
+	dfield_set_null(dfield); /* not supported */
 
 	/* 9: SPACE ----------------------------*/
 	dfield = dtuple_get_nth_field(entry, 7);
@@ -106,22 +132,20 @@ dict_create_sys_tables_tuple(
 	dfield_set_data(dfield, ptr, 4);
 	/*----------------------------------*/
 
-	dict_table_copy_types(entry, sys_tables);
-
 	return(entry);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Based on a table object, this function builds the entry to be inserted
-in the SYS_COLUMNS system table. */
+in the SYS_COLUMNS system table.
+@return	the tuple which should be inserted */
 static
 dtuple_t*
 dict_create_sys_columns_tuple(
 /*==========================*/
-				/* out: the tuple which should be inserted */
-	dict_table_t*	table,	/* in: table */
-	ulint		i,	/* in: column number */
-	mem_heap_t*	heap)	/* in: memory heap from which the memory for
+	dict_table_t*	table,	/*!< in: table */
+	ulint		i,	/*!< in: column number */
+	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
 				the built tuple is allocated */
 {
 	dict_table_t*		sys_columns;
@@ -139,6 +163,8 @@ dict_create_sys_columns_tuple(
 
 	entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
 
+	dict_table_copy_types(entry, sys_columns);
+
 	/* 0: TABLE_ID -----------------------*/
 	dfield = dtuple_get_nth_field(entry, 0);
 
@@ -188,20 +214,18 @@ dict_create_sys_columns_tuple(
 	dfield_set_data(dfield, ptr, 4);
 	/*---------------------------------*/
 
-	dict_table_copy_types(entry, sys_columns);
-
 	return(entry);
 }
 
-/*******************************************************************
-Builds a table definition to insert. */
+/***************************************************************//**
+Builds a table definition to insert.
+@return	DB_SUCCESS or error code */
 static
 ulint
 dict_build_table_def_step(
 /*======================*/
-				/* out: DB_SUCCESS or error code */
-	que_thr_t*	thr,	/* in: query thread */
-	tab_node_t*	node)	/* in: table create node */
+	que_thr_t*	thr,	/*!< in: query thread */
+	tab_node_t*	node)	/*!< in: table create node */
 {
 	dict_table_t*	table;
 	dtuple_t*	row;
@@ -209,8 +233,6 @@ dict_build_table_def_step(
 	const char*	path_or_name;
 	ibool		is_path;
 	mtr_t		mtr;
-	ulint		i;
-	ulint		row_len;
 
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 
@@ -220,14 +242,6 @@ dict_build_table_def_step(
 
 	thr_get_trx(thr)->table_id = table->id;
 
-	row_len = 0;
-	for (i = 0; i < table->n_def; i++) {
-		row_len += dict_col_get_min_size(&table->cols[i]);
-	}
-	if (row_len > BTR_PAGE_MAX_REC_SIZE) {
-		return(DB_TOO_BIG_RECORD);
-	}
-
 	if (srv_file_per_table) {
 		/* We create a new single-table tablespace for the table.
 		We initially let it be 4 pages:
@@ -250,8 +264,13 @@ dict_build_table_def_step(
 			is_path = FALSE;
 		}
 
+		ut_ad(dict_table_get_format(table) <= DICT_TF_FORMAT_MAX);
+		ut_ad(!dict_table_zip_size(table)
+		      || dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
+
 		error = fil_create_new_single_table_tablespace(
 			&space, path_or_name, is_path,
+			table->flags == DICT_TF_COMPACT ? 0 : table->flags,
 			FIL_IBD_FILE_INITIAL_SIZE);
 		table->space = (unsigned int) space;
 
@@ -265,6 +284,9 @@ dict_build_table_def_step(
 		fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
 
 		mtr_commit(&mtr);
+	} else {
+		/* Create in the system tablespace: disallow new features */
+		table->flags &= DICT_TF_COMPACT;
 	}
 
 	row = dict_create_sys_tables_tuple(table, node->heap);
@@ -274,14 +296,14 @@ dict_build_table_def_step(
 	return(DB_SUCCESS);
 }
 
-/*******************************************************************
-Builds a column definition to insert. */
+/***************************************************************//**
+Builds a column definition to insert.
+@return	DB_SUCCESS */
 static
 ulint
 dict_build_col_def_step(
 /*====================*/
-				/* out: DB_SUCCESS */
-	tab_node_t*	node)	/* in: table create node */
+	tab_node_t*	node)	/*!< in: table create node */
 {
 	dtuple_t*	row;
 
@@ -292,16 +314,16 @@ dict_build_col_def_step(
 	return(DB_SUCCESS);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Based on an index object, this function builds the entry to be inserted
-in the SYS_INDEXES system table. */
+in the SYS_INDEXES system table.
+@return	the tuple which should be inserted */
 static
 dtuple_t*
 dict_create_sys_indexes_tuple(
 /*==========================*/
-				/* out: the tuple which should be inserted */
-	dict_index_t*	index,	/* in: index */
-	mem_heap_t*	heap)	/* in: memory heap from which the memory for
+	dict_index_t*	index,	/*!< in: index */
+	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
 				the built tuple is allocated */
 {
 	dict_table_t*	sys_indexes;
@@ -319,6 +341,8 @@ dict_create_sys_indexes_tuple(
 
 	entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
 
+	dict_table_copy_types(entry, sys_indexes);
+
 	/* 0: TABLE_ID -----------------------*/
 	dfield = dtuple_get_nth_field(entry, 0);
 
@@ -377,22 +401,20 @@ dict_create_sys_indexes_tuple(
 	dfield_set_data(dfield, ptr, 4);
 	/*--------------------------------*/
 
-	dict_table_copy_types(entry, sys_indexes);
-
 	return(entry);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Based on an index object, this function builds the entry to be inserted
-in the SYS_FIELDS system table. */
+in the SYS_FIELDS system table.
+@return	the tuple which should be inserted */
 static
 dtuple_t*
 dict_create_sys_fields_tuple(
 /*=========================*/
-				/* out: the tuple which should be inserted */
-	dict_index_t*	index,	/* in: index */
-	ulint		i,	/* in: field number */
-	mem_heap_t*	heap)	/* in: memory heap from which the memory for
+	dict_index_t*	index,	/*!< in: index */
+	ulint		i,	/*!< in: field number */
+	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
 				the built tuple is allocated */
 {
 	dict_table_t*	sys_fields;
@@ -408,6 +430,7 @@ dict_create_sys_fields_tuple(
 	for (j = 0; j < index->n_fields; j++) {
 		if (dict_index_get_nth_field(index, j)->prefix_len > 0) {
 			index_contains_column_prefix_field = TRUE;
+			break;
 		}
 	}
 
@@ -417,6 +440,8 @@ dict_create_sys_fields_tuple(
 
 	entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
 
+	dict_table_copy_types(entry, sys_fields);
+
 	/* 0: INDEX_ID -----------------------*/
 	dfield = dtuple_get_nth_field(entry, 0);
 
@@ -452,26 +477,24 @@ dict_create_sys_fields_tuple(
 			ut_strlen(field->name));
 	/*---------------------------------*/
 
-	dict_table_copy_types(entry, sys_fields);
-
 	return(entry);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Creates the tuple with which the index entry is searched for writing the index
-tree root page number, if such a tree is created. */
+tree root page number, if such a tree is created.
+@return	the tuple for search */
 static
 dtuple_t*
 dict_create_search_tuple(
 /*=====================*/
-				/* out: the tuple for search */
-	dtuple_t*	tuple,	/* in: the tuple inserted in the SYS_INDEXES
+	const dtuple_t*	tuple,	/*!< in: the tuple inserted in the SYS_INDEXES
 				table */
-	mem_heap_t*	heap)	/* in: memory heap from which the memory for
+	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
 				the built tuple is allocated */
 {
 	dtuple_t*	search_tuple;
-	dfield_t*	field1;
+	const dfield_t*	field1;
 	dfield_t*	field2;
 
 	ut_ad(tuple && heap);
@@ -493,15 +516,15 @@ dict_create_search_tuple(
 	return(search_tuple);
 }
 
-/*******************************************************************
-Builds an index definition row to insert. */
+/***************************************************************//**
+Builds an index definition row to insert.
+@return	DB_SUCCESS or error code */
 static
 ulint
 dict_build_index_def_step(
 /*======================*/
-				/* out: DB_SUCCESS or error code */
-	que_thr_t*	thr,	/* in: query thread */
-	ind_node_t*	node)	/* in: index create node */
+	que_thr_t*	thr,	/*!< in: query thread */
+	ind_node_t*	node)	/*!< in: index create node */
 {
 	dict_table_t*	table;
 	dict_index_t*	index;
@@ -525,7 +548,7 @@ dict_build_index_def_step(
 	node->table = table;
 
 	ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
-	      || (index->type & DICT_CLUSTERED));
+	      || dict_index_is_clust(index));
 
 	index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID);
 
@@ -539,17 +562,20 @@ dict_build_index_def_step(
 
 	ins_node_set_new_row(node->ind_def, row);
 
+	/* Note that the index was created by this transaction. */
+	index->trx_id = (ib_uint64_t) ut_conv_dulint_to_longlong(trx->id);
+
 	return(DB_SUCCESS);
 }
 
-/*******************************************************************
-Builds a field definition row to insert. */
+/***************************************************************//**
+Builds a field definition row to insert.
+@return	DB_SUCCESS */
 static
 ulint
 dict_build_field_def_step(
 /*======================*/
-				/* out: DB_SUCCESS */
-	ind_node_t*	node)	/* in: index create node */
+	ind_node_t*	node)	/*!< in: index create node */
 {
 	dict_index_t*	index;
 	dtuple_t*	row;
@@ -563,14 +589,14 @@ dict_build_field_def_step(
 	return(DB_SUCCESS);
 }
 
-/*******************************************************************
-Creates an index tree for the index if it is not a member of a cluster. */
+/***************************************************************//**
+Creates an index tree for the index if it is not a member of a cluster.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 static
 ulint
 dict_create_index_tree_step(
 /*========================*/
-				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-	ind_node_t*	node)	/* in: index create node */
+	ind_node_t*	node)	/*!< in: index create node */
 {
 	dict_index_t*	index;
 	dict_table_t*	sys_indexes;
@@ -600,8 +626,9 @@ dict_create_index_tree_step(
 
 	btr_pcur_move_to_next_user_rec(&pcur, &mtr);
 
-	node->page_no = btr_create(index->type, index->space, index->id,
-				   dict_table_is_comp(table), &mtr);
+	node->page_no = btr_create(index->type, index->space,
+				   dict_table_zip_size(index->table),
+				   index->id, index, &mtr);
 	/* printf("Created a new index tree in space %lu root page %lu\n",
 	index->space, index->page_no); */
 
@@ -619,20 +646,21 @@ dict_create_index_tree_step(
 	return(DB_SUCCESS);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Drops the index tree associated with a row in SYS_INDEXES table. */
-
+UNIV_INTERN
 void
 dict_drop_index_tree(
 /*=================*/
-	rec_t*	rec,	/* in: record in the clustered index of SYS_INDEXES
-			table */
-	mtr_t*	mtr)	/* in: mtr having the latch on the record page */
+	rec_t*	rec,	/*!< in/out: record in the clustered index
+			of SYS_INDEXES table */
+	mtr_t*	mtr)	/*!< in: mtr having the latch on the record page */
 {
-	ulint	root_page_no;
-	ulint	space;
-	byte*	ptr;
-	ulint	len;
+	ulint		root_page_no;
+	ulint		space;
+	ulint		zip_size;
+	const byte*	ptr;
+	ulint		len;
 
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 	ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
@@ -654,8 +682,9 @@ dict_drop_index_tree(
 	ut_ad(len == 4);
 
 	space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
+	zip_size = fil_space_get_zip_size(space);
 
-	if (!fil_tablespace_exists_in_mem(space)) {
+	if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
 		/* It is a single table tablespace and the .ibd file is
 		missing: do nothing */
 
@@ -665,7 +694,7 @@ dict_drop_index_tree(
 	/* We free all the pages but the root page first; this operation
 	may span several mini-transactions */
 
-	btr_free_but_not_root(space, root_page_no);
+	btr_free_but_not_root(space, zip_size, root_page_no);
 
 	/* Then we free the root page in the same mini-transaction where
 	we write FIL_NULL to the appropriate field in the SYS_INDEXES
@@ -673,38 +702,40 @@ dict_drop_index_tree(
 
 	/* printf("Dropping index tree in space %lu root page %lu\n", space,
 	root_page_no); */
-	btr_free_root(space, root_page_no, mtr);
+	btr_free_root(space, zip_size, root_page_no, mtr);
 
 	page_rec_write_index_page_no(rec,
 				     DICT_SYS_INDEXES_PAGE_NO_FIELD,
 				     FIL_NULL, mtr);
 }
 
-/***********************************************************************
-Truncates the index tree associated with a row in SYS_INDEXES table. */
-
+/*******************************************************************//**
+Truncates the index tree associated with a row in SYS_INDEXES table.
+@return	new root page number, or FIL_NULL on failure */
+UNIV_INTERN
 ulint
 dict_truncate_index_tree(
 /*=====================*/
-				/* out: new root page number, or
-				FIL_NULL on failure */
-	dict_table_t*	table,	/* in: the table the index belongs to */
-	btr_pcur_t*	pcur,	/* in/out: persistent cursor pointing to
+	dict_table_t*	table,	/*!< in: the table the index belongs to */
+	ulint		space,	/*!< in: 0=truncate,
+				nonzero=create the index tree in the
+				given tablespace */
+	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor pointing to
 				record in the clustered index of
 				SYS_INDEXES table. The cursor may be
 				repositioned in this call. */
-	mtr_t*		mtr)	/* in: mtr having the latch
+	mtr_t*		mtr)	/*!< in: mtr having the latch
 				on the record page. The mtr may be
 				committed and restarted in this call. */
 {
 	ulint		root_page_no;
-	ulint		space;
+	ibool		drop = !space;
+	ulint		zip_size;
 	ulint		type;
 	dulint		index_id;
 	rec_t*		rec;
-	byte*		ptr;
+	const byte*	ptr;
 	ulint		len;
-	ulint		comp;
 	dict_index_t*	index;
 
 	ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -716,13 +747,13 @@ dict_truncate_index_tree(
 
 	root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
 
-	if (root_page_no == FIL_NULL) {
+	if (drop && root_page_no == FIL_NULL) {
 		/* The tree has been freed. */
 
 		ut_print_timestamp(stderr);
 		fprintf(stderr, "  InnoDB: Trying to TRUNCATE"
 			" a missing index of table %s!\n", table->name);
-		return(FIL_NULL);
+		drop = FALSE;
 	}
 
 	ptr = rec_get_nth_field_old(rec,
@@ -730,9 +761,13 @@ dict_truncate_index_tree(
 
 	ut_ad(len == 4);
 
-	space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
+	if (drop) {
+		space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
+	}
 
-	if (!fil_tablespace_exists_in_mem(space)) {
+	zip_size = fil_space_get_zip_size(space);
+
+	if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
 		/* It is a single table tablespace and the .ibd file is
 		missing: do nothing */
 
@@ -751,20 +786,25 @@ dict_truncate_index_tree(
 	ut_ad(len == 8);
 	index_id = mach_read_from_8(ptr);
 
+	if (!drop) {
+
+		goto create;
+	}
+
 	/* We free all the pages but the root page first; this operation
 	may span several mini-transactions */
 
-	btr_free_but_not_root(space, root_page_no);
+	btr_free_but_not_root(space, zip_size, root_page_no);
 
 	/* Then we free the root page in the same mini-transaction where
 	we create the b-tree and write its new root page number to the
 	appropriate field in the SYS_INDEXES record: this mini-transaction
 	marks the B-tree totally truncated */
 
-	comp = page_is_comp(btr_page_get(space, root_page_no, RW_X_LATCH,
-					 mtr));
+	btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
 
-	btr_free_root(space, root_page_no, mtr);
+	btr_free_root(space, zip_size, root_page_no, mtr);
+create:
 	/* We will temporarily write FIL_NULL to the PAGE_NO field
 	in SYS_INDEXES, so that the database will not get into an
 	inconsistent state in case it crashes between the mtr_commit()
@@ -786,36 +826,34 @@ dict_truncate_index_tree(
 	     index;
 	     index = UT_LIST_GET_NEXT(indexes, index)) {
 		if (!ut_dulint_cmp(index->id, index_id)) {
-			break;
+			root_page_no = btr_create(type, space, zip_size,
+						  index_id, index, mtr);
+			index->page = (unsigned int) root_page_no;
+			return(root_page_no);
 		}
 	}
 
-	root_page_no = btr_create(type, space, index_id, comp, mtr);
-	if (index) {
-		index->page = (unsigned int) root_page_no;
-	} else {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"  InnoDB: Index %lu %lu of table %s is missing\n"
-			"InnoDB: from the data dictionary during TRUNCATE!\n",
-			ut_dulint_get_high(index_id),
-			ut_dulint_get_low(index_id),
-			table->name);
-	}
+	ut_print_timestamp(stderr);
+	fprintf(stderr,
+		"  InnoDB: Index %lu %lu of table %s is missing\n"
+		"InnoDB: from the data dictionary during TRUNCATE!\n",
+		ut_dulint_get_high(index_id),
+		ut_dulint_get_low(index_id),
+		table->name);
 
-	return(root_page_no);
+	return(FIL_NULL);
 }
 
-/*************************************************************************
-Creates a table create graph. */
-
+/*********************************************************************//**
+Creates a table create graph.
+@return	own: table create node */
+UNIV_INTERN
 tab_node_t*
 tab_create_graph_create(
 /*====================*/
-				/* out, own: table create node */
-	dict_table_t*	table,	/* in: table to create, built as a memory data
+	dict_table_t*	table,	/*!< in: table to create, built as a memory data
 				structure */
-	mem_heap_t*	heap)	/* in: heap where created */
+	mem_heap_t*	heap)	/*!< in: heap where created */
 {
 	tab_node_t*	node;
 
@@ -842,16 +880,16 @@ tab_create_graph_create(
 	return(node);
 }
 
-/*************************************************************************
-Creates an index create graph. */
-
+/*********************************************************************//**
+Creates an index create graph.
+@return	own: index create node */
+UNIV_INTERN
 ind_node_t*
 ind_create_graph_create(
 /*====================*/
-				/* out, own: index create node */
-	dict_index_t*	index,	/* in: index to create, built as a memory data
+	dict_index_t*	index,	/*!< in: index to create, built as a memory data
 				structure */
-	mem_heap_t*	heap)	/* in: heap where created */
+	mem_heap_t*	heap)	/*!< in: heap where created */
 {
 	ind_node_t*	node;
 
@@ -879,14 +917,14 @@ ind_create_graph_create(
 	return(node);
 }
 
-/***************************************************************
-Creates a table. This is a high-level function used in SQL execution graphs. */
-
+/***********************************************************//**
+Creates a table. This is a high-level function used in SQL execution graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 dict_create_table_step(
 /*===================*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	tab_node_t*	node;
 	ulint		err	= DB_ERROR;
@@ -985,15 +1023,15 @@ function_exit:
 	return(thr);
 }
 
-/***************************************************************
+/***********************************************************//**
 Creates an index. This is a high-level function used in SQL execution
-graphs. */
-
+graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 dict_create_index_step(
 /*===================*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	ind_node_t*	node;
 	ulint		err	= DB_ERROR;
@@ -1046,19 +1084,40 @@ dict_create_index_step(
 
 			return(thr);
 		} else {
-			node->state = INDEX_CREATE_INDEX_TREE;
+			node->state = INDEX_ADD_TO_CACHE;
 		}
 	}
 
+	if (node->state == INDEX_ADD_TO_CACHE) {
+
+		dulint	index_id = node->index->id;
+
+		err = dict_index_add_to_cache(node->table, node->index,
+					      FIL_NULL, TRUE);
+
+		node->index = dict_index_get_if_in_cache_low(index_id);
+		ut_a(!node->index == (err != DB_SUCCESS));
+
+		if (err != DB_SUCCESS) {
+
+			goto function_exit;
+		}
+
+		node->state = INDEX_CREATE_INDEX_TREE;
+	}
+
 	if (node->state == INDEX_CREATE_INDEX_TREE) {
 
 		err = dict_create_index_tree_step(node);
 
 		if (err != DB_SUCCESS) {
+			dict_index_remove_from_cache(node->table, node->index);
+			node->index = NULL;
 
 			goto function_exit;
 		}
 
+		node->index->page = node->page_no;
 		node->state = INDEX_COMMIT_WORK;
 	}
 
@@ -1068,21 +1127,13 @@ dict_create_index_step(
 		(CREATE INDEX does NOT currently do an implicit commit of
 		the current transaction) */
 
-		node->state = INDEX_ADD_TO_CACHE;
+		node->state = INDEX_CREATE_INDEX_TREE;
 
 		/* thr->run_node = node->commit_node;
 
 		return(thr); */
 	}
 
-	if (node->state == INDEX_ADD_TO_CACHE) {
-
-		dict_index_add_to_cache(node->table, node->index,
-					node->page_no);
-
-		err = DB_SUCCESS;
-	}
-
 function_exit:
 	trx->error_state = err;
 
@@ -1103,15 +1154,15 @@ function_exit:
 	return(thr);
 }
 
-/********************************************************************
+/****************************************************************//**
 Creates the foreign key constraints system tables inside InnoDB
 at database creation or database start if they are not found or are
-not of the right form. */
-
+not of the right form.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
 ulint
 dict_create_or_check_foreign_constraint_tables(void)
 /*================================================*/
-				/* out: DB_SUCCESS or error code */
 {
 	dict_table_t*	table1;
 	dict_table_t*	table2;
@@ -1187,7 +1238,6 @@ dict_create_or_check_foreign_constraint_tables(void)
 			     " FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
 			     "CREATE UNIQUE CLUSTERED INDEX ID_IND"
 			     " ON SYS_FOREIGN_COLS (ID, POS);\n"
-			     "COMMIT WORK;\n"
 			     "END;\n"
 			     , FALSE, trx);
 
@@ -1210,7 +1260,7 @@ dict_create_or_check_foreign_constraint_tables(void)
 		error = DB_MUST_GET_MORE_FILE_SPACE;
 	}
 
-	trx->op_info = "";
+	trx_commit_for_mysql(trx);
 
 	row_mysql_unlock_data_dictionary(trx);
 
@@ -1225,18 +1275,18 @@ dict_create_or_check_foreign_constraint_tables(void)
 	return(error);
 }
 
-/********************************************************************
-Evaluate the given foreign key SQL statement. */
-
+/****************************************************************//**
+Evaluate the given foreign key SQL statement.
+@return	error code or DB_SUCCESS */
+static
 ulint
 dict_foreign_eval_sql(
 /*==================*/
-				/* out: error code or DB_SUCCESS */
-	pars_info_t*	info,	/* in: info struct, or NULL */
-	const char*	sql,	/* in: SQL string to evaluate */
-	dict_table_t*	table,	/* in: table */
-	dict_foreign_t*	foreign,/* in: foreign */
-	trx_t*		trx)	/* in: transaction */
+	pars_info_t*	info,	/*!< in: info struct, or NULL */
+	const char*	sql,	/*!< in: SQL string to evaluate */
+	dict_table_t*	table,	/*!< in: table */
+	dict_foreign_t*	foreign,/*!< in: foreign */
+	trx_t*		trx)	/*!< in: transaction */
 {
 	ulint		error;
 	FILE*		ef	= dict_foreign_err_file;
@@ -1251,12 +1301,11 @@ dict_foreign_eval_sql(
 		      ef);
 		ut_print_name(ef, trx, TRUE, table->name);
 		fputs(".\nA foreign key constraint of name ", ef);
-		ut_print_name(ef, trx, FALSE, foreign->id);
+		ut_print_name(ef, trx, TRUE, foreign->id);
 		fputs("\nalready exists."
-		      " (Note that internally InnoDB adds 'databasename/'\n"
-		      "in front of the user-defined constraint name).\n",
-		      ef);
-		fputs("Note that InnoDB's FOREIGN KEY system tables store\n"
+		      " (Note that internally InnoDB adds 'databasename'\n"
+		      "in front of the user-defined constraint name.)\n"
+		      "Note that InnoDB's FOREIGN KEY system tables store\n"
 		      "constraint names as case-insensitive, with the\n"
 		      "MySQL standard latin1_swedish_ci collation. If you\n"
 		      "create tables or databases whose names differ only in\n"
@@ -1291,18 +1340,18 @@ dict_foreign_eval_sql(
 	return(DB_SUCCESS);
 }
 
-/************************************************************************
+/********************************************************************//**
 Add a single foreign key field definition to the data dictionary tables in
-the database.  */
+the database.
+@return	error code or DB_SUCCESS */
 static
 ulint
 dict_create_add_foreign_field_to_dictionary(
 /*========================================*/
-					/* out: error code or DB_SUCCESS */
-	ulint		field_nr,	/* in: foreign field number */
-	dict_table_t*	table,		/* in: table */
-	dict_foreign_t*	foreign,	/* in: foreign */
-	trx_t*		trx)		/* in: transaction */
+	ulint		field_nr,	/*!< in: foreign field number */
+	dict_table_t*	table,		/*!< in: table */
+	dict_foreign_t*	foreign,	/*!< in: foreign */
+	trx_t*		trx)		/*!< in: transaction */
 {
 	pars_info_t*	info = pars_info_create();
 
@@ -1326,23 +1375,23 @@ dict_create_add_foreign_field_to_dictionary(
 		       table, foreign, trx));
 }
 
-/************************************************************************
+/********************************************************************//**
 Add a single foreign key definition to the data dictionary tables in the
 database. We also generate names to constraints that were not named by the
 user. A generated constraint has a name of the format
 databasename/tablename_ibfk_<number>, where the numbers start from 1, and
 are given locally for this table, that is, the number is not global, as in
-the old format constraints < 4.0.18 it used to be. */
+the old format constraints < 4.0.18 it used to be.
+@return	error code or DB_SUCCESS */
 static
 ulint
 dict_create_add_foreign_to_dictionary(
 /*==================================*/
-				/* out: error code or DB_SUCCESS */
-	ulint*		id_nr,	/* in/out: number to use in id generation;
+	ulint*		id_nr,	/*!< in/out: number to use in id generation;
 				incremented if used */
-	dict_table_t*	table,	/* in: table */
-	dict_foreign_t*	foreign,/* in: foreign */
-	trx_t*		trx)	/* in: transaction */
+	dict_table_t*	table,	/*!< in: table */
+	dict_foreign_t*	foreign,/*!< in: foreign */
+	trx_t*		trx)	/*!< in: transaction */
 {
 	ulint		error;
 	ulint		i;
@@ -1401,14 +1450,14 @@ dict_create_add_foreign_to_dictionary(
 	return(error);
 }
 
-/************************************************************************
-Adds foreign key definitions to data dictionary tables in the database. */
-
+/********************************************************************//**
+Adds foreign key definitions to data dictionary tables in the database.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 ulint
 dict_create_add_foreigns_to_dictionary(
 /*===================================*/
-				/* out: error code or DB_SUCCESS */
-	ulint		start_id,/* in: if we are actually doing ALTER TABLE
+	ulint		start_id,/*!< in: if we are actually doing ALTER TABLE
 				ADD CONSTRAINT, we want to generate constraint
 				numbers which are bigger than in the table so
 				far; we number the constraints from
@@ -1416,8 +1465,8 @@ dict_create_add_foreigns_to_dictionary(
 				we are creating a new table, or if the table
 				so far has no constraints for which the name
 				was generated here */
-	dict_table_t*	table,	/* in: table */
-	trx_t*		trx)	/* in: transaction */
+	dict_table_t*	table,	/*!< in: table */
+	trx_t*		trx)	/*!< in: transaction */
 {
 	dict_foreign_t*	foreign;
 	ulint		number	= start_id + 1;
diff --git a/storage/innobase/dict/dict0dict.c b/storage/innodb_plugin/dict/dict0dict.c
similarity index 67%
rename from storage/innobase/dict/dict0dict.c
rename to storage/innodb_plugin/dict/dict0dict.c
index c7a57d6a2b8..d1f0e0ffc19 100644
--- a/storage/innobase/dict/dict0dict.c
+++ b/storage/innodb_plugin/dict/dict0dict.c
@@ -1,7 +1,24 @@
-/**********************************************************************
-Data dictionary system
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file dict/dict0dict.c
+Data dictionary system
 
 Created 1/8/1996 Heikki Tuuri
 ***********************************************************************/
@@ -12,6 +29,12 @@ Created 1/8/1996 Heikki Tuuri
 #include "dict0dict.ic"
 #endif
 
+/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */
+UNIV_INTERN dict_index_t*	dict_ind_redundant;
+/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */
+UNIV_INTERN dict_index_t*	dict_ind_compact;
+
+#ifndef UNIV_HOTBACKUP
 #include "buf0buf.h"
 #include "data0type.h"
 #include "mach0data.h"
@@ -22,221 +45,136 @@ Created 1/8/1996 Heikki Tuuri
 #include "btr0btr.h"
 #include "btr0cur.h"
 #include "btr0sea.h"
+#include "page0zip.h"
+#include "page0page.h"
 #include "pars0pars.h"
 #include "pars0sym.h"
 #include "que0que.h"
 #include "rem0cmp.h"
-#ifndef UNIV_HOTBACKUP
-# include "m_ctype.h" /* my_isspace() */
-#endif /* !UNIV_HOTBACKUP */
+#include "row0merge.h"
+#include "m_ctype.h" /* my_isspace() */
+#include "ha_prototypes.h" /* innobase_strcasecmp() */
 
 #include <ctype.h>
 
-dict_sys_t*	dict_sys	= NULL;	/* the dictionary system */
+/** the dictionary system */
+UNIV_INTERN dict_sys_t*	dict_sys	= NULL;
 
-rw_lock_t	dict_operation_lock;	/* table create, drop, etc. reserve
-					this in X-mode; implicit or backround
-					operations purge, rollback, foreign
-					key checks reserve this in S-mode; we
-					cannot trust that MySQL protects
-					implicit or background operations
-					a table drop since MySQL does not
-					know of them; therefore we need this;
-					NOTE: a transaction which reserves
-					this must keep book on the mode in
-					trx->dict_operation_lock_mode */
+/** @brief the data dictionary rw-latch protecting dict_sys
 
-#define	DICT_HEAP_SIZE		100	/* initial memory heap size when
+table create, drop, etc. reserve this in X-mode; implicit or
+backround operations purge, rollback, foreign key checks reserve this
+in S-mode; we cannot trust that MySQL protects implicit or background
+operations a table drop since MySQL does not know of them; therefore
+we need this; NOTE: a transaction which reserves this must keep book
+on the mode in trx_struct::dict_operation_lock_mode */
+UNIV_INTERN rw_lock_t	dict_operation_lock;
+
+#define	DICT_HEAP_SIZE		100	/*!< initial memory heap size when
 					creating a table or index object */
-#define DICT_POOL_PER_TABLE_HASH 512	/* buffer pool max size per table
+#define DICT_POOL_PER_TABLE_HASH 512	/*!< buffer pool max size per table
 					hash table fixed size in bytes */
-#define DICT_POOL_PER_VARYING	4	/* buffer pool max size per data
+#define DICT_POOL_PER_VARYING	4	/*!< buffer pool max size per data
 					dictionary varying size in bytes */
 
-/* Identifies generated InnoDB foreign key names */
+/** Identifies generated InnoDB foreign key names */
 static char	dict_ibfk[] = "_ibfk_";
 
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************
-Converts an identifier to a table name.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-void
-innobase_convert_from_table_id(
-/*===========================*/
-	char*		to,	/* out: converted identifier */
-	const char*	from,	/* in: identifier to convert */
-	ulint		len);	/* in: length of 'to', in bytes;
-				should be at least 5 * strlen(to) + 1 */
-/**********************************************************************
-Converts an identifier to UTF-8.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-void
-innobase_convert_from_id(
-/*=====================*/
-	char*		to,	/* out: converted identifier */
-	const char*	from,	/* in: identifier to convert */
-	ulint		len);	/* in: length of 'to', in bytes;
-				should be at least 3 * strlen(to) + 1 */
-/**********************************************************************
-Compares NUL-terminated UTF-8 strings case insensitively.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-int
-innobase_strcasecmp(
-/*================*/
-				/* out: 0 if a=b, <0 if a<b, >1 if a>b */
-	const char*	a,	/* in: first string to compare */
-	const char*	b);	/* in: second string to compare */
-
-/**********************************************************************
-Makes all characters in a NUL-terminated UTF-8 string lower case.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-void
-innobase_casedn_str(
-/*================*/
-	char*	a);	/* in/out: string to put in lower case */
-
-/**************************************************************************
-Determines the connection character set.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-struct charset_info_st*
-innobase_get_charset(
-/*=================*/
-				/* out: connection character set */
-	void*	mysql_thd);	/* in: MySQL thread handle */
-#endif /* !UNIV_HOTBACKUP */
-
-/**************************************************************************
-Removes an index from the dictionary cache. */
-static
-void
-dict_index_remove_from_cache(
-/*=========================*/
-	dict_table_t*	table,	/* in: table */
-	dict_index_t*	index);	/* in, own: index */
-/***********************************************************************
-Copies fields contained in index2 to index1. */
-static
-void
-dict_index_copy(
-/*============*/
-	dict_index_t*	index1,	/* in: index to copy to */
-	dict_index_t*	index2,	/* in: index to copy from */
-	dict_table_t*	table,	/* in: table */
-	ulint		start,	/* in: first position to copy */
-	ulint		end);	/* in: last position to copy */
-/***********************************************************************
+/*******************************************************************//**
 Tries to find column names for the index and sets the col field of the
 index. */
 static
 void
 dict_index_find_cols(
 /*=================*/
-	dict_table_t*	table,	/* in: table */
-	dict_index_t*	index);	/* in: index */
-/***********************************************************************
+	dict_table_t*	table,	/*!< in: table */
+	dict_index_t*	index);	/*!< in: index */
+/*******************************************************************//**
 Builds the internal dictionary cache representation for a clustered
-index, containing also system fields not defined by the user. */
+index, containing also system fields not defined by the user.
+@return	own: the internal representation of the clustered index */
 static
 dict_index_t*
 dict_index_build_internal_clust(
 /*============================*/
-				/* out, own: the internal representation
-				of the clustered index */
-	dict_table_t*	table,	/* in: table */
-	dict_index_t*	index);	/* in: user representation of a clustered
-				index */
-/***********************************************************************
+	const dict_table_t*	table,	/*!< in: table */
+	dict_index_t*		index);	/*!< in: user representation of
+					a clustered index */
+/*******************************************************************//**
 Builds the internal dictionary cache representation for a non-clustered
-index, containing also system fields not defined by the user. */
+index, containing also system fields not defined by the user.
+@return	own: the internal representation of the non-clustered index */
 static
 dict_index_t*
 dict_index_build_internal_non_clust(
 /*================================*/
-				/* out, own: the internal representation
-				of the non-clustered index */
-	dict_table_t*	table,	/* in: table */
-	dict_index_t*	index);	/* in: user representation of a non-clustered
-				index */
-/**************************************************************************
+	const dict_table_t*	table,	/*!< in: table */
+	dict_index_t*		index);	/*!< in: user representation of
+					a non-clustered index */
+/**********************************************************************//**
 Removes a foreign constraint struct from the dictionary cache. */
 static
 void
 dict_foreign_remove_from_cache(
 /*===========================*/
-	dict_foreign_t*	foreign);	/* in, own: foreign constraint */
-/**************************************************************************
+	dict_foreign_t*	foreign);	/*!< in, own: foreign constraint */
+/**********************************************************************//**
 Prints a column data. */
 static
 void
 dict_col_print_low(
 /*===============*/
-	const dict_table_t*	table,	/* in: table */
-	const dict_col_t*	col);	/* in: column */
-/**************************************************************************
+	const dict_table_t*	table,	/*!< in: table */
+	const dict_col_t*	col);	/*!< in: column */
+/**********************************************************************//**
 Prints an index data. */
 static
 void
 dict_index_print_low(
 /*=================*/
-	dict_index_t*	index);	/* in: index */
-/**************************************************************************
+	dict_index_t*	index);	/*!< in: index */
+/**********************************************************************//**
 Prints a field data. */
 static
 void
 dict_field_print_low(
 /*=================*/
-	dict_field_t*	field);	/* in: field */
-/*************************************************************************
+	dict_field_t*	field);	/*!< in: field */
+/*********************************************************************//**
 Frees a foreign key struct. */
 static
 void
 dict_foreign_free(
 /*==============*/
-	dict_foreign_t*	foreign);	/* in, own: foreign key struct */
+	dict_foreign_t*	foreign);	/*!< in, own: foreign key struct */
 
 /* Stream for storing detailed information about the latest foreign key
 and unique key errors */
-FILE*	dict_foreign_err_file		= NULL;
-mutex_t	dict_foreign_err_mutex;		/* mutex protecting the foreign
-					and unique error buffers */
+UNIV_INTERN FILE*	dict_foreign_err_file		= NULL;
+/* mutex protecting the foreign and unique error buffers */
+UNIV_INTERN mutex_t	dict_foreign_err_mutex;
 
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************
+/******************************************************************//**
 Makes all characters in a NUL-terminated UTF-8 string lower case. */
-
+UNIV_INTERN
 void
 dict_casedn_str(
 /*============*/
-	char*	a)	/* in/out: string to put in lower case */
+	char*	a)	/*!< in/out: string to put in lower case */
 {
 	innobase_casedn_str(a);
 }
-#endif /* !UNIV_HOTBACKUP */
-
-/************************************************************************
-Checks if the database name in two table names is the same. */
 
+/********************************************************************//**
+Checks if the database name in two table names is the same.
+@return	TRUE if same db name */
+UNIV_INTERN
 ibool
 dict_tables_have_same_db(
 /*=====================*/
-				/* out: TRUE if same db name */
-	const char*	name1,	/* in: table name in the form
+	const char*	name1,	/*!< in: table name in the form
 				dbname '/' tablename */
-	const char*	name2)	/* in: table name in the form
+	const char*	name2)	/*!< in: table name in the form
 				dbname '/' tablename */
 {
 	for (; *name1 == *name2; name1++, name2++) {
@@ -248,14 +186,14 @@ dict_tables_have_same_db(
 	return(FALSE);
 }
 
-/************************************************************************
-Return the end of table name where we have removed dbname and '/'. */
-
+/********************************************************************//**
+Return the end of table name where we have removed dbname and '/'.
+@return	table name */
+UNIV_INTERN
 const char*
 dict_remove_db_name(
 /*================*/
-				/* out: table name */
-	const char*	name)	/* in: table name in the form
+	const char*	name)	/*!< in: table name in the form
 				dbname '/' tablename */
 {
 	const char*	s = strchr(name, '/');
@@ -264,14 +202,14 @@ dict_remove_db_name(
 	return(s + 1);
 }
 
-/************************************************************************
-Get the database name length in a table name. */
-
+/********************************************************************//**
+Get the database name length in a table name.
+@return	database name length */
+UNIV_INTERN
 ulint
 dict_get_db_name_len(
 /*=================*/
-				/* out: database name length */
-	const char*	name)	/* in: table name in the form
+	const char*	name)	/*!< in: table name in the form
 				dbname '/' tablename */
 {
 	const char*	s;
@@ -280,9 +218,9 @@ dict_get_db_name_len(
 	return(s - name);
 }
 
-/************************************************************************
+/********************************************************************//**
 Reserves the dictionary system mutex for MySQL. */
-
+UNIV_INTERN
 void
 dict_mutex_enter_for_mysql(void)
 /*============================*/
@@ -290,9 +228,9 @@ dict_mutex_enter_for_mysql(void)
 	mutex_enter(&(dict_sys->mutex));
 }
 
-/************************************************************************
+/********************************************************************//**
 Releases the dictionary system mutex for MySQL. */
-
+UNIV_INTERN
 void
 dict_mutex_exit_for_mysql(void)
 /*===========================*/
@@ -300,97 +238,40 @@ dict_mutex_exit_for_mysql(void)
 	mutex_exit(&(dict_sys->mutex));
 }
 
-/************************************************************************
+/********************************************************************//**
 Decrements the count of open MySQL handles to a table. */
-
+UNIV_INTERN
 void
 dict_table_decrement_handle_count(
 /*==============================*/
-	dict_table_t*	table)	/* in: table */
+	dict_table_t*	table,		/*!< in/out: table */
+	ibool		dict_locked)	/*!< in: TRUE=data dictionary locked */
 {
-	mutex_enter(&(dict_sys->mutex));
+	if (!dict_locked) {
+		mutex_enter(&dict_sys->mutex);
+	}
 
+	ut_ad(mutex_own(&dict_sys->mutex));
 	ut_a(table->n_mysql_handles_opened > 0);
 
 	table->n_mysql_handles_opened--;
 
-	mutex_exit(&(dict_sys->mutex));
+	if (!dict_locked) {
+		mutex_exit(&dict_sys->mutex);
+	}
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/*************************************************************************
-Gets the column data type. */
-
-void
-dict_col_copy_type_noninline(
-/*=========================*/
-	const dict_col_t*	col,	/* in: column */
-	dtype_t*		type)	/* out: data type */
-{
-	dict_col_copy_type(col, type);
-}
-
-/************************************************************************
-Gets the nth column of a table. */
-
-const dict_col_t*
-dict_table_get_nth_col_noninline(
-/*=============================*/
-					/* out: pointer to column object */
-	const dict_table_t*	table,	/* in: table */
-	ulint			pos)	/* in: position of column */
-{
-	return(dict_table_get_nth_col(table, pos));
-}
-
-/************************************************************************
-Gets the first index on the table (the clustered index). */
-
-dict_index_t*
-dict_table_get_first_index_noninline(
-/*=================================*/
-				/* out: index, NULL if none exists */
-	dict_table_t*	table)	/* in: table */
-{
-	return(dict_table_get_first_index(table));
-}
-
-/************************************************************************
-Gets the next index on the table. */
-
-dict_index_t*
-dict_table_get_next_index_noninline(
-/*================================*/
-				/* out: index, NULL if none left */
-	dict_index_t*	index)	/* in: index */
-{
-	return(dict_table_get_next_index(index));
-}
-
-/**************************************************************************
-Returns an index object. */
-
-dict_index_t*
-dict_table_get_index_noninline(
-/*===========================*/
-				/* out: index, NULL if does not exist */
-	dict_table_t*	table,	/* in: table */
-	const char*	name)	/* in: index name */
-{
-	return(dict_table_get_index(table, name));
-}
-
-/**************************************************************************
-Returns a column's name. */
-
+/**********************************************************************//**
+Returns a column's name.
+@return column name. NOTE: not guaranteed to stay valid if table is
+modified in any way (columns added, etc.). */
+UNIV_INTERN
 const char*
 dict_table_get_col_name(
 /*====================*/
-					/* out: column name. NOTE: not
-					guaranteed to stay valid if table is
-					modified in any way (columns added,
-					etc.). */
-	const dict_table_t*	table,	/* in: table */
-	ulint			col_nr)	/* in: column number */
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			col_nr)	/*!< in: column number */
 {
 	ulint		i;
 	const char*	s;
@@ -409,57 +290,57 @@ dict_table_get_col_name(
 	return(s);
 }
 
-
-/************************************************************************
-Acquire the autoinc lock.*/
-
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Acquire the autoinc lock. */
+UNIV_INTERN
 void
 dict_table_autoinc_lock(
 /*====================*/
-	dict_table_t*	table)
+	dict_table_t*	table)	/*!< in/out: table */
 {
 	mutex_enter(&table->autoinc_mutex);
 }
 
-/************************************************************************
+/********************************************************************//**
 Unconditionally set the autoinc counter. */
-
+UNIV_INTERN
 void
 dict_table_autoinc_initialize(
 /*==========================*/
-	dict_table_t*	table,	/* in: table */
-	ib_ulonglong	value)	/* in: next value to assign to a row */
+	dict_table_t*	table,	/*!< in/out: table */
+	ib_uint64_t	value)	/*!< in: next value to assign to a row */
 {
 	ut_ad(mutex_own(&table->autoinc_mutex));
 
 	table->autoinc = value;
 }
 
-/************************************************************************
+/********************************************************************//**
 Reads the next autoinc value (== autoinc counter value), 0 if not yet
-initialized. */
-
-ib_ulonglong
+initialized.
+@return	value for a new row, or 0 */
+UNIV_INTERN
+ib_uint64_t
 dict_table_autoinc_read(
 /*====================*/
-				/* out: value for a new row, or 0 */
-	dict_table_t*	table)	/* in: table */
+	const dict_table_t*	table)	/*!< in: table */
 {
 	ut_ad(mutex_own(&table->autoinc_mutex));
 
 	return(table->autoinc);
 }
 
-/************************************************************************
+/********************************************************************//**
 Updates the autoinc counter if the value supplied is greater than the
 current value. */
-
+UNIV_INTERN
 void
 dict_table_autoinc_update_if_greater(
 /*=================================*/
 
-	dict_table_t*	table,	/* in: table */
-	ib_ulonglong	value)	/* in: value which was assigned to a row */
+	dict_table_t*	table,	/*!< in/out: table */
+	ib_uint64_t	value)	/*!< in: value which was assigned to a row */
 {
 	ut_ad(mutex_own(&table->autoinc_mutex));
 
@@ -469,28 +350,56 @@ dict_table_autoinc_update_if_greater(
 	}
 }
 
-/************************************************************************
-Release the autoinc lock.*/
-
+/********************************************************************//**
+Release the autoinc lock. */
+UNIV_INTERN
 void
 dict_table_autoinc_unlock(
 /*======================*/
-	dict_table_t*	table)	/* in: release autoinc lock for this table */
+	dict_table_t*	table)	/*!< in/out: table */
 {
 	mutex_exit(&table->autoinc_mutex);
 }
 
-/************************************************************************
-Looks for column n in an index. */
+/**********************************************************************//**
+Looks for an index with the given table and index id.
+NOTE that we do not reserve the dictionary mutex.
+@return	index or NULL if not found from cache */
+UNIV_INTERN
+dict_index_t*
+dict_index_get_on_id_low(
+/*=====================*/
+	dict_table_t*	table,	/*!< in: table */
+	dulint		id)	/*!< in: index id */
+{
+	dict_index_t*	index;
 
+	index = dict_table_get_first_index(table);
+
+	while (index) {
+		if (0 == ut_dulint_cmp(id, index->id)) {
+			/* Found */
+
+			return(index);
+		}
+
+		index = dict_table_get_next_index(index);
+	}
+
+	return(NULL);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************************//**
+Looks for column n in an index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INTERN
 ulint
 dict_index_get_nth_col_pos(
 /*=======================*/
-				/* out: position in internal representation
-				of the index; if not contained, returns
-				ULINT_UNDEFINED */
-	dict_index_t*	index,	/* in: index */
-	ulint		n)	/* in: column number */
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			n)	/*!< in: column number */
 {
 	const dict_field_t*	field;
 	const dict_col_t*	col;
@@ -502,7 +411,7 @@ dict_index_get_nth_col_pos(
 
 	col = dict_table_get_nth_col(index->table, n);
 
-	if (index->type & DICT_CLUSTERED) {
+	if (dict_index_is_clust(index)) {
 
 		return(dict_col_get_clust_pos(col, index));
 	}
@@ -521,16 +430,16 @@ dict_index_get_nth_col_pos(
 	return(ULINT_UNDEFINED);
 }
 
-/************************************************************************
-Returns TRUE if the index contains a column or a prefix of that column. */
-
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Returns TRUE if the index contains a column or a prefix of that column.
+@return	TRUE if contains the column or its prefix */
+UNIV_INTERN
 ibool
 dict_index_contains_col_or_prefix(
 /*==============================*/
-				/* out: TRUE if contains the column or its
-				prefix */
-	dict_index_t*	index,	/* in: index */
-	ulint		n)	/* in: column number */
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			n)	/*!< in: column number */
 {
 	const dict_field_t*	field;
 	const dict_col_t*	col;
@@ -540,7 +449,7 @@ dict_index_contains_col_or_prefix(
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 
-	if (index->type & DICT_CLUSTERED) {
+	if (dict_index_is_clust(index)) {
 
 		return(TRUE);
 	}
@@ -561,26 +470,25 @@ dict_index_contains_col_or_prefix(
 	return(FALSE);
 }
 
-/************************************************************************
+/********************************************************************//**
 Looks for a matching field in an index. The column has to be the same. The
 column in index must be complete, or must contain a prefix longer than the
 column in index2. That is, we must be able to construct the prefix in index2
-from the prefix in index. */
-
+from the prefix in index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INTERN
 ulint
 dict_index_get_nth_field_pos(
 /*=========================*/
-				/* out: position in internal representation
-				of the index; if not contained, returns
-				ULINT_UNDEFINED */
-	dict_index_t*	index,	/* in: index from which to search */
-	dict_index_t*	index2,	/* in: index */
-	ulint		n)	/* in: field number in index2 */
+	const dict_index_t*	index,	/*!< in: index from which to search */
+	const dict_index_t*	index2,	/*!< in: index */
+	ulint			n)	/*!< in: field number in index2 */
 {
-	dict_field_t*	field;
-	dict_field_t*	field2;
-	ulint		n_fields;
-	ulint		pos;
+	const dict_field_t*	field;
+	const dict_field_t*	field2;
+	ulint			n_fields;
+	ulint			pos;
 
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -604,15 +512,15 @@ dict_index_get_nth_field_pos(
 	return(ULINT_UNDEFINED);
 }
 
-/**************************************************************************
-Returns a table object based on table id. */
-
+/**********************************************************************//**
+Returns a table object based on table id.
+@return	table, NULL if does not exist */
+UNIV_INTERN
 dict_table_t*
 dict_table_get_on_id(
 /*=================*/
-				/* out: table, NULL if does not exist */
-	dulint	table_id,	/* in: table id */
-	trx_t*	trx)		/* in: transaction handle */
+	dulint	table_id,	/*!< in: table id */
+	trx_t*	trx)		/*!< in: transaction handle */
 {
 	dict_table_t*	table;
 
@@ -638,47 +546,32 @@ dict_table_get_on_id(
 	return(table);
 }
 
-/************************************************************************
-Looks for column n position in the clustered index. */
-
+/********************************************************************//**
+Looks for column n position in the clustered index.
+@return	position in internal representation of the clustered index */
+UNIV_INTERN
 ulint
 dict_table_get_nth_col_pos(
 /*=======================*/
-				/* out: position in internal representation
-				of the clustered index */
-	dict_table_t*	table,	/* in: table */
-	ulint		n)	/* in: column number */
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			n)	/*!< in: column number */
 {
 	return(dict_index_get_nth_col_pos(dict_table_get_first_index(table),
 					  n));
 }
 
-/************************************************************************
-Check whether the table uses the compact page format. */
-
-ibool
-dict_table_is_comp_noninline(
-/*=========================*/
-					/* out: TRUE if table uses the
-					compact page format */
-	const dict_table_t*	table)	/* in: table */
-{
-	return(dict_table_is_comp(table));
-}
-
-/************************************************************************
+/********************************************************************//**
 Checks if a column is in the ordering columns of the clustered index of a
-table. Column prefixes are treated like whole columns. */
-
+table. Column prefixes are treated like whole columns.
+@return	TRUE if the column, or its prefix, is in the clustered key */
+UNIV_INTERN
 ibool
 dict_table_col_in_clustered_key(
 /*============================*/
-				/* out: TRUE if the column, or its prefix, is
-				in the clustered key */
-	dict_table_t*	table,	/* in: table */
-	ulint		n)	/* in: column number */
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			n)	/*!< in: column number */
 {
-	dict_index_t*		index;
+	const dict_index_t*	index;
 	const dict_field_t*	field;
 	const dict_col_t*	col;
 	ulint			pos;
@@ -704,9 +597,9 @@ dict_table_col_in_clustered_key(
 	return(FALSE);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Inits the data dictionary module. */
-
+UNIV_INTERN
 void
 dict_init(void)
 /*===========*/
@@ -715,10 +608,10 @@ dict_init(void)
 
 	mutex_create(&dict_sys->mutex, SYNC_DICT);
 
-	dict_sys->table_hash = hash_create(buf_pool_get_max_size()
+	dict_sys->table_hash = hash_create(buf_pool_get_curr_size()
 					   / (DICT_POOL_PER_TABLE_HASH
 					      * UNIV_WORD_SIZE));
-	dict_sys->table_id_hash = hash_create(buf_pool_get_max_size()
+	dict_sys->table_id_hash = hash_create(buf_pool_get_curr_size()
 					      / (DICT_POOL_PER_TABLE_HASH
 						 * UNIV_WORD_SIZE));
 	dict_sys->size = 0;
@@ -733,20 +626,18 @@ dict_init(void)
 	mutex_create(&dict_foreign_err_mutex, SYNC_ANY_LATCH);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Returns a table object and optionally increment its MySQL open handle count.
 NOTE! This is a high-level function to be used mainly from outside the
 'dict' directory. Inside this directory dict_table_get_low is usually the
-appropriate function. */
-
+appropriate function.
+@return	table, NULL if does not exist */
+UNIV_INTERN
 dict_table_t*
 dict_table_get(
 /*===========*/
-					/* out: table, NULL if
-					does not exist */
-	const char*	table_name,	/* in: table name */
-	ibool		inc_mysql_count)
-     					/* in: whether to increment the open
+	const char*	table_name,	/*!< in: table name */
+	ibool		inc_mysql_count)/*!< in: whether to increment the open
 					handle count on the table */
 {
 	dict_table_t*	table;
@@ -772,15 +663,16 @@ dict_table_get(
 
 	return(table);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/**************************************************************************
+/**********************************************************************//**
 Adds system columns to a table object. */
-
+UNIV_INTERN
 void
 dict_table_add_system_columns(
 /*==========================*/
-	dict_table_t*	table,	/* in/out: table */
-	mem_heap_t*	heap)	/* in: temporary heap */
+	dict_table_t*	table,	/*!< in/out: table */
+	mem_heap_t*	heap)	/*!< in: temporary heap */
 {
 	ut_ad(table);
 	ut_ad(table->n_def == table->n_cols - DATA_N_SYS_COLS);
@@ -819,14 +711,15 @@ dict_table_add_system_columns(
 #endif
 }
 
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
 Adds a table object to the dictionary cache. */
-
+UNIV_INTERN
 void
 dict_table_add_to_cache(
 /*====================*/
-	dict_table_t*	table,	/* in: table */
-	mem_heap_t*	heap)	/* in: temporary heap */
+	dict_table_t*	table,	/*!< in: table */
+	mem_heap_t*	heap)	/*!< in: temporary heap */
 {
 	ulint	fold;
 	ulint	id_fold;
@@ -866,17 +759,35 @@ dict_table_add_to_cache(
 	/* Look for a table with the same name: error if such exists */
 	{
 		dict_table_t*	table2;
-		HASH_SEARCH(name_hash, dict_sys->table_hash, fold, table2,
-			    (ut_strcmp(table2->name, table->name) == 0));
+		HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
+			    dict_table_t*, table2, ut_ad(table2->cached),
+			    ut_strcmp(table2->name, table->name) == 0);
 		ut_a(table2 == NULL);
+
+#ifdef UNIV_DEBUG
+		/* Look for the same table pointer with a different name */
+		HASH_SEARCH_ALL(name_hash, dict_sys->table_hash,
+				dict_table_t*, table2, ut_ad(table2->cached),
+				table2 == table);
+		ut_ad(table2 == NULL);
+#endif /* UNIV_DEBUG */
 	}
 
 	/* Look for a table with the same id: error if such exists */
 	{
 		dict_table_t*	table2;
-		HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold, table2,
-			    (ut_dulint_cmp(table2->id, table->id) == 0));
+		HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold,
+			    dict_table_t*, table2, ut_ad(table2->cached),
+			    ut_dulint_cmp(table2->id, table->id) == 0);
 		ut_a(table2 == NULL);
+
+#ifdef UNIV_DEBUG
+		/* Look for the same table pointer with a different id */
+		HASH_SEARCH_ALL(id_hash, dict_sys->table_id_hash,
+				dict_table_t*, table2, ut_ad(table2->cached),
+				table2 == table);
+		ut_ad(table2 == NULL);
+#endif /* UNIV_DEBUG */
 	}
 
 	/* Add table to hash table of tables */
@@ -892,16 +803,16 @@ dict_table_add_to_cache(
 	dict_sys->size += mem_heap_get_size(table->heap);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Looks for an index with the given id. NOTE that we do not reserve
 the dictionary mutex: this function is for emergency purposes like
-printing info of a corrupt database page! */
-
+printing info of a corrupt database page!
+@return	index or NULL if not found from cache */
+UNIV_INTERN
 dict_index_t*
 dict_index_find_on_id_low(
 /*======================*/
-			/* out: index or NULL if not found from cache */
-	dulint	id)	/* in: index id */
+	dulint	id)	/*!< in: index id */
 {
 	dict_table_t*	table;
 	dict_index_t*	index;
@@ -927,16 +838,16 @@ dict_index_find_on_id_low(
 	return(NULL);
 }
 
-/**************************************************************************
-Renames a table object. */
-
+/**********************************************************************//**
+Renames a table object.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 dict_table_rename_in_cache(
 /*=======================*/
-					/* out: TRUE if success */
-	dict_table_t*	table,		/* in: table */
-	const char*	new_name,	/* in: new name */
-	ibool		rename_also_foreigns)/* in: in ALTER TABLE we want
+	dict_table_t*	table,		/*!< in/out: table */
+	const char*	new_name,	/*!< in: new name */
+	ibool		rename_also_foreigns)/*!< in: in ALTER TABLE we want
 					to preserve the original table name
 					in constraints which reference it */
 {
@@ -944,26 +855,31 @@ dict_table_rename_in_cache(
 	dict_index_t*	index;
 	ulint		fold;
 	ulint		old_size;
-	char*		old_name;
-	ibool		success;
+	const char*	old_name;
 
 	ut_ad(table);
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 
 	old_size = mem_heap_get_size(table->heap);
+	old_name = table->name;
 
 	fold = ut_fold_string(new_name);
 
 	/* Look for a table with the same name: error if such exists */
 	{
 		dict_table_t*	table2;
-		HASH_SEARCH(name_hash, dict_sys->table_hash, fold, table2,
+		HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
+			    dict_table_t*, table2, ut_ad(table2->cached),
 			    (ut_strcmp(table2->name, new_name) == 0));
-		if (table2) {
-			fprintf(stderr,
-				"InnoDB: Error: dictionary cache"
-				" already contains a table of name %s\n",
-				new_name);
+		if (UNIV_LIKELY_NULL(table2)) {
+			ut_print_timestamp(stderr);
+			fputs("  InnoDB: Error: dictionary cache"
+			      " already contains a table ", stderr);
+			ut_print_name(stderr, NULL, TRUE, new_name);
+			fputs("\n"
+			      "InnoDB: cannot rename table ", stderr);
+			ut_print_name(stderr, NULL, TRUE, old_name);
+			putc('\n', stderr);
 			return(FALSE);
 		}
 	}
@@ -973,27 +889,24 @@ dict_table_rename_in_cache(
 
 	if (table->space != 0) {
 		if (table->dir_path_of_temp_table != NULL) {
-			fprintf(stderr,
-				"InnoDB: Error: trying to rename a table"
-				" %s (%s) created with CREATE\n"
-				"InnoDB: TEMPORARY TABLE\n",
-				table->name, table->dir_path_of_temp_table);
-			success = FALSE;
-		} else {
-			success = fil_rename_tablespace(
-				table->name, table->space, new_name);
-		}
-
-		if (!success) {
-
+			ut_print_timestamp(stderr);
+			fputs("  InnoDB: Error: trying to rename a"
+			      " TEMPORARY TABLE ", stderr);
+			ut_print_name(stderr, NULL, TRUE, old_name);
+			fputs(" (", stderr);
+			ut_print_filename(stderr,
+					  table->dir_path_of_temp_table);
+			fputs(" )\n", stderr);
+			return(FALSE);
+		} else if (!fil_rename_tablespace(old_name, table->space,
+						  new_name)) {
 			return(FALSE);
 		}
 	}
 
 	/* Remove table from the hash tables of tables */
 	HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
-		    ut_fold_string(table->name), table);
-	old_name = mem_heap_strdup(table->heap, table->name);
+		    ut_fold_string(old_name), table);
 	table->name = mem_heap_strdup(table->heap, new_name);
 
 	/* Add table to hash table of tables */
@@ -1141,15 +1054,15 @@ dict_table_rename_in_cache(
 	return(TRUE);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Change the id of a table object in the dictionary cache. This is used in
 DISCARD TABLESPACE. */
-
+UNIV_INTERN
 void
 dict_table_change_id_in_cache(
 /*==========================*/
-	dict_table_t*	table,	/* in: table object already in cache */
-	dulint		new_id)	/* in: new id to set */
+	dict_table_t*	table,	/*!< in/out: table object already in cache */
+	dulint		new_id)	/*!< in: new id to set */
 {
 	ut_ad(table);
 	ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -1166,13 +1079,13 @@ dict_table_change_id_in_cache(
 		    ut_fold_dulint(table->id), table);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Removes a table object from the dictionary cache. */
-
+UNIV_INTERN
 void
 dict_table_remove_from_cache(
 /*=========================*/
-	dict_table_t*	table)	/* in, own: table */
+	dict_table_t*	table)	/*!< in, own: table */
 {
 	dict_foreign_t*	foreign;
 	dict_index_t*	index;
@@ -1233,27 +1146,15 @@ dict_table_remove_from_cache(
 	dict_mem_table_free(table);
 }
 
-/*************************************************************************
-Gets the column position in the clustered index. */
-
-ulint
-dict_col_get_clust_pos_noninline(
-/*=============================*/
-	const dict_col_t*	col,		/* in: table column */
-	const dict_index_t*	clust_index)	/* in: clustered index */
-{
-	return(dict_col_get_clust_pos(col, clust_index));
-}
-
-/********************************************************************
+/****************************************************************//**
 If the given column name is reserved for InnoDB system columns, return
-TRUE. */
-
+TRUE.
+@return	TRUE if name is reserved */
+UNIV_INTERN
 ibool
 dict_col_name_is_reserved(
 /*======================*/
-				/* out: TRUE if name is reserved */
-	const char*	name)	/* in: column name */
+	const char*	name)	/*!< in: column name */
 {
 	/* This check reminds that if a new system column is added to
 	the program, it should be dealt with here. */
@@ -1277,16 +1178,271 @@ dict_col_name_is_reserved(
 	return(FALSE);
 }
 
-/**************************************************************************
-Adds an index to the dictionary cache. */
+/****************************************************************//**
+If an undo log record for this table might not fit on a single page,
+return TRUE.
+@return	TRUE if the undo log record could become too big */
+static
+ibool
+dict_index_too_big_for_undo(
+/*========================*/
+	const dict_table_t*	table,		/*!< in: table */
+	const dict_index_t*	new_index)	/*!< in: index */
+{
+	/* Make sure that all column prefixes will fit in the undo log record
+	in trx_undo_page_report_modify() right after trx_undo_page_init(). */
 
-void
+	ulint			i;
+	const dict_index_t*	clust_index
+		= dict_table_get_first_index(table);
+	ulint			undo_page_len
+		= TRX_UNDO_PAGE_HDR - TRX_UNDO_PAGE_HDR_SIZE
+		+ 2 /* next record pointer */
+		+ 1 /* type_cmpl */
+		+ 11 /* trx->undo_no */ - 11 /* table->id */
+		+ 1 /* rec_get_info_bits() */
+		+ 11 /* DB_TRX_ID */
+		+ 11 /* DB_ROLL_PTR */
+		+ 10 + FIL_PAGE_DATA_END /* trx_undo_left() */
+		+ 2/* pointer to previous undo log record */;
+
+	if (UNIV_UNLIKELY(!clust_index)) {
+		ut_a(dict_index_is_clust(new_index));
+		clust_index = new_index;
+	}
+
+	/* Add the size of the ordering columns in the
+	clustered index. */
+	for (i = 0; i < clust_index->n_uniq; i++) {
+		const dict_col_t*	col
+			= dict_index_get_nth_col(clust_index, i);
+
+		/* Use the maximum output size of
+		mach_write_compressed(), although the encoded
+		length should always fit in 2 bytes. */
+		undo_page_len += 5 + dict_col_get_max_size(col);
+	}
+
+	/* Add the old values of the columns to be updated.
+	First, the amount and the numbers of the columns.
+	These are written by mach_write_compressed() whose
+	maximum output length is 5 bytes.  However, given that
+	the quantities are below REC_MAX_N_FIELDS (10 bits),
+	the maximum length is 2 bytes per item. */
+	undo_page_len += 2 * (dict_table_get_n_cols(table) + 1);
+
+	for (i = 0; i < clust_index->n_def; i++) {
+		const dict_col_t*	col
+			= dict_index_get_nth_col(clust_index, i);
+		ulint			max_size
+			= dict_col_get_max_size(col);
+		ulint			fixed_size
+			= dict_col_get_fixed_size(col,
+						  dict_table_is_comp(table));
+
+		if (fixed_size) {
+			/* Fixed-size columns are stored locally. */
+			max_size = fixed_size;
+		} else if (max_size <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
+			/* Short columns are stored locally. */
+		} else if (!col->ord_part) {
+			/* See if col->ord_part would be set
+			because of new_index. */
+			ulint	j;
+
+			for (j = 0; j < new_index->n_uniq; j++) {
+				if (dict_index_get_nth_col(
+					    new_index, j) == col) {
+
+					goto is_ord_part;
+				}
+			}
+
+			/* This is not an ordering column in any index.
+			Thus, it can be stored completely externally. */
+			max_size = BTR_EXTERN_FIELD_REF_SIZE;
+		} else {
+is_ord_part:
+			/* This is an ordering column in some index.
+			A long enough prefix must be written to the
+			undo log.  See trx_undo_page_fetch_ext(). */
+
+			if (max_size > REC_MAX_INDEX_COL_LEN) {
+				max_size = REC_MAX_INDEX_COL_LEN;
+			}
+
+			max_size += BTR_EXTERN_FIELD_REF_SIZE;
+		}
+
+		undo_page_len += 5 + max_size;
+	}
+
+	return(undo_page_len >= UNIV_PAGE_SIZE);
+}
+
+/****************************************************************//**
+If a record of this index might not fit on a single B-tree page,
+return TRUE.
+@return	TRUE if the index record could become too big */
+static
+ibool
+dict_index_too_big_for_tree(
+/*========================*/
+	const dict_table_t*	table,		/*!< in: table */
+	const dict_index_t*	new_index)	/*!< in: index */
+{
+	ulint	zip_size;
+	ulint	comp;
+	ulint	i;
+	/* maximum possible storage size of a record */
+	ulint	rec_max_size;
+	/* maximum allowed size of a record on a leaf page */
+	ulint	page_rec_max;
+	/* maximum allowed size of a node pointer record */
+	ulint	page_ptr_max;
+
+	comp = dict_table_is_comp(table);
+	zip_size = dict_table_zip_size(table);
+
+	if (zip_size && zip_size < UNIV_PAGE_SIZE) {
+		/* On a compressed page, two records must fit in the
+		uncompressed page modification log.  On compressed
+		pages with zip_size == UNIV_PAGE_SIZE, this limit will
+		never be reached. */
+		ut_ad(comp);
+		/* The maximum allowed record size is the size of
+		an empty page, minus a byte for recoding the heap
+		number in the page modification log.  The maximum
+		allowed node pointer size is half that. */
+		page_rec_max = page_zip_empty_size(new_index->n_fields,
+						   zip_size) - 1;
+		page_ptr_max = page_rec_max / 2;
+		/* On a compressed page, there is a two-byte entry in
+		the dense page directory for every record.  But there
+		is no record header. */
+		rec_max_size = 2;
+	} else {
+		/* The maximum allowed record size is half a B-tree
+		page.  No additional sparse page directory entry will
+		be generated for the first few user records. */
+		page_rec_max = page_get_free_space_of_empty(comp) / 2;
+		page_ptr_max = page_rec_max;
+		/* Each record has a header. */
+		rec_max_size = comp
+			? REC_N_NEW_EXTRA_BYTES
+			: REC_N_OLD_EXTRA_BYTES;
+	}
+
+	if (comp) {
+		/* Include the "null" flags in the
+		maximum possible record size. */
+		rec_max_size += UT_BITS_IN_BYTES(new_index->n_nullable);
+	} else {
+		/* For each column, include a 2-byte offset and a
+		"null" flag.  The 1-byte format is only used in short
+		records that do not contain externally stored columns.
+		Such records could never exceed the page limit, even
+		when using the 2-byte format. */
+		rec_max_size += 2 * new_index->n_fields;
+	}
+
+	/* Compute the maximum possible record size. */
+	for (i = 0; i < new_index->n_fields; i++) {
+		const dict_field_t*	field
+			= dict_index_get_nth_field(new_index, i);
+		const dict_col_t*	col
+			= dict_field_get_col(field);
+		ulint			field_max_size;
+		ulint			field_ext_max_size;
+
+		/* In dtuple_convert_big_rec(), variable-length columns
+		that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
+		may be chosen for external storage.
+
+		Fixed-length columns, and all columns of secondary
+		index records are always stored inline. */
+
+		/* Determine the maximum length of the index field.
+		The field_ext_max_size should be computed as the worst
+		case in rec_get_converted_size_comp() for
+		REC_STATUS_ORDINARY records. */
+
+		field_max_size = dict_col_get_fixed_size(col, comp);
+		if (field_max_size) {
+			/* dict_index_add_col() should guarantee this */
+			ut_ad(!field->prefix_len
+			      || field->fixed_len == field->prefix_len);
+			/* Fixed lengths are not encoded
+			in ROW_FORMAT=COMPACT. */
+			field_ext_max_size = 0;
+			goto add_field_size;
+		}
+
+		field_max_size = dict_col_get_max_size(col);
+		field_ext_max_size = field_max_size < 256 ? 1 : 2;
+
+		if (field->prefix_len) {
+			if (field->prefix_len < field_max_size) {
+				field_max_size = field->prefix_len;
+			}
+		} else if (field_max_size > BTR_EXTERN_FIELD_REF_SIZE * 2
+			   && dict_index_is_clust(new_index)) {
+
+			/* In the worst case, we have a locally stored
+			column of BTR_EXTERN_FIELD_REF_SIZE * 2 bytes.
+			The length can be stored in one byte.  If the
+			column were stored externally, the lengths in
+			the clustered index page would be
+			BTR_EXTERN_FIELD_REF_SIZE and 2. */
+			field_max_size = BTR_EXTERN_FIELD_REF_SIZE * 2;
+			field_ext_max_size = 1;
+		}
+
+		if (comp) {
+			/* Add the extra size for ROW_FORMAT=COMPACT.
+			For ROW_FORMAT=REDUNDANT, these bytes were
+			added to rec_max_size before this loop. */
+			rec_max_size += field_ext_max_size;
+		}
+add_field_size:
+		rec_max_size += field_max_size;
+
+		/* Check the size limit on leaf pages. */
+		if (UNIV_UNLIKELY(rec_max_size >= page_rec_max)) {
+
+			return(TRUE);
+		}
+
+		/* Check the size limit on non-leaf pages.  Records
+		stored in non-leaf B-tree pages consist of the unique
+		columns of the record (the key columns of the B-tree)
+		and a node pointer field.  When we have processed the
+		unique columns, rec_max_size equals the size of the
+		node pointer record minus the node pointer column. */
+		if (i + 1 == dict_index_get_n_unique_in_tree(new_index)
+		    && rec_max_size + REC_NODE_PTR_SIZE >= page_ptr_max) {
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/**********************************************************************//**
+Adds an index to the dictionary cache.
+@return	DB_SUCCESS or DB_TOO_BIG_RECORD */
+UNIV_INTERN
+ulint
 dict_index_add_to_cache(
 /*====================*/
-	dict_table_t*	table,	/* in: table on which the index is */
-	dict_index_t*	index,	/* in, own: index; NOTE! The index memory
+	dict_table_t*	table,	/*!< in: table on which the index is */
+	dict_index_t*	index,	/*!< in, own: index; NOTE! The index memory
 				object is freed in this function! */
-	ulint		page_no)/* in: root page number of the index */
+	ulint		page_no,/*!< in: root page number of the index */
+	ibool		strict)	/*!< in: TRUE=refuse to create the index
+				if records could be too big to fit in
+				an B-tree page */
 {
 	dict_index_t*	new_index;
 	ulint		n_ord;
@@ -1298,21 +1454,7 @@ dict_index_add_to_cache(
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 
 	ut_ad(mem_heap_validate(index->heap));
-
-#ifdef UNIV_DEBUG
-	{
-		dict_index_t*	index2;
-		index2 = UT_LIST_GET_FIRST(table->indexes);
-
-		while (index2 != NULL) {
-			ut_ad(ut_strcmp(index->name, index2->name) != 0);
-
-			index2 = UT_LIST_GET_NEXT(indexes, index2);
-		}
-	}
-#endif /* UNIV_DEBUG */
-
-	ut_a(!(index->type & DICT_CLUSTERED)
+	ut_a(!dict_index_is_clust(index)
 	     || UT_LIST_GET_LEN(table->indexes) == 0);
 
 	dict_index_find_cols(table, index);
@@ -1320,39 +1462,106 @@ dict_index_add_to_cache(
 	/* Build the cache internal representation of the index,
 	containing also the added system fields */
 
-	if (index->type & DICT_CLUSTERED) {
+	if (dict_index_is_clust(index)) {
 		new_index = dict_index_build_internal_clust(table, index);
 	} else {
 		new_index = dict_index_build_internal_non_clust(table, index);
 	}
 
-	new_index->search_info = btr_search_info_create(new_index->heap);
-
 	/* Set the n_fields value in new_index to the actual defined
 	number of fields in the cache internal representation */
 
 	new_index->n_fields = new_index->n_def;
 
+	if (strict && dict_index_too_big_for_tree(table, new_index)) {
+too_big:
+		dict_mem_index_free(new_index);
+		dict_mem_index_free(index);
+		return(DB_TOO_BIG_RECORD);
+	}
+
+	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
+		n_ord = new_index->n_fields;
+	} else {
+		n_ord = new_index->n_uniq;
+	}
+
+	switch (dict_table_get_format(table)) {
+	case DICT_TF_FORMAT_51:
+		/* ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT store
+		prefixes of externally stored columns locally within
+		the record.  There are no special considerations for
+		the undo log record size. */
+		goto undo_size_ok;
+
+	case DICT_TF_FORMAT_ZIP:
+		/* In ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED,
+		column prefix indexes require that prefixes of
+		externally stored columns are written to the undo log.
+		This may make the undo log record bigger than the
+		record on the B-tree page.  The maximum size of an
+		undo log record is the page size.  That must be
+		checked for below. */
+		break;
+
+#if DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX
+# error "DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX"
+#endif
+	}
+
+	for (i = 0; i < n_ord; i++) {
+		const dict_field_t*	field
+			= dict_index_get_nth_field(new_index, i);
+		const dict_col_t*	col
+			= dict_field_get_col(field);
+
+		/* In dtuple_convert_big_rec(), variable-length columns
+		that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
+		may be chosen for external storage.  If the column appears
+		in an ordering column of an index, a longer prefix of
+		REC_MAX_INDEX_COL_LEN will be copied to the undo log
+		by trx_undo_page_report_modify() and
+		trx_undo_page_fetch_ext().  It suffices to check the
+		capacity of the undo log whenever new_index includes
+		a column prefix on a column that may be stored externally. */
+
+		if (field->prefix_len /* prefix index */
+		    && !col->ord_part /* not yet ordering column */
+		    && !dict_col_get_fixed_size(col, TRUE) /* variable-length */
+		    && dict_col_get_max_size(col)
+		    > BTR_EXTERN_FIELD_REF_SIZE * 2 /* long enough */) {
+
+			if (dict_index_too_big_for_undo(table, new_index)) {
+				/* An undo log record might not fit in
+				a single page.  Refuse to create this index. */
+
+				goto too_big;
+			}
+
+			break;
+		}
+	}
+
+undo_size_ok:
+	/* Flag the ordering columns */
+
+	for (i = 0; i < n_ord; i++) {
+
+		dict_index_get_nth_field(new_index, i)->col->ord_part = 1;
+	}
+
 	/* Add the new index as the last index for the table */
 
 	UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
 	new_index->table = table;
 	new_index->table_name = table->name;
 
-	/* Increment the ord_part counts in columns which are ordering */
+	new_index->search_info = btr_search_info_create(new_index->heap);
 
-	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
-		n_ord = new_index->n_fields;
-	} else {
-		n_ord = dict_index_get_n_unique(new_index);
-	}
+	new_index->stat_index_size = 1;
+	new_index->stat_n_leaf_pages = 1;
 
-	for (i = 0; i < n_ord; i++) {
-
-		dict_index_get_nth_field(new_index, i)->col->ord_part = 1;
-	}
-
-	new_index->page = (unsigned int) page_no;
+	new_index->page = page_no;
 	rw_lock_create(&new_index->lock, SYNC_INDEX_TREE);
 
 	if (!UNIV_UNLIKELY(new_index->type & DICT_UNIVERSAL)) {
@@ -1360,7 +1569,7 @@ dict_index_add_to_cache(
 		new_index->stat_n_diff_key_vals = mem_heap_alloc(
 			new_index->heap,
 			(1 + dict_index_get_n_unique(new_index))
-			* sizeof(ib_longlong));
+			* sizeof(ib_int64_t));
 		/* Give some sensible values to stat_n_... in case we do
 		not calculate statistics quickly enough */
 
@@ -1373,16 +1582,18 @@ dict_index_add_to_cache(
 	dict_sys->size += mem_heap_get_size(new_index->heap);
 
 	dict_mem_index_free(index);
+
+	return(DB_SUCCESS);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Removes an index from the dictionary cache. */
-static
+UNIV_INTERN
 void
 dict_index_remove_from_cache(
 /*=========================*/
-	dict_table_t*	table,	/* in: table */
-	dict_index_t*	index)	/* in, own: index */
+	dict_table_t*	table,	/*!< in/out: table */
+	dict_index_t*	index)	/*!< in, own: index */
 {
 	ulint		size;
 	ulint		retries = 0;
@@ -1452,15 +1663,15 @@ dict_index_remove_from_cache(
 	dict_mem_index_free(index);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Tries to find column names for the index and sets the col field of the
 index. */
 static
 void
 dict_index_find_cols(
 /*=================*/
-	dict_table_t*	table,	/* in: table */
-	dict_index_t*	index)	/* in: index */
+	dict_table_t*	table,	/*!< in: table */
+	dict_index_t*	index)	/*!< in: index */
 {
 	ulint		i;
 
@@ -1475,31 +1686,36 @@ dict_index_find_cols(
 		for (j = 0; j < table->n_cols; j++) {
 			if (!strcmp(dict_table_get_col_name(table, j),
 				    field->name)) {
-				field->col = (dict_col_t*)
-					dict_table_get_nth_col(table, j);
+				field->col = dict_table_get_nth_col(table, j);
 
 				goto found;
 			}
 		}
 
 		/* It is an error not to find a matching column. */
+		fputs("InnoDB: Error: no matching column for ", stderr);
+		ut_print_name(stderr, NULL, FALSE, field->name);
+		fputs(" in ", stderr);
+		dict_index_name_print(stderr, NULL, index);
+		fputs("!\n", stderr);
 		ut_error;
 
-	found:
+found:
 		;
 	}
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***********************************************************************
+/*******************************************************************//**
 Adds a column to index. */
-
+UNIV_INTERN
 void
 dict_index_add_col(
 /*===============*/
-	dict_index_t*	index,		/* in: index */
-	dict_table_t*	table,		/* in: table */
-	dict_col_t*	col,		/* in: column */
-	ulint		prefix_len)	/* in: column prefix length */
+	dict_index_t*		index,		/*!< in/out: index */
+	const dict_table_t*	table,		/*!< in: table */
+	dict_col_t*		col,		/*!< in: column */
+	ulint			prefix_len)	/*!< in: column prefix length */
 {
 	dict_field_t*	field;
 	const char*	col_name;
@@ -1511,7 +1727,8 @@ dict_index_add_col(
 	field = dict_index_get_nth_field(index, index->n_def - 1);
 
 	field->col = col;
-	field->fixed_len = (unsigned int) dict_col_get_fixed_size(col);
+	field->fixed_len = (unsigned int) dict_col_get_fixed_size(
+		col, dict_table_is_comp(table));
 
 	if (prefix_len && field->fixed_len > prefix_len) {
 		field->fixed_len = (unsigned int) prefix_len;
@@ -1536,17 +1753,18 @@ dict_index_add_col(
 	}
 }
 
-/***********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
 Copies fields contained in index2 to index1. */
 static
 void
 dict_index_copy(
 /*============*/
-	dict_index_t*	index1,	/* in: index to copy to */
-	dict_index_t*	index2,	/* in: index to copy from */
-	dict_table_t*	table,	/* in: table */
-	ulint		start,	/* in: first position to copy */
-	ulint		end)	/* in: last position to copy */
+	dict_index_t*		index1,	/*!< in: index to copy to */
+	dict_index_t*		index2,	/*!< in: index to copy from */
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			start,	/*!< in: first position to copy */
+	ulint			end)	/*!< in: last position to copy */
 {
 	dict_field_t*	field;
 	ulint		i;
@@ -1561,15 +1779,16 @@ dict_index_copy(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Copies types of fields contained in index to tuple. */
-
+UNIV_INTERN
 void
 dict_index_copy_types(
 /*==================*/
-	dtuple_t*	tuple,		/* in: data tuple */
-	dict_index_t*	index,		/* in: index */
-	ulint		n_fields)	/* in: number of field types to copy */
+	dtuple_t*		tuple,		/*!< in/out: data tuple */
+	const dict_index_t*	index,		/*!< in: index */
+	ulint			n_fields)	/*!< in: number of
+						field types to copy */
 {
 	ulint		i;
 
@@ -1580,8 +1799,8 @@ dict_index_copy_types(
 	}
 
 	for (i = 0; i < n_fields; i++) {
-		dict_field_t*	ifield;
-		dtype_t*	dfield_type;
+		const dict_field_t*	ifield;
+		dtype_t*		dfield_type;
 
 		ifield = dict_index_get_nth_field(index, i);
 		dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
@@ -1589,38 +1808,40 @@ dict_index_copy_types(
 	}
 }
 
-/***********************************************************************
-Copies types of columns contained in table to tuple. */
-
+/*******************************************************************//**
+Copies types of columns contained in table to tuple and sets all
+fields of the tuple to the SQL NULL value.  This function should
+be called right after dtuple_create(). */
+UNIV_INTERN
 void
 dict_table_copy_types(
 /*==================*/
-	dtuple_t*	tuple,	/* in: data tuple */
-	dict_table_t*	table)	/* in: index */
+	dtuple_t*		tuple,	/*!< in/out: data tuple */
+	const dict_table_t*	table)	/*!< in: table */
 {
-	dtype_t*	dfield_type;
 	ulint		i;
 
 	for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
 
-		dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
-		dict_col_copy_type(dict_table_get_nth_col(table, i),
-				   dfield_type);
+		dfield_t*	dfield	= dtuple_get_nth_field(tuple, i);
+		dtype_t*	dtype	= dfield_get_type(dfield);
+
+		dfield_set_null(dfield);
+		dict_col_copy_type(dict_table_get_nth_col(table, i), dtype);
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Builds the internal dictionary cache representation for a clustered
-index, containing also system fields not defined by the user. */
+index, containing also system fields not defined by the user.
+@return	own: the internal representation of the clustered index */
 static
 dict_index_t*
 dict_index_build_internal_clust(
 /*============================*/
-				/* out, own: the internal representation
-				of the clustered index */
-	dict_table_t*	table,	/* in: table */
-	dict_index_t*	index)	/* in: user representation of a clustered
-				index */
+	const dict_table_t*	table,	/*!< in: table */
+	dict_index_t*		index)	/*!< in: user representation of
+					a clustered index */
 {
 	dict_index_t*	new_index;
 	dict_field_t*	field;
@@ -1630,7 +1851,7 @@ dict_index_build_internal_clust(
 	ibool*		indexed;
 
 	ut_ad(table && index);
-	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(dict_index_is_clust(index));
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 
@@ -1655,7 +1876,7 @@ dict_index_build_internal_clust(
 
 		new_index->n_uniq = REC_MAX_N_FIELDS;
 
-	} else if (index->type & DICT_UNIQUE) {
+	} else if (dict_index_is_unique(index)) {
 		/* Only the fields defined so far are needed to identify
 		the index entry uniquely */
 
@@ -1667,7 +1888,7 @@ dict_index_build_internal_clust(
 
 	new_index->trx_id_offset = 0;
 
-	if (!(index->type & DICT_IBUF)) {
+	if (!dict_index_is_ibuf(index)) {
 		/* Add system columns, trx id first */
 
 		trx_id_pos = new_index->n_def;
@@ -1682,19 +1903,19 @@ dict_index_build_internal_clust(
 # error "DATA_ROLL_PTR != 2"
 #endif
 
-		if (!(index->type & DICT_UNIQUE)) {
-			dict_index_add_col(new_index, table, (dict_col_t*)
+		if (!dict_index_is_unique(index)) {
+			dict_index_add_col(new_index, table,
 					   dict_table_get_sys_col(
 						   table, DATA_ROW_ID),
 					   0);
 			trx_id_pos++;
 		}
 
-		dict_index_add_col(new_index, table, (dict_col_t*)
+		dict_index_add_col(new_index, table,
 				   dict_table_get_sys_col(table, DATA_TRX_ID),
 				   0);
 
-		dict_index_add_col(new_index, table, (dict_col_t*)
+		dict_index_add_col(new_index, table,
 				   dict_table_get_sys_col(table,
 							  DATA_ROLL_PTR),
 				   0);
@@ -1702,7 +1923,8 @@ dict_index_build_internal_clust(
 		for (i = 0; i < trx_id_pos; i++) {
 
 			fixed_size = dict_col_get_fixed_size(
-				dict_index_get_nth_col(new_index, i));
+				dict_index_get_nth_col(new_index, i),
+				dict_table_is_comp(table));
 
 			if (fixed_size == 0) {
 				new_index->trx_id_offset = 0;
@@ -1723,10 +1945,9 @@ dict_index_build_internal_clust(
 	}
 
 	/* Remember the table columns already contained in new_index */
-	indexed = mem_alloc(table->n_cols * sizeof *indexed);
-	memset(indexed, 0, table->n_cols * sizeof *indexed);
+	indexed = mem_zalloc(table->n_cols * sizeof *indexed);
 
-	/* Mark with 0 the table columns already contained in new_index */
+	/* Mark the table columns already contained in new_index */
 	for (i = 0; i < new_index->n_def; i++) {
 
 		field = dict_index_get_nth_field(new_index, i);
@@ -1744,8 +1965,7 @@ dict_index_build_internal_clust(
 	there */
 	for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) {
 
-		dict_col_t*	col = (dict_col_t*)
-			dict_table_get_nth_col(table, i);
+		dict_col_t*	col = dict_table_get_nth_col(table, i);
 		ut_ad(col->mtype != DATA_SYS);
 
 		if (!indexed[col->ind]) {
@@ -1755,7 +1975,7 @@ dict_index_build_internal_clust(
 
 	mem_free(indexed);
 
-	ut_ad((index->type & DICT_IBUF)
+	ut_ad(dict_index_is_ibuf(index)
 	      || (UT_LIST_GET_LEN(table->indexes) == 0));
 
 	new_index->cached = TRUE;
@@ -1763,18 +1983,17 @@ dict_index_build_internal_clust(
 	return(new_index);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Builds the internal dictionary cache representation for a non-clustered
-index, containing also system fields not defined by the user. */
+index, containing also system fields not defined by the user.
+@return	own: the internal representation of the non-clustered index */
 static
 dict_index_t*
 dict_index_build_internal_non_clust(
 /*================================*/
-				/* out, own: the internal representation
-				of the non-clustered index */
-	dict_table_t*	table,	/* in: table */
-	dict_index_t*	index)	/* in: user representation of a non-clustered
-				index */
+	const dict_table_t*	table,	/*!< in: table */
+	dict_index_t*		index)	/*!< in: user representation of
+					a non-clustered index */
 {
 	dict_field_t*	field;
 	dict_index_t*	new_index;
@@ -1783,7 +2002,7 @@ dict_index_build_internal_non_clust(
 	ibool*		indexed;
 
 	ut_ad(table && index);
-	ut_ad(0 == (index->type & DICT_CLUSTERED));
+	ut_ad(!dict_index_is_clust(index));
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 
@@ -1791,7 +2010,7 @@ dict_index_build_internal_non_clust(
 	clust_index = UT_LIST_GET_FIRST(table->indexes);
 
 	ut_ad(clust_index);
-	ut_ad(clust_index->type & DICT_CLUSTERED);
+	ut_ad(dict_index_is_clust(clust_index));
 	ut_ad(!(clust_index->type & DICT_UNIVERSAL));
 
 	/* Create a new index */
@@ -1810,10 +2029,9 @@ dict_index_build_internal_non_clust(
 	dict_index_copy(new_index, index, table, 0, index->n_fields);
 
 	/* Remember the table columns already contained in new_index */
-	indexed = mem_alloc(table->n_cols * sizeof *indexed);
-	memset(indexed, 0, table->n_cols * sizeof *indexed);
+	indexed = mem_zalloc(table->n_cols * sizeof *indexed);
 
-	/* Mark with 0 table columns already contained in new_index */
+	/* Mark the table columns already contained in new_index */
 	for (i = 0; i < new_index->n_def; i++) {
 
 		field = dict_index_get_nth_field(new_index, i);
@@ -1842,7 +2060,7 @@ dict_index_build_internal_non_clust(
 
 	mem_free(indexed);
 
-	if ((index->type) & DICT_UNIQUE) {
+	if (dict_index_is_unique(index)) {
 		new_index->n_uniq = index->n_fields;
 	} else {
 		new_index->n_uniq = new_index->n_def;
@@ -1860,42 +2078,98 @@ dict_index_build_internal_non_clust(
 
 /*====================== FOREIGN KEY PROCESSING ========================*/
 
-/*************************************************************************
-Checks if a table is referenced by foreign keys. */
-
+/*********************************************************************//**
+Checks if a table is referenced by foreign keys.
+@return	TRUE if table is referenced by a foreign key */
+UNIV_INTERN
 ibool
-dict_table_referenced_by_foreign_key(
-/*=================================*/
-				/* out: TRUE if table is referenced by a
-				foreign key */
-	dict_table_t*	table)	/* in: InnoDB table */
+dict_table_is_referenced_by_foreign_key(
+/*====================================*/
+	const dict_table_t*	table)	/*!< in: InnoDB table */
 {
-	if (UT_LIST_GET_LEN(table->referenced_list) > 0) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
+	return(UT_LIST_GET_LEN(table->referenced_list) > 0);
 }
 
-/*************************************************************************
+/*********************************************************************//**
+Check if the index is referenced by a foreign key, if TRUE return foreign
+else return NULL
+@return pointer to foreign key struct if index is defined for foreign
+key, otherwise NULL */
+UNIV_INTERN
+dict_foreign_t*
+dict_table_get_referenced_constraint(
+/*=================================*/
+	dict_table_t*	table,	/*!< in: InnoDB table */
+	dict_index_t*	index)	/*!< in: InnoDB index */
+{
+	dict_foreign_t*	foreign;
+
+	ut_ad(index != NULL);
+	ut_ad(table != NULL);
+
+	for (foreign = UT_LIST_GET_FIRST(table->referenced_list);
+	     foreign;
+	     foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
+
+		if (foreign->referenced_index == index) {
+
+			return(foreign);
+		}
+	}
+
+	return(NULL);
+}
+
+/*********************************************************************//**
+Checks if a index is defined for a foreign key constraint. Index is a part
+of a foreign key constraint if the index is referenced by foreign key
+or index is a foreign key index.
+@return pointer to foreign key struct if index is defined for foreign
+key, otherwise NULL */
+UNIV_INTERN
+dict_foreign_t*
+dict_table_get_foreign_constraint(
+/*==============================*/
+	dict_table_t*	table,	/*!< in: InnoDB table */
+	dict_index_t*	index)	/*!< in: InnoDB index */
+{
+	dict_foreign_t*	foreign;
+
+	ut_ad(index != NULL);
+	ut_ad(table != NULL);
+
+	for (foreign = UT_LIST_GET_FIRST(table->foreign_list);
+	     foreign;
+	     foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
+
+		if (foreign->foreign_index == index
+		    || foreign->referenced_index == index) {
+
+			return(foreign);
+		}
+	}
+
+	return(NULL);
+}
+
+/*********************************************************************//**
 Frees a foreign key struct. */
 static
 void
 dict_foreign_free(
 /*==============*/
-	dict_foreign_t*	foreign)	/* in, own: foreign key struct */
+	dict_foreign_t*	foreign)	/*!< in, own: foreign key struct */
 {
 	mem_heap_free(foreign->heap);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Removes a foreign constraint struct from the dictionary cache. */
 static
 void
 dict_foreign_remove_from_cache(
 /*===========================*/
-	dict_foreign_t*	foreign)	/* in, own: foreign constraint */
+	dict_foreign_t*	foreign)	/*!< in, own: foreign constraint */
 {
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 	ut_a(foreign);
@@ -1915,16 +2189,16 @@ dict_foreign_remove_from_cache(
 	dict_foreign_free(foreign);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Looks for the foreign constraint from the foreign and referenced lists
-of a table. */
+of a table.
+@return	foreign constraint */
 static
 dict_foreign_t*
 dict_foreign_find(
 /*==============*/
-				/* out: foreign constraint */
-	dict_table_t*	table,	/* in: table object */
-	const char*	id)	/* in: foreign constraint id */
+	dict_table_t*	table,	/*!< in: table object */
+	const char*	id)	/*!< in: foreign constraint id */
 {
 	dict_foreign_t*	foreign;
 
@@ -1955,38 +2229,45 @@ dict_foreign_find(
 	return(NULL);
 }
 
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
+/*********************************************************************//**
 Tries to find an index whose first fields are the columns in the array,
-in the same order. */
+in the same order and is not marked for deletion and is not the same
+as types_idx.
+@return	matching index, NULL if not found */
 static
 dict_index_t*
 dict_foreign_find_index(
 /*====================*/
-				/* out: matching index, NULL if not found */
-	dict_table_t*	table,	/* in: table */
-	const char**	columns,/* in: array of column names */
-	ulint		n_cols,	/* in: number of columns */
-	dict_index_t*	types_idx, /* in: NULL or an index to whose types the
+	dict_table_t*	table,	/*!< in: table */
+	const char**	columns,/*!< in: array of column names */
+	ulint		n_cols,	/*!< in: number of columns */
+	dict_index_t*	types_idx, /*!< in: NULL or an index to whose types the
 				   column types must match */
 	ibool		check_charsets,
-				/* in: whether to check charsets.
+				/*!< in: whether to check charsets.
 				only has an effect if types_idx != NULL */
 	ulint		check_null)
-				/* in: nonzero if none of the columns must
+				/*!< in: nonzero if none of the columns must
 				be declared NOT NULL */
 {
 	dict_index_t*	index;
-	dict_field_t*	field;
-	const char*	col_name;
-	ulint		i;
 
 	index = dict_table_get_first_index(table);
 
 	while (index != NULL) {
-		if (dict_index_get_n_fields(index) >= n_cols) {
+		/* Ignore matches that refer to the same instance
+		or the index is to be dropped */
+		if (index->to_be_dropped || types_idx == index) {
+
+			goto next_rec;
+
+		} else if (dict_index_get_n_fields(index) >= n_cols) {
+			ulint		i;
 
 			for (i = 0; i < n_cols; i++) {
+				dict_field_t*	field;
+				const char*	col_name;
+
 				field = dict_index_get_nth_field(index, i);
 
 				col_name = dict_table_get_col_name(
@@ -2027,20 +2308,104 @@ dict_foreign_find_index(
 			}
 		}
 
+next_rec:
 		index = dict_table_get_next_index(index);
 	}
 
 	return(NULL);
 }
 
-/**************************************************************************
+/**********************************************************************//**
+Find an index that is equivalent to the one passed in and is not marked
+for deletion.
+@return	index equivalent to foreign->foreign_index, or NULL */
+UNIV_INTERN
+dict_index_t*
+dict_foreign_find_equiv_index(
+/*==========================*/
+	dict_foreign_t*	foreign)/*!< in: foreign key */
+{
+	ut_a(foreign != NULL);
+
+	/* Try to find an index which contains the columns as the
+	first fields and in the right order, and the types are the
+	same as in foreign->foreign_index */
+
+	return(dict_foreign_find_index(
+		       foreign->foreign_table,
+		       foreign->foreign_col_names, foreign->n_fields,
+		       foreign->foreign_index, TRUE, /* check types */
+		       FALSE/* allow columns to be NULL */));
+}
+
+/**********************************************************************//**
+Returns an index object by matching on the name and column names and
+if more than one index matches return the index with the max id
+@return	matching index, NULL if not found */
+UNIV_INTERN
+dict_index_t*
+dict_table_get_index_by_max_id(
+/*===========================*/
+	dict_table_t*	table,	/*!< in: table */
+	const char*	name,	/*!< in: the index name to find */
+	const char**	columns,/*!< in: array of column names */
+	ulint		n_cols)	/*!< in: number of columns */
+{
+	dict_index_t*	index;
+	dict_index_t*	found;
+
+	found = NULL;
+	index = dict_table_get_first_index(table);
+
+	while (index != NULL) {
+		if (ut_strcmp(index->name, name) == 0
+		    && dict_index_get_n_ordering_defined_by_user(index)
+		    == n_cols) {
+
+			ulint		i;
+
+			for (i = 0; i < n_cols; i++) {
+				dict_field_t*	field;
+				const char*	col_name;
+
+				field = dict_index_get_nth_field(index, i);
+
+				col_name = dict_table_get_col_name(
+					table, dict_col_get_no(field->col));
+
+				if (0 != innobase_strcasecmp(
+					    columns[i], col_name)) {
+
+					break;
+				}
+			}
+
+			if (i == n_cols) {
+				/* We found a matching index, select
+				the index with the higher id*/
+
+				if (!found
+				    || ut_dulint_cmp(index->id, found->id) > 0) {
+
+					found = index;
+				}
+			}
+		}
+
+		index = dict_table_get_next_index(index);
+	}
+
+	return(found);
+}
+
+/**********************************************************************//**
 Report an error in a foreign key definition. */
 static
 void
 dict_foreign_error_report_low(
 /*==========================*/
-	FILE*		file,	/* in: output stream */
-	const char*	name)	/* in: table name */
+	FILE*		file,	/*!< in: output stream */
+	const char*	name)	/*!< in: table name */
 {
 	rewind(file);
 	ut_print_timestamp(file);
@@ -2048,15 +2413,15 @@ dict_foreign_error_report_low(
 		name);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Report an error in a foreign key definition. */
 static
 void
 dict_foreign_error_report(
 /*======================*/
-	FILE*		file,	/* in: output stream */
-	dict_foreign_t*	fk,	/* in: foreign key constraint */
-	const char*	msg)	/* in: the error message */
+	FILE*		file,	/*!< in: output stream */
+	dict_foreign_t*	fk,	/*!< in: foreign key constraint */
+	const char*	msg)	/*!< in: the error message */
 {
 	mutex_enter(&dict_foreign_err_mutex);
 	dict_foreign_error_report_low(file, fk->foreign_table_name);
@@ -2068,26 +2433,25 @@ dict_foreign_error_report(
 		fputs("The index in the foreign key in table is ", file);
 		ut_print_name(file, NULL, FALSE, fk->foreign_index->name);
 		fputs("\n"
-		      "See http://dev.mysql.com/doc/refman/5.1/en/"
-		      "innodb-foreign-key-constraints.html\n"
+		      "See " REFMAN "innodb-foreign-key-constraints.html\n"
 		      "for correct foreign key definition.\n",
 		      file);
 	}
 	mutex_exit(&dict_foreign_err_mutex);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Adds a foreign key constraint object to the dictionary cache. May free
 the object if there already is an object with the same identifier in.
 At least one of the foreign table and the referenced table must already
-be in the dictionary cache! */
-
+be in the dictionary cache!
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
 ulint
 dict_foreign_add_to_cache(
 /*======================*/
-					/* out: DB_SUCCESS or error code */
-	dict_foreign_t*	foreign,	/* in, own: foreign key constraint */
-	ibool		check_charsets)	/* in: TRUE=check charset
+	dict_foreign_t*	foreign,	/*!< in, own: foreign key constraint */
+	ibool		check_charsets)	/*!< in: TRUE=check charset
 					compatibility */
 {
 	dict_table_t*	for_table;
@@ -2124,7 +2488,7 @@ dict_foreign_add_to_cache(
 	if (for_in_cache->referenced_table == NULL && ref_table) {
 		index = dict_foreign_find_index(
 			ref_table,
-			(const char**) for_in_cache->referenced_col_names,
+			for_in_cache->referenced_col_names,
 			for_in_cache->n_fields, for_in_cache->foreign_index,
 			check_charsets, FALSE);
 
@@ -2156,7 +2520,7 @@ dict_foreign_add_to_cache(
 	if (for_in_cache->foreign_table == NULL && for_table) {
 		index = dict_foreign_find_index(
 			for_table,
-			(const char**) for_in_cache->foreign_col_names,
+			for_in_cache->foreign_col_names,
 			for_in_cache->n_fields,
 			for_in_cache->referenced_index, check_charsets,
 			for_in_cache->type
@@ -2199,17 +2563,17 @@ dict_foreign_add_to_cache(
 	return(DB_SUCCESS);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Scans from pointer onwards. Stops if is at the start of a copy of
 'string' where characters are compared without case sensitivity, and
-only outside `` or "" quotes. Stops also at '\0'. */
-
+only outside `` or "" quotes. Stops also at NUL.
+@return	scanned up to this */
+static
 const char*
 dict_scan_to(
 /*=========*/
-				/* out: scanned up to this */
-	const char*	ptr,	/* in: scan from */
-	const char*	string)	/* in: look for this */
+	const char*	ptr,	/*!< in: scan from */
+	const char*	string)	/*!< in: look for this */
 {
 	char	quote	= '\0';
 
@@ -2242,19 +2606,19 @@ nomatch:
 	return(ptr);
 }
 
-/*************************************************************************
-Accepts a specified string. Comparisons are case-insensitive. */
+/*********************************************************************//**
+Accepts a specified string. Comparisons are case-insensitive.
+@return if string was accepted, the pointer is moved after that, else
+ptr is returned */
 static
 const char*
 dict_accept(
 /*========*/
-				/* out: if string was accepted, the pointer
-				is moved after that, else ptr is returned */
-	struct charset_info_st*	cs,/* in: the character set of ptr */
-	const char*	ptr,	/* in: scan from this */
-	const char*	string,	/* in: accept only this string as the next
+	struct charset_info_st*	cs,/*!< in: the character set of ptr */
+	const char*	ptr,	/*!< in: scan from this */
+	const char*	string,	/*!< in: accept only this string as the next
 				non-whitespace string */
-	ibool*		success)/* out: TRUE if accepted */
+	ibool*		success)/*!< out: TRUE if accepted */
 {
 	const char*	old_ptr = ptr;
 	const char*	old_ptr2;
@@ -2278,25 +2642,25 @@ dict_accept(
 	return(ptr + ut_strlen(string));
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Scans an id. For the lexical definition of an 'id', see the code below.
-Strips backquotes or double quotes from around the id. */
+Strips backquotes or double quotes from around the id.
+@return	scanned to */
 static
 const char*
 dict_scan_id(
 /*=========*/
-				/* out: scanned to */
-	struct charset_info_st*	cs,/* in: the character set of ptr */
-	const char*	ptr,	/* in: scanned to */
-	mem_heap_t*	heap,	/* in: heap where to allocate the id
+	struct charset_info_st*	cs,/*!< in: the character set of ptr */
+	const char*	ptr,	/*!< in: scanned to */
+	mem_heap_t*	heap,	/*!< in: heap where to allocate the id
 				(NULL=id will not be allocated, but it
 				will point to string near ptr) */
-	const char**	id,	/* out,own: the id; NULL if no id was
+	const char**	id,	/*!< out,own: the id; NULL if no id was
 				scannable */
-	ibool		table_id,/* in: TRUE=convert the allocated id
+	ibool		table_id,/*!< in: TRUE=convert the allocated id
 				as a table name; FALSE=convert to UTF-8 */
 	ibool		accept_also_dot)
-				/* in: TRUE if also a dot can appear in a
+				/*!< in: TRUE if also a dot can appear in a
 				non-quoted id; in a quoted id it can appear
 				always */
 {
@@ -2378,7 +2742,7 @@ convert_id:
 		len = 3 * len + 1;
 		*id = dst = mem_heap_alloc(heap, len);
 
-		innobase_convert_from_id(dst, str, len);
+		innobase_convert_from_id(cs, dst, str, len);
 	} else if (!strncmp(str, srv_mysql50_table_name_prefix,
 			    sizeof srv_mysql50_table_name_prefix)) {
 		/* This is a pre-5.1 table name
@@ -2392,26 +2756,26 @@ convert_id:
 		len = 5 * len + 1;
 		*id = dst = mem_heap_alloc(heap, len);
 
-		innobase_convert_from_table_id(dst, str, len);
+		innobase_convert_from_table_id(cs, dst, str, len);
 	}
 
 	return(ptr);
 }
 
-/*************************************************************************
-Tries to scan a column name. */
+/*********************************************************************//**
+Tries to scan a column name.
+@return	scanned to */
 static
 const char*
 dict_scan_col(
 /*==========*/
-					/* out: scanned to */
-	struct charset_info_st*	cs,	/* in: the character set of ptr */
-	const char*		ptr,	/* in: scanned to */
-	ibool*			success,/* out: TRUE if success */
-	dict_table_t*		table,	/* in: table in which the column is */
-	const dict_col_t**	column,	/* out: pointer to column if success */
-	mem_heap_t*		heap,	/* in: heap where to allocate */
-	const char**		name)	/* out,own: the column name;
+	struct charset_info_st*	cs,	/*!< in: the character set of ptr */
+	const char*		ptr,	/*!< in: scanned to */
+	ibool*			success,/*!< out: TRUE if success */
+	dict_table_t*		table,	/*!< in: table in which the column is */
+	const dict_col_t**	column,	/*!< out: pointer to column if success */
+	mem_heap_t*		heap,	/*!< in: heap where to allocate */
+	const char**		name)	/*!< out,own: the column name;
 					NULL if no name was scannable */
 {
 	ulint		i;
@@ -2449,20 +2813,20 @@ dict_scan_col(
 	return(ptr);
 }
 
-/*************************************************************************
-Scans a table name from an SQL string. */
+/*********************************************************************//**
+Scans a table name from an SQL string.
+@return	scanned to */
 static
 const char*
 dict_scan_table_name(
 /*=================*/
-				/* out: scanned to */
-	struct charset_info_st*	cs,/* in: the character set of ptr */
-	const char*	ptr,	/* in: scanned to */
-	dict_table_t**	table,	/* out: table object or NULL */
-	const char*	name,	/* in: foreign key table name */
-	ibool*		success,/* out: TRUE if ok name found */
-	mem_heap_t*	heap,	/* in: heap where to allocate the id */
-	const char**	ref_name)/* out,own: the table name;
+	struct charset_info_st*	cs,/*!< in: the character set of ptr */
+	const char*	ptr,	/*!< in: scanned to */
+	dict_table_t**	table,	/*!< out: table object or NULL */
+	const char*	name,	/*!< in: foreign key table name */
+	ibool*		success,/*!< out: TRUE if ok name found */
+	mem_heap_t*	heap,	/*!< in: heap where to allocate the id */
+	const char**	ref_name)/*!< out,own: the table name;
 				NULL if no name was scannable */
 {
 	const char*	database_name	= NULL;
@@ -2548,16 +2912,16 @@ dict_scan_table_name(
 	return(ptr);
 }
 
-/*************************************************************************
-Skips one id. The id is allowed to contain also '.'. */
+/*********************************************************************//**
+Skips one id. The id is allowed to contain also '.'.
+@return	scanned to */
 static
 const char*
 dict_skip_word(
 /*===========*/
-				/* out: scanned to */
-	struct charset_info_st*	cs,/* in: the character set of ptr */
-	const char*	ptr,	/* in: scanned to */
-	ibool*		success)/* out: TRUE if success, FALSE if just spaces
+	struct charset_info_st*	cs,/*!< in: the character set of ptr */
+	const char*	ptr,	/*!< in: scanned to */
+	ibool*		success)/*!< out: TRUE if success, FALSE if just spaces
 				left in string or a syntax error */
 {
 	const char*	start;
@@ -2573,20 +2937,19 @@ dict_skip_word(
 	return(ptr);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Removes MySQL comments from an SQL string. A comment is either
 (a) '#' to the end of the line,
-(b) '--<space>' to the end of the line, or
-(c) '<slash><asterisk>' till the next '<asterisk><slash>' (like the familiar
-C comment syntax). */
+(b) '--[space]' to the end of the line, or
+(c) '[slash][asterisk]' till the next '[asterisk][slash]' (like the familiar
+C comment syntax).
+@return own: SQL string stripped from comments; the caller must free
+this with mem_free()! */
 static
 char*
 dict_strip_comments(
 /*================*/
-					/* out, own: SQL string stripped from
-					comments; the caller must free this
-					with mem_free()! */
-	const char*	sql_string)	/* in: SQL string */
+	const char*	sql_string)	/*!< in: SQL string */
 {
 	char*		str;
 	const char*	sptr;
@@ -2616,7 +2979,7 @@ scan_more:
 		} else if (quote) {
 			/* Within quotes: do not look for
 			starting quotes or comments. */
-		} else if (*sptr == '"' || *sptr == '`') {
+		} else if (*sptr == '"' || *sptr == '`' || *sptr == '\'') {
 			/* Starting quote: remember the quote character. */
 			quote = *sptr;
 		} else if (*sptr == '#'
@@ -2660,17 +3023,16 @@ scan_more:
 	}
 }
 
-/*************************************************************************
-Finds the highest <number> for foreign key constraints of the table. Looks
+/*********************************************************************//**
+Finds the highest [number] for foreign key constraints of the table. Looks
 only at the >= 4.0.18-format id's, which are of the form
-databasename/tablename_ibfk_<number>. */
+databasename/tablename_ibfk_[number].
+@return	highest number, 0 if table has no new format foreign key constraints */
 static
 ulint
 dict_table_get_highest_foreign_id(
 /*==============================*/
-				/* out: highest number, 0 if table has no new
-				format foreign key constraints */
-	dict_table_t*	table)	/* in: table in the dictionary memory cache */
+	dict_table_t*	table)	/*!< in: table in the dictionary memory cache */
 {
 	dict_foreign_t*	foreign;
 	char*		endp;
@@ -2709,19 +3071,19 @@ dict_table_get_highest_foreign_id(
 	return(biggest_id);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Reports a simple foreign key create clause syntax error. */
 static
 void
 dict_foreign_report_syntax_err(
 /*===========================*/
-	const char*	name,		/* in: table name */
+	const char*	name,		/*!< in: table name */
 	const char*	start_of_latest_foreign,
-					/* in: start of the foreign key clause
+					/*!< in: start of the foreign key clause
 					in the SQL string */
-	const char*	ptr)		/* in: place of the syntax error */
+	const char*	ptr)		/*!< in: place of the syntax error */
 {
-	FILE*	 ef = dict_foreign_err_file;
+	FILE*	ef = dict_foreign_err_file;
 
 	mutex_enter(&dict_foreign_err_mutex);
 	dict_foreign_error_report_low(ef, name);
@@ -2730,31 +3092,31 @@ dict_foreign_report_syntax_err(
 	mutex_exit(&dict_foreign_err_mutex);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Scans a table create SQL string and adds to the data dictionary the foreign
 key constraints declared in the string. This function should be called after
 the indexes for a table have been created. Each foreign key constraint must
 be accompanied with indexes in both participating tables. The indexes are
-allowed to contain more fields than mentioned in the constraint. */
+allowed to contain more fields than mentioned in the constraint.
+@return	error code or DB_SUCCESS */
 static
 ulint
 dict_create_foreign_constraints_low(
 /*================================*/
-				/* out: error code or DB_SUCCESS */
-	trx_t*		trx,	/* in: transaction */
-	mem_heap_t*	heap,	/* in: memory heap */
-	struct charset_info_st*	cs,/* in: the character set of sql_string */
+	trx_t*		trx,	/*!< in: transaction */
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	struct charset_info_st*	cs,/*!< in: the character set of sql_string */
 	const char*	sql_string,
-				/* in: CREATE TABLE or ALTER TABLE statement
+				/*!< in: CREATE TABLE or ALTER TABLE statement
 				where foreign keys are declared like:
 				FOREIGN KEY (a, b) REFERENCES table2(c, d),
 				table2 can be written also with the database
 				name before it: test.table2; the default
 				database is the database of parameter name */
-	const char*	name,	/* in: table full name in the normalized form
+	const char*	name,	/*!< in: table full name in the normalized form
 				database_name/table_name */
 	ibool		reject_fks)
-				/* in: if TRUE, fail with error code
+				/*!< in: if TRUE, fail with error code
 				DB_CANNOT_ADD_CONSTRAINT if any foreign
 				keys are found. */
 {
@@ -2828,8 +3190,8 @@ dict_create_foreign_constraints_low(
 	}
 
 	/* Starting from 4.0.18 and 4.1.2, we generate foreign key id's in the
-	format databasename/tablename_ibfk_<number>, where <number> is local
-	to the table; look for the highest <number> for table_to_alter, so
+	format databasename/tablename_ibfk_[number], where [number] is local
+	to the table; look for the highest [number] for table_to_alter, so
 	that we can assign to new constraints higher numbers. */
 
 	/* If we are altering a temporary table, the table name after ALTER
@@ -2989,8 +3351,7 @@ col_loop1:
 		ut_print_name(ef, NULL, TRUE, name);
 		fprintf(ef, " where the columns appear\n"
 			"as the first columns. Constraint:\n%s\n"
-			"See http://dev.mysql.com/doc/refman/5.1/en/"
-			"innodb-foreign-key-constraints.html\n"
+			"See " REFMAN "innodb-foreign-key-constraints.html\n"
 			"for correct foreign key definition.\n",
 			start_of_latest_foreign);
 		mutex_exit(&dict_foreign_err_mutex);
@@ -3270,7 +3631,7 @@ try_find_index:
 				" and such columns in old tables\n"
 				"cannot be referenced by such columns"
 				" in new tables.\n"
-				"See http://dev.mysql.com/doc/refman/5.1/en/"
+				"See " REFMAN
 				"innodb-foreign-key-constraints.html\n"
 				"for correct foreign key definition.\n",
 				start_of_latest_foreign);
@@ -3309,38 +3670,19 @@ try_find_index:
 	goto loop;
 }
 
-/**************************************************************************
-Determines whether a string starts with the specified keyword. */
-
-ibool
-dict_str_starts_with_keyword(
-/*=========================*/
-					/* out: TRUE if str starts
-					with keyword */
-	void*		mysql_thd,	/* in: MySQL thread handle */
-	const char*	str,		/* in: string to scan for keyword */
-	const char*	keyword)	/* in: keyword to look for */
-{
-	struct charset_info_st*	cs	= innobase_get_charset(mysql_thd);
-	ibool			success;
-
-	dict_accept(cs, str, keyword, &success);
-	return(success);
-}
-
-/*************************************************************************
+/*********************************************************************//**
 Scans a table create SQL string and adds to the data dictionary the foreign
 key constraints declared in the string. This function should be called after
 the indexes for a table have been created. Each foreign key constraint must
 be accompanied with indexes in both participating tables. The indexes are
-allowed to contain more fields than mentioned in the constraint. */
-
+allowed to contain more fields than mentioned in the constraint.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 ulint
 dict_create_foreign_constraints(
 /*============================*/
-					/* out: error code or DB_SUCCESS */
-	trx_t*		trx,		/* in: transaction */
-	const char*	sql_string,	/* in: table create statement where
+	trx_t*		trx,		/*!< in: transaction */
+	const char*	sql_string,	/*!< in: table create statement where
 					foreign keys are declared like:
 					FOREIGN KEY (a, b) REFERENCES
 					table2(c, d), table2 can be written
@@ -3348,10 +3690,10 @@ dict_create_foreign_constraints(
 					name before it: test.table2; the
 					default database id the database of
 					parameter name */
-	const char*	name,		/* in: table full name in the
+	const char*	name,		/*!< in: table full name in the
 					normalized form
 					database_name/table_name */
-	ibool		reject_fks)	/* in: if TRUE, fail with error
+	ibool		reject_fks)	/*!< in: if TRUE, fail with error
 					code DB_CANNOT_ADD_CONSTRAINT if
 					any foreign keys are found. */
 {
@@ -3375,23 +3717,21 @@ dict_create_foreign_constraints(
 	return(err);
 }
 
-/**************************************************************************
-Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. */
-
+/**********************************************************************//**
+Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
+@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
+constraint id does not match */
+UNIV_INTERN
 ulint
 dict_foreign_parse_drop_constraints(
 /*================================*/
-						/* out: DB_SUCCESS or
-						DB_CANNOT_DROP_CONSTRAINT if
-						syntax error or the constraint
-						id does not match */
-	mem_heap_t*	heap,			/* in: heap from which we can
+	mem_heap_t*	heap,			/*!< in: heap from which we can
 						allocate memory */
-	trx_t*		trx,			/* in: transaction */
-	dict_table_t*	table,			/* in: table */
-	ulint*		n,			/* out: number of constraints
+	trx_t*		trx,			/*!< in: transaction */
+	dict_table_t*	table,			/*!< in: table */
+	ulint*		n,			/*!< out: number of constraints
 						to drop */
-	const char***	constraints_to_drop)	/* out: id's of the
+	const char***	constraints_to_drop)	/*!< out: id's of the
 						constraints to drop */
 {
 	dict_foreign_t*		foreign;
@@ -3509,19 +3849,33 @@ syntax_error:
 
 	return(DB_CANNOT_DROP_CONSTRAINT);
 }
-#endif /* UNIV_HOTBACKUP */
 
 /*==================== END OF FOREIGN KEY PROCESSING ====================*/
 
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Returns an index object if it is found in the dictionary cache. */
+/**********************************************************************//**
+Returns an index object if it is found in the dictionary cache.
+Assumes that dict_sys->mutex is already being held.
+@return	index, NULL if not found */
+UNIV_INTERN
+dict_index_t*
+dict_index_get_if_in_cache_low(
+/*===========================*/
+	dulint	index_id)	/*!< in: index id */
+{
+	ut_ad(mutex_own(&(dict_sys->mutex)));
 
+	return(dict_index_find_on_id_low(index_id));
+}
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
+Returns an index object if it is found in the dictionary cache.
+@return	index, NULL if not found */
+UNIV_INTERN
 dict_index_t*
 dict_index_get_if_in_cache(
 /*=======================*/
-				/* out: index, NULL if not found */
-	dulint	index_id)	/* in: index id */
+	dulint	index_id)	/*!< in: index id */
 {
 	dict_index_t*	index;
 
@@ -3531,25 +3885,25 @@ dict_index_get_if_in_cache(
 
 	mutex_enter(&(dict_sys->mutex));
 
-	index = dict_index_find_on_id_low(index_id);
+	index = dict_index_get_if_in_cache_low(index_id);
 
 	mutex_exit(&(dict_sys->mutex));
 
 	return(index);
 }
-#endif /* UNIV_DEBUG */
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
 #ifdef UNIV_DEBUG
-/**************************************************************************
+/**********************************************************************//**
 Checks that a tuple has n_fields_cmp value in a sensible range, so that
-no comparison can occur with the page number field in a node pointer. */
-
+no comparison can occur with the page number field in a node pointer.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 dict_index_check_search_tuple(
 /*==========================*/
-				/* out: TRUE if ok */
-	dict_index_t*	index,	/* in: index tree */
-	dtuple_t*	tuple)	/* in: tuple used in a search */
+	const dict_index_t*	index,	/*!< in: index tree */
+	const dtuple_t*		tuple)	/*!< in: tuple used in a search */
 {
 	ut_a(index);
 	ut_a(dtuple_get_n_fields_cmp(tuple)
@@ -3558,20 +3912,22 @@ dict_index_check_search_tuple(
 }
 #endif /* UNIV_DEBUG */
 
-/**************************************************************************
-Builds a node pointer out of a physical record and a page number. */
-
+/**********************************************************************//**
+Builds a node pointer out of a physical record and a page number.
+@return	own: node pointer */
+UNIV_INTERN
 dtuple_t*
 dict_index_build_node_ptr(
 /*======================*/
-				/* out, own: node pointer */
-	dict_index_t*	index,	/* in: index tree */
-	rec_t*		rec,	/* in: record for which to build node
-				pointer */
-	ulint		page_no,/* in: page number to put in node pointer */
-	mem_heap_t*	heap,	/* in: memory heap where pointer created */
-	ulint		level)	/* in: level of rec in tree: 0 means leaf
-				level */
+	const dict_index_t*	index,	/*!< in: index */
+	const rec_t*		rec,	/*!< in: record for which to build node
+					pointer */
+	ulint			page_no,/*!< in: page number to put in node
+					pointer */
+	mem_heap_t*		heap,	/*!< in: memory heap where pointer
+					created */
+	ulint			level)	/*!< in: level of rec in tree:
+					0 means leaf level */
 {
 	dtuple_t*	tuple;
 	dfield_t*	field;
@@ -3625,20 +3981,21 @@ dict_index_build_node_ptr(
 	return(tuple);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Copies an initial segment of a physical record, long enough to specify an
-index entry uniquely. */
-
+index entry uniquely.
+@return	pointer to the prefix record */
+UNIV_INTERN
 rec_t*
 dict_index_copy_rec_order_prefix(
 /*=============================*/
-				/* out: pointer to the prefix record */
-	dict_index_t*	index,	/* in: index tree */
-	rec_t*		rec,	/* in: record for which to copy prefix */
-	ulint*		n_fields,/* out: number of fields copied */
-	byte**		buf,	/* in/out: memory buffer for the copied prefix,
-				or NULL */
-	ulint*		buf_size)/* in/out: buffer size */
+	const dict_index_t*	index,	/*!< in: index */
+	const rec_t*		rec,	/*!< in: record for which to
+					copy prefix */
+	ulint*			n_fields,/*!< out: number of fields copied */
+	byte**			buf,	/*!< in/out: memory buffer for the
+					copied prefix, or NULL */
+	ulint*			buf_size)/*!< in/out: buffer size */
 {
 	ulint		n;
 
@@ -3655,17 +4012,17 @@ dict_index_copy_rec_order_prefix(
 	return(rec_copy_prefix_to_buf(rec, index, n, buf, buf_size));
 }
 
-/**************************************************************************
-Builds a typed data tuple out of a physical record. */
-
+/**********************************************************************//**
+Builds a typed data tuple out of a physical record.
+@return	own: data tuple */
+UNIV_INTERN
 dtuple_t*
 dict_index_build_data_tuple(
 /*========================*/
-				/* out, own: data tuple */
-	dict_index_t*	index,	/* in: index tree */
-	rec_t*		rec,	/* in: record for which to build data tuple */
-	ulint		n_fields,/* in: number of data fields */
-	mem_heap_t*	heap)	/* in: memory heap where tuple created */
+	dict_index_t*	index,	/*!< in: index tree */
+	rec_t*		rec,	/*!< in: record for which to build data tuple */
+	ulint		n_fields,/*!< in: number of data fields */
+	mem_heap_t*	heap)	/*!< in: memory heap where tuple created */
 {
 	dtuple_t*	tuple;
 
@@ -3683,24 +4040,25 @@ dict_index_build_data_tuple(
 	return(tuple);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Calculates the minimum record length in an index. */
-
+UNIV_INTERN
 ulint
 dict_index_calc_min_rec_len(
 /*========================*/
-	dict_index_t*	index)	/* in: index */
+	const dict_index_t*	index)	/*!< in: index */
 {
 	ulint	sum	= 0;
 	ulint	i;
+	ulint	comp	= dict_table_is_comp(index->table);
 
-	if (dict_table_is_comp(index->table)) {
+	if (comp) {
 		ulint nullable = 0;
 		sum = REC_N_NEW_EXTRA_BYTES;
 		for (i = 0; i < dict_index_get_n_fields(index); i++) {
 			const dict_col_t*	col
 				= dict_index_get_nth_col(index, i);
-			ulint	size = dict_col_get_fixed_size(col);
+			ulint	size = dict_col_get_fixed_size(col, comp);
 			sum += size;
 			if (!size) {
 				size = col->len;
@@ -3719,7 +4077,7 @@ dict_index_calc_min_rec_len(
 
 	for (i = 0; i < dict_index_get_n_fields(index); i++) {
 		sum += dict_col_get_fixed_size(
-			dict_index_get_nth_col(index, i));
+			dict_index_get_nth_col(index, i), comp);
 	}
 
 	if (sum > 127) {
@@ -3733,16 +4091,16 @@ dict_index_calc_min_rec_len(
 	return(sum);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Calculates new estimates for table and index statistics. The statistics
 are used in query optimization. */
-
+UNIV_INTERN
 void
 dict_update_statistics_low(
 /*=======================*/
-	dict_table_t*	table,		/* in: table */
+	dict_table_t*	table,		/*!< in/out: table */
 	ibool		has_dict_mutex __attribute__((unused)))
-					/* in: TRUE if the caller has the
+					/*!< in: TRUE if the caller has the
 					dictionary mutex */
 {
 	dict_index_t*	index;
@@ -3755,8 +4113,7 @@ dict_update_statistics_low(
 			"  InnoDB: cannot calculate statistics for table %s\n"
 			"InnoDB: because the .ibd file is missing.  For help,"
 			" please refer to\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"innodb-troubleshooting.html\n",
+			"InnoDB: " REFMAN "innodb-troubleshooting.html\n",
 			table->name);
 
 		return;
@@ -3817,37 +4174,25 @@ dict_update_statistics_low(
 	table->stat_modified_counter = 0;
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Calculates new estimates for table and index statistics. The statistics
 are used in query optimization. */
-
+UNIV_INTERN
 void
 dict_update_statistics(
 /*===================*/
-	dict_table_t*	table)	/* in: table */
+	dict_table_t*	table)	/*!< in/out: table */
 {
 	dict_update_statistics_low(table, FALSE);
 }
 
-/**************************************************************************
-A noninlined version of dict_table_get_low. */
-
-dict_table_t*
-dict_table_get_low_noninlined(
-/*==========================*/
-					/* out: table, NULL if not found */
-	const char*	table_name)	/* in: table name */
-{
-	return(dict_table_get_low(table_name));
-}
-
-/**************************************************************************
+/**********************************************************************//**
 Prints info of a foreign key constraint. */
 static
 void
 dict_foreign_print_low(
 /*===================*/
-	dict_foreign_t*	foreign)	/* in: foreign key constraint */
+	dict_foreign_t*	foreign)	/*!< in: foreign key constraint */
 {
 	ulint	i;
 
@@ -3871,26 +4216,26 @@ dict_foreign_print_low(
 	fputs(" )\n", stderr);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Prints a table data. */
-
+UNIV_INTERN
 void
 dict_table_print(
 /*=============*/
-	dict_table_t*	table)	/* in: table */
+	dict_table_t*	table)	/*!< in: table */
 {
 	mutex_enter(&(dict_sys->mutex));
 	dict_table_print_low(table);
 	mutex_exit(&(dict_sys->mutex));
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Prints a table data when we know the table name. */
-
+UNIV_INTERN
 void
 dict_table_print_by_name(
 /*=====================*/
-	const char*	name)
+	const char*	name)	/*!< in: table name */
 {
 	dict_table_t*	table;
 
@@ -3904,13 +4249,13 @@ dict_table_print_by_name(
 	mutex_exit(&(dict_sys->mutex));
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Prints a table data. */
-
+UNIV_INTERN
 void
 dict_table_print_low(
 /*=================*/
-	dict_table_t*	table)	/* in: table */
+	dict_table_t*	table)	/*!< in: table */
 {
 	dict_index_t*	index;
 	dict_foreign_t*	foreign;
@@ -3922,17 +4267,18 @@ dict_table_print_low(
 
 	fprintf(stderr,
 		"--------------------------------------\n"
-		"TABLE: name %s, id %lu %lu, columns %lu, indexes %lu,"
-		" appr.rows %lu\n"
+		"TABLE: name %s, id %lu %lu, flags %lx, columns %lu,"
+		" indexes %lu, appr.rows %lu\n"
 		"  COLUMNS: ",
 		table->name,
 		(ulong) ut_dulint_get_high(table->id),
 		(ulong) ut_dulint_get_low(table->id),
+		(ulong) table->flags,
 		(ulong) table->n_cols,
 		(ulong) UT_LIST_GET_LEN(table->indexes),
 		(ulong) table->stat_n_rows);
 
-	for (i = 0; i + 1 < (ulint) table->n_cols; i++) {
+	for (i = 0; i < (ulint) table->n_cols; i++) {
 		dict_col_print_low(table, dict_table_get_nth_col(table, i));
 		fputs("; ", stderr);
 	}
@@ -3961,14 +4307,14 @@ dict_table_print_low(
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Prints a column data. */
 static
 void
 dict_col_print_low(
 /*===============*/
-	const dict_table_t*	table,	/* in: table */
-	const dict_col_t*	col)	/* in: column */
+	const dict_table_t*	table,	/*!< in: table */
+	const dict_col_t*	col)	/*!< in: column */
 {
 	dtype_t	type;
 
@@ -3981,16 +4327,17 @@ dict_col_print_low(
 	dtype_print(&type);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Prints an index data. */
 static
 void
 dict_index_print_low(
 /*=================*/
-	dict_index_t*	index)	/* in: index */
+	dict_index_t*	index)	/*!< in: index */
 {
-	ib_longlong	n_vals;
+	ib_int64_t	n_vals;
 	ulint		i;
+	const char*	type_string;
 
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 
@@ -4001,6 +4348,14 @@ dict_index_print_low(
 		n_vals = index->stat_n_diff_key_vals[1];
 	}
 
+	if (dict_index_is_clust(index)) {
+		type_string = "clustered index";
+	} else if (dict_index_is_unique(index)) {
+		type_string = "unique index";
+	} else {
+		type_string = "secondary index";
+	}
+
 	fprintf(stderr,
 		"  INDEX: name %s, id %lu %lu, fields %lu/%lu,"
 		" uniq %lu, type %lu\n"
@@ -4032,13 +4387,13 @@ dict_index_print_low(
 #endif /* UNIV_BTR_PRINT */
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Prints a field data. */
 static
 void
 dict_field_print_low(
 /*=================*/
-	dict_field_t*	field)	/* in: field */
+	dict_field_t*	field)	/*!< in: field */
 {
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 
@@ -4049,17 +4404,17 @@ dict_field_print_low(
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Outputs info on a foreign key of a table in a format suitable for
 CREATE TABLE. */
-
+UNIV_INTERN
 void
 dict_print_info_on_foreign_key_in_create_format(
 /*============================================*/
-	FILE*		file,		/* in: file where to print */
-	trx_t*		trx,		/* in: transaction */
-	dict_foreign_t*	foreign,	/* in: foreign key constraint */
-	ibool		add_newline)	/* in: whether to add a newline */
+	FILE*		file,		/*!< in: file where to print */
+	trx_t*		trx,		/*!< in: transaction */
+	dict_foreign_t*	foreign,	/*!< in: foreign key constraint */
+	ibool		add_newline)	/*!< in: whether to add a newline */
 {
 	const char*	stripped_id;
 	ulint	i;
@@ -4147,19 +4502,19 @@ dict_print_info_on_foreign_key_in_create_format(
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Outputs info on foreign keys of a table. */
-
+UNIV_INTERN
 void
 dict_print_info_on_foreign_keys(
 /*============================*/
-	ibool		create_table_format, /* in: if TRUE then print in
+	ibool		create_table_format, /*!< in: if TRUE then print in
 				a format suitable to be inserted into
 				a CREATE TABLE, otherwise in the format
 				of SHOW TABLE STATUS */
-	FILE*		file,	/* in: file where to print */
-	trx_t*		trx,	/* in: transaction */
-	dict_table_t*	table)	/* in: table */
+	FILE*		file,	/*!< in: file where to print */
+	trx_t*		trx,	/*!< in: transaction */
+	dict_table_t*	table)	/*!< in: table */
 {
 	dict_foreign_t*	foreign;
 
@@ -4237,17 +4592,180 @@ dict_print_info_on_foreign_keys(
 	mutex_exit(&(dict_sys->mutex));
 }
 
-/************************************************************************
+/********************************************************************//**
 Displays the names of the index and the table. */
+UNIV_INTERN
 void
 dict_index_name_print(
 /*==================*/
-	FILE*			file,	/* in: output stream */
-	trx_t*			trx,	/* in: transaction */
-	const dict_index_t*	index)	/* in: index to print */
+	FILE*			file,	/*!< in: output stream */
+	trx_t*			trx,	/*!< in: transaction */
+	const dict_index_t*	index)	/*!< in: index to print */
 {
 	fputs("index ", file);
 	ut_print_name(file, trx, FALSE, index->name);
 	fputs(" of table ", file);
 	ut_print_name(file, trx, TRUE, index->table_name);
 }
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Inits dict_ind_redundant and dict_ind_compact. */
+UNIV_INTERN
+void
+dict_ind_init(void)
+/*===============*/
+{
+	dict_table_t*		table;
+
+	/* create dummy table and index for REDUNDANT infimum and supremum */
+	table = dict_mem_table_create("SYS_DUMMY1", DICT_HDR_SPACE, 1, 0);
+	dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
+			       DATA_ENGLISH | DATA_NOT_NULL, 8);
+
+	dict_ind_redundant = dict_mem_index_create("SYS_DUMMY1", "SYS_DUMMY1",
+						   DICT_HDR_SPACE, 0, 1);
+	dict_index_add_col(dict_ind_redundant, table,
+			   dict_table_get_nth_col(table, 0), 0);
+	dict_ind_redundant->table = table;
+	/* create dummy table and index for COMPACT infimum and supremum */
+	table = dict_mem_table_create("SYS_DUMMY2",
+				      DICT_HDR_SPACE, 1, DICT_TF_COMPACT);
+	dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
+			       DATA_ENGLISH | DATA_NOT_NULL, 8);
+	dict_ind_compact = dict_mem_index_create("SYS_DUMMY2", "SYS_DUMMY2",
+						 DICT_HDR_SPACE, 0, 1);
+	dict_index_add_col(dict_ind_compact, table,
+			   dict_table_get_nth_col(table, 0), 0);
+	dict_ind_compact->table = table;
+
+	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
+	dict_ind_redundant->cached = dict_ind_compact->cached = TRUE;
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Get index by name
+@return	index, NULL if does not exist */
+UNIV_INTERN
+dict_index_t*
+dict_table_get_index_on_name(
+/*=========================*/
+	dict_table_t*	table,	/*!< in: table */
+	const char*	name)	/*!< in: name of the index to find */
+{
+	dict_index_t*	index;
+
+	index = dict_table_get_first_index(table);
+
+	while (index != NULL) {
+		if (ut_strcmp(index->name, name) == 0) {
+
+			return(index);
+		}
+
+		index = dict_table_get_next_index(index);
+	}
+
+	return(NULL);
+
+}
+
+/**********************************************************************//**
+Replace the index passed in with another equivalent index in the tables
+foreign key list. */
+UNIV_INTERN
+void
+dict_table_replace_index_in_foreign_list(
+/*=====================================*/
+	dict_table_t*	table,  /*!< in/out: table */
+	dict_index_t*	index)	/*!< in: index to be replaced */
+{
+	dict_foreign_t*	foreign;
+
+	for (foreign = UT_LIST_GET_FIRST(table->foreign_list);
+	     foreign;
+	     foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
+
+		if (foreign->foreign_index == index) {
+			dict_index_t*	new_index
+				= dict_foreign_find_equiv_index(foreign);
+			ut_a(new_index);
+
+			foreign->foreign_index = new_index;
+		}
+	}
+}
+
+/**********************************************************************//**
+In case there is more than one index with the same name return the index
+with the min(id).
+@return	index, NULL if does not exist */
+UNIV_INTERN
+dict_index_t*
+dict_table_get_index_on_name_and_min_id(
+/*=====================================*/
+	dict_table_t*	table,	/*!< in: table */
+	const char*	name)	/*!< in: name of the index to find */
+{
+	dict_index_t*	index;
+	dict_index_t*	min_index; /* Index with matching name and min(id) */
+
+	min_index = NULL;
+	index = dict_table_get_first_index(table);
+
+	while (index != NULL) {
+		if (ut_strcmp(index->name, name) == 0) {
+			if (!min_index
+			    || ut_dulint_cmp(index->id, min_index->id) < 0) {
+
+				min_index = index;
+			}
+		}
+
+		index = dict_table_get_next_index(index);
+	}
+
+	return(min_index);
+
+}
+
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Check for duplicate index entries in a table [using the index name] */
+UNIV_INTERN
+void
+dict_table_check_for_dup_indexes(
+/*=============================*/
+	const dict_table_t*	table)	/*!< in: Check for dup indexes
+					in this table */
+{
+	/* Check for duplicates, ignoring indexes that are marked
+	as to be dropped */
+
+	const dict_index_t*	index1;
+	const dict_index_t*	index2;
+
+	/* The primary index _must_ exist */
+	ut_a(UT_LIST_GET_LEN(table->indexes) > 0);
+
+	index1 = UT_LIST_GET_FIRST(table->indexes);
+	index2 = UT_LIST_GET_NEXT(indexes, index1);
+
+	while (index1 && index2) {
+
+		while (index2) {
+
+			if (!index2->to_be_dropped) {
+				ut_ad(ut_strcmp(index1->name, index2->name));
+			}
+
+			index2 = UT_LIST_GET_NEXT(indexes, index2);
+		}
+
+		index1 = UT_LIST_GET_NEXT(indexes, index1);
+		index2 = UT_LIST_GET_NEXT(indexes, index1);
+	}
+}
+#endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/dict/dict0load.c b/storage/innodb_plugin/dict/dict0load.c
similarity index 79%
rename from storage/innobase/dict/dict0load.c
rename to storage/innodb_plugin/dict/dict0load.c
index 65f1c9536bd..842a129c1a6 100644
--- a/storage/innobase/dict/dict0load.c
+++ b/storage/innodb_plugin/dict/dict0load.c
@@ -1,16 +1,31 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0load.c
 Loads to the memory cache database object definitions
 from dictionary tables
 
-(c) 1996 Innobase Oy
-
 Created 4/24/1996 Heikki Tuuri
 *******************************************************/
 
 #include "dict0load.h"
-#ifndef UNIV_HOTBACKUP
 #include "mysql_version.h"
-#endif /* !UNIV_HOTBACKUP */
 
 #ifdef UNIV_NONINL
 #include "dict0load.ic"
@@ -26,17 +41,17 @@ Created 4/24/1996 Heikki Tuuri
 #include "srv0start.h"
 #include "srv0srv.h"
 
-/********************************************************************
-Returns TRUE if index's i'th column's name is 'name' .*/
+/****************************************************************//**
+Compare the name of an index column.
+@return	TRUE if the i'th column of index is 'name'. */
 static
 ibool
 name_of_col_is(
 /*===========*/
-				/* out: */
-	dict_table_t*	table,	/* in: table */
-	dict_index_t*	index,	/* in: index */
-	ulint		i,	/* in:  */
-	const char*	name)	/* in: name to compare to */
+	const dict_table_t*	table,	/*!< in: table */
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			i,	/*!< in: index field offset */
+	const char*		name)	/*!< in: name to compare to */
 {
 	ulint	tmp = dict_col_get_no(dict_field_get_col(
 					      dict_index_get_nth_field(
@@ -45,16 +60,15 @@ name_of_col_is(
 	return(strcmp(name, dict_table_get_col_name(table, tmp)) == 0);
 }
 
-/************************************************************************
-Finds the first table name in the given database. */
-
+/********************************************************************//**
+Finds the first table name in the given database.
+@return own: table name, NULL if does not exist; the caller must free
+the memory in the string! */
+UNIV_INTERN
 char*
 dict_get_first_table_name_in_db(
 /*============================*/
-				/* out, own: table name, NULL if
-				does not exist; the caller must
-				free the memory in the string! */
-	const char*	name)	/* in: database name which ends in '/' */
+	const char*	name)	/*!< in: database name which ends in '/' */
 {
 	dict_table_t*	sys_tables;
 	btr_pcur_t	pcur;
@@ -62,8 +76,8 @@ dict_get_first_table_name_in_db(
 	dtuple_t*	tuple;
 	mem_heap_t*	heap;
 	dfield_t*	dfield;
-	rec_t*		rec;
-	byte*		field;
+	const rec_t*	rec;
+	const byte*	field;
 	ulint		len;
 	mtr_t		mtr;
 
@@ -88,7 +102,7 @@ dict_get_first_table_name_in_db(
 loop:
 	rec = btr_pcur_get_rec(&pcur);
 
-	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+	if (!btr_pcur_is_on_user_rec(&pcur)) {
 		/* Not found */
 
 		btr_pcur_close(&pcur);
@@ -129,10 +143,10 @@ loop:
 	goto loop;
 }
 
-/************************************************************************
+/********************************************************************//**
 Prints to the standard output information on all tables found in the data
 dictionary system table. */
-
+UNIV_INTERN
 void
 dict_print(void)
 /*============*/
@@ -141,8 +155,8 @@ dict_print(void)
 	dict_index_t*	sys_index;
 	dict_table_t*	table;
 	btr_pcur_t	pcur;
-	rec_t*		rec;
-	byte*		field;
+	const rec_t*	rec;
+	const byte*	field;
 	ulint		len;
 	mtr_t		mtr;
 
@@ -167,7 +181,7 @@ loop:
 
 	rec = btr_pcur_get_rec(&pcur);
 
-	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+	if (!btr_pcur_is_on_user_rec(&pcur)) {
 		/* end of index */
 
 		btr_pcur_close(&pcur);
@@ -222,7 +236,69 @@ loop:
 	goto loop;
 }
 
-/************************************************************************
+/********************************************************************//**
+Determine the flags of a table described in SYS_TABLES.
+@return compressed page size in kilobytes; or 0 if the tablespace is
+uncompressed, ULINT_UNDEFINED on error */
+static
+ulint
+dict_sys_tables_get_flags(
+/*======================*/
+	const rec_t*	rec)	/*!< in: a record of SYS_TABLES */
+{
+	const byte*	field;
+	ulint		len;
+	ulint		n_cols;
+	ulint		flags;
+
+	field = rec_get_nth_field_old(rec, 5, &len);
+	ut_a(len == 4);
+
+	flags = mach_read_from_4(field);
+
+	if (UNIV_LIKELY(flags == DICT_TABLE_ORDINARY)) {
+		return(0);
+	}
+
+	field = rec_get_nth_field_old(rec, 4, &len);
+	n_cols = mach_read_from_4(field);
+
+	if (UNIV_UNLIKELY(!(n_cols & 0x80000000UL))) {
+		/* New file formats require ROW_FORMAT=COMPACT. */
+		return(ULINT_UNDEFINED);
+	}
+
+	switch (flags & (DICT_TF_FORMAT_MASK | DICT_TF_COMPACT)) {
+	default:
+	case DICT_TF_FORMAT_51 << DICT_TF_FORMAT_SHIFT:
+	case DICT_TF_FORMAT_51 << DICT_TF_FORMAT_SHIFT | DICT_TF_COMPACT:
+		/* flags should be DICT_TABLE_ORDINARY,
+		or DICT_TF_FORMAT_MASK should be nonzero. */
+		return(ULINT_UNDEFINED);
+
+	case DICT_TF_FORMAT_ZIP << DICT_TF_FORMAT_SHIFT | DICT_TF_COMPACT:
+#if DICT_TF_FORMAT_MAX > DICT_TF_FORMAT_ZIP
+# error "missing case labels for DICT_TF_FORMAT_ZIP .. DICT_TF_FORMAT_MAX"
+#endif
+		/* We support this format. */
+		break;
+	}
+
+	if (UNIV_UNLIKELY((flags & DICT_TF_ZSSIZE_MASK)
+			  > (DICT_TF_ZSSIZE_MAX << DICT_TF_ZSSIZE_SHIFT))) {
+		/* Unsupported compressed page size. */
+		return(ULINT_UNDEFINED);
+	}
+
+	if (UNIV_UNLIKELY(flags & (~0 << DICT_TF_BITS))) {
+		/* Some unused bits are set. */
+		return(ULINT_UNDEFINED);
+	}
+
+	return(flags);
+}
+
+/********************************************************************//**
 In a crash recovery we already have all the tablespace objects created.
 This function compares the space id information in the InnoDB data dictionary
 to what we already read with fil_load_single_table_tablespaces().
@@ -230,19 +306,16 @@ to what we already read with fil_load_single_table_tablespaces().
 In a normal startup, we create the tablespace objects for every table in
 InnoDB's data dictionary, if the corresponding .ibd file exists.
 We also scan the biggest space id, and store it to fil_system. */
-
+UNIV_INTERN
 void
 dict_check_tablespaces_and_store_max_id(
 /*====================================*/
-	ibool	in_crash_recovery)	/* in: are we doing a crash recovery */
+	ibool	in_crash_recovery)	/*!< in: are we doing a crash recovery */
 {
 	dict_table_t*	sys_tables;
 	dict_index_t*	sys_index;
 	btr_pcur_t	pcur;
-	rec_t*		rec;
-	byte*		field;
-	ulint		len;
-	ulint		space_id;
+	const rec_t*	rec;
 	ulint		max_space_id	= 0;
 	mtr_t		mtr;
 
@@ -261,7 +334,7 @@ loop:
 
 	rec = btr_pcur_get_rec(&pcur);
 
-	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+	if (!btr_pcur_is_on_user_rec(&pcur)) {
 		/* end of index */
 
 		btr_pcur_close(&pcur);
@@ -279,13 +352,34 @@ loop:
 		return;
 	}
 
-	field = rec_get_nth_field_old(rec, 0, &len);
-
 	if (!rec_get_deleted_flag(rec, 0)) {
 
 		/* We found one */
+		const byte*	field;
+		ulint		len;
+		ulint		space_id;
+		ulint		flags;
+		char*		name;
 
-		char*	name = mem_strdupl((char*) field, len);
+		field = rec_get_nth_field_old(rec, 0, &len);
+		name = mem_strdupl((char*) field, len);
+
+		flags = dict_sys_tables_get_flags(rec);
+		if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
+
+			field = rec_get_nth_field_old(rec, 5, &len);
+			flags = mach_read_from_4(field);
+
+			ut_print_timestamp(stderr);
+			fputs("  InnoDB: Error: table ", stderr);
+			ut_print_filename(stderr, name);
+			fprintf(stderr, "\n"
+				"InnoDB: in InnoDB data dictionary"
+				" has unknown type %lx.\n",
+				(ulong) flags);
+
+			goto loop;
+		}
 
 		field = rec_get_nth_field_old(rec, 9, &len);
 		ut_a(len == 4);
@@ -309,7 +403,7 @@ loop:
 			object and check that the .ibd file exists. */
 
 			fil_open_single_table_tablespace(FALSE, space_id,
-							 name);
+							 flags, name);
 		}
 
 		mem_free(name);
@@ -326,22 +420,22 @@ loop:
 	goto loop;
 }
 
-/************************************************************************
+/********************************************************************//**
 Loads definitions for table columns. */
 static
 void
 dict_load_columns(
 /*==============*/
-	dict_table_t*	table,	/* in: table */
-	mem_heap_t*	heap)	/* in: memory heap for temporary storage */
+	dict_table_t*	table,	/*!< in: table */
+	mem_heap_t*	heap)	/*!< in: memory heap for temporary storage */
 {
 	dict_table_t*	sys_columns;
 	dict_index_t*	sys_index;
 	btr_pcur_t	pcur;
 	dtuple_t*	tuple;
 	dfield_t*	dfield;
-	rec_t*		rec;
-	byte*		field;
+	const rec_t*	rec;
+	const byte*	field;
 	ulint		len;
 	byte*		buf;
 	char*		name;
@@ -374,7 +468,7 @@ dict_load_columns(
 
 		rec = btr_pcur_get_rec(&pcur);
 
-		ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
+		ut_a(btr_pcur_is_on_user_rec(&pcur));
 
 		ut_a(!rec_get_deleted_flag(rec, 0));
 
@@ -432,34 +526,14 @@ dict_load_columns(
 	mtr_commit(&mtr);
 }
 
-/************************************************************************
-Report that an index field or index for a table has been delete marked. */
-static
-void
-dict_load_report_deleted_index(
-/*===========================*/
-	const char*	name,	/* in: table name */
-	ulint		field)	/* in: index field, or ULINT_UNDEFINED */
-{
-	fprintf(stderr, "InnoDB: Error: data dictionary entry"
-		" for table %s is corrupt!\n", name);
-	if (field != ULINT_UNDEFINED) {
-		fprintf(stderr,
-			"InnoDB: Index field %lu is delete marked.\n", field);
-	} else {
-		fputs("InnoDB: An index is delete marked.\n", stderr);
-	}
-}
-
-/************************************************************************
+/********************************************************************//**
 Loads definitions for index fields. */
 static
 void
 dict_load_fields(
 /*=============*/
-	dict_table_t*	table,	/* in: table */
-	dict_index_t*	index,	/* in: index whose fields to load */
-	mem_heap_t*	heap)	/* in: memory heap for temporary storage */
+	dict_index_t*	index,	/*!< in: index whose fields to load */
+	mem_heap_t*	heap)	/*!< in: memory heap for temporary storage */
 {
 	dict_table_t*	sys_fields;
 	dict_index_t*	sys_index;
@@ -468,8 +542,8 @@ dict_load_fields(
 	dfield_t*	dfield;
 	ulint		pos_and_prefix_len;
 	ulint		prefix_len;
-	rec_t*		rec;
-	byte*		field;
+	const rec_t*	rec;
+	const byte*	field;
 	ulint		len;
 	byte*		buf;
 	ulint		i;
@@ -498,14 +572,19 @@ dict_load_fields(
 
 		rec = btr_pcur_get_rec(&pcur);
 
-		ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
+		ut_a(btr_pcur_is_on_user_rec(&pcur));
+
+		/* There could be delete marked records in SYS_FIELDS
+		because SYS_FIELDS.INDEX_ID can be updated
+		by ALTER TABLE ADD INDEX. */
+
 		if (rec_get_deleted_flag(rec, 0)) {
-			dict_load_report_deleted_index(table->name, i);
+
+			goto next_rec;
 		}
 
 		field = rec_get_nth_field_old(rec, 0, &len);
 		ut_ad(len == 8);
-		ut_a(ut_memcmp(buf, field, len) == 0);
 
 		field = rec_get_nth_field_old(rec, 1, &len);
 		ut_a(len == 4);
@@ -540,6 +619,7 @@ dict_load_fields(
 							  (char*) field, len),
 					 prefix_len);
 
+next_rec:
 		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
 	}
 
@@ -547,19 +627,17 @@ dict_load_fields(
 	mtr_commit(&mtr);
 }
 
-/************************************************************************
+/********************************************************************//**
 Loads definitions for table indexes. Adds them to the data dictionary
-cache. */
+cache.
+@return DB_SUCCESS if ok, DB_CORRUPTION if corruption of dictionary
+table or DB_UNSUPPORTED if table has unknown index type */
 static
 ulint
 dict_load_indexes(
 /*==============*/
-				/* out: DB_SUCCESS if ok, DB_CORRUPTION
-				if corruption of dictionary table or
-				DB_UNSUPPORTED if table has unknown index
-				type */
-	dict_table_t*	table,	/* in: table */
-	mem_heap_t*	heap)	/* in: memory heap for temporary storage */
+	dict_table_t*	table,	/*!< in: table */
+	mem_heap_t*	heap)	/*!< in: memory heap for temporary storage */
 {
 	dict_table_t*	sys_indexes;
 	dict_index_t*	sys_index;
@@ -567,8 +645,8 @@ dict_load_indexes(
 	btr_pcur_t	pcur;
 	dtuple_t*	tuple;
 	dfield_t*	dfield;
-	rec_t*		rec;
-	byte*		field;
+	const rec_t*	rec;
+	const byte*	field;
 	ulint		len;
 	ulint		name_len;
 	char*		name_buf;
@@ -609,7 +687,7 @@ dict_load_indexes(
 	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
 				  BTR_SEARCH_LEAF, &pcur, &mtr);
 	for (;;) {
-		if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+		if (!btr_pcur_is_on_user_rec(&pcur)) {
 
 			break;
 		}
@@ -621,14 +699,9 @@ dict_load_indexes(
 
 		if (ut_memcmp(buf, field, len) != 0) {
 			break;
-		}
-
-		if (rec_get_deleted_flag(rec, 0)) {
-			dict_load_report_deleted_index(table->name,
-						       ULINT_UNDEFINED);
-
-			error = DB_CORRUPTION;
-			goto func_exit;
+		} else if (rec_get_deleted_flag(rec, 0)) {
+			/* Skip delete marked records */
+			goto next_rec;
 		}
 
 		field = rec_get_nth_field_old(rec, 1, &len);
@@ -678,12 +751,13 @@ dict_load_indexes(
 		} else if ((type & DICT_CLUSTERED) == 0
 			    && NULL == dict_table_get_first_index(table)) {
 
-			fprintf(stderr,
-				"InnoDB: Error: trying to load index %s"
-				" for table %s\n"
-				"InnoDB: but the first index"
-				" is not clustered!\n",
-				name_buf, table->name);
+			fputs("InnoDB: Error: trying to load index ",
+			      stderr);
+			ut_print_name(stderr, NULL, FALSE, name_buf);
+			fputs(" for table ", stderr);
+			ut_print_name(stderr, NULL, TRUE, table->name);
+			fputs("\nInnoDB: but the first index"
+			      " is not clustered!\n", stderr);
 
 			error = DB_CORRUPTION;
 			goto func_exit;
@@ -701,10 +775,21 @@ dict_load_indexes(
 						      space, type, n_fields);
 			index->id = id;
 
-			dict_load_fields(table, index, heap);
-			dict_index_add_to_cache(table, index, page_no);
+			dict_load_fields(index, heap);
+			error = dict_index_add_to_cache(table, index, page_no,
+							FALSE);
+			/* The data dictionary tables should never contain
+			invalid index definitions.  If we ignored this error
+			and simply did not load this index definition, the
+			.frm file would disagree with the index definitions
+			inside InnoDB. */
+			if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+
+				goto func_exit;
+			}
 		}
 
+next_rec:
 		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
 	}
 
@@ -715,22 +800,20 @@ func_exit:
 	return(error);
 }
 
-/************************************************************************
+/********************************************************************//**
 Loads a table definition and also all its index definitions, and also
 the cluster definition if the table is a member in a cluster. Also loads
 all foreign key constraints where the foreign key is in the table or where
 a foreign key references columns in this table. Adds all these to the data
-dictionary cache. */
-
+dictionary cache.
+@return table, NULL if does not exist; if the table is stored in an
+.ibd file, but the file does not exist, then we set the
+ibd_file_missing flag TRUE in the table object we return */
+UNIV_INTERN
 dict_table_t*
 dict_load_table(
 /*============*/
-				/* out: table, NULL if does not exist;
-				if the table is stored in an .ibd file,
-				but the file does not exist,
-				then we set the ibd_file_missing flag TRUE
-				in the table object we return */
-	const char*	name)	/* in: table name in the
+	const char*	name)	/*!< in: table name in the
 				databasename/tablename format */
 {
 	ibool		ibd_file_missing	= FALSE;
@@ -741,8 +824,8 @@ dict_load_table(
 	dtuple_t*	tuple;
 	mem_heap_t*	heap;
 	dfield_t*	dfield;
-	rec_t*		rec;
-	byte*		field;
+	const rec_t*	rec;
+	const byte*	field;
 	ulint		len;
 	ulint		space;
 	ulint		n_cols;
@@ -770,7 +853,7 @@ dict_load_table(
 				  BTR_SEARCH_LEAF, &pcur, &mtr);
 	rec = btr_pcur_get_rec(&pcur);
 
-	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
+	if (!btr_pcur_is_on_user_rec(&pcur)
 	    || rec_get_deleted_flag(rec, 0)) {
 		/* Not found */
 err_exit:
@@ -796,6 +879,22 @@ err_exit:
 
 	/* Check if the tablespace exists and has the right name */
 	if (space != 0) {
+		flags = dict_sys_tables_get_flags(rec);
+
+		if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
+			field = rec_get_nth_field_old(rec, 5, &len);
+			flags = mach_read_from_4(field);
+
+			ut_print_timestamp(stderr);
+			fputs("  InnoDB: Error: table ", stderr);
+			ut_print_filename(stderr, name);
+			fprintf(stderr, "\n"
+				"InnoDB: in InnoDB data dictionary"
+				" has unknown type %lx.\n",
+				(ulong) flags);
+			goto err_exit;
+		}
+
 		if (fil_space_for_table_exists_in_mem(space, name, FALSE,
 						      FALSE, FALSE)) {
 			/* Ok; (if we did a crash recovery then the tablespace
@@ -812,14 +911,16 @@ err_exit:
 				" Retrying an open.\n",
 				name, (ulong)space);
 			/* Try to open the tablespace */
-			if (!fil_open_single_table_tablespace(TRUE,
-							      space, name)) {
+			if (!fil_open_single_table_tablespace(
+				    TRUE, space, flags, name)) {
 				/* We failed to find a sensible tablespace
 				file */
 
 				ibd_file_missing = TRUE;
 			}
 		}
+	} else {
+		flags = 0;
 	}
 
 	ut_a(name_of_col_is(sys_tables, sys_index, 4, "N_COLS"));
@@ -827,8 +928,6 @@ err_exit:
 	field = rec_get_nth_field_old(rec, 4, &len);
 	n_cols = mach_read_from_4(field);
 
-	flags = 0;
-
 	/* The high-order bit of N_COLS is the "compact format" flag. */
 	if (n_cols & 0x80000000UL) {
 		flags |= DICT_TF_COMPACT;
@@ -844,15 +943,6 @@ err_exit:
 	field = rec_get_nth_field_old(rec, 3, &len);
 	table->id = mach_read_from_8(field);
 
-	field = rec_get_nth_field_old(rec, 5, &len);
-	if (UNIV_UNLIKELY(mach_read_from_4(field) != DICT_TABLE_ORDINARY)) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"  InnoDB: table %s: unknown table type %lu\n",
-			name, (ulong) mach_read_from_4(field));
-		goto err_exit;
-	}
-
 	btr_pcur_close(&pcur);
 	mtr_commit(&mtr);
 
@@ -903,14 +993,14 @@ err_exit:
 	return(table);
 }
 
-/***************************************************************************
-Loads a table object based on the table id. */
-
+/***********************************************************************//**
+Loads a table object based on the table id.
+@return	table; NULL if table does not exist */
+UNIV_INTERN
 dict_table_t*
 dict_load_table_on_id(
 /*==================*/
-				/* out: table; NULL if table does not exist */
-	dulint	table_id)	/* in: table id */
+	dulint	table_id)	/*!< in: table id */
 {
 	byte		id_buf[8];
 	btr_pcur_t	pcur;
@@ -919,8 +1009,8 @@ dict_load_table_on_id(
 	dfield_t*	dfield;
 	dict_index_t*	sys_table_ids;
 	dict_table_t*	sys_tables;
-	rec_t*		rec;
-	byte*		field;
+	const rec_t*	rec;
+	const byte*	field;
 	ulint		len;
 	dict_table_t*	table;
 	mtr_t		mtr;
@@ -953,7 +1043,7 @@ dict_load_table_on_id(
 				  BTR_SEARCH_LEAF, &pcur, &mtr);
 	rec = btr_pcur_get_rec(&pcur);
 
-	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
+	if (!btr_pcur_is_on_user_rec(&pcur)
 	    || rec_get_deleted_flag(rec, 0)) {
 		/* Not found */
 
@@ -994,15 +1084,15 @@ dict_load_table_on_id(
 	return(table);
 }
 
-/************************************************************************
+/********************************************************************//**
 This function is called when the database is booted. Loads system table
 index definitions except for the clustered index which is added to the
 dictionary cache at booting before calling this function. */
-
+UNIV_INTERN
 void
 dict_load_sys_table(
 /*================*/
-	dict_table_t*	table)	/* in: system table */
+	dict_table_t*	table)	/*!< in: system table */
 {
 	mem_heap_t*	heap;
 
@@ -1015,23 +1105,23 @@ dict_load_sys_table(
 	mem_heap_free(heap);
 }
 
-/************************************************************************
+/********************************************************************//**
 Loads foreign key constraint col names (also for the referenced table). */
 static
 void
 dict_load_foreign_cols(
 /*===================*/
-	const char*	id,	/* in: foreign constraint id as a
+	const char*	id,	/*!< in: foreign constraint id as a
 				null-terminated string */
-	dict_foreign_t*	foreign)/* in: foreign constraint object */
+	dict_foreign_t*	foreign)/*!< in: foreign constraint object */
 {
 	dict_table_t*	sys_foreign_cols;
 	dict_index_t*	sys_index;
 	btr_pcur_t	pcur;
 	dtuple_t*	tuple;
 	dfield_t*	dfield;
-	rec_t*		rec;
-	byte*		field;
+	const rec_t*	rec;
+	const byte*	field;
 	ulint		len;
 	ulint		i;
 	mtr_t		mtr;
@@ -1061,7 +1151,7 @@ dict_load_foreign_cols(
 
 		rec = btr_pcur_get_rec(&pcur);
 
-		ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
+		ut_a(btr_pcur_is_on_user_rec(&pcur));
 		ut_a(!rec_get_deleted_flag(rec, 0));
 
 		field = rec_get_nth_field_old(rec, 0, &len);
@@ -1087,17 +1177,17 @@ dict_load_foreign_cols(
 	mtr_commit(&mtr);
 }
 
-/***************************************************************************
-Loads a foreign key constraint to the dictionary cache. */
+/***********************************************************************//**
+Loads a foreign key constraint to the dictionary cache.
+@return	DB_SUCCESS or error code */
 static
 ulint
 dict_load_foreign(
 /*==============*/
-				/* out: DB_SUCCESS or error code */
-	const char*	id,	/* in: foreign constraint id as a
+	const char*	id,	/*!< in: foreign constraint id as a
 				null-terminated string */
 	ibool		check_charsets)
-				/* in: TRUE=check charset compatibility */
+				/*!< in: TRUE=check charset compatibility */
 {
 	dict_foreign_t*	foreign;
 	dict_table_t*	sys_foreign;
@@ -1106,8 +1196,8 @@ dict_load_foreign(
 	dtuple_t*	tuple;
 	mem_heap_t*	heap2;
 	dfield_t*	dfield;
-	rec_t*		rec;
-	byte*		field;
+	const rec_t*	rec;
+	const byte*	field;
 	ulint		len;
 	ulint		n_fields_and_type;
 	mtr_t		mtr;
@@ -1132,7 +1222,7 @@ dict_load_foreign(
 				  BTR_SEARCH_LEAF, &pcur, &mtr);
 	rec = btr_pcur_get_rec(&pcur);
 
-	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
+	if (!btr_pcur_is_on_user_rec(&pcur)
 	    || rec_get_deleted_flag(rec, 0)) {
 		/* Not found */
 
@@ -1212,19 +1302,19 @@ dict_load_foreign(
 	return(dict_foreign_add_to_cache(foreign, check_charsets));
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Loads foreign key constraints where the table is either the foreign key
 holder or where the table is referenced by a foreign key. Adds these
 constraints to the data dictionary. Note that we know that the dictionary
 cache already contains all constraints where the other relevant table is
-already in the dictionary cache. */
-
+already in the dictionary cache.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
 ulint
 dict_load_foreigns(
 /*===============*/
-					/* out: DB_SUCCESS or error code */
-	const char*	table_name,	/* in: table name */
-	ibool		check_charsets)	/* in: TRUE=check charset
+	const char*	table_name,	/*!< in: table name */
+	ibool		check_charsets)	/*!< in: TRUE=check charset
 					compatibility */
 {
 	btr_pcur_t	pcur;
@@ -1233,8 +1323,8 @@ dict_load_foreigns(
 	dfield_t*	dfield;
 	dict_index_t*	sec_index;
 	dict_table_t*	sys_foreign;
-	rec_t*		rec;
-	byte*		field;
+	const rec_t*	rec;
+	const byte*	field;
 	ulint		len;
 	char*		id ;
 	ulint		err;
@@ -1276,7 +1366,7 @@ start_load:
 loop:
 	rec = btr_pcur_get_rec(&pcur);
 
-	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+	if (!btr_pcur_is_on_user_rec(&pcur)) {
 		/* End of index */
 
 		goto load_next_index;
diff --git a/storage/innobase/dict/dict0mem.c b/storage/innodb_plugin/dict/dict0mem.c
similarity index 60%
rename from storage/innobase/dict/dict0mem.c
rename to storage/innodb_plugin/dict/dict0mem.c
index 168771ca307..6458cbab92d 100644
--- a/storage/innobase/dict/dict0mem.c
+++ b/storage/innodb_plugin/dict/dict0mem.c
@@ -1,7 +1,24 @@
-/**********************************************************************
-Data dictionary memory object creation
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file dict/dict0mem.c
+Data dictionary memory object creation
 
 Created 1/8/1996 Heikki Tuuri
 ***********************************************************************/
@@ -16,76 +33,50 @@ Created 1/8/1996 Heikki Tuuri
 #include "data0type.h"
 #include "mach0data.h"
 #include "dict0dict.h"
-#include "que0que.h"
-#include "pars0pars.h"
-#include "lock0lock.h"
+#ifndef UNIV_HOTBACKUP
+# include "lock0lock.h"
+#endif /* !UNIV_HOTBACKUP */
 
-#define	DICT_HEAP_SIZE		100	/* initial memory heap size when
+#define	DICT_HEAP_SIZE		100	/*!< initial memory heap size when
 					creating a table or index object */
 
-/**************************************************************************
-Creates a table memory object. */
-
+/**********************************************************************//**
+Creates a table memory object.
+@return	own: table object */
+UNIV_INTERN
 dict_table_t*
 dict_mem_table_create(
 /*==================*/
-				/* out, own: table object */
-	const char*	name,	/* in: table name */
-	ulint		space,	/* in: space where the clustered index of
+	const char*	name,	/*!< in: table name */
+	ulint		space,	/*!< in: space where the clustered index of
 				the table is placed; this parameter is
 				ignored if the table is made a member of
 				a cluster */
-	ulint		n_cols,	/* in: number of columns */
-	ulint		flags)	/* in: table flags */
+	ulint		n_cols,	/*!< in: number of columns */
+	ulint		flags)	/*!< in: table flags */
 {
 	dict_table_t*	table;
 	mem_heap_t*	heap;
 
 	ut_ad(name);
-	ut_ad(!(flags & ~DICT_TF_COMPACT));
+	ut_a(!(flags & (~0 << DICT_TF_BITS)));
 
 	heap = mem_heap_create(DICT_HEAP_SIZE);
 
-	table = mem_heap_alloc(heap, sizeof(dict_table_t));
+	table = mem_heap_zalloc(heap, sizeof(dict_table_t));
 
 	table->heap = heap;
 
 	table->flags = (unsigned int) flags;
 	table->name = mem_heap_strdup(heap, name);
-	table->dir_path_of_temp_table = NULL;
 	table->space = (unsigned int) space;
-	table->ibd_file_missing = FALSE;
-	table->tablespace_discarded = FALSE;
-	table->n_def = 0;
 	table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS);
 
-	table->n_mysql_handles_opened = 0;
-	table->n_foreign_key_checks_running = 0;
-
-	table->cached = FALSE;
-
 	table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
 				     * sizeof(dict_col_t));
-	table->col_names = NULL;
-	UT_LIST_INIT(table->indexes);
 
-	table->auto_inc_lock = mem_heap_alloc(heap, lock_get_size());
-
-	table->query_cache_inv_trx_id = ut_dulint_zero;
-
-	UT_LIST_INIT(table->locks);
-	UT_LIST_INIT(table->foreign_list);
-	UT_LIST_INIT(table->referenced_list);
-
-#ifdef UNIV_DEBUG
-	table->does_not_fit_in_memory = FALSE;
-#endif /* UNIV_DEBUG */
-
-	table->stat_initialized = FALSE;
-
-	table->stat_modified_counter = 0;
-
-	table->big_rows = 0;
+#ifndef UNIV_HOTBACKUP
+	table->autoinc_lock = mem_heap_alloc(heap, lock_get_size());
 
 	mutex_create(&table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);
 
@@ -94,40 +85,42 @@ dict_mem_table_create(
 	/* The number of transactions that are either waiting on the
 	AUTOINC lock or have been granted the lock. */
 	table->n_waiting_or_granted_auto_inc_locks = 0;
+#endif /* !UNIV_HOTBACKUP */
 
-#ifdef UNIV_DEBUG
-	table->magic_n = DICT_TABLE_MAGIC_N;
-#endif /* UNIV_DEBUG */
+	ut_d(table->magic_n = DICT_TABLE_MAGIC_N);
 	return(table);
 }
 
-/********************************************************************
+/****************************************************************//**
 Free a table memory object. */
-
+UNIV_INTERN
 void
 dict_mem_table_free(
 /*================*/
-	dict_table_t*	table)		/* in: table */
+	dict_table_t*	table)		/*!< in: table */
 {
 	ut_ad(table);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_d(table->cached = FALSE);
 
+#ifndef UNIV_HOTBACKUP
 	mutex_free(&(table->autoinc_mutex));
+#endif /* UNIV_HOTBACKUP */
 	mem_heap_free(table->heap);
 }
 
-/********************************************************************
-Append 'name' to 'col_names' (@see dict_table_t::col_names). */
+/****************************************************************//**
+Append 'name' to 'col_names'.  @see dict_table_t::col_names
+@return	new column names array */
 static
 const char*
 dict_add_col_name(
 /*==============*/
-					/* out: new column names array */
-	const char*	col_names,	/* in: existing column names, or
+	const char*	col_names,	/*!< in: existing column names, or
 					NULL */
-	ulint		cols,		/* in: number of existing columns */
-	const char*	name,		/* in: new column name */
-	mem_heap_t*	heap)		/* in: heap */
+	ulint		cols,		/*!< in: number of existing columns */
+	const char*	name,		/*!< in: new column name */
+	mem_heap_t*	heap)		/*!< in: heap */
 {
 	ulint	old_len;
 	ulint	new_len;
@@ -164,22 +157,24 @@ dict_add_col_name(
 	return(res);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Adds a column definition to a table. */
-
+UNIV_INTERN
 void
 dict_mem_table_add_col(
 /*===================*/
-	dict_table_t*	table,	/* in: table */
-	mem_heap_t*	heap,	/* in: temporary memory heap, or NULL */
-	const char*	name,	/* in: column name, or NULL */
-	ulint		mtype,	/* in: main datatype */
-	ulint		prtype,	/* in: precise type */
-	ulint		len)	/* in: precision */
+	dict_table_t*	table,	/*!< in: table */
+	mem_heap_t*	heap,	/*!< in: temporary memory heap, or NULL */
+	const char*	name,	/*!< in: column name, or NULL */
+	ulint		mtype,	/*!< in: main datatype */
+	ulint		prtype,	/*!< in: precise type */
+	ulint		len)	/*!< in: precision */
 {
 	dict_col_t*	col;
+#ifndef UNIV_HOTBACKUP
 	ulint		mbminlen;
 	ulint		mbmaxlen;
+#endif /* !UNIV_HOTBACKUP */
 	ulint		i;
 
 	ut_ad(table);
@@ -194,8 +189,7 @@ dict_mem_table_add_col(
 		}
 		if (UNIV_LIKELY(i) && UNIV_UNLIKELY(!table->col_names)) {
 			/* All preceding column names are empty. */
-			char* s = mem_heap_alloc(heap, table->n_def);
-			memset(s, 0, table->n_def);
+			char* s = mem_heap_zalloc(heap, table->n_def);
 			table->col_names = s;
 		}
 
@@ -203,7 +197,7 @@ dict_mem_table_add_col(
 						     i, name, heap);
 	}
 
-	col = (dict_col_t*) dict_table_get_nth_col(table, i);
+	col = dict_table_get_nth_col(table, i);
 
 	col->ind = (unsigned int) i;
 	col->ord_part = 0;
@@ -212,27 +206,29 @@ dict_mem_table_add_col(
 	col->prtype = (unsigned int) prtype;
 	col->len = (unsigned int) len;
 
+#ifndef UNIV_HOTBACKUP
 	dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen);
 
 	col->mbminlen = (unsigned int) mbminlen;
 	col->mbmaxlen = (unsigned int) mbmaxlen;
+#endif /* !UNIV_HOTBACKUP */
 }
 
-/**************************************************************************
-Creates an index memory object. */
-
+/**********************************************************************//**
+Creates an index memory object.
+@return	own: index object */
+UNIV_INTERN
 dict_index_t*
 dict_mem_index_create(
 /*==================*/
-					/* out, own: index object */
-	const char*	table_name,	/* in: table name */
-	const char*	index_name,	/* in: index name */
-	ulint		space,		/* in: space where the index tree is
+	const char*	table_name,	/*!< in: table name */
+	const char*	index_name,	/*!< in: index name */
+	ulint		space,		/*!< in: space where the index tree is
 					placed, ignored if the index is of
 					the clustered type */
-	ulint		type,		/* in: DICT_UNIQUE,
+	ulint		type,		/*!< in: DICT_UNIQUE,
 					DICT_CLUSTERED, ... ORed */
-	ulint		n_fields)	/* in: number of fields */
+	ulint		n_fields)	/*!< in: number of fields */
 {
 	dict_index_t*	index;
 	mem_heap_t*	heap;
@@ -240,79 +236,58 @@ dict_mem_index_create(
 	ut_ad(table_name && index_name);
 
 	heap = mem_heap_create(DICT_HEAP_SIZE);
-	index = mem_heap_alloc(heap, sizeof(dict_index_t));
+	index = mem_heap_zalloc(heap, sizeof(dict_index_t));
 
 	index->heap = heap;
 
 	index->type = type;
+#ifndef UNIV_HOTBACKUP
 	index->space = (unsigned int) space;
-	index->page = 0;
+#endif /* !UNIV_HOTBACKUP */
 	index->name = mem_heap_strdup(heap, index_name);
 	index->table_name = table_name;
-	index->table = NULL;
-	index->n_def = index->n_nullable = 0;
 	index->n_fields = (unsigned int) n_fields;
 	index->fields = mem_heap_alloc(heap, 1 + n_fields
 				       * sizeof(dict_field_t));
 	/* The '1 +' above prevents allocation
 	of an empty mem block */
-	index->stat_n_diff_key_vals = NULL;
-
-	index->cached = FALSE;
-	memset(&index->lock, 0, sizeof index->lock);
 #ifdef UNIV_DEBUG
 	index->magic_n = DICT_INDEX_MAGIC_N;
 #endif /* UNIV_DEBUG */
 	return(index);
 }
 
-/**************************************************************************
-Creates and initializes a foreign constraint memory object. */
-
+/**********************************************************************//**
+Creates and initializes a foreign constraint memory object.
+@return	own: foreign constraint struct */
+UNIV_INTERN
 dict_foreign_t*
 dict_mem_foreign_create(void)
 /*=========================*/
-				/* out, own: foreign constraint struct */
 {
 	dict_foreign_t*	foreign;
 	mem_heap_t*	heap;
 
 	heap = mem_heap_create(100);
 
-	foreign = mem_heap_alloc(heap, sizeof(dict_foreign_t));
+	foreign = mem_heap_zalloc(heap, sizeof(dict_foreign_t));
 
 	foreign->heap = heap;
 
-	foreign->id = NULL;
-
-	foreign->type = 0;
-	foreign->foreign_table_name = NULL;
-	foreign->foreign_table = NULL;
-	foreign->foreign_col_names = NULL;
-
-	foreign->referenced_table_name = NULL;
-	foreign->referenced_table = NULL;
-	foreign->referenced_col_names = NULL;
-
-	foreign->n_fields = 0;
-
-	foreign->foreign_index = NULL;
-	foreign->referenced_index = NULL;
-
 	return(foreign);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Adds a field definition to an index. NOTE: does not take a copy
 of the column name if the field is a column. The memory occupied
 by the column name may be released only after publishing the index. */
-
+UNIV_INTERN
 void
 dict_mem_index_add_field(
 /*=====================*/
-	dict_index_t*	index,		/* in: index */
-	const char*	name,		/* in: column name */
-	ulint		prefix_len)	/* in: 0 or the column prefix length
+	dict_index_t*	index,		/*!< in: index */
+	const char*	name,		/*!< in: column name */
+	ulint		prefix_len)	/*!< in: 0 or the column prefix length
 					in a MySQL index like
 					INDEX (textcol(25)) */
 {
@@ -329,13 +304,13 @@ dict_mem_index_add_field(
 	field->prefix_len = (unsigned int) prefix_len;
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Frees an index memory object. */
-
+UNIV_INTERN
 void
 dict_mem_index_free(
 /*================*/
-	dict_index_t*	index)	/* in: index */
+	dict_index_t*	index)	/*!< in: index */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
diff --git a/storage/innodb_plugin/dyn/dyn0dyn.c b/storage/innodb_plugin/dyn/dyn0dyn.c
new file mode 100644
index 00000000000..e1275f040f3
--- /dev/null
+++ b/storage/innodb_plugin/dyn/dyn0dyn.c
@@ -0,0 +1,65 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dyn/dyn0dyn.c
+The dynamically allocated array
+
+Created 2/5/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dyn0dyn.h"
+#ifdef UNIV_NONINL
+#include "dyn0dyn.ic"
+#endif
+
+/************************************************************//**
+Adds a new block to a dyn array.
+@return	created block */
+UNIV_INTERN
+dyn_block_t*
+dyn_array_add_block(
+/*================*/
+	dyn_array_t*	arr)	/*!< in: dyn array */
+{
+	mem_heap_t*	heap;
+	dyn_block_t*	block;
+
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+	if (arr->heap == NULL) {
+		UT_LIST_INIT(arr->base);
+		UT_LIST_ADD_FIRST(list, arr->base, arr);
+
+		arr->heap = mem_heap_create(sizeof(dyn_block_t));
+	}
+
+	block = dyn_array_get_last_block(arr);
+	block->used = block->used | DYN_BLOCK_FULL_FLAG;
+
+	heap = arr->heap;
+
+	block = mem_heap_alloc(heap, sizeof(dyn_block_t));
+
+	block->used = 0;
+
+	UT_LIST_ADD_LAST(list, arr->base, block);
+
+	return(block);
+}
diff --git a/storage/innobase/eval/eval0eval.c b/storage/innodb_plugin/eval/eval0eval.c
similarity index 86%
rename from storage/innobase/eval/eval0eval.c
rename to storage/innodb_plugin/eval/eval0eval.c
index cbc47ec508f..589b0fa1576 100644
--- a/storage/innobase/eval/eval0eval.c
+++ b/storage/innodb_plugin/eval/eval0eval.c
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file eval/eval0eval.c
 SQL evaluator: evaluates simple data structures, like expressions, in
 a query graph
 
-(c) 1997 Innobase Oy
-
 Created 12/29/1997 Heikki Tuuri
 *******************************************************/
 
@@ -16,29 +33,29 @@ Created 12/29/1997 Heikki Tuuri
 #include "data0data.h"
 #include "row0sel.h"
 
-/* The RND function seed */
-ulint	eval_rnd	= 128367121;
+/** The RND function seed */
+static ulint	eval_rnd	= 128367121;
 
-/* Dummy adress used when we should allocate a buffer of size 0 in
-the function below */
+/** Dummy adress used when we should allocate a buffer of size 0 in
+eval_node_alloc_val_buf */
 
-byte	eval_dummy;
+static byte	eval_dummy;
 
-/*********************************************************************
+/*****************************************************************//**
 Allocate a buffer from global dynamic memory for a value of a que_node.
 NOTE that this memory must be explicitly freed when the query graph is
 freed. If the node already has an allocated buffer, that buffer is freed
 here. NOTE that this is the only function where dynamic memory should be
-allocated for a query node val field. */
-
+allocated for a query node val field.
+@return	pointer to allocated buffer */
+UNIV_INTERN
 byte*
 eval_node_alloc_val_buf(
 /*====================*/
-				/* out: pointer to allocated buffer */
-	que_node_t*	node,	/* in: query graph node; sets the val field
+	que_node_t*	node,	/*!< in: query graph node; sets the val field
 				data field to point to the new buffer, and
 				len field equal to size */
-	ulint		size)	/* in: buffer size */
+	ulint		size)	/*!< in: buffer size */
 {
 	dfield_t*	dfield;
 	byte*		data;
@@ -67,15 +84,15 @@ eval_node_alloc_val_buf(
 	return(data);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Free the buffer from global dynamic memory for a value of a que_node,
 if it has been allocated in the above function. The freeing for pushed
 column values is done in sel_col_prefetch_buf_free. */
-
+UNIV_INTERN
 void
 eval_node_free_val_buf(
 /*===================*/
-	que_node_t*	node)	/* in: query graph node */
+	que_node_t*	node)	/*!< in: query graph node */
 {
 	dfield_t*	dfield;
 	byte*		data;
@@ -94,14 +111,14 @@ eval_node_free_val_buf(
 	}
 }
 
-/*********************************************************************
-Evaluates a comparison node. */
-
+/*****************************************************************//**
+Evaluates a comparison node.
+@return	the result of the comparison */
+UNIV_INTERN
 ibool
 eval_cmp(
 /*=====*/
-					/* out: the result of the comparison */
-	func_node_t*	cmp_node)	/* in: comparison node */
+	func_node_t*	cmp_node)	/*!< in: comparison node */
 {
 	que_node_t*	arg1;
 	que_node_t*	arg2;
@@ -153,13 +170,13 @@ eval_cmp(
 	return(val);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates a logical operation node. */
 UNIV_INLINE
 void
 eval_logical(
 /*=========*/
-	func_node_t*	logical_node)	/* in: logical operation node */
+	func_node_t*	logical_node)	/*!< in: logical operation node */
 {
 	que_node_t*	arg1;
 	que_node_t*	arg2;
@@ -194,13 +211,13 @@ eval_logical(
 	eval_node_set_ibool_val(logical_node, val);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates an arithmetic operation node. */
 UNIV_INLINE
 void
 eval_arith(
 /*=======*/
-	func_node_t*	arith_node)	/* in: arithmetic operation node */
+	func_node_t*	arith_node)	/*!< in: arithmetic operation node */
 {
 	que_node_t*	arg1;
 	que_node_t*	arg2;
@@ -238,13 +255,13 @@ eval_arith(
 	eval_node_set_int_val(arith_node, val);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates an aggregate operation node. */
 UNIV_INLINE
 void
 eval_aggregate(
 /*===========*/
-	func_node_t*	node)	/* in: aggregate operation node */
+	func_node_t*	node)	/*!< in: aggregate operation node */
 {
 	que_node_t*	arg;
 	lint		val;
@@ -272,14 +289,14 @@ eval_aggregate(
 	eval_node_set_int_val(node, val);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates a predefined function node where the function is not relevant
 in benchmarks. */
 static
 void
 eval_predefined_2(
 /*==============*/
-	func_node_t*	func_node)	/* in: predefined function node */
+	func_node_t*	func_node)	/*!< in: predefined function node */
 {
 	que_node_t*	arg;
 	que_node_t*	arg1;
@@ -359,13 +376,13 @@ eval_predefined_2(
 	}
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates a notfound-function node. */
 UNIV_INLINE
 void
 eval_notfound(
 /*==========*/
-	func_node_t*	func_node)	/* in: function node */
+	func_node_t*	func_node)	/*!< in: function node */
 {
 	que_node_t*	arg1;
 	que_node_t*	arg2;
@@ -401,13 +418,13 @@ eval_notfound(
 	eval_node_set_ibool_val(func_node, ibool_val);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates a substr-function node. */
 UNIV_INLINE
 void
 eval_substr(
 /*========*/
-	func_node_t*	func_node)	/* in: function node */
+	func_node_t*	func_node)	/*!< in: function node */
 {
 	que_node_t*	arg1;
 	que_node_t*	arg2;
@@ -434,13 +451,13 @@ eval_substr(
 	dfield_set_data(dfield, str1 + len1, len2);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates a replstr-procedure node. */
 static
 void
 eval_replstr(
 /*=========*/
-	func_node_t*	func_node)	/* in: function node */
+	func_node_t*	func_node)	/*!< in: function node */
 {
 	que_node_t*	arg1;
 	que_node_t*	arg2;
@@ -474,13 +491,13 @@ eval_replstr(
 	ut_memcpy(str1 + len1, str2, len2);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates an instr-function node. */
 static
 void
 eval_instr(
 /*=======*/
-	func_node_t*	func_node)	/* in: function node */
+	func_node_t*	func_node)	/*!< in: function node */
 {
 	que_node_t*	arg1;
 	que_node_t*	arg2;
@@ -546,13 +563,13 @@ match_found:
 	eval_node_set_int_val(func_node, int_val);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates a predefined function node. */
 UNIV_INLINE
 void
 eval_binary_to_number(
 /*==================*/
-	func_node_t*	func_node)	/* in: function node */
+	func_node_t*	func_node)	/*!< in: function node */
 {
 	que_node_t*	arg1;
 	dfield_t*	dfield;
@@ -584,13 +601,13 @@ eval_binary_to_number(
 	eval_node_copy_and_alloc_val(func_node, str2, 4);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates a predefined function node. */
 static
 void
 eval_concat(
 /*========*/
-	func_node_t*	func_node)	/* in: function node */
+	func_node_t*	func_node)	/*!< in: function node */
 {
 	que_node_t*	arg;
 	dfield_t*	dfield;
@@ -626,7 +643,7 @@ eval_concat(
 	}
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates a predefined function node. If the first argument is an integer,
 this function looks at the second argument which is the integer length in
 bytes, and converts the integer to a VARCHAR.
@@ -636,7 +653,7 @@ UNIV_INLINE
 void
 eval_to_binary(
 /*===========*/
-	func_node_t*	func_node)	/* in: function node */
+	func_node_t*	func_node)	/*!< in: function node */
 {
 	que_node_t*	arg1;
 	que_node_t*	arg2;
@@ -674,13 +691,13 @@ eval_to_binary(
 	dfield_set_data(dfield, str1 + (4 - len1), len1);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates a predefined function node. */
 UNIV_INLINE
 void
 eval_predefined(
 /*============*/
-	func_node_t*	func_node)	/* in: function node */
+	func_node_t*	func_node)	/*!< in: function node */
 {
 	que_node_t*	arg1;
 	lint		int_val;
@@ -746,8 +763,7 @@ eval_predefined(
 			}
 		}
 
-		dfield_set_len((dfield_t*) que_node_get_val(func_node),
-			       int_len);
+		dfield_set_len(que_node_get_val(func_node), int_len);
 
 		return;
 
@@ -767,13 +783,13 @@ eval_predefined(
 	eval_node_set_int_val(func_node, int_val);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates a function node. */
-
+UNIV_INTERN
 void
 eval_func(
 /*======*/
-	func_node_t*	func_node)	/* in: function node */
+	func_node_t*	func_node)	/*!< in: function node */
 {
 	que_node_t*	arg;
 	ulint		class;
@@ -793,7 +809,7 @@ eval_func(
 		/* The functions are not defined for SQL null argument
 		values, except for eval_cmp and notfound */
 
-		if ((dfield_get_len(que_node_get_val(arg)) == UNIV_SQL_NULL)
+		if (dfield_is_null(que_node_get_val(arg))
 		    && (class != PARS_FUNC_CMP)
 		    && (func != PARS_NOTFOUND_TOKEN)
 		    && (func != PARS_PRINTF_TOKEN)) {
diff --git a/storage/innobase/eval/eval0proc.c b/storage/innodb_plugin/eval/eval0proc.c
similarity index 71%
rename from storage/innobase/eval/eval0proc.c
rename to storage/innodb_plugin/eval/eval0proc.c
index a513e8e4024..3a4218d92bf 100644
--- a/storage/innobase/eval/eval0proc.c
+++ b/storage/innodb_plugin/eval/eval0proc.c
@@ -1,7 +1,24 @@
-/******************************************************
-Executes SQL stored procedures and their control structures
+/*****************************************************************************
 
-(c) 1998 Innobase Oy
+Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file eval/eval0proc.c
+Executes SQL stored procedures and their control structures
 
 Created 1/20/1998 Heikki Tuuri
 *******************************************************/
@@ -12,14 +29,14 @@ Created 1/20/1998 Heikki Tuuri
 #include "eval0proc.ic"
 #endif
 
-/**************************************************************************
-Performs an execution step of an if-statement node. */
-
+/**********************************************************************//**
+Performs an execution step of an if-statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 if_step(
 /*====*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	if_node_t*	node;
 	elsif_node_t*	elsif_node;
@@ -88,14 +105,14 @@ if_step(
 	return(thr);
 }
 
-/**************************************************************************
-Performs an execution step of a while-statement node. */
-
+/**********************************************************************//**
+Performs an execution step of a while-statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 while_step(
 /*=======*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	while_node_t*	node;
 
@@ -124,14 +141,14 @@ while_step(
 	return(thr);
 }
 
-/**************************************************************************
-Performs an execution step of an assignment statement node. */
-
+/**********************************************************************//**
+Performs an execution step of an assignment statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 assign_step(
 /*========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	assign_node_t*	node;
 
@@ -151,14 +168,14 @@ assign_step(
 	return(thr);
 }
 
-/**************************************************************************
-Performs an execution step of a for-loop node. */
-
+/**********************************************************************//**
+Performs an execution step of a for-loop node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 for_step(
 /*=====*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	for_node_t*	node;
 	que_node_t*	parent;
@@ -213,14 +230,14 @@ for_step(
 	return(thr);
 }
 
-/**************************************************************************
-Performs an execution step of an exit statement node. */
-
+/**********************************************************************//**
+Performs an execution step of an exit statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 exit_step(
 /*======*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	exit_node_t*	node;
 	que_node_t*	loop_node;
@@ -245,14 +262,14 @@ exit_step(
 	return(thr);
 }
 
-/**************************************************************************
-Performs an execution step of a return-statement node. */
-
+/**********************************************************************//**
+Performs an execution step of a return-statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 return_step(
 /*========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	return_node_t*	node;
 	que_node_t*	parent;
diff --git a/storage/innobase/fil/fil0fil.c b/storage/innodb_plugin/fil/fil0fil.c
similarity index 69%
rename from storage/innobase/fil/fil0fil.c
rename to storage/innodb_plugin/fil/fil0fil.c
index c63d67cae60..96e60b0128f 100644
--- a/storage/innobase/fil/fil0fil.c
+++ b/storage/innodb_plugin/fil/fil0fil.c
@@ -1,7 +1,24 @@
-/******************************************************
-The tablespace memory cache
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file fil/fil0fil.c
+The tablespace memory cache
 
 Created 10/25/1995 Heikki Tuuri
 *******************************************************/
@@ -9,16 +26,11 @@ Created 10/25/1995 Heikki Tuuri
 #include "fil0fil.h"
 
 #include "mem0mem.h"
-#include "sync0sync.h"
 #include "hash0hash.h"
 #include "os0file.h"
-#include "os0sync.h"
 #include "mach0data.h"
-#include "ibuf0ibuf.h"
 #include "buf0buf.h"
 #include "buf0flu.h"
-#include "buf0lru.h"
-#include "log0log.h"
 #include "log0recv.h"
 #include "fsp0fsp.h"
 #include "srv0srv.h"
@@ -26,7 +38,15 @@ Created 10/25/1995 Heikki Tuuri
 #include "mtr0mtr.h"
 #include "mtr0log.h"
 #include "dict0dict.h"
-
+#include "page0zip.h"
+#ifndef UNIV_HOTBACKUP
+# include "buf0lru.h"
+# include "ibuf0ibuf.h"
+# include "sync0sync.h"
+# include "os0sync.h"
+#else /* !UNIV_HOTBACKUP */
+static ulint srv_data_read, srv_data_written;
+#endif /* !UNIV_HOTBACKUP */
 
 /*
 		IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
@@ -84,131 +104,144 @@ out of the LRU-list and keep a count of pending operations. When an operation
 completes, we decrement the count and return the file node to the LRU-list if
 the count drops to zero. */
 
-/* When mysqld is run, the default directory "." is the mysqld datadir,
+/** When mysqld is run, the default directory "." is the mysqld datadir,
 but in the MySQL Embedded Server Library and ibbackup it is not the default
 directory, and we must set the base file path explicitly */
-const char*	fil_path_to_mysql_datadir	= ".";
+UNIV_INTERN const char*	fil_path_to_mysql_datadir	= ".";
 
-/* The number of fsyncs done to the log */
-ulint	fil_n_log_flushes			= 0;
+/** The number of fsyncs done to the log */
+UNIV_INTERN ulint	fil_n_log_flushes			= 0;
 
-ulint	fil_n_pending_log_flushes		= 0;
-ulint	fil_n_pending_tablespace_flushes	= 0;
+/** Number of pending redo log flushes */
+UNIV_INTERN ulint	fil_n_pending_log_flushes		= 0;
+/** Number of pending tablespace flushes */
+UNIV_INTERN ulint	fil_n_pending_tablespace_flushes	= 0;
 
-/* Null file address */
-fil_addr_t	fil_addr_null = {FIL_NULL, 0};
+/** The null file address */
+UNIV_INTERN fil_addr_t	fil_addr_null = {FIL_NULL, 0};
 
-/* File node of a tablespace or the log data space */
+/** File node of a tablespace or the log data space */
 struct fil_node_struct {
-	fil_space_t*	space;	/* backpointer to the space where this node
+	fil_space_t*	space;	/*!< backpointer to the space where this node
 				belongs */
-	char*		name;	/* path to the file */
-	ibool		open;	/* TRUE if file open */
-	os_file_t	handle;	/* OS handle to the file, if file open */
-	ibool		is_raw_disk;/* TRUE if the 'file' is actually a raw
+	char*		name;	/*!< path to the file */
+	ibool		open;	/*!< TRUE if file open */
+	os_file_t	handle;	/*!< OS handle to the file, if file open */
+	ibool		is_raw_disk;/*!< TRUE if the 'file' is actually a raw
 				device or a raw disk partition */
-	ulint		size;	/* size of the file in database pages, 0 if
+	ulint		size;	/*!< size of the file in database pages, 0 if
 				not known yet; the possible last incomplete
 				megabyte may be ignored if space == 0 */
 	ulint		n_pending;
-				/* count of pending i/o's on this file;
+				/*!< count of pending i/o's on this file;
 				closing of the file is not allowed if
 				this is > 0 */
 	ulint		n_pending_flushes;
-				/* count of pending flushes on this file;
+				/*!< count of pending flushes on this file;
 				closing of the file is not allowed if
 				this is > 0 */
-	ib_longlong	modification_counter;/* when we write to the file we
+	ib_int64_t	modification_counter;/*!< when we write to the file we
 				increment this by one */
-	ib_longlong	flush_counter;/* up to what modification_counter value
-				we have flushed the modifications to disk */
+	ib_int64_t	flush_counter;/*!< up to what
+				modification_counter value we have
+				flushed the modifications to disk */
 	UT_LIST_NODE_T(fil_node_t) chain;
-				/* link field for the file chain */
+				/*!< link field for the file chain */
 	UT_LIST_NODE_T(fil_node_t) LRU;
-				/* link field for the LRU list */
-	ulint		magic_n;
+				/*!< link field for the LRU list */
+	ulint		magic_n;/*!< FIL_NODE_MAGIC_N */
 };
 
+/** Value of fil_node_struct::magic_n */
 #define	FIL_NODE_MAGIC_N	89389
 
-/* Tablespace or log data space: let us call them by a common name space */
+/** Tablespace or log data space: let us call them by a common name space */
 struct fil_space_struct {
-	char*		name;	/* space name = the path to the first file in
+	char*		name;	/*!< space name = the path to the first file in
 				it */
-	ulint		id;	/* space id */
-	ib_longlong	tablespace_version;
-				/* in DISCARD/IMPORT this timestamp is used to
-				check if we should ignore an insert buffer
-				merge request for a page because it actually
-				was for the previous incarnation of the
-				space */
-	ibool		mark;	/* this is set to TRUE at database startup if
+	ulint		id;	/*!< space id */
+	ib_int64_t	tablespace_version;
+				/*!< in DISCARD/IMPORT this timestamp
+				is used to check if we should ignore
+				an insert buffer merge request for a
+				page because it actually was for the
+				previous incarnation of the space */
+	ibool		mark;	/*!< this is set to TRUE at database startup if
 				the space corresponds to a table in the InnoDB
 				data dictionary; so we can print a warning of
 				orphaned tablespaces */
-	ibool		stop_ios;/* TRUE if we want to rename the .ibd file of
-				tablespace and want to stop temporarily
-				posting of new i/o requests on the file */
+	ibool		stop_ios;/*!< TRUE if we want to rename the
+				.ibd file of tablespace and want to
+				stop temporarily posting of new i/o
+				requests on the file */
 	ibool		stop_ibuf_merges;
-				/* we set this TRUE when we start deleting a
-				single-table tablespace */
+				/*!< we set this TRUE when we start
+				deleting a single-table tablespace */
 	ibool		is_being_deleted;
-				/* this is set to TRUE when we start
+				/*!< this is set to TRUE when we start
 				deleting a single-table tablespace and its
 				file; when this flag is set no further i/o
 				or flush requests can be placed on this space,
 				though there may be such requests still being
 				processed on this space */
-	ulint		purpose;/* FIL_TABLESPACE, FIL_LOG, or FIL_ARCH_LOG */
+	ulint		purpose;/*!< FIL_TABLESPACE, FIL_LOG, or
+				FIL_ARCH_LOG */
 	UT_LIST_BASE_NODE_T(fil_node_t) chain;
-				/* base node for the file chain */
-	ulint		size;	/* space size in pages; 0 if a single-table
+				/*!< base node for the file chain */
+	ulint		size;	/*!< space size in pages; 0 if a single-table
 				tablespace whose size we do not know yet;
 				last incomplete megabytes in data files may be
 				ignored if space == 0 */
+	ulint		flags;	/*!< compressed page size and file format, or 0 */
 	ulint		n_reserved_extents;
-				/* number of reserved free extents for
+				/*!< number of reserved free extents for
 				ongoing operations like B-tree page split */
-	ulint		n_pending_flushes; /* this is > 0 when flushing
+	ulint		n_pending_flushes; /*!< this is positive when flushing
 				the tablespace to disk; dropping of the
-				tablespace is forbidden if this is > 0 */
-	ulint		n_pending_ibuf_merges;/* this is > 0 when merging
-				insert buffer entries to a page so that we
-				may need to access the ibuf bitmap page in the
-				tablespade: dropping of the tablespace is
-				forbidden if this is > 0 */
-	hash_node_t	hash;	/* hash chain node */
-	hash_node_t	name_hash;/* hash chain the name_hash table */
-	rw_lock_t	latch;	/* latch protecting the file space storage
+				tablespace is forbidden if this is positive */
+	ulint		n_pending_ibuf_merges;/*!< this is positive
+				when merging insert buffer entries to
+				a page so that we may need to access
+				the ibuf bitmap page in the
+				tablespade: dropping of the tablespace
+				is forbidden if this is positive */
+	hash_node_t	hash;	/*!< hash chain node */
+	hash_node_t	name_hash;/*!< hash chain the name_hash table */
+#ifndef UNIV_HOTBACKUP
+	rw_lock_t	latch;	/*!< latch protecting the file space storage
 				allocation */
+#endif /* !UNIV_HOTBACKUP */
 	UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
-				/* list of spaces with at least one unflushed
+				/*!< list of spaces with at least one unflushed
 				file we have written to */
-	ibool		is_in_unflushed_spaces; /* TRUE if this space is
-				currently in the list above */
+	ibool		is_in_unflushed_spaces; /*!< TRUE if this space is
+				currently in unflushed_spaces */
 	UT_LIST_NODE_T(fil_space_t) space_list;
-				/* list of all spaces */
-	ibuf_data_t*	ibuf_data;
-				/* insert buffer data */
-	ulint		magic_n;
+				/*!< list of all spaces */
+	ulint		magic_n;/*!< FIL_SPACE_MAGIC_N */
 };
 
+/** Value of fil_space_struct::magic_n */
 #define	FIL_SPACE_MAGIC_N	89472
 
-/* The tablespace memory cache; also the totality of logs = the log data space,
-is stored here; below we talk about tablespaces, but also the ib_logfiles
-form a 'space' and it is handled here */
-
+/** The tablespace memory cache */
 typedef	struct fil_system_struct	fil_system_t;
+
+/** The tablespace memory cache; also the totality of logs (the log
+data space) is stored here; below we talk about tablespaces, but also
+the ib_logfiles form a 'space' and it is handled here */
+
 struct fil_system_struct {
-	mutex_t		mutex;		/* The mutex protecting the cache */
-	hash_table_t*	spaces;		/* The hash table of spaces in the
+#ifndef UNIV_HOTBACKUP
+	mutex_t		mutex;		/*!< The mutex protecting the cache */
+#endif /* !UNIV_HOTBACKUP */
+	hash_table_t*	spaces;		/*!< The hash table of spaces in the
 					system; they are hashed on the space
 					id */
-	hash_table_t*	name_hash;	/* hash table based on the space
+	hash_table_t*	name_hash;	/*!< hash table based on the space
 					name */
 	UT_LIST_BASE_NODE_T(fil_node_t) LRU;
-					/* base node for the LRU list of the
+					/*!< base node for the LRU list of the
 					most recently used open files with no
 					pending i/o's; if we start an i/o on
 					the file, we first remove it from this
@@ -219,24 +252,24 @@ struct fil_system_struct {
 					after the startup, and kept open until
 					shutdown */
 	UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
-					/* base node for the list of those
+					/*!< base node for the list of those
 					tablespaces whose files contain
 					unflushed writes; those spaces have
 					at least one file node where
 					modification_counter > flush_counter */
-	ulint		n_open;		/* number of files currently open */
-	ulint		max_n_open;	/* n_open is not allowed to exceed
+	ulint		n_open;		/*!< number of files currently open */
+	ulint		max_n_open;	/*!< n_open is not allowed to exceed
 					this */
-	ib_longlong	modification_counter;/* when we write to a file we
+	ib_int64_t	modification_counter;/*!< when we write to a file we
 					increment this by one */
-	ulint		max_assigned_id;/* maximum space id in the existing
+	ulint		max_assigned_id;/*!< maximum space id in the existing
 					tables, or assigned during the time
 					mysqld has been up; at an InnoDB
 					startup we scan the data dictionary
 					and set here the maximum of the
 					space id's of the tables there */
-	ib_longlong	tablespace_version;
-					/* a counter which is incremented for
+	ib_int64_t	tablespace_version;
+					/*!< a counter which is incremented for
 					every space object memory creation;
 					every space mem object gets a
 					'timestamp' from this; in DISCARD/
@@ -244,15 +277,15 @@ struct fil_system_struct {
 					should ignore an insert buffer merge
 					request */
 	UT_LIST_BASE_NODE_T(fil_space_t) space_list;
-					/* list of all file spaces */
+					/*!< list of all file spaces */
 };
 
-/* The tablespace memory cache. This variable is NULL before the module is
+/** The tablespace memory cache. This variable is NULL before the module is
 initialized. */
-fil_system_t*	fil_system	= NULL;
+static fil_system_t*	fil_system	= NULL;
 
 
-/************************************************************************
+/********************************************************************//**
 NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
 
 Prepares a file node for i/o. Opens the file if it is closed. Updates the
@@ -263,153 +296,228 @@ static
 void
 fil_node_prepare_for_io(
 /*====================*/
-	fil_node_t*	node,	/* in: file node */
-	fil_system_t*	system,	/* in: tablespace memory cache */
-	fil_space_t*	space);	/* in: space */
-/************************************************************************
+	fil_node_t*	node,	/*!< in: file node */
+	fil_system_t*	system,	/*!< in: tablespace memory cache */
+	fil_space_t*	space);	/*!< in: space */
+/********************************************************************//**
 Updates the data structures when an i/o operation finishes. Updates the
 pending i/o's field in the node appropriately. */
 static
 void
 fil_node_complete_io(
 /*=================*/
-	fil_node_t*	node,	/* in: file node */
-	fil_system_t*	system,	/* in: tablespace memory cache */
-	ulint		type);	/* in: OS_FILE_WRITE or OS_FILE_READ; marks
+	fil_node_t*	node,	/*!< in: file node */
+	fil_system_t*	system,	/*!< in: tablespace memory cache */
+	ulint		type);	/*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
 				the node as modified if
 				type == OS_FILE_WRITE */
-/***********************************************************************
+/*******************************************************************//**
 Checks if a single-table tablespace for a given table name exists in the
-tablespace memory cache. */
+tablespace memory cache.
+@return	space id, ULINT_UNDEFINED if not found */
 static
 ulint
 fil_get_space_id_for_table(
 /*=======================*/
-				/* out: space id, ULINT_UNDEFINED if not
-				found */
-	const char*	name);	/* in: table name in the standard
+	const char*	name);	/*!< in: table name in the standard
 				'databasename/tablename' format */
+/********************************************************************//**
+Reads data from a space to a buffer. Remember that the possible incomplete
+blocks at the end of file are ignored: they are not taken into account when
+calculating the byte offset within a space.
+@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
+i/o on a tablespace which does not exist */
+UNIV_INLINE
+ulint
+fil_read(
+/*=====*/
+	ibool	sync,		/*!< in: TRUE if synchronous aio is desired */
+	ulint	space_id,	/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	block_offset,	/*!< in: offset in number of blocks */
+	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in aio
+				this must be divisible by the OS block size */
+	ulint	len,		/*!< in: how many bytes to read; this must not
+				cross a file boundary; in aio this must be a
+				block size multiple */
+	void*	buf,		/*!< in/out: buffer where to store data read;
+				in aio this must be appropriately aligned */
+	void*	message)	/*!< in: message for aio handler if non-sync
+				aio used, else ignored */
+{
+	return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
+					  byte_offset, len, buf, message));
+}
 
+/********************************************************************//**
+Writes data to a space from a buffer. Remember that the possible incomplete
+blocks at the end of file are ignored: they are not taken into account when
+calculating the byte offset within a space.
+@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
+i/o on a tablespace which does not exist */
+UNIV_INLINE
+ulint
+fil_write(
+/*======*/
+	ibool	sync,		/*!< in: TRUE if synchronous aio is desired */
+	ulint	space_id,	/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	block_offset,	/*!< in: offset in number of blocks */
+	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in aio
+				this must be divisible by the OS block size */
+	ulint	len,		/*!< in: how many bytes to write; this must
+				not cross a file boundary; in aio this must
+				be a block size multiple */
+	void*	buf,		/*!< in: buffer from which to write; in aio
+				this must be appropriately aligned */
+	void*	message)	/*!< in: message for aio handler if non-sync
+				aio used, else ignored */
+{
+	return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
+					   byte_offset, len, buf, message));
+}
 
-/***********************************************************************
-Returns the version number of a tablespace, -1 if not found. */
+/*******************************************************************//**
+Returns the table space by a given id, NULL if not found. */
+UNIV_INLINE
+fil_space_t*
+fil_space_get_by_id(
+/*================*/
+	ulint	id)	/*!< in: space id */
+{
+	fil_space_t*	space;
 
-ib_longlong
+	ut_ad(mutex_own(&fil_system->mutex));
+
+	HASH_SEARCH(hash, fil_system->spaces, id,
+		    fil_space_t*, space,
+		    ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
+		    space->id == id);
+
+	return(space);
+}
+
+/*******************************************************************//**
+Returns the table space by a given name, NULL if not found. */
+UNIV_INLINE
+fil_space_t*
+fil_space_get_by_name(
+/*==================*/
+	const char*	name)	/*!< in: space name */
+{
+	fil_space_t*	space;
+	ulint		fold;
+
+	ut_ad(mutex_own(&fil_system->mutex));
+
+	fold = ut_fold_string(name);
+
+	HASH_SEARCH(name_hash, fil_system->name_hash, fold,
+		    fil_space_t*, space,
+		    ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
+		    !strcmp(name, space->name));
+
+	return(space);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Returns the version number of a tablespace, -1 if not found.
+@return version number, -1 if the tablespace does not exist in the
+memory cache */
+UNIV_INTERN
+ib_int64_t
 fil_space_get_version(
 /*==================*/
-			/* out: version number, -1 if the tablespace does not
-			exist in the memory cache */
-	ulint	id)	/* in: space id */
+	ulint	id)	/*!< in: space id */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
-	ib_longlong	version		= -1;
+	ib_int64_t	version		= -1;
 
-	ut_ad(system);
+	ut_ad(fil_system);
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	if (space) {
 		version = space->tablespace_version;
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	return(version);
 }
 
-/***********************************************************************
-Returns the latch of a file space. */
-
+/*******************************************************************//**
+Returns the latch of a file space.
+@return	latch protecting storage allocation */
+UNIV_INTERN
 rw_lock_t*
 fil_space_get_latch(
 /*================*/
-			/* out: latch protecting storage allocation */
-	ulint	id)	/* in: space id */
+	ulint	id,	/*!< in: space id */
+	ulint*	flags)	/*!< out: tablespace flags */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
 
-	ut_ad(system);
+	ut_ad(fil_system);
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	ut_a(space);
 
-	mutex_exit(&(system->mutex));
+	if (flags) {
+		*flags = space->flags;
+	}
+
+	mutex_exit(&fil_system->mutex);
 
 	return(&(space->latch));
 }
 
-/***********************************************************************
-Returns the type of a file space. */
-
+/*******************************************************************//**
+Returns the type of a file space.
+@return	FIL_TABLESPACE or FIL_LOG */
+UNIV_INTERN
 ulint
 fil_space_get_type(
 /*===============*/
-			/* out: FIL_TABLESPACE or FIL_LOG */
-	ulint	id)	/* in: space id */
+	ulint	id)	/*!< in: space id */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
 
-	ut_ad(system);
+	ut_ad(fil_system);
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	ut_a(space);
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	return(space->purpose);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***********************************************************************
-Returns the ibuf data of a file space. */
-
-ibuf_data_t*
-fil_space_get_ibuf_data(
-/*====================*/
-			/* out: ibuf data for this space */
-	ulint	id)	/* in: space id */
-{
-	fil_system_t*	system		= fil_system;
-	fil_space_t*	space;
-
-	ut_ad(system);
-
-	ut_a(id == 0);
-
-	mutex_enter(&(system->mutex));
-
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
-	mutex_exit(&(system->mutex));
-
-	ut_a(space);
-
-	return(space->ibuf_data);
-}
-
-/**************************************************************************
+/**********************************************************************//**
 Checks if all the file nodes in a space are flushed. The caller must hold
-the fil_system mutex. */
+the fil_system mutex.
+@return	TRUE if all are flushed */
 static
 ibool
 fil_space_is_flushed(
 /*=================*/
-				/* out: TRUE if all are flushed */
-	fil_space_t*	space)	/* in: space */
+	fil_space_t*	space)	/*!< in: space */
 {
 	fil_node_t*	node;
 
-	ut_ad(mutex_own(&(fil_system->mutex)));
+	ut_ad(mutex_own(&fil_system->mutex));
 
 	node = UT_LIST_GET_FIRST(space->chain);
 
@@ -425,27 +533,26 @@ fil_space_is_flushed(
 	return(TRUE);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Appends a new file to the chain of files of a space. File must be closed. */
-
+UNIV_INTERN
 void
 fil_node_create(
 /*============*/
-	const char*	name,	/* in: file name (file must be closed) */
-	ulint		size,	/* in: file size in database blocks, rounded
+	const char*	name,	/*!< in: file name (file must be closed) */
+	ulint		size,	/*!< in: file size in database blocks, rounded
 				downwards to an integer */
-	ulint		id,	/* in: space id where to append */
-	ibool		is_raw)	/* in: TRUE if a raw device or
+	ulint		id,	/*!< in: space id where to append */
+	ibool		is_raw)	/*!< in: TRUE if a raw device or
 				a raw disk partition */
 {
-	fil_system_t*	system	= fil_system;
 	fil_node_t*	node;
 	fil_space_t*	space;
 
-	ut_a(system);
+	ut_a(fil_system);
 	ut_a(name);
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
 	node = mem_alloc(sizeof(fil_node_t));
 
@@ -463,7 +570,7 @@ fil_node_create(
 	node->modification_counter = 0;
 	node->flush_counter = 0;
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	if (!space) {
 		ut_print_timestamp(stderr);
@@ -476,7 +583,7 @@ fil_node_create(
 
 		mem_free(node);
 
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return;
 	}
@@ -487,21 +594,21 @@ fil_node_create(
 
 	UT_LIST_ADD_LAST(chain, space->chain, node);
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 }
 
-/************************************************************************
+/********************************************************************//**
 Opens a the file of a node of a tablespace. The caller must own the fil_system
 mutex. */
 static
 void
 fil_node_open_file(
 /*===============*/
-	fil_node_t*	node,	/* in: file node */
-	fil_system_t*	system,	/* in: tablespace memory cache */
-	fil_space_t*	space)	/* in: space */
+	fil_node_t*	node,	/*!< in: file node */
+	fil_system_t*	system,	/*!< in: tablespace memory cache */
+	fil_space_t*	space)	/*!< in: space */
 {
-	ib_longlong	size_bytes;
+	ib_int64_t	size_bytes;
 	ulint		size_low;
 	ulint		size_high;
 	ibool		ret;
@@ -510,6 +617,7 @@ fil_node_open_file(
 	byte*		buf2;
 	byte*		page;
 	ulint		space_id;
+	ulint		flags;
 #endif /* !UNIV_HOTBACKUP */
 
 	ut_ad(mutex_own(&(system->mutex)));
@@ -543,11 +651,11 @@ fil_node_open_file(
 
 		os_file_get_size(node->handle, &size_low, &size_high);
 
-		size_bytes = (((ib_longlong)size_high) << 32)
-			+ (ib_longlong)size_low;
+		size_bytes = (((ib_int64_t)size_high) << 32)
+			+ (ib_int64_t)size_low;
 #ifdef UNIV_HOTBACKUP
 		node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
-
+		/* TODO: adjust to zip_size, like below? */
 #else
 		ut_a(space->purpose != FIL_LOG);
 		ut_a(space->id != 0);
@@ -577,6 +685,7 @@ fil_node_open_file(
 		success = os_file_read(node->handle, page, 0, 0,
 				       UNIV_PAGE_SIZE);
 		space_id = fsp_header_get_space_id(page);
+		flags = fsp_header_get_flags(page);
 
 		ut_free(buf2);
 
@@ -584,31 +693,47 @@ fil_node_open_file(
 
 		os_file_close(node->handle);
 
-		if (space_id == ULINT_UNDEFINED || space_id == 0) {
-			fprintf(stderr,
-				"InnoDB: Error: tablespace id %lu"
-				" in file %s is not sensible\n",
-				(ulong) space_id, node->name);
-
-			ut_a(0);
-		}
-
-		if (space_id != space->id) {
+		if (UNIV_UNLIKELY(space_id != space->id)) {
 			fprintf(stderr,
 				"InnoDB: Error: tablespace id is %lu"
 				" in the data dictionary\n"
 				"InnoDB: but in file %s it is %lu!\n",
 				space->id, node->name, space_id);
 
-			ut_a(0);
+			ut_error;
 		}
 
-		if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) {
-			node->size = (ulint)
-				((size_bytes / (1024 * 1024))
-				 * ((1024 * 1024) / UNIV_PAGE_SIZE));
-		} else {
+		if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
+				  || space_id == 0)) {
+			fprintf(stderr,
+				"InnoDB: Error: tablespace id %lu"
+				" in file %s is not sensible\n",
+				(ulong) space_id, node->name);
+
+			ut_error;
+		}
+
+		if (UNIV_UNLIKELY(space->flags != flags)) {
+			fprintf(stderr,
+				"InnoDB: Error: table flags are %lx"
+				" in the data dictionary\n"
+				"InnoDB: but the flags in file %s are %lx!\n",
+				space->flags, node->name, flags);
+
+			ut_error;
+		}
+
+		if (size_bytes >= 1024 * 1024) {
+			/* Truncate the size to whole megabytes. */
+			size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
+		}
+
+		if (!(flags & DICT_TF_ZSSIZE_MASK)) {
 			node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
+		} else {
+			node->size = (ulint)
+				(size_bytes
+				 / dict_table_flags_to_zip_size(flags));
 		}
 #endif
 		space->size += node->size;
@@ -644,14 +769,14 @@ fil_node_open_file(
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Closes a file. */
 static
 void
 fil_node_close_file(
 /*================*/
-	fil_node_t*	node,	/* in: file node */
-	fil_system_t*	system)	/* in: tablespace memory cache */
+	fil_node_t*	node,	/*!< in: file node */
+	fil_system_t*	system)	/*!< in: tablespace memory cache */
 {
 	ibool	ret;
 
@@ -679,41 +804,38 @@ fil_node_close_file(
 	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Tries to close a file in the LRU list. The caller must hold the fil_sys
-mutex. */
+mutex.
+@return TRUE if success, FALSE if should retry later; since i/o's
+generally complete in < 100 ms, and as InnoDB writes at most 128 pages
+from the buffer pool in a batch, and then immediately flushes the
+files, there is a good chance that the next time we find a suitable
+node from the LRU list */
 static
 ibool
 fil_try_to_close_file_in_LRU(
 /*=========================*/
-				/* out: TRUE if success, FALSE if should retry
-				later; since i/o's generally complete in <
-				100 ms, and as InnoDB writes at most 128 pages
-				from the buffer pool in a batch, and then
-				immediately flushes the files, there is a good
-				chance that the next time we find a suitable
-				node from the LRU list */
-	ibool	print_info)	/* in: if TRUE, prints information why it
+	ibool	print_info)	/*!< in: if TRUE, prints information why it
 				cannot close a file */
 {
-	fil_system_t*	system		= fil_system;
 	fil_node_t*	node;
 
-	ut_ad(mutex_own(&(system->mutex)));
+	ut_ad(mutex_own(&fil_system->mutex));
 
-	node = UT_LIST_GET_LAST(system->LRU);
+	node = UT_LIST_GET_LAST(fil_system->LRU);
 
 	if (print_info) {
 		fprintf(stderr,
 			"InnoDB: fil_sys open file LRU len %lu\n",
-			(ulong) UT_LIST_GET_LEN(system->LRU));
+			(ulong) UT_LIST_GET_LEN(fil_system->LRU));
 	}
 
 	while (node != NULL) {
 		if (node->modification_counter == node->flush_counter
 		    && node->n_pending_flushes == 0) {
 
-			fil_node_close_file(node, system);
+			fil_node_close_file(node, fil_system);
 
 			return(TRUE);
 		}
@@ -741,7 +863,7 @@ fil_try_to_close_file_in_LRU(
 	return(FALSE);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Reserves the fil_system mutex and tries to make sure we can open at least one
 file while holding it. This should be called before calling
 fil_node_prepare_for_io(), because that function may need to open a file. */
@@ -749,18 +871,16 @@ static
 void
 fil_mutex_enter_and_prepare_for_io(
 /*===============================*/
-	ulint	space_id)	/* in: space id */
+	ulint	space_id)	/*!< in: space id */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
 	ibool		success;
 	ibool		print_info	= FALSE;
 	ulint		count		= 0;
 	ulint		count2		= 0;
 
-	ut_ad(!mutex_own(&(system->mutex)));
 retry:
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
 	if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
 		/* We keep log files and system tablespace files always open;
@@ -772,13 +892,13 @@ retry:
 		return;
 	}
 
-	if (system->n_open < system->max_n_open) {
+	if (fil_system->n_open < fil_system->max_n_open) {
 
 		return;
 	}
 
-	HASH_SEARCH(hash, system->spaces, space_id, space,
-		    space->id == space_id);
+	space = fil_space_get_by_id(space_id);
+
 	if (space != NULL && space->stop_ios) {
 		/* We are going to do a rename file and want to stop new i/o's
 		for a while */
@@ -791,7 +911,7 @@ retry:
 				(ulong) count2);
 		}
 
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		os_thread_sleep(20000);
 
@@ -817,12 +937,12 @@ retry:
 close_more:
 	success = fil_try_to_close_file_in_LRU(print_info);
 
-	if (success && system->n_open >= system->max_n_open) {
+	if (success && fil_system->n_open >= fil_system->max_n_open) {
 
 		goto close_more;
 	}
 
-	if (system->n_open < system->max_n_open) {
+	if (fil_system->n_open < fil_system->max_n_open) {
 		/* Ok */
 
 		return;
@@ -837,12 +957,13 @@ close_more:
 			"InnoDB: You may need to raise the value of"
 			" innodb_max_files_open in\n"
 			"InnoDB: my.cnf.\n",
-			(ulong) system->n_open, (ulong) system->max_n_open);
+			(ulong) fil_system->n_open,
+			(ulong) fil_system->max_n_open);
 
 		return;
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 #ifndef UNIV_HOTBACKUP
 	/* Wake the i/o-handler threads to make sure pending i/o's are
@@ -861,15 +982,15 @@ close_more:
 	goto retry;
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Frees a file node object from a tablespace memory cache. */
 static
 void
 fil_node_free(
 /*==========*/
-	fil_node_t*	node,	/* in, own: file node */
-	fil_system_t*	system,	/* in: tablespace memory cache */
-	fil_space_t*	space)	/* in: space where the file node is chained */
+	fil_node_t*	node,	/*!< in, own: file node */
+	fil_system_t*	system,	/*!< in: tablespace memory cache */
+	fil_space_t*	space)	/*!< in: space where the file node is chained */
 {
 	ut_ad(node && system && space);
 	ut_ad(mutex_own(&(system->mutex)));
@@ -903,69 +1024,79 @@ fil_node_free(
 	mem_free(node);
 }
 
-/********************************************************************
+#ifdef UNIV_LOG_ARCHIVE
+/****************************************************************//**
 Drops files from the start of a file space, so that its size is cut by
 the amount given. */
-
+UNIV_INTERN
 void
 fil_space_truncate_start(
 /*=====================*/
-	ulint	id,		/* in: space id */
-	ulint	trunc_len)	/* in: truncate by this much; it is an error
+	ulint	id,		/*!< in: space id */
+	ulint	trunc_len)	/*!< in: truncate by this much; it is an error
 				if this does not equal to the combined size of
 				some initial files in the space */
 {
-	fil_system_t*	system		= fil_system;
 	fil_node_t*	node;
 	fil_space_t*	space;
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	ut_a(space);
 
 	while (trunc_len > 0) {
 		node = UT_LIST_GET_FIRST(space->chain);
 
-		ut_a(node->size * UNIV_PAGE_SIZE >= trunc_len);
+		ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len);
 
 		trunc_len -= node->size * UNIV_PAGE_SIZE;
 
-		fil_node_free(node, system, space);
+		fil_node_free(node, fil_system, space);
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 }
+#endif /* UNIV_LOG_ARCHIVE */
 
-/***********************************************************************
+/*******************************************************************//**
 Creates a space memory object and puts it to the tablespace memory cache. If
-there is an error, prints an error message to the .err log. */
-
+there is an error, prints an error message to the .err log.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_space_create(
 /*=============*/
-				/* out: TRUE if success */
-	const char*	name,	/* in: space name */
-	ulint		id,	/* in: space id */
-	ulint		purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */
+	const char*	name,	/*!< in: space name */
+	ulint		id,	/*!< in: space id */
+	ulint		flags,	/*!< in: compressed page size
+				and file format, or 0 */
+	ulint		purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
-	ulint		namesake_id;
+
+	/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
+	ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+	ROW_FORMAT=REDUNDANT (table->flags == 0).  For any other
+	format, the tablespace flags should equal table->flags. */
+	ut_a(flags != DICT_TF_COMPACT);
+
 try_again:
 	/*printf(
 	"InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
 	purpose);*/
 
-	ut_a(system);
+	ut_a(fil_system);
 	ut_a(name);
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
+
+	space = fil_space_get_by_name(name);
+
+	if (UNIV_LIKELY_NULL(space)) {
+		ulint	namesake_id;
 
-	HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(name), space,
-		    0 == strcmp(name, space->name));
-	if (space != NULL) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr,
 			"  InnoDB: Warning: trying to init to the"
@@ -980,7 +1111,7 @@ try_again:
 
 		if (id == 0 || purpose != FIL_TABLESPACE) {
 
-			mutex_exit(&(system->mutex));
+			mutex_exit(&fil_system->mutex);
 
 			return(FALSE);
 		}
@@ -1002,16 +1133,16 @@ try_again:
 
 		namesake_id = space->id;
 
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		fil_space_free(namesake_id);
 
 		goto try_again;
 	}
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
-	if (space != NULL) {
+	if (UNIV_LIKELY_NULL(space)) {
 		fprintf(stderr,
 			"InnoDB: Error: trying to add tablespace %lu"
 			" of name ", (ulong) id);
@@ -1024,7 +1155,7 @@ try_again:
 		fputs(" already exists in the tablespace\n"
 		      "InnoDB: memory cache!\n", stderr);
 
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return(FALSE);
 	}
@@ -1034,12 +1165,12 @@ try_again:
 	space->name = mem_strdup(name);
 	space->id = id;
 
-	system->tablespace_version++;
-	space->tablespace_version = system->tablespace_version;
+	fil_system->tablespace_version++;
+	space->tablespace_version = fil_system->tablespace_version;
 	space->mark = FALSE;
 
-	if (purpose == FIL_TABLESPACE && id > system->max_assigned_id) {
-		system->max_assigned_id = id;
+	if (purpose == FIL_TABLESPACE && id > fil_system->max_assigned_id) {
+		fil_system->max_assigned_id = id;
 	}
 
 	space->stop_ios = FALSE;
@@ -1047,6 +1178,7 @@ try_again:
 	space->is_being_deleted = FALSE;
 	space->purpose = purpose;
 	space->size = 0;
+	space->flags = flags;
 
 	space->n_reserved_extents = 0;
 
@@ -1056,42 +1188,38 @@ try_again:
 	UT_LIST_INIT(space->chain);
 	space->magic_n = FIL_SPACE_MAGIC_N;
 
-	space->ibuf_data = NULL;
-
 	rw_lock_create(&space->latch, SYNC_FSP);
 
-	HASH_INSERT(fil_space_t, hash, system->spaces, id, space);
+	HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
 
-	HASH_INSERT(fil_space_t, name_hash, system->name_hash,
+	HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
 		    ut_fold_string(name), space);
 	space->is_in_unflushed_spaces = FALSE;
 
-	UT_LIST_ADD_LAST(space_list, system->space_list, space);
+	UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	return(TRUE);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Assigns a new space id for a new single-table tablespace. This works simply by
 incrementing the global counter. If 4 billion id's is not enough, we may need
-to recycle id's. */
+to recycle id's.
+@return	new tablespace id; ULINT_UNDEFINED if could not assign an id */
 static
 ulint
 fil_assign_new_space_id(void)
 /*=========================*/
-			/* out: new tablespace id; ULINT_UNDEFINED if could
-			not assign an id */
 {
-	fil_system_t*	system = fil_system;
 	ulint		id;
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	system->max_assigned_id++;
+	fil_system->max_assigned_id++;
 
-	id = system->max_assigned_id;
+	id = fil_system->max_assigned_id;
 
 	if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
 		ut_print_timestamp(stderr);
@@ -1117,35 +1245,34 @@ fil_assign_new_space_id(void)
 			" have to dump all your tables and\n"
 			"InnoDB: recreate the whole InnoDB installation.\n",
 			(ulong) id);
-		system->max_assigned_id--;
+		fil_system->max_assigned_id--;
 
 		id = ULINT_UNDEFINED;
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	return(id);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Frees a space object from the tablespace memory cache. Closes the files in
 the chain but does not delete them. There must not be any pending i/o's or
-flushes on the files. */
-
+flushes on the files.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_space_free(
 /*===========*/
-			/* out: TRUE if success */
-	ulint	id)	/* in: space id */
+	ulint	id)	/*!< in: space id */
 {
-	fil_system_t*	system = fil_system;
 	fil_space_t*	space;
 	fil_space_t*	namespace;
 	fil_node_t*	fil_node;
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	if (!space) {
 		ut_print_timestamp(stderr);
@@ -1154,29 +1281,28 @@ fil_space_free(
 			" from the cache but\n"
 			"InnoDB: it is not there.\n", (ulong) id);
 
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return(FALSE);
 	}
 
-	HASH_DELETE(fil_space_t, hash, system->spaces, id, space);
+	HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space);
 
-	HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(space->name),
-		    namespace, 0 == strcmp(space->name, namespace->name));
+	namespace = fil_space_get_by_name(space->name);
 	ut_a(namespace);
 	ut_a(space == namespace);
 
-	HASH_DELETE(fil_space_t, name_hash, system->name_hash,
+	HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
 		    ut_fold_string(space->name), space);
 
 	if (space->is_in_unflushed_spaces) {
 		space->is_in_unflushed_spaces = FALSE;
 
-		UT_LIST_REMOVE(unflushed_spaces, system->unflushed_spaces,
+		UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces,
 			       space);
 	}
 
-	UT_LIST_REMOVE(space_list, system->space_list, space);
+	UT_LIST_REMOVE(space_list, fil_system->space_list, space);
 
 	ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
 	ut_a(0 == space->n_pending_flushes);
@@ -1184,14 +1310,14 @@ fil_space_free(
 	fil_node = UT_LIST_GET_FIRST(space->chain);
 
 	while (fil_node != NULL) {
-		fil_node_free(fil_node, system, space);
+		fil_node_free(fil_node, fil_system, space);
 
 		fil_node = UT_LIST_GET_FIRST(space->chain);
 	}
 
 	ut_a(0 == UT_LIST_GET_LEN(space->chain));
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	rw_lock_free(&(space->latch));
 
@@ -1201,52 +1327,28 @@ fil_space_free(
 	return(TRUE);
 }
 
-#ifdef UNIV_HOTBACKUP
-/***********************************************************************
-Returns the tablespace object for a given id, or NULL if not found from the
-tablespace memory cache. */
-static
-fil_space_t*
-fil_get_space_for_id_low(
-/*=====================*/
-			/* out: tablespace object or NULL; NOTE that you must
-			own &(fil_system->mutex) to call this function! */
-	ulint	id)	/* in: space id */
-{
-	fil_system_t*	system		= fil_system;
-	fil_space_t*	space;
-
-	ut_ad(system);
-
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
-	return(space);
-}
-#endif
-
-/***********************************************************************
+/*******************************************************************//**
 Returns the size of the space in pages. The tablespace must be cached in the
-memory cache. */
-
+memory cache.
+@return	space size, 0 if space not found */
+UNIV_INTERN
 ulint
 fil_space_get_size(
 /*===============*/
-			/* out: space size, 0 if space not found */
-	ulint	id)	/* in: space id */
+	ulint	id)	/*!< in: space id */
 {
-	fil_system_t*	system		= fil_system;
 	fil_node_t*	node;
 	fil_space_t*	space;
 	ulint		size;
 
-	ut_ad(system);
+	ut_ad(fil_system);
 
 	fil_mutex_enter_and_prepare_for_io(id);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	if (space == NULL) {
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return(0);
 	}
@@ -1262,27 +1364,101 @@ fil_space_get_size(
 		the file yet; the following calls will open it and update the
 		size fields */
 
-		fil_node_prepare_for_io(node, system, space);
-		fil_node_complete_io(node, system, OS_FILE_READ);
+		fil_node_prepare_for_io(node, fil_system, space);
+		fil_node_complete_io(node, fil_system, OS_FILE_READ);
 	}
 
 	size = space->size;
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	return(size);
 }
 
-/***********************************************************************
-Checks if the pair space, page_no refers to an existing page in a tablespace
-file space. The tablespace must be cached in the memory cache. */
+/*******************************************************************//**
+Returns the flags of the space. The tablespace must be cached
+in the memory cache.
+@return	flags, ULINT_UNDEFINED if space not found */
+UNIV_INTERN
+ulint
+fil_space_get_flags(
+/*================*/
+	ulint	id)	/*!< in: space id */
+{
+	fil_node_t*	node;
+	fil_space_t*	space;
+	ulint		flags;
 
+	ut_ad(fil_system);
+
+	if (UNIV_UNLIKELY(!id)) {
+		return(0);
+	}
+
+	fil_mutex_enter_and_prepare_for_io(id);
+
+	space = fil_space_get_by_id(id);
+
+	if (space == NULL) {
+		mutex_exit(&fil_system->mutex);
+
+		return(ULINT_UNDEFINED);
+	}
+
+	if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
+		ut_a(id != 0);
+
+		ut_a(1 == UT_LIST_GET_LEN(space->chain));
+
+		node = UT_LIST_GET_FIRST(space->chain);
+
+		/* It must be a single-table tablespace and we have not opened
+		the file yet; the following calls will open it and update the
+		size fields */
+
+		fil_node_prepare_for_io(node, fil_system, space);
+		fil_node_complete_io(node, fil_system, OS_FILE_READ);
+	}
+
+	flags = space->flags;
+
+	mutex_exit(&fil_system->mutex);
+
+	return(flags);
+}
+
+/*******************************************************************//**
+Returns the compressed page size of the space, or 0 if the space
+is not compressed. The tablespace must be cached in the memory cache.
+@return	compressed page size, ULINT_UNDEFINED if space not found */
+UNIV_INTERN
+ulint
+fil_space_get_zip_size(
+/*===================*/
+	ulint	id)	/*!< in: space id */
+{
+	ulint	flags;
+
+	flags = fil_space_get_flags(id);
+
+	if (flags && flags != ULINT_UNDEFINED) {
+
+		return(dict_table_flags_to_zip_size(flags));
+	}
+
+	return(flags);
+}
+
+/*******************************************************************//**
+Checks if the pair space, page_no refers to an existing page in a tablespace
+file space. The tablespace must be cached in the memory cache.
+@return	TRUE if the address is meaningful */
+UNIV_INTERN
 ibool
 fil_check_adress_in_tablespace(
 /*===========================*/
-			/* out: TRUE if the address is meaningful */
-	ulint	id,	/* in: space id */
-	ulint	page_no)/* in: page number */
+	ulint	id,	/*!< in: space id */
+	ulint	page_no)/*!< in: page number */
 {
 	if (fil_space_get_size(id) > page_no) {
 
@@ -1292,84 +1468,58 @@ fil_check_adress_in_tablespace(
 	return(FALSE);
 }
 
-/********************************************************************
-Creates a the tablespace memory cache. */
-static
-fil_system_t*
-fil_system_create(
-/*==============*/
-				/* out, own: tablespace memory cache */
-	ulint	hash_size,	/* in: hash table size */
-	ulint	max_n_open)	/* in: maximum number of open files; must be
-				> 10 */
+/****************************************************************//**
+Initializes the tablespace memory cache. */
+UNIV_INTERN
+void
+fil_init(
+/*=====*/
+	ulint	hash_size,	/*!< in: hash table size */
+	ulint	max_n_open)	/*!< in: max number of open files */
 {
-	fil_system_t*	system;
+	ut_a(fil_system == NULL);
 
 	ut_a(hash_size > 0);
 	ut_a(max_n_open > 0);
 
-	system = mem_alloc(sizeof(fil_system_t));
+	fil_system = mem_alloc(sizeof(fil_system_t));
 
-	mutex_create(&system->mutex, SYNC_ANY_LATCH);
+	mutex_create(&fil_system->mutex, SYNC_ANY_LATCH);
 
-	system->spaces = hash_create(hash_size);
-	system->name_hash = hash_create(hash_size);
+	fil_system->spaces = hash_create(hash_size);
+	fil_system->name_hash = hash_create(hash_size);
 
-	UT_LIST_INIT(system->LRU);
+	UT_LIST_INIT(fil_system->LRU);
 
-	system->n_open = 0;
-	system->max_n_open = max_n_open;
+	fil_system->n_open = 0;
+	fil_system->max_n_open = max_n_open;
 
-	system->modification_counter = 0;
-	system->max_assigned_id = 0;
+	fil_system->modification_counter = 0;
+	fil_system->max_assigned_id = 0;
 
-	system->tablespace_version = 0;
+	fil_system->tablespace_version = 0;
 
-	UT_LIST_INIT(system->unflushed_spaces);
-	UT_LIST_INIT(system->space_list);
-
-	return(system);
+	UT_LIST_INIT(fil_system->unflushed_spaces);
+	UT_LIST_INIT(fil_system->space_list);
 }
 
-/********************************************************************
-Initializes the tablespace memory cache. */
-
-void
-fil_init(
-/*=====*/
-	ulint	max_n_open)	/* in: max number of open files */
-{
-	ulint	hash_size;
-
-	ut_a(fil_system == NULL);
-
-	if (srv_file_per_table) {
-		hash_size = 50000;
-	} else {
-		hash_size = 5000;
-	}
-
-	fil_system = fil_system_create(hash_size, max_n_open);
-}
-
-/***********************************************************************
+/*******************************************************************//**
 Opens all log files and system tablespace data files. They stay open until the
 database server shutdown. This should be called at a server startup after the
 space objects for the log and the system tablespace have been created. The
 purpose of this operation is to make sure we never run out of file descriptors
 if we need to read from the insert buffer or to write to the log. */
-
+UNIV_INTERN
 void
 fil_open_log_and_system_tablespace_files(void)
 /*==========================================*/
 {
-	fil_system_t*	system = fil_system;
 	fil_space_t*	space;
 	fil_node_t*	node;
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	space = UT_LIST_GET_FIRST(system->space_list);
+	space = UT_LIST_GET_FIRST(fil_system->space_list);
 
 	while (space != NULL) {
 		if (space->purpose != FIL_TABLESPACE || space->id == 0) {
@@ -1377,10 +1527,11 @@ fil_open_log_and_system_tablespace_files(void)
 
 			while (node != NULL) {
 				if (!node->open) {
-					fil_node_open_file(node, system,
+					fil_node_open_file(node, fil_system,
 							   space);
 				}
-				if (system->max_n_open < 10 + system->n_open) {
+				if (fil_system->max_n_open
+				    < 10 + fil_system->n_open) {
 					fprintf(stderr,
 						"InnoDB: Warning: you must"
 						" raise the value of"
@@ -1398,8 +1549,8 @@ fil_open_log_and_system_tablespace_files(void)
 						" Current open files %lu,"
 						" max allowed"
 						" open files %lu.\n",
-						(ulong) system->n_open,
-						(ulong) system->max_n_open);
+						(ulong) fil_system->n_open,
+						(ulong) fil_system->max_n_open);
 				}
 				node = UT_LIST_GET_NEXT(chain, node);
 			}
@@ -1407,100 +1558,78 @@ fil_open_log_and_system_tablespace_files(void)
 		space = UT_LIST_GET_NEXT(space_list, space);
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Closes all open files. There must not be any pending i/o's or not flushed
 modifications in the files. */
-
+UNIV_INTERN
 void
 fil_close_all_files(void)
 /*=====================*/
 {
-	fil_system_t*	system = fil_system;
 	fil_space_t*	space;
 	fil_node_t*	node;
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	space = UT_LIST_GET_FIRST(system->space_list);
+	space = UT_LIST_GET_FIRST(fil_system->space_list);
 
 	while (space != NULL) {
 		node = UT_LIST_GET_FIRST(space->chain);
 
 		while (node != NULL) {
 			if (node->open) {
-				fil_node_close_file(node, system);
+				fil_node_close_file(node, fil_system);
 			}
 			node = UT_LIST_GET_NEXT(chain, node);
 		}
 		space = UT_LIST_GET_NEXT(space_list, space);
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Sets the max tablespace id counter if the given number is bigger than the
 previous value. */
-
+UNIV_INTERN
 void
 fil_set_max_space_id_if_bigger(
 /*===========================*/
-	ulint	max_id)	/* in: maximum known id */
+	ulint	max_id)	/*!< in: maximum known id */
 {
-	fil_system_t*	system = fil_system;
-
 	if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
 		fprintf(stderr,
 			"InnoDB: Fatal error: max tablespace id"
 			" is too high, %lu\n", (ulong) max_id);
-		ut_a(0);
+		ut_error;
 	}
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	if (system->max_assigned_id < max_id) {
+	if (fil_system->max_assigned_id < max_id) {
 
-		system->max_assigned_id = max_id;
+		fil_system->max_assigned_id = max_id;
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 }
 
-/********************************************************************
-Initializes the ibuf data structure for space 0 == the system tablespace.
-This can be called after the file space headers have been created and the
-dictionary system has been initialized. */
-
-void
-fil_ibuf_init_at_db_start(void)
-/*===========================*/
-{
-	fil_space_t*	space;
-
-	space = UT_LIST_GET_FIRST(fil_system->space_list);
-
-	ut_a(space);
-	ut_a(space->purpose == FIL_TABLESPACE);
-
-	space->ibuf_data = ibuf_data_init_for_space(space->id);
-}
-
-/********************************************************************
+/****************************************************************//**
 Writes the flushed lsn and the latest archived log number to the page header
-of the first page of a data file. */
+of the first page of a data file of the system tablespace (space 0),
+which is uncompressed. */
 static
 ulint
 fil_write_lsn_and_arch_no_to_file(
 /*==============================*/
-	ulint	space_id,	/* in: space number */
-	ulint	sum_of_sizes,	/* in: combined size of previous files in
-				space, in database pages */
-	dulint	lsn,		/* in: lsn to write */
-	ulint	arch_log_no	/* in: archived log number to write */
-	__attribute__((unused)))
+	ulint		sum_of_sizes,	/*!< in: combined size of previous files
+					in space, in database pages */
+	ib_uint64_t	lsn,		/*!< in: lsn to write */
+	ulint		arch_log_no __attribute__((unused)))
+					/*!< in: archived log number to write */
 {
 	byte*	buf1;
 	byte*	buf;
@@ -1508,32 +1637,35 @@ fil_write_lsn_and_arch_no_to_file(
 	buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
 	buf = ut_align(buf1, UNIV_PAGE_SIZE);
 
-	fil_read(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+	fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
 
-	mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
+	mach_write_ull(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
 
-	fil_write(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+	fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+
+	mem_free(buf1);
 
 	return(DB_SUCCESS);
 }
 
-/********************************************************************
+/****************************************************************//**
 Writes the flushed lsn and the latest archived log number to the page
-header of the first page of each data file in the system tablespace. */
-
+header of the first page of each data file in the system tablespace.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
 ulint
 fil_write_flushed_lsn_to_data_files(
 /*================================*/
-				/* out: DB_SUCCESS or error number */
-	dulint	lsn,		/* in: lsn to write */
-	ulint	arch_log_no)	/* in: latest archived log file number */
+	ib_uint64_t	lsn,		/*!< in: lsn to write */
+	ulint		arch_log_no)	/*!< in: latest archived log
+					file number */
 {
 	fil_space_t*	space;
 	fil_node_t*	node;
 	ulint		sum_of_sizes;
 	ulint		err;
 
-	mutex_enter(&(fil_system->mutex));
+	mutex_enter(&fil_system->mutex);
 
 	space = UT_LIST_GET_FIRST(fil_system->space_list);
 
@@ -1550,17 +1682,16 @@ fil_write_flushed_lsn_to_data_files(
 
 			node = UT_LIST_GET_FIRST(space->chain);
 			while (node) {
-				mutex_exit(&(fil_system->mutex));
+				mutex_exit(&fil_system->mutex);
 
 				err = fil_write_lsn_and_arch_no_to_file(
-					space->id, sum_of_sizes, lsn,
-					arch_log_no);
+					sum_of_sizes, lsn, arch_log_no);
 				if (err != DB_SUCCESS) {
 
 					return(err);
 				}
 
-				mutex_enter(&(fil_system->mutex));
+				mutex_enter(&fil_system->mutex);
 
 				sum_of_sizes += node->size;
 				node = UT_LIST_GET_NEXT(chain, node);
@@ -1569,31 +1700,32 @@ fil_write_flushed_lsn_to_data_files(
 		space = UT_LIST_GET_NEXT(space_list, space);
 	}
 
-	mutex_exit(&(fil_system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	return(DB_SUCCESS);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Reads the flushed lsn and arch no fields from a data file at database
 startup. */
-
+UNIV_INTERN
 void
 fil_read_flushed_lsn_and_arch_log_no(
 /*=================================*/
-	os_file_t data_file,		/* in: open data file */
-	ibool	one_read_already,	/* in: TRUE if min and max parameters
-					below already contain sensible data */
+	os_file_t	data_file,		/*!< in: open data file */
+	ibool		one_read_already,	/*!< in: TRUE if min and max
+						parameters below already
+						contain sensible data */
 #ifdef UNIV_LOG_ARCHIVE
-	ulint*	min_arch_log_no,	/* in/out: */
-	ulint*	max_arch_log_no,	/* in/out: */
+	ulint*		min_arch_log_no,	/*!< in/out: */
+	ulint*		max_arch_log_no,	/*!< in/out: */
 #endif /* UNIV_LOG_ARCHIVE */
-	dulint*	min_flushed_lsn,	/* in/out: */
-	dulint*	max_flushed_lsn)	/* in/out: */
+	ib_uint64_t*	min_flushed_lsn,	/*!< in/out: */
+	ib_uint64_t*	max_flushed_lsn)	/*!< in/out: */
 {
-	byte*	buf;
-	byte*	buf2;
-	dulint	flushed_lsn;
+	byte*		buf;
+	byte*		buf2;
+	ib_uint64_t	flushed_lsn;
 
 	buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
 	/* Align the memory for a possible read from a raw device */
@@ -1601,7 +1733,7 @@ fil_read_flushed_lsn_and_arch_log_no(
 
 	os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
 
-	flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN);
+	flushed_lsn = mach_read_ull(buf + FIL_PAGE_FILE_FLUSH_LSN);
 
 	ut_free(buf2);
 
@@ -1615,10 +1747,10 @@ fil_read_flushed_lsn_and_arch_log_no(
 		return;
 	}
 
-	if (ut_dulint_cmp(*min_flushed_lsn, flushed_lsn) > 0) {
+	if (*min_flushed_lsn > flushed_lsn) {
 		*min_flushed_lsn = flushed_lsn;
 	}
-	if (ut_dulint_cmp(*max_flushed_lsn, flushed_lsn) < 0) {
+	if (*max_flushed_lsn < flushed_lsn) {
 		*max_flushed_lsn = flushed_lsn;
 	}
 #ifdef UNIV_LOG_ARCHIVE
@@ -1633,23 +1765,22 @@ fil_read_flushed_lsn_and_arch_log_no(
 
 /*================ SINGLE-TABLE TABLESPACES ==========================*/
 
-/***********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
 Increments the count of pending insert buffer page merges, if space is not
-being deleted. */
-
+being deleted.
+@return	TRUE if being deleted, and ibuf merges should be skipped */
+UNIV_INTERN
 ibool
 fil_inc_pending_ibuf_merges(
 /*========================*/
-			/* out: TRUE if being deleted, and ibuf merges should
-			be skipped */
-	ulint	id)	/* in: space id */
+	ulint	id)	/*!< in: space id */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	if (space == NULL) {
 		fprintf(stderr,
@@ -1659,32 +1790,31 @@ fil_inc_pending_ibuf_merges(
 	}
 
 	if (space == NULL || space->stop_ibuf_merges) {
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return(TRUE);
 	}
 
 	space->n_pending_ibuf_merges++;
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	return(FALSE);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Decrements the count of pending insert buffer page merges. */
-
+UNIV_INTERN
 void
 fil_decr_pending_ibuf_merges(
 /*=========================*/
-	ulint	id)	/* in: space id */
+	ulint	id)	/*!< in: space id */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	if (space == NULL) {
 		fprintf(stderr,
@@ -1697,16 +1827,17 @@ fil_decr_pending_ibuf_merges(
 		space->n_pending_ibuf_merges--;
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/************************************************************
+/********************************************************//**
 Creates the database directory for a table if it does not exist yet. */
 static
 void
 fil_create_directory_for_tablename(
 /*===============================*/
-	const char*	name)	/* in: name in the standard
+	const char*	name)	/*!< in: name in the standard
 				'databasename/tablename' format */
 {
 	const char*	namend;
@@ -1730,29 +1861,35 @@ fil_create_directory_for_tablename(
 }
 
 #ifndef UNIV_HOTBACKUP
-/************************************************************
+/********************************************************//**
 Writes a log record about an .ibd file create/rename/delete. */
 static
 void
 fil_op_write_log(
 /*=============*/
-	ulint		type,		/* in: MLOG_FILE_CREATE,
+	ulint		type,		/*!< in: MLOG_FILE_CREATE,
+					MLOG_FILE_CREATE2,
 					MLOG_FILE_DELETE, or
 					MLOG_FILE_RENAME */
-	ulint		space_id,	/* in: space id */
-	const char*	name,		/* in: table name in the familiar
+	ulint		space_id,	/*!< in: space id */
+	ulint		log_flags,	/*!< in: redo log flags (stored
+					in the page number field) */
+	ulint		flags,		/*!< in: compressed page size
+					and file format
+					if type==MLOG_FILE_CREATE2, or 0 */
+	const char*	name,		/*!< in: table name in the familiar
 					'databasename/tablename' format, or
 					the file path in the case of
 					MLOG_FILE_DELETE */
-	const char*	new_name,	/* in: if type is MLOG_FILE_RENAME,
+	const char*	new_name,	/*!< in: if type is MLOG_FILE_RENAME,
 					the new table name in the
 					'databasename/tablename' format */
-	mtr_t*		mtr)		/* in: mini-transaction handle */
+	mtr_t*		mtr)		/*!< in: mini-transaction handle */
 {
 	byte*	log_ptr;
 	ulint	len;
 
-	log_ptr = mlog_open(mtr, 11 + 2);
+	log_ptr = mlog_open(mtr, 11 + 2 + 1);
 
 	if (!log_ptr) {
 		/* Logging in mtr is switched off during crash recovery:
@@ -1760,8 +1897,12 @@ fil_op_write_log(
 		return;
 	}
 
-	log_ptr = mlog_write_initial_log_record_for_file_op(type, space_id, 0,
-							    log_ptr, mtr);
+	log_ptr = mlog_write_initial_log_record_for_file_op(
+		type, space_id, log_flags, log_ptr, mtr);
+	if (type == MLOG_FILE_CREATE2) {
+		mach_write_to_4(log_ptr, flags);
+		log_ptr += 4;
+	}
 	/* Let us store the strings as null-terminated for easier readability
 	and handling */
 
@@ -1774,7 +1915,7 @@ fil_op_write_log(
 	mlog_catenate_string(mtr, (byte*) name, len);
 
 	if (type == MLOG_FILE_RENAME) {
-		ulint	len = strlen(new_name) + 1;
+		len = strlen(new_name) + 1;
 		log_ptr = mlog_open(mtr, 2 + len);
 		ut_a(log_ptr);
 		mach_write_to_2(log_ptr, len);
@@ -1786,7 +1927,7 @@ fil_op_write_log(
 }
 #endif
 
-/***********************************************************************
+/*******************************************************************//**
 Parses the body of a log record written about an .ibd file operation. That is,
 the log record part after the standard (type, space id, page no) header of the
 log record.
@@ -1797,29 +1938,39 @@ at that path does not exist yet. If the database directory for the file to be
 created does not exist, then we create the directory, too.
 
 Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
-datadir that we should use in replaying the file operations. */
-
+datadir that we should use in replaying the file operations.
+@return end of log record, or NULL if the record was not completely
+contained between ptr and end_ptr */
+UNIV_INTERN
 byte*
 fil_op_log_parse_or_replay(
 /*=======================*/
-				/* out: end of log record, or NULL if the
-				record was not completely contained between
-				ptr and end_ptr */
-	byte*	ptr,		/* in: buffer containing the log record body,
+	byte*	ptr,		/*!< in: buffer containing the log record body,
 				or an initial segment of it, if the record does
 				not fir completely between ptr and end_ptr */
-	byte*	end_ptr,	/* in: buffer end */
-	ulint	type,		/* in: the type of this log record */
-	ibool	do_replay,	/* in: TRUE if we want to replay the
-				operation, and not just parse the log record */
-	ulint	space_id)	/* in: if do_replay is TRUE, the space id of
-				the tablespace in question; otherwise
-				ignored */
+	byte*	end_ptr,	/*!< in: buffer end */
+	ulint	type,		/*!< in: the type of this log record */
+	ulint	space_id,	/*!< in: the space id of the tablespace in
+				question, or 0 if the log record should
+				only be parsed but not replayed */
+	ulint	log_flags)	/*!< in: redo log flags
+				(stored in the page number parameter) */
 {
 	ulint		name_len;
 	ulint		new_name_len;
 	const char*	name;
 	const char*	new_name	= NULL;
+	ulint		flags		= 0;
+
+	if (type == MLOG_FILE_CREATE2) {
+		if (end_ptr < ptr + 4) {
+
+			return(NULL);
+		}
+
+		flags = mach_read_from_4(ptr);
+		ptr += 4;
+	}
 
 	if (end_ptr < ptr + 2) {
 
@@ -1868,7 +2019,7 @@ fil_op_log_parse_or_replay(
 	printf("new name %s\n", new_name);
 	}
 	*/
-	if (do_replay == FALSE) {
+	if (!space_id) {
 
 		return(ptr);
 	}
@@ -1881,11 +2032,15 @@ fil_op_log_parse_or_replay(
 	were renames of tables during the backup. See ibbackup code for more
 	on the problem. */
 
-	if (type == MLOG_FILE_DELETE) {
+	switch (type) {
+	case MLOG_FILE_DELETE:
 		if (fil_tablespace_exists_in_mem(space_id)) {
 			ut_a(fil_delete_tablespace(space_id));
 		}
-	} else if (type == MLOG_FILE_RENAME) {
+
+		break;
+
+	case MLOG_FILE_RENAME:
 		/* We do the rename based on space id, not old file name;
 		this should guarantee that after the log replay each .ibd file
 		has the correct name for the latest log sequence number; the
@@ -1909,43 +2064,49 @@ fil_op_log_parse_or_replay(
 				}
 			}
 		}
-	} else {
-		ut_a(type == MLOG_FILE_CREATE);
 
+		break;
+
+	case MLOG_FILE_CREATE:
+	case MLOG_FILE_CREATE2:
 		if (fil_tablespace_exists_in_mem(space_id)) {
 			/* Do nothing */
 		} else if (fil_get_space_id_for_table(name)
 			   != ULINT_UNDEFINED) {
 			/* Do nothing */
+		} else if (log_flags & MLOG_FILE_FLAG_TEMP) {
+			/* Temporary table, do nothing */
 		} else {
 			/* Create the database directory for name, if it does
 			not exist yet */
 			fil_create_directory_for_tablename(name);
 
-			ut_a(space_id != 0);
-
 			if (fil_create_new_single_table_tablespace(
-				    &space_id, name, FALSE,
+				    &space_id, name, FALSE, flags,
 				    FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
 				ut_error;
 			}
 		}
+
+		break;
+
+	default:
+		ut_error;
 	}
 
 	return(ptr);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Deletes a single-table tablespace. The tablespace must be cached in the
-memory cache. */
-
+memory cache.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_delete_tablespace(
 /*==================*/
-			/* out: TRUE if success */
-	ulint	id)	/* in: space id */
+	ulint	id)	/*!< in: space id */
 {
-	fil_system_t*	system		= fil_system;
 	ibool		success;
 	fil_space_t*	space;
 	fil_node_t*	node;
@@ -1954,15 +2115,15 @@ fil_delete_tablespace(
 
 	ut_a(id != 0);
 stop_ibuf_merges:
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	if (space != NULL) {
 		space->stop_ibuf_merges = TRUE;
 
 		if (space->n_pending_ibuf_merges == 0) {
-			mutex_exit(&(system->mutex));
+			mutex_exit(&fil_system->mutex);
 
 			count = 0;
 
@@ -1981,7 +2142,7 @@ stop_ibuf_merges:
 					(ulong) count);
 			}
 
-			mutex_exit(&(system->mutex));
+			mutex_exit(&fil_system->mutex);
 
 			os_thread_sleep(20000);
 			count++;
@@ -1990,13 +2151,13 @@ stop_ibuf_merges:
 		}
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 	count = 0;
 
 try_again:
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	if (space == NULL) {
 		ut_print_timestamp(stderr);
@@ -2006,7 +2167,7 @@ try_again:
 			" tablespace memory cache.\n",
 			(ulong) id);
 
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return(FALSE);
 	}
@@ -2033,7 +2194,7 @@ try_again:
 				(ulong) node->n_pending,
 				(ulong) count);
 		}
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 		os_thread_sleep(20000);
 
 		count++;
@@ -2043,7 +2204,7 @@ try_again:
 
 	path = mem_strdup(space->name);
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 #ifndef UNIV_HOTBACKUP
 	/* Invalidate in the buffer pool all pages belonging to the
 	tablespace. Since we have set space->is_being_deleted = TRUE, readahead
@@ -2078,7 +2239,7 @@ try_again:
 		to write any log record */
 		mtr_start(&mtr);
 
-		fil_op_write_log(MLOG_FILE_DELETE, id, path, NULL, &mtr);
+		fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr);
 		mtr_commit(&mtr);
 #endif
 		mem_free(path);
@@ -2091,20 +2252,21 @@ try_again:
 	return(FALSE);
 }
 
-/***********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
 Discards a single-table tablespace. The tablespace must be cached in the
 memory cache. Discarding is like deleting a tablespace, but
 1) we do not drop the table from the data dictionary;
 2) we remove all insert buffer entries for the tablespace immediately; in DROP
 TABLE they are only removed gradually in the background;
 3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
-as it originally had. */
-
+as it originally had.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_discard_tablespace(
 /*===================*/
-			/* out: TRUE if success */
-	ulint	id)	/* in: space id */
+	ulint	id)	/*!< in: space id */
 {
 	ibool	success;
 
@@ -2123,26 +2285,27 @@ fil_discard_tablespace(
 
 	ibuf_delete_for_discarded_space(id);
 
-	return(TRUE);
+	return(success);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***********************************************************************
-Renames the memory cache structures of a single-table tablespace. */
+/*******************************************************************//**
+Renames the memory cache structures of a single-table tablespace.
+@return	TRUE if success */
 static
 ibool
 fil_rename_tablespace_in_mem(
 /*=========================*/
-				/* out: TRUE if success */
-	fil_space_t*	space,	/* in: tablespace memory object */
-	fil_node_t*	node,	/* in: file node of that tablespace */
-	const char*	path)	/* in: new name */
+	fil_space_t*	space,	/*!< in: tablespace memory object */
+	fil_node_t*	node,	/*!< in: file node of that tablespace */
+	const char*	path)	/*!< in: new name */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space2;
 	const char*	old_name	= space->name;
 
-	HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(old_name),
-		    space2, 0 == strcmp(old_name, space2->name));
+	ut_ad(mutex_own(&fil_system->mutex));
+
+	space2 = fil_space_get_by_name(old_name);
 	if (space != space2) {
 		fputs("InnoDB: Error: cannot find ", stderr);
 		ut_print_filename(stderr, old_name);
@@ -2151,8 +2314,7 @@ fil_rename_tablespace_in_mem(
 		return(FALSE);
 	}
 
-	HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(path),
-		    space2, 0 == strcmp(path, space2->name));
+	space2 = fil_space_get_by_name(path);
 	if (space2 != NULL) {
 		fputs("InnoDB: Error: ", stderr);
 		ut_print_filename(stderr, path);
@@ -2161,7 +2323,7 @@ fil_rename_tablespace_in_mem(
 		return(FALSE);
 	}
 
-	HASH_DELETE(fil_space_t, name_hash, system->name_hash,
+	HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
 		    ut_fold_string(space->name), space);
 	mem_free(space->name);
 	mem_free(node->name);
@@ -2169,22 +2331,22 @@ fil_rename_tablespace_in_mem(
 	space->name = mem_strdup(path);
 	node->name = mem_strdup(path);
 
-	HASH_INSERT(fil_space_t, name_hash, system->name_hash,
+	HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
 		    ut_fold_string(path), space);
 	return(TRUE);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Allocates a file name for a single-table tablespace. The string must be freed
-by caller with mem_free(). */
+by caller with mem_free().
+@return	own: file name */
 static
 char*
 fil_make_ibd_name(
 /*==============*/
-					/* out, own: file name */
-	const char*	name,		/* in: table name or a dir path of a
+	const char*	name,		/*!< in: table name or a dir path of a
 					TEMPORARY table */
-	ibool		is_temp)	/* in: TRUE if it is a dir path */
+	ibool		is_temp)	/*!< in: TRUE if it is a dir path */
 {
 	ulint	namelen		= strlen(name);
 	ulint	dirlen		= strlen(fil_path_to_mysql_datadir);
@@ -2206,24 +2368,23 @@ fil_make_ibd_name(
 	return(filename);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache. */
-
+tablespace memory cache.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_rename_tablespace(
 /*==================*/
-					/* out: TRUE if success */
-	const char*	old_name,	/* in: old table name in the standard
+	const char*	old_name,	/*!< in: old table name in the standard
 					databasename/tablename format of
 					InnoDB, or NULL if we do the rename
 					based on the space id only */
-	ulint		id,		/* in: space id */
-	const char*	new_name)	/* in: new table name in the standard
+	ulint		id,		/*!< in: space id */
+	const char*	new_name)	/*!< in: new table name in the standard
 					databasename/tablename format
 					of InnoDB */
 {
-	fil_system_t*	system		= fil_system;
 	ibool		success;
 	fil_space_t*	space;
 	fil_node_t*	node;
@@ -2250,9 +2411,9 @@ retry:
 		fprintf(stderr, ", %lu iterations\n", (ulong) count);
 	}
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	if (space == NULL) {
 		fprintf(stderr,
@@ -2261,14 +2422,14 @@ retry:
 			"InnoDB: though the table ", (ulong) id);
 		ut_print_filename(stderr, old_name);
 		fputs(" in a rename operation should have that id\n", stderr);
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return(FALSE);
 	}
 
 	if (count > 25000) {
 		space->stop_ios = FALSE;
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return(FALSE);
 	}
@@ -2286,7 +2447,7 @@ retry:
 		/* There are pending i/o's or flushes, sleep for a while and
 		retry */
 
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		os_thread_sleep(20000);
 
@@ -2295,7 +2456,7 @@ retry:
 	} else if (node->modification_counter > node->flush_counter) {
 		/* Flush the space */
 
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		os_thread_sleep(20000);
 
@@ -2306,7 +2467,7 @@ retry:
 	} else if (node->open) {
 		/* Close the file */
 
-		fil_node_close_file(node, system);
+		fil_node_close_file(node, fil_system);
 	}
 
 	/* Check that the old name in the space is right */
@@ -2341,7 +2502,7 @@ retry:
 
 	space->stop_ios = FALSE;
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 #ifndef UNIV_HOTBACKUP
 	if (success) {
@@ -2349,7 +2510,7 @@ retry:
 
 		mtr_start(&mtr);
 
-		fil_op_write_log(MLOG_FILE_RENAME, id, old_name, new_name,
+		fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name,
 				 &mtr);
 		mtr_commit(&mtr);
 	}
@@ -2357,27 +2518,28 @@ retry:
 	return(success);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Creates a new single-table tablespace to a database directory of MySQL.
 Database directories are under the 'datadir' of MySQL. The datadir is the
 directory of a running mysqld program. We can refer to it by simply the
 path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
-dir of the mysqld server. */
-
+dir of the mysqld server.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
 ulint
 fil_create_new_single_table_tablespace(
 /*===================================*/
-					/* out: DB_SUCCESS or error code */
-	ulint*		space_id,	/* in/out: space id; if this is != 0,
+	ulint*		space_id,	/*!< in/out: space id; if this is != 0,
 					then this is an input parameter,
 					otherwise output */
-	const char*	tablename,	/* in: the table name in the usual
+	const char*	tablename,	/*!< in: the table name in the usual
 					databasename/tablename format
 					of InnoDB, or a dir path to a temp
 					table */
-	ibool		is_temp,	/* in: TRUE if a table created with
+	ibool		is_temp,	/*!< in: TRUE if a table created with
 					CREATE TEMPORARY TABLE */
-	ulint		size)		/* in: the initial size of the
+	ulint		flags,		/*!< in: tablespace flags */
+	ulint		size)		/*!< in: the initial size of the
 					tablespace file in pages,
 					must be >= FIL_IBD_FILE_INITIAL_SIZE */
 {
@@ -2390,6 +2552,11 @@ fil_create_new_single_table_tablespace(
 	char*		path;
 
 	ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
+	/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
+	ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+	ROW_FORMAT=REDUNDANT (table->flags == 0).  For any other
+	format, the tablespace flags should equal table->flags. */
+	ut_a(flags != DICT_TF_COMPACT);
 
 	path = fil_make_ibd_name(tablename, is_temp);
 
@@ -2437,7 +2604,7 @@ fil_create_new_single_table_tablespace(
 		return(DB_ERROR);
 	}
 
-	buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
+	buf2 = ut_malloc(3 * UNIV_PAGE_SIZE);
 	/* Align the memory for file i/o if we might have O_DIRECT set */
 	page = ut_align(buf2, UNIV_PAGE_SIZE);
 
@@ -2480,11 +2647,30 @@ error_exit2:
 
 	memset(page, '\0', UNIV_PAGE_SIZE);
 
-	fsp_header_write_space_id(page, *space_id);
+	fsp_header_init_fields(page, *space_id, flags);
+	mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, *space_id);
 
-	buf_flush_init_for_writing(page, ut_dulint_zero, *space_id, 0);
+	if (!(flags & DICT_TF_ZSSIZE_MASK)) {
+		buf_flush_init_for_writing(page, NULL, 0);
+		ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
+	} else {
+		page_zip_des_t	page_zip;
+		ulint		zip_size;
 
-	ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
+		zip_size = ((PAGE_ZIP_MIN_SIZE >> 1)
+			    << ((flags & DICT_TF_ZSSIZE_MASK)
+				>> DICT_TF_ZSSIZE_SHIFT));
+
+		page_zip_set_size(&page_zip, zip_size);
+		page_zip.data = page + UNIV_PAGE_SIZE;
+#ifdef UNIV_DEBUG
+		page_zip.m_start =
+#endif /* UNIV_DEBUG */
+			page_zip.m_end = page_zip.m_nonempty =
+			page_zip.n_blobs = 0;
+		buf_flush_init_for_writing(page, &page_zip, 0);
+		ret = os_file_write(path, file, page_zip.data, 0, 0, zip_size);
+	}
 
 	ut_free(buf2);
 
@@ -2511,7 +2697,7 @@ error_exit2:
 		goto error_exit2;
 	}
 
-	success = fil_space_create(path, *space_id, FIL_TABLESPACE);
+	success = fil_space_create(path, *space_id, flags, FIL_TABLESPACE);
 
 	if (!success) {
 		goto error_exit2;
@@ -2525,8 +2711,13 @@ error_exit2:
 
 		mtr_start(&mtr);
 
-		fil_op_write_log(MLOG_FILE_CREATE, *space_id, tablename,
-				 NULL, &mtr);
+		fil_op_write_log(flags
+				 ? MLOG_FILE_CREATE2
+				 : MLOG_FILE_CREATE,
+				 *space_id,
+				 is_temp ? MLOG_FILE_FLAG_TEMP : 0,
+				 flags,
+				 tablename, NULL, &mtr);
 
 		mtr_commit(&mtr);
 	}
@@ -2535,7 +2726,8 @@ error_exit2:
 	return(DB_SUCCESS);
 }
 
-/************************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
 It is possible, though very improbable, that the lsn's in the tablespace to be
 imported have risen above the current system lsn, if a lengthy purge, ibuf
 merge, or rollback was performed on a backup taken with ibbackup. If that is
@@ -2543,15 +2735,15 @@ the case, reset page lsn's in the file. We assume that mysqld was shut down
 after it performed these cleanup operations on the .ibd file, so that it at
 the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
 first page of the .ibd file, and we can determine whether we need to reset the
-lsn's just by looking at that flush lsn. */
-
+lsn's just by looking at that flush lsn.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_reset_too_high_lsns(
 /*====================*/
-					/* out: TRUE if success */
-	const char*	name,		/* in: table name in the
+	const char*	name,		/*!< in: table name in the
 					databasename/tablename format */
-	dulint		current_lsn)	/* in: reset lsn's if the lsn stamped
+	ib_uint64_t	current_lsn)	/*!< in: reset lsn's if the lsn stamped
 					to FIL_PAGE_FILE_FLUSH_LSN in the
 					first page is too high */
 {
@@ -2559,11 +2751,11 @@ fil_reset_too_high_lsns(
 	char*		filepath;
 	byte*		page;
 	byte*		buf2;
-	dulint		flush_lsn;
+	ib_uint64_t	flush_lsn;
 	ulint		space_id;
-	ib_longlong	file_size;
-	ib_longlong	offset;
-	ulint		page_no;
+	ib_int64_t	file_size;
+	ib_int64_t	offset;
+	ulint		zip_size;
 	ibool		success;
 
 	filepath = fil_make_ibd_name(name, FALSE);
@@ -2588,7 +2780,7 @@ fil_reset_too_high_lsns(
 
 	/* Read the first page of the tablespace */
 
-	buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
+	buf2 = ut_malloc(3 * UNIV_PAGE_SIZE);
 	/* Align the memory for file i/o if we might have O_DIRECT set */
 	page = ut_align(buf2, UNIV_PAGE_SIZE);
 
@@ -2600,9 +2792,9 @@ fil_reset_too_high_lsns(
 
 	/* We have to read the file flush lsn from the header of the file */
 
-	flush_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
+	flush_lsn = mach_read_ull(page + FIL_PAGE_FILE_FLUSH_LSN);
 
-	if (ut_dulint_cmp(current_lsn, flush_lsn) >= 0) {
+	if (current_lsn >= flush_lsn) {
 		/* Ok */
 		success = TRUE;
 
@@ -2610,48 +2802,56 @@ fil_reset_too_high_lsns(
 	}
 
 	space_id = fsp_header_get_space_id(page);
+	zip_size = fsp_header_get_zip_size(page);
 
 	ut_print_timestamp(stderr);
 	fprintf(stderr,
 		"  InnoDB: Flush lsn in the tablespace file %lu"
 		" to be imported\n"
-		"InnoDB: is %lu %lu, which exceeds current"
-		" system lsn %lu %lu.\n"
+		"InnoDB: is %llu, which exceeds current"
+		" system lsn %llu.\n"
 		"InnoDB: We reset the lsn's in the file ",
 		(ulong) space_id,
-		(ulong) ut_dulint_get_high(flush_lsn),
-		(ulong) ut_dulint_get_low(flush_lsn),
-		(ulong) ut_dulint_get_high(current_lsn),
-		(ulong) ut_dulint_get_low(current_lsn));
+		flush_lsn, current_lsn);
 	ut_print_filename(stderr, filepath);
 	fputs(".\n", stderr);
 
+	ut_a(ut_is_2pow(zip_size));
+	ut_a(zip_size <= UNIV_PAGE_SIZE);
+
 	/* Loop through all the pages in the tablespace and reset the lsn and
 	the page checksum if necessary */
 
 	file_size = os_file_get_size_as_iblonglong(file);
 
-	for (offset = 0; offset < file_size; offset += UNIV_PAGE_SIZE) {
+	for (offset = 0; offset < file_size;
+	     offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
 		success = os_file_read(file, page,
 				       (ulint)(offset & 0xFFFFFFFFUL),
-				       (ulint)(offset >> 32), UNIV_PAGE_SIZE);
+				       (ulint)(offset >> 32),
+				       zip_size ? zip_size : UNIV_PAGE_SIZE);
 		if (!success) {
 
 			goto func_exit;
 		}
-		if (ut_dulint_cmp(mach_read_from_8(page + FIL_PAGE_LSN),
-				  current_lsn) > 0) {
+		if (mach_read_ull(page + FIL_PAGE_LSN) > current_lsn) {
 			/* We have to reset the lsn */
-			space_id = mach_read_from_4(
-				page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-			page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
 
-			buf_flush_init_for_writing(page, current_lsn, space_id,
-						   page_no);
+			if (zip_size) {
+				memcpy(page + UNIV_PAGE_SIZE, page, zip_size);
+				buf_flush_init_for_writing(
+					page, page + UNIV_PAGE_SIZE,
+					current_lsn);
+			} else {
+				buf_flush_init_for_writing(
+					page, NULL, current_lsn);
+			}
 			success = os_file_write(filepath, file, page,
 						(ulint)(offset & 0xFFFFFFFFUL),
 						(ulint)(offset >> 32),
-						UNIV_PAGE_SIZE);
+						zip_size
+						? zip_size
+						: UNIV_PAGE_SIZE);
 			if (!success) {
 
 				goto func_exit;
@@ -2666,15 +2866,17 @@ fil_reset_too_high_lsns(
 	}
 
 	/* We now update the flush_lsn stamp at the start of the file */
-	success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
+	success = os_file_read(file, page, 0, 0,
+			       zip_size ? zip_size : UNIV_PAGE_SIZE);
 	if (!success) {
 
 		goto func_exit;
 	}
 
-	mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
+	mach_write_ull(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
 
-	success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
+	success = os_file_write(filepath, file, page, 0, 0,
+				zip_size ? zip_size : UNIV_PAGE_SIZE);
 	if (!success) {
 
 		goto func_exit;
@@ -2688,7 +2890,7 @@ func_exit:
 	return(success);
 }
 
-/************************************************************************
+/********************************************************************//**
 Tries to open a single-table tablespace and optionally checks the space id is
 right in it. If does not succeed, prints an error message to the .err log. This
 function is used to open a tablespace when we start up mysqld, and also in
@@ -2696,21 +2898,22 @@ IMPORT TABLESPACE.
 NOTE that we assume this operation is used either at the database startup
 or under the protection of the dictionary mutex, so that two users cannot
 race here. This operation does not leave the file associated with the
-tablespace open, but closes it after we have looked at the space id in it. */
-
+tablespace open, but closes it after we have looked at the space id in it.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_open_single_table_tablespace(
 /*=============================*/
-					/* out: TRUE if success */
-	ibool		check_space_id,	/* in: should we check that the space
+	ibool		check_space_id,	/*!< in: should we check that the space
 					id in the file is right; we assume
 					that this function runs much faster
 					if no check is made, since accessing
 					the file inode probably is much
 					faster (the OS caches them) than
 					accessing the first page of the file */
-	ulint		id,		/* in: space id */
-	const char*	name)		/* in: table name in the
+	ulint		id,		/*!< in: space id */
+	ulint		flags,		/*!< in: tablespace flags */
+	const char*	name)		/*!< in: table name in the
 					databasename/tablename format */
 {
 	os_file_t	file;
@@ -2719,10 +2922,17 @@ fil_open_single_table_tablespace(
 	byte*		buf2;
 	byte*		page;
 	ulint		space_id;
+	ulint		space_flags;
 	ibool		ret		= TRUE;
 
 	filepath = fil_make_ibd_name(name, FALSE);
 
+	/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
+	ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+	ROW_FORMAT=REDUNDANT (table->flags == 0).  For any other
+	format, the tablespace flags should equal table->flags. */
+	ut_a(flags != DICT_TF_COMPACT);
+
 	file = os_file_create_simple_no_error_handling(
 		filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
 	if (!success) {
@@ -2744,8 +2954,7 @@ fil_open_single_table_tablespace(
 		      " a temporary table #sql...,\n"
 		      "InnoDB: and MySQL removed the .ibd file for this.\n"
 		      "InnoDB: Please refer to\n"
-		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-		      "innodb-troubleshooting.html\n"
+		      "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
 		      "InnoDB: for how to resolve the issue.\n", stderr);
 
 		mem_free(filepath);
@@ -2767,28 +2976,30 @@ fil_open_single_table_tablespace(
 
 	success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
 
-	/* We have to read the tablespace id from the file */
+	/* We have to read the tablespace id and flags from the file. */
 
 	space_id = fsp_header_get_space_id(page);
+	space_flags = fsp_header_get_flags(page);
 
 	ut_free(buf2);
 
-	if (space_id != id) {
+	if (UNIV_UNLIKELY(space_id != id || space_flags != flags)) {
 		ut_print_timestamp(stderr);
 
-		fputs("  InnoDB: Error: tablespace id in file ", stderr);
+		fputs("  InnoDB: Error: tablespace id and flags in file ",
+		      stderr);
 		ut_print_filename(stderr, filepath);
-		fprintf(stderr, " is %lu, but in the InnoDB\n"
-			"InnoDB: data dictionary it is %lu.\n"
+		fprintf(stderr, " are %lu and %lu, but in the InnoDB\n"
+			"InnoDB: data dictionary they are %lu and %lu.\n"
 			"InnoDB: Have you moved InnoDB .ibd files"
 			" around without using the\n"
 			"InnoDB: commands DISCARD TABLESPACE and"
 			" IMPORT TABLESPACE?\n"
 			"InnoDB: Please refer to\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"innodb-troubleshooting.html\n"
+			"InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
 			"InnoDB: for how to resolve the issue.\n",
-			(ulong) space_id, (ulong) id);
+			(ulong) space_id, (ulong) space_flags,
+			(ulong) id, (ulong) flags);
 
 		ret = FALSE;
 
@@ -2796,7 +3007,7 @@ fil_open_single_table_tablespace(
 	}
 
 skip_check:
-	success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
+	success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
 
 	if (!success) {
 		goto func_exit;
@@ -2812,17 +3023,18 @@ func_exit:
 
 	return(ret);
 }
+#endif /* !UNIV_HOTBACKUP */
 
 #ifdef UNIV_HOTBACKUP
-/***********************************************************************
+/*******************************************************************//**
 Allocates a file name for an old version of a single-table tablespace.
-The string must be freed by caller with mem_free()! */
+The string must be freed by caller with mem_free()!
+@return	own: file name */
 static
 char*
 fil_make_ibbackup_old_name(
 /*=======================*/
-					/* out, own: file name */
-	const char*	name)		/* in: original file name */
+	const char*	name)		/*!< in: original file name */
 {
 	static const char suffix[] = "_ibbackup_old_vers_";
 	ulint	len	= strlen(name);
@@ -2835,15 +3047,15 @@ fil_make_ibbackup_old_name(
 }
 #endif /* UNIV_HOTBACKUP */
 
-/************************************************************************
+/********************************************************************//**
 Opens an .ibd file and adds the associated single-table tablespace to the
 InnoDB fil0fil.c data structures. */
 static
 void
 fil_load_single_table_tablespace(
 /*=============================*/
-	const char*	dbname,		/* in: database name */
-	const char*	filename)	/* in: file name (not a path),
+	const char*	dbname,		/*!< in: database name */
+	const char*	filename)	/*!< in: file name (not a path),
 					including the .ibd extension */
 {
 	os_file_t	file;
@@ -2852,9 +3064,10 @@ fil_load_single_table_tablespace(
 	byte*		buf2;
 	byte*		page;
 	ulint		space_id;
+	ulint		flags;
 	ulint		size_low;
 	ulint		size_high;
-	ib_longlong	size;
+	ib_int64_t	size;
 #ifdef UNIV_HOTBACKUP
 	fil_space_t*	space;
 #endif
@@ -2974,7 +3187,7 @@ fil_load_single_table_tablespace(
 	/* Every .ibd file is created >= 4 pages in size. Smaller files
 	cannot be ok. */
 
-	size = (((ib_longlong)size_high) << 32) + (ib_longlong)size_low;
+	size = (((ib_int64_t)size_high) << 32) + (ib_int64_t)size_low;
 #ifndef UNIV_HOTBACKUP
 	if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
 		fprintf(stderr,
@@ -3002,8 +3215,10 @@ fil_load_single_table_tablespace(
 		/* We have to read the tablespace id from the file */
 
 		space_id = fsp_header_get_space_id(page);
+		flags = fsp_header_get_flags(page);
 	} else {
 		space_id = ULINT_UNDEFINED;
+		flags = 0;
 	}
 
 #ifndef UNIV_HOTBACKUP
@@ -3048,9 +3263,9 @@ fil_load_single_table_tablespace(
 	file than delete it, because if there is a bug, we do not want to
 	destroy valuable data. */
 
-	mutex_enter(&(fil_system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	space = fil_get_space_for_id_low(space_id);
+	space = fil_space_get_by_id(space_id);
 
 	if (space) {
 		char*	new_path;
@@ -3068,7 +3283,7 @@ fil_load_single_table_tablespace(
 
 		new_path = fil_make_ibbackup_old_name(filepath);
 
-		mutex_exit(&(fil_system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		ut_a(os_file_rename(filepath, new_path));
 
@@ -3078,9 +3293,9 @@ fil_load_single_table_tablespace(
 
 		return;
 	}
-	mutex_exit(&(fil_system->mutex));
+	mutex_exit(&fil_system->mutex);
 #endif
-	success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
+	success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
 
 	if (!success) {
 
@@ -3098,21 +3313,21 @@ func_exit:
 	mem_free(filepath);
 }
 
-/***************************************************************************
+/***********************************************************************//**
 A fault-tolerant function that tries to read the next file name in the
 directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
-idea is to read as much good data as we can and jump over bad data. */
+idea is to read as much good data as we can and jump over bad data.
+@return 0 if ok, -1 if error even after the retries, 1 if at the end
+of the directory */
 static
 int
 fil_file_readdir_next_file(
 /*=======================*/
-				/* out: 0 if ok, -1 if error even after the
-				retries, 1 if at the end of the directory */
-	ulint*		err,	/* out: this is set to DB_ERROR if an error
+	ulint*		err,	/*!< out: this is set to DB_ERROR if an error
 				was encountered, otherwise not changed */
-	const char*	dirname,/* in: directory name or path */
-	os_file_dir_t	dir,	/* in: directory stream */
-	os_file_stat_t*	info)	/* in/out: buffer where the info is returned */
+	const char*	dirname,/*!< in: directory name or path */
+	os_file_dir_t	dir,	/*!< in: directory stream */
+	os_file_stat_t*	info)	/*!< in/out: buffer where the info is returned */
 {
 	ulint	i;
 	int	ret;
@@ -3138,18 +3353,18 @@ fil_file_readdir_next_file(
 	return(-1);
 }
 
-/************************************************************************
+/********************************************************************//**
 At the server startup, if we need crash recovery, scans the database
 directories under the MySQL datadir, looking for .ibd files. Those files are
 single-table tablespaces. We need to know the space id in each of them so that
 we know into which file we should look to check the contents of a page stored
 in the doublewrite buffer, also to know where to apply log records where the
-space id is != 0. */
-
+space id is != 0.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
 ulint
 fil_load_single_table_tablespaces(void)
 /*===================================*/
-			/* out: DB_SUCCESS or error number */
 {
 	int		ret;
 	char*		dbpath		= NULL;
@@ -3267,23 +3482,22 @@ next_datadir_item:
 	return(err);
 }
 
-/************************************************************************
+/********************************************************************//**
 If we need crash recovery, and we have called
 fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
 we can call this function to print an error message of orphaned .ibd files
 for which there is not a data dictionary entry with a matching table name
 and space id. */
-
+UNIV_INTERN
 void
 fil_print_orphaned_tablespaces(void)
 /*================================*/
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	space = UT_LIST_GET_FIRST(system->space_list);
+	space = UT_LIST_GET_FIRST(fil_system->space_list);
 
 	while (space) {
 		if (space->purpose == FIL_TABLESPACE && space->id != 0
@@ -3298,128 +3512,115 @@ fil_print_orphaned_tablespaces(void)
 		space = UT_LIST_GET_NEXT(space_list, space);
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Returns TRUE if a single-table tablespace does not exist in the memory cache,
-or is being deleted there. */
-
+or is being deleted there.
+@return	TRUE if does not exist or is being\ deleted */
+UNIV_INTERN
 ibool
 fil_tablespace_deleted_or_being_deleted_in_mem(
 /*===========================================*/
-				/* out: TRUE if does not exist or is being\
-				deleted */
-	ulint		id,	/* in: space id */
-	ib_longlong	version)/* in: tablespace_version should be this; if
+	ulint		id,	/*!< in: space id */
+	ib_int64_t	version)/*!< in: tablespace_version should be this; if
 				you pass -1 as the value of this, then this
 				parameter is ignored */
 {
-	fil_system_t*	system	= fil_system;
 	fil_space_t*	space;
 
-	ut_ad(system);
+	ut_ad(fil_system);
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	if (space == NULL || space->is_being_deleted) {
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return(TRUE);
 	}
 
-	if (version != ((ib_longlong)-1)
+	if (version != ((ib_int64_t)-1)
 	    && space->tablespace_version != version) {
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return(TRUE);
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	return(FALSE);
 }
 
-/***********************************************************************
-Returns TRUE if a single-table tablespace exists in the memory cache. */
-
+/*******************************************************************//**
+Returns TRUE if a single-table tablespace exists in the memory cache.
+@return	TRUE if exists */
+UNIV_INTERN
 ibool
 fil_tablespace_exists_in_mem(
 /*=========================*/
-			/* out: TRUE if exists */
-	ulint	id)	/* in: space id */
+	ulint	id)	/*!< in: space id */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
 
-	ut_ad(system);
+	ut_ad(fil_system);
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
-	if (space == NULL) {
-		mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
-		return(FALSE);
-	}
-
-	mutex_exit(&(system->mutex));
-
-	return(TRUE);
+	return(space != NULL);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
 cache. Note that if we have not done a crash recovery at the database startup,
-there may be many tablespaces which are not yet in the memory cache. */
-
+there may be many tablespaces which are not yet in the memory cache.
+@return	TRUE if a matching tablespace exists in the memory cache */
+UNIV_INTERN
 ibool
 fil_space_for_table_exists_in_mem(
 /*==============================*/
-					/* out: TRUE if a matching tablespace
-					exists in the memory cache */
-	ulint		id,		/* in: space id */
-	const char*	name,		/* in: table name in the standard
+	ulint		id,		/*!< in: space id */
+	const char*	name,		/*!< in: table name in the standard
 					'databasename/tablename' format or
 					the dir path to a temp table */
-	ibool		is_temp,	/* in: TRUE if created with CREATE
+	ibool		is_temp,	/*!< in: TRUE if created with CREATE
 					TEMPORARY TABLE */
-	ibool		mark_space,	/* in: in crash recovery, at database
+	ibool		mark_space,	/*!< in: in crash recovery, at database
 					startup we mark all spaces which have
 					an associated table in the InnoDB
 					data dictionary, so that
 					we can print a warning about orphaned
 					tablespaces */
 	ibool		print_error_if_does_not_exist)
-					/* in: print detailed error
+					/*!< in: print detailed error
 					information to the .err log if a
 					matching tablespace is not found from
 					memory */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	namespace;
 	fil_space_t*	space;
 	char*		path;
 
-	ut_ad(system);
+	ut_ad(fil_system);
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
 	path = fil_make_ibd_name(name, is_temp);
 
 	/* Look if there is a space with the same id */
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	/* Look if there is a space with the same name; the name is the
 	directory path from the datadir to the file */
 
-	HASH_SEARCH(name_hash, system->name_hash,
-		    ut_fold_string(path), namespace,
-		    0 == strcmp(namespace->name, path));
+	namespace = fil_space_get_by_name(path);
 	if (space && space == namespace) {
 		/* Found */
 
@@ -3428,7 +3629,7 @@ fil_space_for_table_exists_in_mem(
 		}
 
 		mem_free(path);
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return(TRUE);
 	}
@@ -3436,7 +3637,7 @@ fil_space_for_table_exists_in_mem(
 	if (!print_error_if_does_not_exist) {
 
 		mem_free(path);
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return(FALSE);
 	}
@@ -3476,12 +3677,11 @@ fil_space_for_table_exists_in_mem(
 		}
 error_exit:
 		fputs("InnoDB: Please refer to\n"
-		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-		      "innodb-troubleshooting.html\n"
+		      "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
 		      "InnoDB: for how to resolve the issue.\n", stderr);
 
 		mem_free(path);
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return(FALSE);
 	}
@@ -3511,69 +3711,65 @@ error_exit:
 	}
 
 	mem_free(path);
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	return(FALSE);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Checks if a single-table tablespace for a given table name exists in the
-tablespace memory cache. */
+tablespace memory cache.
+@return	space id, ULINT_UNDEFINED if not found */
 static
 ulint
 fil_get_space_id_for_table(
 /*=======================*/
-				/* out: space id, ULINT_UNDEFINED if not
-				found */
-	const char*	name)	/* in: table name in the standard
+	const char*	name)	/*!< in: table name in the standard
 				'databasename/tablename' format */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	namespace;
 	ulint		id		= ULINT_UNDEFINED;
 	char*		path;
 
-	ut_ad(system);
+	ut_ad(fil_system);
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
 	path = fil_make_ibd_name(name, FALSE);
 
 	/* Look if there is a space with the same name; the name is the
 	directory path to the file */
 
-	HASH_SEARCH(name_hash, system->name_hash,
-		    ut_fold_string(path), namespace,
-		    0 == strcmp(namespace->name, path));
+	namespace = fil_space_get_by_name(path);
+
 	if (namespace) {
 		id = namespace->id;
 	}
 
 	mem_free(path);
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	return(id);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Tries to extend a data file so that it would accommodate the number of pages
 given. The tablespace must be cached in the memory cache. If the space is big
-enough already, does nothing. */
-
+enough already, does nothing.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_extend_space_to_desired_size(
 /*=============================*/
-				/* out: TRUE if success */
-	ulint*	actual_size,	/* out: size of the space after extension;
+	ulint*	actual_size,	/*!< out: size of the space after extension;
 				if we ran out of disk space this may be lower
 				than the desired size */
-	ulint	space_id,	/* in: space id */
-	ulint	size_after_extend)/* in: desired size in pages after the
+	ulint	space_id,	/*!< in: space id */
+	ulint	size_after_extend)/*!< in: desired size in pages after the
 				extension; if the current space size is bigger
 				than this already, the function does nothing */
 {
-	fil_system_t*	system		= fil_system;
 	fil_node_t*	node;
 	fil_space_t*	space;
 	byte*		buf2;
@@ -3583,12 +3779,12 @@ fil_extend_space_to_desired_size(
 	ulint		file_start_page_no;
 	ulint		offset_high;
 	ulint		offset_low;
+	ulint		page_size;
 	ibool		success		= TRUE;
 
 	fil_mutex_enter_and_prepare_for_io(space_id);
 
-	HASH_SEARCH(hash, system->spaces, space_id, space,
-		    space->id == space_id);
+	space = fil_space_get_by_id(space_id);
 	ut_a(space);
 
 	if (space->size >= size_after_extend) {
@@ -3596,44 +3792,48 @@ fil_extend_space_to_desired_size(
 
 		*actual_size = space->size;
 
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return(TRUE);
 	}
 
+	page_size = dict_table_flags_to_zip_size(space->flags);
+	if (!page_size) {
+		page_size = UNIV_PAGE_SIZE;
+	}
+
 	node = UT_LIST_GET_LAST(space->chain);
 
-	fil_node_prepare_for_io(node, system, space);
+	fil_node_prepare_for_io(node, fil_system, space);
 
 	start_page_no = space->size;
 	file_start_page_no = space->size - node->size;
 
 	/* Extend at most 64 pages at a time */
-	buf_size = ut_min(64, size_after_extend - start_page_no)
-		* UNIV_PAGE_SIZE;
-	buf2 = mem_alloc(buf_size + UNIV_PAGE_SIZE);
-	buf = ut_align(buf2, UNIV_PAGE_SIZE);
+	buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
+	buf2 = mem_alloc(buf_size + page_size);
+	buf = ut_align(buf2, page_size);
 
 	memset(buf, 0, buf_size);
 
 	while (start_page_no < size_after_extend) {
-		ulint	n_pages = ut_min(buf_size / UNIV_PAGE_SIZE,
+		ulint	n_pages = ut_min(buf_size / page_size,
 					 size_after_extend - start_page_no);
 
 		offset_high = (start_page_no - file_start_page_no)
-			/ (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE));
+			/ (4096 * ((1024 * 1024) / page_size));
 		offset_low  = ((start_page_no - file_start_page_no)
-			       % (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE)))
-			* UNIV_PAGE_SIZE;
+			       % (4096 * ((1024 * 1024) / page_size)))
+			* page_size;
 #ifdef UNIV_HOTBACKUP
 		success = os_file_write(node->name, node->handle, buf,
 					offset_low, offset_high,
-					UNIV_PAGE_SIZE * n_pages);
+					page_size * n_pages);
 #else
 		success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
 				 node->name, node->handle, buf,
 				 offset_low, offset_high,
-				 UNIV_PAGE_SIZE * n_pages,
+				 page_size * n_pages,
 				 NULL, NULL);
 #endif
 		if (success) {
@@ -3646,9 +3846,9 @@ fil_extend_space_to_desired_size(
 			how much we were able to extend it */
 
 			n_pages = ((ulint)
-				   (os_file_get_size_as_iblonglong
-				    (node->handle)
-				    / UNIV_PAGE_SIZE)) - node->size;
+				   (os_file_get_size_as_iblonglong(
+					   node->handle)
+				    / page_size)) - node->size;
 
 			node->size += n_pages;
 			space->size += n_pages;
@@ -3661,13 +3861,13 @@ fil_extend_space_to_desired_size(
 
 	mem_free(buf2);
 
-	fil_node_complete_io(node, system, OS_FILE_WRITE);
+	fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
 
 	*actual_size = space->size;
 
 #ifndef UNIV_HOTBACKUP
 	if (space_id == 0) {
-		ulint pages_per_mb = (1024 * 1024) / UNIV_PAGE_SIZE;
+		ulint pages_per_mb = (1024 * 1024) / page_size;
 
 		/* Keep the last data file size info up to date, rounded to
 		full megabytes */
@@ -3680,7 +3880,7 @@ fil_extend_space_to_desired_size(
 	/*
 	printf("Extended %s to %lu, actual size %lu pages\n", space->name,
 	size_after_extend, *actual_size); */
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	fil_flush(space_id);
 
@@ -3688,17 +3888,16 @@ fil_extend_space_to_desired_size(
 }
 
 #ifdef UNIV_HOTBACKUP
-/************************************************************************
+/********************************************************************//**
 Extends all tablespaces to the size stored in the space header. During the
 ibbackup --apply-log phase we extended the spaces on-demand so that log records
 could be applied, but that may have left spaces still too small compared to
 the size stored in the space header. */
-
+UNIV_INTERN
 void
 fil_extend_tablespaces_to_stored_len(void)
 /*======================================*/
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
 	byte*		buf;
 	ulint		actual_size;
@@ -3708,18 +3907,19 @@ fil_extend_tablespaces_to_stored_len(void)
 
 	buf = mem_alloc(UNIV_PAGE_SIZE);
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	space = UT_LIST_GET_FIRST(system->space_list);
+	space = UT_LIST_GET_FIRST(fil_system->space_list);
 
 	while (space) {
 		ut_a(space->purpose == FIL_TABLESPACE);
 
-		mutex_exit(&(system->mutex)); /* no need to protect with a
+		mutex_exit(&fil_system->mutex); /* no need to protect with a
 					      mutex, because this is a
 					      single-threaded operation */
-		error = fil_read(TRUE, space->id, 0, 0, UNIV_PAGE_SIZE, buf,
-				 NULL);
+		error = fil_read(TRUE, space->id,
+				 dict_table_flags_to_zip_size(space->flags),
+				 0, 0, UNIV_PAGE_SIZE, buf, NULL);
 		ut_a(error == DB_SUCCESS);
 
 		size_in_header = fsp_get_size_low(buf);
@@ -3739,12 +3939,12 @@ fil_extend_tablespaces_to_stored_len(void)
 			exit(1);
 		}
 
-		mutex_enter(&(system->mutex));
+		mutex_enter(&fil_system->mutex);
 
 		space = UT_LIST_GET_NEXT(space_list, space);
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	mem_free(buf);
 }
@@ -3752,26 +3952,25 @@ fil_extend_tablespaces_to_stored_len(void)
 
 /*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
 
-/***********************************************************************
-Tries to reserve free extents in a file space. */
-
+/*******************************************************************//**
+Tries to reserve free extents in a file space.
+@return	TRUE if succeed */
+UNIV_INTERN
 ibool
 fil_space_reserve_free_extents(
 /*===========================*/
-				/* out: TRUE if succeed */
-	ulint	id,		/* in: space id */
-	ulint	n_free_now,	/* in: number of free extents now */
-	ulint	n_to_reserve)	/* in: how many one wants to reserve */
+	ulint	id,		/*!< in: space id */
+	ulint	n_free_now,	/*!< in: number of free extents now */
+	ulint	n_to_reserve)	/*!< in: how many one wants to reserve */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
 	ibool		success;
 
-	ut_ad(system);
+	ut_ad(fil_system);
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	ut_a(space);
 
@@ -3782,68 +3981,66 @@ fil_space_reserve_free_extents(
 		success = TRUE;
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	return(success);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Releases free extents in a file space. */
-
+UNIV_INTERN
 void
 fil_space_release_free_extents(
 /*===========================*/
-	ulint	id,		/* in: space id */
-	ulint	n_reserved)	/* in: how many one reserved */
+	ulint	id,		/*!< in: space id */
+	ulint	n_reserved)	/*!< in: how many one reserved */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
 
-	ut_ad(system);
+	ut_ad(fil_system);
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	ut_a(space);
 	ut_a(space->n_reserved_extents >= n_reserved);
 
 	space->n_reserved_extents -= n_reserved;
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Gets the number of reserved extents. If the database is silent, this number
 should be zero. */
-
+UNIV_INTERN
 ulint
 fil_space_get_n_reserved_extents(
 /*=============================*/
-	ulint	id)		/* in: space id */
+	ulint	id)		/*!< in: space id */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
 	ulint		n;
 
-	ut_ad(system);
+	ut_ad(fil_system);
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+	space = fil_space_get_by_id(id);
 
 	ut_a(space);
 
 	n = space->n_reserved_extents;
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	return(n);
 }
 
 /*============================ FILE I/O ================================*/
 
-/************************************************************************
+/********************************************************************//**
 NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
 
 Prepares a file node for i/o. Opens the file if it is closed. Updates the
@@ -3854,9 +4051,9 @@ static
 void
 fil_node_prepare_for_io(
 /*====================*/
-	fil_node_t*	node,	/* in: file node */
-	fil_system_t*	system,	/* in: tablespace memory cache */
-	fil_space_t*	space)	/* in: space */
+	fil_node_t*	node,	/*!< in: file node */
+	fil_system_t*	system,	/*!< in: tablespace memory cache */
+	fil_space_t*	space)	/*!< in: space */
 {
 	ut_ad(node && system && space);
 	ut_ad(mutex_own(&(system->mutex)));
@@ -3889,16 +4086,16 @@ fil_node_prepare_for_io(
 	node->n_pending++;
 }
 
-/************************************************************************
+/********************************************************************//**
 Updates the data structures when an i/o operation finishes. Updates the
 pending i/o's field in the node appropriately. */
 static
 void
 fil_node_complete_io(
 /*=================*/
-	fil_node_t*	node,	/* in: file node */
-	fil_system_t*	system,	/* in: tablespace memory cache */
-	ulint		type)	/* in: OS_FILE_WRITE or OS_FILE_READ; marks
+	fil_node_t*	node,	/*!< in: file node */
+	fil_system_t*	system,	/*!< in: tablespace memory cache */
+	ulint		type)	/*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
 				the node as modified if
 				type == OS_FILE_WRITE */
 {
@@ -3930,18 +4127,18 @@ fil_node_complete_io(
 	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Report information about an invalid page access. */
 static
 void
 fil_report_invalid_page_access(
 /*===========================*/
-	ulint		block_offset,	/* in: block offset */
-	ulint		space_id,	/* in: space id */
-	const char*	space_name,	/* in: space name */
-	ulint		byte_offset,	/* in: byte offset */
-	ulint		len,		/* in: I/O length */
-	ulint		type)		/* in: I/O type */
+	ulint		block_offset,	/*!< in: block offset */
+	ulint		space_id,	/*!< in: space id */
+	const char*	space_name,	/*!< in: space name */
+	ulint		byte_offset,	/*!< in: byte offset */
+	ulint		len,		/*!< in: I/O length */
+	ulint		type)		/*!< in: I/O type */
 {
 	fprintf(stderr,
 		"InnoDB: Error: trying to access page number %lu"
@@ -3958,16 +4155,15 @@ fil_report_invalid_page_access(
 		(ulong) byte_offset, (ulong) len, (ulong) type);
 }
 
-/************************************************************************
-Reads or writes data. This operation is asynchronous (aio). */
-
+/********************************************************************//**
+Reads or writes data. This operation is asynchronous (aio).
+@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
+i/o on a tablespace which does not exist */
+UNIV_INTERN
 ulint
 fil_io(
 /*===*/
-				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
-				if we are trying to do i/o on a tablespace
-				which does not exist */
-	ulint	type,		/* in: OS_FILE_READ or OS_FILE_WRITE,
+	ulint	type,		/*!< in: OS_FILE_READ or OS_FILE_WRITE,
 				ORed to OS_FILE_LOG, if a log i/o
 				and ORed to OS_AIO_SIMULATED_WAKE_LATER
 				if simulated aio and we want to post a
@@ -3976,22 +4172,23 @@ fil_io(
 				because i/os are not actually handled until
 				all have been posted: use with great
 				caution! */
-	ibool	sync,		/* in: TRUE if synchronous aio is desired */
-	ulint	space_id,	/* in: space id */
-	ulint	block_offset,	/* in: offset in number of blocks */
-	ulint	byte_offset,	/* in: remainder of offset in bytes; in
+	ibool	sync,		/*!< in: TRUE if synchronous aio is desired */
+	ulint	space_id,	/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	block_offset,	/*!< in: offset in number of blocks */
+	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in
 				aio this must be divisible by the OS block
 				size */
-	ulint	len,		/* in: how many bytes to read or write; this
+	ulint	len,		/*!< in: how many bytes to read or write; this
 				must not cross a file boundary; in aio this
 				must be a block size multiple */
-	void*	buf,		/* in/out: buffer where to store read data
+	void*	buf,		/*!< in/out: buffer where to store read data
 				or from where to write; in aio this must be
 				appropriately aligned */
-	void*	message)	/* in: message for aio handler if non-sync
+	void*	message)	/*!< in: message for aio handler if non-sync
 				aio used, else ignored */
 {
-	fil_system_t*	system		= fil_system;
 	ulint		mode;
 	fil_space_t*	space;
 	fil_node_t*	node;
@@ -4008,29 +4205,38 @@ fil_io(
 	type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
 
 	ut_ad(byte_offset < UNIV_PAGE_SIZE);
+	ut_ad(!zip_size || !byte_offset);
+	ut_ad(ut_is_2pow(zip_size));
 	ut_ad(buf);
 	ut_ad(len > 0);
-	ut_a((1 << UNIV_PAGE_SIZE_SHIFT) == UNIV_PAGE_SIZE);
+#if (1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE
+# error "(1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE"
+#endif
 	ut_ad(fil_validate());
-#ifndef UNIV_LOG_DEBUG
+#ifndef UNIV_HOTBACKUP
+# ifndef UNIV_LOG_DEBUG
 	/* ibuf bitmap pages must be read in the sync aio mode: */
 	ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
-	      || !ibuf_bitmap_page(block_offset) || sync || is_log);
-#ifdef UNIV_SYNC_DEBUG
+	      || !ibuf_bitmap_page(zip_size, block_offset)
+	      || sync || is_log);
 	ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
-	      || ibuf_page(space_id, block_offset));
-#endif
-#endif
+	      || ibuf_page(space_id, zip_size, block_offset, NULL));
+# endif /* UNIV_LOG_DEBUG */
 	if (sync) {
 		mode = OS_AIO_SYNC;
-	} else if (type == OS_FILE_READ && !is_log
-		   && ibuf_page(space_id, block_offset)) {
-		mode = OS_AIO_IBUF;
 	} else if (is_log) {
 		mode = OS_AIO_LOG;
+	} else if (type == OS_FILE_READ
+		   && !recv_no_ibuf_operations
+		   && ibuf_page(space_id, zip_size, block_offset, NULL)) {
+		mode = OS_AIO_IBUF;
 	} else {
 		mode = OS_AIO_NORMAL;
 	}
+#else /* !UNIV_HOTBACKUP */
+	ut_a(sync);
+	mode = OS_AIO_SYNC;
+#endif /* !UNIV_HOTBACKUP */
 
 	if (type == OS_FILE_READ) {
 		srv_data_read+= len;
@@ -4043,10 +4249,10 @@ fil_io(
 
 	fil_mutex_enter_and_prepare_for_io(space_id);
 
-	HASH_SEARCH(hash, system->spaces, space_id, space,
-		    space->id == space_id);
+	space = fil_space_get_by_id(space_id);
+
 	if (!space) {
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		ut_print_timestamp(stderr);
 		fprintf(stderr,
@@ -4065,7 +4271,7 @@ fil_io(
 	node = UT_LIST_GET_FIRST(space->chain);
 
 	for (;;) {
-		if (node == NULL) {
+		if (UNIV_UNLIKELY(node == NULL)) {
 			fil_report_invalid_page_access(
 				block_offset, space_id, space->name,
 				byte_offset, len, type);
@@ -4090,12 +4296,12 @@ fil_io(
 	}
 
 	/* Open file if closed */
-	fil_node_prepare_for_io(node, system, space);
+	fil_node_prepare_for_io(node, fil_system, space);
 
 	/* Check that at least the start offset is within the bounds of a
 	single-table tablespace */
-	if (space->purpose == FIL_TABLESPACE && space->id != 0
-	    && node->size <= block_offset) {
+	if (UNIV_UNLIKELY(node->size <= block_offset)
+	    && space->id != 0 && space->purpose == FIL_TABLESPACE) {
 
 		fil_report_invalid_page_access(
 			block_offset, space_id, space->name, byte_offset,
@@ -4104,17 +4310,35 @@ fil_io(
 		ut_error;
 	}
 
-	/* Now we have made the changes in the data structures of system */
-	mutex_exit(&(system->mutex));
+	/* Now we have made the changes in the data structures of fil_system */
+	mutex_exit(&fil_system->mutex);
 
 	/* Calculate the low 32 bits and the high 32 bits of the file offset */
 
-	offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
-	offset_low  = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL)
-		+ byte_offset;
+	if (!zip_size) {
+		offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
+		offset_low  = ((block_offset << UNIV_PAGE_SIZE_SHIFT)
+			       & 0xFFFFFFFFUL) + byte_offset;
 
-	ut_a(node->size - block_offset
-	     >= (byte_offset + len + (UNIV_PAGE_SIZE - 1)) / UNIV_PAGE_SIZE);
+		ut_a(node->size - block_offset
+		     >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
+			 / UNIV_PAGE_SIZE));
+	} else {
+		ulint	zip_size_shift;
+		switch (zip_size) {
+		case 1024: zip_size_shift = 10; break;
+		case 2048: zip_size_shift = 11; break;
+		case 4096: zip_size_shift = 12; break;
+		case 8192: zip_size_shift = 13; break;
+		case 16384: zip_size_shift = 14; break;
+		default: ut_error;
+		}
+		offset_high = block_offset >> (32 - zip_size_shift);
+		offset_low = (block_offset << zip_size_shift & 0xFFFFFFFFUL)
+			+ byte_offset;
+		ut_a(node->size - block_offset
+		     >= (len + (zip_size - 1)) / zip_size);
+	}
 
 	/* Do aio */
 
@@ -4141,11 +4365,11 @@ fil_io(
 		/* The i/o operation is already completed when we return from
 		os_aio: */
 
-		mutex_enter(&(system->mutex));
+		mutex_enter(&fil_system->mutex);
 
-		fil_node_complete_io(node, system, type);
+		fil_node_complete_io(node, fil_system, type);
 
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		ut_ad(fil_validate());
 	}
@@ -4153,75 +4377,19 @@ fil_io(
 	return(DB_SUCCESS);
 }
 
-/************************************************************************
-Reads data from a space to a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space. */
-
-ulint
-fil_read(
-/*=====*/
-				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
-				if we are trying to do i/o on a tablespace
-				which does not exist */
-	ibool	sync,		/* in: TRUE if synchronous aio is desired */
-	ulint	space_id,	/* in: space id */
-	ulint	block_offset,	/* in: offset in number of blocks */
-	ulint	byte_offset,	/* in: remainder of offset in bytes; in aio
-				this must be divisible by the OS block size */
-	ulint	len,		/* in: how many bytes to read; this must not
-				cross a file boundary; in aio this must be a
-				block size multiple */
-	void*	buf,		/* in/out: buffer where to store data read;
-				in aio this must be appropriately aligned */
-	void*	message)	/* in: message for aio handler if non-sync
-				aio used, else ignored */
-{
-	return(fil_io(OS_FILE_READ, sync, space_id, block_offset,
-		      byte_offset, len, buf, message));
-}
-
-/************************************************************************
-Writes data to a space from a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space. */
-
-ulint
-fil_write(
-/*======*/
-				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
-				if we are trying to do i/o on a tablespace
-				which does not exist */
-	ibool	sync,		/* in: TRUE if synchronous aio is desired */
-	ulint	space_id,	/* in: space id */
-	ulint	block_offset,	/* in: offset in number of blocks */
-	ulint	byte_offset,	/* in: remainder of offset in bytes; in aio
-				this must be divisible by the OS block size */
-	ulint	len,		/* in: how many bytes to write; this must
-				not cross a file boundary; in aio this must
-				be a block size multiple */
-	void*	buf,		/* in: buffer from which to write; in aio
-				this must be appropriately aligned */
-	void*	message)	/* in: message for aio handler if non-sync
-				aio used, else ignored */
-{
-	return(fil_io(OS_FILE_WRITE, sync, space_id, block_offset,
-		      byte_offset, len, buf, message));
-}
-
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
 Waits for an aio operation to complete. This function is used to write the
 handler for completed requests. The aio array of pending requests is divided
 into segments (see os0file.c for more info). The thread specifies which
 segment it wants to wait for. */
-
+UNIV_INTERN
 void
 fil_aio_wait(
 /*=========*/
-	ulint	segment)	/* in: the number of the segment in the aio
+	ulint	segment)	/*!< in: the number of the segment in the aio
 				array to wait for */
 {
-	fil_system_t*	system		= fil_system;
 	ibool		ret;
 	fil_node_t*	fil_node;
 	void*		message;
@@ -4234,8 +4402,6 @@ fil_aio_wait(
 #ifdef WIN_ASYNC_IO
 		ret = os_aio_windows_handle(segment, 0, &fil_node,
 					    &message, &type);
-#elif defined(POSIX_ASYNC_IO)
-		ret = os_aio_posix_handle(segment, &fil_node, &message);
 #else
 		ret = 0; /* Eliminate compiler warning */
 		ut_error;
@@ -4251,11 +4417,11 @@ fil_aio_wait(
 
 	srv_set_io_thread_op_info(segment, "complete io for fil node");
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
 	fil_node_complete_io(fil_node, fil_system, type);
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	ut_ad(fil_validate());
 
@@ -4265,7 +4431,7 @@ fil_aio_wait(
 	deadlocks in the i/o system. We keep tablespace 0 data files always
 	open, and use a special i/o thread to serve insert buffer requests. */
 
-	if (buf_pool_is_block(message)) {
+	if (fil_node->space->purpose == FIL_TABLESPACE) {
 		srv_set_io_thread_op_info(segment, "complete io for buf page");
 		buf_page_io_complete(message);
 	} else {
@@ -4273,34 +4439,34 @@ fil_aio_wait(
 		log_io_complete(message);
 	}
 }
+#endif /* UNIV_HOTBACKUP */
 
-/**************************************************************************
+/**********************************************************************//**
 Flushes to disk possible writes cached by the OS. If the space does not exist
 or is being dropped, does not do anything. */
-
+UNIV_INTERN
 void
 fil_flush(
 /*======*/
-	ulint	space_id)	/* in: file space id (this can be a group of
+	ulint	space_id)	/*!< in: file space id (this can be a group of
 				log files or a tablespace of the database) */
 {
-	fil_system_t*	system	= fil_system;
 	fil_space_t*	space;
 	fil_node_t*	node;
 	os_file_t	file;
-	ib_longlong	old_mod_counter;
+	ib_int64_t	old_mod_counter;
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
+
+	space = fil_space_get_by_id(space_id);
 
-	HASH_SEARCH(hash, system->spaces, space_id, space,
-		    space->id == space_id);
 	if (!space || space->is_being_deleted) {
-		mutex_exit(&(system->mutex));
+		mutex_exit(&fil_system->mutex);
 
 		return;
 	}
 
-	space->n_pending_flushes++;	/* prevent dropping of the space while
+	space->n_pending_flushes++;	/*!< prevent dropping of the space while
 					we are flushing */
 	node = UT_LIST_GET_FIRST(space->chain);
 
@@ -4331,11 +4497,11 @@ retry:
 				not know what bugs OS's may contain in file
 				i/o; sleep for a while */
 
-				mutex_exit(&(system->mutex));
+				mutex_exit(&fil_system->mutex);
 
 				os_thread_sleep(20000);
 
-				mutex_enter(&(system->mutex));
+				mutex_enter(&fil_system->mutex);
 
 				if (node->flush_counter >= old_mod_counter) {
 
@@ -4349,14 +4515,14 @@ retry:
 			file = node->handle;
 			node->n_pending_flushes++;
 
-			mutex_exit(&(system->mutex));
+			mutex_exit(&fil_system->mutex);
 
 			/* fprintf(stderr, "Flushing to file %s\n",
 			node->name); */
 
 			os_file_flush(file);
 
-			mutex_enter(&(system->mutex));
+			mutex_enter(&fil_system->mutex);
 
 			node->n_pending_flushes--;
 skip_flush:
@@ -4370,7 +4536,7 @@ skip_flush:
 
 					UT_LIST_REMOVE(
 						unflushed_spaces,
-						system->unflushed_spaces,
+						fil_system->unflushed_spaces,
 						space);
 				}
 			}
@@ -4387,42 +4553,41 @@ skip_flush:
 
 	space->n_pending_flushes--;
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Flushes to disk the writes in file spaces of the given type possibly cached by
 the OS. */
-
+UNIV_INTERN
 void
 fil_flush_file_spaces(
 /*==================*/
-	ulint	purpose)	/* in: FIL_TABLESPACE, FIL_LOG */
+	ulint	purpose)	/*!< in: FIL_TABLESPACE, FIL_LOG */
 {
-	fil_system_t*	system	= fil_system;
 	fil_space_t*	space;
 	ulint*		space_ids;
 	ulint		n_space_ids;
 	ulint		i;
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
-	n_space_ids = UT_LIST_GET_LEN(system->unflushed_spaces);
+	n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces);
 	if (n_space_ids == 0) {
 
-		mutex_exit(&system->mutex);
+		mutex_exit(&fil_system->mutex);
 		return;
 	}
 
 	/* Assemble a list of space ids to flush.  Previously, we
-	traversed system->unflushed_spaces and called UT_LIST_GET_NEXT()
+	traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT()
 	on a space that was just removed from the list by fil_flush().
 	Thus, the space could be dropped and the memory overwritten. */
 	space_ids = mem_alloc(n_space_ids * sizeof *space_ids);
 
 	n_space_ids = 0;
 
-	for (space = UT_LIST_GET_FIRST(system->unflushed_spaces);
+	for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces);
 	     space;
 	     space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
 
@@ -4432,7 +4597,7 @@ fil_flush_file_spaces(
 		}
 	}
 
-	mutex_exit(&system->mutex);
+	mutex_exit(&fil_system->mutex);
 
 	/* Flush the spaces.  It will not hurt to call fil_flush() on
 	a non-existing space id. */
@@ -4444,30 +4609,31 @@ fil_flush_file_spaces(
 	mem_free(space_ids);
 }
 
-/**********************************************************************
-Checks the consistency of the tablespace cache. */
-
+/******************************************************************//**
+Checks the consistency of the tablespace cache.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 fil_validate(void)
 /*==============*/
-			/* out: TRUE if ok */
 {
-	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
 	fil_node_t*	fil_node;
 	ulint		n_open		= 0;
 	ulint		i;
 
-	mutex_enter(&(system->mutex));
+	mutex_enter(&fil_system->mutex);
 
 	/* Look for spaces in the hash table */
 
-	for (i = 0; i < hash_get_n_cells(system->spaces); i++) {
+	for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
 
-		space = HASH_GET_FIRST(system->spaces, i);
+		space = HASH_GET_FIRST(fil_system->spaces, i);
 
 		while (space != NULL) {
-			UT_LIST_VALIDATE(chain, fil_node_t, space->chain);
+			UT_LIST_VALIDATE(chain, fil_node_t, space->chain,
+					 ut_a(ut_list_node_313->open
+					      || !ut_list_node_313->n_pending));
 
 			fil_node = UT_LIST_GET_FIRST(space->chain);
 
@@ -4485,11 +4651,11 @@ fil_validate(void)
 		}
 	}
 
-	ut_a(system->n_open == n_open);
+	ut_a(fil_system->n_open == n_open);
 
-	UT_LIST_VALIDATE(LRU, fil_node_t, system->LRU);
+	UT_LIST_VALIDATE(LRU, fil_node_t, fil_system->LRU, (void) 0);
 
-	fil_node = UT_LIST_GET_FIRST(system->LRU);
+	fil_node = UT_LIST_GET_FIRST(fil_system->LRU);
 
 	while (fil_node != NULL) {
 		ut_a(fil_node->n_pending == 0);
@@ -4500,65 +4666,70 @@ fil_validate(void)
 		fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
 	}
 
-	mutex_exit(&(system->mutex));
+	mutex_exit(&fil_system->mutex);
 
 	return(TRUE);
 }
 
-/************************************************************************
-Returns TRUE if file address is undefined. */
+/********************************************************************//**
+Returns TRUE if file address is undefined.
+@return	TRUE if undefined */
+UNIV_INTERN
 ibool
 fil_addr_is_null(
 /*=============*/
-				/* out: TRUE if undefined */
-	fil_addr_t	addr)	/* in: address */
+	fil_addr_t	addr)	/*!< in: address */
 {
-	if (addr.page == FIL_NULL) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
+	return(addr.page == FIL_NULL);
 }
 
-/************************************************************************
-Accessor functions for a file page */
-
+/********************************************************************//**
+Get the predecessor of a file page.
+@return	FIL_PAGE_PREV */
+UNIV_INTERN
 ulint
-fil_page_get_prev(byte*	page)
+fil_page_get_prev(
+/*==============*/
+	const byte*	page)	/*!< in: file page */
 {
 	return(mach_read_from_4(page + FIL_PAGE_PREV));
 }
 
+/********************************************************************//**
+Get the successor of a file page.
+@return	FIL_PAGE_NEXT */
+UNIV_INTERN
 ulint
-fil_page_get_next(byte*	page)
+fil_page_get_next(
+/*==============*/
+	const byte*	page)	/*!< in: file page */
 {
 	return(mach_read_from_4(page + FIL_PAGE_NEXT));
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Sets the file page type. */
-
+UNIV_INTERN
 void
 fil_page_set_type(
 /*==============*/
-	byte*	page,	/* in: file page */
-	ulint	type)	/* in: type */
+	byte*	page,	/*!< in/out: file page */
+	ulint	type)	/*!< in: type */
 {
 	ut_ad(page);
 
 	mach_write_to_2(page + FIL_PAGE_TYPE, type);
 }
 
-/*************************************************************************
-Gets the file page type. */
-
+/*********************************************************************//**
+Gets the file page type.
+@return type; NOTE that if the type has not been written to page, the
+return value not defined */
+UNIV_INTERN
 ulint
 fil_page_get_type(
 /*==============*/
-			/* out: type; NOTE that if the type has not been
-			written to page, the return value not defined */
-	byte*	page)	/* in: file page */
+	const byte*	page)	/*!< in: file page */
 {
 	ut_ad(page);
 
diff --git a/storage/innobase/fsp/fsp0fsp.c b/storage/innodb_plugin/fsp/fsp0fsp.c
similarity index 63%
rename from storage/innobase/fsp/fsp0fsp.c
rename to storage/innodb_plugin/fsp/fsp0fsp.c
index e1074933fe8..ce14723ba18 100644
--- a/storage/innobase/fsp/fsp0fsp.c
+++ b/storage/innodb_plugin/fsp/fsp0fsp.c
@@ -1,7 +1,24 @@
-/**********************************************************************
-File space management
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file fsp/fsp0fsp.c
+File space management
 
 Created 11/29/1995 Heikki Tuuri
 ***********************************************************************/
@@ -14,18 +31,23 @@ Created 11/29/1995 Heikki Tuuri
 
 #include "buf0buf.h"
 #include "fil0fil.h"
-#include "sync0sync.h"
 #include "mtr0log.h"
-#include "fut0fut.h"
 #include "ut0byte.h"
-#include "srv0srv.h"
-#include "page0types.h"
-#include "ibuf0ibuf.h"
-#include "btr0btr.h"
-#include "btr0sea.h"
-#include "dict0boot.h"
+#include "page0page.h"
+#include "page0zip.h"
+#ifdef UNIV_HOTBACKUP
+# include "fut0lst.h"
+#else /* UNIV_HOTBACKUP */
+# include "sync0sync.h"
+# include "fut0fut.h"
+# include "srv0srv.h"
+# include "ibuf0ibuf.h"
+# include "btr0btr.h"
+# include "btr0sea.h"
+# include "dict0boot.h"
+# include "log0log.h"
+#endif /* UNIV_HOTBACKUP */
 #include "dict0mem.h"
-#include "log0log.h"
 
 
 #define FSP_HEADER_OFFSET	FIL_PAGE_DATA	/* Offset of the space header
@@ -60,11 +82,7 @@ descriptor page, but used only in the first. */
 					about the first extent, but have not
 					physically allocted those pages to the
 					file */
-#define	FSP_LOWEST_NO_WRITE	16	/* The lowest page offset for which
-					the page has not been written to disk
-					(if it has been written, we know that
-					the OS has really reserved the
-					physical space for the page) */
+#define	FSP_SPACE_FLAGS		16	/* table->flags & ~DICT_TF_COMPACT */
 #define	FSP_FRAG_N_USED		20	/* number of used pages in the
 					FSP_FREE_FRAG list */
 #define	FSP_FREE		24	/* list of free extents */
@@ -139,8 +157,9 @@ typedef	byte	fseg_inode_t;
 	(16 + 3 * FLST_BASE_NODE_SIZE			\
 	 + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)
 
-#define FSP_SEG_INODES_PER_PAGE						\
-	((UNIV_PAGE_SIZE - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
+#define FSP_SEG_INODES_PER_PAGE(zip_size)		\
+	(((zip_size ? zip_size : UNIV_PAGE_SIZE)	\
+	  - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
 				/* Number of segment inodes which fit on a
 				single page */
 
@@ -212,60 +231,70 @@ the extent are free and which contain old tuple version to clean. */
 /* Offset of the descriptor array on a descriptor page */
 #define	XDES_ARR_OFFSET		(FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
 
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
 Returns an extent to the free list of a space. */
 static
 void
 fsp_free_extent(
 /*============*/
-	ulint		space,	/* in: space id */
-	ulint		page,	/* in: page offset in the extent */
-	mtr_t*		mtr);	/* in: mtr */
-/**************************************************************************
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint		page,	/*!< in: page offset in the extent */
+	mtr_t*		mtr);	/*!< in: mtr */
+/**********************************************************************//**
 Frees an extent of a segment to the space free list. */
 static
 void
 fseg_free_extent(
 /*=============*/
-	fseg_inode_t*	seg_inode, /* in: segment inode */
-	ulint		space,	/* in: space id */
-	ulint		page,	/* in: page offset in the extent */
-	mtr_t*		mtr);	/* in: mtr handle */
-/**************************************************************************
+	fseg_inode_t*	seg_inode, /*!< in: segment inode */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint		page,	/*!< in: page offset in the extent */
+	mtr_t*		mtr);	/*!< in: mtr handle */
+/**********************************************************************//**
 Calculates the number of pages reserved by a segment, and how
-many pages are currently used. */
+many pages are currently used.
+@return	number of reserved pages */
 static
 ulint
 fseg_n_reserved_pages_low(
 /*======================*/
-				/* out: number of reserved pages */
-	fseg_inode_t*	header,	/* in: segment inode */
-	ulint*		used,	/* out: number of pages used (<= reserved) */
-	mtr_t*		mtr);	/* in: mtr handle */
-/************************************************************************
+	fseg_inode_t*	header,	/*!< in: segment inode */
+	ulint*		used,	/*!< out: number of pages used (not
+				more than reserved) */
+	mtr_t*		mtr);	/*!< in: mtr handle */
+/********************************************************************//**
 Marks a page used. The page must reside within the extents of the given
 segment. */
 static
 void
 fseg_mark_page_used(
 /*================*/
-	fseg_inode_t*	seg_inode,/* in: segment inode */
-	ulint		space,	/* in: space id */
-	ulint		page,	/* in: page offset */
-	mtr_t*		mtr);	/* in: mtr */
-/**************************************************************************
+	fseg_inode_t*	seg_inode,/*!< in: segment inode */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint		page,	/*!< in: page offset */
+	mtr_t*		mtr);	/*!< in: mtr */
+/**********************************************************************//**
 Returns the first extent descriptor for a segment. We think of the extent
 lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
--> FSEG_FREE. */
+-> FSEG_FREE.
+@return	the first extent descriptor, or NULL if none */
 static
 xdes_t*
 fseg_get_first_extent(
 /*==================*/
-				/* out: the first extent descriptor, or NULL if
-				none */
-	fseg_inode_t*	inode,	/* in: segment inode */
-	mtr_t*		mtr);	/* in: mtr */
-/**************************************************************************
+	fseg_inode_t*	inode,	/*!< in: segment inode */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	mtr_t*		mtr);	/*!< in: mtr */
+/**********************************************************************//**
 Puts new extents to the free list if
 there are free extents above the free limit. If an extent happens
 to contain an extent descriptor page, the extent is put to
@@ -274,87 +303,97 @@ static
 void
 fsp_fill_free_list(
 /*===============*/
-	ibool		init_space,	/* in: TRUE if this is a single-table
+	ibool		init_space,	/*!< in: TRUE if this is a single-table
 					tablespace and we are only initing
 					the tablespace's first extent
 					descriptor page and ibuf bitmap page;
 					then we do not allocate more extents */
-	ulint		space,		/* in: space */
-	fsp_header_t*	header,		/* in: space header */
-	mtr_t*		mtr);		/* in: mtr */
-/**************************************************************************
+	ulint		space,		/*!< in: space */
+	fsp_header_t*	header,		/*!< in: space header */
+	mtr_t*		mtr);		/*!< in: mtr */
+/**********************************************************************//**
 Allocates a single free page from a segment. This function implements
 the intelligent allocation strategy which tries to minimize file space
-fragmentation. */
+fragmentation.
+@return	the allocated page number, FIL_NULL if no page could be allocated */
 static
 ulint
 fseg_alloc_free_page_low(
 /*=====================*/
-				/* out: the allocated page number, FIL_NULL
-				if no page could be allocated */
-	ulint		space,	/* in: space */
-	fseg_inode_t*	seg_inode, /* in: segment inode */
-	ulint		hint,	/* in: hint of which page would be desirable */
-	byte		direction, /* in: if the new page is needed because
+	ulint		space,	/*!< in: space */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	fseg_inode_t*	seg_inode, /*!< in: segment inode */
+	ulint		hint,	/*!< in: hint of which page would be desirable */
+	byte		direction, /*!< in: if the new page is needed because
 				of an index page split, and records are
 				inserted there in order, into which
 				direction they go alphabetically: FSP_DOWN,
 				FSP_UP, FSP_NO_DIR */
-	mtr_t*		mtr);	/* in: mtr handle */
-
-
-/**************************************************************************
-Reads the file space size stored in the header page. */
+	mtr_t*		mtr);	/*!< in: mtr handle */
+#endif /* !UNIV_HOTBACKUP */
 
+/**********************************************************************//**
+Reads the file space size stored in the header page.
+@return	tablespace size stored in the space header */
+UNIV_INTERN
 ulint
 fsp_get_size_low(
 /*=============*/
-			/* out: tablespace size stored in the space header */
-	page_t*	page)	/* in: header page (page 0 in the tablespace) */
+	page_t*	page)	/*!< in: header page (page 0 in the tablespace) */
 {
 	return(mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SIZE));
 }
 
-/**************************************************************************
-Gets a pointer to the space header and x-locks its page. */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Gets a pointer to the space header and x-locks its page.
+@return	pointer to the space header, page x-locked */
 UNIV_INLINE
 fsp_header_t*
 fsp_get_space_header(
 /*=================*/
-			/* out: pointer to the space header, page x-locked */
-	ulint	id,	/* in: space id */
-	mtr_t*	mtr)	/* in: mtr */
+	ulint	id,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes
+			or 0 for uncompressed pages */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
+	buf_block_t*	block;
 	fsp_header_t*	header;
 
-	ut_ad(mtr);
+	ut_ad(ut_is_2pow(zip_size));
+	ut_ad(zip_size <= UNIV_PAGE_SIZE);
+	ut_ad(!zip_size || zip_size >= PAGE_ZIP_MIN_SIZE);
+	ut_ad(id || !zip_size);
 
-	header = FSP_HEADER_OFFSET + buf_page_get(id, 0, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(header, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+	block = buf_page_get(id, zip_size, 0, RW_X_LATCH, mtr);
+	header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
+	buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+	ut_ad(id == mach_read_from_4(FSP_SPACE_ID + header));
+	ut_ad(zip_size == dict_table_flags_to_zip_size(
+		      mach_read_from_4(FSP_SPACE_FLAGS + header)));
 	return(header);
 }
 
-/**************************************************************************
-Gets a descriptor bit of a page. */
+/**********************************************************************//**
+Gets a descriptor bit of a page.
+@return	TRUE if free */
 UNIV_INLINE
 ibool
 xdes_get_bit(
 /*=========*/
-			/* out: TRUE if free */
-	xdes_t*	descr,	/* in: descriptor */
-	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
-	ulint	offset,	/* in: page offset within extent:
+	xdes_t*	descr,	/*!< in: descriptor */
+	ulint	bit,	/*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+	ulint	offset,	/*!< in: page offset within extent:
 			0 ... FSP_EXTENT_SIZE - 1 */
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	ulint	index;
 	ulint	byte_index;
 	ulint	bit_index;
 
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
 	ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
 	ut_ad(offset < FSP_EXTENT_SIZE);
 
@@ -368,26 +407,25 @@ xdes_get_bit(
 			      bit_index));
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Sets a descriptor bit of a page. */
 UNIV_INLINE
 void
 xdes_set_bit(
 /*=========*/
-	xdes_t*	descr,	/* in: descriptor */
-	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
-	ulint	offset,	/* in: page offset within extent:
+	xdes_t*	descr,	/*!< in: descriptor */
+	ulint	bit,	/*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+	ulint	offset,	/*!< in: page offset within extent:
 			0 ... FSP_EXTENT_SIZE - 1 */
-	ibool	val,	/* in: bit value */
-	mtr_t*	mtr)	/* in: mtr */
+	ibool	val,	/*!< in: bit value */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	ulint	index;
 	ulint	byte_index;
 	ulint	bit_index;
 	ulint	descr_byte;
 
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
 	ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
 	ut_ad(offset < FSP_EXTENT_SIZE);
 
@@ -404,29 +442,27 @@ xdes_set_bit(
 			 MLOG_1BYTE, mtr);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Looks for a descriptor bit having the desired value. Starts from hint
 and scans upward; at the end of the extent the search is wrapped to
-the start of the extent. */
+the start of the extent.
+@return	bit index of the bit, ULINT_UNDEFINED if not found */
 UNIV_INLINE
 ulint
 xdes_find_bit(
 /*==========*/
-			/* out: bit index of the bit, ULINT_UNDEFINED if not
-			found */
-	xdes_t*	descr,	/* in: descriptor */
-	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
-	ibool	val,	/* in: desired bit value */
-	ulint	hint,	/* in: hint of which bit position would be desirable */
-	mtr_t*	mtr)	/* in: mtr */
+	xdes_t*	descr,	/*!< in: descriptor */
+	ulint	bit,	/*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+	ibool	val,	/*!< in: desired bit value */
+	ulint	hint,	/*!< in: hint of which bit position would be desirable */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	ulint	i;
 
 	ut_ad(descr && mtr);
 	ut_ad(val <= TRUE);
 	ut_ad(hint < FSP_EXTENT_SIZE);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
 	for (i = hint; i < FSP_EXTENT_SIZE; i++) {
 		if (val == xdes_get_bit(descr, bit, i, mtr)) {
 
@@ -444,28 +480,26 @@ xdes_find_bit(
 	return(ULINT_UNDEFINED);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Looks for a descriptor bit having the desired value. Scans the extent in
-a direction opposite to xdes_find_bit. */
+a direction opposite to xdes_find_bit.
+@return	bit index of the bit, ULINT_UNDEFINED if not found */
 UNIV_INLINE
 ulint
 xdes_find_bit_downward(
 /*===================*/
-			/* out: bit index of the bit, ULINT_UNDEFINED if not
-			found */
-	xdes_t*	descr,	/* in: descriptor */
-	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
-	ibool	val,	/* in: desired bit value */
-	ulint	hint,	/* in: hint of which bit position would be desirable */
-	mtr_t*	mtr)	/* in: mtr */
+	xdes_t*	descr,	/*!< in: descriptor */
+	ulint	bit,	/*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+	ibool	val,	/*!< in: desired bit value */
+	ulint	hint,	/*!< in: hint of which bit position would be desirable */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	ulint	i;
 
 	ut_ad(descr && mtr);
 	ut_ad(val <= TRUE);
 	ut_ad(hint < FSP_EXTENT_SIZE);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
 	for (i = hint + 1; i > 0; i--) {
 		if (val == xdes_get_bit(descr, bit, i - 1, mtr)) {
 
@@ -483,22 +517,21 @@ xdes_find_bit_downward(
 	return(ULINT_UNDEFINED);
 }
 
-/**************************************************************************
-Returns the number of used pages in a descriptor. */
+/**********************************************************************//**
+Returns the number of used pages in a descriptor.
+@return	number of pages used */
 UNIV_INLINE
 ulint
 xdes_get_n_used(
 /*============*/
-			/* out: number of pages used */
-	xdes_t*	descr,	/* in: descriptor */
-	mtr_t*	mtr)	/* in: mtr */
+	xdes_t*	descr,	/*!< in: descriptor */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	ulint	i;
 	ulint	count	= 0;
 
 	ut_ad(descr && mtr);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
 	for (i = 0; i < FSP_EXTENT_SIZE; i++) {
 		if (FALSE == xdes_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
 			count++;
@@ -508,15 +541,15 @@ xdes_get_n_used(
 	return(count);
 }
 
-/**************************************************************************
-Returns true if extent contains no used pages. */
+/**********************************************************************//**
+Returns true if extent contains no used pages.
+@return	TRUE if totally free */
 UNIV_INLINE
 ibool
 xdes_is_free(
 /*=========*/
-			/* out: TRUE if totally free */
-	xdes_t*	descr,	/* in: descriptor */
-	mtr_t*	mtr)	/* in: mtr */
+	xdes_t*	descr,	/*!< in: descriptor */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	if (0 == xdes_get_n_used(descr, mtr)) {
 
@@ -526,15 +559,15 @@ xdes_is_free(
 	return(FALSE);
 }
 
-/**************************************************************************
-Returns true if extent contains no free pages. */
+/**********************************************************************//**
+Returns true if extent contains no free pages.
+@return	TRUE if full */
 UNIV_INLINE
 ibool
 xdes_is_full(
 /*=========*/
-			/* out: TRUE if full */
-	xdes_t*	descr,	/* in: descriptor */
-	mtr_t*	mtr)	/* in: mtr */
+	xdes_t*	descr,	/*!< in: descriptor */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	if (FSP_EXTENT_SIZE == xdes_get_n_used(descr, mtr)) {
 
@@ -544,56 +577,57 @@ xdes_is_full(
 	return(FALSE);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Sets the state of an xdes. */
 UNIV_INLINE
 void
 xdes_set_state(
 /*===========*/
-	xdes_t*	descr,	/* in: descriptor */
-	ulint	state,	/* in: state to set */
-	mtr_t*	mtr)	/* in: mtr handle */
+	xdes_t*	descr,	/*!< in: descriptor */
+	ulint	state,	/*!< in: state to set */
+	mtr_t*	mtr)	/*!< in: mtr handle */
 {
 	ut_ad(descr && mtr);
 	ut_ad(state >= XDES_FREE);
 	ut_ad(state <= XDES_FSEG);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
 
 	mlog_write_ulint(descr + XDES_STATE, state, MLOG_4BYTES, mtr);
 }
 
-/**************************************************************************
-Gets the state of an xdes. */
+/**********************************************************************//**
+Gets the state of an xdes.
+@return	state */
 UNIV_INLINE
 ulint
 xdes_get_state(
 /*===========*/
-			/* out: state */
-	xdes_t*	descr,	/* in: descriptor */
-	mtr_t*	mtr)	/* in: mtr handle */
+	xdes_t*	descr,	/*!< in: descriptor */
+	mtr_t*	mtr)	/*!< in: mtr handle */
 {
-	ut_ad(descr && mtr);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
-				MTR_MEMO_PAGE_X_FIX));
+	ulint	state;
 
-	return(mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr));
+	ut_ad(descr && mtr);
+	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
+
+	state = mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr);
+	ut_ad(state - 1 < XDES_FSEG);
+	return(state);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Inits an extent descriptor to the free and clean state. */
 UNIV_INLINE
 void
 xdes_init(
 /*======*/
-	xdes_t*	descr,	/* in: descriptor */
-	mtr_t*	mtr)	/* in: mtr */
+	xdes_t*	descr,	/*!< in: descriptor */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	ulint	i;
 
 	ut_ad(descr && mtr);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
 	ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0);
 
 	for (i = XDES_BITMAP; i < XDES_SIZE; i += 4) {
@@ -603,68 +637,96 @@ xdes_init(
 	xdes_set_state(descr, XDES_FREE, mtr);
 }
 
-/************************************************************************
-Calculates the page where the descriptor of a page resides. */
+/********************************************************************//**
+Calculates the page where the descriptor of a page resides.
+@return	descriptor page offset */
 UNIV_INLINE
 ulint
 xdes_calc_descriptor_page(
 /*======================*/
-				/* out: descriptor page offset */
-	ulint	offset)		/* in: page offset */
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	offset)		/*!< in: page offset */
 {
-#if UNIV_PAGE_SIZE <= XDES_ARR_OFFSET \
-		+ (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE) * XDES_SIZE
-# error
-#endif
+#ifndef DOXYGEN /* Doxygen gets confused of these */
+# if UNIV_PAGE_SIZE <= XDES_ARR_OFFSET \
+		+ (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE
+#  error
+# endif
+# if PAGE_ZIP_MIN_SIZE <= XDES_ARR_OFFSET \
+		+ (PAGE_ZIP_MIN_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE
+#  error
+# endif
+#endif /* !DOXYGEN */
+	ut_ad(ut_is_2pow(zip_size));
 
-	return(ut_2pow_round(offset, XDES_DESCRIBED_PER_PAGE));
+	if (!zip_size) {
+		return(ut_2pow_round(offset, UNIV_PAGE_SIZE));
+	} else {
+		ut_ad(zip_size > XDES_ARR_OFFSET
+		      + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE);
+		return(ut_2pow_round(offset, zip_size));
+	}
 }
 
-/************************************************************************
-Calculates the descriptor index within a descriptor page. */
+/********************************************************************//**
+Calculates the descriptor index within a descriptor page.
+@return	descriptor index */
 UNIV_INLINE
 ulint
 xdes_calc_descriptor_index(
 /*=======================*/
-				/* out: descriptor index */
-	ulint	offset)		/* in: page offset */
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	offset)		/*!< in: page offset */
 {
-	return(ut_2pow_remainder(offset, XDES_DESCRIBED_PER_PAGE)
-	       / FSP_EXTENT_SIZE);
+	ut_ad(ut_is_2pow(zip_size));
+
+	if (!zip_size) {
+		return(ut_2pow_remainder(offset, UNIV_PAGE_SIZE)
+		       / FSP_EXTENT_SIZE);
+	} else {
+		return(ut_2pow_remainder(offset, zip_size) / FSP_EXTENT_SIZE);
+	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Gets pointer to a the extent descriptor of a page. The page where the extent
 descriptor resides is x-locked. If the page offset is equal to the free limit
 of the space, adds new extents from above the free limit to the space free
 list, if not free limit == space size. This adding is necessary to make the
-descriptor defined, as they are uninitialized above the free limit. */
+descriptor defined, as they are uninitialized above the free limit.
+@return pointer to the extent descriptor, NULL if the page does not
+exist in the space or if the offset exceeds the free limit */
 UNIV_INLINE
 xdes_t*
 xdes_get_descriptor_with_space_hdr(
 /*===============================*/
-				/* out: pointer to the extent descriptor,
-				NULL if the page does not exist in the
-				space or if offset > free limit */
-	fsp_header_t*	sp_header,/* in: space header, x-latched */
-	ulint		space,	/* in: space id */
-	ulint		offset,	/* in: page offset;
+	fsp_header_t*	sp_header,/*!< in: space header, x-latched */
+	ulint		space,	/*!< in: space id */
+	ulint		offset,	/*!< in: page offset;
 				if equal to the free limit,
 				we try to add new extents to
 				the space free list */
-	mtr_t*		mtr)	/* in: mtr handle */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	ulint	limit;
 	ulint	size;
+	ulint	zip_size;
 	ulint	descr_page_no;
 	page_t*	descr_page;
 
 	ut_ad(mtr);
-	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
+	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
 				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_S_FIX)
+	      || mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET);
 	/* Read free limit and space size */
-	limit = mtr_read_ulint(sp_header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
-	size  = mtr_read_ulint(sp_header + FSP_SIZE, MLOG_4BYTES, mtr);
+	limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT);
+	size  = mach_read_from_4(sp_header + FSP_SIZE);
+	zip_size = dict_table_flags_to_zip_size(
+		mach_read_from_4(sp_header + FSP_SPACE_FLAGS));
 
 	/* If offset is >= size or > limit, return NULL */
 
@@ -679,174 +741,181 @@ xdes_get_descriptor_with_space_hdr(
 		fsp_fill_free_list(FALSE, space, sp_header, mtr);
 	}
 
-	descr_page_no = xdes_calc_descriptor_page(offset);
+	descr_page_no = xdes_calc_descriptor_page(zip_size, offset);
 
 	if (descr_page_no == 0) {
 		/* It is on the space header page */
 
-		descr_page = buf_frame_align(sp_header);
+		descr_page = page_align(sp_header);
 	} else {
-		descr_page = buf_page_get(space, descr_page_no, RW_X_LATCH,
-					  mtr);
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(descr_page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+		buf_block_t*	block;
+
+		block = buf_page_get(space, zip_size, descr_page_no,
+				     RW_X_LATCH, mtr);
+		buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+		descr_page = buf_block_get_frame(block);
 	}
 
 	return(descr_page + XDES_ARR_OFFSET
-	       + XDES_SIZE * xdes_calc_descriptor_index(offset));
+	       + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset));
 }
 
-/************************************************************************
+/********************************************************************//**
 Gets pointer to a the extent descriptor of a page. The page where the
 extent descriptor resides is x-locked. If the page offset is equal to
 the free limit of the space, adds new extents from above the free limit
 to the space free list, if not free limit == space size. This adding
 is necessary to make the descriptor defined, as they are uninitialized
-above the free limit. */
+above the free limit.
+@return pointer to the extent descriptor, NULL if the page does not
+exist in the space or if the offset exceeds the free limit */
 static
 xdes_t*
 xdes_get_descriptor(
 /*================*/
-			/* out: pointer to the extent descriptor, NULL if the
-			page does not exist in the space or if offset > free
-			limit */
-	ulint	space,	/* in: space id */
-	ulint	offset,	/* in: page offset; if equal to the free limit,
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes
+			or 0 for uncompressed pages */
+	ulint	offset,	/*!< in: page offset; if equal to the free limit,
 			we try to add new extents to the space free list */
-	mtr_t*	mtr)	/* in: mtr handle */
+	mtr_t*	mtr)	/*!< in: mtr handle */
 {
+	buf_block_t*	block;
 	fsp_header_t*	sp_header;
 
-	sp_header = FSP_HEADER_OFFSET
-		+ buf_page_get(space, 0, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(sp_header, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+	block = buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+	sp_header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
 	return(xdes_get_descriptor_with_space_hdr(sp_header, space, offset,
 						  mtr));
 }
 
-/************************************************************************
+/********************************************************************//**
 Gets pointer to a the extent descriptor if the file address
 of the descriptor list node is known. The page where the
-extent descriptor resides is x-locked. */
+extent descriptor resides is x-locked.
+@return	pointer to the extent descriptor */
 UNIV_INLINE
 xdes_t*
 xdes_lst_get_descriptor(
 /*====================*/
-				/* out: pointer to the extent descriptor */
-	ulint		space,	/* in: space id */
-	fil_addr_t	lst_node,/* in: file address of the list node
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	fil_addr_t	lst_node,/*!< in: file address of the list node
 				contained in the descriptor */
-	mtr_t*		mtr)	/* in: mtr handle */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	xdes_t*	descr;
 
 	ut_ad(mtr);
-	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
+	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
 				MTR_MEMO_X_LOCK));
-	descr = fut_get_ptr(space, lst_node, RW_X_LATCH, mtr) - XDES_FLST_NODE;
+	descr = fut_get_ptr(space, zip_size, lst_node, RW_X_LATCH, mtr)
+		- XDES_FLST_NODE;
 
 	return(descr);
 }
 
-/************************************************************************
-Gets pointer to the next descriptor in a descriptor list and x-locks its
-page. */
-UNIV_INLINE
-xdes_t*
-xdes_lst_get_next(
-/*==============*/
-	xdes_t*	descr,	/* in: pointer to a descriptor */
-	mtr_t*	mtr)	/* in: mtr handle */
-{
-	ulint	space;
-
-	ut_ad(mtr && descr);
-
-	space = buf_frame_get_space_id(descr);
-
-	return(xdes_lst_get_descriptor(
-		       space,
-		       flst_get_next_addr(descr + XDES_FLST_NODE, mtr), mtr));
-}
-
-/************************************************************************
-Returns page offset of the first page in extent described by a descriptor. */
+/********************************************************************//**
+Returns page offset of the first page in extent described by a descriptor.
+@return	offset of the first page in extent */
 UNIV_INLINE
 ulint
 xdes_get_offset(
 /*============*/
-			/* out: offset of the first page in extent */
-	xdes_t*	descr)	/* in: extent descriptor */
+	xdes_t*	descr)	/*!< in: extent descriptor */
 {
 	ut_ad(descr);
 
-	return(buf_frame_get_page_no(descr)
-	       + ((descr - buf_frame_align(descr) - XDES_ARR_OFFSET)
-		  / XDES_SIZE)
+	return(page_get_page_no(page_align(descr))
+	       + ((page_offset(descr) - XDES_ARR_OFFSET) / XDES_SIZE)
 	       * FSP_EXTENT_SIZE);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***************************************************************
+/***********************************************************//**
 Inits a file page whose prior contents should be ignored. */
 static
 void
 fsp_init_file_page_low(
 /*===================*/
-	byte*	ptr)	/* in: pointer to a page */
+	buf_block_t*	block)	/*!< in: pointer to a page */
 {
-	page_t*	page;
-	page = buf_frame_align(ptr);
+	page_t*		page	= buf_block_get_frame(block);
+	page_zip_des_t*	page_zip= buf_block_get_page_zip(block);
 
-	buf_block_align(page)->check_index_page_at_flush = FALSE;
+#ifndef UNIV_HOTBACKUP
+	block->check_index_page_at_flush = FALSE;
+#endif /* !UNIV_HOTBACKUP */
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		memset(page, 0, UNIV_PAGE_SIZE);
+		memset(page_zip->data, 0, page_zip_get_size(page_zip));
+		mach_write_to_4(page + FIL_PAGE_OFFSET,
+				buf_block_get_page_no(block));
+		mach_write_to_4(page
+				+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+				buf_block_get_space(block));
+		memcpy(page_zip->data + FIL_PAGE_OFFSET,
+		       page + FIL_PAGE_OFFSET, 4);
+		memcpy(page_zip->data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+		       page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4);
+		return;
+	}
 
 #ifdef UNIV_BASIC_LOG_DEBUG
 	memset(page, 0xff, UNIV_PAGE_SIZE);
 #endif
-	mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
-			ut_dulint_zero);
-	mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
+	mach_write_to_4(page + FIL_PAGE_OFFSET, buf_block_get_page_no(block));
+	memset(page + FIL_PAGE_LSN, 0, 8);
+	mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+			buf_block_get_space(block));
+	memset(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, 0, 8);
 }
 
-/***************************************************************
+#ifndef UNIV_HOTBACKUP
+/***********************************************************//**
 Inits a file page whose prior contents should be ignored. */
 static
 void
 fsp_init_file_page(
 /*===============*/
-	page_t*	page,	/* in: page */
-	mtr_t*	mtr)	/* in: mtr */
+	buf_block_t*	block,	/*!< in: pointer to a page */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
-	fsp_init_file_page_low(page);
+	fsp_init_file_page_low(block);
 
-	mlog_write_initial_log_record(page, MLOG_INIT_FILE_PAGE, mtr);
+	mlog_write_initial_log_record(buf_block_get_frame(block),
+				      MLOG_INIT_FILE_PAGE, mtr);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***************************************************************
-Parses a redo log record of a file page init. */
-
+/***********************************************************//**
+Parses a redo log record of a file page init.
+@return	end of log record or NULL */
+UNIV_INTERN
 byte*
 fsp_parse_init_file_page(
 /*=====================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr __attribute__((unused)), /* in: buffer end */
-	page_t*	page)	/* in: page or NULL */
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr __attribute__((unused)), /*!< in: buffer end */
+	buf_block_t*	block)	/*!< in: block or NULL */
 {
 	ut_ad(ptr && end_ptr);
 
-	if (page) {
-		fsp_init_file_page_low(page);
+	if (block) {
+		fsp_init_file_page_low(block);
 	}
 
 	return(ptr);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Initializes the fsp system. */
-
+UNIV_INTERN
 void
 fsp_init(void)
 /*==========*/
@@ -854,46 +923,62 @@ fsp_init(void)
 	/* Does nothing at the moment */
 }
 
-/**************************************************************************
-Writes the space id to a tablespace header. This function is used past the
-buffer pool when we in fil0fil.c create a new single-table tablespace. */
-
+/**********************************************************************//**
+Writes the space id and compressed page size to a tablespace header.
+This function is used past the buffer pool when we in fil0fil.c create
+a new single-table tablespace. */
+UNIV_INTERN
 void
-fsp_header_write_space_id(
-/*======================*/
-	page_t*	page,		/* in: first page in the space */
-	ulint	space_id)	/* in: space id */
+fsp_header_init_fields(
+/*===================*/
+	page_t*	page,		/*!< in/out: first page in the space */
+	ulint	space_id,	/*!< in: space id */
+	ulint	flags)		/*!< in: tablespace flags (FSP_SPACE_FLAGS):
+				0, or table->flags if newer than COMPACT */
 {
-	mach_write_to_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID, space_id);
+	/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
+	ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+	ROW_FORMAT=REDUNDANT (table->flags == 0).  For any other
+	format, the tablespace flags should equal table->flags. */
+	ut_a(flags != DICT_TF_COMPACT);
+
+	mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page,
+			space_id);
+	mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page,
+			flags);
 }
 
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
 Initializes the space header of a new created space and creates also the
 insert buffer tree root if space == 0. */
-
+UNIV_INTERN
 void
 fsp_header_init(
 /*============*/
-	ulint	space,	/* in: space id */
-	ulint	size,	/* in: current size in blocks */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
+	ulint	space,		/*!< in: space id */
+	ulint	size,		/*!< in: current size in blocks */
+	mtr_t*	mtr)		/*!< in: mini-transaction handle */
 {
 	fsp_header_t*	header;
+	buf_block_t*	block;
 	page_t*		page;
+	ulint		flags;
+	ulint		zip_size;
 
 	ut_ad(mtr);
 
-	mtr_x_lock(fil_space_get_latch(space), mtr);
+	mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
 
-	page = buf_page_create(space, 0, mtr);
-	buf_page_get(space, 0, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+	zip_size = dict_table_flags_to_zip_size(flags);
+	block = buf_page_create(space, 0, zip_size, mtr);
+	buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
 
 	/* The prior contents of the file page should be ignored */
 
-	fsp_init_file_page(page, mtr);
+	fsp_init_file_page(block, mtr);
+	page = buf_block_get_frame(block);
 
 	mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_FSP_HDR,
 			 MLOG_2BYTES, mtr);
@@ -905,7 +990,8 @@ fsp_header_init(
 
 	mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr);
 	mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr);
-	mlog_write_ulint(header + FSP_LOWEST_NO_WRITE, 0, MLOG_4BYTES, mtr);
+	mlog_write_ulint(header + FSP_SPACE_FLAGS, flags,
+			 MLOG_4BYTES, mtr);
 	mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr);
 
 	flst_init(header + FSP_FREE, mtr);
@@ -917,21 +1003,23 @@ fsp_header_init(
 	mlog_write_dulint(header + FSP_SEG_ID, ut_dulint_create(0, 1), mtr);
 	if (space == 0) {
 		fsp_fill_free_list(FALSE, space, header, mtr);
-		btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, space,
-			   ut_dulint_add(DICT_IBUF_ID_MIN, space), FALSE, mtr);
+		btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,
+			   0, 0, ut_dulint_add(DICT_IBUF_ID_MIN, space),
+			   dict_ind_redundant, mtr);
 	} else {
 		fsp_fill_free_list(TRUE, space, header, mtr);
 	}
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/**************************************************************************
-Reads the space id from the first page of a tablespace. */
-
+/**********************************************************************//**
+Reads the space id from the first page of a tablespace.
+@return	space id, ULINT UNDEFINED if error */
+UNIV_INTERN
 ulint
 fsp_header_get_space_id(
 /*====================*/
-			/* out: space id, ULINT UNDEFINED if error */
-	page_t*	page)	/* in: first page of a tablespace */
+	const page_t*	page)	/*!< in: first page of a tablespace */
 {
 	ulint	fsp_id;
 	ulint	id;
@@ -952,24 +1040,56 @@ fsp_header_get_space_id(
 	return(id);
 }
 
-/**************************************************************************
-Increases the space size field of a space. */
+/**********************************************************************//**
+Reads the space flags from the first page of a tablespace.
+@return	flags */
+UNIV_INTERN
+ulint
+fsp_header_get_flags(
+/*=================*/
+	const page_t*	page)	/*!< in: first page of a tablespace */
+{
+	ut_ad(!page_offset(page));
 
+	return(mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page));
+}
+
+/**********************************************************************//**
+Reads the compressed page size from the first page of a tablespace.
+@return	compressed page size in bytes, or 0 if uncompressed */
+UNIV_INTERN
+ulint
+fsp_header_get_zip_size(
+/*====================*/
+	const page_t*	page)	/*!< in: first page of a tablespace */
+{
+	ulint	flags = fsp_header_get_flags(page);
+
+	return(dict_table_flags_to_zip_size(flags));
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Increases the space size field of a space. */
+UNIV_INTERN
 void
 fsp_header_inc_size(
 /*================*/
-	ulint	space,	/* in: space id */
-	ulint	size_inc,/* in: size increment in pages */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
+	ulint	space,	/*!< in: space id */
+	ulint	size_inc,/*!< in: size increment in pages */
+	mtr_t*	mtr)	/*!< in: mini-transaction handle */
 {
 	fsp_header_t*	header;
 	ulint		size;
+	ulint		flags;
 
 	ut_ad(mtr);
 
-	mtr_x_lock(fil_space_get_latch(space), mtr);
+	mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
 
-	header = fsp_get_space_header(space, mtr);
+	header = fsp_get_space_header(space,
+				      dict_table_flags_to_zip_size(flags),
+				      mtr);
 
 	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
 
@@ -977,33 +1097,30 @@ fsp_header_inc_size(
 			 mtr);
 }
 
-/**************************************************************************
-Gets the current free limit of a tablespace. The free limit means the
-place of the first page which has never been put to the the free list
-for allocation. The space above that address is initialized to zero.
-Sets also the global variable log_fsp_current_free_limit. */
-
+/**********************************************************************//**
+Gets the current free limit of the system tablespace.  The free limit
+means the place of the first page which has never been put to the the
+free list for allocation.  The space above that address is initialized
+to zero.  Sets also the global variable log_fsp_current_free_limit.
+@return	free limit in megabytes */
+UNIV_INTERN
 ulint
-fsp_header_get_free_limit(
-/*======================*/
-			/* out: free limit in megabytes */
-	ulint	space)	/* in: space id, must be 0 */
+fsp_header_get_free_limit(void)
+/*===========================*/
 {
 	fsp_header_t*	header;
 	ulint		limit;
 	mtr_t		mtr;
 
-	ut_a(space == 0); /* We have only one log_fsp_current_... variable */
-
 	mtr_start(&mtr);
 
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(fil_space_get_latch(0, NULL), &mtr);
 
-	header = fsp_get_space_header(space, &mtr);
+	header = fsp_get_space_header(0, 0, &mtr);
 
 	limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, &mtr);
 
-	limit = limit / ((1024 * 1024) / UNIV_PAGE_SIZE);
+	limit /= ((1024 * 1024) / UNIV_PAGE_SIZE);
 
 	log_fsp_current_free_limit_set_and_checkpoint(limit);
 
@@ -1012,28 +1129,26 @@ fsp_header_get_free_limit(
 	return(limit);
 }
 
-/**************************************************************************
-Gets the size of the tablespace from the tablespace header. If we do not
-have an auto-extending data file, this should be equal to the size of the
-data files. If there is an auto-extending data file, this can be smaller. */
-
+/**********************************************************************//**
+Gets the size of the system tablespace from the tablespace header.  If
+we do not have an auto-extending data file, this should be equal to
+the size of the data files.  If there is an auto-extending data file,
+this can be smaller.
+@return	size in pages */
+UNIV_INTERN
 ulint
-fsp_header_get_tablespace_size(
-/*===========================*/
-			/* out: size in pages */
-	ulint	space)	/* in: space id, must be 0 */
+fsp_header_get_tablespace_size(void)
+/*================================*/
 {
 	fsp_header_t*	header;
 	ulint		size;
 	mtr_t		mtr;
 
-	ut_a(space == 0); /* We have only one log_fsp_current_... variable */
-
 	mtr_start(&mtr);
 
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(fil_space_get_latch(0, NULL), &mtr);
 
-	header = fsp_get_space_header(space, &mtr);
+	header = fsp_get_space_header(0, 0, &mtr);
 
 	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
 
@@ -1042,18 +1157,18 @@ fsp_header_get_tablespace_size(
 	return(size);
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Tries to extend a single-table tablespace so that a page would fit in the
-data file. */
+data file.
+@return	TRUE if success */
 static
 ibool
 fsp_try_extend_data_file_with_pages(
 /*================================*/
-					/* out: TRUE if success */
-	ulint		space,		/* in: space */
-	ulint		page_no,	/* in: page number */
-	fsp_header_t*	header,		/* in: space header */
-	mtr_t*		mtr)		/* in: mtr */
+	ulint		space,		/*!< in: space */
+	ulint		page_no,	/*!< in: page number */
+	fsp_header_t*	header,		/*!< in: space header */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ibool	success;
 	ulint	actual_size;
@@ -1075,23 +1190,24 @@ fsp_try_extend_data_file_with_pages(
 	return(success);
 }
 
-/***************************************************************************
-Tries to extend the last data file of a tablespace if it is auto-extending. */
+/***********************************************************************//**
+Tries to extend the last data file of a tablespace if it is auto-extending.
+@return	FALSE if not auto-extending */
 static
 ibool
 fsp_try_extend_data_file(
 /*=====================*/
-					/* out: FALSE if not auto-extending */
-	ulint*		actual_increase,/* out: actual increase in pages, where
+	ulint*		actual_increase,/*!< out: actual increase in pages, where
 					we measure the tablespace size from
 					what the header field says; it may be
 					the actual file size rounded down to
 					megabyte */
-	ulint		space,		/* in: space */
-	fsp_header_t*	header,		/* in: space header */
-	mtr_t*		mtr)		/* in: mtr */
+	ulint		space,		/*!< in: space */
+	fsp_header_t*	header,		/*!< in: space header */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ulint	size;
+	ulint	zip_size;
 	ulint	new_size;
 	ulint	old_size;
 	ulint	size_increase;
@@ -1106,62 +1222,70 @@ fsp_try_extend_data_file(
 	}
 
 	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+	zip_size = dict_table_flags_to_zip_size(
+		mach_read_from_4(header + FSP_SPACE_FLAGS));
 
 	old_size = size;
 
-	if (space == 0 && srv_last_file_size_max != 0) {
-		if (srv_last_file_size_max
-		    < srv_data_file_sizes[srv_n_data_files - 1]) {
-
-			fprintf(stderr,
-				"InnoDB: Error: Last data file size is %lu,"
-				" max size allowed %lu\n",
-				(ulong) srv_data_file_sizes[
-					srv_n_data_files - 1],
-				(ulong) srv_last_file_size_max);
-		}
-
-		size_increase = srv_last_file_size_max
-			- srv_data_file_sizes[srv_n_data_files - 1];
-		if (size_increase > SRV_AUTO_EXTEND_INCREMENT) {
-			size_increase = SRV_AUTO_EXTEND_INCREMENT;
-		}
-	} else {
-		if (space == 0) {
+	if (space == 0) {
+		if (!srv_last_file_size_max) {
 			size_increase = SRV_AUTO_EXTEND_INCREMENT;
 		} else {
-			/* We extend single-table tablespaces first one extent
-			at a time, but for bigger tablespaces more. It is not
-			enough to extend always by one extent, because some
-			extents are frag page extents. */
+			if (srv_last_file_size_max
+			    < srv_data_file_sizes[srv_n_data_files - 1]) {
 
-			if (size < FSP_EXTENT_SIZE) {
-				/* Let us first extend the file to 64 pages */
-				success = fsp_try_extend_data_file_with_pages(
-					space, FSP_EXTENT_SIZE - 1,
-					header, mtr);
-				if (!success) {
-					new_size = mtr_read_ulint(
-						header + FSP_SIZE,
-						MLOG_4BYTES, mtr);
-
-					*actual_increase = new_size - old_size;
-
-					return(FALSE);
-				}
-
-				size = FSP_EXTENT_SIZE;
+				fprintf(stderr,
+					"InnoDB: Error: Last data file size"
+					" is %lu, max size allowed %lu\n",
+					(ulong) srv_data_file_sizes[
+						srv_n_data_files - 1],
+					(ulong) srv_last_file_size_max);
 			}
 
-			if (size < 32 * FSP_EXTENT_SIZE) {
-				size_increase = FSP_EXTENT_SIZE;
-			} else {
-				/* Below in fsp_fill_free_list() we assume
-				that we add at most FSP_FREE_ADD extents at
-				a time */
-				size_increase = FSP_FREE_ADD * FSP_EXTENT_SIZE;
+			size_increase = srv_last_file_size_max
+				- srv_data_file_sizes[srv_n_data_files - 1];
+			if (size_increase > SRV_AUTO_EXTEND_INCREMENT) {
+				size_increase = SRV_AUTO_EXTEND_INCREMENT;
 			}
 		}
+	} else {
+		/* We extend single-table tablespaces first one extent
+		at a time, but for bigger tablespaces more. It is not
+		enough to extend always by one extent, because some
+		extents are frag page extents. */
+		ulint	extent_size;	/*!< one megabyte, in pages */
+
+		if (!zip_size) {
+			extent_size = FSP_EXTENT_SIZE;
+		} else {
+			extent_size = FSP_EXTENT_SIZE
+				* UNIV_PAGE_SIZE / zip_size;
+		}
+
+		if (size < extent_size) {
+			/* Let us first extend the file to extent_size */
+			success = fsp_try_extend_data_file_with_pages(
+				space, extent_size - 1, header, mtr);
+			if (!success) {
+				new_size = mtr_read_ulint(header + FSP_SIZE,
+							  MLOG_4BYTES, mtr);
+
+				*actual_increase = new_size - old_size;
+
+				return(FALSE);
+			}
+
+			size = extent_size;
+		}
+
+		if (size < 32 * extent_size) {
+			size_increase = extent_size;
+		} else {
+			/* Below in fsp_fill_free_list() we assume
+			that we add at most FSP_FREE_ADD extents at
+			a time */
+			size_increase = FSP_FREE_ADD * extent_size;
+		}
 	}
 
 	if (size_increase == 0) {
@@ -1174,18 +1298,21 @@ fsp_try_extend_data_file(
 	/* We ignore any fragments of a full megabyte when storing the size
 	to the space header */
 
-	mlog_write_ulint(header + FSP_SIZE,
-			 ut_calc_align_down(actual_size,
-					    (1024 * 1024) / UNIV_PAGE_SIZE),
-			 MLOG_4BYTES, mtr);
-	new_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+	if (!zip_size) {
+		new_size = ut_calc_align_down(actual_size,
+					      (1024 * 1024) / UNIV_PAGE_SIZE);
+	} else {
+		new_size = ut_calc_align_down(actual_size,
+					      (1024 * 1024) / zip_size);
+	}
+	mlog_write_ulint(header + FSP_SIZE, new_size, MLOG_4BYTES, mtr);
 
 	*actual_increase = new_size - old_size;
 
 	return(TRUE);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Puts new extents to the free list if there are free extents above the free
 limit. If an extent happens to contain an extent descriptor page, the extent
 is put to the FSP_FREE_FRAG list with the page marked as used. */
@@ -1193,32 +1320,38 @@ static
 void
 fsp_fill_free_list(
 /*===============*/
-	ibool		init_space,	/* in: TRUE if this is a single-table
+	ibool		init_space,	/*!< in: TRUE if this is a single-table
 					tablespace and we are only initing
 					the tablespace's first extent
 					descriptor page and ibuf bitmap page;
 					then we do not allocate more extents */
-	ulint		space,		/* in: space */
-	fsp_header_t*	header,		/* in: space header */
-	mtr_t*		mtr)		/* in: mtr */
+	ulint		space,		/*!< in: space */
+	fsp_header_t*	header,		/*!< in: space header */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ulint	limit;
 	ulint	size;
+	ulint	zip_size;
 	xdes_t*	descr;
 	ulint	count		= 0;
 	ulint	frag_n_used;
-	page_t*	descr_page;
-	page_t*	ibuf_page;
 	ulint	actual_increase;
 	ulint	i;
 	mtr_t	ibuf_mtr;
 
 	ut_ad(header && mtr);
+	ut_ad(page_offset(header) == FSP_HEADER_OFFSET);
 
 	/* Check if we can fill free list from above the free list limit */
 	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
 	limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
 
+	zip_size = dict_table_flags_to_zip_size(
+		mach_read_from_4(FSP_SPACE_FLAGS + header));
+	ut_a(ut_is_2pow(zip_size));
+	ut_a(zip_size <= UNIV_PAGE_SIZE);
+	ut_a(!zip_size || zip_size >= PAGE_ZIP_MIN_SIZE);
+
 	if (space == 0 && srv_auto_extend_last_data_file
 	    && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
 
@@ -1240,32 +1373,44 @@ fsp_fill_free_list(
 	while ((init_space && i < 1)
 	       || ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD))) {
 
+		ibool	init_xdes;
+		if (zip_size) {
+			init_xdes = ut_2pow_remainder(i, zip_size) == 0;
+		} else {
+			init_xdes = ut_2pow_remainder(i, UNIV_PAGE_SIZE) == 0;
+		}
+
 		mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE,
 				 MLOG_4BYTES, mtr);
 
 		/* Update the free limit info in the log system and make
 		a checkpoint */
 		if (space == 0) {
+			ut_a(!zip_size);
 			log_fsp_current_free_limit_set_and_checkpoint(
 				(i + FSP_EXTENT_SIZE)
 				/ ((1024 * 1024) / UNIV_PAGE_SIZE));
 		}
 
-		if (0 == i % XDES_DESCRIBED_PER_PAGE) {
+		if (UNIV_UNLIKELY(init_xdes)) {
+
+			buf_block_t*	block;
 
 			/* We are going to initialize a new descriptor page
 			and a new ibuf bitmap page: the prior contents of the
 			pages should be ignored. */
 
 			if (i > 0) {
-				descr_page = buf_page_create(space, i, mtr);
-				buf_page_get(space, i, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
-				buf_page_dbg_add_level(descr_page,
-						       SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-				fsp_init_file_page(descr_page, mtr);
-				mlog_write_ulint(descr_page + FIL_PAGE_TYPE,
+				block = buf_page_create(
+					space, i, zip_size, mtr);
+				buf_page_get(space, zip_size, i,
+					     RW_X_LATCH, mtr);
+				buf_block_dbg_add_level(block,
+							SYNC_FSP_PAGE);
+
+				fsp_init_file_page(block, mtr);
+				mlog_write_ulint(buf_block_get_frame(block)
+						 + FIL_PAGE_TYPE,
 						 FIL_PAGE_TYPE_XDES,
 						 MLOG_2BYTES, mtr);
 			}
@@ -1277,17 +1422,17 @@ fsp_fill_free_list(
 
 			mtr_start(&ibuf_mtr);
 
-			ibuf_page = buf_page_create(space,
+			block = buf_page_create(space,
 						    i + FSP_IBUF_BITMAP_OFFSET,
-						    &ibuf_mtr);
-			buf_page_get(space, i + FSP_IBUF_BITMAP_OFFSET,
+						    zip_size, &ibuf_mtr);
+			buf_page_get(space, zip_size,
+				     i + FSP_IBUF_BITMAP_OFFSET,
 				     RW_X_LATCH, &ibuf_mtr);
-#ifdef UNIV_SYNC_DEBUG
-			buf_page_dbg_add_level(ibuf_page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-			fsp_init_file_page(ibuf_page, &ibuf_mtr);
+			buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
 
-			ibuf_bitmap_page_init(ibuf_page, &ibuf_mtr);
+			fsp_init_file_page(block, &ibuf_mtr);
+
+			ibuf_bitmap_page_init(block, &ibuf_mtr);
 
 			mtr_commit(&ibuf_mtr);
 		}
@@ -1296,11 +1441,14 @@ fsp_fill_free_list(
 							   mtr);
 		xdes_init(descr, mtr);
 
-#if XDES_DESCRIBED_PER_PAGE % FSP_EXTENT_SIZE
-# error "XDES_DESCRIBED_PER_PAGE % FSP_EXTENT_SIZE != 0"
+#if UNIV_PAGE_SIZE % FSP_EXTENT_SIZE
+# error "UNIV_PAGE_SIZE % FSP_EXTENT_SIZE != 0"
+#endif
+#if PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE
+# error "PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE != 0"
 #endif
 
-		if (0 == i % XDES_DESCRIBED_PER_PAGE) {
+		if (UNIV_UNLIKELY(init_xdes)) {
 
 			/* The first page in the extent is a descriptor page
 			and the second is an ibuf bitmap page: mark them
@@ -1327,19 +1475,20 @@ fsp_fill_free_list(
 	}
 }
 
-/**************************************************************************
-Allocates a new free extent. */
+/**********************************************************************//**
+Allocates a new free extent.
+@return	extent descriptor, NULL if cannot be allocated */
 static
 xdes_t*
 fsp_alloc_free_extent(
 /*==================*/
-			/* out: extent descriptor, NULL if cannot be
-			allocated */
-	ulint	space,	/* in: space id */
-	ulint	hint,	/* in: hint of which extent would be desirable: any
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes
+			or 0 for uncompressed pages */
+	ulint	hint,	/*!< in: hint of which extent would be desirable: any
 			page offset in the extent goes; the hint must not
 			be > FSP_FREE_LIMIT */
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	fsp_header_t*	header;
 	fil_addr_t	first;
@@ -1347,7 +1496,7 @@ fsp_alloc_free_extent(
 
 	ut_ad(mtr);
 
-	header = fsp_get_space_header(space, mtr);
+	header = fsp_get_space_header(space, zip_size, mtr);
 
 	descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
 
@@ -1368,7 +1517,7 @@ fsp_alloc_free_extent(
 			return(NULL);	/* No free extents left */
 		}
 
-		descr = xdes_lst_get_descriptor(space, first, mtr);
+		descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
 	}
 
 	flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
@@ -1376,22 +1525,23 @@ fsp_alloc_free_extent(
 	return(descr);
 }
 
-/**************************************************************************
-Allocates a single free page from a space. The page is marked as used. */
+/**********************************************************************//**
+Allocates a single free page from a space. The page is marked as used.
+@return	the page offset, FIL_NULL if no page could be allocated */
 static
 ulint
 fsp_alloc_free_page(
 /*================*/
-			/* out: the page offset, FIL_NULL if no page could
-			be allocated */
-	ulint	space,	/* in: space id */
-	ulint	hint,	/* in: hint of which page would be desirable */
-	mtr_t*	mtr)	/* in: mtr handle */
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes
+			or 0 for uncompressed pages */
+	ulint	hint,	/*!< in: hint of which page would be desirable */
+	mtr_t*	mtr)	/*!< in: mtr handle */
 {
 	fsp_header_t*	header;
 	fil_addr_t	first;
 	xdes_t*		descr;
-	page_t*		page;
+	buf_block_t*	block;
 	ulint		free;
 	ulint		frag_n_used;
 	ulint		page_no;
@@ -1400,7 +1550,7 @@ fsp_alloc_free_page(
 
 	ut_ad(mtr);
 
-	header = fsp_get_space_header(space, mtr);
+	header = fsp_get_space_header(space, zip_size, mtr);
 
 	/* Get the hinted descriptor */
 	descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
@@ -1419,7 +1569,8 @@ fsp_alloc_free_page(
 			FREE_FRAG list. But we will allocate our page from the
 			the free extent anyway. */
 
-			descr = fsp_alloc_free_extent(space, hint, mtr);
+			descr = fsp_alloc_free_extent(space, zip_size,
+						      hint, mtr);
 
 			if (descr == NULL) {
 				/* No free space left */
@@ -1431,7 +1582,8 @@ fsp_alloc_free_page(
 			flst_add_last(header + FSP_FREE_FRAG,
 				      descr + XDES_FLST_NODE, mtr);
 		} else {
-			descr = xdes_lst_get_descriptor(space, first, mtr);
+			descr = xdes_lst_get_descriptor(space, zip_size,
+							first, mtr);
 		}
 
 		/* Reset the hint */
@@ -1446,6 +1598,7 @@ fsp_alloc_free_page(
 	if (free == ULINT_UNDEFINED) {
 
 		ut_print_buf(stderr, ((byte*)descr) - 500, 1000);
+		putc('\n', stderr);
 
 		ut_error;
 	}
@@ -1502,28 +1655,28 @@ fsp_alloc_free_page(
 	be obtained immediately with buf_page_get without need for a disk
 	read. */
 
-	buf_page_create(space, page_no, mtr);
+	buf_page_create(space, page_no, zip_size, mtr);
 
-	page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+	block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
 
 	/* Prior contents of the page should be ignored */
-	fsp_init_file_page(page, mtr);
+	fsp_init_file_page(block, mtr);
 
 	return(page_no);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Frees a single page of a space. The page is marked as free and clean. */
 static
 void
 fsp_free_page(
 /*==========*/
-	ulint	space,	/* in: space id */
-	ulint	page,	/* in: page offset */
-	mtr_t*	mtr)	/* in: mtr handle */
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes
+			or 0 for uncompressed pages */
+	ulint	page,	/*!< in: page offset */
+	mtr_t*	mtr)	/*!< in: mtr handle */
 {
 	fsp_header_t*	header;
 	xdes_t*		descr;
@@ -1534,7 +1687,7 @@ fsp_free_page(
 
 	/* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */
 
-	header = fsp_get_space_header(space, mtr);
+	header = fsp_get_space_header(space, zip_size, mtr);
 
 	descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
 
@@ -1599,32 +1752,35 @@ fsp_free_page(
 		/* The extent has become free: move it to another list */
 		flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
 			    mtr);
-		fsp_free_extent(space, page, mtr);
+		fsp_free_extent(space, zip_size, page, mtr);
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Returns an extent to the free list of a space. */
 static
 void
 fsp_free_extent(
 /*============*/
-	ulint	space,	/* in: space id */
-	ulint	page,	/* in: page offset in the extent */
-	mtr_t*	mtr)	/* in: mtr */
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes
+			or 0 for uncompressed pages */
+	ulint	page,	/*!< in: page offset in the extent */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	fsp_header_t*	header;
 	xdes_t*		descr;
 
 	ut_ad(mtr);
 
-	header = fsp_get_space_header(space, mtr);
+	header = fsp_get_space_header(space, zip_size, mtr);
 
 	descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
 
 	if (xdes_get_state(descr, mtr) == XDES_FREE) {
 
 		ut_print_buf(stderr, (byte*)descr - 500, 1000);
+		putc('\n', stderr);
 
 		ut_error;
 	}
@@ -1634,44 +1790,46 @@ fsp_free_extent(
 	flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
 }
 
-/**************************************************************************
-Returns the nth inode slot on an inode page. */
+/**********************************************************************//**
+Returns the nth inode slot on an inode page.
+@return	segment inode */
 UNIV_INLINE
 fseg_inode_t*
 fsp_seg_inode_page_get_nth_inode(
 /*=============================*/
-			/* out: segment inode */
-	page_t*	page,	/* in: segment inode page */
-	ulint	i,	/* in: inode index on page */
-	mtr_t*	mtr __attribute__((unused))) /* in: mini-transaction handle */
+	page_t*	page,	/*!< in: segment inode page */
+	ulint	i,	/*!< in: inode index on page */
+	ulint	zip_size __attribute__((unused)),
+			/*!< in: compressed page size, or 0 */
+	mtr_t*	mtr __attribute__((unused)))
+			/*!< in: mini-transaction handle */
 {
-	ut_ad(i < FSP_SEG_INODES_PER_PAGE);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(i < FSP_SEG_INODES_PER_PAGE(zip_size));
+	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
 
 	return(page + FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i);
 }
 
-/**************************************************************************
-Looks for a used segment inode on a segment inode page. */
+/**********************************************************************//**
+Looks for a used segment inode on a segment inode page.
+@return	segment inode index, or ULINT_UNDEFINED if not found */
 static
 ulint
 fsp_seg_inode_page_find_used(
 /*=========================*/
-			/* out: segment inode index, or ULINT_UNDEFINED
-			if not found */
-	page_t*	page,	/* in: segment inode page */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
+	page_t*	page,	/*!< in: segment inode page */
+	ulint	zip_size,/*!< in: compressed page size, or 0 */
+	mtr_t*	mtr)	/*!< in: mini-transaction handle */
 {
 	ulint		i;
 	fseg_inode_t*	inode;
 
-	for (i = 0; i < FSP_SEG_INODES_PER_PAGE; i++) {
+	for (i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
 
-		inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr);
+		inode = fsp_seg_inode_page_get_nth_inode(
+			page, i, zip_size, mtr);
 
-		if (ut_dulint_cmp(mach_read_from_8(inode + FSEG_ID),
-				  ut_dulint_zero) != 0) {
+		if (!ut_dulint_is_zero(mach_read_from_8(inode + FSEG_ID))) {
 			/* This is used */
 
 			return(i);
@@ -1681,27 +1839,26 @@ fsp_seg_inode_page_find_used(
 	return(ULINT_UNDEFINED);
 }
 
-/**************************************************************************
-Looks for an unused segment inode on a segment inode page. */
+/**********************************************************************//**
+Looks for an unused segment inode on a segment inode page.
+@return	segment inode index, or ULINT_UNDEFINED if not found */
 static
 ulint
 fsp_seg_inode_page_find_free(
 /*=========================*/
-			/* out: segment inode index, or ULINT_UNDEFINED
-			if not found */
-	page_t*	page,	/* in: segment inode page */
-	ulint	j,	/* in: search forward starting from this index */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
+	page_t*	page,	/*!< in: segment inode page */
+	ulint	i,	/*!< in: search forward starting from this index */
+	ulint	zip_size,/*!< in: compressed page size, or 0 */
+	mtr_t*	mtr)	/*!< in: mini-transaction handle */
 {
-	ulint		i;
 	fseg_inode_t*	inode;
 
-	for (i = j; i < FSP_SEG_INODES_PER_PAGE; i++) {
+	for (; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
 
-		inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr);
+		inode = fsp_seg_inode_page_get_nth_inode(
+			page, i, zip_size, mtr);
 
-		if (ut_dulint_cmp(mach_read_from_8(inode + FSEG_ID),
-				  ut_dulint_zero) == 0) {
+		if (ut_dulint_is_zero(mach_read_from_8(inode + FSEG_ID))) {
 			/* This is unused */
 
 			return(i);
@@ -1711,44 +1868,51 @@ fsp_seg_inode_page_find_free(
 	return(ULINT_UNDEFINED);
 }
 
-/**************************************************************************
-Allocates a new file segment inode page. */
+/**********************************************************************//**
+Allocates a new file segment inode page.
+@return	TRUE if could be allocated */
 static
 ibool
 fsp_alloc_seg_inode_page(
 /*=====================*/
-					/* out: TRUE if could be allocated */
-	fsp_header_t*	space_header,	/* in: space header */
-	mtr_t*		mtr)		/* in: mini-transaction handle */
+	fsp_header_t*	space_header,	/*!< in: space header */
+	mtr_t*		mtr)		/*!< in: mini-transaction handle */
 {
 	fseg_inode_t*	inode;
+	buf_block_t*	block;
 	page_t*		page;
 	ulint		page_no;
 	ulint		space;
+	ulint		zip_size;
 	ulint		i;
 
-	space = buf_frame_get_space_id(space_header);
+	ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
 
-	page_no = fsp_alloc_free_page(space, 0, mtr);
+	space = page_get_space_id(page_align(space_header));
+	zip_size = dict_table_flags_to_zip_size(
+		mach_read_from_4(FSP_SPACE_FLAGS + space_header));
+
+	page_no = fsp_alloc_free_page(space, zip_size, 0, mtr);
 
 	if (page_no == FIL_NULL) {
 
 		return(FALSE);
 	}
 
-	page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
+	block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
 
-	buf_block_align(page)->check_index_page_at_flush = FALSE;
+	block->check_index_page_at_flush = FALSE;
+
+	page = buf_block_get_frame(block);
 
 	mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_INODE,
 			 MLOG_2BYTES, mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
 
-	for (i = 0; i < FSP_SEG_INODES_PER_PAGE; i++) {
+	for (i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
 
-		inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr);
+		inode = fsp_seg_inode_page_get_nth_inode(page, i,
+							 zip_size, mtr);
 
 		mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, mtr);
 	}
@@ -1758,23 +1922,26 @@ fsp_alloc_seg_inode_page(
 	return(TRUE);
 }
 
-/**************************************************************************
-Allocates a new file segment inode. */
+/**********************************************************************//**
+Allocates a new file segment inode.
+@return	segment inode, or NULL if not enough space */
 static
 fseg_inode_t*
 fsp_alloc_seg_inode(
 /*================*/
-					/* out: segment inode, or NULL if
-					not enough space */
-	fsp_header_t*	space_header,	/* in: space header */
-	mtr_t*		mtr)		/* in: mini-transaction handle */
+	fsp_header_t*	space_header,	/*!< in: space header */
+	mtr_t*		mtr)		/*!< in: mini-transaction handle */
 {
 	ulint		page_no;
+	buf_block_t*	block;
 	page_t*		page;
 	fseg_inode_t*	inode;
 	ibool		success;
+	ulint		zip_size;
 	ulint		n;
 
+	ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
+
 	if (flst_get_len(space_header + FSP_SEG_INODES_FREE, mtr) == 0) {
 		/* Allocate a new segment inode page */
 
@@ -1788,20 +1955,22 @@ fsp_alloc_seg_inode(
 
 	page_no = flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page;
 
-	page = buf_page_get(buf_frame_get_space_id(space_header), page_no,
-			    RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+	zip_size = dict_table_flags_to_zip_size(
+		mach_read_from_4(FSP_SPACE_FLAGS + space_header));
+	block = buf_page_get(page_get_space_id(page_align(space_header)),
+			     zip_size, page_no, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
 
-	n = fsp_seg_inode_page_find_free(page, 0, mtr);
+	page = buf_block_get_frame(block);
+
+	n = fsp_seg_inode_page_find_free(page, 0, zip_size, mtr);
 
 	ut_a(n != ULINT_UNDEFINED);
 
-	inode = fsp_seg_inode_page_get_nth_inode(page, n, mtr);
+	inode = fsp_seg_inode_page_get_nth_inode(page, n, zip_size, mtr);
 
 	if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, n + 1,
-							    mtr)) {
+							    zip_size, mtr)) {
 		/* There are no other unused headers left on the page: move it
 		to another list */
 
@@ -1815,26 +1984,29 @@ fsp_alloc_seg_inode(
 	return(inode);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Frees a file segment inode. */
 static
 void
 fsp_free_seg_inode(
 /*===============*/
-	ulint		space,	/* in: space id */
-	fseg_inode_t*	inode,	/* in: segment inode */
-	mtr_t*		mtr)	/* in: mini-transaction handle */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	fseg_inode_t*	inode,	/*!< in: segment inode */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
 {
 	page_t*		page;
 	fsp_header_t*	space_header;
 
-	page = buf_frame_align(inode);
+	page = page_align(inode);
 
-	space_header = fsp_get_space_header(space, mtr);
+	space_header = fsp_get_space_header(space, zip_size, mtr);
 
 	ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
 
-	if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, 0, mtr)) {
+	if (ULINT_UNDEFINED
+	    == fsp_seg_inode_page_find_free(page, 0, zip_size, mtr)) {
 
 		/* Move the page to another list */
 
@@ -1848,90 +2020,91 @@ fsp_free_seg_inode(
 	mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, mtr);
 	mlog_write_ulint(inode + FSEG_MAGIC_N, 0, MLOG_4BYTES, mtr);
 
-	if (ULINT_UNDEFINED == fsp_seg_inode_page_find_used(page, mtr)) {
+	if (ULINT_UNDEFINED
+	    == fsp_seg_inode_page_find_used(page, zip_size, mtr)) {
 
 		/* There are no other used headers left on the page: free it */
 
 		flst_remove(space_header + FSP_SEG_INODES_FREE,
 			    page + FSEG_INODE_PAGE_NODE, mtr);
 
-		fsp_free_page(space, buf_frame_get_page_no(page), mtr);
+		fsp_free_page(space, zip_size, page_get_page_no(page), mtr);
 	}
 }
 
-/**************************************************************************
-Returns the file segment inode, page x-latched. */
+/**********************************************************************//**
+Returns the file segment inode, page x-latched.
+@return	segment inode, page x-latched */
 static
 fseg_inode_t*
 fseg_inode_get(
 /*===========*/
-				/* out: segment inode, page x-latched */
-	fseg_header_t*	header,	/* in: segment header */
-	mtr_t*		mtr)	/* in: mtr handle */
+	fseg_header_t*	header,	/*!< in: segment header */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	fil_addr_t	inode_addr;
 	fseg_inode_t*	inode;
 
 	inode_addr.page = mach_read_from_4(header + FSEG_HDR_PAGE_NO);
 	inode_addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET);
+	ut_ad(space == mach_read_from_4(header + FSEG_HDR_SPACE));
 
-	inode = fut_get_ptr(mach_read_from_4(header + FSEG_HDR_SPACE),
-			    inode_addr, RW_X_LATCH, mtr);
+	inode = fut_get_ptr(space, zip_size, inode_addr, RW_X_LATCH, mtr);
 
 	ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
 
 	return(inode);
 }
 
-/**************************************************************************
-Gets the page number from the nth fragment page slot. */
+/**********************************************************************//**
+Gets the page number from the nth fragment page slot.
+@return	page number, FIL_NULL if not in use */
 UNIV_INLINE
 ulint
 fseg_get_nth_frag_page_no(
 /*======================*/
-				/* out: page number, FIL_NULL if not in use */
-	fseg_inode_t*	inode,	/* in: segment inode */
-	ulint		n,	/* in: slot index */
-	mtr_t*		mtr __attribute__((unused))) /* in: mtr handle */
+	fseg_inode_t*	inode,	/*!< in: segment inode */
+	ulint		n,	/*!< in: slot index */
+	mtr_t*		mtr __attribute__((unused))) /*!< in: mtr handle */
 {
 	ut_ad(inode && mtr);
 	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
 	return(mach_read_from_4(inode + FSEG_FRAG_ARR
 				+ n * FSEG_FRAG_SLOT_SIZE));
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Sets the page number in the nth fragment page slot. */
 UNIV_INLINE
 void
 fseg_set_nth_frag_page_no(
 /*======================*/
-	fseg_inode_t*	inode,	/* in: segment inode */
-	ulint		n,	/* in: slot index */
-	ulint		page_no,/* in: page number to set */
-	mtr_t*		mtr)	/* in: mtr handle */
+	fseg_inode_t*	inode,	/*!< in: segment inode */
+	ulint		n,	/*!< in: slot index */
+	ulint		page_no,/*!< in: page number to set */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	ut_ad(inode && mtr);
 	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
 
 	mlog_write_ulint(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE,
 			 page_no, MLOG_4BYTES, mtr);
 }
 
-/**************************************************************************
-Finds a fragment page slot which is free. */
+/**********************************************************************//**
+Finds a fragment page slot which is free.
+@return	slot index; ULINT_UNDEFINED if none found */
 static
 ulint
 fseg_find_free_frag_page_slot(
 /*==========================*/
-				/* out: slot index; ULINT_UNDEFINED if none
-				found */
-	fseg_inode_t*	inode,	/* in: segment inode */
-	mtr_t*		mtr)	/* in: mtr handle */
+	fseg_inode_t*	inode,	/*!< in: segment inode */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	ulint	i;
 	ulint	page_no;
@@ -1950,16 +2123,15 @@ fseg_find_free_frag_page_slot(
 	return(ULINT_UNDEFINED);
 }
 
-/**************************************************************************
-Finds a fragment page slot which is used and last in the array. */
+/**********************************************************************//**
+Finds a fragment page slot which is used and last in the array.
+@return	slot index; ULINT_UNDEFINED if none found */
 static
 ulint
 fseg_find_last_used_frag_page_slot(
 /*===============================*/
-				/* out: slot index; ULINT_UNDEFINED if none
-				found */
-	fseg_inode_t*	inode,	/* in: segment inode */
-	mtr_t*		mtr)	/* in: mtr handle */
+	fseg_inode_t*	inode,	/*!< in: segment inode */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	ulint	i;
 	ulint	page_no;
@@ -1979,15 +2151,15 @@ fseg_find_last_used_frag_page_slot(
 	return(ULINT_UNDEFINED);
 }
 
-/**************************************************************************
-Calculates reserved fragment page slots. */
+/**********************************************************************//**
+Calculates reserved fragment page slots.
+@return	number of fragment pages */
 static
 ulint
 fseg_get_n_frag_pages(
 /*==================*/
-				/* out: number of fragment pages */
-	fseg_inode_t*	inode,	/* in: segment inode */
-	mtr_t*		mtr)	/* in: mtr handle */
+	fseg_inode_t*	inode,	/*!< in: segment inode */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	ulint	i;
 	ulint	count	= 0;
@@ -2003,51 +2175,55 @@ fseg_get_n_frag_pages(
 	return(count);
 }
 
-/**************************************************************************
-Creates a new segment. */
-
-page_t*
+/**********************************************************************//**
+Creates a new segment.
+@return the block where the segment header is placed, x-latched, NULL
+if could not create segment because of lack of space */
+UNIV_INTERN
+buf_block_t*
 fseg_create_general(
 /*================*/
-			/* out: the page where the segment header is placed,
-			x-latched, NULL if could not create segment
-			because of lack of space */
-	ulint	space,	/* in: space id */
-	ulint	page,	/* in: page where the segment header is placed: if
+	ulint	space,	/*!< in: space id */
+	ulint	page,	/*!< in: page where the segment header is placed: if
 			this is != 0, the page must belong to another segment,
 			if this is 0, a new page will be allocated and it
 			will belong to the created segment */
-	ulint	byte_offset, /* in: byte offset of the created segment header
+	ulint	byte_offset, /*!< in: byte offset of the created segment header
 			on the page */
-	ibool	has_done_reservation, /* in: TRUE if the caller has already
+	ibool	has_done_reservation, /*!< in: TRUE if the caller has already
 			done the reservation for the pages with
 			fsp_reserve_free_extents (at least 2 extents: one for
 			the inode and the other for the segment) then there is
 			no need to do the check for this individual
 			operation */
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
+	ulint		flags;
+	ulint		zip_size;
 	fsp_header_t*	space_header;
 	fseg_inode_t*	inode;
 	dulint		seg_id;
-	fseg_header_t*	header = 0; /* remove warning */
+	buf_block_t*	block	= 0; /* remove warning */
+	fseg_header_t*	header	= 0; /* remove warning */
 	rw_lock_t*	latch;
 	ibool		success;
 	ulint		n_reserved;
-	page_t*		ret		= NULL;
 	ulint		i;
 
 	ut_ad(mtr);
+	ut_ad(byte_offset + FSEG_HEADER_SIZE
+	      <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
+
+	latch = fil_space_get_latch(space, &flags);
+	zip_size = dict_table_flags_to_zip_size(flags);
 
 	if (page != 0) {
-		header = byte_offset + buf_page_get(space, page, RW_X_LATCH,
-						    mtr);
+		block = buf_page_get(space, zip_size, page, RW_X_LATCH, mtr);
+		header = byte_offset + buf_block_get_frame(block);
 	}
 
 	ut_ad(!mutex_own(&kernel_mutex)
-	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
-				   MTR_MEMO_X_LOCK));
-	latch = fil_space_get_latch(space);
+	      || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
 
 	mtr_x_lock(latch, mtr);
 
@@ -2055,8 +2231,8 @@ fseg_create_general(
 		/* This thread did not own the latch before this call: free
 		excess pages from the insert buffer free list */
 
-		if (space == 0) {
-			ibuf_free_excess_pages(space);
+		if (space == IBUF_SPACE_ID) {
+			ibuf_free_excess_pages();
 		}
 	}
 
@@ -2068,7 +2244,7 @@ fseg_create_general(
 		}
 	}
 
-	space_header = fsp_get_space_header(space, mtr);
+	space_header = fsp_get_space_header(space, zip_size, mtr);
 
 	inode = fsp_alloc_seg_inode(space_header, mtr);
 
@@ -2099,78 +2275,77 @@ fseg_create_general(
 	}
 
 	if (page == 0) {
-		page = fseg_alloc_free_page_low(space, inode, 0, FSP_UP, mtr);
+		page = fseg_alloc_free_page_low(space, zip_size,
+						inode, 0, FSP_UP, mtr);
 
 		if (page == FIL_NULL) {
 
-			fsp_free_seg_inode(space, inode, mtr);
+			fsp_free_seg_inode(space, zip_size, inode, mtr);
 
 			goto funct_exit;
 		}
 
-		header = byte_offset
-			+ buf_page_get(space, page, RW_X_LATCH, mtr);
+		block = buf_page_get(space, zip_size, page, RW_X_LATCH, mtr);
+		header = byte_offset + buf_block_get_frame(block);
 		mlog_write_ulint(header - byte_offset + FIL_PAGE_TYPE,
 				 FIL_PAGE_TYPE_SYS, MLOG_2BYTES, mtr);
 	}
 
 	mlog_write_ulint(header + FSEG_HDR_OFFSET,
-			 inode - buf_frame_align(inode), MLOG_2BYTES, mtr);
+			 page_offset(inode), MLOG_2BYTES, mtr);
 
 	mlog_write_ulint(header + FSEG_HDR_PAGE_NO,
-			 buf_frame_get_page_no(inode), MLOG_4BYTES, mtr);
+			 page_get_page_no(page_align(inode)),
+			 MLOG_4BYTES, mtr);
 
 	mlog_write_ulint(header + FSEG_HDR_SPACE, space, MLOG_4BYTES, mtr);
 
-	ret = buf_frame_align(header);
-
 funct_exit:
 	if (!has_done_reservation) {
 
 		fil_space_release_free_extents(space, n_reserved);
 	}
 
-	return(ret);
+	return(block);
 }
 
-/**************************************************************************
-Creates a new segment. */
-
-page_t*
+/**********************************************************************//**
+Creates a new segment.
+@return the block where the segment header is placed, x-latched, NULL
+if could not create segment because of lack of space */
+UNIV_INTERN
+buf_block_t*
 fseg_create(
 /*========*/
-			/* out: the page where the segment header is placed,
-			x-latched, NULL if could not create segment
-			because of lack of space */
-	ulint	space,	/* in: space id */
-	ulint	page,	/* in: page where the segment header is placed: if
+	ulint	space,	/*!< in: space id */
+	ulint	page,	/*!< in: page where the segment header is placed: if
 			this is != 0, the page must belong to another segment,
 			if this is 0, a new page will be allocated and it
 			will belong to the created segment */
-	ulint	byte_offset, /* in: byte offset of the created segment header
+	ulint	byte_offset, /*!< in: byte offset of the created segment header
 			on the page */
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	return(fseg_create_general(space, page, byte_offset, FALSE, mtr));
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Calculates the number of pages reserved by a segment, and how many pages are
-currently used. */
+currently used.
+@return	number of reserved pages */
 static
 ulint
 fseg_n_reserved_pages_low(
 /*======================*/
-				/* out: number of reserved pages */
-	fseg_inode_t*	inode,	/* in: segment inode */
-	ulint*		used,	/* out: number of pages used (<= reserved) */
-	mtr_t*		mtr)	/* in: mtr handle */
+	fseg_inode_t*	inode,	/*!< in: segment inode */
+	ulint*		used,	/*!< out: number of pages used (not
+				more than reserved) */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	ulint	ret;
 
 	ut_ad(inode && used && mtr);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
 
 	*used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr)
 		+ FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr)
@@ -2184,38 +2359,42 @@ fseg_n_reserved_pages_low(
 	return(ret);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Calculates the number of pages reserved by a segment, and how many pages are
-currently used. */
-
+currently used.
+@return	number of reserved pages */
+UNIV_INTERN
 ulint
 fseg_n_reserved_pages(
 /*==================*/
-				/* out: number of reserved pages */
-	fseg_header_t*	header,	/* in: segment header */
-	ulint*		used,	/* out: number of pages used (<= reserved) */
-	mtr_t*		mtr)	/* in: mtr handle */
+	fseg_header_t*	header,	/*!< in: segment header */
+	ulint*		used,	/*!< out: number of pages used (<= reserved) */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	ulint		ret;
 	fseg_inode_t*	inode;
 	ulint		space;
+	ulint		flags;
+	ulint		zip_size;
+	rw_lock_t*	latch;
 
-	space = buf_frame_get_space_id(header);
+	space = page_get_space_id(page_align(header));
+	latch = fil_space_get_latch(space, &flags);
+	zip_size = dict_table_flags_to_zip_size(flags);
 
 	ut_ad(!mutex_own(&kernel_mutex)
-	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
-				   MTR_MEMO_X_LOCK));
+	      || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
 
-	mtr_x_lock(fil_space_get_latch(space), mtr);
+	mtr_x_lock(latch, mtr);
 
-	inode = fseg_inode_get(header, mtr);
+	inode = fseg_inode_get(header, space, zip_size, mtr);
 
 	ret = fseg_n_reserved_pages_low(inode, used, mtr);
 
 	return(ret);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Tries to fill the free list of a segment with consecutive free extents.
 This happens if the segment is big enough to allow extents in the free list,
 the free list is empty, and the extents can be allocated consecutively from
@@ -2224,11 +2403,13 @@ static
 void
 fseg_fill_free_list(
 /*================*/
-	fseg_inode_t*	inode,	/* in: segment inode */
-	ulint		space,	/* in: space id */
-	ulint		hint,	/* in: hint which extent would be good as
+	fseg_inode_t*	inode,	/*!< in: segment inode */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint		hint,	/*!< in: hint which extent would be good as
 				the first extent */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	xdes_t*	descr;
 	ulint	i;
@@ -2237,6 +2418,7 @@ fseg_fill_free_list(
 	ulint	used;
 
 	ut_ad(inode && mtr);
+	ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
 
 	reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
 
@@ -2254,7 +2436,7 @@ fseg_fill_free_list(
 	}
 
 	for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) {
-		descr = xdes_get_descriptor(space, hint, mtr);
+		descr = xdes_get_descriptor(space, zip_size, hint, mtr);
 
 		if ((descr == NULL)
 		    || (XDES_FREE != xdes_get_state(descr, mtr))) {
@@ -2264,7 +2446,7 @@ fseg_fill_free_list(
 			return;
 		}
 
-		descr = fsp_alloc_free_extent(space, hint, mtr);
+		descr = fsp_alloc_free_extent(space, zip_size, hint, mtr);
 
 		xdes_set_state(descr, XDES_FSEG, mtr);
 
@@ -2276,34 +2458,37 @@ fseg_fill_free_list(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Allocates a free extent for the segment: looks first in the free list of the
 segment, then tries to allocate from the space free list. NOTE that the extent
-returned still resides in the segment free list, it is not yet taken off it! */
+returned still resides in the segment free list, it is not yet taken off it!
+@return allocated extent, still placed in the segment free list, NULL
+if could not be allocated */
 static
 xdes_t*
 fseg_alloc_free_extent(
 /*===================*/
-				/* out: allocated extent, still placed in the
-				segment free list, NULL if could
-				not be allocated */
-	fseg_inode_t*	inode,	/* in: segment inode */
-	ulint		space,	/* in: space id */
-	mtr_t*		mtr)	/* in: mtr */
+	fseg_inode_t*	inode,	/*!< in: segment inode */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	xdes_t*		descr;
 	dulint		seg_id;
 	fil_addr_t	first;
 
+	ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
+
 	if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
 		/* Segment free list is not empty, allocate from it */
 
 		first = flst_get_first(inode + FSEG_FREE, mtr);
 
-		descr = xdes_lst_get_descriptor(space, first, mtr);
+		descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
 	} else {
 		/* Segment free list was empty, allocate from space */
-		descr = fsp_alloc_free_extent(space, 0, mtr);
+		descr = fsp_alloc_free_extent(space, zip_size, 0, mtr);
 
 		if (descr == NULL) {
 
@@ -2317,7 +2502,7 @@ fseg_alloc_free_extent(
 		flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
 
 		/* Try to fill the segment free list */
-		fseg_fill_free_list(inode, space,
+		fseg_fill_free_list(inode, space, zip_size,
 				    xdes_get_offset(descr) + FSP_EXTENT_SIZE,
 				    mtr);
 	}
@@ -2325,36 +2510,36 @@ fseg_alloc_free_extent(
 	return(descr);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Allocates a single free page from a segment. This function implements
 the intelligent allocation strategy which tries to minimize file space
-fragmentation. */
+fragmentation.
+@return	the allocated page number, FIL_NULL if no page could be allocated */
 static
 ulint
 fseg_alloc_free_page_low(
 /*=====================*/
-				/* out: the allocated page number, FIL_NULL
-				if no page could be allocated */
-	ulint		space,	/* in: space */
-	fseg_inode_t*	seg_inode, /* in: segment inode */
-	ulint		hint,	/* in: hint of which page would be desirable */
-	byte		direction, /* in: if the new page is needed because
+	ulint		space,	/*!< in: space */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	fseg_inode_t*	seg_inode, /*!< in: segment inode */
+	ulint		hint,	/*!< in: hint of which page would be desirable */
+	byte		direction, /*!< in: if the new page is needed because
 				of an index page split, and records are
 				inserted there in order, into which
 				direction they go alphabetically: FSP_DOWN,
 				FSP_UP, FSP_NO_DIR */
-	mtr_t*		mtr)	/* in: mtr handle */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	fsp_header_t*	space_header;
 	ulint		space_size;
 	dulint		seg_id;
 	ulint		used;
 	ulint		reserved;
-	xdes_t*		descr;		/* extent of the hinted page */
-	ulint		ret_page;	/* the allocated page offset, FIL_NULL
+	xdes_t*		descr;		/*!< extent of the hinted page */
+	ulint		ret_page;	/*!< the allocated page offset, FIL_NULL
 					if could not be allocated */
-	xdes_t*		ret_descr;	/* the extent of the allocated page */
-	page_t*		page;
+	xdes_t*		ret_descr;	/*!< the extent of the allocated page */
 	ibool		frag_page_allocated = FALSE;
 	ibool		success;
 	ulint		n;
@@ -2363,13 +2548,14 @@ fseg_alloc_free_page_low(
 	ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
 	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
 	      == FSEG_MAGIC_N_VALUE);
+	ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
 	seg_id = mtr_read_dulint(seg_inode + FSEG_ID, mtr);
 
-	ut_ad(ut_dulint_cmp(seg_id, ut_dulint_zero) > 0);
+	ut_ad(!ut_dulint_is_zero(seg_id));
 
 	reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr);
 
-	space_header = fsp_get_space_header(space, mtr);
+	space_header = fsp_get_space_header(space, zip_size, mtr);
 
 	descr = xdes_get_descriptor_with_space_hdr(space_header, space,
 						   hint, mtr);
@@ -2377,7 +2563,7 @@ fseg_alloc_free_page_low(
 		/* Hint outside space or too high above free limit: reset
 		hint */
 		hint = 0;
-		descr = xdes_get_descriptor(space, hint, mtr);
+		descr = xdes_get_descriptor(space, zip_size, hint, mtr);
 	}
 
 	/* In the big if-else below we look for ret_page and ret_descr */
@@ -2401,7 +2587,7 @@ fseg_alloc_free_page_low(
 		=========================================================
 		the hinted page
 		===============*/
-		ret_descr = fsp_alloc_free_extent(space, hint, mtr);
+		ret_descr = fsp_alloc_free_extent(space, zip_size, hint, mtr);
 
 		ut_a(ret_descr == descr);
 
@@ -2411,7 +2597,7 @@ fseg_alloc_free_page_low(
 			      ret_descr + XDES_FLST_NODE, mtr);
 
 		/* Try to fill the segment free list */
-		fseg_fill_free_list(seg_inode, space,
+		fseg_fill_free_list(seg_inode, space, zip_size,
 				    hint + FSP_EXTENT_SIZE, mtr);
 		ret_page = hint;
 		/*-----------------------------------------------------------*/
@@ -2419,7 +2605,8 @@ fseg_alloc_free_page_low(
 		   && ((reserved - used) < reserved / FSEG_FILLFACTOR)
 		   && (used >= FSEG_FRAG_LIMIT)
 		   && (!!(ret_descr
-			  = fseg_alloc_free_extent(seg_inode, space, mtr)))) {
+			  = fseg_alloc_free_extent(seg_inode,
+						   space, zip_size, mtr)))) {
 
 		/* 3. We take any free extent (which was already assigned above
 		===============================================================
@@ -2464,7 +2651,8 @@ fseg_alloc_free_page_low(
 			return(FIL_NULL);
 		}
 
-		ret_descr = xdes_lst_get_descriptor(space, first, mtr);
+		ret_descr = xdes_lst_get_descriptor(space, zip_size,
+						    first, mtr);
 		ret_page = xdes_get_offset(ret_descr)
 			+ xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
 					0, mtr);
@@ -2472,7 +2660,7 @@ fseg_alloc_free_page_low(
 	} else if (used < FSEG_FRAG_LIMIT) {
 		/* 6. We allocate an individual page from the space
 		===================================================*/
-		ret_page = fsp_alloc_free_page(space, hint, mtr);
+		ret_page = fsp_alloc_free_page(space, zip_size, hint, mtr);
 		ret_descr = NULL;
 
 		frag_page_allocated = TRUE;
@@ -2490,7 +2678,8 @@ fseg_alloc_free_page_low(
 	} else {
 		/* 7. We allocate a new extent and take its first page
 		======================================================*/
-		ret_descr = fseg_alloc_free_extent(seg_inode, space, mtr);
+		ret_descr = fseg_alloc_free_extent(seg_inode,
+						   space, zip_size, mtr);
 
 		if (ret_descr == NULL) {
 			ret_page = FIL_NULL;
@@ -2536,27 +2725,32 @@ fseg_alloc_free_page_low(
 		/* Initialize the allocated page to buffer pool, so that it
 		can be obtained immediately with buf_page_get without need
 		for a disk read */
+		buf_block_t*	block;
+		ulint		zip_size = dict_table_flags_to_zip_size(
+			mach_read_from_4(FSP_SPACE_FLAGS + space_header));
 
-		page = buf_page_create(space, ret_page, mtr);
+		block = buf_page_create(space, ret_page, zip_size, mtr);
+		buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
 
-		ut_a(page == buf_page_get(space, ret_page, RW_X_LATCH, mtr));
-
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+		if (UNIV_UNLIKELY(block != buf_page_get(space, zip_size,
+							ret_page, RW_X_LATCH,
+							mtr))) {
+			ut_error;
+		}
 
 		/* The prior contents of the page should be ignored */
-		fsp_init_file_page(page, mtr);
+		fsp_init_file_page(block, mtr);
 
 		/* At this point we know the extent and the page offset.
 		The extent is still in the appropriate list (FSEG_NOT_FULL
 		or FSEG_FREE), and the page is not yet marked as used. */
 
-		ut_ad(xdes_get_descriptor(space, ret_page, mtr) == ret_descr);
+		ut_ad(xdes_get_descriptor(space, zip_size, ret_page, mtr)
+		      == ret_descr);
 		ut_ad(xdes_get_bit(ret_descr, XDES_FREE_BIT,
 				   ret_page % FSP_EXTENT_SIZE, mtr) == TRUE);
 
-		fseg_mark_page_used(seg_inode, space, ret_page, mtr);
+		fseg_mark_page_used(seg_inode, space, zip_size, ret_page, mtr);
 	}
 
 	buf_reset_check_index_page_at_flush(space, ret_page);
@@ -2564,43 +2758,46 @@ fseg_alloc_free_page_low(
 	return(ret_page);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Allocates a single free page from a segment. This function implements
 the intelligent allocation strategy which tries to minimize file space
-fragmentation. */
-
+fragmentation.
+@return	allocated page offset, FIL_NULL if no page could be allocated */
+UNIV_INTERN
 ulint
 fseg_alloc_free_page_general(
 /*=========================*/
-				/* out: allocated page offset, FIL_NULL if no
-				page could be allocated */
-	fseg_header_t*	seg_header,/* in: segment header */
-	ulint		hint,	/* in: hint of which page would be desirable */
-	byte		direction,/* in: if the new page is needed because
+	fseg_header_t*	seg_header,/*!< in: segment header */
+	ulint		hint,	/*!< in: hint of which page would be desirable */
+	byte		direction,/*!< in: if the new page is needed because
 				of an index page split, and records are
 				inserted there in order, into which
 				direction they go alphabetically: FSP_DOWN,
 				FSP_UP, FSP_NO_DIR */
-	ibool		has_done_reservation, /* in: TRUE if the caller has
+	ibool		has_done_reservation, /*!< in: TRUE if the caller has
 				already done the reservation for the page
 				with fsp_reserve_free_extents, then there
 				is no need to do the check for this individual
 				page */
-	mtr_t*		mtr)	/* in: mtr handle */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	fseg_inode_t*	inode;
 	ulint		space;
+	ulint		flags;
+	ulint		zip_size;
 	rw_lock_t*	latch;
 	ibool		success;
 	ulint		page_no;
 	ulint		n_reserved;
 
-	space = buf_frame_get_space_id(seg_header);
+	space = page_get_space_id(page_align(seg_header));
+
+	latch = fil_space_get_latch(space, &flags);
+
+	zip_size = dict_table_flags_to_zip_size(flags);
 
 	ut_ad(!mutex_own(&kernel_mutex)
-	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
-				   MTR_MEMO_X_LOCK));
-	latch = fil_space_get_latch(space);
+	      || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
 
 	mtr_x_lock(latch, mtr);
 
@@ -2608,12 +2805,12 @@ fseg_alloc_free_page_general(
 		/* This thread did not own the latch before this call: free
 		excess pages from the insert buffer free list */
 
-		if (space == 0) {
-			ibuf_free_excess_pages(space);
+		if (space == IBUF_SPACE_ID) {
+			ibuf_free_excess_pages();
 		}
 	}
 
-	inode = fseg_inode_get(seg_header, mtr);
+	inode = fseg_inode_get(seg_header, space, zip_size, mtr);
 
 	if (!has_done_reservation) {
 		success = fsp_reserve_free_extents(&n_reserved, space, 2,
@@ -2623,7 +2820,7 @@ fseg_alloc_free_page_general(
 		}
 	}
 
-	page_no = fseg_alloc_free_page_low(buf_frame_get_space_id(inode),
+	page_no = fseg_alloc_free_page_low(space, zip_size,
 					   inode, hint, direction, mtr);
 	if (!has_done_reservation) {
 		fil_space_release_free_extents(space, n_reserved);
@@ -2632,47 +2829,45 @@ fseg_alloc_free_page_general(
 	return(page_no);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Allocates a single free page from a segment. This function implements
 the intelligent allocation strategy which tries to minimize file space
-fragmentation. */
-
+fragmentation.
+@return	allocated page offset, FIL_NULL if no page could be allocated */
+UNIV_INTERN
 ulint
 fseg_alloc_free_page(
 /*=================*/
-				/* out: allocated page offset, FIL_NULL if no
-				page could be allocated */
-	fseg_header_t*	seg_header,/* in: segment header */
-	ulint		hint,	/* in: hint of which page would be desirable */
-	byte		direction,/* in: if the new page is needed because
+	fseg_header_t*	seg_header,/*!< in: segment header */
+	ulint		hint,	/*!< in: hint of which page would be desirable */
+	byte		direction,/*!< in: if the new page is needed because
 				of an index page split, and records are
 				inserted there in order, into which
 				direction they go alphabetically: FSP_DOWN,
 				FSP_UP, FSP_NO_DIR */
-	mtr_t*		mtr)	/* in: mtr handle */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	return(fseg_alloc_free_page_general(seg_header, hint, direction,
 					    FALSE, mtr));
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Checks that we have at least 2 frag pages free in the first extent of a
 single-table tablespace, and they are also physically initialized to the data
 file. That is we have already extended the data file so that those pages are
 inside the data file. If not, this function extends the tablespace with
-pages. */
+pages.
+@return	TRUE if there were >= 3 free pages, or we were able to extend */
 static
 ibool
 fsp_reserve_free_pages(
 /*===================*/
-					/* out: TRUE if there were >= 3 free
-					pages, or we were able to extend */
-	ulint		space,		/* in: space id, must be != 0 */
-	fsp_header_t*	space_header,	/* in: header of that space,
+	ulint		space,		/*!< in: space id, must be != 0 */
+	fsp_header_t*	space_header,	/*!< in: header of that space,
 					x-latched */
-	ulint		size,		/* in: size of the tablespace in pages,
+	ulint		size,		/*!< in: size of the tablespace in pages,
 					must be < FSP_EXTENT_SIZE / 2 */
-	mtr_t*		mtr)		/* in: mtr */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	xdes_t*	descr;
 	ulint	n_used;
@@ -2695,7 +2890,7 @@ fsp_reserve_free_pages(
 						   space_header, mtr));
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Reserves free pages from a tablespace. All mini-transactions which may
 use several pages from the tablespace should call this function beforehand
 and reserve enough free extents so that they certainly will be able
@@ -2719,25 +2914,27 @@ Single-table tablespaces whose size is < 32 pages are a special case. In this
 function we would liberally reserve several 64 page extents for every page
 split or merge in a B-tree. But we do not want to waste disk space if the table
 only occupies < 32 pages. That is why we apply different rules in that special
-case, just ensuring that there are 3 free pages available. */
-
+case, just ensuring that there are 3 free pages available.
+@return	TRUE if we were able to make the reservation */
+UNIV_INTERN
 ibool
 fsp_reserve_free_extents(
 /*=====================*/
-			/* out: TRUE if we were able to make the reservation */
-	ulint*	n_reserved,/* out: number of extents actually reserved; if we
+	ulint*	n_reserved,/*!< out: number of extents actually reserved; if we
 			return TRUE and the tablespace size is < 64 pages,
 			then this can be 0, otherwise it is n_ext */
-	ulint	space,	/* in: space id */
-	ulint	n_ext,	/* in: number of extents to reserve */
-	ulint	alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
-	mtr_t*	mtr)	/* in: mtr */
+	ulint	space,	/*!< in: space id */
+	ulint	n_ext,	/*!< in: number of extents to reserve */
+	ulint	alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	fsp_header_t*	space_header;
 	rw_lock_t*	latch;
 	ulint		n_free_list_ext;
 	ulint		free_limit;
 	ulint		size;
+	ulint		flags;
+	ulint		zip_size;
 	ulint		n_free;
 	ulint		n_free_up;
 	ulint		reserve;
@@ -2745,16 +2942,17 @@ fsp_reserve_free_extents(
 	ulint		n_pages_added;
 
 	ut_ad(mtr);
-	ut_ad(!mutex_own(&kernel_mutex)
-	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
-				   MTR_MEMO_X_LOCK));
 	*n_reserved = n_ext;
 
-	latch = fil_space_get_latch(space);
+	latch = fil_space_get_latch(space, &flags);
+	zip_size = dict_table_flags_to_zip_size(flags);
+
+	ut_ad(!mutex_own(&kernel_mutex)
+	      || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
 
 	mtr_x_lock(latch, mtr);
 
-	space_header = fsp_get_space_header(space, mtr);
+	space_header = fsp_get_space_header(space, zip_size, mtr);
 try_again:
 	size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr);
 
@@ -2777,8 +2975,13 @@ try_again:
 
 	if (n_free_up > 0) {
 		n_free_up--;
-		n_free_up = n_free_up - n_free_up
-			/ (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE);
+		if (!zip_size) {
+			n_free_up -= n_free_up
+				/ (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE);
+		} else {
+			n_free_up -= n_free_up
+				/ (zip_size / FSP_EXTENT_SIZE);
+		}
 	}
 
 	n_free = n_free_list_ext + n_free_up;
@@ -2823,22 +3026,24 @@ try_to_extend:
 	return(FALSE);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 This function should be used to get information on how much we still
 will be able to insert new data to the database without running out the
 tablespace. Only free extents are taken into account and we also subtract
-the safety margin required by the above function fsp_reserve_free_extents. */
-
+the safety margin required by the above function fsp_reserve_free_extents.
+@return	available space in kB */
+UNIV_INTERN
 ullint
 fsp_get_available_space_in_free_extents(
 /*====================================*/
-			/* out: available space in kB */
-	ulint	space)	/* in: space id */
+	ulint	space)	/*!< in: space id */
 {
 	fsp_header_t*	space_header;
 	ulint		n_free_list_ext;
 	ulint		free_limit;
 	ulint		size;
+	ulint		flags;
+	ulint		zip_size;
 	ulint		n_free;
 	ulint		n_free_up;
 	ulint		reserve;
@@ -2849,11 +3054,12 @@ fsp_get_available_space_in_free_extents(
 
 	mtr_start(&mtr);
 
-	latch = fil_space_get_latch(space);
+	latch = fil_space_get_latch(space, &flags);
+	zip_size = dict_table_flags_to_zip_size(flags);
 
 	mtr_x_lock(latch, &mtr);
 
-	space_header = fsp_get_space_header(space, &mtr);
+	space_header = fsp_get_space_header(space, zip_size, &mtr);
 
 	size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, &mtr);
 
@@ -2879,8 +3085,13 @@ fsp_get_available_space_in_free_extents(
 
 	if (n_free_up > 0) {
 		n_free_up--;
-		n_free_up = n_free_up - n_free_up
-			/ (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE);
+		if (!zip_size) {
+			n_free_up -= n_free_up
+				/ (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE);
+		} else {
+			n_free_up -= n_free_up
+				/ (zip_size / FSP_EXTENT_SIZE);
+		}
 	}
 
 	n_free = n_free_list_ext + n_free_up;
@@ -2895,29 +3106,38 @@ fsp_get_available_space_in_free_extents(
 		return(0);
 	}
 
-	return((ullint)(n_free - reserve)
-	       * FSP_EXTENT_SIZE
-	       * (UNIV_PAGE_SIZE / 1024));
+	if (!zip_size) {
+		return((ullint) (n_free - reserve)
+		       * FSP_EXTENT_SIZE
+		       * (UNIV_PAGE_SIZE / 1024));
+	} else {
+		return((ullint) (n_free - reserve)
+		       * FSP_EXTENT_SIZE
+		       * (zip_size / 1024));
+	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Marks a page used. The page must reside within the extents of the given
 segment. */
 static
 void
 fseg_mark_page_used(
 /*================*/
-	fseg_inode_t*	seg_inode,/* in: segment inode */
-	ulint		space,	/* in: space id */
-	ulint		page,	/* in: page offset */
-	mtr_t*		mtr)	/* in: mtr */
+	fseg_inode_t*	seg_inode,/*!< in: segment inode */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint		page,	/*!< in: page offset */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	xdes_t*	descr;
 	ulint	not_full_n_used;
 
 	ut_ad(seg_inode && mtr);
+	ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
 
-	descr = xdes_get_descriptor(space, page, mtr);
+	descr = xdes_get_descriptor(space, zip_size, page, mtr);
 
 	ut_ad(mtr_read_ulint(seg_inode + FSEG_ID, MLOG_4BYTES, mtr)
 	      == mtr_read_ulint(descr + XDES_ID, MLOG_4BYTES, mtr));
@@ -2955,16 +3175,18 @@ fseg_mark_page_used(
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Frees a single page of a segment. */
 static
 void
 fseg_free_page_low(
 /*===============*/
-	fseg_inode_t*	seg_inode, /* in: segment inode */
-	ulint		space,	/* in: space id */
-	ulint		page,	/* in: page offset */
-	mtr_t*		mtr)	/* in: mtr handle */
+	fseg_inode_t*	seg_inode, /*!< in: segment inode */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint		page,	/*!< in: page offset */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	xdes_t*	descr;
 	ulint	not_full_n_used;
@@ -2976,13 +3198,14 @@ fseg_free_page_low(
 	ut_ad(seg_inode && mtr);
 	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
 	      == FSEG_MAGIC_N_VALUE);
+	ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
 
 	/* Drop search system page hash index if the page is found in
 	the pool and is hashed */
 
-	btr_search_drop_page_hash_when_freed(space, page);
+	btr_search_drop_page_hash_when_freed(space, zip_size, page);
 
-	descr = xdes_get_descriptor(space, page, mtr);
+	descr = xdes_get_descriptor(space, zip_size, page, mtr);
 
 	ut_a(descr);
 	if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
@@ -3001,8 +3224,7 @@ fseg_free_page_low(
 			"InnoDB: database!\n", (ulong) page);
 crash:
 		fputs("InnoDB: Please refer to\n"
-		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-		      "forcing-recovery.html\n"
+		      "InnoDB: " REFMAN "forcing-recovery.html\n"
 		      "InnoDB: about forcing recovery.\n", stderr);
 		ut_error;
 	}
@@ -3022,7 +3244,7 @@ crash:
 			}
 		}
 
-		fsp_free_page(space, page, mtr);
+		fsp_free_page(space, zip_size, page, mtr);
 
 		return;
 	}
@@ -3088,48 +3310,55 @@ crash:
 		/* The extent has become free: free it to space */
 		flst_remove(seg_inode + FSEG_NOT_FULL,
 			    descr + XDES_FLST_NODE, mtr);
-		fsp_free_extent(space, page, mtr);
+		fsp_free_extent(space, zip_size, page, mtr);
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Frees a single page of a segment. */
-
+UNIV_INTERN
 void
 fseg_free_page(
 /*===========*/
-	fseg_header_t*	seg_header, /* in: segment header */
-	ulint		space,	/* in: space id */
-	ulint		page,	/* in: page offset */
-	mtr_t*		mtr)	/* in: mtr handle */
+	fseg_header_t*	seg_header, /*!< in: segment header */
+	ulint		space,	/*!< in: space id */
+	ulint		page,	/*!< in: page offset */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
+	ulint		flags;
+	ulint		zip_size;
 	fseg_inode_t*	seg_inode;
+	rw_lock_t*	latch;
+
+	latch = fil_space_get_latch(space, &flags);
+	zip_size = dict_table_flags_to_zip_size(flags);
 
 	ut_ad(!mutex_own(&kernel_mutex)
-	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
-				   MTR_MEMO_X_LOCK));
+	      || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
 
-	mtr_x_lock(fil_space_get_latch(space), mtr);
+	mtr_x_lock(latch, mtr);
 
-	seg_inode = fseg_inode_get(seg_header, mtr);
+	seg_inode = fseg_inode_get(seg_header, space, zip_size, mtr);
 
-	fseg_free_page_low(seg_inode, space, page, mtr);
+	fseg_free_page_low(seg_inode, space, zip_size, page, mtr);
 
 #ifdef UNIV_DEBUG_FILE_ACCESSES
 	buf_page_set_file_page_was_freed(space, page);
 #endif
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Frees an extent of a segment to the space free list. */
 static
 void
 fseg_free_extent(
 /*=============*/
-	fseg_inode_t*	seg_inode, /* in: segment inode */
-	ulint		space,	/* in: space id */
-	ulint		page,	/* in: a page in the extent */
-	mtr_t*		mtr)	/* in: mtr handle */
+	fseg_inode_t*	seg_inode, /*!< in: segment inode */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint		page,	/*!< in: a page in the extent */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	ulint	first_page_in_extent;
 	xdes_t*	descr;
@@ -3139,7 +3368,7 @@ fseg_free_extent(
 
 	ut_ad(seg_inode && mtr);
 
-	descr = xdes_get_descriptor(space, page, mtr);
+	descr = xdes_get_descriptor(space, zip_size, page, mtr);
 
 	ut_a(xdes_get_state(descr, mtr) == XDES_FSEG);
 	ut_a(0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, mtr),
@@ -3154,7 +3383,7 @@ fseg_free_extent(
 			found in the pool and is hashed */
 
 			btr_search_drop_page_hash_when_freed(
-				space, first_page_in_extent + i);
+				space, zip_size, first_page_in_extent + i);
 		}
 	}
 
@@ -3178,7 +3407,7 @@ fseg_free_extent(
 				 MLOG_4BYTES, mtr);
 	}
 
-	fsp_free_extent(space, page, mtr);
+	fsp_free_extent(space, zip_size, page, mtr);
 
 #ifdef UNIV_DEBUG_FILE_ACCESSES
 	for (i = 0; i < FSP_EXTENT_SIZE; i++) {
@@ -3189,53 +3418,60 @@ fseg_free_extent(
 #endif
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Frees part of a segment. This function can be used to free a segment by
 repeatedly calling this function in different mini-transactions. Doing
 the freeing in a single mini-transaction might result in too big a
-mini-transaction. */
-
+mini-transaction.
+@return	TRUE if freeing completed */
+UNIV_INTERN
 ibool
 fseg_free_step(
 /*===========*/
-				/* out: TRUE if freeing completed */
-	fseg_header_t*	header,	/* in, own: segment header; NOTE: if the header
+	fseg_header_t*	header,	/*!< in, own: segment header; NOTE: if the header
 				resides on the first page of the frag list
 				of the segment, this pointer becomes obsolete
 				after the last freeing step */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ulint		n;
 	ulint		page;
 	xdes_t*		descr;
 	fseg_inode_t*	inode;
 	ulint		space;
+	ulint		flags;
+	ulint		zip_size;
+	ulint		header_page;
+	rw_lock_t*	latch;
 
-	space = buf_frame_get_space_id(header);
+	space = page_get_space_id(page_align(header));
+	header_page = page_get_page_no(page_align(header));
+
+	latch = fil_space_get_latch(space, &flags);
+	zip_size = dict_table_flags_to_zip_size(flags);
 
 	ut_ad(!mutex_own(&kernel_mutex)
-	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
-				   MTR_MEMO_X_LOCK));
+	      || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
 
-	mtr_x_lock(fil_space_get_latch(space), mtr);
+	mtr_x_lock(latch, mtr);
 
-	descr = xdes_get_descriptor(space, buf_frame_get_page_no(header), mtr);
+	descr = xdes_get_descriptor(space, zip_size, header_page, mtr);
 
 	/* Check that the header resides on a page which has not been
 	freed yet */
 
 	ut_a(descr);
-	ut_a(xdes_get_bit(descr, XDES_FREE_BIT, buf_frame_get_page_no(header)
-			  % FSP_EXTENT_SIZE, mtr) == FALSE);
-	inode = fseg_inode_get(header, mtr);
+	ut_a(xdes_get_bit(descr, XDES_FREE_BIT,
+			  header_page % FSP_EXTENT_SIZE, mtr) == FALSE);
+	inode = fseg_inode_get(header, space, zip_size, mtr);
 
-	descr = fseg_get_first_extent(inode, mtr);
+	descr = fseg_get_first_extent(inode, space, zip_size, mtr);
 
 	if (descr != NULL) {
 		/* Free the extent held by the segment */
 		page = xdes_get_offset(descr);
 
-		fseg_free_extent(inode, space, page, mtr);
+		fseg_free_extent(inode, space, zip_size, page, mtr);
 
 		return(FALSE);
 	}
@@ -3245,19 +3481,19 @@ fseg_free_step(
 
 	if (n == ULINT_UNDEFINED) {
 		/* Freeing completed: free the segment inode */
-		fsp_free_seg_inode(space, inode, mtr);
+		fsp_free_seg_inode(space, zip_size, inode, mtr);
 
 		return(TRUE);
 	}
 
-	fseg_free_page_low(inode, space,
+	fseg_free_page_low(inode, space, zip_size,
 			   fseg_get_nth_frag_page_no(inode, n, mtr), mtr);
 
 	n = fseg_find_last_used_frag_page_slot(inode, mtr);
 
 	if (n == ULINT_UNDEFINED) {
 		/* Freeing completed: free the segment inode */
-		fsp_free_seg_inode(space, inode, mtr);
+		fsp_free_seg_inode(space, zip_size, inode, mtr);
 
 		return(TRUE);
 	}
@@ -3265,43 +3501,47 @@ fseg_free_step(
 	return(FALSE);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Frees part of a segment. Differs from fseg_free_step because this function
-leaves the header page unfreed. */
-
+leaves the header page unfreed.
+@return	TRUE if freeing completed, except the header page */
+UNIV_INTERN
 ibool
 fseg_free_step_not_header(
 /*======================*/
-				/* out: TRUE if freeing completed, except the
-				header page */
-	fseg_header_t*	header,	/* in: segment header which must reside on
+	fseg_header_t*	header,	/*!< in: segment header which must reside on
 				the first fragment page of the segment */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ulint		n;
 	ulint		page;
 	xdes_t*		descr;
 	fseg_inode_t*	inode;
 	ulint		space;
+	ulint		flags;
+	ulint		zip_size;
 	ulint		page_no;
+	rw_lock_t*	latch;
 
-	space = buf_frame_get_space_id(header);
+	space = page_get_space_id(page_align(header));
+
+	latch = fil_space_get_latch(space, &flags);
+	zip_size = dict_table_flags_to_zip_size(flags);
 
 	ut_ad(!mutex_own(&kernel_mutex)
-	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
-				   MTR_MEMO_X_LOCK));
+	      || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
 
-	mtr_x_lock(fil_space_get_latch(space), mtr);
+	mtr_x_lock(latch, mtr);
 
-	inode = fseg_inode_get(header, mtr);
+	inode = fseg_inode_get(header, space, zip_size, mtr);
 
-	descr = fseg_get_first_extent(inode, mtr);
+	descr = fseg_get_first_extent(inode, space, zip_size, mtr);
 
 	if (descr != NULL) {
 		/* Free the extent held by the segment */
 		page = xdes_get_offset(descr);
 
-		fseg_free_extent(inode, space, page, mtr);
+		fseg_free_extent(inode, space, zip_size, page, mtr);
 
 		return(FALSE);
 	}
@@ -3316,73 +3556,37 @@ fseg_free_step_not_header(
 
 	page_no = fseg_get_nth_frag_page_no(inode, n, mtr);
 
-	if (page_no == buf_frame_get_page_no(header)) {
+	if (page_no == page_get_page_no(page_align(header))) {
 
 		return(TRUE);
 	}
 
-	fseg_free_page_low(inode, space, page_no, mtr);
+	fseg_free_page_low(inode, space, zip_size, page_no, mtr);
 
 	return(FALSE);
 }
 
-/***********************************************************************
-Frees a segment. The freeing is performed in several mini-transactions,
-so that there is no danger of bufferfixing too many buffer pages. */
-
-void
-fseg_free(
-/*======*/
-	ulint	space,	/* in: space id */
-	ulint	page_no,/* in: page number where the segment header is
-			placed */
-	ulint	offset) /* in: byte offset of the segment header on that
-			page */
-{
-	mtr_t		mtr;
-	ibool		finished;
-	fseg_header_t*	header;
-	fil_addr_t	addr;
-
-	addr.page = page_no;
-	addr.boffset = offset;
-
-	for (;;) {
-		mtr_start(&mtr);
-
-		header = fut_get_ptr(space, addr, RW_X_LATCH, &mtr);
-
-		finished = fseg_free_step(header, &mtr);
-
-		mtr_commit(&mtr);
-
-		if (finished) {
-
-			return;
-		}
-	}
-}
-
-/**************************************************************************
+/**********************************************************************//**
 Returns the first extent descriptor for a segment. We think of the extent
 lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
--> FSEG_FREE. */
+-> FSEG_FREE.
+@return	the first extent descriptor, or NULL if none */
 static
 xdes_t*
 fseg_get_first_extent(
 /*==================*/
-				/* out: the first extent descriptor, or NULL if
-				none */
-	fseg_inode_t*	inode,	/* in: segment inode */
-	mtr_t*		mtr)	/* in: mtr */
+	fseg_inode_t*	inode,	/*!< in: segment inode */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	fil_addr_t	first;
-	ulint		space;
 	xdes_t*		descr;
 
 	ut_ad(inode && mtr);
 
-	space = buf_frame_get_space_id(inode);
+	ut_ad(space == page_get_space_id(page_align(inode)));
 
 	first = fil_addr_null;
 
@@ -3403,20 +3607,20 @@ fseg_get_first_extent(
 
 		return(NULL);
 	}
-	descr = xdes_lst_get_descriptor(space, first, mtr);
+	descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
 
 	return(descr);
 }
 
-/***********************************************************************
-Validates a segment. */
+/*******************************************************************//**
+Validates a segment.
+@return	TRUE if ok */
 static
 ibool
 fseg_validate_low(
 /*==============*/
-				/* out: TRUE if ok */
-	fseg_inode_t*	inode, /* in: segment inode */
-	mtr_t*		mtr2)	/* in: mtr */
+	fseg_inode_t*	inode, /*!< in: segment inode */
+	mtr_t*		mtr2)	/*!< in: mtr */
 {
 	ulint		space;
 	dulint		seg_id;
@@ -3426,11 +3630,10 @@ fseg_validate_low(
 	ulint		n_used		= 0;
 	ulint		n_used2		= 0;
 
-	ut_ad(mtr_memo_contains(mtr2, buf_block_align(inode),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr2, inode, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
 
-	space = buf_frame_get_space_id(inode);
+	space = page_get_space_id(page_align(inode));
 
 	seg_id = mtr_read_dulint(inode + FSEG_ID, mtr2);
 	n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
@@ -3443,10 +3646,15 @@ fseg_validate_low(
 	node_addr = flst_get_first(inode + FSEG_FREE, mtr2);
 
 	while (!fil_addr_is_null(node_addr)) {
-		mtr_start(&mtr);
-		mtr_x_lock(fil_space_get_latch(space), &mtr);
+		ulint	flags;
+		ulint	zip_size;
 
-		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+		mtr_start(&mtr);
+		mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
+		zip_size = dict_table_flags_to_zip_size(flags);
+
+		descr = xdes_lst_get_descriptor(space, zip_size,
+						node_addr, &mtr);
 
 		ut_a(xdes_get_n_used(descr, &mtr) == 0);
 		ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
@@ -3462,10 +3670,15 @@ fseg_validate_low(
 	node_addr = flst_get_first(inode + FSEG_NOT_FULL, mtr2);
 
 	while (!fil_addr_is_null(node_addr)) {
-		mtr_start(&mtr);
-		mtr_x_lock(fil_space_get_latch(space), &mtr);
+		ulint	flags;
+		ulint	zip_size;
 
-		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+		mtr_start(&mtr);
+		mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
+		zip_size = dict_table_flags_to_zip_size(flags);
+
+		descr = xdes_lst_get_descriptor(space, zip_size,
+						node_addr, &mtr);
 
 		ut_a(xdes_get_n_used(descr, &mtr) > 0);
 		ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
@@ -3484,10 +3697,15 @@ fseg_validate_low(
 	node_addr = flst_get_first(inode + FSEG_FULL, mtr2);
 
 	while (!fil_addr_is_null(node_addr)) {
-		mtr_start(&mtr);
-		mtr_x_lock(fil_space_get_latch(space), &mtr);
+		ulint	flags;
+		ulint	zip_size;
 
-		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+		mtr_start(&mtr);
+		mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
+		zip_size = dict_table_flags_to_zip_size(flags);
+
+		descr = xdes_lst_get_descriptor(space, zip_size,
+						node_addr, &mtr);
 
 		ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
 		ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
@@ -3503,39 +3721,44 @@ fseg_validate_low(
 	return(TRUE);
 }
 
-/***********************************************************************
-Validates a segment. */
-
+#ifdef UNIV_DEBUG
+/*******************************************************************//**
+Validates a segment.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 fseg_validate(
 /*==========*/
-				/* out: TRUE if ok */
-	fseg_header_t*	header, /* in: segment header */
-	mtr_t*		mtr2)	/* in: mtr */
+	fseg_header_t*	header, /*!< in: segment header */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	fseg_inode_t*	inode;
 	ibool		ret;
 	ulint		space;
+	ulint		flags;
+	ulint		zip_size;
 
-	space = buf_frame_get_space_id(header);
+	space = page_get_space_id(page_align(header));
 
-	mtr_x_lock(fil_space_get_latch(space), mtr2);
+	mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
+	zip_size = dict_table_flags_to_zip_size(flags);
 
-	inode = fseg_inode_get(header, mtr2);
+	inode = fseg_inode_get(header, space, zip_size, mtr);
 
-	ret = fseg_validate_low(inode, mtr2);
+	ret = fseg_validate_low(inode, mtr);
 
 	return(ret);
 }
+#endif /* UNIV_DEBUG */
 
-/***********************************************************************
+/*******************************************************************//**
 Writes info of a segment. */
 static
 void
 fseg_print_low(
 /*===========*/
-	fseg_inode_t*	inode, /* in: segment inode */
-	mtr_t*		mtr)	/* in: mtr */
+	fseg_inode_t*	inode, /*!< in: segment inode */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ulint	space;
 	ulint	seg_id_low;
@@ -3550,10 +3773,9 @@ fseg_print_low(
 	ulint	page_no;
 	dulint	 d_var;
 
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
-				MTR_MEMO_PAGE_X_FIX));
-	space = buf_frame_get_space_id(inode);
-	page_no = buf_frame_get_page_no(inode);
+	ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
+	space = page_get_space_id(page_align(inode));
+	page_no = page_get_page_no(page_align(inode));
 
 	reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
 
@@ -3581,40 +3803,48 @@ fseg_print_low(
 		(ulong) n_used);
 }
 
-/***********************************************************************
+#ifdef UNIV_BTR_PRINT
+/*******************************************************************//**
 Writes info of a segment. */
-
+UNIV_INTERN
 void
 fseg_print(
 /*=======*/
-	fseg_header_t*	header, /* in: segment header */
-	mtr_t*		mtr)	/* in: mtr */
+	fseg_header_t*	header, /*!< in: segment header */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	fseg_inode_t*	inode;
 	ulint		space;
+	ulint		flags;
+	ulint		zip_size;
 
-	space = buf_frame_get_space_id(header);
+	space = page_get_space_id(page_align(header));
 
-	mtr_x_lock(fil_space_get_latch(space), mtr);
+	mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
+	zip_size = dict_table_flags_to_zip_size(flags);
 
-	inode = fseg_inode_get(header, mtr);
+	inode = fseg_inode_get(header, space, zip_size, mtr);
 
 	fseg_print_low(inode, mtr);
 }
+#endif /* UNIV_BTR_PRINT */
 
-/***********************************************************************
-Validates the file space system and its segments. */
-
+/*******************************************************************//**
+Validates the file space system and its segments.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 fsp_validate(
 /*=========*/
-			/* out: TRUE if ok */
-	ulint	space)	/* in: space id */
+	ulint	space)	/*!< in: space id */
 {
 	fsp_header_t*	header;
 	fseg_inode_t*	seg_inode;
 	page_t*		seg_inode_page;
+	rw_lock_t*	latch;
 	ulint		size;
+	ulint		flags;
+	ulint		zip_size;
 	ulint		free_limit;
 	ulint		frag_n_used;
 	mtr_t		mtr;
@@ -3630,15 +3860,21 @@ fsp_validate(
 	ulint		seg_inode_len_free;
 	ulint		seg_inode_len_full;
 
+	latch = fil_space_get_latch(space, &flags);
+	zip_size = dict_table_flags_to_zip_size(flags);
+	ut_a(ut_is_2pow(zip_size));
+	ut_a(zip_size <= UNIV_PAGE_SIZE);
+	ut_a(!zip_size || zip_size >= PAGE_ZIP_MIN_SIZE);
+
 	/* Start first a mini-transaction mtr2 to lock out all other threads
 	from the fsp system */
 	mtr_start(&mtr2);
-	mtr_x_lock(fil_space_get_latch(space), &mtr2);
+	mtr_x_lock(latch, &mtr2);
 
 	mtr_start(&mtr);
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(latch, &mtr);
 
-	header = fsp_get_space_header(space, &mtr);
+	header = fsp_get_space_header(space, zip_size, &mtr);
 
 	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
 	free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT,
@@ -3663,19 +3899,20 @@ fsp_validate(
 
 	/* Validate FSP_FREE list */
 	mtr_start(&mtr);
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(latch, &mtr);
 
-	header = fsp_get_space_header(space, &mtr);
+	header = fsp_get_space_header(space, zip_size, &mtr);
 	node_addr = flst_get_first(header + FSP_FREE, &mtr);
 
 	mtr_commit(&mtr);
 
 	while (!fil_addr_is_null(node_addr)) {
 		mtr_start(&mtr);
-		mtr_x_lock(fil_space_get_latch(space), &mtr);
+		mtr_x_lock(latch, &mtr);
 
 		descr_count++;
-		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+		descr = xdes_lst_get_descriptor(space, zip_size,
+						node_addr, &mtr);
 
 		ut_a(xdes_get_n_used(descr, &mtr) == 0);
 		ut_a(xdes_get_state(descr, &mtr) == XDES_FREE);
@@ -3686,19 +3923,20 @@ fsp_validate(
 
 	/* Validate FSP_FREE_FRAG list */
 	mtr_start(&mtr);
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(latch, &mtr);
 
-	header = fsp_get_space_header(space, &mtr);
+	header = fsp_get_space_header(space, zip_size, &mtr);
 	node_addr = flst_get_first(header + FSP_FREE_FRAG, &mtr);
 
 	mtr_commit(&mtr);
 
 	while (!fil_addr_is_null(node_addr)) {
 		mtr_start(&mtr);
-		mtr_x_lock(fil_space_get_latch(space), &mtr);
+		mtr_x_lock(latch, &mtr);
 
 		descr_count++;
-		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+		descr = xdes_lst_get_descriptor(space, zip_size,
+						node_addr, &mtr);
 
 		ut_a(xdes_get_n_used(descr, &mtr) > 0);
 		ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
@@ -3712,19 +3950,20 @@ fsp_validate(
 
 	/* Validate FSP_FULL_FRAG list */
 	mtr_start(&mtr);
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(latch, &mtr);
 
-	header = fsp_get_space_header(space, &mtr);
+	header = fsp_get_space_header(space, zip_size, &mtr);
 	node_addr = flst_get_first(header + FSP_FULL_FRAG, &mtr);
 
 	mtr_commit(&mtr);
 
 	while (!fil_addr_is_null(node_addr)) {
 		mtr_start(&mtr);
-		mtr_x_lock(fil_space_get_latch(space), &mtr);
+		mtr_x_lock(latch, &mtr);
 
 		descr_count++;
-		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+		descr = xdes_lst_get_descriptor(space, zip_size,
+						node_addr, &mtr);
 
 		ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
 		ut_a(xdes_get_state(descr, &mtr) == XDES_FULL_FRAG);
@@ -3735,9 +3974,9 @@ fsp_validate(
 
 	/* Validate segments */
 	mtr_start(&mtr);
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(latch, &mtr);
 
-	header = fsp_get_space_header(space, &mtr);
+	header = fsp_get_space_header(space, zip_size, &mtr);
 
 	node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
 
@@ -3747,20 +3986,19 @@ fsp_validate(
 
 	while (!fil_addr_is_null(node_addr)) {
 
-		for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
-
+		n = 0;
+		do {
 			mtr_start(&mtr);
-			mtr_x_lock(fil_space_get_latch(space), &mtr);
+			mtr_x_lock(latch, &mtr);
 
 			seg_inode_page = fut_get_ptr(
-				space, node_addr, RW_X_LATCH, &mtr)
+				space, zip_size, node_addr, RW_X_LATCH, &mtr)
 				- FSEG_INODE_PAGE_NODE;
 
 			seg_inode = fsp_seg_inode_page_get_nth_inode(
-				seg_inode_page, n, &mtr);
-			ut_a(ut_dulint_cmp(
-				     mach_read_from_8(seg_inode + FSEG_ID),
-				     ut_dulint_zero) != 0);
+				seg_inode_page, n, zip_size, &mtr);
+			ut_a(!ut_dulint_is_zero(
+				     mach_read_from_8(seg_inode + FSEG_ID)));
 			fseg_validate_low(seg_inode, &mtr);
 
 			descr_count += flst_get_len(seg_inode + FSEG_FREE,
@@ -3775,15 +4013,15 @@ fsp_validate(
 			next_node_addr = flst_get_next_addr(
 				seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
 			mtr_commit(&mtr);
-		}
+		} while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
 
 		node_addr = next_node_addr;
 	}
 
 	mtr_start(&mtr);
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(latch, &mtr);
 
-	header = fsp_get_space_header(space, &mtr);
+	header = fsp_get_space_header(space, zip_size, &mtr);
 
 	node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
 
@@ -3793,20 +4031,20 @@ fsp_validate(
 
 	while (!fil_addr_is_null(node_addr)) {
 
-		for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
+		n = 0;
 
+		do {
 			mtr_start(&mtr);
-			mtr_x_lock(fil_space_get_latch(space), &mtr);
+			mtr_x_lock(latch, &mtr);
 
 			seg_inode_page = fut_get_ptr(
-				space, node_addr, RW_X_LATCH, &mtr)
+				space, zip_size, node_addr, RW_X_LATCH, &mtr)
 				- FSEG_INODE_PAGE_NODE;
 
 			seg_inode = fsp_seg_inode_page_get_nth_inode(
-				seg_inode_page, n, &mtr);
-			if (ut_dulint_cmp(
-				    mach_read_from_8(seg_inode + FSEG_ID),
-				    ut_dulint_zero) != 0) {
+				seg_inode_page, n, zip_size, &mtr);
+			if (!ut_dulint_is_zero(
+				    mach_read_from_8(seg_inode + FSEG_ID))) {
 				fseg_validate_low(seg_inode, &mtr);
 
 				descr_count += flst_get_len(
@@ -3822,16 +4060,23 @@ fsp_validate(
 			next_node_addr = flst_get_next_addr(
 				seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
 			mtr_commit(&mtr);
-		}
+		} while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
 
 		node_addr = next_node_addr;
 	}
 
 	ut_a(descr_count * FSP_EXTENT_SIZE == free_limit);
-	ut_a(n_used + n_full_frag_pages
-	     == n_used2 + 2* ((free_limit + XDES_DESCRIBED_PER_PAGE - 1)
-			      / XDES_DESCRIBED_PER_PAGE)
-	     + seg_inode_len_full + seg_inode_len_free);
+	if (!zip_size) {
+		ut_a(n_used + n_full_frag_pages
+		     == n_used2 + 2 * ((free_limit + (UNIV_PAGE_SIZE - 1))
+				       / UNIV_PAGE_SIZE)
+		     + seg_inode_len_full + seg_inode_len_free);
+	} else {
+		ut_a(n_used + n_full_frag_pages
+		     == n_used2 + 2 * ((free_limit + (zip_size - 1))
+				       / zip_size)
+		     + seg_inode_len_full + seg_inode_len_free);
+	}
 	ut_a(frag_n_used == n_used);
 
 	mtr_commit(&mtr2);
@@ -3839,17 +4084,20 @@ fsp_validate(
 	return(TRUE);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Prints info of a file space. */
-
+UNIV_INTERN
 void
 fsp_print(
 /*======*/
-	ulint	space)	/* in: space id */
+	ulint	space)	/*!< in: space id */
 {
 	fsp_header_t*	header;
 	fseg_inode_t*	seg_inode;
 	page_t*		seg_inode_page;
+	rw_lock_t*	latch;
+	ulint		flags;
+	ulint		zip_size;
 	ulint		size;
 	ulint		free_limit;
 	ulint		frag_n_used;
@@ -3866,18 +4114,21 @@ fsp_print(
 	mtr_t		mtr;
 	mtr_t		mtr2;
 
+	latch = fil_space_get_latch(space, &flags);
+	zip_size = dict_table_flags_to_zip_size(flags);
+
 	/* Start first a mini-transaction mtr2 to lock out all other threads
 	from the fsp system */
 
 	mtr_start(&mtr2);
 
-	mtr_x_lock(fil_space_get_latch(space), &mtr2);
+	mtr_x_lock(latch, &mtr2);
 
 	mtr_start(&mtr);
 
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(latch, &mtr);
 
-	header = fsp_get_space_header(space, &mtr);
+	header = fsp_get_space_header(space, zip_size, &mtr);
 
 	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
 
@@ -3900,7 +4151,7 @@ fsp_print(
 		"not full frag extents %lu: used pages %lu,"
 		" full frag extents %lu\n"
 		"first seg id not used %lu %lu\n",
-		(long) space,
+		(ulong) space,
 		(ulong) size, (ulong) free_limit, (ulong) n_free,
 		(ulong) n_free_frag, (ulong) frag_n_used, (ulong) n_full_frag,
 		(ulong) seg_id_high, (ulong) seg_id_low);
@@ -3910,9 +4161,9 @@ fsp_print(
 	/* Print segments */
 
 	mtr_start(&mtr);
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(latch, &mtr);
 
-	header = fsp_get_space_header(space, &mtr);
+	header = fsp_get_space_header(space, zip_size, &mtr);
 
 	node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
 
@@ -3920,20 +4171,21 @@ fsp_print(
 
 	while (!fil_addr_is_null(node_addr)) {
 
-		for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
+		n = 0;
+
+		do {
 
 			mtr_start(&mtr);
-			mtr_x_lock(fil_space_get_latch(space), &mtr);
+			mtr_x_lock(latch, &mtr);
 
 			seg_inode_page = fut_get_ptr(
-				space, node_addr, RW_X_LATCH, &mtr)
+				space, zip_size, node_addr, RW_X_LATCH, &mtr)
 				- FSEG_INODE_PAGE_NODE;
 
 			seg_inode = fsp_seg_inode_page_get_nth_inode(
-				seg_inode_page, n, &mtr);
-			ut_a(ut_dulint_cmp(
-				     mach_read_from_8(seg_inode + FSEG_ID),
-				     ut_dulint_zero) != 0);
+				seg_inode_page, n, zip_size, &mtr);
+			ut_a(!ut_dulint_is_zero(
+				     mach_read_from_8(seg_inode + FSEG_ID)));
 			fseg_print_low(seg_inode, &mtr);
 
 			n_segs++;
@@ -3941,15 +4193,15 @@ fsp_print(
 			next_node_addr = flst_get_next_addr(
 				seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
 			mtr_commit(&mtr);
-		}
+		} while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
 
 		node_addr = next_node_addr;
 	}
 
 	mtr_start(&mtr);
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(latch, &mtr);
 
-	header = fsp_get_space_header(space, &mtr);
+	header = fsp_get_space_header(space, zip_size, &mtr);
 
 	node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
 
@@ -3957,20 +4209,21 @@ fsp_print(
 
 	while (!fil_addr_is_null(node_addr)) {
 
-		for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
+		n = 0;
+
+		do {
 
 			mtr_start(&mtr);
-			mtr_x_lock(fil_space_get_latch(space), &mtr);
+			mtr_x_lock(latch, &mtr);
 
 			seg_inode_page = fut_get_ptr(
-				space, node_addr, RW_X_LATCH, &mtr)
+				space, zip_size, node_addr, RW_X_LATCH, &mtr)
 				- FSEG_INODE_PAGE_NODE;
 
 			seg_inode = fsp_seg_inode_page_get_nth_inode(
-				seg_inode_page, n, &mtr);
-			if (ut_dulint_cmp(
-				    mach_read_from_8(seg_inode + FSEG_ID),
-				    ut_dulint_zero) != 0) {
+				seg_inode_page, n, zip_size, &mtr);
+			if (!ut_dulint_is_zero(
+				    mach_read_from_8(seg_inode + FSEG_ID))) {
 
 				fseg_print_low(seg_inode, &mtr);
 				n_segs++;
@@ -3979,7 +4232,7 @@ fsp_print(
 			next_node_addr = flst_get_next_addr(
 				seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
 			mtr_commit(&mtr);
-		}
+		} while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
 
 		node_addr = next_node_addr;
 	}
@@ -3988,3 +4241,4 @@ fsp_print(
 
 	fprintf(stderr, "NUMBER of file segments: %lu\n", (ulong) n_segs);
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/fut/fut0fut.c b/storage/innodb_plugin/fut/fut0fut.c
new file mode 100644
index 00000000000..20b45a575e6
--- /dev/null
+++ b/storage/innodb_plugin/fut/fut0fut.c
@@ -0,0 +1,31 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file fut/fut0fut.c
+File-based utilities
+
+Created 12/13/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "fut0fut.h"
+
+#ifdef UNIV_NONINL
+#include "fut0fut.ic"
+#endif
+
diff --git a/storage/innobase/fut/fut0lst.c b/storage/innodb_plugin/fut/fut0lst.c
similarity index 61%
rename from storage/innobase/fut/fut0lst.c
rename to storage/innodb_plugin/fut/fut0lst.c
index 75fa8bf5552..a1e21c22725 100644
--- a/storage/innobase/fut/fut0lst.c
+++ b/storage/innodb_plugin/fut/fut0lst.c
@@ -1,7 +1,24 @@
-/**********************************************************************
-File-based list utilities
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file fut/fut0lst.c
+File-based list utilities
 
 Created 11/28/1995 Heikki Tuuri
 ***********************************************************************/
@@ -13,18 +30,18 @@ Created 11/28/1995 Heikki Tuuri
 #endif
 
 #include "buf0buf.h"
+#include "page0page.h"
 
-
-/************************************************************************
+/********************************************************************//**
 Adds a node to an empty list. */
 static
 void
 flst_add_to_empty(
 /*==============*/
-	flst_base_node_t*	base,	/* in: pointer to base node of
+	flst_base_node_t*	base,	/*!< in: pointer to base node of
 					empty list */
-	flst_node_t*		node,	/* in: node to add */
-	mtr_t*			mtr)	/* in: mini-transaction handle */
+	flst_node_t*		node,	/*!< in: node to add */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	ulint		space;
 	fil_addr_t	node_addr;
@@ -32,10 +49,8 @@ flst_add_to_empty(
 
 	ut_ad(mtr && base && node);
 	ut_ad(base != node);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
 	len = flst_get_len(base, mtr);
 	ut_a(len == 0);
 
@@ -53,15 +68,15 @@ flst_add_to_empty(
 	mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
 }
 
-/************************************************************************
+/********************************************************************//**
 Adds a node as the last node in a list. */
-
+UNIV_INTERN
 void
 flst_add_last(
 /*==========*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node,	/* in: node to add */
-	mtr_t*			mtr)	/* in: mini-transaction handle */
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node,	/*!< in: node to add */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	ulint		space;
 	fil_addr_t	node_addr;
@@ -71,10 +86,8 @@ flst_add_last(
 
 	ut_ad(mtr && base && node);
 	ut_ad(base != node);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
 	len = flst_get_len(base, mtr);
 	last_addr = flst_get_last(base, mtr);
 
@@ -83,10 +96,12 @@ flst_add_last(
 	/* If the list is not empty, call flst_insert_after */
 	if (len != 0) {
 		if (last_addr.page == node_addr.page) {
-			last_node = buf_frame_align(node) + last_addr.boffset;
+			last_node = page_align(node) + last_addr.boffset;
 		} else {
-			last_node = fut_get_ptr(space, last_addr, RW_X_LATCH,
-						mtr);
+			ulint	zip_size = fil_space_get_zip_size(space);
+
+			last_node = fut_get_ptr(space, zip_size, last_addr,
+						RW_X_LATCH, mtr);
 		}
 
 		flst_insert_after(base, last_node, node, mtr);
@@ -96,15 +111,15 @@ flst_add_last(
 	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Adds a node as the first node in a list. */
-
+UNIV_INTERN
 void
 flst_add_first(
 /*===========*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node,	/* in: node to add */
-	mtr_t*			mtr)	/* in: mini-transaction handle */
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node,	/*!< in: node to add */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	ulint		space;
 	fil_addr_t	node_addr;
@@ -114,10 +129,8 @@ flst_add_first(
 
 	ut_ad(mtr && base && node);
 	ut_ad(base != node);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
 	len = flst_get_len(base, mtr);
 	first_addr = flst_get_first(base, mtr);
 
@@ -126,10 +139,11 @@ flst_add_first(
 	/* If the list is not empty, call flst_insert_before */
 	if (len != 0) {
 		if (first_addr.page == node_addr.page) {
-			first_node = buf_frame_align(node)
-				+ first_addr.boffset;
+			first_node = page_align(node) + first_addr.boffset;
 		} else {
-			first_node = fut_get_ptr(space, first_addr,
+			ulint	zip_size = fil_space_get_zip_size(space);
+
+			first_node = fut_get_ptr(space, zip_size, first_addr,
 						 RW_X_LATCH, mtr);
 		}
 
@@ -140,16 +154,16 @@ flst_add_first(
 	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Inserts a node after another in a list. */
-
+UNIV_INTERN
 void
 flst_insert_after(
 /*==============*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node1,	/* in: node to insert after */
-	flst_node_t*		node2,	/* in: node to add */
-	mtr_t*			mtr)	/* in: mini-transaction handle */
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node1,	/*!< in: node to insert after */
+	flst_node_t*		node2,	/*!< in: node to add */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	ulint		space;
 	fil_addr_t	node1_addr;
@@ -162,12 +176,9 @@ flst_insert_after(
 	ut_ad(base != node1);
 	ut_ad(base != node2);
 	ut_ad(node2 != node1);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(node1),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, node1, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
 
 	buf_ptr_get_fsp_addr(node1, &space, &node1_addr);
 	buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
@@ -180,7 +191,10 @@ flst_insert_after(
 
 	if (!fil_addr_is_null(node3_addr)) {
 		/* Update prev field of node3 */
-		node3 = fut_get_ptr(space, node3_addr, RW_X_LATCH, mtr);
+		ulint	zip_size = fil_space_get_zip_size(space);
+
+		node3 = fut_get_ptr(space, zip_size,
+				    node3_addr, RW_X_LATCH, mtr);
 		flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
 	} else {
 		/* node1 was last in list: update last field in base */
@@ -195,16 +209,16 @@ flst_insert_after(
 	mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
 }
 
-/************************************************************************
+/********************************************************************//**
 Inserts a node before another in a list. */
-
+UNIV_INTERN
 void
 flst_insert_before(
 /*===============*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node2,	/* in: node to insert */
-	flst_node_t*		node3,	/* in: node to insert before */
-	mtr_t*			mtr)	/* in: mini-transaction handle */
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node2,	/*!< in: node to insert */
+	flst_node_t*		node3,	/*!< in: node to insert before */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	ulint		space;
 	flst_node_t*	node1;
@@ -217,12 +231,9 @@ flst_insert_before(
 	ut_ad(base != node2);
 	ut_ad(base != node3);
 	ut_ad(node2 != node3);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(node3),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, node3, MTR_MEMO_PAGE_X_FIX));
 
 	buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
 	buf_ptr_get_fsp_addr(node3, &space, &node3_addr);
@@ -234,8 +245,10 @@ flst_insert_before(
 	flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
 
 	if (!fil_addr_is_null(node1_addr)) {
+		ulint	zip_size = fil_space_get_zip_size(space);
 		/* Update next field of node1 */
-		node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH, mtr);
+		node1 = fut_get_ptr(space, zip_size, node1_addr,
+				    RW_X_LATCH, mtr);
 		flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
 	} else {
 		/* node3 was first in list: update first field in base */
@@ -250,17 +263,18 @@ flst_insert_before(
 	mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
 }
 
-/************************************************************************
+/********************************************************************//**
 Removes a node. */
-
+UNIV_INTERN
 void
 flst_remove(
 /*========*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node2,	/* in: node to remove */
-	mtr_t*			mtr)	/* in: mini-transaction handle */
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node2,	/*!< in: node to remove */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	ulint		space;
+	ulint		zip_size;
 	flst_node_t*	node1;
 	fil_addr_t	node1_addr;
 	fil_addr_t	node2_addr;
@@ -269,12 +283,11 @@ flst_remove(
 	ulint		len;
 
 	ut_ad(mtr && node2 && base);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
 
 	buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
+	zip_size = fil_space_get_zip_size(space);
 
 	node1_addr = flst_get_prev_addr(node2, mtr);
 	node3_addr = flst_get_next_addr(node2, mtr);
@@ -285,10 +298,10 @@ flst_remove(
 
 		if (node1_addr.page == node2_addr.page) {
 
-			node1 = buf_frame_align(node2) + node1_addr.boffset;
+			node1 = page_align(node2) + node1_addr.boffset;
 		} else {
-			node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH,
-					    mtr);
+			node1 = fut_get_ptr(space, zip_size,
+					    node1_addr, RW_X_LATCH, mtr);
 		}
 
 		ut_ad(node1 != node2);
@@ -304,10 +317,10 @@ flst_remove(
 
 		if (node3_addr.page == node2_addr.page) {
 
-			node3 = buf_frame_align(node2) + node3_addr.boffset;
+			node3 = page_align(node2) + node3_addr.boffset;
 		} else {
-			node3 = fut_get_ptr(space, node3_addr, RW_X_LATCH,
-					    mtr);
+			node3 = fut_get_ptr(space, zip_size,
+					    node3_addr, RW_X_LATCH, mtr);
 		}
 
 		ut_ad(node2 != node3);
@@ -325,19 +338,19 @@ flst_remove(
 	mlog_write_ulint(base + FLST_LEN, len - 1, MLOG_4BYTES, mtr);
 }
 
-/************************************************************************
+/********************************************************************//**
 Cuts off the tail of the list, including the node given. The number of
 nodes which will be removed must be provided by the caller, as this function
 does not measure the length of the tail. */
-
+UNIV_INTERN
 void
 flst_cut_end(
 /*=========*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node2,	/* in: first node to remove */
-	ulint			n_nodes,/* in: number of nodes to remove,
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node2,	/*!< in: first node to remove */
+	ulint			n_nodes,/*!< in: number of nodes to remove,
 					must be >= 1 */
-	mtr_t*			mtr)	/* in: mini-transaction handle */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	ulint		space;
 	flst_node_t*	node1;
@@ -346,10 +359,8 @@ flst_cut_end(
 	ulint		len;
 
 	ut_ad(mtr && node2 && base);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(n_nodes > 0);
 
 	buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
@@ -362,10 +373,11 @@ flst_cut_end(
 
 		if (node1_addr.page == node2_addr.page) {
 
-			node1 = buf_frame_align(node2) + node1_addr.boffset;
+			node1 = page_align(node2) + node1_addr.boffset;
 		} else {
-			node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH,
-					    mtr);
+			node1 = fut_get_ptr(space,
+					    fil_space_get_zip_size(space),
+					    node1_addr, RW_X_LATCH, mtr);
 		}
 
 		flst_write_addr(node1 + FLST_NEXT, fil_addr_null, mtr);
@@ -383,28 +395,26 @@ flst_cut_end(
 	mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
 }
 
-/************************************************************************
+/********************************************************************//**
 Cuts off the tail of the list, not including the given node. The number of
 nodes which will be removed must be provided by the caller, as this function
 does not measure the length of the tail. */
-
+UNIV_INTERN
 void
 flst_truncate_end(
 /*==============*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	flst_node_t*		node2,	/* in: first node not to remove */
-	ulint			n_nodes,/* in: number of nodes to remove */
-	mtr_t*			mtr)	/* in: mini-transaction handle */
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node2,	/*!< in: first node not to remove */
+	ulint			n_nodes,/*!< in: number of nodes to remove */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	fil_addr_t	node2_addr;
 	ulint		len;
 	ulint		space;
 
 	ut_ad(mtr && node2 && base);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
 	if (n_nodes == 0) {
 
 		ut_ad(fil_addr_is_null(flst_get_next_addr(node2, mtr)));
@@ -426,27 +436,27 @@ flst_truncate_end(
 	mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
 }
 
-/************************************************************************
-Validates a file-based list. */
-
+/********************************************************************//**
+Validates a file-based list.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 flst_validate(
 /*==========*/
-					/* out: TRUE if ok */
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	mtr_t*			mtr1)	/* in: mtr */
+	const flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	mtr_t*			mtr1)	/*!< in: mtr */
 {
-	ulint		space;
-	flst_node_t*	node;
-	fil_addr_t	node_addr;
-	fil_addr_t	base_addr;
-	ulint		len;
-	ulint		i;
-	mtr_t		mtr2;
+	ulint			space;
+	ulint			zip_size;
+	const flst_node_t*	node;
+	fil_addr_t		node_addr;
+	fil_addr_t		base_addr;
+	ulint			len;
+	ulint			i;
+	mtr_t			mtr2;
 
 	ut_ad(base);
-	ut_ad(mtr_memo_contains(mtr1, buf_block_align(base),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr1, base, MTR_MEMO_PAGE_X_FIX));
 
 	/* We use two mini-transaction handles: the first is used to
 	lock the base node, and prevent other threads from modifying the
@@ -457,6 +467,7 @@ flst_validate(
 
 	/* Find out the space id */
 	buf_ptr_get_fsp_addr(base, &space, &base_addr);
+	zip_size = fil_space_get_zip_size(space);
 
 	len = flst_get_len(base, mtr1);
 	node_addr = flst_get_first(base, mtr1);
@@ -464,7 +475,8 @@ flst_validate(
 	for (i = 0; i < len; i++) {
 		mtr_start(&mtr2);
 
-		node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
+		node = fut_get_ptr(space, zip_size,
+				   node_addr, RW_X_LATCH, &mtr2);
 		node_addr = flst_get_next_addr(node, &mtr2);
 
 		mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
@@ -478,7 +490,8 @@ flst_validate(
 	for (i = 0; i < len; i++) {
 		mtr_start(&mtr2);
 
-		node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
+		node = fut_get_ptr(space, zip_size,
+				   node_addr, RW_X_LATCH, &mtr2);
 		node_addr = flst_get_prev_addr(node, &mtr2);
 
 		mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
@@ -490,29 +503,28 @@ flst_validate(
 	return(TRUE);
 }
 
-/************************************************************************
+/********************************************************************//**
 Prints info of a file-based list. */
-
+UNIV_INTERN
 void
 flst_print(
 /*=======*/
-	flst_base_node_t*	base,	/* in: pointer to base node of list */
-	mtr_t*			mtr)	/* in: mtr */
+	const flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	mtr_t*			mtr)	/*!< in: mtr */
 {
-	buf_frame_t*	frame;
-	ulint		len;
+	const buf_frame_t*	frame;
+	ulint			len;
 
 	ut_ad(base && mtr);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
-				MTR_MEMO_PAGE_X_FIX));
-	frame = buf_frame_align(base);
+	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+	frame = page_align((byte*) base);
 
 	len = flst_get_len(base, mtr);
 
 	fprintf(stderr,
 		"FILE-BASED LIST:\n"
 		"Base node in space %lu page %lu byte offset %lu; len %lu\n",
-		(ulong) buf_frame_get_space_id(frame),
-		(ulong) buf_frame_get_page_no(frame),
-		(ulong) (base - frame), (ulong) len);
+		(ulong) page_get_space_id(frame),
+		(ulong) page_get_page_no(frame),
+		(ulong) page_offset(base), (ulong) len);
 }
diff --git a/storage/innodb_plugin/ha/ha0ha.c b/storage/innodb_plugin/ha/ha0ha.c
new file mode 100644
index 00000000000..cb5e541b55d
--- /dev/null
+++ b/storage/innodb_plugin/ha/ha0ha.c
@@ -0,0 +1,441 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file ha/ha0ha.c
+The hash table with external chains
+
+Created 8/22/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "ha0ha.h"
+#ifdef UNIV_NONINL
+#include "ha0ha.ic"
+#endif
+
+#ifdef UNIV_DEBUG
+# include "buf0buf.h"
+#endif /* UNIV_DEBUG */
+#ifdef UNIV_SYNC_DEBUG
+# include "btr0sea.h"
+#endif /* UNIV_SYNC_DEBUG */
+#include "page0page.h"
+
+/*************************************************************//**
+Creates a hash table with at least n array cells.  The actual number
+of cells is chosen to be a prime number slightly bigger than n.
+@return	own: created table */
+UNIV_INTERN
+hash_table_t*
+ha_create_func(
+/*===========*/
+	ulint	n,		/*!< in: number of array cells */
+#ifdef UNIV_SYNC_DEBUG
+	ulint	mutex_level,	/*!< in: level of the mutexes in the latching
+				order: this is used in the debug version */
+#endif /* UNIV_SYNC_DEBUG */
+	ulint	n_mutexes)	/*!< in: number of mutexes to protect the
+				hash table: must be a power of 2, or 0 */
+{
+	hash_table_t*	table;
+#ifndef UNIV_HOTBACKUP
+	ulint		i;
+#endif /* !UNIV_HOTBACKUP */
+
+	ut_ad(ut_is_2pow(n_mutexes));
+	table = hash_create(n);
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
+	table->adaptive = TRUE;
+# endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	/* Creating MEM_HEAP_BTR_SEARCH type heaps can potentially fail,
+	but in practise it never should in this case, hence the asserts. */
+
+	if (n_mutexes == 0) {
+		table->heap = mem_heap_create_in_btr_search(
+			ut_min(4096, MEM_MAX_ALLOC_IN_BUF));
+		ut_a(table->heap);
+
+		return(table);
+	}
+
+#ifndef UNIV_HOTBACKUP
+	hash_create_mutexes(table, n_mutexes, mutex_level);
+
+	table->heaps = mem_alloc(n_mutexes * sizeof(void*));
+
+	for (i = 0; i < n_mutexes; i++) {
+		table->heaps[i] = mem_heap_create_in_btr_search(4096);
+		ut_a(table->heaps[i]);
+	}
+#endif /* !UNIV_HOTBACKUP */
+
+	return(table);
+}
+
+/*************************************************************//**
+Empties a hash table and frees the memory heaps. */
+UNIV_INTERN
+void
+ha_clear(
+/*=====*/
+	hash_table_t*	table)	/*!< in, own: hash table */
+{
+	ulint	i;
+	ulint	n;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
+#endif /* UNIV_SYNC_DEBUG */
+
+#ifndef UNIV_HOTBACKUP
+	/* Free the memory heaps. */
+	n = table->n_mutexes;
+
+	for (i = 0; i < n; i++) {
+		mem_heap_free(table->heaps[i]);
+	}
+#endif /* !UNIV_HOTBACKUP */
+
+	/* Clear the hash table. */
+	n = hash_get_n_cells(table);
+
+	for (i = 0; i < n; i++) {
+		hash_get_nth_cell(table, i)->node = NULL;
+	}
+}
+
+/*************************************************************//**
+Inserts an entry into a hash table. If an entry with the same fold number
+is found, its node is updated to point to the new data, and no new node
+is inserted.
+@return	TRUE if succeed, FALSE if no more memory could be allocated */
+UNIV_INTERN
+ibool
+ha_insert_for_fold_func(
+/*====================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold,	/*!< in: folded value of data; if a node with
+				the same fold value already exists, it is
+				updated to point to the same data, and no new
+				node is created! */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	buf_block_t*	block,	/*!< in: buffer block containing the data */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	void*		data)	/*!< in: data, must not be NULL */
+{
+	hash_cell_t*	cell;
+	ha_node_t*	node;
+	ha_node_t*	prev_node;
+	ulint		hash;
+
+	ut_ad(table && data);
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	ut_a(block->frame == page_align(data));
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	ASSERT_HASH_MUTEX_OWN(table, fold);
+
+	hash = hash_calc_hash(fold, table);
+
+	cell = hash_get_nth_cell(table, hash);
+
+	prev_node = cell->node;
+
+	while (prev_node != NULL) {
+		if (prev_node->fold == fold) {
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
+			if (table->adaptive) {
+				buf_block_t* prev_block = prev_node->block;
+				ut_a(prev_block->frame
+				     == page_align(prev_node->data));
+				ut_a(prev_block->n_pointers > 0);
+				prev_block->n_pointers--;
+				block->n_pointers++;
+			}
+# endif /* !UNIV_HOTBACKUP */
+
+			prev_node->block = block;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+			prev_node->data = data;
+
+			return(TRUE);
+		}
+
+		prev_node = prev_node->next;
+	}
+
+	/* We have to allocate a new chain node */
+
+	node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t));
+
+	if (node == NULL) {
+		/* It was a btr search type memory heap and at the moment
+		no more memory could be allocated: return */
+
+		ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH);
+
+		return(FALSE);
+	}
+
+	ha_node_set_data(node, block, data);
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
+	if (table->adaptive) {
+		block->n_pointers++;
+	}
+# endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
+	node->fold = fold;
+
+	node->next = NULL;
+
+	prev_node = cell->node;
+
+	if (prev_node == NULL) {
+
+		cell->node = node;
+
+		return(TRUE);
+	}
+
+	while (prev_node->next != NULL) {
+
+		prev_node = prev_node->next;
+	}
+
+	prev_node->next = node;
+
+	return(TRUE);
+}
+
+/***********************************************************//**
+Deletes a hash node. */
+UNIV_INTERN
+void
+ha_delete_hash_node(
+/*================*/
+	hash_table_t*	table,		/*!< in: hash table */
+	ha_node_t*	del_node)	/*!< in: node to be deleted */
+{
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
+	if (table->adaptive) {
+		ut_a(del_node->block->frame = page_align(del_node->data));
+		ut_a(del_node->block->n_pointers > 0);
+		del_node->block->n_pointers--;
+	}
+# endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
+	HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
+}
+
+/*********************************************************//**
+Looks for an element when we know the pointer to the data, and updates
+the pointer to data, if found. */
+UNIV_INTERN
+void
+ha_search_and_update_if_found_func(
+/*===============================*/
+	hash_table_t*	table,	/*!< in/out: hash table */
+	ulint		fold,	/*!< in: folded value of the searched data */
+	void*		data,	/*!< in: pointer to the data */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	buf_block_t*	new_block,/*!< in: block containing new_data */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	void*		new_data)/*!< in: new pointer to the data */
+{
+	ha_node_t*	node;
+
+	ASSERT_HASH_MUTEX_OWN(table, fold);
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	ut_a(new_block->frame == page_align(new_data));
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
+	node = ha_search_with_data(table, fold, data);
+
+	if (node) {
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
+		if (table->adaptive) {
+			ut_a(node->block->n_pointers > 0);
+			node->block->n_pointers--;
+			new_block->n_pointers++;
+		}
+# endif /* !UNIV_HOTBACKUP */
+
+		node->block = new_block;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+		node->data = new_data;
+	}
+}
+
+#ifndef UNIV_HOTBACKUP
+/*****************************************************************//**
+Removes from the chain determined by fold all nodes whose data pointer
+points to the page given. */
+UNIV_INTERN
+void
+ha_remove_all_nodes_to_page(
+/*========================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold,	/*!< in: fold value */
+	const page_t*	page)	/*!< in: buffer page */
+{
+	ha_node_t*	node;
+
+	ASSERT_HASH_MUTEX_OWN(table, fold);
+
+	node = ha_chain_get_first(table, fold);
+
+	while (node) {
+		if (page_align(ha_node_get_data(node)) == page) {
+
+			/* Remove the hash node */
+
+			ha_delete_hash_node(table, node);
+
+			/* Start again from the first node in the chain
+			because the deletion may compact the heap of
+			nodes and move other nodes! */
+
+			node = ha_chain_get_first(table, fold);
+		} else {
+			node = ha_chain_get_next(node);
+		}
+	}
+#ifdef UNIV_DEBUG
+	/* Check that all nodes really got deleted */
+
+	node = ha_chain_get_first(table, fold);
+
+	while (node) {
+		ut_a(page_align(ha_node_get_data(node)) != page);
+
+		node = ha_chain_get_next(node);
+	}
+#endif
+}
+
+/*************************************************************//**
+Validates a given range of the cells in hash table.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+ha_validate(
+/*========*/
+	hash_table_t*	table,		/*!< in: hash table */
+	ulint		start_index,	/*!< in: start index */
+	ulint		end_index)	/*!< in: end index */
+{
+	hash_cell_t*	cell;
+	ha_node_t*	node;
+	ibool		ok	= TRUE;
+	ulint		i;
+
+	ut_a(start_index <= end_index);
+	ut_a(start_index < hash_get_n_cells(table));
+	ut_a(end_index < hash_get_n_cells(table));
+
+	for (i = start_index; i <= end_index; i++) {
+
+		cell = hash_get_nth_cell(table, i);
+
+		node = cell->node;
+
+		while (node) {
+			if (hash_calc_hash(node->fold, table) != i) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+					"InnoDB: Error: hash table node"
+					" fold value %lu does not\n"
+					"InnoDB: match the cell number %lu.\n",
+					(ulong) node->fold, (ulong) i);
+
+				ok = FALSE;
+			}
+
+			node = node->next;
+		}
+	}
+
+	return(ok);
+}
+
+/*************************************************************//**
+Prints info of a hash table. */
+UNIV_INTERN
+void
+ha_print_info(
+/*==========*/
+	FILE*		file,	/*!< in: file where to print */
+	hash_table_t*	table)	/*!< in: hash table */
+{
+#ifdef UNIV_DEBUG
+/* Some of the code here is disabled for performance reasons in production
+builds, see http://bugs.mysql.com/36941 */
+#define PRINT_USED_CELLS
+#endif /* UNIV_DEBUG */
+
+#ifdef PRINT_USED_CELLS
+	hash_cell_t*	cell;
+	ulint		cells	= 0;
+	ulint		i;
+#endif /* PRINT_USED_CELLS */
+	ulint		n_bufs;
+
+#ifdef PRINT_USED_CELLS
+	for (i = 0; i < hash_get_n_cells(table); i++) {
+
+		cell = hash_get_nth_cell(table, i);
+
+		if (cell->node) {
+
+			cells++;
+		}
+	}
+#endif /* PRINT_USED_CELLS */
+
+	fprintf(file, "Hash table size %lu",
+		(ulong) hash_get_n_cells(table));
+
+#ifdef PRINT_USED_CELLS
+	fprintf(file, ", used cells %lu", (ulong) cells);
+#endif /* PRINT_USED_CELLS */
+
+	if (table->heaps == NULL && table->heap != NULL) {
+
+		/* This calculation is intended for the adaptive hash
+		index: how many buffer frames we have reserved? */
+
+		n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1;
+
+		if (table->heap->free_block) {
+			n_bufs++;
+		}
+
+		fprintf(file, ", node heap has %lu buffer(s)\n",
+			(ulong) n_bufs);
+	}
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/ha/ha0storage.c b/storage/innodb_plugin/ha/ha0storage.c
new file mode 100644
index 00000000000..698e34f1166
--- /dev/null
+++ b/storage/innodb_plugin/ha/ha0storage.c
@@ -0,0 +1,184 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file ha/ha0storage.c
+Hash storage.
+Provides a data structure that stores chunks of data in
+its own storage, avoiding duplicates.
+
+Created September 22, 2007 Vasil Dimov
+*******************************************************/
+
+#include "univ.i"
+#include "ha0storage.h"
+#include "hash0hash.h"
+#include "mem0mem.h"
+#include "ut0rnd.h"
+
+#ifdef UNIV_NONINL
+#include "ha0storage.ic"
+#endif
+
+/*******************************************************************//**
+Retrieves a data from a storage. If it is present, a pointer to the
+stored copy of data is returned, otherwise NULL is returned. */
+static
+const void*
+ha_storage_get(
+/*===========*/
+	ha_storage_t*	storage,	/*!< in: hash storage */
+	const void*	data,		/*!< in: data to check for */
+	ulint		data_len)	/*!< in: data length */
+{
+	ha_storage_node_t*	node;
+	ulint			fold;
+
+	/* avoid repetitive calls to ut_fold_binary() in the HASH_SEARCH
+	macro */
+	fold = ut_fold_binary(data, data_len);
+
+#define IS_FOUND	\
+	node->data_len == data_len && memcmp(node->data, data, data_len) == 0
+
+	HASH_SEARCH(
+		next,			/* node->"next" */
+		storage->hash,		/* the hash table */
+		fold,			/* key */
+		ha_storage_node_t*,	/* type of node->next */
+		node,			/* auxiliary variable */
+		,			/* assertion */
+		IS_FOUND);		/* search criteria */
+
+	if (node == NULL) {
+
+		return(NULL);
+	}
+	/* else */
+
+	return(node->data);
+}
+
+/*******************************************************************//**
+Copies data into the storage and returns a pointer to the copy. If the
+same data chunk is already present, then pointer to it is returned.
+Data chunks are considered to be equal if len1 == len2 and
+memcmp(data1, data2, len1) == 0. If "data" is not present (and thus
+data_len bytes need to be allocated) and the size of storage is going to
+become more than "memlim" then "data" is not added and NULL is returned.
+To disable this behavior "memlim" can be set to 0, which stands for
+"no limit". */
+UNIV_INTERN
+const void*
+ha_storage_put_memlim(
+/*==================*/
+	ha_storage_t*	storage,	/*!< in/out: hash storage */
+	const void*	data,		/*!< in: data to store */
+	ulint		data_len,	/*!< in: data length */
+	ulint		memlim)		/*!< in: memory limit to obey */
+{
+	void*			raw;
+	ha_storage_node_t*	node;
+	const void*		data_copy;
+	ulint			fold;
+
+	/* check if data chunk is already present */
+	data_copy = ha_storage_get(storage, data, data_len);
+	if (data_copy != NULL) {
+
+		return(data_copy);
+	}
+
+	/* not present */
+
+	/* check if we are allowed to allocate data_len bytes */
+	if (memlim > 0
+	    && ha_storage_get_size(storage) + data_len > memlim) {
+
+		return(NULL);
+	}
+
+	/* we put the auxiliary node struct and the data itself in one
+	continuous block */
+	raw = mem_heap_alloc(storage->heap,
+			     sizeof(ha_storage_node_t) + data_len);
+
+	node = (ha_storage_node_t*) raw;
+	data_copy = (byte*) raw + sizeof(*node);
+
+	memcpy((byte*) raw + sizeof(*node), data, data_len);
+
+	node->data_len = data_len;
+	node->data = data_copy;
+
+	/* avoid repetitive calls to ut_fold_binary() in the HASH_INSERT
+	macro */
+	fold = ut_fold_binary(data, data_len);
+
+	HASH_INSERT(
+		ha_storage_node_t,	/* type used in the hash chain */
+		next,			/* node->"next" */
+		storage->hash,		/* the hash table */
+		fold,			/* key */
+		node);			/* add this data to the hash */
+
+	/* the output should not be changed because it will spoil the
+	hash table */
+	return(data_copy);
+}
+
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+void
+test_ha_storage()
+{
+	ha_storage_t*	storage;
+	char		buf[1024];
+	int		i;
+	const void*	stored[256];
+	const void*	p;
+
+	storage = ha_storage_create(0, 0);
+
+	for (i = 0; i < 256; i++) {
+
+		memset(buf, i, sizeof(buf));
+		stored[i] = ha_storage_put(storage, buf, sizeof(buf));
+	}
+
+	//ha_storage_empty(&storage);
+
+	for (i = 255; i >= 0; i--) {
+
+		memset(buf, i, sizeof(buf));
+		p = ha_storage_put(storage, buf, sizeof(buf));
+
+		if (p != stored[i]) {
+
+			fprintf(stderr, "ha_storage_put() returned %p "
+				"instead of %p, i=%d\n", p, stored[i], i);
+			return;
+		}
+	}
+
+	fprintf(stderr, "all ok\n");
+
+	ha_storage_free(storage);
+}
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innodb_plugin/ha/hash0hash.c b/storage/innodb_plugin/ha/hash0hash.c
new file mode 100644
index 00000000000..2800d7793f8
--- /dev/null
+++ b/storage/innodb_plugin/ha/hash0hash.c
@@ -0,0 +1,174 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file ha/hash0hash.c
+The simple hash table utility
+
+Created 5/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "hash0hash.h"
+#ifdef UNIV_NONINL
+#include "hash0hash.ic"
+#endif
+
+#include "mem0mem.h"
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Reserves the mutex for a fold value in a hash table. */
+UNIV_INTERN
+void
+hash_mutex_enter(
+/*=============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: fold */
+{
+	mutex_enter(hash_get_mutex(table, fold));
+}
+
+/************************************************************//**
+Releases the mutex for a fold value in a hash table. */
+UNIV_INTERN
+void
+hash_mutex_exit(
+/*============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: fold */
+{
+	mutex_exit(hash_get_mutex(table, fold));
+}
+
+/************************************************************//**
+Reserves all the mutexes of a hash table, in an ascending order. */
+UNIV_INTERN
+void
+hash_mutex_enter_all(
+/*=================*/
+	hash_table_t*	table)	/*!< in: hash table */
+{
+	ulint	i;
+
+	for (i = 0; i < table->n_mutexes; i++) {
+
+		mutex_enter(table->mutexes + i);
+	}
+}
+
+/************************************************************//**
+Releases all the mutexes of a hash table. */
+UNIV_INTERN
+void
+hash_mutex_exit_all(
+/*================*/
+	hash_table_t*	table)	/*!< in: hash table */
+{
+	ulint	i;
+
+	for (i = 0; i < table->n_mutexes; i++) {
+
+		mutex_exit(table->mutexes + i);
+	}
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*************************************************************//**
+Creates a hash table with >= n array cells. The actual number of cells is
+chosen to be a prime number slightly bigger than n.
+@return	own: created table */
+UNIV_INTERN
+hash_table_t*
+hash_create(
+/*========*/
+	ulint	n)	/*!< in: number of array cells */
+{
+	hash_cell_t*	array;
+	ulint		prime;
+	hash_table_t*	table;
+
+	prime = ut_find_prime(n);
+
+	table = mem_alloc(sizeof(hash_table_t));
+
+	array = ut_malloc(sizeof(hash_cell_t) * prime);
+
+	table->array = array;
+	table->n_cells = prime;
+#ifndef UNIV_HOTBACKUP
+# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	table->adaptive = FALSE;
+# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	table->n_mutexes = 0;
+	table->mutexes = NULL;
+	table->heaps = NULL;
+#endif /* !UNIV_HOTBACKUP */
+	table->heap = NULL;
+	table->magic_n = HASH_TABLE_MAGIC_N;
+
+	/* Initialize the cell array */
+	hash_table_clear(table);
+
+	return(table);
+}
+
+/*************************************************************//**
+Frees a hash table. */
+UNIV_INTERN
+void
+hash_table_free(
+/*============*/
+	hash_table_t*	table)	/*!< in, own: hash table */
+{
+#ifndef UNIV_HOTBACKUP
+	ut_a(table->mutexes == NULL);
+#endif /* !UNIV_HOTBACKUP */
+
+	ut_free(table->array);
+	mem_free(table);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Creates a mutex array to protect a hash table. */
+UNIV_INTERN
+void
+hash_create_mutexes_func(
+/*=====================*/
+	hash_table_t*	table,		/*!< in: hash table */
+#ifdef UNIV_SYNC_DEBUG
+	ulint		sync_level,	/*!< in: latching order level of the
+					mutexes: used in the debug version */
+#endif /* UNIV_SYNC_DEBUG */
+	ulint		n_mutexes)	/*!< in: number of mutexes, must be a
+					power of 2 */
+{
+	ulint	i;
+
+	ut_a(n_mutexes > 0);
+	ut_a(ut_is_2pow(n_mutexes));
+
+	table->mutexes = mem_alloc(n_mutexes * sizeof(mutex_t));
+
+	for (i = 0; i < n_mutexes; i++) {
+		mutex_create(table->mutexes + i, sync_level);
+	}
+
+	table->n_mutexes = n_mutexes;
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/ha_innodb.def b/storage/innodb_plugin/ha_innodb.def
new file mode 100644
index 00000000000..e0faa62deb1
--- /dev/null
+++ b/storage/innodb_plugin/ha_innodb.def
@@ -0,0 +1,4 @@
+EXPORTS
+	_mysql_plugin_interface_version_
+	_mysql_sizeof_struct_st_plugin_
+	_mysql_plugin_declarations_
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc
similarity index 68%
rename from storage/innobase/handler/ha_innodb.cc
rename to storage/innodb_plugin/handler/ha_innodb.cc
index 828dcdb843d..682004407c7 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innodb_plugin/handler/ha_innodb.cc
@@ -1,21 +1,53 @@
-/* Copyright (C) 2000-2005 MySQL AB & Innobase Oy
+/*****************************************************************************
 
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; version 2 of the License.
+Copyright (c) 2000, 2009, MySQL AB & Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, 2009 Google Inc.
 
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
-   GNU General Public License for more details.
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
 
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307	 USA */
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
 
-/* This file defines the InnoDB handler: the interface between MySQL and InnoDB
-NOTE: You can only use noninlined InnoDB functions in this file, because we
-have disabled the InnoDB inlining in this file. */
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/***********************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+***********************************************************************/
 
 /* TODO list for the InnoDB handler in 5.0:
   - Remove the flag trx->active_trans and look at trx->conc_state
@@ -30,20 +62,59 @@ have disabled the InnoDB inlining in this file. */
 #endif
 
 #include <mysql_priv.h>
-#include <mysqld_error.h>
 
 #include <m_ctype.h>
-#include <hash.h>
-#include <myisampack.h>
 #include <mysys_err.h>
-#include <my_sys.h>
-#include "ha_innodb.h"
 #include <mysql/plugin.h>
 
+/** @file ha_innodb.cc */
+
+/* Include necessary InnoDB headers */
+extern "C" {
+#include "univ.i"
+#include "btr0sea.h"
+#include "os0file.h"
+#include "os0thread.h"
+#include "srv0start.h"
+#include "srv0srv.h"
+#include "trx0roll.h"
+#include "trx0trx.h"
+#include "trx0sys.h"
+#include "mtr0mtr.h"
+#include "row0ins.h"
+#include "row0mysql.h"
+#include "row0sel.h"
+#include "row0upd.h"
+#include "log0log.h"
+#include "lock0lock.h"
+#include "dict0crea.h"
+#include "btr0cur.h"
+#include "btr0btr.h"
+#include "fsp0fsp.h"
+#include "sync0sync.h"
+#include "fil0fil.h"
+#include "trx0xa.h"
+#include "row0merge.h"
+#include "thr0loc.h"
+#include "dict0boot.h"
+#include "ha_prototypes.h"
+#include "ut0mem.h"
+#include "ibuf0ibuf.h"
+}
+
+#include "ha_innodb.h"
+#include "i_s.h"
+
 #ifndef MYSQL_SERVER
 /* This is needed because of Bug #3596.  Let us hope that pthread_mutex_t
 is defined the same in both builds: the MySQL server and the InnoDB plugin. */
-extern pthread_mutex_t LOCK_thread_count;
+extern MYSQL_PLUGIN_IMPORT pthread_mutex_t LOCK_thread_count;
+
+#if MYSQL_VERSION_ID < 50124
+/* this is defined in mysql_priv.h inside #ifdef MYSQL_SERVER
+but we need it here */
+bool check_global_access(THD *thd, ulong want_access);
+#endif /* MYSQL_VERSION_ID < 50124 */
 #endif /* MYSQL_SERVER */
 
 /** to protect innobase_open_files */
@@ -56,51 +127,28 @@ static pthread_cond_t commit_cond;
 static pthread_mutex_t commit_cond_m;
 static bool innodb_inited = 0;
 
-/*
-  This needs to exist until the query cache callback is removed
-  or learns to pass hton.
-*/
-static handlerton *innodb_hton_ptr;
-
 #define INSIDE_HA_INNOBASE_CC
 
-/* Include necessary InnoDB headers */
-extern "C" {
-#include "../storage/innobase/include/univ.i"
-#include "../storage/innobase/include/os0file.h"
-#include "../storage/innobase/include/os0thread.h"
-#include "../storage/innobase/include/srv0start.h"
-#include "../storage/innobase/include/srv0srv.h"
-#include "../storage/innobase/include/trx0roll.h"
-#include "../storage/innobase/include/trx0trx.h"
-#include "../storage/innobase/include/trx0sys.h"
-#include "../storage/innobase/include/mtr0mtr.h"
-#include "../storage/innobase/include/row0ins.h"
-#include "../storage/innobase/include/row0mysql.h"
-#include "../storage/innobase/include/row0sel.h"
-#include "../storage/innobase/include/row0upd.h"
-#include "../storage/innobase/include/log0log.h"
-#include "../storage/innobase/include/lock0lock.h"
-#include "../storage/innobase/include/dict0crea.h"
-#include "../storage/innobase/include/btr0cur.h"
-#include "../storage/innobase/include/btr0btr.h"
-#include "../storage/innobase/include/fsp0fsp.h"
-#include "../storage/innobase/include/sync0sync.h"
-#include "../storage/innobase/include/fil0fil.h"
-#include "../storage/innobase/include/trx0xa.h"
-#include "../storage/innobase/include/thr0loc.h"
-#include "../storage/innobase/include/ha_prototypes.h"
-}
+/* In the Windows plugin, the return value of current_thd is
+undefined.  Map it to NULL. */
+
+#define EQ_CURRENT_THD(thd) ((thd) == current_thd)
+
+
+static struct handlerton* innodb_hton_ptr;
 
 static const long AUTOINC_OLD_STYLE_LOCKING = 0;
 static const long AUTOINC_NEW_STYLE_LOCKING = 1;
 static const long AUTOINC_NO_LOCKING = 2;
 
 static long innobase_mirrored_log_groups, innobase_log_files_in_group,
-	innobase_log_buffer_size, innobase_buffer_pool_awe_mem_mb,
-	innobase_additional_mem_pool_size,
-	innobase_lock_wait_timeout, innobase_force_recovery,
-	innobase_open_files, innobase_autoinc_lock_mode;
+	innobase_log_buffer_size,
+	innobase_additional_mem_pool_size, innobase_file_io_threads,
+	innobase_force_recovery, innobase_open_files,
+	innobase_autoinc_lock_mode;
+static ulong innobase_commit_concurrency = 0;
+static ulong innobase_read_io_threads;
+static ulong innobase_write_io_threads;
 
 static long long innobase_buffer_pool_size, innobase_log_file_size;
 
@@ -110,6 +158,14 @@ are determined in innobase_init below: */
 static char*	innobase_data_home_dir			= NULL;
 static char*	innobase_data_file_path			= NULL;
 static char*	innobase_log_group_home_dir		= NULL;
+static char*	innobase_file_format_name		= NULL;
+static char*	innobase_change_buffering		= NULL;
+
+/* Note: This variable can be set to on/off and any of the supported
+file formats in the configuration file, but can only be set to any
+of the supported file formats during runtime. */
+static char*	innobase_file_format_check		= NULL;
+
 /* The following has a misleading name: starting from 4.0.5, this also
 affects Windows: */
 static char*	innobase_unix_file_flush_method		= NULL;
@@ -124,32 +180,14 @@ static char*	innobase_log_arch_dir			= NULL;
 #endif /* UNIV_LOG_ARCHIVE */
 static my_bool	innobase_use_doublewrite		= TRUE;
 static my_bool	innobase_use_checksums			= TRUE;
-static my_bool	innobase_file_per_table			= FALSE;
 static my_bool	innobase_locks_unsafe_for_binlog	= FALSE;
 static my_bool	innobase_rollback_on_timeout		= FALSE;
 static my_bool	innobase_create_status_file		= FALSE;
 static my_bool	innobase_stats_on_metadata		= TRUE;
-static my_bool	innobase_adaptive_hash_index	= TRUE;
 
 static char*	internal_innobase_data_file_path	= NULL;
 
-/* Default number of IO per second supported by server. Tunes background
-   IO rate. */
-static long innobase_io_capacity = 100;
-
-/* Write dirty pages when pct dirty is less than max pct dirty */
-static my_bool innobase_extra_dirty_writes = TRUE;
-
-/* Max number of IO requests merged to perform large IO in background
-   IO threads.
-*/
-long innobase_max_merged_io = 64;
-
-/* Number of background IO threads for read and write. */
-long innobase_read_io_threads, innobase_write_io_threads;
-
-/* Use timer based InnoDB concurrency throttling flag */
-static my_bool innobase_thread_concurrency_timer_based;
+static char*	innodb_version_str = (char*) INNODB_VERSION_STR;
 
 /* The following counter is used to convey information to InnoDB
 about server activity: in selects it is not sensible to call
@@ -159,14 +197,18 @@ it every INNOBASE_WAKE_INTERVAL'th step. */
 #define INNOBASE_WAKE_INTERVAL	32
 static ulong	innobase_active_counter	= 0;
 
-static HASH	innobase_open_tables;
+static hash_table_t*	innobase_open_tables;
 
 #ifdef __NETWARE__	/* some special cleanup for NetWare */
 bool nw_panic = FALSE;
 #endif
 
-static uchar* innobase_get_key(INNOBASE_SHARE *share, size_t *length,
-	my_bool not_used __attribute__((unused)));
+/** Allowed values of innodb_change_buffering */
+static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
+	"none",		/* IBUF_USE_NONE */
+	"inserts"	/* IBUF_USE_INSERT */
+};
+
 static INNOBASE_SHARE *get_share(const char *table_name);
 static void free_share(INNOBASE_SHARE *share);
 static int innobase_close_connection(handlerton *hton, THD* thd);
@@ -181,8 +223,88 @@ static handler *innobase_create_handler(handlerton *hton,
                                         TABLE_SHARE *table,
                                         MEM_ROOT *mem_root);
 
+/** @brief Initialize the default value of innodb_commit_concurrency.
+
+Once InnoDB is running, the innodb_commit_concurrency must not change
+from zero to nonzero. (Bug #42101)
+
+The initial default value is 0, and without this extra initialization,
+SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter
+to 0, even if it was initially set to nonzero at the command line
+or configuration file. */
+static
+void
+innobase_commit_concurrency_init_default(void);
+/*==========================================*/
+
+/************************************************************//**
+Validate the file format name and return its corresponding id.
+@return	valid file format id */
+static
+uint
+innobase_file_format_name_lookup(
+/*=============================*/
+	const char*	format_name);		/*!< in: pointer to file format
+						name */
+/************************************************************//**
+Validate the file format check config parameters, as a side effect it
+sets the srv_check_file_format_at_startup variable.
+@return	true if one of  "on" or "off" */
+static
+bool
+innobase_file_format_check_on_off(
+/*==============================*/
+	const char*	format_check);		/*!< in: parameter value */
+/************************************************************//**
+Validate the file format check config parameters, as a side effect it
+sets the srv_check_file_format_at_startup variable.
+@return	true if valid config value */
+static
+bool
+innobase_file_format_check_validate(
+/*================================*/
+	const char*	format_check);		/*!< in: parameter value */
+/****************************************************************//**
+Return alter table flags supported in an InnoDB database. */
+static
+uint
+innobase_alter_table_flags(
+/*=======================*/
+	uint	flags);
+
 static const char innobase_hton_name[]= "InnoDB";
 
+/*************************************************************//**
+Check for a valid value of innobase_commit_concurrency.
+@return	0 for valid innodb_commit_concurrency */
+static
+int
+innobase_commit_concurrency_validate(
+/*=================================*/
+	THD*				thd,	/*!< in: thread handle */
+	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
+						variable */
+	void*				save,	/*!< out: immediate result
+						for update function */
+	struct st_mysql_value*		value)	/*!< in: incoming string */
+{
+	long long	intbuf;
+	ulong		commit_concurrency;
+
+	DBUG_ENTER("innobase_commit_concurrency_validate");
+
+	if (value->val_int(value, &intbuf)) {
+		/* The value is NULL. That is invalid. */
+		DBUG_RETURN(1);
+	}
+
+	*reinterpret_cast<ulong*>(save) = commit_concurrency
+		= static_cast<ulong>(intbuf);
+
+	/* Allow the value to be updated, as long as it remains zero
+	or nonzero. */
+	DBUG_RETURN(!(!commit_concurrency == !innobase_commit_concurrency));
+}
 
 static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG,
   "Enable InnoDB support for the XA two-phase commit",
@@ -194,6 +316,15 @@ static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
   /* check_func */ NULL, /* update_func */ NULL,
   /* default */ TRUE);
 
+static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG,
+  "Use strict mode when evaluating create options.",
+  NULL, NULL, FALSE);
+
+static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
+  "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
+  NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
+
+
 static handler *innobase_create_handler(handlerton *hton,
                                         TABLE_SHARE *table, 
                                         MEM_ROOT *mem_root)
@@ -201,62 +332,64 @@ static handler *innobase_create_handler(handlerton *hton,
   return new (mem_root) ha_innobase(hton, table);
 }
 
-/***********************************************************************
-This function is used to prepare X/Open XA distributed transaction   */
+/*******************************************************************//**
+This function is used to prepare an X/Open XA distributed transaction.
+@return	0 or error number */
 static
 int
 innobase_xa_prepare(
 /*================*/
-			/* out: 0 or error number */
-	handlerton* hton,
-	THD*	thd,	/* in: handle to the MySQL thread of the user
-			whose XA transaction should be prepared */
-	bool	all);	/* in: TRUE - commit transaction
-			FALSE - the current SQL statement ended */
-/***********************************************************************
-This function is used to recover X/Open XA distributed transactions   */
+        handlerton*	hton,	/*!< in: InnoDB handlerton */
+	THD*		thd,	/*!< in: handle to the MySQL thread of
+				the user whose XA transaction should
+				be prepared */
+	bool		all);	/*!< in: TRUE - commit transaction
+				FALSE - the current SQL statement
+				ended */
+/*******************************************************************//**
+This function is used to recover X/Open XA distributed transactions.
+@return	number of prepared transactions stored in xid_list */
 static
 int
 innobase_xa_recover(
 /*================*/
-				/* out: number of prepared transactions
-				stored in xid_list */
-	handlerton* hton,
-	XID*	xid_list,	/* in/out: prepared transactions */
-	uint	len);		/* in: number of slots in xid_list */
-/***********************************************************************
+	handlerton*	hton,	/*!< in: InnoDB handlerton */
+	XID*		xid_list,/*!< in/out: prepared transactions */
+	uint		len);	/*!< in: number of slots in xid_list */
+/*******************************************************************//**
 This function is used to commit one X/Open XA distributed transaction
-which is in the prepared state */
+which is in the prepared state
+@return	0 or error number */
 static
 int
 innobase_commit_by_xid(
 /*===================*/
-			/* out: 0 or error number */
 	handlerton* hton,
-	XID*	xid);	/* in: X/Open XA transaction identification */
-/***********************************************************************
+	XID*	xid);	/*!< in: X/Open XA transaction identification */
+/*******************************************************************//**
 This function is used to rollback one X/Open XA distributed transaction
-which is in the prepared state */
+which is in the prepared state
+@return	0 or error number */
 static
 int
 innobase_rollback_by_xid(
 /*=====================*/
-			/* out: 0 or error number */
-	handlerton* hton,
-	XID	*xid);	/* in: X/Open XA transaction identification */
-/***********************************************************************
+	handlerton*	hton,	/*!< in: InnoDB handlerton */
+	XID*		xid);	/*!< in: X/Open XA transaction
+				identification */
+/*******************************************************************//**
 Create a consistent view for a cursor based on current transaction
 which is created if the corresponding MySQL thread still lacks one.
 This consistent view is then used inside of MySQL when accessing records
-using a cursor. */
+using a cursor.
+@return	pointer to cursor view or NULL */
 static
 void*
 innobase_create_cursor_view(
 /*========================*/
-				/* out: pointer to cursor view or NULL */
-	handlerton*	hton,	/* in: innobase hton */
-	THD*		thd);	/* in: user thread handle */
-/***********************************************************************
+	handlerton*	hton,	/*!< in: innobase hton */
+	THD*		thd);	/*!< in: user thread handle */
+/*******************************************************************//**
 Set the given consistent cursor view to a transaction which is created
 if the corresponding MySQL thread still lacks one. If the given
 consistent cursor view is NULL global read view of a transaction is
@@ -266,9 +399,9 @@ void
 innobase_set_cursor_view(
 /*=====================*/
 	handlerton* hton,
-	THD*	thd,	/* in: user thread handle */
-	void*	curview);/* in: Consistent cursor view to be set */
-/***********************************************************************
+	THD*	thd,	/*!< in: user thread handle */
+	void*	curview);/*!< in: Consistent cursor view to be set */
+/*******************************************************************//**
 Close the given consistent cursor view of a transaction and restore
 global read view to a transaction read view. Transaction is created if the
 corresponding MySQL thread still lacks one. */
@@ -277,71 +410,70 @@ void
 innobase_close_cursor_view(
 /*=======================*/
 	handlerton* hton,
-	THD*	thd,	/* in: user thread handle */
-	void*	curview);/* in: Consistent read view to be closed */
-/*********************************************************************
+	THD*	thd,	/*!< in: user thread handle */
+	void*	curview);/*!< in: Consistent read view to be closed */
+/*****************************************************************//**
 Removes all tables in the named database inside InnoDB. */
 static
 void
 innobase_drop_database(
 /*===================*/
-			/* out: error number */
-	handlerton* hton, /* in: handlerton of Innodb */
-	char*	path);	/* in: database path; inside InnoDB the name
+	handlerton* hton, /*!< in: handlerton of Innodb */
+	char*	path);	/*!< in: database path; inside InnoDB the name
 			of the last directory in the path is used as
 			the database name: for example, in 'mysql/data/test'
 			the database name is 'test' */
-/***********************************************************************
+/*******************************************************************//**
 Closes an InnoDB database. */
 static
 int
 innobase_end(handlerton *hton, ha_panic_function type);
 
-/*********************************************************************
+/*****************************************************************//**
 Creates an InnoDB transaction struct for the thd if it does not yet have one.
 Starts a new InnoDB transaction if a transaction is not yet started. And
 assigns a new snapshot for a consistent read if the transaction does not yet
-have one. */
+have one.
+@return	0 */
 static
 int
 innobase_start_trx_and_assign_read_view(
 /*====================================*/
-			/* out: 0 */
-	handlerton* hton, /* in: Innodb handlerton */ 
-	THD*	thd);	/* in: MySQL thread handle of the user for whom
+	handlerton* hton, /*!< in: Innodb handlerton */ 
+	THD*	thd);	/*!< in: MySQL thread handle of the user for whom
 			the transaction should be committed */
-/********************************************************************
+/****************************************************************//**
 Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
-the logs, and the name of this function should be innobase_checkpoint. */
+the logs, and the name of this function should be innobase_checkpoint.
+@return	TRUE if error */
 static
 bool
 innobase_flush_logs(
 /*================*/
-				/* out: TRUE if error */
-	handlerton*	hton);	/* in: InnoDB handlerton */
+	handlerton*	hton);	/*!< in: InnoDB handlerton */
 
-/****************************************************************************
+/************************************************************************//**
 Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
 Monitor to the client. */
 static
 bool
 innodb_show_status(
 /*===============*/
-	handlerton*	hton,	/* in: the innodb handlerton */
-	THD*	thd,	/* in: the MySQL query thread of the caller */
+	handlerton*	hton,	/*!< in: the innodb handlerton */
+	THD*	thd,	/*!< in: the MySQL query thread of the caller */
 	stat_print_fn *stat_print);
 static
 bool innobase_show_status(handlerton *hton, THD* thd, 
                           stat_print_fn* stat_print,
                           enum ha_stat_type stat_type);
 
-/*********************************************************************
+/*****************************************************************//**
 Commits a transaction in an InnoDB database. */
 static
 void
 innobase_commit_low(
 /*================*/
-	trx_t*	trx);	/* in: transaction handle */
+	trx_t*	trx);	/*!< in: transaction handle */
 
 static SHOW_VAR innodb_status_variables[]= {
   {"buffer_pool_pages_data",
@@ -392,10 +524,8 @@ static SHOW_VAR innodb_status_variables[]= {
   (char*) &export_vars.innodb_dblwr_pages_written,	  SHOW_LONG},
   {"dblwr_writes",
   (char*) &export_vars.innodb_dblwr_writes,		  SHOW_LONG},
-  {"have_sync_atomic",
-  (char*) &export_vars.innodb_have_sync_atomic,		  SHOW_BOOL},
-  {"heap_enabled",
-  (char*) &export_vars.innodb_heap_enabled,		  SHOW_BOOL},
+  {"have_atomic_builtins",
+  (char*) &export_vars.innodb_have_atomic_builtins,	  SHOW_BOOL},
   {"log_waits",
   (char*) &export_vars.innodb_log_waits,		  SHOW_LONG},
   {"log_write_requests",
@@ -436,37 +566,35 @@ static SHOW_VAR innodb_status_variables[]= {
   (char*) &export_vars.innodb_rows_read,		  SHOW_LONG},
   {"rows_updated",
   (char*) &export_vars.innodb_rows_updated,		  SHOW_LONG},
-  {"wake_ups",
-  (char*) &export_vars.innodb_wake_ups,		  SHOW_LONG},
   {NullS, NullS, SHOW_LONG}
 };
 
 /* General functions */
 
-/**********************************************************************
+/******************************************************************//**
 Returns true if the thread is the replication thread on the slave
 server. Used in srv_conc_enter_innodb() to determine if the thread
 should be allowed to enter InnoDB - the replication thread is treated
 differently than other threads. Also used in
-srv_conc_force_exit_innodb(). */
-extern "C"
+srv_conc_force_exit_innodb().
+@return	true if thd is the replication thread */
+extern "C" UNIV_INTERN
 ibool
 thd_is_replication_slave_thread(
 /*============================*/
-			/* out: true if thd is the replication thread */
-	void*	thd)	/* in: thread handle (THD*) */
+	void*	thd)	/*!< in: thread handle (THD*) */
 {
 	return((ibool) thd_slave_thread((THD*) thd));
 }
 
-/**********************************************************************
+/******************************************************************//**
 Save some CPU by testing the value of srv_thread_concurrency in inline
 functions. */
-inline
+static inline
 void
 innodb_srv_conc_enter_innodb(
 /*=========================*/
-	trx_t*	trx)	/* in: transaction handle */
+	trx_t*	trx)	/*!< in: transaction handle */
 {
 	if (UNIV_LIKELY(!srv_thread_concurrency)) {
 
@@ -476,14 +604,14 @@ innodb_srv_conc_enter_innodb(
 	srv_conc_enter_innodb(trx);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Save some CPU by testing the value of srv_thread_concurrency in inline
 functions. */
-inline
+static inline
 void
 innodb_srv_conc_exit_innodb(
 /*========================*/
-	trx_t*	trx)	/* in: transaction handle */
+	trx_t*	trx)	/*!< in: transaction handle */
 {
 	if (UNIV_LIKELY(!trx->declared_to_be_inside_innodb)) {
 
@@ -493,16 +621,16 @@ innodb_srv_conc_exit_innodb(
 	srv_conc_exit_innodb(trx);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Releases possible search latch and InnoDB thread FIFO ticket. These should
 be released at each SQL statement end, and also when mysqld passes the
 control to the client. It does no harm to release these also in the middle
 of an SQL statement. */
-inline
+static inline
 void
 innobase_release_stat_resources(
 /*============================*/
-	trx_t*	trx)	/* in: transaction object */
+	trx_t*	trx)	/*!< in: transaction object */
 {
 	if (trx->has_search_latch) {
 		trx_search_latch_release_if_reserved(trx);
@@ -515,57 +643,85 @@ innobase_release_stat_resources(
 	}
 }
 
-/**********************************************************************
+/******************************************************************//**
 Returns true if the transaction this thread is processing has edited
 non-transactional tables. Used by the deadlock detector when deciding
 which transaction to rollback in case of a deadlock - we try to avoid
-rolling back transactions that have edited non-transactional tables. */
-extern "C"
+rolling back transactions that have edited non-transactional tables.
+@return	true if non-transactional tables have been edited */
+extern "C" UNIV_INTERN
 ibool
 thd_has_edited_nontrans_tables(
 /*===========================*/
-			/* out: true if non-transactional tables have
-			been edited */
-	void*	thd)	/* in: thread handle (THD*) */
+	void*	thd)	/*!< in: thread handle (THD*) */
 {
 	return((ibool) thd_non_transactional_update((THD*) thd));
 }
 
-/**********************************************************************
-Returns true if the thread is executing a SELECT statement. */
-extern "C"
+/******************************************************************//**
+Returns true if the thread is executing a SELECT statement.
+@return	true if thd is executing SELECT */
+extern "C" UNIV_INTERN
 ibool
 thd_is_select(
 /*==========*/
-				/* out: true if thd is executing SELECT */
-	const void*	thd)	/* in: thread handle (THD*) */
+	const void*	thd)	/*!< in: thread handle (THD*) */
 {
 	return(thd_sql_command((const THD*) thd) == SQLCOM_SELECT);
 }
 
-/************************************************************************
-Obtain the InnoDB transaction of a MySQL thread. */
-inline
+/******************************************************************//**
+Returns true if the thread supports XA,
+global value of innodb_supports_xa if thd is NULL.
+@return	true if thd has XA support */
+extern "C" UNIV_INTERN
+ibool
+thd_supports_xa(
+/*============*/
+	void*	thd)	/*!< in: thread handle (THD*), or NULL to query
+			the global innodb_supports_xa */
+{
+	return(THDVAR((THD*) thd, support_xa));
+}
+
+/******************************************************************//**
+Returns the lock wait timeout for the current connection.
+@return	the lock wait timeout, in seconds */
+extern "C" UNIV_INTERN
+ulong
+thd_lock_wait_timeout(
+/*==================*/
+	void*	thd)	/*!< in: thread handle (THD*), or NULL to query
+			the global innodb_lock_wait_timeout */
+{
+	/* According to <mysql/plugin.h>, passing thd == NULL
+	returns the global value of the session variable. */
+	return(THDVAR((THD*) thd, lock_wait_timeout));
+}
+
+/********************************************************************//**
+Obtain the InnoDB transaction of a MySQL thread.
+@return	reference to transaction pointer */
+static inline
 trx_t*&
 thd_to_trx(
 /*=======*/
-			/* out: reference to transaction pointer */
-	THD*	thd)	/* in: MySQL thread */
+	THD*	thd)	/*!< in: MySQL thread */
 {
 	return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr));
 }
 
-/************************************************************************
+/********************************************************************//**
 Call this function when mysqld passes control to the client. That is to
 avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
-documentation, see handler.cc. */
+documentation, see handler.cc.
+@return	0 */
 static
 int
 innobase_release_temporary_latches(
 /*===============================*/
-				/* out: 0 */
-	handlerton*	hton,	/* in: handlerton */
-	THD*		thd)	/* in: MySQL thread */
+	handlerton*	hton,	/*!< in: handlerton */
+	THD*		thd)	/*!< in: MySQL thread */
 {
 	trx_t*	trx;
 
@@ -573,7 +729,7 @@ innobase_release_temporary_latches(
 
 	if (!innodb_inited) {
 
-		return 0;
+		return(0);
 	}
 
 	trx = thd_to_trx(thd);
@@ -581,15 +737,15 @@ innobase_release_temporary_latches(
 	if (trx) {
 		innobase_release_stat_resources(trx);
 	}
-	return 0;
+	return(0);
 }
 
-/************************************************************************
+/********************************************************************//**
 Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
 time calls srv_active_wake_master_thread. This function should be used
 when a single database operation may introduce a small need for
 server utility activity, like checkpointing. */
-inline
+static inline
 void
 innobase_active_small(void)
 /*=======================*/
@@ -601,39 +757,40 @@ innobase_active_small(void)
 	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Converts an InnoDB error code to a MySQL error code and also tells to MySQL
 about a possible transaction rollback inside InnoDB caused by a lock wait
-timeout or a deadlock. */
-static
+timeout or a deadlock.
+@return	MySQL error code */
+extern "C" UNIV_INTERN
 int
 convert_error_code_to_mysql(
 /*========================*/
-			/* out: MySQL error code */
-	int	error,	/* in: InnoDB error code */
-	THD*	thd)	/* in: user thread handle or NULL */
+	int	error,	/*!< in: InnoDB error code */
+	ulint	flags,	/*!< in: InnoDB table flags, or 0 */
+	THD*	thd)	/*!< in: user thread handle or NULL */
 {
-	if (error == DB_SUCCESS) {
-
+	switch (error) {
+	case DB_SUCCESS:
 		return(0);
 
-	} else if (error == (int) DB_DUPLICATE_KEY) {
-
-		return(HA_ERR_FOUND_DUPP_KEY);
-
-	} else if (error == (int) DB_FOREIGN_DUPLICATE_KEY) {
-
-		return(HA_ERR_FOREIGN_DUPLICATE_KEY);
-
-	} else if (error == (int) DB_RECORD_NOT_FOUND) {
-
-		return(HA_ERR_NO_ACTIVE_RECORD);
-
-	} else if (error == (int) DB_ERROR) {
-
+	case DB_ERROR:
+	default:
 		return(-1); /* unspecified error */
 
-	} else if (error == (int) DB_DEADLOCK) {
+	case DB_DUPLICATE_KEY:
+		return(HA_ERR_FOUND_DUPP_KEY);
+
+	case DB_FOREIGN_DUPLICATE_KEY:
+		return(HA_ERR_FOREIGN_DUPLICATE_KEY);
+
+	case DB_MISSING_HISTORY:
+		return(HA_ERR_TABLE_DEF_CHANGED);
+
+	case DB_RECORD_NOT_FOUND:
+		return(HA_ERR_NO_ACTIVE_RECORD);
+
+	case DB_DEADLOCK:
 		/* Since we rolled back the whole transaction, we must
 		tell it also to MySQL so that MySQL knows to empty the
 		cached binlog for this transaction */
@@ -643,8 +800,8 @@ convert_error_code_to_mysql(
 		}
 
 		return(HA_ERR_LOCK_DEADLOCK);
-	} else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {
 
+	case DB_LOCK_WAIT_TIMEOUT:
 		/* Starting from 5.0.13, we let MySQL just roll back the
 		latest SQL statement in a lock wait timeout. Previously, we
 		rolled back the whole transaction. */
@@ -656,61 +813,58 @@ convert_error_code_to_mysql(
 
 		return(HA_ERR_LOCK_WAIT_TIMEOUT);
 
-	} else if (error == (int) DB_NO_REFERENCED_ROW) {
-
+	case DB_NO_REFERENCED_ROW:
 		return(HA_ERR_NO_REFERENCED_ROW);
 
-	} else if (error == (int) DB_ROW_IS_REFERENCED) {
-
+	case DB_ROW_IS_REFERENCED:
 		return(HA_ERR_ROW_IS_REFERENCED);
 
-	} else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) {
-
+	case DB_CANNOT_ADD_CONSTRAINT:
 		return(HA_ERR_CANNOT_ADD_FOREIGN);
 
-	} else if (error == (int) DB_CANNOT_DROP_CONSTRAINT) {
+	case DB_CANNOT_DROP_CONSTRAINT:
 
 		return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
 						misleading, a new MySQL error
 						code should be introduced */
-	} else if (error == (int) DB_COL_APPEARS_TWICE_IN_INDEX) {
 
+	case DB_COL_APPEARS_TWICE_IN_INDEX:
+	case DB_CORRUPTION:
 		return(HA_ERR_CRASHED);
 
-	} else if (error == (int) DB_OUT_OF_FILE_SPACE) {
-
+	case DB_OUT_OF_FILE_SPACE:
 		return(HA_ERR_RECORD_FILE_FULL);
 
-	} else if (error == (int) DB_TABLE_IS_BEING_USED) {
-
+	case DB_TABLE_IS_BEING_USED:
 		return(HA_ERR_WRONG_COMMAND);
 
-	} else if (error == (int) DB_TABLE_NOT_FOUND) {
-
+	case DB_TABLE_NOT_FOUND:
 		return(HA_ERR_NO_SUCH_TABLE);
 
-	} else if (error == (int) DB_TOO_BIG_RECORD) {
-
+	case DB_TOO_BIG_RECORD:
+		my_error(ER_TOO_BIG_ROWSIZE, MYF(0),
+			 page_get_free_space_of_empty(flags
+						      & DICT_TF_COMPACT) / 2);
 		return(HA_ERR_TO_BIG_ROW);
 
-	} else if (error == (int) DB_CORRUPTION) {
-
-		return(HA_ERR_CRASHED);
-	} else if (error == (int) DB_NO_SAVEPOINT) {
-
+	case DB_NO_SAVEPOINT:
 		return(HA_ERR_NO_SAVEPOINT);
-	} else if (error == (int) DB_LOCK_TABLE_FULL) {
- 		/* Since we rolled back the whole transaction, we must
- 		tell it also to MySQL so that MySQL knows to empty the
- 		cached binlog for this transaction */
+
+	case DB_LOCK_TABLE_FULL:
+		/* Since we rolled back the whole transaction, we must
+		tell it also to MySQL so that MySQL knows to empty the
+		cached binlog for this transaction */
 
 		if (thd) {
 			thd_mark_transaction_to_rollback(thd, TRUE);
 		}
 
-    		return(HA_ERR_LOCK_TABLE_FULL);
-	} else if (error == DB_TOO_MANY_CONCURRENT_TRXS) {
+		return(HA_ERR_LOCK_TABLE_FULL);
 
+	case DB_PRIMARY_KEY_IS_NULL:
+		return(ER_PRIMARY_CANT_HAVE_NULL);
+
+	case DB_TOO_MANY_CONCURRENT_TRXS:
 		/* Once MySQL add the appropriate code to errmsg.txt then
 		we can get rid of this #ifdef. NOTE: The code checked by
 		the #ifdef is the suggested name for the error condition
@@ -722,78 +876,68 @@ convert_error_code_to_mysql(
 #else
 		return(HA_ERR_RECORD_FILE_FULL);
 #endif
-
-	} else if (error == DB_UNSUPPORTED) {
-
+	case DB_UNSUPPORTED:
 		return(HA_ERR_UNSUPPORTED);
-    	} else {
-    		return(-1);			// Unknown error
-    	}
+	}
 }
 
-/*****************************************************************
+/*************************************************************//**
 If you want to print a thd that is not associated with the current thread,
 you must call this function before reserving the InnoDB kernel_mutex, to
 protect MySQL from setting thd->query NULL. If you print a thd of the current
 thread, we know that MySQL cannot modify thd->query, and it is not necessary
 to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
-the kernel_mutex.
-NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
-function! */
-extern "C"
+the kernel_mutex. */
+extern "C" UNIV_INTERN
 void
 innobase_mysql_prepare_print_arbitrary_thd(void)
 /*============================================*/
 {
+	ut_ad(!mutex_own(&kernel_mutex));
 	VOID(pthread_mutex_lock(&LOCK_thread_count));
 }
 
-/*****************************************************************
+/*************************************************************//**
 Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
-NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
-function! */
-extern "C"
+In the InnoDB latching order, the mutex sits right above the
+kernel_mutex.  In debug builds, we assert that the kernel_mutex is
+released before this function is invoked. */
+extern "C" UNIV_INTERN
 void
 innobase_mysql_end_print_arbitrary_thd(void)
 /*========================================*/
 {
+	ut_ad(!mutex_own(&kernel_mutex));
 	VOID(pthread_mutex_unlock(&LOCK_thread_count));
 }
 
-/*****************************************************************
-Prints info of a THD object (== user session thread) to the given file.
-NOTE that /mysql/innobase/trx/trx0trx.c must contain the prototype for
-this function! */
-extern "C"
+/*************************************************************//**
+Prints info of a THD object (== user session thread) to the given file. */
+extern "C" UNIV_INTERN
 void
 innobase_mysql_print_thd(
 /*=====================*/
-	FILE*	f,		/* in: output stream */
-	void*	input_thd,	/* in: pointer to a MySQL THD object */
-	uint	max_query_len)	/* in: max query length to print, or 0 to
+	FILE*	f,		/*!< in: output stream */
+	void*	thd,		/*!< in: pointer to a MySQL THD object */
+	uint	max_query_len)	/*!< in: max query length to print, or 0 to
 				   use the default max length */
 {
-	THD*	thd;
 	char	buffer[1024];
 
-	thd = (THD*) input_thd;
-	fputs(thd_security_context(thd, buffer, sizeof(buffer), 
+	fputs(thd_security_context((THD*) thd, buffer, sizeof buffer,
 				   max_query_len), f);
 	putc('\n', f);
 }
 
-/**********************************************************************
-Get the variable length bounds of the given character set.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/include/data0type.ic! */
-extern "C"
+/******************************************************************//**
+Get the variable length bounds of the given character set. */
+extern "C" UNIV_INTERN
 void
 innobase_get_cset_width(
 /*====================*/
-	ulint	cset,		/* in: MySQL charset-collation code */
-	ulint*	mbminlen,	/* out: minimum length of a char (in bytes) */
-	ulint*	mbmaxlen)	/* out: maximum length of a char (in bytes) */
+	ulint	cset,		/*!< in: MySQL charset-collation code */
+	ulint*	mbminlen,	/*!< out: minimum length of a char (in bytes) */
+	ulint*	mbmaxlen)	/*!< out: maximum length of a char (in bytes) */
 {
 	CHARSET_INFO*	cs;
 	ut_ad(cset < 256);
@@ -810,96 +954,175 @@ innobase_get_cset_width(
 	}
 }
 
-/**********************************************************************
-Converts an identifier to a table name.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
+/******************************************************************//**
+Converts an identifier to a table name. */
+extern "C" UNIV_INTERN
 void
 innobase_convert_from_table_id(
 /*===========================*/
-	char*		to,	/* out: converted identifier */
-	const char*	from,	/* in: identifier to convert */
-	ulint		len)	/* in: length of 'to', in bytes */
+	struct charset_info_st*	cs,	/*!< in: the 'from' character set */
+	char*			to,	/*!< out: converted identifier */
+	const char*		from,	/*!< in: identifier to convert */
+	ulint			len)	/*!< in: length of 'to', in bytes */
 {
 	uint	errors;
 
-	strconvert(thd_charset(current_thd), from,
-		   &my_charset_filename, to, (uint) len, &errors);
+	strconvert(cs, from, &my_charset_filename, to, (uint) len, &errors);
 }
 
-/**********************************************************************
-Converts an identifier to UTF-8.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
+/******************************************************************//**
+Converts an identifier to UTF-8. */
+extern "C" UNIV_INTERN
 void
 innobase_convert_from_id(
 /*=====================*/
-	char*		to,	/* out: converted identifier */
-	const char*	from,	/* in: identifier to convert */
-	ulint		len)	/* in: length of 'to', in bytes */
+	struct charset_info_st*	cs,	/*!< in: the 'from' character set */
+	char*			to,	/*!< out: converted identifier */
+	const char*		from,	/*!< in: identifier to convert */
+	ulint			len)	/*!< in: length of 'to', in bytes */
 {
 	uint	errors;
 
-	strconvert(thd_charset(current_thd), from,
-		   system_charset_info, to, (uint) len, &errors);
+	strconvert(cs, from, system_charset_info, to, (uint) len, &errors);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Compares NUL-terminated UTF-8 strings case insensitively.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
+@return	0 if a=b, <0 if a<b, >1 if a>b */
+extern "C" UNIV_INTERN
 int
 innobase_strcasecmp(
 /*================*/
-				/* out: 0 if a=b, <0 if a<b, >1 if a>b */
-	const char*	a,	/* in: first string to compare */
-	const char*	b)	/* in: second string to compare */
+	const char*	a,	/*!< in: first string to compare */
+	const char*	b)	/*!< in: second string to compare */
 {
 	return(my_strcasecmp(system_charset_info, a, b));
 }
 
-/**********************************************************************
-Makes all characters in a NUL-terminated UTF-8 string lower case.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
+/******************************************************************//**
+Makes all characters in a NUL-terminated UTF-8 string lower case. */
+extern "C" UNIV_INTERN
 void
 innobase_casedn_str(
 /*================*/
-	char*	a)	/* in/out: string to put in lower case */
+	char*	a)	/*!< in/out: string to put in lower case */
 {
 	my_casedn_str(system_charset_info, a);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Determines the connection character set.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
+@return	connection character set */
+extern "C" UNIV_INTERN
 struct charset_info_st*
 innobase_get_charset(
 /*=================*/
-				/* out: connection character set */
-	void*	mysql_thd)	/* in: MySQL thread handle */
+	void*	mysql_thd)	/*!< in: MySQL thread handle */
 {
 	return(thd_charset((THD*) mysql_thd));
 }
 
-/*************************************************************************
-Creates a temporary file. */
+#if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN)
+extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list;
+/*******************************************************************//**
+Map an OS error to an errno value. The OS error number is stored in
+_doserrno and the mapped value is stored in errno) */
 extern "C"
+void __cdecl
+_dosmaperr(
+	unsigned long);	/*!< in: OS error value */
+
+/*********************************************************************//**
+Creates a temporary file.
+@return	temporary file descriptor, or < 0 on error */
+extern "C" UNIV_INTERN
+int
+innobase_mysql_tmpfile(void)
+/*========================*/
+{
+	int	fd;				/* handle of opened file */
+	HANDLE	osfh;				/* OS handle of opened file */
+	char*	tmpdir;				/* point to the directory
+						where to create file */
+	TCHAR	path_buf[MAX_PATH - 14];	/* buffer for tmp file path.
+						The length cannot be longer
+						than MAX_PATH - 14, or
+						GetTempFileName will fail. */
+	char	filename[MAX_PATH];		/* name of the tmpfile */
+	DWORD	fileaccess = GENERIC_READ	/* OS file access */
+			     | GENERIC_WRITE
+			     | DELETE;
+	DWORD	fileshare = FILE_SHARE_READ	/* OS file sharing mode */
+			    | FILE_SHARE_WRITE
+			    | FILE_SHARE_DELETE;
+	DWORD	filecreate = CREATE_ALWAYS;	/* OS method of open/create */
+	DWORD	fileattrib =			/* OS file attribute flags */
+			     FILE_ATTRIBUTE_NORMAL
+			     | FILE_FLAG_DELETE_ON_CLOSE
+			     | FILE_ATTRIBUTE_TEMPORARY
+			     | FILE_FLAG_SEQUENTIAL_SCAN;
+
+	DBUG_ENTER("innobase_mysql_tmpfile");
+
+	tmpdir = my_tmpdir(&mysql_tmpdir_list);
+
+	/* The tmpdir parameter can not be NULL for GetTempFileName. */
+	if (!tmpdir) {
+		uint	ret;
+
+		/* Use GetTempPath to determine path for temporary files. */
+		ret = GetTempPath(sizeof(path_buf), path_buf);
+		if (ret > sizeof(path_buf) || (ret == 0)) {
+
+			_dosmaperr(GetLastError());	/* map error */
+			DBUG_RETURN(-1);
+		}
+
+		tmpdir = path_buf;
+	}
+
+	/* Use GetTempFileName to generate a unique filename. */
+	if (!GetTempFileName(tmpdir, "ib", 0, filename)) {
+
+		_dosmaperr(GetLastError());	/* map error */
+		DBUG_RETURN(-1);
+	}
+
+	DBUG_PRINT("info", ("filename: %s", filename));
+
+	/* Open/Create the file. */
+	osfh = CreateFile(filename, fileaccess, fileshare, NULL,
+			  filecreate, fileattrib, NULL);
+	if (osfh == INVALID_HANDLE_VALUE) {
+
+		/* open/create file failed! */
+		_dosmaperr(GetLastError());	/* map error */
+		DBUG_RETURN(-1);
+	}
+
+	do {
+		/* Associates a CRT file descriptor with the OS file handle. */
+		fd = _open_osfhandle((intptr_t) osfh, 0);
+	} while (fd == -1 && errno == EINTR);
+
+	if (fd == -1) {
+		/* Open failed, close the file handle. */
+
+		_dosmaperr(GetLastError());	/* map error */
+		CloseHandle(osfh);		/* no need to check if
+						CloseHandle fails */
+	}
+
+	DBUG_RETURN(fd);
+}
+#else
+/*********************************************************************//**
+Creates a temporary file.
+@return	temporary file descriptor, or < 0 on error */
+extern "C" UNIV_INTERN
 int
 innobase_mysql_tmpfile(void)
 /*========================*/
-			/* out: temporary file descriptor, or < 0 on error */
 {
 	int	fd2 = -1;
 	File	fd = mysql_tmpfile("ib");
@@ -924,28 +1147,69 @@ innobase_mysql_tmpfile(void)
 	}
 	return(fd2);
 }
+#endif /* defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) */
 
-/*************************************************************************
-Wrapper around MySQL's copy_and_convert function, see it for
-documentation. */
-extern "C"
+/*********************************************************************//**
+Wrapper around MySQL's copy_and_convert function.
+@return	number of bytes copied to 'to' */
+extern "C" UNIV_INTERN
 ulint
 innobase_convert_string(
 /*====================*/
-	void*		to,
-	ulint		to_length,
-	CHARSET_INFO*	to_cs,
-	const void*	from,
-	ulint		from_length,
-	CHARSET_INFO*	from_cs,
-	uint*		errors)
+	void*		to,		/*!< out: converted string */
+	ulint		to_length,	/*!< in: number of bytes reserved
+					for the converted string */
+	CHARSET_INFO*	to_cs,		/*!< in: character set to convert to */
+	const void*	from,		/*!< in: string to convert */
+	ulint		from_length,	/*!< in: number of bytes to convert */
+	CHARSET_INFO*	from_cs,	/*!< in: character set to convert from */
+	uint*		errors)		/*!< out: number of errors encountered
+					during the conversion */
 {
   return(copy_and_convert((char*)to, (uint32) to_length, to_cs,
                           (const char*)from, (uint32) from_length, from_cs,
                           errors));
 }
 
-/*************************************************************************
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) that is of
+type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
+the result to "buf". The result is converted to "system_charset_info".
+Not more than "buf_size" bytes are written to "buf".
+The result is always NUL-terminated (provided buf_size > 0) and the
+number of bytes that were written to "buf" is returned (including the
+terminating NUL).
+@return	number of bytes that were written */
+extern "C" UNIV_INTERN
+ulint
+innobase_raw_format(
+/*================*/
+	const char*	data,		/*!< in: raw data */
+	ulint		data_len,	/*!< in: raw data length
+					in bytes */
+	ulint		charset_coll,	/*!< in: charset collation */
+	char*		buf,		/*!< out: output buffer */
+	ulint		buf_size)	/*!< in: output buffer size
+					in bytes */
+{
+	/* XXX we use a hard limit instead of allocating
+	but_size bytes from the heap */
+	CHARSET_INFO*	data_cs;
+	char		buf_tmp[8192];
+	ulint		buf_tmp_used;
+	uint		num_errors;
+
+	data_cs = all_charsets[charset_coll];
+
+	buf_tmp_used = innobase_convert_string(buf_tmp, sizeof(buf_tmp),
+					       system_charset_info,
+					       data, data_len, data_cs,
+					       &num_errors);
+
+	return(ut_str_sql_format(buf_tmp, buf_tmp_used, buf, buf_size));
+}
+
+/*********************************************************************//**
 Compute the next autoinc value.
 
 For MySQL replication the autoincrement values can be partitioned among
@@ -961,16 +1225,16 @@ values we want to reserve for multi-value inserts e.g.,
 
 innobase_next_autoinc() will be called with increment set to
 n * 3 where autoinc_lock_mode != TRADITIONAL because we want
-to reserve 3 values for the multi-value INSERT above. */
+to reserve 3 values for the multi-value INSERT above.
+@return	the next value */
 static
 ulonglong
 innobase_next_autoinc(
 /*==================*/
-					/* out: the next value */
-	ulonglong	current,	/* in: Current value */
-	ulonglong	increment,	/* in: increment current by */
-	ulonglong	offset,		/* in: AUTOINC offset */
-	ulonglong	max_value)	/* in: max value for type */
+	ulonglong	current,	/*!< in: Current value */
+	ulonglong	increment,	/*!< in: increment current by */
+	ulonglong	offset,		/*!< in: AUTOINC offset */
+	ulonglong	max_value)	/*!< in: max value for type */
 {
 	ulonglong	next_value;
 
@@ -993,7 +1257,7 @@ innobase_next_autoinc(
 		} else {
 			next_value = current + increment;
 		}
-	} else {
+	} else if (max_value > current) {
 		if (current > offset) {
 			next_value = ((current - offset) / increment) + 1;
 		} else {
@@ -1019,6 +1283,8 @@ innobase_next_autoinc(
 				next_value += offset;
 			}
 		}
+	} else {
+		next_value = max_value;
 	}
 
 	ut_a(next_value <= max_value);
@@ -1026,58 +1292,84 @@ innobase_next_autoinc(
 	return(next_value);
 }
 
-/*************************************************************************
+/*********************************************************************//**
+Initializes some fields in an InnoDB transaction object. */
+static
+void
+innobase_trx_init(
+/*==============*/
+	THD*	thd,	/*!< in: user thread handle */
+	trx_t*	trx)	/*!< in/out: InnoDB transaction handle */
+{
+	DBUG_ENTER("innobase_trx_init");
+	DBUG_ASSERT(EQ_CURRENT_THD(thd));
+	DBUG_ASSERT(thd == trx->mysql_thd);
+
+	trx->check_foreigns = !thd_test_options(
+		thd, OPTION_NO_FOREIGN_KEY_CHECKS);
+
+	trx->check_unique_secondary = !thd_test_options(
+		thd, OPTION_RELAXED_UNIQUE_CHECKS);
+
+	DBUG_VOID_RETURN;
+}
+
+/*********************************************************************//**
+Allocates an InnoDB transaction for a MySQL handler object.
+@return	InnoDB transaction handle */
+extern "C" UNIV_INTERN
+trx_t*
+innobase_trx_allocate(
+/*==================*/
+	THD*	thd)	/*!< in: user thread handle */
+{
+	trx_t*	trx;
+
+	DBUG_ENTER("innobase_trx_allocate");
+	DBUG_ASSERT(thd != NULL);
+	DBUG_ASSERT(EQ_CURRENT_THD(thd));
+
+	trx = trx_allocate_for_mysql();
+
+	trx->mysql_thd = thd;
+	trx->mysql_query_str = thd_query(thd);
+
+	innobase_trx_init(thd, trx);
+
+	DBUG_RETURN(trx);
+}
+
+/*********************************************************************//**
 Gets the InnoDB transaction handle for a MySQL handler object, creates
 an InnoDB transaction struct if the corresponding MySQL thread struct still
-lacks one. */
+lacks one.
+@return	InnoDB transaction handle */
 static
 trx_t*
 check_trx_exists(
 /*=============*/
-			/* out: InnoDB transaction handle */
-	THD*	thd)	/* in: user thread handle */
+	THD*	thd)	/*!< in: user thread handle */
 {
 	trx_t*&	trx = thd_to_trx(thd);
 
-	ut_ad(thd == current_thd);
+	ut_ad(EQ_CURRENT_THD(thd));
 
 	if (trx == NULL) {
-		DBUG_ASSERT(thd != NULL);
-		trx = trx_allocate_for_mysql();
-
-		trx->mysql_thd = thd;
-		trx->mysql_query_str = thd_query(thd);
-
-		/* Update the info whether we should skip XA steps that eat
-		CPU time */
-		trx->support_xa = THDVAR(thd, support_xa);
-	} else {
-		if (trx->magic_n != TRX_MAGIC_N) {
-			mem_analyze_corruption(trx);
-
-			ut_error;
-		}
+		trx = innobase_trx_allocate(thd);
+	} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
+		mem_analyze_corruption(trx);
+		ut_error;
 	}
 
-	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
-		trx->check_foreigns = FALSE;
-	} else {
-		trx->check_foreigns = TRUE;
-	}
-
-	if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
-		trx->check_unique_secondary = FALSE;
-	} else {
-		trx->check_unique_secondary = TRUE;
-	}
+	innobase_trx_init(thd, trx);
 
 	return(trx);
 }
 
 
-/*************************************************************************
+/*********************************************************************//**
 Construct ha_innobase handler. */
-
+UNIV_INTERN
 ha_innobase::ha_innobase(handlerton *hton, TABLE_SHARE *table_arg)
   :handler(hton, table_arg),
   int_table_flags(HA_REC_NOT_IN_SEQ |
@@ -1093,16 +1385,22 @@ ha_innobase::ha_innobase(handlerton *hton, TABLE_SHARE *table_arg)
   num_write_row(0)
 {}
 
-/*************************************************************************
+/*********************************************************************//**
+Destruct ha_innobase handler. */
+UNIV_INTERN
+ha_innobase::~ha_innobase()
+{
+}
+
+/*********************************************************************//**
 Updates the user_thd field in a handle and also allocates a new InnoDB
 transaction handle if needed, and updates the transaction fields in the
 prebuilt struct. */
-inline
-int
+UNIV_INTERN inline
+void
 ha_innobase::update_thd(
 /*====================*/
-			/* out: 0 or error code */
-	THD*	thd)	/* in: thd to use the handle */
+	THD*	thd)	/*!< in: thd to use the handle */
 {
 	trx_t*		trx;
 
@@ -1114,39 +1412,52 @@ ha_innobase::update_thd(
 	}
 
 	user_thd = thd;
-
-	return(0);
 }
 
-/*************************************************************************
+/*********************************************************************//**
+Updates the user_thd field in a handle and also allocates a new InnoDB
+transaction handle if needed, and updates the transaction fields in the
+prebuilt struct. */
+UNIV_INTERN
+void
+ha_innobase::update_thd()
+/*=====================*/
+{
+	THD*	thd = ha_thd();
+	ut_ad(EQ_CURRENT_THD(thd));
+	update_thd(thd);
+}
+
+/*********************************************************************//**
 Registers that InnoDB takes part in an SQL statement, so that MySQL knows to
 roll back the statement if the statement results in an error. This MUST be
 called for every SQL statement that may be rolled back by MySQL. Calling this
 several times to register the same statement is allowed, too. */
-inline
+static inline
 void
 innobase_register_stmt(
 /*===================*/
-        handlerton*	hton,	/* in: Innobase hton */
-	THD*	thd)	/* in: MySQL thd (connection) object */
+        handlerton*	hton,	/*!< in: Innobase hton */
+	THD*	thd)	/*!< in: MySQL thd (connection) object */
 {
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 	/* Register the statement */
 	trans_register_ha(thd, FALSE, hton);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Registers an InnoDB transaction in MySQL, so that the MySQL XA code knows
 to call the InnoDB prepare and commit, or rollback for the transaction. This
 MUST be called for every transaction for which the user may call commit or
 rollback. Calling this several times to register the same transaction is
 allowed, too.
 This function also registers the current SQL statement. */
-inline
+static inline
 void
 innobase_register_trx_and_stmt(
 /*===========================*/
-        handlerton *hton, /* in: Innobase handlerton */
-	THD*	thd)	/* in: MySQL thd (connection) object */
+        handlerton *hton, /*!< in: Innobase handlerton */
+	THD*	thd)	/*!< in: MySQL thd (connection) object */
 {
 	/* NOTE that actually innobase_register_stmt() registers also
 	the transaction in the AUTOCOMMIT=1 mode. */
@@ -1203,7 +1514,7 @@ AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
 put restrictions on the use of the query cache.
 */
 
-/**********************************************************************
+/******************************************************************//**
 The MySQL query cache uses this to check from InnoDB if the query cache at
 the moment is allowed to operate on an InnoDB table. The SQL query must
 be a non-locking SELECT.
@@ -1220,24 +1531,23 @@ at the start of a SELECT processing. Then the calling thread cannot be
 holding any InnoDB semaphores. The calling thread is holding the
 query cache mutex, and this function will reserver the InnoDB kernel mutex.
 Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above
-the InnoDB kernel mutex. */
+the InnoDB kernel mutex.
+@return TRUE if permitted, FALSE if not; note that the value FALSE
+does not mean we should invalidate the query cache: invalidation is
+called explicitly */
 static
 my_bool
 innobase_query_caching_of_table_permitted(
 /*======================================*/
-				/* out: TRUE if permitted, FALSE if not;
-				note that the value FALSE does not mean
-				we should invalidate the query cache:
-				invalidation is called explicitly */
-	THD*	thd,		/* in: thd of the user who is trying to
+	THD*	thd,		/*!< in: thd of the user who is trying to
 				store a result to the query cache or
 				retrieve it */
-	char*	full_name,	/* in: concatenation of database name,
-				the null character '\0', and the table
+	char*	full_name,	/*!< in: concatenation of database name,
+				the null character NUL, and the table
 				name */
-	uint	full_name_len,	/* in: length of the full name, i.e.
+	uint	full_name_len,	/*!< in: length of the full name, i.e.
 				len(dbname) + len(tablename) + 1 */
-	ulonglong *unused)	/* unused for this engine */
+	ulonglong *unused)	/*!< unused for this engine */
 {
 	ibool	is_autocommit;
 	trx_t*	trx;
@@ -1259,9 +1569,9 @@ innobase_query_caching_of_table_permitted(
 				"search, latch though calling "
 				"innobase_query_caching_of_table_permitted.");
 
-		mutex_enter_noninline(&kernel_mutex);
+		mutex_enter(&kernel_mutex);
 		trx_print(stderr, trx, 1024);
-		mutex_exit_noninline(&kernel_mutex);
+		mutex_exit(&kernel_mutex);
 	}
 
 	innobase_release_stat_resources(trx);
@@ -1327,21 +1637,21 @@ innobase_query_caching_of_table_permitted(
 	return((my_bool)FALSE);
 }
 
-/*********************************************************************
-Invalidates the MySQL query cache for the table.
-NOTE that the exact prototype of this function has to be in
-/innobase/row/row0ins.c! */
-extern "C"
+/*****************************************************************//**
+Invalidates the MySQL query cache for the table. */
+extern "C" UNIV_INTERN
 void
 innobase_invalidate_query_cache(
 /*============================*/
-	trx_t*	trx,		/* in: transaction which modifies the table */
-	char*	full_name,	/* in: concatenation of database name, null
-				char '\0', table name, null char'\0';
-				NOTE that in Windows this is always
-				in LOWER CASE! */
-	ulint	full_name_len)	/* in: full name length where also the null
-				chars count */
+	trx_t*		trx,		/*!< in: transaction which
+					modifies the table */
+	const char*	full_name,	/*!< in: concatenation of
+					database name, null char NUL,
+					table name, null char NUL;
+					NOTE that in Windows this is
+					always in LOWER CASE! */
+	ulint		full_name_len)	/*!< in: full name length where
+					also the null chars count */
 {
 	/* Note that the sync0sync.h rank of the query cache mutex is just
 	above the InnoDB kernel mutex. The caller of this function must not
@@ -1350,110 +1660,186 @@ innobase_invalidate_query_cache(
 	/* Argument TRUE below means we are using transactions */
 #ifdef HAVE_QUERY_CACHE
 	mysql_query_cache_invalidate4((THD*) trx->mysql_thd,
-				      (const char*) full_name,
+				      full_name,
 				      (uint32) full_name_len,
 				      TRUE);
 #endif
 }
 
-/*********************************************************************
-Display an SQL identifier. */
-extern "C"
-void
-innobase_print_identifier(
-/*======================*/
-	FILE*		f,	/* in: output stream */
-	trx_t*		trx,	/* in: transaction */
-	ibool		table_id,/* in: TRUE=print a table name,
-				FALSE=print other identifier */
-	const char*	name,	/* in: name to print */
-	ulint		namelen)/* in: length of name */
+/*****************************************************************//**
+Convert an SQL identifier to the MySQL system_charset_info (UTF-8)
+and quote it if needed.
+@return	pointer to the end of buf */
+static
+char*
+innobase_convert_identifier(
+/*========================*/
+	char*		buf,	/*!< out: buffer for converted identifier */
+	ulint		buflen,	/*!< in: length of buf, in bytes */
+	const char*	id,	/*!< in: identifier to convert */
+	ulint		idlen,	/*!< in: length of id, in bytes */
+	void*		thd,	/*!< in: MySQL connection thread, or NULL */
+	ibool		file_id)/*!< in: TRUE=id is a table or database name;
+				FALSE=id is an UTF-8 string */
 {
-	const char*	s	= name;
-	char*		qname	= NULL;
+	char nz[NAME_LEN + 1];
+	char nz2[NAME_LEN + 1 + sizeof srv_mysql50_table_name_prefix];
+
+	const char*	s	= id;
 	int		q;
 
-	if (table_id) {
+	if (file_id) {
 		/* Decode the table name.  The filename_to_tablename()
 		function expects a NUL-terminated string.  The input and
-		output strings buffers must not be shared.  The function
-		only produces more output when the name contains other
-		characters than [0-9A-Z_a-z]. */
-          char*	temp_name = (char*) my_malloc((uint) namelen + 1, MYF(MY_WME));
-          uint	qnamelen = (uint) (namelen
-                                   + (1 + sizeof srv_mysql50_table_name_prefix));
+		output strings buffers must not be shared. */
 
-		if (temp_name) {
-                  qname = (char*) my_malloc(qnamelen, MYF(MY_WME));
-			if (qname) {
-				memcpy(temp_name, name, namelen);
-				temp_name[namelen] = 0;
-				s = qname;
-				namelen = filename_to_tablename(temp_name,
-						qname, qnamelen);
-			}
-			my_free(temp_name, MYF(0));
+		if (UNIV_UNLIKELY(idlen > (sizeof nz) - 1)) {
+			idlen = (sizeof nz) - 1;
 		}
+
+		memcpy(nz, id, idlen);
+		nz[idlen] = 0;
+
+		s = nz2;
+		idlen = filename_to_tablename(nz, nz2, sizeof nz2);
 	}
 
-	if (!trx || !trx->mysql_thd) {
-
+	/* See if the identifier needs to be quoted. */
+	if (UNIV_UNLIKELY(!thd)) {
 		q = '"';
 	} else {
-		q = get_quote_char_for_identifier((THD*) trx->mysql_thd,
-						s, (int) namelen);
+		q = get_quote_char_for_identifier((THD*) thd, s, (int) idlen);
 	}
 
 	if (q == EOF) {
-		fwrite(s, 1, namelen, f);
-	} else {
-		const char*	e = s + namelen;
-		putc(q, f);
-		while (s < e) {
-			int	c = *s++;
-			if (c == q) {
-				putc(c, f);
-			}
-			putc(c, f);
+		if (UNIV_UNLIKELY(idlen > buflen)) {
+			idlen = buflen;
 		}
-		putc(q, f);
+		memcpy(buf, s, idlen);
+		return(buf + idlen);
 	}
 
-	my_free(qname, MYF(MY_ALLOW_ZERO_PTR));
+	/* Quote the identifier. */
+	if (buflen < 2) {
+		return(buf);
+	}
+
+	*buf++ = q;
+	buflen--;
+
+	for (; idlen; idlen--) {
+		int	c = *s++;
+		if (UNIV_UNLIKELY(c == q)) {
+			if (UNIV_UNLIKELY(buflen < 3)) {
+				break;
+			}
+
+			*buf++ = c;
+			*buf++ = c;
+			buflen -= 2;
+		} else {
+			if (UNIV_UNLIKELY(buflen < 2)) {
+				break;
+			}
+
+			*buf++ = c;
+			buflen--;
+		}
+	}
+
+	*buf++ = q;
+	return(buf);
 }
 
-/**************************************************************************
-Determines if the currently running transaction has been interrupted. */
-extern "C"
+/*****************************************************************//**
+Convert a table or index name to the MySQL system_charset_info (UTF-8)
+and quote it if needed.
+@return	pointer to the end of buf */
+extern "C" UNIV_INTERN
+char*
+innobase_convert_name(
+/*==================*/
+	char*		buf,	/*!< out: buffer for converted identifier */
+	ulint		buflen,	/*!< in: length of buf, in bytes */
+	const char*	id,	/*!< in: identifier to convert */
+	ulint		idlen,	/*!< in: length of id, in bytes */
+	void*		thd,	/*!< in: MySQL connection thread, or NULL */
+	ibool		table_id)/*!< in: TRUE=id is a table or database name;
+				FALSE=id is an index name */
+{
+	char*		s	= buf;
+	const char*	bufend	= buf + buflen;
+
+	if (table_id) {
+		const char*	slash = (const char*) memchr(id, '/', idlen);
+		if (!slash) {
+
+			goto no_db_name;
+		}
+
+		/* Print the database name and table name separately. */
+		s = innobase_convert_identifier(s, bufend - s, id, slash - id,
+						thd, TRUE);
+		if (UNIV_LIKELY(s < bufend)) {
+			*s++ = '.';
+			s = innobase_convert_identifier(s, bufend - s,
+							slash + 1, idlen
+							- (slash - id) - 1,
+							thd, TRUE);
+		}
+	} else if (UNIV_UNLIKELY(*id == TEMP_INDEX_PREFIX)) {
+		/* Temporary index name (smart ALTER TABLE) */
+		const char temp_index_suffix[]= "--temporary--";
+
+		s = innobase_convert_identifier(buf, buflen, id + 1, idlen - 1,
+						thd, FALSE);
+		if (s - buf + (sizeof temp_index_suffix - 1) < buflen) {
+			memcpy(s, temp_index_suffix,
+			       sizeof temp_index_suffix - 1);
+			s += sizeof temp_index_suffix - 1;
+		}
+	} else {
+no_db_name:
+		s = innobase_convert_identifier(buf, buflen, id, idlen,
+						thd, table_id);
+	}
+
+	return(s);
+
+}
+
+/**********************************************************************//**
+Determines if the currently running transaction has been interrupted.
+@return	TRUE if interrupted */
+extern "C" UNIV_INTERN
 ibool
 trx_is_interrupted(
 /*===============*/
-			/* out: TRUE if interrupted */
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in: transaction */
 {
 	return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd));
 }
 
-/******************************************************************
+/**************************************************************//**
 Resets some fields of a prebuilt struct. The template is used in fast
 retrieval of just those column values MySQL needs in its processing. */
 static
 void
 reset_template(
 /*===========*/
-	row_prebuilt_t*	prebuilt)	/* in/out: prebuilt struct */
+	row_prebuilt_t*	prebuilt)	/*!< in/out: prebuilt struct */
 {
 	prebuilt->keep_other_fields_on_keyread = 0;
 	prebuilt->read_just_key = 0;
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Call this when you have opened a new table handle in HANDLER, before you
 call index_read_idx() etc. Actually, we can let the cursor stay open even
 over a transaction commit! Then you should call this before every operation,
 fetch next etc. This function inits the necessary things even after a
 transaction commit. */
-
+UNIV_INTERN
 void
 ha_innobase::init_table_handle_for_HANDLER(void)
 /*============================================*/
@@ -1472,7 +1858,7 @@ ha_innobase::init_table_handle_for_HANDLER(void)
 
 	/* If the transaction is not started yet, start it */
 
-	trx_start_if_not_started_noninline(prebuilt->trx);
+	trx_start_if_not_started(prebuilt->trx);
 
 	/* Assign a read view if the transaction does not have it yet */
 
@@ -1509,19 +1895,20 @@ ha_innobase::init_table_handle_for_HANDLER(void)
 	reset_template(prebuilt);
 }
 
-/*************************************************************************
-Opens an InnoDB database. */
+/*********************************************************************//**
+Opens an InnoDB database.
+@return	0 on success, error code on failure */
 static
 int
 innobase_init(
 /*==========*/
-			/* out: 0 on success, error code on failure */
-	void	*p)	/* in: InnoDB handlerton */
+	void	*p)	/*!< in: InnoDB handlerton */
 {
-	static char	current_dir[3];		/* Set if using current lib */
+	static char	current_dir[3];		/*!< Set if using current lib */
 	int		err;
 	bool		ret;
 	char		*default_path;
+	uint		format_id;
 
 	DBUG_ENTER("innobase_init");
         handlerton *innobase_hton= (handlerton *)p;
@@ -1551,6 +1938,7 @@ innobase_init(
         innobase_hton->show_status=innobase_show_status;
         innobase_hton->flags=HTON_NO_FLAGS;
         innobase_hton->release_temporary_latches=innobase_release_temporary_latches;
+	innobase_hton->alter_table_flags = innobase_alter_table_flags;
 
 	ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
 
@@ -1644,16 +2032,12 @@ innobase_init(
 						   MYF(MY_FAE));
 
 	ret = (bool) srv_parse_data_file_paths_and_sizes(
-				internal_innobase_data_file_path,
-				&srv_data_file_names,
-				&srv_data_file_sizes,
-				&srv_data_file_is_raw_partition,
-				&srv_n_data_files,
-				&srv_auto_extend_last_data_file,
-				&srv_last_file_size_max);
+		internal_innobase_data_file_path);
 	if (ret == FALSE) {
 		sql_print_error(
 			"InnoDB: syntax error in innodb_data_file_path");
+mem_free_and_error:
+		srv_free_paths_and_sizes();
 		my_free(internal_innobase_data_file_path,
 						MYF(MY_ALLOW_ZERO_PTR));
 		goto error;
@@ -1678,18 +2062,93 @@ innobase_init(
 #endif /* UNIG_LOG_ARCHIVE */
 
 	ret = (bool)
-		srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
-						&srv_log_group_home_dirs);
+		srv_parse_log_group_home_dirs(innobase_log_group_home_dir);
 
 	if (ret == FALSE || innobase_mirrored_log_groups != 1) {
 	  sql_print_error("syntax error in innodb_log_group_home_dir, or a "
 			  "wrong number of mirrored log groups");
 
-		my_free(internal_innobase_data_file_path,
-						MYF(MY_ALLOW_ZERO_PTR));
-		goto error;
+		goto mem_free_and_error;
 	}
 
+	/* Validate the file format by animal name */
+	if (innobase_file_format_name != NULL) {
+
+		format_id = innobase_file_format_name_lookup(
+			innobase_file_format_name);
+
+		if (format_id > DICT_TF_FORMAT_MAX) {
+
+			sql_print_error("InnoDB: wrong innodb_file_format.");
+
+			goto mem_free_and_error;
+		}
+	} else {
+		/* Set it to the default file format id. Though this
+		should never happen. */
+		format_id = 0;
+	}
+
+	srv_file_format = format_id;
+
+	/* Given the type of innobase_file_format_name we have little
+	choice but to cast away the constness from the returned name.
+	innobase_file_format_name is used in the MySQL set variable
+	interface and so can't be const. */
+
+	innobase_file_format_name = 
+		(char*) trx_sys_file_format_id_to_name(format_id);
+
+	/* Process innobase_file_format_check variable */
+	ut_a(innobase_file_format_check != NULL);
+
+	/* As a side effect it will set srv_check_file_format_at_startup
+	on valid input. First we check for "on"/"off". */
+	if (!innobase_file_format_check_on_off(innobase_file_format_check)) {
+
+		/* Did the user specify a format name that we support ?
+		As a side effect it will update the variable
+		srv_check_file_format_at_startup */
+		if (!innobase_file_format_check_validate(
+			innobase_file_format_check)) {
+
+			sql_print_error("InnoDB: invalid "
+					"innodb_file_format_check value: "
+					"should be either 'on' or 'off' or "
+					"any value up to %s or its "
+					"equivalent numeric id",
+					trx_sys_file_format_id_to_name(
+						DICT_TF_FORMAT_MAX));
+
+			goto mem_free_and_error;
+		}
+	}
+
+	if (innobase_change_buffering) {
+		ulint	use;
+
+		for (use = 0;
+		     use < UT_ARR_SIZE(innobase_change_buffering_values);
+		     use++) {
+			if (!innobase_strcasecmp(
+				    innobase_change_buffering,
+				    innobase_change_buffering_values[use])) {
+				ibuf_use = (ibuf_use_t) use;
+				goto innobase_change_buffering_inited_ok;
+			}
+		}
+
+		sql_print_error("InnoDB: invalid value "
+				"innodb_file_format_check=%s",
+				innobase_change_buffering);
+		goto mem_free_and_error;
+	}
+
+innobase_change_buffering_inited_ok:
+	ut_a((ulint) ibuf_use < UT_ARR_SIZE(innobase_change_buffering_values));
+	innobase_change_buffering = (char*)
+		innobase_change_buffering_values[ibuf_use];
+
 	/* --------------------------------------------------*/
 
 	srv_file_flush_method_str = innobase_unix_file_flush_method;
@@ -1698,41 +2157,19 @@ innobase_init(
 	srv_n_log_files = (ulint) innobase_log_files_in_group;
 	srv_log_file_size = (ulint) innobase_log_file_size;
 
-        srv_thread_concurrency_timer_based =
-          (ibool) innobase_thread_concurrency_timer_based;
-
 #ifdef UNIV_LOG_ARCHIVE
 	srv_log_archive_on = (ulint) innobase_log_archive;
 #endif /* UNIV_LOG_ARCHIVE */
 	srv_log_buffer_size = (ulint) innobase_log_buffer_size;
 
-	srv_io_capacity = (ulint) innobase_io_capacity;
-	srv_extra_dirty_writes = (ulint) innobase_extra_dirty_writes;
-
-	/* We set srv_pool_size here in units of 1 kB. InnoDB internally
-	changes the value so that it becomes the number of database pages. */
-
-	if (innobase_buffer_pool_awe_mem_mb == 0) {
-		srv_pool_size = (ulint)(innobase_buffer_pool_size / 1024);
-	} else {
-		srv_use_awe = TRUE;
-		srv_pool_size = (ulint)
-				(1024 * innobase_buffer_pool_awe_mem_mb);
-		srv_awe_window_size = (ulint) innobase_buffer_pool_size;
-
-		/* Note that what the user specified as
-		innodb_buffer_pool_size is actually the AWE memory window
-		size in this case, and the real buffer pool size is
-		determined by .._awe_mem_mb. */
-	}
+	srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
 
 	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
 
-        srv_n_read_io_threads = (ulint) innobase_read_io_threads;
-        srv_n_write_io_threads = (ulint) innobase_write_io_threads;
-        srv_max_merged_io = (ulint) innobase_max_merged_io;
+	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
+	srv_n_read_io_threads = (ulint) innobase_read_io_threads;
+	srv_n_write_io_threads = (ulint) innobase_write_io_threads;
 
-	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
 	srv_force_recovery = (ulint) innobase_force_recovery;
 
 	srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
@@ -1745,15 +2182,11 @@ innobase_init(
 
 	row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;
 
-	srv_file_per_table = (ibool) innobase_file_per_table;
 	srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
 
 	srv_max_n_open_files = (ulint) innobase_open_files;
 	srv_innodb_status = (ibool) innobase_create_status_file;
 
-	srv_use_adaptive_hash_indexes =
-		(ibool) innobase_adaptive_hash_index;
-
 	srv_print_verbose_log = mysqld_embedded ? 0 : 1;
 
 	/* Store the default charset-collation number of this MySQL
@@ -1770,9 +2203,10 @@ innobase_init(
 	and consequently we do not need to know the ordering internally in
 	InnoDB. */
 
-	ut_a(0 == strcmp((char*)my_charset_latin1.name,
-						(char*)"latin1_swedish_ci"));
-	memcpy(srv_latin1_ordering, my_charset_latin1.sort_order, 256);
+	ut_a(0 == strcmp(my_charset_latin1.name, "latin1_swedish_ci"));
+	srv_latin1_ordering = my_charset_latin1.sort_order;
+
+	innobase_commit_concurrency_init_default();
 
 	/* Since we in this module access directly the fields of a trx
 	struct, and due to different headers and flags it might happen that
@@ -1780,41 +2214,49 @@ innobase_init(
 	modules, we check at run time that the size is the same in
 	these compilation modules. */
 
-	srv_sizeof_trx_t_in_ha_innodb_cc = sizeof(trx_t);
-
 	err = innobase_start_or_create_for_mysql();
 
 	if (err != DB_SUCCESS) {
-		my_free(internal_innobase_data_file_path,
-						MYF(MY_ALLOW_ZERO_PTR));
-		goto error;
+		goto mem_free_and_error;
 	}
 
-	(void) hash_init(&innobase_open_tables,system_charset_info, 32, 0, 0,
-					(hash_get_key) innobase_get_key, 0, 0);
+	innobase_open_tables = hash_create(200);
 	pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
 	pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
 	pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
 	pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
 	pthread_cond_init(&commit_cond, NULL);
 	innodb_inited= 1;
+#ifdef MYSQL_DYNAMIC_PLUGIN
+	if (innobase_hton != p) {
+		innobase_hton = reinterpret_cast<handlerton*>(p);
+		*innobase_hton = *innodb_hton_ptr;
+	}
+#endif /* MYSQL_DYNAMIC_PLUGIN */
+
+	/* Get the current high water mark format. */
+	innobase_file_format_check = (char*) trx_sys_file_format_max_get();
 
 	DBUG_RETURN(FALSE);
 error:
 	DBUG_RETURN(TRUE);
 }
 
-/***********************************************************************
-Closes an InnoDB database. */
+/*******************************************************************//**
+Closes an InnoDB database.
+@return	TRUE if error */
 static
 int
-innobase_end(handlerton *hton, ha_panic_function type)
-/*==============*/
-				/* out: TRUE if error */
+innobase_end(
+/*=========*/
+	handlerton*		hton,	/*!< in/out: InnoDB handlerton */
+	ha_panic_function	type __attribute__((unused)))
+					/*!< in: ha_panic() parameter */
 {
 	int	err= 0;
 
 	DBUG_ENTER("innobase_end");
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 
 #ifdef __NETWARE__	/* some special cleanup for NetWare */
 	if (nw_panic) {
@@ -1825,10 +2267,12 @@ innobase_end(handlerton *hton, ha_panic_function type)
 
 		srv_fast_shutdown = (ulint) innobase_fast_shutdown;
 		innodb_inited = 0;
+		hash_table_free(innobase_open_tables);
+		innobase_open_tables = NULL;
 		if (innobase_shutdown_for_mysql() != DB_SUCCESS) {
 			err = 1;
 		}
-		hash_free(&innobase_open_tables);
+		srv_free_paths_and_sizes();
 		my_free(internal_innobase_data_file_path,
 						MYF(MY_ALLOW_ZERO_PTR));
 		pthread_mutex_destroy(&innobase_share_mutex);
@@ -1841,31 +2285,48 @@ innobase_end(handlerton *hton, ha_panic_function type)
 	DBUG_RETURN(err);
 }
 
-/********************************************************************
+/****************************************************************//**
 Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
-the logs, and the name of this function should be innobase_checkpoint. */
+the logs, and the name of this function should be innobase_checkpoint.
+@return	TRUE if error */
 static
 bool
-innobase_flush_logs(handlerton *hton)
-/*=====================*/
-				/* out: TRUE if error */
+innobase_flush_logs(
+/*================*/
+	handlerton*	hton)	/*!< in/out: InnoDB handlerton */
 {
 	bool	result = 0;
 
 	DBUG_ENTER("innobase_flush_logs");
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 
 	log_buffer_flush_to_disk();
 
 	DBUG_RETURN(result);
 }
 
-/*********************************************************************
+/****************************************************************//**
+Return alter table flags supported in an InnoDB database. */
+static
+uint
+innobase_alter_table_flags(
+/*=======================*/
+	uint	flags)
+{
+	return(HA_ONLINE_ADD_INDEX_NO_WRITES
+		| HA_ONLINE_DROP_INDEX_NO_WRITES
+		| HA_ONLINE_ADD_UNIQUE_INDEX_NO_WRITES
+		| HA_ONLINE_DROP_UNIQUE_INDEX_NO_WRITES
+		| HA_ONLINE_ADD_PK_INDEX_NO_WRITES);
+}
+
+/*****************************************************************//**
 Commits a transaction in an InnoDB database. */
 static
 void
 innobase_commit_low(
 /*================*/
-	trx_t*	trx)	/* in: transaction handle */
+	trx_t*	trx)	/*!< in: transaction handle */
 {
 	if (trx->conc_state == TRX_NOT_STARTED) {
 
@@ -1875,23 +2336,24 @@ innobase_commit_low(
 	trx_commit_for_mysql(trx);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Creates an InnoDB transaction struct for the thd if it does not yet have one.
 Starts a new InnoDB transaction if a transaction is not yet started. And
 assigns a new snapshot for a consistent read if the transaction does not yet
-have one. */
+have one.
+@return	0 */
 static
 int
 innobase_start_trx_and_assign_read_view(
 /*====================================*/
-			/* out: 0 */
-        handlerton *hton, /* in: Innodb handlerton */ 
-	THD*	thd)	/* in: MySQL thread handle of the user for whom
+        handlerton *hton, /*!< in: Innodb handlerton */ 
+	THD*	thd)	/*!< in: MySQL thread handle of the user for whom
 			the transaction should be committed */
 {
 	trx_t*	trx;
 
 	DBUG_ENTER("innobase_start_trx_and_assign_read_view");
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 
 	/* Create a new trx struct for thd, if it does not yet have one */
 
@@ -1905,7 +2367,7 @@ innobase_start_trx_and_assign_read_view(
 
 	/* If the transaction is not started yet, start it */
 
-	trx_start_if_not_started_noninline(trx);
+	trx_start_if_not_started(trx);
 
 	/* Assign a read view if the transaction does not have it yet */
 
@@ -1914,37 +2376,35 @@ innobase_start_trx_and_assign_read_view(
 	/* Set the MySQL flag to mark that there is an active transaction */
 
 	if (trx->active_trans == 0) {
-		innobase_register_trx_and_stmt(hton, current_thd);
+		innobase_register_trx_and_stmt(hton, thd);
 		trx->active_trans = 1;
 	}
 
 	DBUG_RETURN(0);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Commits a transaction in an InnoDB database or marks an SQL statement
-ended. */
+ended.
+@return	0 */
 static
 int
 innobase_commit(
 /*============*/
-			/* out: 0 */
-        handlerton *hton, /* in: Innodb handlerton */ 
-	THD* 	thd,	/* in: MySQL thread handle of the user for whom
+        handlerton *hton, /*!< in: Innodb handlerton */ 
+	THD* 	thd,	/*!< in: MySQL thread handle of the user for whom
 			the transaction should be committed */
-	bool	all)	/* in:	TRUE - commit transaction
+	bool	all)	/*!< in:	TRUE - commit transaction
 				FALSE - the current SQL statement ended */
 {
 	trx_t*		trx;
 
 	DBUG_ENTER("innobase_commit");
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 	DBUG_PRINT("trans", ("ending transaction"));
 
 	trx = check_trx_exists(thd);
 
-	/* Update the info whether we should skip XA steps that eat CPU time */
-	trx->support_xa = THDVAR(thd, support_xa);
-
 	/* Since we will reserve the kernel mutex, we have to release
 	the search system latch first to obey the latching order. */
 
@@ -1983,11 +2443,11 @@ innobase_commit(
 		Note, the position is current because of
 		prepare_commit_mutex */
 retry:
-		if (srv_commit_concurrency > 0) {
+		if (innobase_commit_concurrency > 0) {
 			pthread_mutex_lock(&commit_cond_m);
 			commit_threads++;
 
-			if (commit_threads > srv_commit_concurrency) {
+			if (commit_threads > innobase_commit_concurrency) {
 				commit_threads--;
 				pthread_cond_wait(&commit_cond,
 					&commit_cond_m);
@@ -2000,11 +2460,16 @@ retry:
 		}
 
 		trx->mysql_log_file_name = mysql_bin_log_file_name();
-		trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos();
+		trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
 
+		/* Don't do write + flush right now. For group commit
+		to work we want to do the flush after releasing the
+		prepare_commit_mutex. */
+		trx->flush_log_later = TRUE;
 		innobase_commit_low(trx);
+		trx->flush_log_later = FALSE;
 
-		if (srv_commit_concurrency > 0) {
+		if (innobase_commit_concurrency > 0) {
 			pthread_mutex_lock(&commit_cond_m);
 			commit_threads--;
 			pthread_cond_signal(&commit_cond);
@@ -2016,6 +2481,8 @@ retry:
 			pthread_mutex_unlock(&prepare_commit_mutex);
 		}
 
+		/* Now do a write + flush of logs. */
+		trx_commit_complete_for_mysql(trx);
 		trx->active_trans = 0;
 
 	} else {
@@ -2049,30 +2516,28 @@ retry:
 	DBUG_RETURN(0);
 }
 
-/*********************************************************************
-Rolls back a transaction or the latest SQL statement. */
+/*****************************************************************//**
+Rolls back a transaction or the latest SQL statement.
+@return	0 or error number */
 static
 int
 innobase_rollback(
 /*==============*/
-			/* out: 0 or error number */
-        handlerton *hton, /* in: Innodb handlerton */ 
-	THD*	thd,	/* in: handle to the MySQL thread of the user
+        handlerton *hton, /*!< in: Innodb handlerton */ 
+	THD*	thd,	/*!< in: handle to the MySQL thread of the user
 			whose transaction should be rolled back */
-	bool	all)	/* in:	TRUE - commit transaction
+	bool	all)	/*!< in:	TRUE - commit transaction
 				FALSE - the current SQL statement ended */
 {
 	int	error = 0;
 	trx_t*	trx;
 
 	DBUG_ENTER("innobase_rollback");
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 	DBUG_PRINT("trans", ("aborting transaction"));
 
 	trx = check_trx_exists(thd);
 
-	/* Update the info whether we should skip XA steps that eat CPU time */
-	trx->support_xa = THDVAR(thd, support_xa);
-
 	/* Release a possible FIFO ticket and search latch. Since we will
 	reserve the kernel mutex, we have to release the search system latch
 	first to obey the latching order. */
@@ -2094,17 +2559,17 @@ innobase_rollback(
 		error = trx_rollback_last_sql_stat_for_mysql(trx);
 	}
 
-	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
+	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
 }
 
-/*********************************************************************
-Rolls back a transaction */
+/*****************************************************************//**
+Rolls back a transaction
+@return	0 or error number */
 static
 int
 innobase_rollback_trx(
 /*==================*/
-			/* out: 0 or error number */
-	trx_t*	trx)	/*  in: transaction */
+	trx_t*	trx)	/*!< in: transaction */
 {
 	int	error = 0;
 
@@ -2125,28 +2590,29 @@ innobase_rollback_trx(
 
 	error = trx_rollback_for_mysql(trx);
 
-	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
+	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
 }
 
-/*********************************************************************
-Rolls back a transaction to a savepoint. */
+/*****************************************************************//**
+Rolls back a transaction to a savepoint.
+@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
+given name */
 static
 int
 innobase_rollback_to_savepoint(
 /*===========================*/
-				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
-				no savepoint with the given name */
-        handlerton *hton,       /* in: Innodb handlerton */ 
-	THD*	thd,		/* in: handle to the MySQL thread of the user
+        handlerton *hton,       /*!< in: Innodb handlerton */ 
+	THD*	thd,		/*!< in: handle to the MySQL thread of the user
 				whose transaction should be rolled back */
-	void*	savepoint)	/* in: savepoint data */
+	void*	savepoint)	/*!< in: savepoint data */
 {
-	ib_longlong	mysql_binlog_cache_pos;
+	ib_int64_t	mysql_binlog_cache_pos;
 	int		error = 0;
 	trx_t*		trx;
 	char		name[64];
 
 	DBUG_ENTER("innobase_rollback_to_savepoint");
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 
 	trx = check_trx_exists(thd);
 
@@ -2162,27 +2628,28 @@ innobase_rollback_to_savepoint(
 
 	error = (int) trx_rollback_to_savepoint_for_mysql(trx, name,
 						&mysql_binlog_cache_pos);
-	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
+	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
 }
 
-/*********************************************************************
-Release transaction savepoint name. */
+/*****************************************************************//**
+Release transaction savepoint name.
+@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
+given name */
 static
 int
 innobase_release_savepoint(
 /*=======================*/
-				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
-				no savepoint with the given name */
-        handlerton*	hton,	/* in: handlerton for Innodb */
-	THD*	thd,		/* in: handle to the MySQL thread of the user
+        handlerton*	hton,	/*!< in: handlerton for Innodb */
+	THD*	thd,		/*!< in: handle to the MySQL thread of the user
 				whose transaction should be rolled back */
-	void*	savepoint)	/* in: savepoint data */
+	void*	savepoint)	/*!< in: savepoint data */
 {
 	int		error = 0;
 	trx_t*		trx;
 	char		name[64];
 
 	DBUG_ENTER("innobase_release_savepoint");
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 
 	trx = check_trx_exists(thd);
 
@@ -2192,24 +2659,25 @@ innobase_release_savepoint(
 
 	error = (int) trx_release_savepoint_for_mysql(trx, name);
 
-	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
+	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
 }
 
-/*********************************************************************
-Sets a transaction savepoint. */
+/*****************************************************************//**
+Sets a transaction savepoint.
+@return	always 0, that is, always succeeds */
 static
 int
 innobase_savepoint(
 /*===============*/
-				/* out: always 0, that is, always succeeds */
-	handlerton*	hton,   /* in: handle to the Innodb handlerton */
-	THD*	thd,		/* in: handle to the MySQL thread */
-	void*	savepoint)	/* in: savepoint data */
+	handlerton*	hton,   /*!< in: handle to the Innodb handlerton */
+	THD*	thd,		/*!< in: handle to the MySQL thread */
+	void*	savepoint)	/*!< in: savepoint data */
 {
 	int	error = 0;
 	trx_t*	trx;
 
 	DBUG_ENTER("innobase_savepoint");
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 
 	/*
 	  In the autocommit mode there is no sense to set a savepoint
@@ -2236,20 +2704,20 @@ innobase_savepoint(
 	char name[64];
 	longlong2str((ulint)savepoint,name,36);
 
-	error = (int) trx_savepoint_for_mysql(trx, name, (ib_longlong)0);
+	error = (int) trx_savepoint_for_mysql(trx, name, (ib_int64_t)0);
 
-	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
+	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
 }
 
-/*********************************************************************
-Frees a possible InnoDB trx object associated with the current THD. */
+/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return	0 or error number */
 static
 int
 innobase_close_connection(
 /*======================*/
-			/* out: 0 or error number */
-        handlerton*	hton,	/* in:  innobase handlerton */
-	THD*	thd)	/* in: handle to the MySQL thread of the user
+        handlerton*	hton,	/*!< in:  innobase handlerton */
+	THD*	thd)	/*!< in: handle to the MySQL thread of the user
 			whose resources should be free'd */
 {
 	trx_t*	trx;
@@ -2286,23 +2754,41 @@ innobase_close_connection(
 }
 
 
-/*****************************************************************************
+/*************************************************************************//**
 ** InnoDB database tables
 *****************************************************************************/
 
-/********************************************************************
-Get the record format from the data dictionary. */
+/****************************************************************//**
+Get the record format from the data dictionary.
+@return one of ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT,
+ROW_TYPE_COMPRESSED, ROW_TYPE_DYNAMIC */
+UNIV_INTERN
 enum row_type
 ha_innobase::get_row_type() const
 /*=============================*/
-			/* out: ROW_TYPE_REDUNDANT or ROW_TYPE_COMPACT */
 {
 	if (prebuilt && prebuilt->table) {
-		if (dict_table_is_comp_noninline(prebuilt->table)) {
-			return(ROW_TYPE_COMPACT);
-		} else {
+		const ulint	flags = prebuilt->table->flags;
+
+		if (UNIV_UNLIKELY(!flags)) {
 			return(ROW_TYPE_REDUNDANT);
 		}
+
+		ut_ad(flags & DICT_TF_COMPACT);
+
+		switch (flags & DICT_TF_FORMAT_MASK) {
+		case DICT_TF_FORMAT_51 << DICT_TF_FORMAT_SHIFT:
+			return(ROW_TYPE_COMPACT);
+		case DICT_TF_FORMAT_ZIP << DICT_TF_FORMAT_SHIFT:
+			if (flags & DICT_TF_ZSSIZE_MASK) {
+				return(ROW_TYPE_COMPRESSED);
+			} else {
+				return(ROW_TYPE_DYNAMIC);
+			}
+#if DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX
+# error "DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX"
+#endif
+		}
 	}
 	ut_ad(0);
 	return(ROW_TYPE_NOT_USED);
@@ -2310,36 +2796,137 @@ ha_innobase::get_row_type() const
 
 
 
-/********************************************************************
-Get the table flags to use for the statement. */
+/****************************************************************//**
+Get the table flags to use for the statement.
+@return	table flags */
+UNIV_INTERN
 handler::Table_flags
 ha_innobase::table_flags() const
+/*============================*/
 {
        /* Need to use tx_isolation here since table flags is (also)
           called before prebuilt is inited. */
-        ulong const tx_isolation = thd_tx_isolation(current_thd);
+        ulong const tx_isolation = thd_tx_isolation(ha_thd());
         if (tx_isolation <= ISO_READ_COMMITTED)
                 return int_table_flags;
         return int_table_flags | HA_BINLOG_STMT_CAPABLE;
 }
 
-/********************************************************************
+/****************************************************************//**
 Gives the file extension of an InnoDB single-table tablespace. */
 static const char* ha_innobase_exts[] = {
   ".ibd",
   NullS
 };
 
+/****************************************************************//**
+Returns the table type (storage engine name).
+@return	table type */
+UNIV_INTERN
+const char*
+ha_innobase::table_type() const
+/*===========================*/
+{
+	return(innobase_hton_name);
+}
+
+/****************************************************************//**
+Returns the index type. */
+UNIV_INTERN
+const char*
+ha_innobase::index_type(
+/*====================*/
+	uint)
+				/*!< out: index type */
+{
+	return("BTREE");
+}
+
+/****************************************************************//**
+Returns the table file name extension.
+@return	file extension string */
+UNIV_INTERN
 const char**
 ha_innobase::bas_ext() const
 /*========================*/
-				/* out: file extension string */
 {
-  return ha_innobase_exts;
+	return(ha_innobase_exts);
 }
 
+/****************************************************************//**
+Returns the operations supported for indexes.
+@return	flags of supported operations */
+UNIV_INTERN
+ulong
+ha_innobase::index_flags(
+/*=====================*/
+	uint,
+	uint,
+	bool)
+const
+{
+	return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
+	       | HA_READ_RANGE | HA_KEYREAD_ONLY);
+}
 
-/*********************************************************************
+/****************************************************************//**
+Returns the maximum number of keys.
+@return	MAX_KEY */
+UNIV_INTERN
+uint
+ha_innobase::max_supported_keys() const
+/*===================================*/
+{
+	return(MAX_KEY);
+}
+
+/****************************************************************//**
+Returns the maximum key length.
+@return	maximum supported key length, in bytes */
+UNIV_INTERN
+uint
+ha_innobase::max_supported_key_length() const
+/*=========================================*/
+{
+	/* An InnoDB page must store >= 2 keys; a secondary key record
+	must also contain the primary key value: max key length is
+	therefore set to slightly less than 1 / 4 of page size which
+	is 16 kB; but currently MySQL does not work with keys whose
+	size is > MAX_KEY_LENGTH */
+	return(3500);
+}
+
+/****************************************************************//**
+Returns the key map of keys that are usable for scanning.
+@return	key_map_full */
+UNIV_INTERN
+const key_map*
+ha_innobase::keys_to_use_for_scanning()
+{
+	return(&key_map_full);
+}
+
+/****************************************************************//**
+Determines if table caching is supported.
+@return	HA_CACHE_TBL_ASKTRANSACT */
+UNIV_INTERN
+uint8
+ha_innobase::table_cache_type()
+{
+	return(HA_CACHE_TBL_ASKTRANSACT);
+}
+
+/****************************************************************//**
+Determines if the primary key is clustered index.
+@return	true */
+UNIV_INTERN
+bool
+ha_innobase::primary_key_is_clustered()
+{
+	return(true);
+}
+
+/*****************************************************************//**
 Normalizes a table name string. A normalized name consists of the
 database name catenated to '/' and table name. An example:
 test/mytable. On Windows normalization puts both the database name and the
@@ -2348,9 +2935,9 @@ static
 void
 normalize_table_name(
 /*=================*/
-	char*		norm_name,	/* out: normalized name as a
+	char*		norm_name,	/*!< out: normalized name as a
 					null-terminated string */
-	const char*	name)		/* in: table name string */
+	const char*	name)		/*!< in: table name string */
 {
 	char*	name_ptr;
 	char*	db_ptr;
@@ -2385,19 +2972,19 @@ normalize_table_name(
 #endif
 }
 
-/************************************************************************
+/********************************************************************//**
 Set the autoinc column max value. This should only be called once from
-ha_innobase::open(). Therefore there's no need for a covering lock. */
-
-ulong
+ha_innobase::open(). Therefore there's no need for a covering lock.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
 ha_innobase::innobase_initialize_autoinc()
 /*======================================*/
 {
 	dict_index_t*	index;
 	ulonglong	auto_inc;
 	const char*	col_name;
-	ulint		error = DB_SUCCESS;
-	dict_table_t*	innodb_table = prebuilt->table;
+	ulint		error;
 
 	col_name = table->found_next_number_field->field_name;
 	index = innobase_get_index(table->s->next_number_index);
@@ -2405,35 +2992,53 @@ ha_innobase::innobase_initialize_autoinc()
 	/* Execute SELECT MAX(col_name) FROM TABLE; */
 	error = row_search_max_autoinc(index, col_name, &auto_inc);
 
-	if (error == DB_SUCCESS) {
+	switch (error) {
+	case DB_SUCCESS:
 
-		/* At the this stage we dont' know the increment
+		/* At the this stage we don't know the increment
 		or the offset, so use default inrement of 1. */
 		++auto_inc;
+		break;
 
-		dict_table_autoinc_initialize(innodb_table, auto_inc);
-
-	} else {
+	case DB_RECORD_NOT_FOUND:
 		ut_print_timestamp(stderr);
-		fprintf(stderr, "  InnoDB: Error: (%lu) Couldn't read "
-			"the MAX(%s) autoinc value from the "
-			"index (%s).\n", error, col_name, index->name);
+		fprintf(stderr, "  InnoDB: MySQL and InnoDB data "
+			"dictionaries are out of sync.\n"
+			"InnoDB: Unable to find the AUTOINC column %s in the "
+			"InnoDB table %s.\n"
+			"InnoDB: We set the next AUTOINC column value to the "
+			"maximum possible value,\n"
+			"InnoDB: in effect disabling the AUTOINC next value "
+			"generation.\n"
+			"InnoDB: You can either set the next AUTOINC value "
+			"explicitly using ALTER TABLE\n"
+			"InnoDB: or fix the data dictionary by recreating "
+			"the table.\n",
+			col_name, index->table->name);
+
+		auto_inc = 0xFFFFFFFFFFFFFFFFULL;
+		break;
+
+	default:
+		return(error);
 	}
 
-	return(ulong(error));
+	dict_table_autoinc_initialize(prebuilt->table, auto_inc);
+
+	return(DB_SUCCESS);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Creates and opens a handle to a table which already exists in an InnoDB
-database. */
-
+database.
+@return	1 if error, 0 if success */
+UNIV_INTERN
 int
 ha_innobase::open(
 /*==============*/
-					/* out: 1 if error, 0 if success */
-	const char*	name,		/* in: table name */
-	int		mode,		/* in: not used */
-	uint		test_if_locked)	/* in: not used */
+	const char*	name,		/*!< in: table name */
+	int		mode,		/*!< in: not used */
+	uint		test_if_locked)	/*!< in: not used */
 {
 	dict_table_t*	ib_table;
 	char		norm_name[1000];
@@ -2516,7 +3121,7 @@ retry:
 				"or, the table contains indexes that this "
 				"version of the engine\n"
 				"doesn't support.\n"
-				"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
+				"See " REFMAN "innodb-troubleshooting.html\n"
 				"how you can resolve the problem.\n",
 				norm_name);
 		free_share(share);
@@ -2532,14 +3137,14 @@ retry:
 				"Have you deleted the .ibd file from the "
 				"database directory under\nthe MySQL datadir, "
 				"or have you used DISCARD TABLESPACE?\n"
-				"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
+				"See " REFMAN "innodb-troubleshooting.html\n"
 				"how you can resolve the problem.\n",
 				norm_name);
 		free_share(share);
 		my_free(upd_buff, MYF(0));
 		my_errno = ENOENT;
 
-		dict_table_decrement_handle_count(ib_table);
+		dict_table_decrement_handle_count(ib_table, FALSE);
 		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
 	}
 
@@ -2608,12 +3213,21 @@ retry:
 		}
 	}
 
-	stats.block_size = 16 * 1024;	/* Index block size in InnoDB: used by MySQL
-				in query optimization */
+	/* Index block size in InnoDB: used by MySQL in query optimization */
+	stats.block_size = 16 * 1024;
 
 	/* Init table lock structure */
 	thr_lock_data_init(&share->lock,&lock,(void*) 0);
 
+	if (prebuilt->table) {
+		/* We update the highest file format in the system table
+		space, if this table has higher file format setting. */
+
+		trx_sys_file_format_max_upgrade(
+			(const char**) &innobase_file_format_check,
+			dict_table_get_format(prebuilt->table));
+	}
+
 	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
 
 	/* Only if the table has an AUTOINC column. */
@@ -2629,7 +3243,6 @@ retry:
 		if (dict_table_autoinc_read(prebuilt->table) == 0) {
 
 			error = innobase_initialize_autoinc();
-			/* Should always succeed! */
 			ut_a(error == DB_SUCCESS);
 		}
 
@@ -2639,30 +3252,31 @@ retry:
 	DBUG_RETURN(0);
 }
 
+UNIV_INTERN
 uint
 ha_innobase::max_supported_key_part_length() const
 {
 	return(DICT_MAX_INDEX_COL_LEN - 1);
 }
 
-/**********************************************************************
-Closes a handle to an InnoDB table. */
-
+/******************************************************************//**
+Closes a handle to an InnoDB table.
+@return	0 */
+UNIV_INTERN
 int
 ha_innobase::close(void)
 /*====================*/
-				/* out: 0 */
 {
 	THD*	thd;
 
 	DBUG_ENTER("ha_innobase::close");
 
-	thd = current_thd;  // avoid calling current_thd twice, it may be slow
+	thd = ha_thd();
 	if (thd != NULL) {
 		innobase_release_temporary_latches(ht, thd);
 	}
 
-	row_prebuilt_free(prebuilt);
+	row_prebuilt_free(prebuilt, FALSE);
 
 	my_free(upd_buff, MYF(0));
 	free_share(share);
@@ -2677,30 +3291,30 @@ ha_innobase::close(void)
 
 /* The following accessor functions should really be inside MySQL code! */
 
-/******************************************************************
-Gets field offset for a field in a table. */
-inline
+/**************************************************************//**
+Gets field offset for a field in a table.
+@return	offset */
+static inline
 uint
 get_field_offset(
 /*=============*/
-			/* out: offset */
-	TABLE*	table,	/* in: MySQL table object */
-	Field*	field)	/* in: MySQL field object */
+	TABLE*	table,	/*!< in: MySQL table object */
+	Field*	field)	/*!< in: MySQL field object */
 {
 	return((uint) (field->ptr - table->record[0]));
 }
 
-/******************************************************************
+/**************************************************************//**
 Checks if a field in a record is SQL NULL. Uses the record format
-information in table to track the null bit in record. */
+information in table to track the null bit in record.
+@return	1 if NULL, 0 otherwise */
 static inline
 uint
 field_in_record_is_null(
 /*====================*/
-			/* out: 1 if NULL, 0 otherwise */
-	TABLE*	table,	/* in: MySQL table object */
-	Field*	field,	/* in: MySQL field object */
-	char*	record)	/* in: a row in MySQL format */
+	TABLE*	table,	/*!< in: MySQL table object */
+	Field*	field,	/*!< in: MySQL field object */
+	char*	record)	/*!< in: a row in MySQL format */
 {
 	int	null_offset;
 
@@ -2720,16 +3334,16 @@ field_in_record_is_null(
 	return(0);
 }
 
-/******************************************************************
+/**************************************************************//**
 Sets a field in a record to SQL NULL. Uses the record format
 information in table to track the null bit in record. */
-inline
+static inline
 void
 set_field_in_record_to_null(
 /*========================*/
-	TABLE*	table,	/* in: MySQL table object */
-	Field*	field,	/* in: MySQL field object */
-	char*	record)	/* in: a row in MySQL format */
+	TABLE*	table,	/*!< in: MySQL table object */
+	Field*	field,	/*!< in: MySQL field object */
+	char*	record)	/*!< in: a row in MySQL format */
 {
 	int	null_offset;
 
@@ -2739,25 +3353,23 @@ set_field_in_record_to_null(
 	record[null_offset] = record[null_offset] | field->null_bit;
 }
 
-extern "C" {
-/*****************************************************************
+/*************************************************************//**
 InnoDB uses this function to compare two data fields for which the data type
 is such that we must use MySQL code to compare them. NOTE that the prototype
 of this function is in rem0cmp.c in InnoDB source code! If you change this
-function, remember to update the prototype there! */
-
+function, remember to update the prototype there!
+@return	1, 0, -1, if a is greater, equal, less than b, respectively */
+extern "C" UNIV_INTERN
 int
 innobase_mysql_cmp(
 /*===============*/
-					/* out: 1, 0, -1, if a is greater,
-					equal, less than b, respectively */
-	int		mysql_type,	/* in: MySQL type */
-	uint		charset_number,	/* in: number of the charset */
-	unsigned char*	a,		/* in: data field */
-	unsigned int	a_length,	/* in: data field length,
+	int		mysql_type,	/*!< in: MySQL type */
+	uint		charset_number,	/*!< in: number of the charset */
+	const unsigned char* a,		/*!< in: data field */
+	unsigned int	a_length,	/*!< in: data field length,
 					not UNIV_SQL_NULL */
-	unsigned char*	b,		/* in: data field */
-	unsigned int	b_length)	/* in: data field length,
+	const unsigned char* b,		/*!< in: data field */
+	unsigned int	b_length)	/*!< in: data field length,
 					not UNIV_SQL_NULL */
 {
 	CHARSET_INFO*		charset;
@@ -2816,27 +3428,30 @@ innobase_mysql_cmp(
 			return(0);
 		}
 	default:
-		assert(0);
+		ut_error;
 	}
 
 	return(0);
 }
-}
 
-/******************************************************************
+/**************************************************************//**
 Converts a MySQL type to an InnoDB type. Note that this function returns
 the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
-VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. */
-inline
+VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
+@return	DATA_BINARY, DATA_VARCHAR, ... */
+extern "C" UNIV_INTERN
 ulint
 get_innobase_type_from_mysql_type(
 /*==============================*/
-				/* out: DATA_BINARY, DATA_VARCHAR, ... */
-	ulint*	unsigned_flag,	/* out: DATA_UNSIGNED if an 'unsigned type';
-				at least ENUM and SET, and unsigned integer
-				types are 'unsigned types' */
-	Field*	field)		/* in: MySQL field */
+	ulint*		unsigned_flag,	/*!< out: DATA_UNSIGNED if an
+					'unsigned type';
+					at least ENUM and SET,
+					and unsigned integer
+					types are 'unsigned types' */
+	const void*	f)		/*!< in: MySQL Field */
 {
+	const class Field* field = reinterpret_cast<const class Field*>(f);
+
 	/* The following asserts try to check that the MySQL type code fits in
 	8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
 	the type */
@@ -2920,21 +3535,21 @@ get_innobase_type_from_mysql_type(
 	case MYSQL_TYPE_LONG_BLOB:
 		return(DATA_BLOB);
 	default:
-		assert(0);
+		ut_error;
 	}
 
 	return(0);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Writes an unsigned integer value < 64k to 2 bytes, in the little-endian
 storage format. */
-inline
+static inline
 void
 innobase_write_to_2_little_endian(
 /*==============================*/
-	byte*	buf,	/* in: where to store */
-	ulint	val)	/* in: value to write, must be < 64k */
+	byte*	buf,	/*!< in: where to store */
+	ulint	val)	/*!< in: value to write, must be < 64k */
 {
 	ut_a(val < 256 * 256);
 
@@ -2942,31 +3557,31 @@ innobase_write_to_2_little_endian(
 	buf[1] = (byte)(val / 256);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Reads an unsigned integer value < 64k from 2 bytes, in the little-endian
-storage format. */
-inline
+storage format.
+@return	value */
+static inline
 uint
 innobase_read_from_2_little_endian(
 /*===============================*/
-				/* out: value */
-	const uchar*	buf)	/* in: from where to read */
+	const uchar*	buf)	/*!< in: from where to read */
 {
 	return (uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1])));
 }
 
-/***********************************************************************
-Stores a key value for a row to a buffer. */
-
+/*******************************************************************//**
+Stores a key value for a row to a buffer.
+@return	key value length as stored in buff */
+UNIV_INTERN
 uint
 ha_innobase::store_key_val_for_row(
 /*===============================*/
-				/* out: key value length as stored in buff */
-	uint		keynr,	/* in: key number */
-	char*		buff,	/* in/out: buffer for the key value (in MySQL
+	uint		keynr,	/*!< in: key number */
+	char*		buff,	/*!< in/out: buffer for the key value (in MySQL
 				format) */
-	uint		buff_len,/* in: buffer length */
-	const uchar*	record)/* in: row in MySQL format */
+	uint		buff_len,/*!< in: buffer length */
+	const uchar*	record)/*!< in: row in MySQL format */
 {
 	KEY*		key_info	= table->key_info + keynr;
 	KEY_PART_INFO*	key_part	= key_info->key_part;
@@ -3024,13 +3639,13 @@ ha_innobase::store_key_val_for_row(
 
 		if (mysql_type == MYSQL_TYPE_VARCHAR) {
 						/* >= 5.0.3 true VARCHAR */
-			ulint	lenlen;
-			ulint	len;
-			byte*	data;
-			ulint	key_len;
-			ulint	true_len;
+			ulint		lenlen;
+			ulint		len;
+			const byte*	data;
+			ulint		key_len;
+			ulint		true_len;
 			CHARSET_INFO*	cs;
-			int	error=0;
+			int		error=0;
 
 			key_len = key_part->length;
 
@@ -3096,7 +3711,7 @@ ha_innobase::store_key_val_for_row(
 			ulint		true_len;
 			int		error=0;
 			ulint		blob_len;
-			byte*		blob_data;
+			const byte*	blob_data;
 
 			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
 
@@ -3229,19 +3844,19 @@ ha_innobase::store_key_val_for_row(
 	DBUG_RETURN((uint)(buff - buff_start));
 }
 
-/******************************************************************
+/**************************************************************//**
 Builds a 'template' to the prebuilt struct. The template is used in fast
 retrieval of just those column values MySQL needs in its processing. */
 static
 void
 build_template(
 /*===========*/
-	row_prebuilt_t*	prebuilt,	/* in/out: prebuilt struct */
-	THD*		thd,		/* in: current user thread, used
+	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt struct */
+	THD*		thd,		/*!< in: current user thread, used
 					only if templ_type is
 					ROW_MYSQL_REC_FIELDS */
-	TABLE*		table,		/* in: MySQL table */
-	uint		templ_type)	/* in: ROW_MYSQL_WHOLE_ROW or
+	TABLE*		table,		/*!< in: MySQL table */
+	uint		templ_type)	/*!< in: ROW_MYSQL_WHOLE_ROW or
 					ROW_MYSQL_REC_FIELDS */
 {
 	dict_index_t*	index;
@@ -3296,7 +3911,7 @@ build_template(
 		}
 	}
 
-	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
+	clust_index = dict_table_get_first_index(prebuilt->table);
 
 	if (templ_type == ROW_MYSQL_REC_FIELDS) {
 		index = prebuilt->index;
@@ -3316,8 +3931,7 @@ build_template(
 
 	if (!prebuilt->mysql_template) {
 		prebuilt->mysql_template = (mysql_row_templ_t*)
-						mem_alloc_noninline(
-					n_fields * sizeof(mysql_row_templ_t));
+			mem_alloc(n_fields * sizeof(mysql_row_templ_t));
 	}
 
 	prebuilt->template_type = templ_type;
@@ -3375,7 +3989,7 @@ include_field:
 		templ->col_no = i;
 
 		if (index == clust_index) {
-			templ->rec_field_no = dict_col_get_clust_pos_noninline(
+			templ->rec_field_no = dict_col_get_clust_pos(
 				&index->table->cols[i], index);
 		} else {
 			templ->rec_field_no = dict_index_get_nth_col_pos(
@@ -3413,8 +4027,8 @@ include_field:
 				(((Field_varstring*)field)->length_bytes);
 		}
 
-		templ->charset = dtype_get_charset_coll_noninline(
-				index->table->cols[i].prtype);
+		templ->charset = dtype_get_charset_coll(
+			index->table->cols[i].prtype);
 		templ->mbminlen = index->table->cols[i].mbminlen;
 		templ->mbmaxlen = index->table->cols[i].mbmaxlen;
 		templ->is_unsigned = index->table->cols[i].prtype
@@ -3435,16 +4049,16 @@ skip_field:
 		for (i = 0; i < n_requested_fields; i++) {
 			templ = prebuilt->mysql_template + i;
 
-			templ->rec_field_no = dict_col_get_clust_pos_noninline(
+			templ->rec_field_no = dict_col_get_clust_pos(
 				&index->table->cols[templ->col_no],
 				clust_index);
 		}
 	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Get the upper limit of the MySQL integral and floating-point type. */
-
+UNIV_INTERN
 ulonglong
 ha_innobase::innobase_get_int_col_max_value(
 /*========================================*/
@@ -3503,18 +4117,17 @@ ha_innobase::innobase_get_int_col_max_value(
 	return(max_value);
 }
 
-/************************************************************************
+/********************************************************************//**
 This special handling is really to overcome the limitations of MySQL's
 binlogging. We need to eliminate the non-determinism that will arise in
 INSERT ... SELECT type of statements, since MySQL binlog only stores the
 min value of the autoinc interval. Once that is fixed we can get rid of
-the special lock handling.*/
-
-ulong
+the special lock handling.
+@return	DB_SUCCESS if all OK else error code */
+UNIV_INTERN
+ulint
 ha_innobase::innobase_lock_autoinc(void)
 /*====================================*/
-					/* out: DB_SUCCESS if all OK else
-					error code */
 {
 	ulint		error = DB_SUCCESS;
 
@@ -3564,15 +4177,14 @@ ha_innobase::innobase_lock_autoinc(void)
 	return(ulong(error));
 }
 
-/************************************************************************
-Reset the autoinc value in the table.*/
-
-ulong
+/********************************************************************//**
+Reset the autoinc value in the table.
+@return	DB_SUCCESS if all went well else error code */
+UNIV_INTERN
+ulint
 ha_innobase::innobase_reset_autoinc(
 /*================================*/
-					/* out: DB_SUCCESS if all went well
-					else error code */
-	ulonglong	autoinc)	/* in: value to store */
+	ulonglong	autoinc)	/*!< in: value to store */
 {
 	ulint		error;
 
@@ -3588,16 +4200,15 @@ ha_innobase::innobase_reset_autoinc(
 	return(ulong(error));
 }
 
-/************************************************************************
+/********************************************************************//**
 Store the autoinc value in the table. The autoinc value is only set if
-it's greater than the existing autoinc value in the table.*/
-
-ulong
+it's greater than the existing autoinc value in the table.
+@return	DB_SUCCESS if all went well else error code */
+UNIV_INTERN
+ulint
 ha_innobase::innobase_set_max_autoinc(
 /*==================================*/
-					/* out: DB_SUCCES if all went well
-					else error code */
-	ulonglong	auto_inc)	/* in: value to store */
+	ulonglong	auto_inc)	/*!< in: value to store */
 {
 	ulint		error;
 
@@ -3613,15 +4224,15 @@ ha_innobase::innobase_set_max_autoinc(
 	return(ulong(error));
 }
 
-/************************************************************************
+/********************************************************************//**
 Stores a row in an InnoDB database, to the table specified in this
-handle. */
-
+handle.
+@return	error code */
+UNIV_INTERN
 int
 ha_innobase::write_row(
 /*===================*/
-			/* out: error code */
-	uchar*	record)	/* in: a row in MySQL format */
+	uchar*	record)	/*!< in: a row in MySQL format */
 {
 	ulint		error = 0;
         int             error_result= 0;
@@ -3634,7 +4245,7 @@ ha_innobase::write_row(
 	if (prebuilt->trx != trx) {
 	  sql_print_error("The transaction object for the table handle is at "
 			  "%p, but for the current thread it is at %p",
-			  prebuilt->trx, trx);
+			  (const void*) prebuilt->trx, (const void*) trx);
 
 		fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
 		ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200);
@@ -3668,7 +4279,7 @@ ha_innobase::write_row(
 		being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */
 
 		dict_table_t*	src_table;
-		ulint		mode;
+		enum lock_mode	mode;
 
 		num_write_row = 0;
 
@@ -3803,7 +4414,6 @@ no_commit:
 			case SQLCOM_INSERT_SELECT:
 			case SQLCOM_REPLACE_SELECT:
 				goto set_max_autoinc;
-				break;
 
 			default:
 				break;
@@ -3844,7 +4454,9 @@ set_max_autoinc:
 	innodb_srv_conc_exit_innodb(prebuilt->trx);
 
 report_error:
-	error_result = convert_error_code_to_mysql((int) error, user_thd);
+	error_result = convert_error_code_to_mysql((int) error,
+						   prebuilt->table->flags,
+						   user_thd);
 
 func_exit:
 	innobase_active_small();
@@ -3852,23 +4464,23 @@ func_exit:
 	DBUG_RETURN(error_result);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Checks which fields have changed in a row and stores information
-of them to an update vector. */
+of them to an update vector.
+@return	error number or 0 */
 static
 int
 calc_row_difference(
 /*================*/
-					/* out: error number or 0 */
-	upd_t*		uvect,		/* in/out: update vector */
-	uchar*		old_row,	/* in: old row in MySQL format */
-	uchar*		new_row,	/* in: new row in MySQL format */
-	struct st_table* table,		/* in: table in MySQL data
+	upd_t*		uvect,		/*!< in/out: update vector */
+	uchar*		old_row,	/*!< in: old row in MySQL format */
+	uchar*		new_row,	/*!< in: new row in MySQL format */
+	struct st_table* table,		/*!< in: table in MySQL data
 					dictionary */
-	uchar*		upd_buff,	/* in: buffer to use */
-	ulint		buff_len,	/* in: buffer length */
-	row_prebuilt_t*	prebuilt,	/* in: InnoDB prebuilt struct */
-	THD*		thd)		/* in: user thread */
+	uchar*		upd_buff,	/*!< in: buffer to use */
+	ulint		buff_len,	/*!< in: buffer length */
+	row_prebuilt_t*	prebuilt,	/*!< in: InnoDB prebuilt struct */
+	THD*		thd)		/*!< in: user thread */
 {
 	uchar*		original_upd_buff = upd_buff;
 	Field*		field;
@@ -3877,9 +4489,9 @@ calc_row_difference(
 	ulint		o_len;
 	ulint		n_len;
 	ulint		col_pack_len;
-	byte*		new_mysql_row_col;
-	byte*		o_ptr;
-	byte*		n_ptr;
+	const byte*	new_mysql_row_col;
+	const byte*	o_ptr;
+	const byte*	n_ptr;
 	byte*		buf;
 	upd_field_t*	ufield;
 	ulint		col_type;
@@ -3889,7 +4501,7 @@ calc_row_difference(
 	uint		i;
 
 	n_fields = table->s->fields;
-	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
+	clust_index = dict_table_get_first_index(prebuilt->table);
 
 	/* We use upd_buff to convert changed fields */
 	buf = (byte*) upd_buff;
@@ -3897,8 +4509,8 @@ calc_row_difference(
 	for (i = 0; i < n_fields; i++) {
 		field = table->field[i];
 
-		o_ptr = (byte*) old_row + get_field_offset(table, field);
-		n_ptr = (byte*) new_row + get_field_offset(table, field);
+		o_ptr = (const byte*) old_row + get_field_offset(table, field);
+		n_ptr = (const byte*) new_row + get_field_offset(table, field);
 
 		/* Use new_mysql_row_col and col_pack_len save the values */
 
@@ -3968,8 +4580,8 @@ calc_row_difference(
 			/* Let us use a dummy dfield to make the conversion
 			from the MySQL column format to the InnoDB format */
 
-			dict_col_copy_type_noninline(prebuilt->table->cols + i,
-						     &dfield.type);
+			dict_col_copy_type(prebuilt->table->cols + i,
+					   dfield_get_type(&dfield));
 
 			if (n_len != UNIV_SQL_NULL) {
 				buf = row_mysql_store_col_in_innobase_format(
@@ -3978,17 +4590,15 @@ calc_row_difference(
 					TRUE,
 					new_mysql_row_col,
 					col_pack_len,
-					dict_table_is_comp_noninline(
-							prebuilt->table));
-				ufield->new_val.data = dfield.data;
-				ufield->new_val.len = dfield.len;
+					dict_table_is_comp(prebuilt->table));
+				dfield_copy_data(&ufield->new_val, &dfield);
 			} else {
-				ufield->new_val.data = NULL;
-				ufield->new_val.len = UNIV_SQL_NULL;
+				dfield_set_null(&ufield->new_val);
 			}
 
 			ufield->exp = NULL;
-			ufield->field_no = dict_col_get_clust_pos_noninline(
+			ufield->orig_len = 0;
+			ufield->field_no = dict_col_get_clust_pos(
 				&prebuilt->table->cols[i], clust_index);
 			n_changed++;
 		}
@@ -4002,20 +4612,20 @@ calc_row_difference(
 	return(0);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Updates a row given as a parameter to a new value. Note that we are given
 whole rows, not just the fields which are updated: this incurs some
 overhead for CPU when we check which fields are actually updated.
 TODO: currently InnoDB does not prevent the 'Halloween problem':
 in a searched update a single row can get updated several times
-if its index columns are updated! */
-
+if its index columns are updated!
+@return	error number or 0 */
+UNIV_INTERN
 int
 ha_innobase::update_row(
 /*====================*/
-					/* out: error number or 0 */
-	const uchar*	old_row,	/* in: old row in MySQL format */
-	uchar*		new_row)	/* in: new row in MySQL format */
+	const uchar*	old_row,	/*!< in: old row in MySQL format */
+	uchar*		new_row)	/*!< in: new row in MySQL format */
 {
 	upd_t*		uvect;
 	int		error = 0;
@@ -4046,7 +4656,7 @@ ha_innobase::update_row(
 	/* This is not a delete */
 	prebuilt->upd_node->is_delete = FALSE;
 
-	assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
+	ut_a(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
 
 	innodb_srv_conc_enter_innodb(trx);
 
@@ -4094,7 +4704,8 @@ ha_innobase::update_row(
 
 	innodb_srv_conc_exit_innodb(trx);
 
-	error = convert_error_code_to_mysql(error, user_thd);
+	error = convert_error_code_to_mysql(error,
+					    prebuilt->table->flags, user_thd);
 
 	if (error == 0 /* success */
 	    && uvect->n_fields == 0 /* no columns were updated */) {
@@ -4114,14 +4725,14 @@ ha_innobase::update_row(
 	DBUG_RETURN(error);
 }
 
-/**************************************************************************
-Deletes a row given as the parameter. */
-
+/**********************************************************************//**
+Deletes a row given as the parameter.
+@return	error number or 0 */
+UNIV_INTERN
 int
 ha_innobase::delete_row(
 /*====================*/
-				/* out: error number or 0 */
-	const uchar*	record)	/* in: a row in MySQL format */
+	const uchar*	record)	/*!< in: a row in MySQL format */
 {
 	int		error = 0;
 	trx_t*		trx = thd_to_trx(user_thd);
@@ -4146,7 +4757,8 @@ ha_innobase::delete_row(
 
 	innodb_srv_conc_exit_innodb(trx);
 
-	error = convert_error_code_to_mysql(error, user_thd);
+	error = convert_error_code_to_mysql(
+		error, prebuilt->table->flags, user_thd);
 
 	/* Tell the InnoDB server that there might be work for
 	utility threads: */
@@ -4156,11 +4768,11 @@ ha_innobase::delete_row(
 	DBUG_RETURN(error);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Removes a new lock set on a row, if it was not read optimistically. This can
 be called after a row has been read in the processing of an UPDATE or a DELETE
 query, if the option innodb_locks_unsafe_for_binlog is set. */
-
+UNIV_INTERN
 void
 ha_innobase::unlock_row(void)
 /*=========================*/
@@ -4194,6 +4806,7 @@ ha_innobase::unlock_row(void)
 }
 
 /* See handler.h and row0mysql.h for docs on this function. */
+UNIV_INTERN
 bool
 ha_innobase::was_semi_consistent_read(void)
 /*=======================================*/
@@ -4202,6 +4815,7 @@ ha_innobase::was_semi_consistent_read(void)
 }
 
 /* See handler.h and row0mysql.h for docs on this function. */
+UNIV_INTERN
 void
 ha_innobase::try_semi_consistent_read(bool yes)
 /*===========================================*/
@@ -4222,27 +4836,25 @@ ha_innobase::try_semi_consistent_read(bool yes)
 	}
 }
 
-/**********************************************************************
-Initializes a handle to use an index. */
-
+/******************************************************************//**
+Initializes a handle to use an index.
+@return	0 or error number */
+UNIV_INTERN
 int
 ha_innobase::index_init(
 /*====================*/
-			/* out: 0 or error number */
-	uint	keynr,	/* in: key (index) number */
-	bool sorted)	/* in: 1 if result MUST be sorted according to index */
+	uint	keynr,	/*!< in: key (index) number */
+	bool sorted)	/*!< in: 1 if result MUST be sorted according to index */
 {
-	int	error	= 0;
 	DBUG_ENTER("index_init");
 
-	error = change_active_index(keynr);
-
-	DBUG_RETURN(error);
+	DBUG_RETURN(change_active_index(keynr));
 }
 
-/**********************************************************************
-Currently does nothing. */
-
+/******************************************************************//**
+Currently does nothing.
+@return	0 */
+UNIV_INTERN
 int
 ha_innobase::index_end(void)
 /*========================*/
@@ -4253,10 +4865,10 @@ ha_innobase::index_end(void)
 	DBUG_RETURN(error);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Converts a search mode flag understood by MySQL to a flag understood
 by InnoDB. */
-inline
+static inline
 ulint
 convert_search_mode_to_innobase(
 /*============================*/
@@ -4358,18 +4970,17 @@ overwrap, we use this test only as a secondary way of determining the
 start of a new SQL statement. */
 
 
-/**************************************************************************
+/**********************************************************************//**
 Positions an index cursor to the index specified in the handle. Fetches the
-row if any. */
-
+row if any.
+@return	0, HA_ERR_KEY_NOT_FOUND, or error number */
+UNIV_INTERN
 int
 ha_innobase::index_read(
 /*====================*/
-					/* out: 0, HA_ERR_KEY_NOT_FOUND,
-					or error number */
-	uchar*		buf,		/* in/out: buffer for the returned
+	uchar*		buf,		/*!< in/out: buffer for the returned
 					row */
-	const uchar*	key_ptr,	/* in: key value; if this is NULL
+	const uchar*	key_ptr,	/*!< in: key value; if this is NULL
 					we position the cursor at the
 					start or end of index; this can
 					also contain an InnoDB row id, in
@@ -4378,8 +4989,8 @@ ha_innobase::index_read(
 					also be a prefix of a full key value,
 					and the last column can be a prefix
 					of a full column */
-	uint			key_len,/* in: key value length */
-	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
+	uint			key_len,/*!< in: key value length */
+	enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
 {
 	ulint		mode;
 	dict_index_t*	index;
@@ -4399,20 +5010,21 @@ ha_innobase::index_read(
 	necessarily prebuilt->index, but can also be the clustered index */
 
 	if (prebuilt->sql_stat_start) {
-		build_template(prebuilt, user_thd, table,
-							ROW_MYSQL_REC_FIELDS);
+		build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
 	}
 
 	if (key_ptr) {
 		/* Convert the search key value to InnoDB format into
 		prebuilt->search_tuple */
 
-		row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
-					(byte*) key_val_buff,
-					(ulint)upd_and_key_val_buff_len,
-					index,
-					(byte*) key_ptr,
-					(ulint) key_len, prebuilt->trx);
+		row_sel_convert_mysql_key_to_innobase(
+			prebuilt->search_tuple,
+			(byte*) key_val_buff,
+			(ulint)upd_and_key_val_buff_len,
+			index,
+			(byte*) key_ptr,
+			(ulint) key_len,
+			prebuilt->trx);
 	} else {
 		/* We position the cursor to the last or the first entry
 		in the index */
@@ -4425,10 +5037,12 @@ ha_innobase::index_read(
 	match_mode = 0;
 
 	if (find_flag == HA_READ_KEY_EXACT) {
+
 		match_mode = ROW_SEL_EXACT;
 
 	} else if (find_flag == HA_READ_PREFIX
-				|| find_flag == HA_READ_PREFIX_LAST) {
+		   || find_flag == HA_READ_PREFIX_LAST) {
+
 		match_mode = ROW_SEL_EXACT_PREFIX;
 	}
 
@@ -4447,51 +5061,55 @@ ha_innobase::index_read(
 		ret = DB_UNSUPPORTED;
 	}
 
-	if (ret == DB_SUCCESS) {
+	switch (ret) {
+	case DB_SUCCESS:
 		error = 0;
 		table->status = 0;
-
-	} else if (ret == DB_RECORD_NOT_FOUND) {
+		break;
+	case DB_RECORD_NOT_FOUND:
 		error = HA_ERR_KEY_NOT_FOUND;
 		table->status = STATUS_NOT_FOUND;
-
-	} else if (ret == DB_END_OF_INDEX) {
+		break;
+	case DB_END_OF_INDEX:
 		error = HA_ERR_KEY_NOT_FOUND;
 		table->status = STATUS_NOT_FOUND;
-	} else {
-		error = convert_error_code_to_mysql((int) ret, user_thd);
+		break;
+	default:
+		error = convert_error_code_to_mysql((int) ret,
+						    prebuilt->table->flags,
+						    user_thd);
 		table->status = STATUS_NOT_FOUND;
+		break;
 	}
 
 	DBUG_RETURN(error);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 The following functions works like index_read, but it find the last
-row with the current key value or prefix. */
-
+row with the current key value or prefix.
+@return	0, HA_ERR_KEY_NOT_FOUND, or an error code */
+UNIV_INTERN
 int
 ha_innobase::index_read_last(
 /*=========================*/
-				/* out: 0, HA_ERR_KEY_NOT_FOUND, or an
-				error code */
-	uchar*		buf,	/* out: fetched row */
-	const uchar*	key_ptr,/* in: key value, or a prefix of a full
+	uchar*		buf,	/*!< out: fetched row */
+	const uchar*	key_ptr,/*!< in: key value, or a prefix of a full
 				key value */
-	uint		key_len)/* in: length of the key val or prefix
+	uint		key_len)/*!< in: length of the key val or prefix
 				in bytes */
 {
 	return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
 }
 
-/************************************************************************
-Get the index for a handle. Does not change active index.*/
-
+/********************************************************************//**
+Get the index for a handle. Does not change active index.
+@return	NULL or index instance. */
+UNIV_INTERN
 dict_index_t*
 ha_innobase::innobase_get_index(
 /*============================*/
-				/* out: NULL or index instance. */
-	uint		keynr)	/* in: use this index; MAX_KEY means always
+	uint		keynr)	/*!< in: use this index; MAX_KEY means always
 				clustered index, even if it was internally
 				generated by InnoDB */
 {
@@ -4507,10 +5125,10 @@ ha_innobase::innobase_get_index(
 	if (keynr != MAX_KEY && table->s->keys > 0) {
 		key = table->key_info + keynr;
 
-		index = dict_table_get_index_noninline(
-			prebuilt->table, key->name);
+		index = dict_table_get_index_on_name(prebuilt->table,
+						     key->name);
 	} else {
-		index = dict_table_get_first_index_noninline(prebuilt->table);
+		index = dict_table_get_first_index(prebuilt->table);
 	}
 
 	if (!index) {
@@ -4524,14 +5142,14 @@ ha_innobase::innobase_get_index(
 	DBUG_RETURN(index);
 }
 
-/************************************************************************
-Changes the active index of a handle. */
-
+/********************************************************************//**
+Changes the active index of a handle.
+@return	0 or error code */
+UNIV_INTERN
 int
 ha_innobase::change_active_index(
 /*=============================*/
-			/* out: 0 or error code */
-	uint	keynr)	/* in: use this index; MAX_KEY means always clustered
+	uint	keynr)	/*!< in: use this index; MAX_KEY means always clustered
 			index, even if it was internally generated by
 			InnoDB */
 {
@@ -4544,11 +5162,24 @@ ha_innobase::change_active_index(
 
 	prebuilt->index = innobase_get_index(keynr);
 
-	if (!prebuilt->index) {
+	if (UNIV_UNLIKELY(!prebuilt->index)) {
+		sql_print_warning("InnoDB: change_active_index(%u) failed",
+				  keynr);
 		DBUG_RETURN(1);
 	}
 
-	assert(prebuilt->search_tuple != 0);
+	prebuilt->index_usable = row_merge_is_index_usable(prebuilt->trx,
+							   prebuilt->index);
+
+	if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
+		sql_print_warning("InnoDB: insufficient history for index %u",
+				  keynr);
+		/* The caller seems to ignore this.  Thus, we must check
+		this again in row_search_for_mysql(). */
+		DBUG_RETURN(2);
+	}
+
+	ut_a(prebuilt->search_tuple != 0);
 
 	dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
 
@@ -4566,23 +5197,23 @@ ha_innobase::change_active_index(
 	DBUG_RETURN(0);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Positions an index cursor to the index specified in keynr. Fetches the
-row if any. */
-/* ??? This is only used to read whole keys ??? */
-
+row if any.
+??? This is only used to read whole keys ???
+@return	error number or 0 */
+UNIV_INTERN
 int
 ha_innobase::index_read_idx(
 /*========================*/
-					/* out: error number or 0 */
-	uchar*		buf,		/* in/out: buffer for the returned
+	uchar*		buf,		/*!< in/out: buffer for the returned
 					row */
-	uint		keynr,		/* in: use this index */
-	const uchar*	key,		/* in: key value; if this is NULL
+	uint		keynr,		/*!< in: use this index */
+	const uchar*	key,		/*!< in: key value; if this is NULL
 					we position the cursor at the
 					start or end of index */
-	uint		key_len,	/* in: key value length */
-	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
+	uint		key_len,	/*!< in: key value length */
+	enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
 {
 	if (change_active_index(keynr)) {
 
@@ -4592,19 +5223,18 @@ ha_innobase::index_read_idx(
 	return(index_read(buf, key, key_len, find_flag));
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Reads the next or previous row from a cursor, which must have previously been
-positioned using index_read. */
-
+positioned using index_read.
+@return	0, HA_ERR_END_OF_FILE, or error number */
+UNIV_INTERN
 int
 ha_innobase::general_fetch(
 /*=======================*/
-				/* out: 0, HA_ERR_END_OF_FILE, or error
-				number */
-	uchar*	buf,		/* in/out: buffer for next row in MySQL
+	uchar*	buf,		/*!< in/out: buffer for next row in MySQL
 				format */
-	uint	direction,	/* in: ROW_SEL_NEXT or ROW_SEL_PREV */
-	uint	match_mode)	/* in: 0, ROW_SEL_EXACT, or
+	uint	direction,	/*!< in: ROW_SEL_NEXT or ROW_SEL_PREV */
+	uint	match_mode)	/*!< in: 0, ROW_SEL_EXACT, or
 				ROW_SEL_EXACT_PREFIX */
 {
 	ulint		ret;
@@ -4616,39 +5246,43 @@ ha_innobase::general_fetch(
 
 	innodb_srv_conc_enter_innodb(prebuilt->trx);
 
-	ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode,
-								direction);
+	ret = row_search_for_mysql(
+		(byte*)buf, 0, prebuilt, match_mode, direction);
+
 	innodb_srv_conc_exit_innodb(prebuilt->trx);
 
-	if (ret == DB_SUCCESS) {
+	switch (ret) {
+	case DB_SUCCESS:
 		error = 0;
 		table->status = 0;
-
-	} else if (ret == DB_RECORD_NOT_FOUND) {
+		break;
+	case DB_RECORD_NOT_FOUND:
 		error = HA_ERR_END_OF_FILE;
 		table->status = STATUS_NOT_FOUND;
-
-	} else if (ret == DB_END_OF_INDEX) {
+		break;
+	case DB_END_OF_INDEX:
 		error = HA_ERR_END_OF_FILE;
 		table->status = STATUS_NOT_FOUND;
-	} else {
-		error = convert_error_code_to_mysql((int) ret, user_thd);
+		break;
+	default:
+		error = convert_error_code_to_mysql(
+			(int) ret, prebuilt->table->flags, user_thd);
 		table->status = STATUS_NOT_FOUND;
+		break;
 	}
 
 	DBUG_RETURN(error);
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Reads the next row from a cursor, which must have previously been
-positioned using index_read. */
-
+positioned using index_read.
+@return	0, HA_ERR_END_OF_FILE, or error number */
+UNIV_INTERN
 int
 ha_innobase::index_next(
 /*====================*/
-				/* out: 0, HA_ERR_END_OF_FILE, or error
-				number */
-	uchar*		buf)	/* in/out: buffer for next row in MySQL
+	uchar*		buf)	/*!< in/out: buffer for next row in MySQL
 				format */
 {
 	ha_statistic_increment(&SSV::ha_read_next_count);
@@ -4656,47 +5290,46 @@ ha_innobase::index_next(
 	return(general_fetch(buf, ROW_SEL_NEXT, 0));
 }
 
-/***********************************************************************
-Reads the next row matching to the key value given as the parameter. */
-
+/*******************************************************************//**
+Reads the next row matching to the key value given as the parameter.
+@return	0, HA_ERR_END_OF_FILE, or error number */
+UNIV_INTERN
 int
 ha_innobase::index_next_same(
 /*=========================*/
-				/* out: 0, HA_ERR_END_OF_FILE, or error
-				number */
-	uchar*		buf,	/* in/out: buffer for the row */
-	const uchar*	key,	/* in: key value */
-	uint		keylen)	/* in: key value length */
+	uchar*		buf,	/*!< in/out: buffer for the row */
+	const uchar*	key,	/*!< in: key value */
+	uint		keylen)	/*!< in: key value length */
 {
 	ha_statistic_increment(&SSV::ha_read_next_count);
 
 	return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Reads the previous row from a cursor, which must have previously been
-positioned using index_read. */
-
+positioned using index_read.
+@return	0, HA_ERR_END_OF_FILE, or error number */
+UNIV_INTERN
 int
 ha_innobase::index_prev(
 /*====================*/
-			/* out: 0, HA_ERR_END_OF_FILE, or error number */
-	uchar*	buf)	/* in/out: buffer for previous row in MySQL format */
+	uchar*	buf)	/*!< in/out: buffer for previous row in MySQL format */
 {
 	ha_statistic_increment(&SSV::ha_read_prev_count);
 
 	return(general_fetch(buf, ROW_SEL_PREV, 0));
 }
 
-/************************************************************************
+/********************************************************************//**
 Positions a cursor on the first record in an index and reads the
-corresponding row to buf. */
-
+corresponding row to buf.
+@return	0, HA_ERR_END_OF_FILE, or error code */
+UNIV_INTERN
 int
 ha_innobase::index_first(
 /*=====================*/
-			/* out: 0, HA_ERR_END_OF_FILE, or error code */
-	uchar*	buf)	/* in/out: buffer for the row */
+	uchar*	buf)	/*!< in/out: buffer for the row */
 {
 	int	error;
 
@@ -4714,15 +5347,15 @@ ha_innobase::index_first(
 	DBUG_RETURN(error);
 }
 
-/************************************************************************
+/********************************************************************//**
 Positions a cursor on the last record in an index and reads the
-corresponding row to buf. */
-
+corresponding row to buf.
+@return	0, HA_ERR_END_OF_FILE, or error code */
+UNIV_INTERN
 int
 ha_innobase::index_last(
 /*====================*/
-			/* out: 0, HA_ERR_END_OF_FILE, or error code */
-	uchar*	buf)	/* in/out: buffer for the row */
+	uchar*	buf)	/*!< in/out: buffer for the row */
 {
 	int	error;
 
@@ -4740,14 +5373,14 @@ ha_innobase::index_last(
 	DBUG_RETURN(error);
 }
 
-/********************************************************************
-Initialize a table scan. */
-
+/****************************************************************//**
+Initialize a table scan.
+@return	0 or error number */
+UNIV_INTERN
 int
 ha_innobase::rnd_init(
 /*==================*/
-			/* out: 0 or error number */
-	bool	scan)	/* in: ???????? */
+	bool	scan)	/*!< in: TRUE if table/index scan FALSE otherwise */
 {
 	int	err;
 
@@ -4772,26 +5405,26 @@ ha_innobase::rnd_init(
 	return(err);
 }
 
-/*********************************************************************
-Ends a table scan. */
-
+/*****************************************************************//**
+Ends a table scan.
+@return	0 or error number */
+UNIV_INTERN
 int
 ha_innobase::rnd_end(void)
 /*======================*/
-				/* out: 0 or error number */
 {
 	return(index_end());
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Reads the next row in a table scan (also used to read the FIRST row
-in a table scan). */
-
+in a table scan).
+@return	0, HA_ERR_END_OF_FILE, or error number */
+UNIV_INTERN
 int
 ha_innobase::rnd_next(
 /*==================*/
-			/* out: 0, HA_ERR_END_OF_FILE, or error number */
-	uchar*	buf)	/* in/out: returns the row in this buffer,
+	uchar*	buf)	/*!< in/out: returns the row in this buffer,
 			in MySQL format */
 {
 	int	error;
@@ -4801,9 +5434,11 @@ ha_innobase::rnd_next(
 
 	if (start_of_scan) {
 		error = index_first(buf);
+
 		if (error == HA_ERR_KEY_NOT_FOUND) {
 			error = HA_ERR_END_OF_FILE;
 		}
+
 		start_of_scan = 0;
 	} else {
 		error = general_fetch(buf, ROW_SEL_NEXT, 0);
@@ -4812,15 +5447,15 @@ ha_innobase::rnd_next(
 	DBUG_RETURN(error);
 }
 
-/**************************************************************************
-Fetches a row from the table based on a row reference. */
-
+/**********************************************************************//**
+Fetches a row from the table based on a row reference.
+@return	0, HA_ERR_KEY_NOT_FOUND, or error code */
+UNIV_INTERN
 int
 ha_innobase::rnd_pos(
 /*=================*/
-			/* out: 0, HA_ERR_KEY_NOT_FOUND, or error code */
-	uchar*	buf,	/* in/out: buffer for the row */
-	uchar*	pos)	/* in: primary key value of the row in the
+	uchar*	buf,	/*!< in/out: buffer for the row */
+	uchar*	pos)	/*!< in: primary key value of the row in the
 			MySQL format, or the row id if the clustered
 			index was internally generated by InnoDB; the
 			length of data in pos has to be ref_length */
@@ -4864,7 +5499,7 @@ ha_innobase::rnd_pos(
 	DBUG_RETURN(error);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Stores a reference to the current row to 'ref' field of the handle. Note
 that in the case where we have generated the clustered index for the
 table, the function parameter is illogical: we MUST ASSUME that 'record'
@@ -4872,11 +5507,11 @@ is the current 'position' of the handle, because if row ref is actually
 the row id internally generated in InnoDB, then 'record' does not contain
 it. We just guess that the row id must be for the record where the handle
 was positioned the last time. */
-
+UNIV_INTERN
 void
 ha_innobase::position(
 /*==================*/
-	const uchar*	record)	/* in: row in MySQL format */
+	const uchar*	record)	/*!< in: row in MySQL format */
 {
 	uint		len;
 
@@ -4905,41 +5540,23 @@ ha_innobase::position(
 	}
 }
 
-/*********************************************************************
-If it's a DB_TOO_BIG_RECORD error then set a suitable message to
-return to the client.*/
-inline
-void
-innodb_check_for_record_too_big_error(
-/*==================================*/
-	ulint	comp,	/* in: ROW_FORMAT: nonzero=COMPACT, 0=REDUNDANT */
-	int	error)	/* in: error code to check */
-{
-	if (error == (int)DB_TOO_BIG_RECORD) {
-		ulint	max_row_size
-			= page_get_free_space_of_empty_noninline(comp) / 2;
-
-		my_error(ER_TOO_BIG_ROWSIZE, MYF(0), max_row_size);
-	}
-}
-
 /* limit innodb monitor access to users with PROCESS privilege.
 See http://bugs.mysql.com/32710 for expl. why we choose PROCESS. */
 #define IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name, thd) \
 	(row_is_magic_monitor_table(table_name) \
 	 && check_global_access(thd, PROCESS_ACL))
 
-/*********************************************************************
+/*****************************************************************//**
 Creates a table definition to an InnoDB database. */
 static
 int
 create_table_def(
 /*=============*/
-	trx_t*		trx,		/* in: InnoDB transaction handle */
-	TABLE*		form,		/* in: information on table
+	trx_t*		trx,		/*!< in: InnoDB transaction handle */
+	TABLE*		form,		/*!< in: information on table
 					columns and indexes */
-	const char*	table_name,	/* in: table name */
-	const char*	path_of_temp_table,/* in: if this is a table explicitly
+	const char*	table_name,	/*!< in: table name */
+	const char*	path_of_temp_table,/*!< in: if this is a table explicitly
 					created by the user with the
 					TEMPORARY keyword, then this
 					parameter is the dir path where the
@@ -4947,7 +5564,7 @@ create_table_def(
 					an .ibd file for it (no .ibd extension
 					in the path, though); otherwise this
 					is NULL */
-	ulint		flags)		/* in: table flags */
+	ulint		flags)		/*!< in: table flags */
 {
 	Field*		field;
 	dict_table_t*	table;
@@ -5006,9 +5623,19 @@ create_table_def(
 
 			charset_no = (ulint)field->charset()->number;
 
-			ut_a(charset_no < 256); /* in data0type.h we assume
-						that the number fits in one
-						byte */
+			if (UNIV_UNLIKELY(charset_no >= 256)) {
+				/* in data0type.h we assume that the
+				number fits in one byte in prtype */
+				push_warning_printf(
+					(THD*) trx->mysql_thd,
+					MYSQL_ERROR::WARN_LEVEL_ERROR,
+					ER_CANT_CREATE_TABLE,
+					"In InnoDB, charset-collation codes"
+					" must be below 256."
+					" Unsupported code %lu.",
+					(ulong) charset_no);
+				DBUG_RETURN(ER_CANT_CREATE_TABLE);
+			}
 		}
 
 		ut_a(field->type() < 256); /* we assume in dtype_form_prtype()
@@ -5043,24 +5670,23 @@ create_table_def(
 
 	error = row_create_table_for_mysql(table, trx);
 
-	innodb_check_for_record_too_big_error(flags & DICT_TF_COMPACT, error);
-
-	error = convert_error_code_to_mysql(error, NULL);
+	error = convert_error_code_to_mysql(error, flags, NULL);
 
 	DBUG_RETURN(error);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Creates an index in an InnoDB database. */
 static
 int
 create_index(
 /*=========*/
-	trx_t*		trx,		/* in: InnoDB transaction handle */
-	TABLE*		form,		/* in: information on table
+	trx_t*		trx,		/*!< in: InnoDB transaction handle */
+	TABLE*		form,		/*!< in: information on table
 					columns and indexes */
-	const char*	table_name,	/* in: table name */
-	uint		key_num)	/* in: index number */
+	ulint		flags,		/*!< in: InnoDB table flags */
+	const char*	table_name,	/*!< in: table name */
+	uint		key_num)	/*!< in: index number */
 {
 	Field*		field;
 	dict_index_t*	index;
@@ -5095,8 +5721,8 @@ create_index(
 	/* We pass 0 as the space id, and determine at a lower level the space
 	id where to store the table */
 
-	index = dict_mem_index_create((char*) table_name, key->name, 0,
-						ind_type, n_fields);
+	index = dict_mem_index_create(table_name, key->name, 0,
+				      ind_type, n_fields);
 
 	field_lengths = (ulint*) my_malloc(sizeof(ulint) * n_fields,
 		MYF(MY_FAE));
@@ -5167,27 +5793,23 @@ create_index(
 	sure we don't create too long indexes. */
 	error = row_create_index_for_mysql(index, trx, field_lengths);
 
-	innodb_check_for_record_too_big_error(form->s->row_type
-					      != ROW_TYPE_REDUNDANT, error);
-
-	error = convert_error_code_to_mysql(error, NULL);
+	error = convert_error_code_to_mysql(error, flags, NULL);
 
 	my_free(field_lengths, MYF(0));
 
 	DBUG_RETURN(error);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Creates an index to an InnoDB table when the user has defined no
 primary index. */
 static
 int
 create_clustered_index_when_no_primary(
 /*===================================*/
-	trx_t*		trx,		/* in: InnoDB transaction handle */
-	ulint		comp,		/* in: ROW_FORMAT:
-					nonzero=COMPACT, 0=REDUNDANT */
-	const char*	table_name)	/* in: table name */
+	trx_t*		trx,		/*!< in: InnoDB transaction handle */
+	ulint		flags,		/*!< in: InnoDB table flags */
+	const char*	table_name)	/*!< in: table name */
 {
 	dict_index_t*	index;
 	int		error;
@@ -5197,22 +5819,187 @@ create_clustered_index_when_no_primary(
 
 	index = dict_mem_index_create(table_name, "GEN_CLUST_INDEX",
 				      0, DICT_CLUSTERED, 0);
+
 	error = row_create_index_for_mysql(index, trx, NULL);
 
-	innodb_check_for_record_too_big_error(comp, error);
-
-	error = convert_error_code_to_mysql(error, NULL);
+	error = convert_error_code_to_mysql(error, flags, NULL);
 
 	return(error);
 }
 
-/*********************************************************************
-Update create_info.  Used in SHOW CREATE TABLE et al. */
+/*****************************************************************//**
+Validates the create options. We may build on this function
+in future. For now, it checks two specifiers:
+KEY_BLOCK_SIZE and ROW_FORMAT
+If innodb_strict_mode is not set then this function is a no-op
+@return	TRUE if valid. */
+static
+ibool
+create_options_are_valid(
+/*=====================*/
+	THD*		thd,		/*!< in: connection thread. */
+	TABLE*		form,		/*!< in: information on table
+					columns and indexes */
+	HA_CREATE_INFO*	create_info)	/*!< in: create info. */
+{
+	ibool 	kbs_specified	= FALSE;
+	ibool	ret		= TRUE;
 
+
+	ut_ad(thd != NULL);
+
+	/* If innodb_strict_mode is not set don't do any validation. */
+	if (!(THDVAR(thd, strict_mode))) {
+		return(TRUE);
+	}
+
+	ut_ad(form != NULL);
+	ut_ad(create_info != NULL);
+
+	/* First check if KEY_BLOCK_SIZE was specified. */
+	if (create_info->key_block_size
+	    || (create_info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE)) {
+
+		kbs_specified = TRUE;
+		switch (create_info->key_block_size) {
+		case 1:
+		case 2:
+		case 4:
+		case 8:
+		case 16:
+			/* Valid value. */
+			break;
+		default:
+			push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
+					    ER_ILLEGAL_HA_CREATE_OPTION,
+					    "InnoDB: invalid"
+					    " KEY_BLOCK_SIZE = %lu."
+					    " Valid values are"
+					    " [1, 2, 4, 8, 16]",
+					    create_info->key_block_size);
+			ret = FALSE;
+		}
+	}
+	
+	/* If KEY_BLOCK_SIZE was specified, check for its
+	dependencies. */
+	if (kbs_specified && !srv_file_per_table) {
+		push_warning(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
+			     ER_ILLEGAL_HA_CREATE_OPTION,
+			     "InnoDB: KEY_BLOCK_SIZE"
+			     " requires innodb_file_per_table.");
+		ret = FALSE;
+	}
+
+	if (kbs_specified && srv_file_format < DICT_TF_FORMAT_ZIP) {
+		push_warning(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
+			     ER_ILLEGAL_HA_CREATE_OPTION,
+			     "InnoDB: KEY_BLOCK_SIZE"
+			     " requires innodb_file_format >"
+			     " Antelope.");
+		ret = FALSE;
+	}
+
+	/* Now check for ROW_FORMAT specifier. */
+	if (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) {
+		switch (form->s->row_type) {
+			const char* row_format_name;
+		case ROW_TYPE_COMPRESSED:
+		case ROW_TYPE_DYNAMIC:
+			row_format_name
+				= form->s->row_type == ROW_TYPE_COMPRESSED
+				? "COMPRESSED"
+				: "DYNAMIC";
+
+			/* These two ROW_FORMATs require
+			srv_file_per_table and srv_file_format */
+			if (!srv_file_per_table) {
+				push_warning_printf(
+					thd,
+					MYSQL_ERROR::WARN_LEVEL_ERROR,
+					ER_ILLEGAL_HA_CREATE_OPTION,
+					"InnoDB: ROW_FORMAT=%s"
+					" requires innodb_file_per_table.",
+					row_format_name);
+					ret = FALSE;
+
+			}
+
+			if (srv_file_format < DICT_TF_FORMAT_ZIP) {
+				push_warning_printf(
+					thd,
+					MYSQL_ERROR::WARN_LEVEL_ERROR,
+					ER_ILLEGAL_HA_CREATE_OPTION,
+					"InnoDB: ROW_FORMAT=%s"
+					" requires innodb_file_format >"
+					" Antelope.",
+					row_format_name);
+					ret = FALSE;
+			}
+
+			/* Cannot specify KEY_BLOCK_SIZE with
+			ROW_FORMAT = DYNAMIC.
+			However, we do allow COMPRESSED to be
+			specified with KEY_BLOCK_SIZE. */
+			if (kbs_specified
+			    && form->s->row_type == ROW_TYPE_DYNAMIC) {
+				push_warning_printf(
+					thd,
+					MYSQL_ERROR::WARN_LEVEL_ERROR,
+					ER_ILLEGAL_HA_CREATE_OPTION,
+					"InnoDB: cannot specify"
+					" ROW_FORMAT = DYNAMIC with"
+					" KEY_BLOCK_SIZE.");
+					ret = FALSE;
+			}
+
+			break;
+
+		case ROW_TYPE_REDUNDANT:
+		case ROW_TYPE_COMPACT:
+		case ROW_TYPE_DEFAULT:
+			/* Default is COMPACT. */
+			row_format_name
+				= form->s->row_type == ROW_TYPE_REDUNDANT
+				? "REDUNDANT"
+				: "COMPACT";
+
+			/* Cannot specify KEY_BLOCK_SIZE with these
+			format specifiers. */
+			if (kbs_specified) {
+				push_warning_printf(
+					thd,
+					MYSQL_ERROR::WARN_LEVEL_ERROR,
+					ER_ILLEGAL_HA_CREATE_OPTION,
+					"InnoDB: cannot specify"
+					" ROW_FORMAT = %s with"
+					" KEY_BLOCK_SIZE.",
+					row_format_name);
+					ret = FALSE;
+			}
+
+			break;
+
+		default:
+			push_warning(thd,
+				     MYSQL_ERROR::WARN_LEVEL_ERROR,
+				     ER_ILLEGAL_HA_CREATE_OPTION,
+				     "InnoDB: invalid ROW_FORMAT specifier.");
+			ret = FALSE;
+
+		}
+	}
+
+	return(ret);
+}
+
+/*****************************************************************//**
+Update create_info.  Used in SHOW CREATE TABLE et al. */
+UNIV_INTERN
 void
 ha_innobase::update_create_info(
 /*============================*/
-	HA_CREATE_INFO* create_info)	/* in/out: create info */
+	HA_CREATE_INFO* create_info)	/*!< in/out: create info */
 {
   if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) {
     ha_innobase::info(HA_STATUS_AUTO);
@@ -5220,17 +6007,17 @@ ha_innobase::update_create_info(
   }
 }
 
-/*********************************************************************
-Creates a new table to an InnoDB database. */
-
+/*****************************************************************//**
+Creates a new table to an InnoDB database.
+@return	error number */
+UNIV_INTERN
 int
 ha_innobase::create(
 /*================*/
-					/* out: error number */
-	const char*	name,		/* in: table name */
-	TABLE*		form,		/* in: information on table
+	const char*	name,		/*!< in: table name */
+	TABLE*		form,		/*!< in: information on table
 					columns and indexes */
-	HA_CREATE_INFO*	create_info)	/* in: more information of the
+	HA_CREATE_INFO*	create_info)	/*!< in: more information of the
 					created table, contains also the
 					create statement string */
 {
@@ -5243,8 +6030,11 @@ ha_innobase::create(
 	char		name2[FN_REFLEN];
 	char		norm_name[FN_REFLEN];
 	THD*		thd = ha_thd();
-	ib_longlong	auto_inc_value;
+	ib_int64_t	auto_inc_value;
 	ulint		flags;
+	/* Cache the value of innodb_file_format, in case it is
+	modified by another thread while the table is being created. */
+	const ulint	file_format = srv_file_format;
 
 	DBUG_ENTER("ha_innobase::create");
 
@@ -5290,18 +6080,7 @@ ha_innobase::create(
 
 	trx_search_latch_release_if_reserved(parent_trx);
 
-	trx = trx_allocate_for_mysql();
-
-	trx->mysql_thd = thd;
-	trx->mysql_query_str = thd_query(thd);
-
-	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
-		trx->check_foreigns = FALSE;
-	}
-
-	if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
-		trx->check_unique_secondary = FALSE;
-	}
+	trx = innobase_trx_allocate(thd);
 
 	if (lower_case_table_names) {
 		srv_lower_case_table_names = TRUE;
@@ -5323,8 +6102,145 @@ ha_innobase::create(
 
 	flags = 0;
 
-	if (form->s->row_type != ROW_TYPE_REDUNDANT) {
-		flags |= DICT_TF_COMPACT;
+	/* Validate create options if innodb_strict_mode is set. */
+	if (!create_options_are_valid(thd, form, create_info)) {
+		error = ER_ILLEGAL_HA_CREATE_OPTION;
+		goto cleanup;
+	}
+
+	if (create_info->key_block_size
+	    || (create_info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE)) {
+		/* Determine the page_zip.ssize corresponding to the
+		requested page size (key_block_size) in kilobytes. */
+
+		ulint	ssize, ksize;
+		ulint	key_block_size = create_info->key_block_size;
+
+		for (ssize = ksize = 1; ssize <= DICT_TF_ZSSIZE_MAX;
+		     ssize++, ksize <<= 1) {
+			if (key_block_size == ksize) {
+				flags = ssize << DICT_TF_ZSSIZE_SHIFT
+					| DICT_TF_COMPACT
+					| DICT_TF_FORMAT_ZIP
+					  << DICT_TF_FORMAT_SHIFT;
+				break;
+			}
+		}
+
+		if (!srv_file_per_table) {
+			push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+				     ER_ILLEGAL_HA_CREATE_OPTION,
+				     "InnoDB: KEY_BLOCK_SIZE"
+				     " requires innodb_file_per_table.");
+			flags = 0;
+		}
+
+		if (file_format < DICT_TF_FORMAT_ZIP) {
+			push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+				     ER_ILLEGAL_HA_CREATE_OPTION,
+				     "InnoDB: KEY_BLOCK_SIZE"
+				     " requires innodb_file_format >"
+				     " Antelope.");
+			flags = 0;
+		}
+
+		if (!flags) {
+			push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+					    ER_ILLEGAL_HA_CREATE_OPTION,
+					    "InnoDB: ignoring"
+					    " KEY_BLOCK_SIZE=%lu.",
+					    create_info->key_block_size);
+		}
+	}
+
+	if (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) {
+		if (flags) {
+			/* KEY_BLOCK_SIZE was specified. */
+			if (form->s->row_type != ROW_TYPE_COMPRESSED) {
+				/* ROW_FORMAT other than COMPRESSED
+				ignores KEY_BLOCK_SIZE.  It does not
+				make sense to reject conflicting
+				KEY_BLOCK_SIZE and ROW_FORMAT, because
+				such combinations can be obtained
+				with ALTER TABLE anyway. */
+				push_warning_printf(
+					thd,
+					MYSQL_ERROR::WARN_LEVEL_WARN,
+					ER_ILLEGAL_HA_CREATE_OPTION,
+					"InnoDB: ignoring KEY_BLOCK_SIZE=%lu"
+					" unless ROW_FORMAT=COMPRESSED.",
+					create_info->key_block_size);
+				flags = 0;
+			}
+		} else {
+			/* No KEY_BLOCK_SIZE */
+			if (form->s->row_type == ROW_TYPE_COMPRESSED) {
+				/* ROW_FORMAT=COMPRESSED without
+				KEY_BLOCK_SIZE implies half the
+				maximum KEY_BLOCK_SIZE. */
+				flags = (DICT_TF_ZSSIZE_MAX - 1)
+					<< DICT_TF_ZSSIZE_SHIFT
+					| DICT_TF_COMPACT
+					| DICT_TF_FORMAT_ZIP
+					<< DICT_TF_FORMAT_SHIFT;
+#if DICT_TF_ZSSIZE_MAX < 1
+# error "DICT_TF_ZSSIZE_MAX < 1"
+#endif
+			}
+		}
+
+		switch (form->s->row_type) {
+			const char* row_format_name;
+		case ROW_TYPE_REDUNDANT:
+			break;
+		case ROW_TYPE_COMPRESSED:
+		case ROW_TYPE_DYNAMIC:
+			row_format_name
+				= form->s->row_type == ROW_TYPE_COMPRESSED
+				? "COMPRESSED"
+				: "DYNAMIC";
+
+			if (!srv_file_per_table) {
+				push_warning_printf(
+					thd,
+					MYSQL_ERROR::WARN_LEVEL_WARN,
+					ER_ILLEGAL_HA_CREATE_OPTION,
+					"InnoDB: ROW_FORMAT=%s"
+					" requires innodb_file_per_table.",
+					row_format_name);
+			} else if (file_format < DICT_TF_FORMAT_ZIP) {
+				push_warning_printf(
+					thd,
+					MYSQL_ERROR::WARN_LEVEL_WARN,
+					ER_ILLEGAL_HA_CREATE_OPTION,
+					"InnoDB: ROW_FORMAT=%s"
+					" requires innodb_file_format >"
+					" Antelope.",
+					row_format_name);
+			} else {
+				flags |= DICT_TF_COMPACT
+					| (DICT_TF_FORMAT_ZIP
+					   << DICT_TF_FORMAT_SHIFT);
+				break;
+			}
+
+			/* fall through */
+		case ROW_TYPE_NOT_USED:
+		case ROW_TYPE_FIXED:
+		default:
+			push_warning(thd,
+				     MYSQL_ERROR::WARN_LEVEL_WARN,
+				     ER_ILLEGAL_HA_CREATE_OPTION,
+				     "InnoDB: assuming ROW_FORMAT=COMPACT.");
+		case ROW_TYPE_DEFAULT:
+		case ROW_TYPE_COMPACT:
+			flags = DICT_TF_COMPACT;
+			break;
+		}
+	} else if (!flags) {
+		/* No KEY_BLOCK_SIZE or ROW_FORMAT specified:
+		use ROW_FORMAT=COMPACT by default. */
+		flags = DICT_TF_COMPACT;
 	}
 
 	error = create_table_def(trx, form, norm_name,
@@ -5344,7 +6260,7 @@ ha_innobase::create(
 	/* Our function row_get_mysql_key_number_for_index assumes
 	the primary key is always number 0, if it exists */
 
-	DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
+	ut_a(primary_key_no == -1 || primary_key_no == 0);
 
 	/* Create the keys */
 
@@ -5354,8 +6270,7 @@ ha_innobase::create(
 		by InnoDB */
 
 		error = create_clustered_index_when_no_primary(
-			trx, form->s->row_type != ROW_TYPE_REDUNDANT,
-			norm_name);
+			trx, flags, norm_name);
 		if (error) {
 			goto cleanup;
 		}
@@ -5364,7 +6279,7 @@ ha_innobase::create(
 	if (primary_key_no != -1) {
 		/* In InnoDB the clustered index must always be created
 		first */
-		if ((error = create_index(trx, form, norm_name,
+		if ((error = create_index(trx, form, flags, norm_name,
 					  (uint) primary_key_no))) {
 			goto cleanup;
 		}
@@ -5374,7 +6289,8 @@ ha_innobase::create(
 
 		if (i != (uint) primary_key_no) {
 
-			if ((error = create_index(trx, form, norm_name, i))) {
+			if ((error = create_index(trx, form, flags, norm_name,
+						  i))) {
 				goto cleanup;
 			}
 		}
@@ -5385,7 +6301,7 @@ ha_innobase::create(
 			*trx->mysql_query_str, norm_name,
 			create_info->options & HA_LEX_CREATE_TMP_TABLE);
 
-		error = convert_error_code_to_mysql(error, NULL);
+		error = convert_error_code_to_mysql(error, flags, NULL);
 
 		if (error) {
 			goto cleanup;
@@ -5406,6 +6322,15 @@ ha_innobase::create(
 
 	DBUG_ASSERT(innobase_table != 0);
 
+	if (innobase_table) {
+		/* We update the highest file format in the system table
+		space, if this table has higher file format setting. */
+
+		trx_sys_file_format_max_upgrade(
+			(const char**) &innobase_file_format_check,
+			dict_table_get_format(innobase_table));
+	}
+
 	/* Note: We can't call update_thd() as prebuilt will not be
 	setup at this stage and so we use thd. */
 
@@ -5449,14 +6374,14 @@ cleanup:
 	DBUG_RETURN(error);
 }
 
-/*********************************************************************
-Discards or imports an InnoDB tablespace. */
-
+/*****************************************************************//**
+Discards or imports an InnoDB tablespace.
+@return	0 == success, -1 == error */
+UNIV_INTERN
 int
 ha_innobase::discard_or_import_tablespace(
 /*======================================*/
-				/* out: 0 == success, -1 == error */
-	my_bool discard)	/* in: TRUE if discard, else import */
+	my_bool discard)	/*!< in: TRUE if discard, else import */
 {
 	dict_table_t*	dict_table;
 	trx_t*		trx;
@@ -5477,18 +6402,18 @@ ha_innobase::discard_or_import_tablespace(
 		err = row_import_tablespace_for_mysql(dict_table->name, trx);
 	}
 
-	err = convert_error_code_to_mysql(err, NULL);
+	err = convert_error_code_to_mysql(err, dict_table->flags, NULL);
 
 	DBUG_RETURN(err);
 }
 
-/*********************************************************************
-Deletes all rows of an InnoDB table. */
-
+/*****************************************************************//**
+Deletes all rows of an InnoDB table.
+@return	error number */
+UNIV_INTERN
 int
 ha_innobase::delete_all_rows(void)
 /*==============================*/
-				/* out: error number */
 {
 	int		error;
 
@@ -5515,23 +6440,24 @@ ha_innobase::delete_all_rows(void)
 		goto fallback;
 	}
 
-	error = convert_error_code_to_mysql(error, NULL);
+	error = convert_error_code_to_mysql(error, prebuilt->table->flags,
+					    NULL);
 
 	DBUG_RETURN(error);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Drops a table from an InnoDB database. Before calling this function,
 MySQL calls innobase_commit to commit the transaction of the current user.
 Then the current user cannot have locks set on the table. Drop table
 operation inside InnoDB will remove all locks any user has on the table
-inside InnoDB. */
-
+inside InnoDB.
+@return	error number */
+UNIV_INTERN
 int
 ha_innobase::delete_table(
 /*======================*/
-				/* out: error number */
-	const char*	name)	/* in: table name */
+	const char*	name)	/*!< in: table name */
 {
 	ulint	name_len;
 	int	error;
@@ -5560,28 +6486,17 @@ ha_innobase::delete_table(
 
 	trx_search_latch_release_if_reserved(parent_trx);
 
+	trx = innobase_trx_allocate(thd);
+
 	if (lower_case_table_names) {
 		srv_lower_case_table_names = TRUE;
 	} else {
 		srv_lower_case_table_names = FALSE;
 	}
 
-	trx = trx_allocate_for_mysql();
-
-	trx->mysql_thd = thd;
-	trx->mysql_query_str = thd_query(thd);
-
-	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
-		trx->check_foreigns = FALSE;
-	}
-
-	if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
-		trx->check_unique_secondary = FALSE;
-	}
-
 	name_len = strlen(name);
 
-	assert(name_len < 1000);
+	ut_a(name_len < 1000);
 
 	/* Drop the table in InnoDB */
 
@@ -5604,26 +6519,24 @@ ha_innobase::delete_table(
 
 	trx_free_for_mysql(trx);
 
-	error = convert_error_code_to_mysql(error, NULL);
+	error = convert_error_code_to_mysql(error, 0, NULL);
 
 	DBUG_RETURN(error);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Removes all tables in the named database inside InnoDB. */
 static
 void
 innobase_drop_database(
 /*===================*/
-			/* out: error number */
-        handlerton *hton, /* in: handlerton of Innodb */
-	char*	path)	/* in: database path; inside InnoDB the name
+	handlerton *hton, /*!< in: handlerton of Innodb */
+	char*	path)	/*!< in: database path; inside InnoDB the name
 			of the last directory in the path is used as
 			the database name: for example, in 'mysql/data/test'
 			the database name is 'test' */
 {
 	ulint	len		= 0;
-	trx_t*	parent_trx;
 	trx_t*	trx;
 	char*	ptr;
 	int	error;
@@ -5633,12 +6546,18 @@ innobase_drop_database(
 	/* Get the transaction associated with the current thd, or create one
 	if not yet created */
 
-	parent_trx = check_trx_exists(thd);
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 
-	/* In case MySQL calls this in the middle of a SELECT query, release
-	possible adaptive hash latch to avoid deadlocks of threads */
+	/* In the Windows plugin, thd = current_thd is always NULL */
+	if (thd) {
+		trx_t*	parent_trx = check_trx_exists(thd);
 
-	trx_search_latch_release_if_reserved(parent_trx);
+		/* In case MySQL calls this in the middle of a SELECT
+		query, release possible adaptive hash latch to avoid
+		deadlocks of threads */
+
+		trx_search_latch_release_if_reserved(parent_trx);
+	}
 
 	ptr = strend(path) - 2;
 
@@ -5656,14 +6575,14 @@ innobase_drop_database(
 #ifdef	__WIN__
 	innobase_casedn_str(namebuf);
 #endif
+#if defined __WIN__ && !defined MYSQL_SERVER
+	/* In the Windows plugin, thd = current_thd is always NULL */
 	trx = trx_allocate_for_mysql();
-	trx->mysql_thd = thd;
-	trx->mysql_query_str = thd_query(thd);
-
-	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
-		trx->check_foreigns = FALSE;
-	}
-
+	trx->mysql_thd = NULL;
+	trx->mysql_query_str = NULL;
+#else
+	trx = innobase_trx_allocate(thd);
+#endif
 	error = row_drop_database_for_mysql(namebuf, trx);
 	my_free(namebuf, MYF(0));
 
@@ -5680,32 +6599,85 @@ innobase_drop_database(
 
 	innobase_commit_low(trx);
 	trx_free_for_mysql(trx);
-#ifdef NO_LONGER_INTERESTED_IN_DROP_DB_ERROR
-	error = convert_error_code_to_mysql(error, NULL);
-
-	return(error);
-#else
-	return;
-#endif
 }
+/*********************************************************************//**
+Renames an InnoDB table.
+@return	0 or error code */
+static
+int
+innobase_rename_table(
+/*==================*/
+	trx_t*		trx,	/*!< in: transaction */
+	const char*	from,	/*!< in: old name of the table */
+	const char*	to,	/*!< in: new name of the table */
+	ibool		lock_and_commit)
+				/*!< in: TRUE=lock data dictionary and commit */
+{
+	int	error;
+	char*	norm_to;
+	char*	norm_from;
 
-/*************************************************************************
-Renames an InnoDB table. */
+	if (lower_case_table_names) {
+		srv_lower_case_table_names = TRUE;
+	} else {
+		srv_lower_case_table_names = FALSE;
+	}
 
+	// Magic number 64 arbitrary
+	norm_to = (char*) my_malloc(strlen(to) + 64, MYF(0));
+	norm_from = (char*) my_malloc(strlen(from) + 64, MYF(0));
+
+	normalize_table_name(norm_to, to);
+	normalize_table_name(norm_from, from);
+
+	/* Serialize data dictionary operations with dictionary mutex:
+	no deadlocks can occur then in these operations */
+
+	if (lock_and_commit) {
+		row_mysql_lock_data_dictionary(trx);
+	}
+
+	error = row_rename_table_for_mysql(
+		norm_from, norm_to, trx, lock_and_commit);
+
+	if (error != DB_SUCCESS) {
+		FILE* ef = dict_foreign_err_file;
+
+		fputs("InnoDB: Renaming table ", ef);
+		ut_print_name(ef, trx, TRUE, norm_from);
+		fputs(" to ", ef);
+		ut_print_name(ef, trx, TRUE, norm_to);
+		fputs(" failed!\n", ef);
+	}
+
+	if (lock_and_commit) {
+		row_mysql_unlock_data_dictionary(trx);
+
+		/* Flush the log to reduce probability that the .frm
+		files and the InnoDB data dictionary get out-of-sync
+		if the user runs with innodb_flush_log_at_trx_commit = 0 */
+
+		log_buffer_flush_to_disk();
+	}
+
+	my_free(norm_to, MYF(0));
+	my_free(norm_from, MYF(0));
+
+	return error;
+}
+/*********************************************************************//**
+Renames an InnoDB table.
+@return	0 or error code */
+UNIV_INTERN
 int
 ha_innobase::rename_table(
 /*======================*/
-				/* out: 0 or error code */
-	const char*	from,	/* in: old name of the table */
-	const char*	to)	/* in: new name of the table */
+	const char*	from,	/*!< in: old name of the table */
+	const char*	to)	/*!< in: new name of the table */
 {
-	ulint	name_len1;
-	ulint	name_len2;
+	trx_t*	trx;
 	int	error;
 	trx_t*	parent_trx;
-	trx_t*	trx;
-	char	norm_from[1000];
-	char	norm_to[1000];
 	THD*	thd		= ha_thd();
 
 	DBUG_ENTER("ha_innobase::rename_table");
@@ -5720,38 +6692,9 @@ ha_innobase::rename_table(
 
 	trx_search_latch_release_if_reserved(parent_trx);
 
-	if (lower_case_table_names) {
-		srv_lower_case_table_names = TRUE;
-	} else {
-		srv_lower_case_table_names = FALSE;
-	}
+	trx = innobase_trx_allocate(thd);
 
-	trx = trx_allocate_for_mysql();
-	trx->mysql_thd = thd;
-	trx->mysql_query_str = thd_query(thd);
-
-	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
-		trx->check_foreigns = FALSE;
-	}
-
-	name_len1 = strlen(from);
-	name_len2 = strlen(to);
-
-	assert(name_len1 < 1000);
-	assert(name_len2 < 1000);
-
-	normalize_table_name(norm_from, from);
-	normalize_table_name(norm_to, to);
-
-	/* Rename the table in InnoDB */
-
-	error = row_rename_table_for_mysql(norm_from, norm_to, trx);
-
-	/* Flush the log to reduce probability that the .frm files and
-	the InnoDB data dictionary get out-of-sync if the user runs
-	with innodb_flush_log_at_trx_commit = 0 */
-
-	log_buffer_flush_to_disk();
+	error = innobase_rename_table(trx, from, to, TRUE);
 
 	/* Tell the InnoDB server that there might be work for
 	utility threads: */
@@ -5761,23 +6704,22 @@ ha_innobase::rename_table(
 	innobase_commit_low(trx);
 	trx_free_for_mysql(trx);
 
-	error = convert_error_code_to_mysql(error, NULL);
+	error = convert_error_code_to_mysql(error, 0, NULL);
 
 	DBUG_RETURN(error);
 }
 
-/*************************************************************************
-Estimates the number of index records in a range. */
-
+/*********************************************************************//**
+Estimates the number of index records in a range.
+@return	estimated number of rows */
+UNIV_INTERN
 ha_rows
 ha_innobase::records_in_range(
 /*==========================*/
-						/* out: estimated number of
-						rows */
-	uint			keynr,		/* in: index number */
-	key_range		*min_key,	/* in: start key value of the
+	uint			keynr,		/*!< in: index number */
+	key_range		*min_key,	/*!< in: start key value of the
 						   range, may also be 0 */
-	key_range		*max_key)	/* in: range end key val, may
+	key_range		*max_key)	/*!< in: range end key val, may
 						   also be 0 */
 {
 	KEY*		key;
@@ -5790,11 +6732,10 @@ ha_innobase::records_in_range(
 					+ table->s->max_key_length + 100;
 	dtuple_t*	range_start;
 	dtuple_t*	range_end;
-	ib_longlong	n_rows;
+	ib_int64_t	n_rows;
 	ulint		mode1;
 	ulint		mode2;
-	void*		heap1;
-	void*		heap2;
+	mem_heap_t*	heap;
 
 	DBUG_ENTER("records_in_range");
 
@@ -5811,12 +6752,18 @@ ha_innobase::records_in_range(
 
 	key = table->key_info + active_index;
 
-	index = dict_table_get_index_noninline(prebuilt->table, key->name);
+	index = dict_table_get_index_on_name(prebuilt->table, key->name);
 
-	range_start = dtuple_create_for_mysql(&heap1, key->key_parts);
+	/* MySQL knows about this index and so we must be able to find it.*/
+	ut_a(index);
+
+	heap = mem_heap_create(2 * (key->key_parts * sizeof(dfield_t)
+				    + sizeof(dtuple_t)));
+
+	range_start = dtuple_create(heap, key->key_parts);
 	dict_index_copy_types(range_start, index, key->key_parts);
 
-	range_end = dtuple_create_for_mysql(&heap2, key->key_parts);
+	range_end = dtuple_create(heap, key->key_parts);
 	dict_index_copy_types(range_end, index, key->key_parts);
 
 	row_sel_convert_mysql_key_to_innobase(
@@ -5851,8 +6798,7 @@ ha_innobase::records_in_range(
 		n_rows = HA_POS_ERROR;
 	}
 
-	dtuple_free_for_mysql(heap1);
-	dtuple_free_for_mysql(heap2);
+	mem_heap_free(heap);
 
 	my_free(key_val_buff2, MYF(0));
 
@@ -5871,14 +6817,14 @@ ha_innobase::records_in_range(
 	DBUG_RETURN((ha_rows) n_rows);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Gives an UPPER BOUND to the number of rows in a table. This is used in
-filesort.cc. */
-
+filesort.cc.
+@return	upper bound of rows */
+UNIV_INTERN
 ha_rows
 ha_innobase::estimate_rows_upper_bound(void)
 /*======================================*/
-			/* out: upper bound of rows */
 {
 	dict_index_t*	index;
 	ulonglong	estimate;
@@ -5900,10 +6846,13 @@ ha_innobase::estimate_rows_upper_bound(void)
 
 	trx_search_latch_release_if_reserved(prebuilt->trx);
 
-	index = dict_table_get_first_index_noninline(prebuilt->table);
+	index = dict_table_get_first_index(prebuilt->table);
+
+	ut_a(index->stat_n_leaf_pages > 0);
+
+	local_data_file_length =
+		((ulonglong) index->stat_n_leaf_pages) * UNIV_PAGE_SIZE;
 
-	local_data_file_length = ((ulonglong) index->stat_n_leaf_pages)
-							* UNIV_PAGE_SIZE;
 
 	/* Calculate a minimum length for a clustered index record and from
 	that an upper bound for the number of rows. Since we only calculate
@@ -5918,15 +6867,15 @@ ha_innobase::estimate_rows_upper_bound(void)
 	DBUG_RETURN((ha_rows) estimate);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 How many seeks it will take to read through the table. This is to be
 comparable to the number returned by records_in_range so that we can
-decide if we should scan the table or use keys. */
-
+decide if we should scan the table or use keys.
+@return	estimated time measured in disk seeks */
+UNIV_INTERN
 double
 ha_innobase::scan_time()
 /*====================*/
-			/* out: estimated time measured in disk seeks */
 {
 	/* Since MySQL seems to favor table scans too much over index
 	searches, we pretend that a sequential read takes the same time
@@ -5936,17 +6885,17 @@ ha_innobase::scan_time()
 	return((double) (prebuilt->table->stat_clustered_index_size));
 }
 
-/**********************************************************************
+/******************************************************************//**
 Calculate the time it takes to read a set of ranges through an index
-This enables us to optimise reads for clustered indexes. */
-
+This enables us to optimise reads for clustered indexes.
+@return	estimated time measured in disk seeks */
+UNIV_INTERN
 double
 ha_innobase::read_time(
 /*===================*/
-			/* out: estimated time measured in disk seeks */
-	uint	index,	/* in: key number */
-	uint	ranges,	/* in: how many ranges */
-	ha_rows rows)	/* in: estimated number of rows in the ranges */
+	uint	index,	/*!< in: key number */
+	uint	ranges,	/*!< in: how many ranges */
+	ha_rows rows)	/*!< in: estimated number of rows in the ranges */
 {
 	ha_rows total_rows;
 	double	time_for_scan;
@@ -5974,19 +6923,19 @@ ha_innobase::read_time(
 	return(ranges + (double) rows / (double) total_rows * time_for_scan);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Returns statistics information of the table to the MySQL interpreter,
 in various fields of the handle object. */
-
+UNIV_INTERN
 int
 ha_innobase::info(
 /*==============*/
-	uint flag)	/* in: what information MySQL requests */
+	uint flag)	/*!< in: what information MySQL requests */
 {
 	dict_table_t*	ib_table;
 	dict_index_t*	index;
 	ha_rows		rec_per_key;
-	ib_longlong	n_rows;
+	ib_int64_t	n_rows;
 	ulong		j;
 	ulong		i;
 	char		path[FN_REFLEN];
@@ -6044,7 +6993,7 @@ ha_innobase::info(
 		nor the CHECK TABLE time, nor the UPDATE or INSERT time. */
 
 		if (os_file_get_status(path,&stat_info)) {
-			stats.create_time = stat_info.ctime;
+			stats.create_time = (ulong) stat_info.ctime;
 		}
 	}
 
@@ -6152,10 +7101,10 @@ ha_innobase::info(
 	}
 
 	if (flag & HA_STATUS_CONST) {
-		index = dict_table_get_first_index_noninline(ib_table);
+		index = dict_table_get_first_index(ib_table);
 
 		if (prebuilt->clust_index_was_generated) {
-			index = dict_table_get_next_index_noninline(index);
+			index = dict_table_get_next_index(index);
 		}
 
 		for (i = 0; i < table->s->keys; i++) {
@@ -6166,8 +7115,8 @@ ha_innobase::info(
 						".frm file. Have you mixed up "
 						".frm files from different "
 						"installations? See "
-"http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n",
-
+						REFMAN
+						"innodb-troubleshooting.html\n",
 						ib_table->name);
 				break;
 			}
@@ -6179,7 +7128,7 @@ ha_innobase::info(
 "Index %s of %s has %lu columns unique inside InnoDB, but MySQL is asking "
 "statistics for %lu columns. Have you mixed up .frm files from different "
 "installations? "
-"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n",
+"See " REFMAN "innodb-troubleshooting.html\n",
 							index->name,
 							ib_table->name,
 							(unsigned long)
@@ -6211,37 +7160,45 @@ ha_innobase::info(
 				  (ulong) rec_per_key;
 			}
 
-			index = dict_table_get_next_index_noninline(index);
+			index = dict_table_get_next_index(index);
 		}
 	}
 
 	if (flag & HA_STATUS_ERRKEY) {
+		const dict_index_t*	err_index;
+
 		ut_a(prebuilt->trx);
 		ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
 
-		errkey = (unsigned int) row_get_mysql_key_number_for_index(
-			(dict_index_t*) trx_get_error_info(prebuilt->trx));
+		err_index = trx_get_error_info(prebuilt->trx);
+
+		if (err_index) {
+			errkey = (unsigned int)
+				row_get_mysql_key_number_for_index(err_index);
+		} else {
+			errkey = (unsigned int) prebuilt->trx->error_key_num;
+		}
 	}
 
-	if (flag & HA_STATUS_AUTO && table->found_next_number_field) {
- 		stats.auto_increment_value = innobase_peek_autoinc();
+	if ((flag & HA_STATUS_AUTO) && table->found_next_number_field) {
+		stats.auto_increment_value = innobase_peek_autoinc();
 	}
 
 	prebuilt->trx->op_info = (char*)"";
 
-  	DBUG_RETURN(0);
+	DBUG_RETURN(0);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Updates index cardinalities of the table, based on 8 random dives into
-each index tree. This does NOT calculate exact statistics on the table. */
-
+each index tree. This does NOT calculate exact statistics on the table.
+@return	returns always 0 (success) */
+UNIV_INTERN
 int
 ha_innobase::analyze(
 /*=================*/
-					/* out: returns always 0 (success) */
-	THD*		thd,		/* in: connection thread handle */
-	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
+	THD*		thd,		/*!< in: connection thread handle */
+	HA_CHECK_OPT*	check_opt)	/*!< in: currently ignored */
 {
 	/* Simply call ::info() with all the flags */
 	info(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE);
@@ -6249,31 +7206,30 @@ ha_innobase::analyze(
 	return(0);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds
 the table in MySQL. */
-
+UNIV_INTERN
 int
 ha_innobase::optimize(
 /*==================*/
-	THD*		thd,		/* in: connection thread handle */
-	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
+	THD*		thd,		/*!< in: connection thread handle */
+	HA_CHECK_OPT*	check_opt)	/*!< in: currently ignored */
 {
 	return(HA_ADMIN_TRY_ALTER);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Tries to check that an InnoDB table is not corrupted. If corruption is
 noticed, prints to stderr information about it. In case of corruption
-may also assert a failure and crash the server. */
-
+may also assert a failure and crash the server.
+@return	HA_ADMIN_CORRUPT or HA_ADMIN_OK */
+UNIV_INTERN
 int
 ha_innobase::check(
 /*===============*/
-					/* out: HA_ADMIN_CORRUPT or
-					HA_ADMIN_OK */
-	THD*		thd,		/* in: user thread handle */
-	HA_CHECK_OPT*	check_opt)	/* in: check options, currently
+	THD*		thd,		/*!< in: user thread handle */
+	HA_CHECK_OPT*	check_opt)	/*!< in: check options, currently
 					ignored */
 {
 	ulint		ret;
@@ -6299,17 +7255,16 @@ ha_innobase::check(
 	return(HA_ADMIN_CORRUPT);
 }
 
-/*****************************************************************
+/*************************************************************//**
 Adds information about free space in the InnoDB tablespace to a table comment
 which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
-foreign keys. */
-
+foreign keys.
+@return	table comment + InnoDB free space + info on foreign keys */
+UNIV_INTERN
 char*
 ha_innobase::update_table_comment(
 /*==============================*/
-				/* out: table comment + InnoDB free space +
-				info on foreign keys */
-	const char*	comment)/* in: table comment defined by user */
+	const char*	comment)/*!< in: table comment defined by user */
 {
 	uint	length = (uint) strlen(comment);
 	char*	str;
@@ -6335,7 +7290,7 @@ ha_innobase::update_table_comment(
 
 	/* output the data to a temporary file */
 
-	mutex_enter_noninline(&srv_dict_tmpfile_mutex);
+	mutex_enter(&srv_dict_tmpfile_mutex);
 	rewind(srv_dict_tmpfile);
 
 	fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB",
@@ -6368,22 +7323,22 @@ ha_innobase::update_table_comment(
 		pos[flen] = 0;
 	}
 
-	mutex_exit_noninline(&srv_dict_tmpfile_mutex);
+	mutex_exit(&srv_dict_tmpfile_mutex);
 
 	prebuilt->trx->op_info = (char*)"";
 
 	return(str ? str : (char*) comment);
 }
 
-/***********************************************************************
-Gets the foreign key create info for a table stored in InnoDB. */
-
+/*******************************************************************//**
+Gets the foreign key create info for a table stored in InnoDB.
+@return own: character string in the form which can be inserted to the
+CREATE TABLE statement, MUST be freed with
+ha_innobase::free_foreign_key_create_info */
+UNIV_INTERN
 char*
 ha_innobase::get_foreign_key_create_info(void)
 /*==========================================*/
-			/* out, own: character string in the form which
-			can be inserted to the CREATE TABLE statement,
-			MUST be freed with ::free_foreign_key_create_info */
 {
 	char*	str	= 0;
 	long	flen;
@@ -6404,7 +7359,7 @@ ha_innobase::get_foreign_key_create_info(void)
 
 	trx_search_latch_release_if_reserved(prebuilt->trx);
 
-	mutex_enter_noninline(&srv_dict_tmpfile_mutex);
+	mutex_enter(&srv_dict_tmpfile_mutex);
 	rewind(srv_dict_tmpfile);
 
 	/* output the data to a temporary file */
@@ -6430,12 +7385,13 @@ ha_innobase::get_foreign_key_create_info(void)
 		str[flen] = 0;
 	}
 
-	mutex_exit_noninline(&srv_dict_tmpfile_mutex);
+	mutex_exit(&srv_dict_tmpfile_mutex);
 
 	return(str);
 }
 
 
+UNIV_INTERN
 int
 ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
 {
@@ -6446,7 +7402,7 @@ ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
   update_thd(ha_thd());
   prebuilt->trx->op_info = (char*)"getting list of foreign keys";
   trx_search_latch_release_if_reserved(prebuilt->trx);
-  mutex_enter_noninline(&(dict_sys->mutex));
+  mutex_enter(&(dict_sys->mutex));
   foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list);
 
   while (foreign != NULL) {
@@ -6503,7 +7459,7 @@ ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
           {
             length=7;
             tmp_buff= "CASCADE";
-          }	
+          }
           else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)
           {
             length=8;
@@ -6521,8 +7477,8 @@ ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
           }
 	  f_key_info.delete_method = thd_make_lex_string(
 		  thd, f_key_info.delete_method, tmp_buff, length, 1);
- 
- 
+
+
           if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)
           {
             length=7;
@@ -6561,17 +7517,18 @@ ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
 	  f_key_list->push_back(pf_key_info);
 	  foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
   }
-  mutex_exit_noninline(&(dict_sys->mutex));
+  mutex_exit(&(dict_sys->mutex));
   prebuilt->trx->op_info = (char*)"";
 
   DBUG_RETURN(0);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Checks if ALTER TABLE may change the storage engine of the table.
 Changing storage engines is not allowed for tables for which there
-are foreign key constraints (parent or child tables). */
-
+are foreign key constraints (parent or child tables).
+@return	TRUE if can switch engines */
+UNIV_INTERN
 bool
 ha_innobase::can_switch_engines(void)
 /*=================================*/
@@ -6595,18 +7552,18 @@ ha_innobase::can_switch_engines(void)
 	DBUG_RETURN(can_switch);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Checks if a table is referenced by a foreign key. The MySQL manual states that
 a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a
 delete is then allowed internally to resolve a duplicate key conflict in
-REPLACE, not an update. */
-
+REPLACE, not an update.
+@return	> 0 if referenced by a FOREIGN KEY */
+UNIV_INTERN
 uint
 ha_innobase::referenced_by_foreign_key(void)
 /*========================================*/
-			/* out: > 0 if referenced by a FOREIGN KEY */
 {
-	if (dict_table_referenced_by_foreign_key(prebuilt->table)) {
+	if (dict_table_is_referenced_by_foreign_key(prebuilt->table)) {
 
 		return(1);
 	}
@@ -6614,29 +7571,29 @@ ha_innobase::referenced_by_foreign_key(void)
 	return(0);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Frees the foreign key create info for a table stored in InnoDB, if it is
 non-NULL. */
-
+UNIV_INTERN
 void
 ha_innobase::free_foreign_key_create_info(
 /*======================================*/
-	char*	str)	/* in, own: create info string to free	*/
+	char*	str)	/*!< in, own: create info string to free */
 {
 	if (str) {
 		my_free(str, MYF(0));
 	}
 }
 
-/***********************************************************************
-Tells something additional to the handler about how to do things. */
-
+/*******************************************************************//**
+Tells something additional to the handler about how to do things.
+@return	0 or error number */
+UNIV_INTERN
 int
 ha_innobase::extra(
 /*===============*/
-			   /* out: 0 or error number */
 	enum ha_extra_function operation)
-			   /* in: HA_EXTRA_FLUSH or some other flag */
+			   /*!< in: HA_EXTRA_FLUSH or some other flag */
 {
 	/* Warning: since it is not sure that MySQL calls external_lock
 	before calling this function, the trx field in prebuilt can be
@@ -6687,11 +7644,9 @@ ha_innobase::extra(
 	return(0);
 }
 
-/**********************************************************************
-Reset state of file to after 'open'.
-This function is called after every statement for all tables used
-by that statement.  */
-int ha_innobase::reset()
+UNIV_INTERN
+int
+ha_innobase::reset()
 {
 	if (prebuilt->blob_heap) {
 		row_mysql_prebuilt_free_blob_heap(prebuilt);
@@ -6708,7 +7663,7 @@ int ha_innobase::reset()
 	return(0);
 }
 
-/**********************************************************************
+/******************************************************************//**
 MySQL calls this function at the start of each SQL statement inside LOCK
 TABLES. Inside LOCK TABLES the ::external_lock method does not work to
 mark SQL statement borders. Note also a special case: if a temporary table
@@ -6718,13 +7673,13 @@ MySQL-5.0 also calls this before each statement in an execution of a stored
 procedure. To make the execution more deterministic for binlogging, MySQL-5.0
 locks all tables involved in a stored procedure with full explicit table
 locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
-procedure. */
-
+procedure.
+@return	0 or error code */
+UNIV_INTERN
 int
 ha_innobase::start_stmt(
 /*====================*/
-				/* out: 0 or error code */
-	THD*		thd,	/* in: handle to the user thread */
+	THD*		thd,	/*!< in: handle to the user thread */
 	thr_lock_type	lock_type)
 {
 	trx_t*		trx;
@@ -6793,14 +7748,14 @@ ha_innobase::start_stmt(
 	return(0);
 }
 
-/**********************************************************************
-Maps a MySQL trx isolation level code to the InnoDB isolation level code */
-inline
+/******************************************************************//**
+Maps a MySQL trx isolation level code to the InnoDB isolation level code
+@return	InnoDB isolation level */
+static inline
 ulint
 innobase_map_isolation_level(
 /*=========================*/
-					/* out: InnoDB isolation level */
-	enum_tx_isolation	iso)	/* in: MySQL isolation level code */
+	enum_tx_isolation	iso)	/*!< in: MySQL isolation level code */
 {
 	switch(iso) {
 		case ISO_REPEATABLE_READ: return(TRX_ISO_REPEATABLE_READ);
@@ -6811,21 +7766,21 @@ innobase_map_isolation_level(
 	}
 }
 
-/**********************************************************************
+/******************************************************************//**
 As MySQL will execute an external lock for every new table it uses when it
 starts to process an SQL statement (an exception is when MySQL calls
 start_stmt for the handle) we can use this function to store the pointer to
 the THD in the handle. We will also use this function to communicate
 to InnoDB that a new SQL statement has started and that we must store a
 savepoint to our transaction handle, so that we are able to roll back
-the SQL statement in case of an error. */
-
+the SQL statement in case of an error.
+@return	0 */
+UNIV_INTERN
 int
 ha_innobase::external_lock(
 /*=======================*/
-				/* out: 0 */
-	THD*	thd,		/* in: handle to the user thread */
-	int	lock_type)	/* in: lock type */
+	THD*	thd,		/*!< in: handle to the user thread */
+	int	lock_type)	/*!< in: lock type */
 {
 	trx_t*		trx;
 
@@ -6841,7 +7796,7 @@ ha_innobase::external_lock(
 	if (lock_type == F_WRLCK)
 	{
 		ulong const binlog_format= thd_binlog_format(thd);
-		ulong const tx_isolation = thd_tx_isolation(current_thd);
+		ulong const tx_isolation = thd_tx_isolation(ha_thd());
 		if (tx_isolation <= ISO_READ_COMMITTED &&
 		    binlog_format == BINLOG_FORMAT_STMT)
 		{
@@ -6927,7 +7882,7 @@ ha_innobase::external_lock(
 
 				if (error != DB_SUCCESS) {
 					error = convert_error_code_to_mysql(
-						(int) error, thd);
+						(int) error, 0, thd);
 					DBUG_RETURN((int) error);
 				}
 			}
@@ -6979,16 +7934,16 @@ ha_innobase::external_lock(
 	DBUG_RETURN(0);
 }
 
-/**********************************************************************
+/******************************************************************//**
 With this function MySQL request a transactional lock to a table when
-user issued query LOCK TABLES..WHERE ENGINE = InnoDB. */
-
+user issued query LOCK TABLES..WHERE ENGINE = InnoDB.
+@return	error code */
+UNIV_INTERN
 int
 ha_innobase::transactional_table_lock(
 /*==================================*/
-				/* out: error code */
-	THD*	thd,		/* in: handle to the user thread */
-	int	lock_type)	/* in: lock type */
+	THD*	thd,		/*!< in: handle to the user thread */
+	int	lock_type)	/*!< in: lock type */
 {
 	trx_t*		trx;
 
@@ -7010,8 +7965,8 @@ ha_innobase::transactional_table_lock(
 			"InnoDB: Have you deleted the .ibd file"
 			" from the database directory under\n"
 			"InnoDB: the MySQL datadir?"
-			"InnoDB: See"
-			" http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
+			"InnoDB: See " REFMAN
+			"innodb-troubleshooting.html\n"
 			"InnoDB: how you can resolve the problem.\n",
 			prebuilt->table->name);
 		DBUG_RETURN(HA_ERR_CRASHED);
@@ -7054,7 +8009,8 @@ ha_innobase::transactional_table_lock(
 		error = row_lock_table_for_mysql(prebuilt, NULL, 0);
 
 		if (error != DB_SUCCESS) {
-			error = convert_error_code_to_mysql((int) error, thd);
+			error = convert_error_code_to_mysql(
+				(int) error, prebuilt->table->flags, thd);
 			DBUG_RETURN((int) error);
 		}
 
@@ -7071,29 +8027,27 @@ ha_innobase::transactional_table_lock(
 	DBUG_RETURN(0);
 }
 
-/****************************************************************************
-Here we export InnoDB status variables to MySQL.  */
+/************************************************************************//**
+Here we export InnoDB status variables to MySQL. */
 static
-int
-innodb_export_status()
-/*==================*/
+void
+innodb_export_status(void)
+/*======================*/
 {
 	if (innodb_inited) {
 		srv_export_innodb_status();
 	}
-
-	return 0;
 }
 
-/****************************************************************************
+/************************************************************************//**
 Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
 Monitor to the client. */
 static
 bool
 innodb_show_status(
 /*===============*/
-	handlerton*	hton,	/* in: the innodb handlerton */
-	THD*	thd,	/* in: the MySQL query thread of the caller */
+	handlerton*	hton,	/*!< in: the innodb handlerton */
+	THD*	thd,	/*!< in: the MySQL query thread of the caller */
 	stat_print_fn *stat_print)
 {
 	trx_t*			trx;
@@ -7103,6 +8057,7 @@ innodb_show_status(
 	ulint			trx_list_end = ULINT_UNDEFINED;
 
 	DBUG_ENTER("innodb_show_status");
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 
 	trx = check_trx_exists(thd);
 
@@ -7114,9 +8069,10 @@ innodb_show_status(
 	long	flen, usable_len;
 	char*	str;
 
-	mutex_enter_noninline(&srv_monitor_file_mutex);
+	mutex_enter(&srv_monitor_file_mutex);
 	rewind(srv_monitor_file);
-	srv_printf_innodb_monitor(srv_monitor_file);
+	srv_printf_innodb_monitor(srv_monitor_file,
+				&trx_list_start, &trx_list_end);
 	flen = ftell(srv_monitor_file);
 	os_file_set_eof(srv_monitor_file);
 
@@ -7134,7 +8090,7 @@ innodb_show_status(
 	read the contents of the temporary file */
 
 	if (!(str = (char*) my_malloc(usable_len + 1, MYF(0)))) {
-	  mutex_exit_noninline(&srv_monitor_file_mutex);
+	  mutex_exit(&srv_monitor_file_mutex);
 	  DBUG_RETURN(TRUE);
 	}
 
@@ -7159,7 +8115,7 @@ innodb_show_status(
 		flen = (long) fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
 	}
 
-	mutex_exit_noninline(&srv_monitor_file_mutex);
+	mutex_exit(&srv_monitor_file_mutex);
 
 	bool result = FALSE;
 
@@ -7172,20 +8128,20 @@ innodb_show_status(
 	DBUG_RETURN(FALSE);
 }
 
-/****************************************************************************
+/************************************************************************//**
 Implements the SHOW MUTEX STATUS command. . */
 static
 bool
 innodb_mutex_show_status(
 /*=====================*/
-	handlerton*	hton,	/* in: the innodb handlerton */
-	THD*		thd,		/* in: the MySQL query thread of the
+	handlerton*	hton,	/*!< in: the innodb handlerton */
+	THD*		thd,		/*!< in: the MySQL query thread of the
 					caller */
 	stat_print_fn*	stat_print)
 {
 	char buf1[IO_SIZE], buf2[IO_SIZE];
-	mutex_t*  mutex;
-	rw_lock_t* lock;
+	mutex_t*	mutex;
+	rw_lock_t*	lock;
 #ifdef UNIV_DEBUG
 	ulint	  rw_lock_count= 0;
 	ulint	  rw_lock_count_spin_loop= 0;
@@ -7196,12 +8152,17 @@ innodb_mutex_show_status(
 #endif /* UNIV_DEBUG */
 	uint	  hton_name_len= (uint) strlen(innobase_hton_name), buf1len, buf2len;
 	DBUG_ENTER("innodb_mutex_show_status");
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 
-	mutex_enter_noninline(&mutex_list_mutex);
+	mutex_enter(&mutex_list_mutex);
 
 	mutex = UT_LIST_GET_FIRST(mutex_list);
 
 	while (mutex != NULL) {
+		if (mutex->count_os_wait == 0
+		    || buf_pool_is_block_mutex(mutex)) {
+			goto next_mutex;
+		}
 #ifdef UNIV_DEBUG
 		if (mutex->mutex_type != 1) {
 			if (mutex->count_using > 0) {
@@ -7223,8 +8184,7 @@ innodb_mutex_show_status(
 				if (stat_print(thd, innobase_hton_name,
 						hton_name_len, buf1, buf1len,
 						buf2, buf2len)) {
-					mutex_exit_noninline(
-						&mutex_list_mutex);
+					mutex_exit(&mutex_list_mutex);
 					DBUG_RETURN(1);
 				}
 			}
@@ -7246,24 +8206,24 @@ innodb_mutex_show_status(
 		if (stat_print(thd, innobase_hton_name,
 			       hton_name_len, buf1, buf1len,
 			       buf2, buf2len)) {
-			mutex_exit_noninline(&mutex_list_mutex);
+			mutex_exit(&mutex_list_mutex);
 			DBUG_RETURN(1);
 		}
 #endif /* UNIV_DEBUG */
 
+next_mutex:
 		mutex = UT_LIST_GET_NEXT(list, mutex);
 	}
 
-	mutex_exit_noninline(&mutex_list_mutex);
+	mutex_exit(&mutex_list_mutex);
 
-	mutex_enter_noninline(&rw_lock_list_mutex);
+	mutex_enter(&rw_lock_list_mutex);
 
 	lock = UT_LIST_GET_FIRST(rw_lock_list);
 
-	while (lock != NULL)
-	{
-		if (lock->count_os_wait)
-		{
+	while (lock != NULL) {
+		if (lock->count_os_wait
+		    && !buf_pool_is_block_lock(lock)) {
 			buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%lu",
                                     lock->cfile_name, (ulong) lock->cline);
 			buf2len= my_snprintf(buf2, sizeof(buf2),
@@ -7272,14 +8232,14 @@ innodb_mutex_show_status(
 			if (stat_print(thd, innobase_hton_name,
 				       hton_name_len, buf1, buf1len,
 				       buf2, buf2len)) {
-				mutex_exit_noninline(&rw_lock_list_mutex);
+				mutex_exit(&rw_lock_list_mutex);
 				DBUG_RETURN(1);
 			}
 		}
 		lock = UT_LIST_GET_NEXT(list, lock);
 	}
 
-	mutex_exit_noninline(&rw_lock_list_mutex);
+	mutex_exit(&rw_lock_list_mutex);
 
 #ifdef UNIV_DEBUG
 	buf2len= my_snprintf(buf2, sizeof(buf2),
@@ -7304,101 +8264,111 @@ bool innobase_show_status(handlerton *hton, THD* thd,
                           stat_print_fn* stat_print,
                           enum ha_stat_type stat_type)
 {
+	DBUG_ASSERT(hton == innodb_hton_ptr);
+
 	switch (stat_type) {
 	case HA_ENGINE_STATUS:
 		return innodb_show_status(hton, thd, stat_print);
 	case HA_ENGINE_MUTEX:
 		return innodb_mutex_show_status(hton, thd, stat_print);
 	default:
-		return FALSE;
+		return(FALSE);
 	}
 }
-	rw_lock_t* lock;
 
-
-/****************************************************************************
+/************************************************************************//**
  Handling the shared INNOBASE_SHARE structure that is needed to provide table
  locking.
 ****************************************************************************/
 
-static uchar* innobase_get_key(INNOBASE_SHARE* share, size_t *length,
-	my_bool not_used __attribute__((unused)))
-{
-	*length=share->table_name_length;
-
-	return (uchar*) share->table_name;
-}
-
 static INNOBASE_SHARE* get_share(const char* table_name)
 {
 	INNOBASE_SHARE *share;
 	pthread_mutex_lock(&innobase_share_mutex);
-	uint length=(uint) strlen(table_name);
 
-	if (!(share=(INNOBASE_SHARE*) hash_search(&innobase_open_tables,
-				(uchar*) table_name,
-				length))) {
+	ulint	fold = ut_fold_string(table_name);
+
+	HASH_SEARCH(table_name_hash, innobase_open_tables, fold,
+		    INNOBASE_SHARE*, share,
+		    ut_ad(share->use_count > 0),
+		    !strcmp(share->table_name, table_name));
+
+	if (!share) {
+
+		uint length = (uint) strlen(table_name);
+
+		/* TODO: invoke HASH_MIGRATE if innobase_open_tables
+		grows too big */
 
 		share = (INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1,
 			MYF(MY_FAE | MY_ZEROFILL));
 
-		share->table_name_length=length;
-		share->table_name=(char*) (share+1);
-		strmov(share->table_name,table_name);
+		share->table_name = (char*) memcpy(share + 1,
+						   table_name, length + 1);
 
-		if (my_hash_insert(&innobase_open_tables,
-				(uchar*) share)) {
-			pthread_mutex_unlock(&innobase_share_mutex);
-			my_free(share,0);
-
-			return 0;
-		}
+		HASH_INSERT(INNOBASE_SHARE, table_name_hash,
+			    innobase_open_tables, fold, share);
 
 		thr_lock_init(&share->lock);
-		pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST);
 	}
 
 	share->use_count++;
 	pthread_mutex_unlock(&innobase_share_mutex);
 
-	return share;
+	return(share);
 }
 
 static void free_share(INNOBASE_SHARE* share)
 {
 	pthread_mutex_lock(&innobase_share_mutex);
 
+#ifdef UNIV_DEBUG
+	INNOBASE_SHARE* share2;
+	ulint	fold = ut_fold_string(share->table_name);
+
+	HASH_SEARCH(table_name_hash, innobase_open_tables, fold,
+		    INNOBASE_SHARE*, share2,
+		    ut_ad(share->use_count > 0),
+		    !strcmp(share->table_name, share2->table_name));
+
+	ut_a(share2 == share);
+#endif /* UNIV_DEBUG */
+
 	if (!--share->use_count) {
-		hash_delete(&innobase_open_tables, (uchar*) share);
+		ulint	fold = ut_fold_string(share->table_name);
+
+		HASH_DELETE(INNOBASE_SHARE, table_name_hash,
+			    innobase_open_tables, fold, share);
 		thr_lock_delete(&share->lock);
-		pthread_mutex_destroy(&share->mutex);
 		my_free(share, MYF(0));
+
+		/* TODO: invoke HASH_MIGRATE if innobase_open_tables
+		shrinks too much */
 	}
 
 	pthread_mutex_unlock(&innobase_share_mutex);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Converts a MySQL table lock stored in the 'lock' field of the handle to
 a proper type before storing pointer to the lock into an array of pointers.
 MySQL also calls this if it wants to reset some table locks to a not-locked
 state during the processing of an SQL query. An example is that during a
 SELECT the read lock is released early on the 'const' tables where we only
 fetch one row. MySQL does not call this when it releases all locks at the
-end of an SQL statement. */
-
+end of an SQL statement.
+@return	pointer to the next element in the 'to' array */
+UNIV_INTERN
 THR_LOCK_DATA**
 ha_innobase::store_lock(
 /*====================*/
-						/* out: pointer to the next
-						element in the 'to' array */
-	THD*			thd,		/* in: user thread handle */
-	THR_LOCK_DATA**		to,		/* in: pointer to an array
+	THD*			thd,		/*!< in: user thread handle */
+	THR_LOCK_DATA**		to,		/*!< in: pointer to an array
 						of pointers to lock structs;
 						pointer to the 'lock' field
 						of current handle is stored
 						next to this array */
-	enum thr_lock_type	lock_type)	/* in: lock type to store in
+	enum thr_lock_type	lock_type)	/*!< in: lock type to store in
 						'lock'; this may also be
 						TL_IGNORE */
 {
@@ -7432,7 +8402,7 @@ ha_innobase::store_lock(
 		}
 	}
 
-	DBUG_ASSERT(thd == current_thd);
+	DBUG_ASSERT(EQ_CURRENT_THD(thd));
 	const bool in_lock_tables = thd_in_lock_tables(thd);
 	const uint sql_command = thd_sql_command(thd);
 
@@ -7581,16 +8551,16 @@ ha_innobase::store_lock(
 	return(to);
 }
 
-/*******************************************************************************
+/*********************************************************************//**
 Read the next autoinc value. Acquire the relevant locks before reading
 the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked
-on return and all relevant locks acquired. */
-
-ulong
+on return and all relevant locks acquired.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
 ha_innobase::innobase_get_autoinc(
 /*==============================*/
-					/* out: DB_SUCCESS or error code */
-	ulonglong*	value)		/* out: autoinc value */
+	ulonglong*	value)		/*!< out: autoinc value */
 {
  	*value = 0;
  
@@ -7604,18 +8574,18 @@ ha_innobase::innobase_get_autoinc(
 		/* It should have been initialized during open. */
 		ut_a(*value != 0);
 	}
-  
-	return(ulong(prebuilt->autoinc_error));
+
+	return(prebuilt->autoinc_error);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 This function reads the global auto-inc counter. It doesn't use the 
-AUTOINC lock even if the lock mode is set to TRADITIONAL. */
-
+AUTOINC lock even if the lock mode is set to TRADITIONAL.
+@return	the autoinc value */
+UNIV_INTERN
 ulonglong
-ha_innobase::innobase_peek_autoinc()
-/*================================*/
-					/* out: the autoinc value */
+ha_innobase::innobase_peek_autoinc(void)
+/*====================================*/
 {
 	ulonglong	auto_inc;
 	dict_table_t*	innodb_table;
@@ -7632,26 +8602,26 @@ ha_innobase::innobase_peek_autoinc()
 	ut_a(auto_inc > 0);
 
 	dict_table_autoinc_unlock(innodb_table);
- 
+
 	return(auto_inc);
 }
 
-/*******************************************************************************
+/*********************************************************************//**
 This function initializes the auto-inc counter if it has not been
 initialized yet. This function does not change the value of the auto-inc
 counter if it already has been initialized. Returns the value of the
 auto-inc counter in *first_value, and ULONGLONG_MAX in *nb_reserved_values (as
 we have a table-level lock). offset, increment, nb_desired_values are ignored.
-*first_value is set to -1 if error (deadlock or lock wait timeout)            */
-
+*first_value is set to -1 if error (deadlock or lock wait timeout) */
+UNIV_INTERN
 void
 ha_innobase::get_auto_increment(
 /*============================*/
-        ulonglong	offset,              /* in: */
-        ulonglong	increment,           /* in: table autoinc increment */
-        ulonglong	nb_desired_values,   /* in: number of values reqd */
-        ulonglong	*first_value,        /* out: the autoinc value */
-        ulonglong	*nb_reserved_values) /* out: count of reserved values */
+        ulonglong	offset,              /*!< in: table autoinc offset */
+        ulonglong	increment,           /*!< in: table autoinc increment */
+        ulonglong	nb_desired_values,   /*!< in: number of values reqd */
+        ulonglong	*first_value,        /*!< out: the autoinc value */
+        ulonglong	*nb_reserved_values) /*!< out: count of reserved values */
 {
 	trx_t*		trx;
 	ulint		error;
@@ -7745,11 +8715,17 @@ ha_innobase::get_auto_increment(
 	dict_table_autoinc_unlock(prebuilt->table);
 }
 
-/* See comment in handler.h */
+/*******************************************************************//**
+Reset the auto-increment counter to the given value, i.e. the next row
+inserted will get the given value. This is called e.g. after TRUNCATE
+is emulated by doing a 'DELETE FROM t'. HA_ERR_WRONG_COMMAND is
+returned by storage engines that don't support this operation.
+@return	0 or error code */
+UNIV_INTERN
 int
 ha_innobase::reset_auto_increment(
 /*==============================*/
-	ulonglong	value)		/* in: new value for table autoinc */
+	ulonglong	value)		/*!< in: new value for table autoinc */
 {
 	DBUG_ENTER("ha_innobase::reset_auto_increment");
 
@@ -7760,7 +8736,9 @@ ha_innobase::reset_auto_increment(
 	error = row_lock_table_autoinc_for_mysql(prebuilt);
 
 	if (error != DB_SUCCESS) {
-		error = convert_error_code_to_mysql(error, user_thd);
+		error = convert_error_code_to_mysql(error,
+						    prebuilt->table->flags,
+						    user_thd);
 
 		DBUG_RETURN(error);
 	}
@@ -7776,6 +8754,7 @@ ha_innobase::reset_auto_increment(
 }
 
 /* See comment in handler.cc */
+UNIV_INTERN
 bool
 ha_innobase::get_error_message(int error, String *buf)
 {
@@ -7784,22 +8763,21 @@ ha_innobase::get_error_message(int error, String *buf)
 	buf->copy(trx->detailed_error, (uint) strlen(trx->detailed_error),
 		system_charset_info);
 
-	return FALSE;
+	return(FALSE);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
 If there is no explicitly declared non-null unique key or a primary key, then
-InnoDB internally uses the row id as the primary key. */
-
+InnoDB internally uses the row id as the primary key.
+@return	< 0 if ref1 < ref2, 0 if equal, else > 0 */
+UNIV_INTERN
 int
 ha_innobase::cmp_ref(
 /*=================*/
-				/* out: < 0 if ref1 < ref2, 0 if equal, else
-				> 0 */
-	const uchar*	ref1,	/* in: an (internal) primary key value in the
+	const uchar*	ref1,	/*!< in: an (internal) primary key value in the
 				MySQL key value format */
-	const uchar*	ref2)	/* in: an (internal) primary key value in the
+	const uchar*	ref2)	/*!< in: an (internal) primary key value in the
 				MySQL key value format */
 {
 	enum_field_types mysql_type;
@@ -7859,25 +8837,24 @@ ha_innobase::cmp_ref(
 	return(0);
 }
 
-/***********************************************************************
-Ask InnoDB if a query to a table can be cached. */
-
+/*******************************************************************//**
+Ask InnoDB if a query to a table can be cached.
+@return	TRUE if query caching of the table is permitted */
+UNIV_INTERN
 my_bool
 ha_innobase::register_query_cache_table(
 /*====================================*/
-					/* out: TRUE if query caching
-					of the table is permitted */
-	THD*		thd,		/* in: user thread handle */
-	char*		table_key,	/* in: concatenation of database name,
-					the null character '\0',
+	THD*		thd,		/*!< in: user thread handle */
+	char*		table_key,	/*!< in: concatenation of database name,
+					the null character NUL,
 					and the table name */
-	uint		key_length,	/* in: length of the full name, i.e.
+	uint		key_length,	/*!< in: length of the full name, i.e.
 					len(dbname) + len(tablename) + 1 */
 	qc_engine_callback*
-			call_back,	/* out: pointer to function for
+			call_back,	/*!< out: pointer to function for
 					checking if query caching
 					is permitted */
-	ulonglong	*engine_data)	/* in/out: data to call_back */
+	ulonglong	*engine_data)	/*!< in/out: data to call_back */
 {
 	*call_back = innobase_query_caching_of_table_permitted;
 	*engine_data = 0;
@@ -7886,45 +8863,43 @@ ha_innobase::register_query_cache_table(
 							 engine_data));
 }
 
+UNIV_INTERN
 char*
 ha_innobase::get_mysql_bin_log_name()
 {
 	return(trx_sys_mysql_bin_log_name);
 }
 
+UNIV_INTERN
 ulonglong
 ha_innobase::get_mysql_bin_log_pos()
 {
-	/* trx... is ib_longlong, which is a typedef for a 64-bit integer
+	/* trx... is ib_int64_t, which is a typedef for a 64-bit integer
 	(__int64 or longlong) so it's ok to cast it to ulonglong. */
 
 	return(trx_sys_mysql_bin_log_pos);
 }
 
-/**********************************************************************
+/******************************************************************//**
 This function is used to find the storage length in bytes of the first n
 characters for prefix indexes using a multibyte character set. The function
 finds charset information and returns length of prefix_len characters in the
 index field in bytes.
-
-NOTE: the prototype of this function is copied to data0type.c! If you change
-this function, you MUST change also data0type.c! */
-extern "C"
+@return	number of bytes occupied by the first n characters */
+extern "C" UNIV_INTERN
 ulint
 innobase_get_at_most_n_mbchars(
 /*===========================*/
-				/* out: number of bytes occupied by the first
-				n characters */
-	ulint charset_id,	/* in: character set id */
-	ulint prefix_len,	/* in: prefix length in bytes of the index
+	ulint charset_id,	/*!< in: character set id */
+	ulint prefix_len,	/*!< in: prefix length in bytes of the index
 				(this has to be divided by mbmaxlen to get the
 				number of CHARACTERS n in the prefix) */
-	ulint data_len,		/* in: length of the string in bytes */
-	const char* str)	/* in: character string */
+	ulint data_len,		/*!< in: length of the string in bytes */
+	const char* str)	/*!< in: character string */
 {
-	ulint char_length;	/* character length in bytes */
-	ulint n_chars;		/* number of characters in prefix */
-	CHARSET_INFO* charset;	/* charset used in the field */
+	ulint char_length;	/*!< character length in bytes */
+	ulint n_chars;		/*!< number of characters in prefix */
+	CHARSET_INFO* charset;	/*!< charset used in the field */
 
 	charset = get_charset((uint) charset_id, MYF(MY_WME));
 
@@ -7975,50 +8950,30 @@ innobase_get_at_most_n_mbchars(
 	return(char_length);
 }
 
-/***********************************************************************
-This function is used to prepare X/Open XA distributed transaction   */
+/*******************************************************************//**
+This function is used to prepare an X/Open XA distributed transaction.
+@return	0 or error number */
 static
 int
 innobase_xa_prepare(
 /*================*/
-			/* out: 0 or error number */
-        handlerton *hton,
-	THD*	thd,	/* in: handle to the MySQL thread of the user
-			whose XA transaction should be prepared */
-	bool	all)	/* in: TRUE - commit transaction
-			FALSE - the current SQL statement ended */
+        handlerton*	hton,	/*!< in: InnoDB handlerton */
+	THD*		thd,	/*!< in: handle to the MySQL thread of
+				the user whose XA transaction should
+				be prepared */
+	bool		all)	/*!< in: TRUE - commit transaction
+				FALSE - the current SQL statement
+				ended */
 {
 	int error = 0;
 	trx_t* trx = check_trx_exists(thd);
 
-	if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
-	    (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
-	{
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 
-		/* For ibbackup to work the order of transactions in binlog
-		and InnoDB must be the same. Consider the situation
-
-		  thread1> prepare; write to binlog; ...
-			  <context switch>
-		  thread2> prepare; write to binlog; commit
-		  thread1>			     ... commit
-
-		To ensure this will not happen we're taking the mutex on
-		prepare, and releasing it on commit.
-
-		Note: only do it for normal commits, done via ha_commit_trans.
-		If 2pc protocol is executed by external transaction
-		coordinator, it will be just a regular MySQL client
-		executing XA PREPARE and XA COMMIT commands.
-		In this case we cannot know how many minutes or hours
-		will be between XA PREPARE and XA COMMIT, and we don't want
-		to block for undefined period of time.
-		*/
-		pthread_mutex_lock(&prepare_commit_mutex);
-		trx->active_trans = 2;
-	}
-
-	if (!THDVAR(thd, support_xa)) {
+	/* we use support_xa value as it was seen at transaction start
+	time, not the current session variable value. Any possible changes
+	to the session variable take effect only in the next transaction */
+	if (!trx->support_xa) {
 
 		return(0);
 	}
@@ -8067,21 +9022,49 @@ innobase_xa_prepare(
 
 	srv_active_wake_master_thread();
 
-	return error;
+	if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
+	    (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
+	{
+
+		/* For ibbackup to work the order of transactions in binlog
+		and InnoDB must be the same. Consider the situation
+
+		  thread1> prepare; write to binlog; ...
+			  <context switch>
+		  thread2> prepare; write to binlog; commit
+		  thread1>			     ... commit
+
+		To ensure this will not happen we're taking the mutex on
+		prepare, and releasing it on commit.
+
+		Note: only do it for normal commits, done via ha_commit_trans.
+		If 2pc protocol is executed by external transaction
+		coordinator, it will be just a regular MySQL client
+		executing XA PREPARE and XA COMMIT commands.
+		In this case we cannot know how many minutes or hours
+		will be between XA PREPARE and XA COMMIT, and we don't want
+		to block for undefined period of time.
+		*/
+		pthread_mutex_lock(&prepare_commit_mutex);
+		trx->active_trans = 2;
+	}
+
+	return(error);
 }
 
-/***********************************************************************
-This function is used to recover X/Open XA distributed transactions   */
+/*******************************************************************//**
+This function is used to recover X/Open XA distributed transactions.
+@return	number of prepared transactions stored in xid_list */
 static
 int
 innobase_xa_recover(
 /*================*/
-				/* out: number of prepared transactions
-				stored in xid_list */
-        handlerton *hton,
-	XID*	xid_list,	/* in/out: prepared transactions */
-	uint	len)		/* in: number of slots in xid_list */
+	handlerton*	hton,	/*!< in: InnoDB handlerton */
+	XID*		xid_list,/*!< in/out: prepared transactions */
+	uint		len)	/*!< in: number of slots in xid_list */
 {
+	DBUG_ASSERT(hton == innodb_hton_ptr);
+
 	if (len == 0 || xid_list == NULL) {
 
 		return(0);
@@ -8090,19 +9073,21 @@ innobase_xa_recover(
 	return(trx_recover_for_mysql(xid_list, len));
 }
 
-/***********************************************************************
+/*******************************************************************//**
 This function is used to commit one X/Open XA distributed transaction
-which is in the prepared state */
+which is in the prepared state
+@return	0 or error number */
 static
 int
 innobase_commit_by_xid(
 /*===================*/
-			/* out: 0 or error number */
         handlerton *hton,
-	XID*	xid)	/* in: X/Open XA transaction identification */
+	XID*	xid)	/*!< in: X/Open XA transaction identification */
 {
 	trx_t*	trx;
 
+	DBUG_ASSERT(hton == innodb_hton_ptr);
+
 	trx = trx_get_trx_by_xid(xid);
 
 	if (trx) {
@@ -8114,19 +9099,22 @@ innobase_commit_by_xid(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 This function is used to rollback one X/Open XA distributed transaction
-which is in the prepared state */
+which is in the prepared state
+@return	0 or error number */
 static
 int
 innobase_rollback_by_xid(
 /*=====================*/
-			/* out: 0 or error number */
-        handlerton *hton,
-	XID	*xid)	/* in: X/Open XA transaction identification */
+	handlerton*	hton,	/*!< in: InnoDB handlerton */
+	XID*		xid)	/*!< in: X/Open XA transaction
+				identification */
 {
 	trx_t*	trx;
 
+	DBUG_ASSERT(hton == innodb_hton_ptr);
+
 	trx = trx_get_trx_by_xid(xid);
 
 	if (trx) {
@@ -8136,23 +9124,25 @@ innobase_rollback_by_xid(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Create a consistent view for a cursor based on current transaction
 which is created if the corresponding MySQL thread still lacks one.
 This consistent view is then used inside of MySQL when accessing records
-using a cursor. */
+using a cursor.
+@return	pointer to cursor view or NULL */
 static
 void*
 innobase_create_cursor_view(
 /*========================*/
-                          /* out: pointer to cursor view or NULL */
-        handlerton *hton, /* in: innobase hton */
-	THD* thd)	  /* in: user thread handle */
+        handlerton *hton, /*!< in: innobase hton */
+	THD* thd)	  /*!< in: user thread handle */
 {
+	DBUG_ASSERT(hton == innodb_hton_ptr);
+
 	return(read_cursor_view_create_for_mysql(check_trx_exists(thd)));
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Close the given consistent cursor view of a transaction and restore
 global read view to a transaction read view. Transaction is created if the
 corresponding MySQL thread still lacks one. */
@@ -8161,14 +9151,16 @@ void
 innobase_close_cursor_view(
 /*=======================*/
         handlerton *hton,
-	THD*	thd,	/* in: user thread handle */
-	void*	curview)/* in: Consistent read view to be closed */
+	THD*	thd,	/*!< in: user thread handle */
+	void*	curview)/*!< in: Consistent read view to be closed */
 {
+	DBUG_ASSERT(hton == innodb_hton_ptr);
+
 	read_cursor_view_close_for_mysql(check_trx_exists(thd),
 					 (cursor_view_t*) curview);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Set the given consistent cursor view to a transaction which is created
 if the corresponding MySQL thread still lacks one. If the given
 consistent cursor view is NULL global read view of a transaction is
@@ -8178,38 +9170,511 @@ void
 innobase_set_cursor_view(
 /*=====================*/
         handlerton *hton,
-	THD*	thd,	/* in: user thread handle */
-	void*	curview)/* in: Consistent cursor view to be set */
+	THD*	thd,	/*!< in: user thread handle */
+	void*	curview)/*!< in: Consistent cursor view to be set */
 {
+	DBUG_ASSERT(hton == innodb_hton_ptr);
+
 	read_cursor_set_for_mysql(check_trx_exists(thd),
 				  (cursor_view_t*) curview);
 }
 
 
-bool ha_innobase::check_if_incompatible_data(
+/***********************************************************************
+Check whether any of the given columns is being renamed in the table. */
+static
+bool
+column_is_being_renamed(
+/*====================*/
+					/* out: true if any of col_names is
+					being renamed in table */
+	TABLE*		table,		/* in: MySQL table */
+	uint		n_cols,		/* in: number of columns */
+	const char**	col_names)	/* in: names of the columns */
+{
+	uint		j;
+	uint		k;
+	Field*		field;
+	const char*	col_name;
+
+	for (j = 0; j < n_cols; j++) {
+		col_name = col_names[j];
+		for (k = 0; k < table->s->fields; k++) {
+			field = table->field[k];
+			if ((field->flags & FIELD_IS_RENAMED)
+			    && innobase_strcasecmp(field->field_name,
+						   col_name) == 0) {
+				return(true);
+			}
+		}
+	}
+
+	return(false);
+}
+
+/***********************************************************************
+Check whether a column in table "table" is being renamed and if this column
+is part of a foreign key, either part of another table, referencing this
+table or part of this table, referencing another table. */
+static
+bool
+foreign_key_column_is_being_renamed(
+/*================================*/
+					/* out: true if a column that
+					participates in a foreign key definition
+					is being renamed */
+	row_prebuilt_t*	prebuilt,	/* in: InnoDB prebuilt struct */
+	TABLE*		table)		/* in: MySQL table */
+{
+	dict_foreign_t*	foreign;
+
+	/* check whether there are foreign keys at all */
+	if (UT_LIST_GET_LEN(prebuilt->table->foreign_list) == 0
+	    && UT_LIST_GET_LEN(prebuilt->table->referenced_list) == 0) {
+		/* no foreign keys involved with prebuilt->table */
+
+		return(false);
+	}
+
+	row_mysql_lock_data_dictionary(prebuilt->trx);
+
+	/* Check whether any column in the foreign key constraints which refer
+	to this table is being renamed. */
+	for (foreign = UT_LIST_GET_FIRST(prebuilt->table->referenced_list);
+	     foreign != NULL;
+	     foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
+
+		if (column_is_being_renamed(table, foreign->n_fields,
+					    foreign->referenced_col_names)) {
+
+			row_mysql_unlock_data_dictionary(prebuilt->trx);
+			return(true);
+		}
+	}
+
+	/* Check whether any column in the foreign key constraints in the
+	table is being renamed. */
+	for (foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list);
+	     foreign != NULL;
+	     foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
+
+		if (column_is_being_renamed(table, foreign->n_fields,
+					    foreign->foreign_col_names)) {
+
+			row_mysql_unlock_data_dictionary(prebuilt->trx);
+			return(true);
+		}
+	}
+
+	row_mysql_unlock_data_dictionary(prebuilt->trx);
+
+	return(false);
+}
+
+UNIV_INTERN
+bool
+ha_innobase::check_if_incompatible_data(
 	HA_CREATE_INFO*	info,
 	uint		table_changes)
 {
 	if (table_changes != IS_EQUAL_YES) {
 
-		return COMPATIBLE_DATA_NO;
+		return(COMPATIBLE_DATA_NO);
 	}
 
 	/* Check that auto_increment value was not changed */
 	if ((info->used_fields & HA_CREATE_USED_AUTO) &&
 		info->auto_increment_value != 0) {
 
+		return(COMPATIBLE_DATA_NO);
+	}
+
+	/* Check if a column participating in a foreign key is being renamed.
+	There is no mechanism for updating InnoDB foreign key definitions. */
+	if (foreign_key_column_is_being_renamed(prebuilt, table)) {
+
 		return COMPATIBLE_DATA_NO;
 	}
 
 	/* Check that row format didn't change */
-	if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT) &&
-		get_row_type() != info->row_type) {
+	if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT)
+	    && info->row_type != ROW_TYPE_DEFAULT
+	    && info->row_type != get_row_type()) {
 
-		return COMPATIBLE_DATA_NO;
+		return(COMPATIBLE_DATA_NO);
 	}
 
-	return COMPATIBLE_DATA_YES;
+	/* Specifying KEY_BLOCK_SIZE requests a rebuild of the table. */
+	if (info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE) {
+		return(COMPATIBLE_DATA_NO);
+	}
+
+	return(COMPATIBLE_DATA_YES);
+}
+
+/************************************************************//**
+Validate the file format name and return its corresponding id.
+@return	valid file format id */
+static
+uint
+innobase_file_format_name_lookup(
+/*=============================*/
+	const char*	format_name)	/*!< in: pointer to file format name */
+{
+	char*	endp;
+	uint	format_id;
+
+	ut_a(format_name != NULL);
+
+	/* The format name can contain the format id itself instead of
+	the name and we check for that. */
+	format_id = (uint) strtoul(format_name, &endp, 10);
+
+	/* Check for valid parse. */
+	if (*endp == '\0' && *format_name != '\0') {
+
+		if (format_id <= DICT_TF_FORMAT_MAX) {
+
+			return(format_id);
+		}
+	} else {
+
+		for (format_id = 0; format_id <= DICT_TF_FORMAT_MAX;
+		     format_id++) {
+			const char*	name;
+
+			name = trx_sys_file_format_id_to_name(format_id);
+
+			if (!innobase_strcasecmp(format_name, name)) {
+
+				return(format_id);
+			}
+		}
+	}
+
+	return(DICT_TF_FORMAT_MAX + 1);
+}
+
+/************************************************************//**
+Validate the file format check value, is it one of "on" or "off",
+as a side effect it sets the srv_check_file_format_at_startup variable.
+@return	true if config value one of "on" or  "off" */
+static
+bool
+innobase_file_format_check_on_off(
+/*==============================*/
+	const char*	format_check)	/*!< in: parameter value */
+{
+	bool		ret = true;
+
+	if (!innobase_strcasecmp(format_check, "off")) {
+
+		/* Set the value to disable checking. */
+		srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX + 1;
+
+	} else if (!innobase_strcasecmp(format_check, "on")) {
+
+		/* Set the value to the lowest supported format. */
+		srv_check_file_format_at_startup = DICT_TF_FORMAT_51;
+	} else {
+		ret = FALSE;
+	}
+
+	return(ret);
+}
+
+/************************************************************//**
+Validate the file format check config parameters, as a side effect it
+sets the srv_check_file_format_at_startup variable.
+@return	true if valid config value */
+static
+bool
+innobase_file_format_check_validate(
+/*================================*/
+	const char*	format_check)	/*!< in: parameter value */
+{
+	uint		format_id;
+	bool		ret = true;
+
+	format_id = innobase_file_format_name_lookup(format_check);
+
+	if (format_id < DICT_TF_FORMAT_MAX + 1) {
+		srv_check_file_format_at_startup = format_id;
+	} else {
+		ret = false;
+	}
+
+	return(ret);
+}
+
+/*************************************************************//**
+Check if it is a valid file format. This function is registered as
+a callback with MySQL.
+@return	0 for valid file format */
+static
+int
+innodb_file_format_name_validate(
+/*=============================*/
+	THD*				thd,	/*!< in: thread handle */
+	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
+						variable */
+	void*				save,	/*!< out: immediate result
+						for update function */
+	struct st_mysql_value*		value)	/*!< in: incoming string */
+{
+	const char*	file_format_input;
+	char		buff[STRING_BUFFER_USUAL_SIZE];
+	int		len = sizeof(buff);
+
+	ut_a(save != NULL);
+	ut_a(value != NULL);
+
+	file_format_input = value->val_str(value, buff, &len);
+
+	if (file_format_input != NULL) {
+		uint	format_id;
+
+		format_id = innobase_file_format_name_lookup(
+			file_format_input);
+
+		if (format_id <= DICT_TF_FORMAT_MAX) {
+
+			*static_cast<const char**>(save) = file_format_input;
+			return(0);
+		}
+	}
+
+	*static_cast<const char**>(save) = NULL;
+	return(1);
+}
+
+/****************************************************************//**
+Update the system variable innodb_file_format using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_file_format_name_update(
+/*===========================*/
+	THD*				thd,		/*!< in: thread handle */
+	struct st_mysql_sys_var*	var,		/*!< in: pointer to
+							system variable */
+	void*				var_ptr,	/*!< out: where the
+							formal string goes */
+	const void*			save)		/*!< in: immediate result
+							from check function */
+{
+	const char* format_name;
+
+	ut_a(var_ptr != NULL);
+	ut_a(save != NULL);
+
+	format_name = *static_cast<const char*const*>(save);
+
+	if (format_name) {
+		uint	format_id;
+
+		format_id = innobase_file_format_name_lookup(format_name);
+
+		if (format_id <= DICT_TF_FORMAT_MAX) {
+			srv_file_format = format_id;
+		}
+	}
+
+	*static_cast<const char**>(var_ptr)
+		= trx_sys_file_format_id_to_name(srv_file_format);
+}
+
+/*************************************************************//**
+Check if valid argument to innodb_file_format_check. This
+function is registered as a callback with MySQL.
+@return	0 for valid file format */
+static
+int
+innodb_file_format_check_validate(
+/*==============================*/
+	THD*				thd,	/*!< in: thread handle */
+	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
+						variable */
+	void*				save,	/*!< out: immediate result
+						for update function */
+	struct st_mysql_value*		value)	/*!< in: incoming string */
+{
+	const char*	file_format_input;
+	char		buff[STRING_BUFFER_USUAL_SIZE];
+	int		len = sizeof(buff);
+
+	ut_a(save != NULL);
+	ut_a(value != NULL);
+
+	file_format_input = value->val_str(value, buff, &len);
+
+	if (file_format_input != NULL) {
+
+		/* Check if user set on/off, we want to print a suitable
+		message if they did so. */
+
+		if (innobase_file_format_check_on_off(file_format_input)) {
+			sql_print_warning(
+				"InnoDB: invalid innodb_file_format_check "
+				"value; on/off can only be set at startup or "
+				"in the configuration file");
+		} else if (innobase_file_format_check_validate(
+				file_format_input)) {
+
+			*static_cast<const char**>(save) = file_format_input;
+
+			return(0);
+
+		} else {
+			sql_print_warning(
+				"InnoDB: invalid innodb_file_format_check "
+				"value; can be any format up to %s "
+				"or its equivalent numeric id",
+				trx_sys_file_format_id_to_name(
+					DICT_TF_FORMAT_MAX));
+		}
+	}
+
+	*static_cast<const char**>(save) = NULL;
+	return(1);
+}
+
+/****************************************************************//**
+Update the system variable innodb_file_format_check using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_file_format_check_update(
+/*============================*/
+	THD*				thd,		/*!< in: thread handle */
+	struct st_mysql_sys_var*	var,		/*!< in: pointer to
+							system variable */
+	void*				var_ptr,	/*!< out: where the
+							formal string goes */
+	const void*			save)		/*!< in: immediate result
+							from check function */
+{
+	const char*	format_name_in;
+	const char**	format_name_out;
+	uint		format_id;
+
+	ut_a(save != NULL);
+	ut_a(var_ptr != NULL);
+
+	format_name_in = *static_cast<const char*const*>(save);
+
+	if (!format_name_in) {
+
+		return;
+	}
+
+	format_id = innobase_file_format_name_lookup(format_name_in);
+
+	if (format_id > DICT_TF_FORMAT_MAX) {
+		/* DEFAULT is "on", which is invalid at runtime. */
+		push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+				    ER_WRONG_ARGUMENTS,
+				    "Ignoring SET innodb_file_format=%s",
+				    format_name_in);
+		return;
+	}
+
+	format_name_out = static_cast<const char**>(var_ptr);
+
+	/* Update the max format id in the system tablespace. */
+	if (trx_sys_file_format_max_set(format_id, format_name_out)) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			" [Info] InnoDB: the file format in the system "
+			"tablespace is now set to %s.\n", *format_name_out);
+	}
+}
+
+/****************************************************************//**
+Update the system variable innodb_adaptive_hash_index using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_adaptive_hash_index_update(
+/*==============================*/
+	THD*				thd,		/*!< in: thread handle */
+	struct st_mysql_sys_var*	var,		/*!< in: pointer to
+							system variable */
+	void*				var_ptr,	/*!< out: where the
+							formal string goes */
+	const void*			save)		/*!< in: immediate result
+							from check function */
+{
+	if (*(my_bool*) save) {
+		btr_search_enable();
+	} else {
+		btr_search_disable();
+	}
+}
+
+/*************************************************************//**
+Check if it is a valid value of innodb_change_buffering.  This function is
+registered as a callback with MySQL.
+@return	0 for valid innodb_change_buffering */
+static
+int
+innodb_change_buffering_validate(
+/*=============================*/
+	THD*				thd,	/*!< in: thread handle */
+	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
+						variable */
+	void*				save,	/*!< out: immediate result
+						for update function */
+	struct st_mysql_value*		value)	/*!< in: incoming string */
+{
+	const char*	change_buffering_input;
+	char		buff[STRING_BUFFER_USUAL_SIZE];
+	int		len = sizeof(buff);
+
+	ut_a(save != NULL);
+	ut_a(value != NULL);
+
+	change_buffering_input = value->val_str(value, buff, &len);
+
+	if (change_buffering_input != NULL) {
+		ulint	use;
+
+		for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values);
+		     use++) {
+			if (!innobase_strcasecmp(
+				    change_buffering_input,
+				    innobase_change_buffering_values[use])) {
+				*(ibuf_use_t*) save = (ibuf_use_t) use;
+				return(0);
+			}
+		}
+	}
+
+	return(1);
+}
+
+/****************************************************************//**
+Update the system variable innodb_change_buffering using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_change_buffering_update(
+/*===========================*/
+	THD*				thd,		/*!< in: thread handle */
+	struct st_mysql_sys_var*	var,		/*!< in: pointer to
+							system variable */
+	void*				var_ptr,	/*!< out: where the
+							formal string goes */
+	const void*			save)		/*!< in: immediate result
+							from check function */
+{
+	ut_a(var_ptr != NULL);
+	ut_a(save != NULL);
+	ut_a((*(ibuf_use_t*) save) < IBUF_USE_COUNT);
+
+	ibuf_use = *(const ibuf_use_t*) save;
+
+	*(const char**) var_ptr = innobase_change_buffering_values[ibuf_use];
 }
 
 static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff)
@@ -8246,15 +9711,10 @@ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
   "Disable with --skip-innodb-doublewrite.",
   NULL, NULL, TRUE);
 
-static MYSQL_SYSVAR_BOOL(extra_dirty_writes, innobase_extra_dirty_writes,
-  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
-  "Flush dirty buffer pages when dirty max pct is not exceeded",
-  NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_LONG(io_capacity, innobase_io_capacity,
-  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
+  PLUGIN_VAR_RQCMDARG,
   "Number of IOPs the server can do. Tunes the background IO rate",
-  NULL, NULL, (long)200, (long)100, LONG_MAX, (long)0);
+  NULL, NULL, 200, 100, ~0L, 0);
 
 static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
   PLUGIN_VAR_OPCMDARG,
@@ -8266,21 +9726,32 @@ static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
   */
   IF_NETWARE("", " or 2 (fastest - crash-like)")
   ".",
-  NULL, NULL, (unsigned long)1, (unsigned long)0,
-  (unsigned long)IF_NETWARE(1,2), (unsigned long)0);
+  NULL, NULL, 1, 0, IF_NETWARE(1,2), 0);
 
-static MYSQL_SYSVAR_BOOL(file_per_table, innobase_file_per_table,
-  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
+  PLUGIN_VAR_NOCMDARG,
   "Stores each InnoDB table to an .ibd file in the database dir.",
   NULL, NULL, FALSE);
 
+static MYSQL_SYSVAR_STR(file_format, innobase_file_format_name,
+  PLUGIN_VAR_RQCMDARG,
+  "File format to use for new tables in .ibd files.",
+  innodb_file_format_name_validate,
+  innodb_file_format_name_update, "Antelope");
+
+static MYSQL_SYSVAR_STR(file_format_check, innobase_file_format_check,
+  PLUGIN_VAR_OPCMDARG,
+  "The highest file format in the tablespace.",
+  innodb_file_format_check_validate,
+  innodb_file_format_check_update,
+  "on");
+
 static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
   PLUGIN_VAR_OPCMDARG,
   "Set to 0 (write and flush once per second),"
   " 1 (write and flush at each commit)"
   " or 2 (write at commit, flush once per second).",
-  NULL, NULL, (unsigned long)1, (unsigned long)0, (unsigned long)2,
-  (unsigned long)0);
+  NULL, NULL, 1, 0, 2, 0);
 
 static MYSQL_SYSVAR_STR(flush_method, innobase_unix_file_flush_method,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -8308,14 +9779,17 @@ static MYSQL_SYSVAR_STR(log_group_home_dir, innobase_log_group_home_dir,
 static MYSQL_SYSVAR_ULONG(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
   PLUGIN_VAR_RQCMDARG,
   "Percentage of dirty pages allowed in bufferpool.",
-  NULL, NULL, (unsigned long)75, (unsigned long)0, (unsigned long)99,
-  (unsigned long)0);
+  NULL, NULL, 75, 0, 99, 0);
+
+static MYSQL_SYSVAR_BOOL(adaptive_flushing, srv_adaptive_flushing,
+  PLUGIN_VAR_NOCMDARG,
+  "Attempt flushing dirty pages to avoid IO bursts at checkpoints.",
+  NULL, NULL, TRUE);
 
 static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
   PLUGIN_VAR_RQCMDARG,
   "Desired maximum length of the purge queue (0 = no limit)",
-  NULL, NULL, (unsigned long)0, (unsigned long)0, (unsigned long)~0L,
-  (unsigned long)0);
+  NULL, NULL, 0, 0, ~0L, 0);
 
 static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout,
   PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
@@ -8332,123 +9806,112 @@ static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
   "Enable statistics gathering for metadata commands such as SHOW TABLE STATUS (on by default)",
   NULL, NULL, TRUE);
 
-static MYSQL_SYSVAR_BOOL(use_legacy_cardinality_algorithm,
-  srv_use_legacy_cardinality_algorithm,
-  PLUGIN_VAR_OPCMDARG,
-  "Use legacy algorithm for picking random pages during index cardinality "
-  "estimation. Disable this to use a better algorithm, but note that your "
-  "query plans may change (enabled by default).",
-  NULL, NULL, TRUE);
+static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_sample_pages,
+  PLUGIN_VAR_RQCMDARG,
+  "The number of index pages to sample when calculating statistics (default 8)",
+  NULL, NULL, 8, 1, ~0ULL, 0);
 
-static MYSQL_SYSVAR_BOOL(adaptive_hash_index, innobase_adaptive_hash_index,
-  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled,
+  PLUGIN_VAR_OPCMDARG,
   "Enable InnoDB adaptive hash index (enabled by default).  "
   "Disable with --skip-innodb-adaptive-hash-index.",
-  NULL, NULL, TRUE);
+  NULL, innodb_adaptive_hash_index_update, TRUE);
+
+static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
+  PLUGIN_VAR_RQCMDARG,
+  "Replication thread delay (ms) on the slave server if "
+  "innodb_thread_concurrency is reached (0 by default)",
+  NULL, NULL, 0, 0, ~0UL, 0);
 
 static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.",
-  NULL, NULL, (long)8*1024*1024L, (long)2*1024*1024L, LONG_MAX, (long)1024);
+  NULL, NULL, 8*1024*1024L, 512*1024L, LONG_MAX, 1024);
 
 static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment,
   PLUGIN_VAR_RQCMDARG,
   "Data file autoextend increment in megabytes",
-  NULL, NULL, (unsigned long)64L, (unsigned long)1L, (unsigned long)1000L,
-  (unsigned long)0);
+  NULL, NULL, 8L, 1L, 1000L, 0);
 
 static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
-  NULL, NULL, (long long)1024*1024*1024L, (long long)64*1024*1024L,
-  LONGLONG_MAX, (long long)1024*1024L);
+  NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
 
-static MYSQL_SYSVAR_ULONG(commit_concurrency, srv_commit_concurrency,
+static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
   PLUGIN_VAR_RQCMDARG,
   "Helps in performance tuning in heavily concurrent environments.",
-  NULL, NULL, (unsigned long)0, (unsigned long)0, (unsigned long)1000,
-  (unsigned long)0);
+  innobase_commit_concurrency_validate, NULL, 0, 0, 1000, 0);
 
 static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
   PLUGIN_VAR_RQCMDARG,
   "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
-  NULL, NULL, (unsigned long)500L, (unsigned long)1L, (unsigned long)~0L,
-  (unsigned long)0);
+  NULL, NULL, 500L, 1L, ~0L, 0);
 
-static MYSQL_SYSVAR_LONG(write_io_threads, innobase_write_io_threads,
+static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
-  "Number of write I/O threads in InnoDB.",
-  NULL, NULL, (long)8, (long)1, (long)64, (long)0);
+  "Number of file I/O threads in InnoDB.",
+  NULL, NULL, 4, 4, 64, 0);
 
-static MYSQL_SYSVAR_LONG(read_io_threads, innobase_read_io_threads,
+static MYSQL_SYSVAR_ULONG(read_io_threads, innobase_read_io_threads,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
-  "Number of read I/O threads in InnoDB.",
-  NULL, NULL, (long)8, (long)1, (long)64, (long)0);
+  "Number of background read I/O threads in InnoDB.",
+  NULL, NULL, 4, 1, 64, 0);
 
-static MYSQL_SYSVAR_LONG(max_merged_io, innobase_max_merged_io,
+static MYSQL_SYSVAR_ULONG(write_io_threads, innobase_write_io_threads,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
-  "Max number of adjacent IO requests to merge in InnoDB.",
-  NULL, NULL, (long)64, (long)1, (long)64, (long)0);
+  "Number of background write I/O threads in InnoDB.",
+  NULL, NULL, 4, 1, 64, 0);
 
 static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Helps to save your data in case the disk image of the database becomes corrupt.",
-  NULL, NULL, (long)0, (long)0, (long)6, (long)0);
-
-static MYSQL_SYSVAR_LONG(lock_wait_timeout, innobase_lock_wait_timeout,
-  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
-  "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back.",
-  NULL, NULL, (long)50, (long)1, (long)(1024*1024*1024), (long)0);
+  NULL, NULL, 0, 0, 6, 0);
 
 static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "The size of the buffer which InnoDB uses to write log to the log files on disk.",
-  NULL, NULL, (long)16*1024*1024L, (long)2*1024*1024L, LONG_MAX, (long)1024);
+  NULL, NULL, 8*1024*1024L, 256*1024L, LONG_MAX, 1024);
 
 static MYSQL_SYSVAR_LONGLONG(log_file_size, innobase_log_file_size,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Size of each log file in a log group.",
-  NULL, NULL, (long long)128*1024*1024L, (long long)32*1024*1024L,
-  LONGLONG_MAX, (long long)1024*1024L);
+  NULL, NULL, 5*1024*1024L, 1*1024*1024L, LONGLONG_MAX, 1024*1024L);
 
 static MYSQL_SYSVAR_LONG(log_files_in_group, innobase_log_files_in_group,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Number of log files in the log group. InnoDB writes to the files in a circular fashion. Value 3 is recommended here.",
-  NULL, NULL, (long)3, (long)2, (long)100, (long)0);
+  NULL, NULL, 2, 2, 100, 0);
 
 static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
-  NULL, NULL, (long)1, (long)1, (long)10, (long)0);
+  NULL, NULL, 1, 1, 10, 0);
 
 static MYSQL_SYSVAR_LONG(open_files, innobase_open_files,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "How many files at the maximum InnoDB keeps open at the same time.",
-  NULL, NULL, (long)300L, (long)10L, LONG_MAX, (long)0L);
+  NULL, NULL, 300L, 10L, LONG_MAX, 0);
 
 static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
   PLUGIN_VAR_RQCMDARG,
-  "Count of spin-loop rounds in InnoDB mutexes",
-  NULL, NULL, (unsigned long)20L, (unsigned long)0L, (unsigned long)~0L,
-  (unsigned long)0L);
+  "Count of spin-loop rounds in InnoDB mutexes (30 by default)",
+  NULL, NULL, 30L, 0L, ~0L, 0);
 
-static MYSQL_SYSVAR_BOOL(thread_concurrency_timer_based,
-                         innobase_thread_concurrency_timer_based,
-                         PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
-  "Use InnoDB timer based concurrency throttling. ",
-  NULL, NULL, TRUE);
+static MYSQL_SYSVAR_ULONG(spin_wait_delay, srv_spin_wait_delay,
+  PLUGIN_VAR_OPCMDARG,
+  "Maximum delay between polling for a spin lock (6 by default)",
+  NULL, NULL, 6L, 0L, ~0L, 0);
 
 static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
   PLUGIN_VAR_RQCMDARG,
   "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
-  NULL, NULL, (unsigned long)0, (unsigned long)0, (unsigned long)1000,
-  (unsigned long)0);
+  NULL, NULL, 0, 0, 1000, 0);
 
 static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay,
   PLUGIN_VAR_RQCMDARG,
   "Time of innodb thread sleeping before joining InnoDB queue (usec). Value 0 disable a sleep",
-  NULL, NULL, (unsigned long)10000L, (unsigned long)0L, (unsigned long)~0L,
-  (unsigned long)0);
+  NULL, NULL, 10000L, 0L, ~0L, 0);
 
 static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -8465,7 +9928,29 @@ static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
   NULL, NULL,
   AUTOINC_NEW_STYLE_LOCKING,	/* Default setting */
   AUTOINC_OLD_STYLE_LOCKING,	/* Minimum value */
-  AUTOINC_NO_LOCKING, (long)0);	/* Maximum value */
+  AUTOINC_NO_LOCKING, 0);	/* Maximum value */
+
+static MYSQL_SYSVAR_STR(version, innodb_version_str,
+  PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
+  "InnoDB version", NULL, NULL, INNODB_VERSION_STR);
+
+static MYSQL_SYSVAR_BOOL(use_sys_malloc, srv_use_sys_malloc,
+  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+  "Use OS memory allocator instead of InnoDB's internal memory allocator",
+  NULL, NULL, TRUE);
+
+static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering,
+  PLUGIN_VAR_RQCMDARG,
+  "Buffer changes to reduce random access: "
+  "OFF, ON, inserting, deleting, changing, or purging.",
+  innodb_change_buffering_validate,
+  innodb_change_buffering_update, NULL);
+
+static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
+  PLUGIN_VAR_RQCMDARG,
+  "Number of pages that must be accessed sequentially for InnoDB to"
+  "trigger a readahead.",
+  NULL, NULL, 56, 0, 64, 0);
 
 static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(additional_mem_pool_size),
@@ -8478,11 +9963,12 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(data_home_dir),
   MYSQL_SYSVAR(doublewrite),
   MYSQL_SYSVAR(fast_shutdown),
+  MYSQL_SYSVAR(file_io_threads),
   MYSQL_SYSVAR(read_io_threads),
   MYSQL_SYSVAR(write_io_threads),
-  MYSQL_SYSVAR(max_merged_io),
-  MYSQL_SYSVAR(thread_concurrency_timer_based),
   MYSQL_SYSVAR(file_per_table),
+  MYSQL_SYSVAR(file_format),
+  MYSQL_SYSVAR(file_format_check),
   MYSQL_SYSVAR(flush_log_at_trx_commit),
   MYSQL_SYSVAR(flush_method),
   MYSQL_SYSVAR(force_recovery),
@@ -8497,38 +9983,192 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(log_files_in_group),
   MYSQL_SYSVAR(log_group_home_dir),
   MYSQL_SYSVAR(max_dirty_pages_pct),
+  MYSQL_SYSVAR(adaptive_flushing),
   MYSQL_SYSVAR(max_purge_lag),
   MYSQL_SYSVAR(mirrored_log_groups),
   MYSQL_SYSVAR(open_files),
   MYSQL_SYSVAR(rollback_on_timeout),
   MYSQL_SYSVAR(stats_on_metadata),
-  MYSQL_SYSVAR(use_legacy_cardinality_algorithm),
+  MYSQL_SYSVAR(stats_sample_pages),
   MYSQL_SYSVAR(adaptive_hash_index),
+  MYSQL_SYSVAR(replication_delay),
   MYSQL_SYSVAR(status_file),
+  MYSQL_SYSVAR(strict_mode),
   MYSQL_SYSVAR(support_xa),
   MYSQL_SYSVAR(sync_spin_loops),
+  MYSQL_SYSVAR(spin_wait_delay),
   MYSQL_SYSVAR(table_locks),
   MYSQL_SYSVAR(thread_concurrency),
   MYSQL_SYSVAR(thread_sleep_delay),
   MYSQL_SYSVAR(autoinc_lock_mode),
-  MYSQL_SYSVAR(extra_dirty_writes),
+  MYSQL_SYSVAR(version),
+  MYSQL_SYSVAR(use_sys_malloc),
+  MYSQL_SYSVAR(change_buffering),
+  MYSQL_SYSVAR(read_ahead_threshold),
   MYSQL_SYSVAR(io_capacity),
   NULL
 };
 
-mysql_declare_plugin(innobase)
+mysql_declare_plugin(innodb_plugin)
 {
   MYSQL_STORAGE_ENGINE_PLUGIN,
   &innobase_storage_engine,
   innobase_hton_name,
-  "Innobase OY",
+  "Innobase Oy",
   "Supports transactions, row-level locking, and foreign keys",
   PLUGIN_LICENSE_GPL,
   innobase_init, /* Plugin Init */
   NULL, /* Plugin Deinit */
-  0x0100 /* 1.0 */,
+  INNODB_VERSION_SHORT,
   innodb_status_variables_export,/* status variables             */
   innobase_system_variables, /* system variables */
   NULL /* reserved */
-}
+},
+i_s_innodb_trx,
+i_s_innodb_locks,
+i_s_innodb_lock_waits,
+i_s_innodb_cmp,
+i_s_innodb_cmp_reset,
+i_s_innodb_cmpmem,
+i_s_innodb_cmpmem_reset
 mysql_declare_plugin_end;
+
+/** @brief Initialize the default value of innodb_commit_concurrency.
+
+Once InnoDB is running, the innodb_commit_concurrency must not change
+from zero to nonzero. (Bug #42101)
+
+The initial default value is 0, and without this extra initialization,
+SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter
+to 0, even if it was initially set to nonzero at the command line
+or configuration file. */
+static
+void
+innobase_commit_concurrency_init_default(void)
+/*==========================================*/
+{
+	MYSQL_SYSVAR_NAME(commit_concurrency).def_val
+		= innobase_commit_concurrency;
+}
+
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+typedef struct innobase_convert_name_test_struct {
+	char*		buf;
+	ulint		buflen;
+	const char*	id;
+	ulint		idlen;
+	void*		thd;
+	ibool		file_id;
+
+	const char*	expected;
+} innobase_convert_name_test_t;
+
+void
+test_innobase_convert_name()
+{
+	char	buf[1024];
+	ulint	i;
+
+	innobase_convert_name_test_t test_input[] = {
+		{buf, sizeof(buf), "abcd", 4, NULL, TRUE, "\"abcd\""},
+		{buf, 7, "abcd", 4, NULL, TRUE, "\"abcd\""},
+		{buf, 6, "abcd", 4, NULL, TRUE, "\"abcd\""},
+		{buf, 5, "abcd", 4, NULL, TRUE, "\"abc\""},
+		{buf, 4, "abcd", 4, NULL, TRUE, "\"ab\""},
+
+		{buf, sizeof(buf), "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
+		{buf, 9, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
+		{buf, 8, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
+		{buf, 7, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
+		{buf, 6, "ab@0060cd", 9, NULL, TRUE, "\"ab`c\""},
+		{buf, 5, "ab@0060cd", 9, NULL, TRUE, "\"ab`\""},
+		{buf, 4, "ab@0060cd", 9, NULL, TRUE, "\"ab\""},
+
+		{buf, sizeof(buf), "ab\"cd", 5, NULL, TRUE,
+			"\"#mysql50#ab\"\"cd\""},
+		{buf, 17, "ab\"cd", 5, NULL, TRUE,
+			"\"#mysql50#ab\"\"cd\""},
+		{buf, 16, "ab\"cd", 5, NULL, TRUE,
+			"\"#mysql50#ab\"\"c\""},
+		{buf, 15, "ab\"cd", 5, NULL, TRUE,
+			"\"#mysql50#ab\"\"\""},
+		{buf, 14, "ab\"cd", 5, NULL, TRUE,
+			"\"#mysql50#ab\""},
+		{buf, 13, "ab\"cd", 5, NULL, TRUE,
+			"\"#mysql50#ab\""},
+		{buf, 12, "ab\"cd", 5, NULL, TRUE,
+			"\"#mysql50#a\""},
+		{buf, 11, "ab\"cd", 5, NULL, TRUE,
+			"\"#mysql50#\""},
+		{buf, 10, "ab\"cd", 5, NULL, TRUE,
+			"\"#mysql50\""},
+
+		{buf, sizeof(buf), "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""},
+		{buf, 9, "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""},
+		{buf, 8, "ab/cd", 5, NULL, TRUE, "\"ab\".\"c\""},
+		{buf, 7, "ab/cd", 5, NULL, TRUE, "\"ab\".\"\""},
+		{buf, 6, "ab/cd", 5, NULL, TRUE, "\"ab\"."},
+		{buf, 5, "ab/cd", 5, NULL, TRUE, "\"ab\"."},
+		{buf, 4, "ab/cd", 5, NULL, TRUE, "\"ab\""},
+		{buf, 3, "ab/cd", 5, NULL, TRUE, "\"a\""},
+		{buf, 2, "ab/cd", 5, NULL, TRUE, "\"\""},
+		/* XXX probably "" is a better result in this case
+		{buf, 1, "ab/cd", 5, NULL, TRUE, "."},
+		*/
+		{buf, 0, "ab/cd", 5, NULL, TRUE, ""},
+	};
+
+	for (i = 0; i < sizeof(test_input) / sizeof(test_input[0]); i++) {
+
+		char*	end;
+		ibool	ok = TRUE;
+		size_t	res_len;
+
+		fprintf(stderr, "TESTING %lu, %s, %lu, %s\n",
+			test_input[i].buflen,
+			test_input[i].id,
+			test_input[i].idlen,
+			test_input[i].expected);
+
+		end = innobase_convert_name(
+			test_input[i].buf,
+			test_input[i].buflen,
+			test_input[i].id,
+			test_input[i].idlen,
+			test_input[i].thd,
+			test_input[i].file_id);
+
+		res_len = (size_t) (end - test_input[i].buf);
+
+		if (res_len != strlen(test_input[i].expected)) {
+
+			fprintf(stderr, "unexpected len of the result: %u, "
+				"expected: %u\n", (unsigned) res_len,
+				(unsigned) strlen(test_input[i].expected));
+			ok = FALSE;
+		}
+
+		if (memcmp(test_input[i].buf,
+			   test_input[i].expected,
+			   strlen(test_input[i].expected)) != 0
+		    || !ok) {
+
+			fprintf(stderr, "unexpected result: %.*s, "
+				"expected: %s\n", (int) res_len,
+				test_input[i].buf,
+				test_input[i].expected);
+			ok = FALSE;
+		}
+
+		if (ok) {
+			fprintf(stderr, "OK: res: %.*s\n\n", (int) res_len,
+				buf);
+		} else {
+			fprintf(stderr, "FAILED\n\n");
+			return;
+		}
+	}
+}
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innodb_plugin/handler/ha_innodb.h
similarity index 64%
rename from storage/innobase/handler/ha_innodb.h
rename to storage/innodb_plugin/handler/ha_innodb.h
index 8ca72ee1a60..cc98003f8ff 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innodb_plugin/handler/ha_innodb.h
@@ -1,17 +1,20 @@
-/* Copyright (C) 2000-2005 MySQL AB && Innobase Oy
+/*****************************************************************************
 
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; version 2 of the License.
+Copyright (c) 2000, 2009, MySQL AB & Innobase Oy. All Rights Reserved.
 
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
-   GNU General Public License for more details.
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
 
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307	 USA */
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
 
 /*
   This file is based on ha_berkeley.h of MySQL distribution
@@ -24,34 +27,43 @@
 #pragma interface			/* gcc class implementation */
 #endif
 
+/** InnoDB table share */
 typedef struct st_innobase_share {
-  THR_LOCK lock;
-  pthread_mutex_t mutex;
-  char *table_name;
-  uint table_name_length,use_count;
+	THR_LOCK	lock;		/*!< MySQL lock protecting
+					this structure */
+	const char*	table_name;	/*!< InnoDB table name */
+	uint		use_count;	/*!< reference count,
+					incremented in get_share()
+					and decremented in free_share() */
+	void*		table_name_hash;/*!< hash table chain node */
 } INNOBASE_SHARE;
 
 
+/** InnoDB B-tree index */
 struct dict_index_struct;
+/** Prebuilt structures in an Innobase table handle used within MySQL */
 struct row_prebuilt_struct;
 
+/** InnoDB B-tree index */
 typedef struct dict_index_struct dict_index_t;
+/** Prebuilt structures in an Innobase table handle used within MySQL */
 typedef struct row_prebuilt_struct row_prebuilt_t;
 
-/* The class defining a handle to an Innodb table */
+/** The class defining a handle to an Innodb table */
 class ha_innobase: public handler
 {
-	row_prebuilt_t*	prebuilt;	/* prebuilt struct in InnoDB, used
+	row_prebuilt_t*	prebuilt;	/*!< prebuilt struct in InnoDB, used
 					to save CPU time with prebuilt data
 					structures*/
-	THD*		user_thd;	/* the thread handle of the user
+	THD*		user_thd;	/*!< the thread handle of the user
 					currently using the handle; this is
 					set in external_lock function */
 	THR_LOCK_DATA	lock;
-	INNOBASE_SHARE	*share;
+	INNOBASE_SHARE*	share;		/*!< information for MySQL
+					table locking */
 
-	uchar*		upd_buff;	/* buffer used in updates */
-	uchar*		key_val_buff;	/* buffer used in converting
+	uchar*		upd_buff;	/*!< buffer used in updates */
+	uchar*		key_val_buff;	/*!< buffer used in converting
 					search key values from MySQL format
 					to Innodb format */
 	ulong		upd_and_key_val_buff_len;
@@ -59,62 +71,49 @@ class ha_innobase: public handler
 					two buffers */
 	Table_flags	int_table_flags;
 	uint		primary_key;
-	ulong		start_of_scan;	/* this is set to 1 when we are
+	ulong		start_of_scan;	/*!< this is set to 1 when we are
 					starting a table scan but have not
 					yet fetched any row, else 0 */
 	uint		last_match_mode;/* match mode of the latest search:
 					ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX,
 					or undefined */
-	uint		num_write_row;	/* number of write_row() calls */
+	uint		num_write_row;	/*!< number of write_row() calls */
 
 	uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
                                    const uchar* record);
-	int update_thd(THD* thd);
+	inline void update_thd(THD* thd);
+	void update_thd();
 	int change_active_index(uint keynr);
 	int general_fetch(uchar* buf, uint direction, uint match_mode);
-	ulong innobase_lock_autoinc();
+	ulint innobase_lock_autoinc();
 	ulonglong innobase_peek_autoinc();
-	ulong innobase_set_max_autoinc(ulonglong auto_inc);
-	ulong innobase_reset_autoinc(ulonglong auto_inc);
-	ulong innobase_get_autoinc(ulonglong* value);
-	ulong innobase_update_autoinc(ulonglong	auto_inc);
-	ulong innobase_initialize_autoinc();
+	ulint innobase_set_max_autoinc(ulonglong auto_inc);
+	ulint innobase_reset_autoinc(ulonglong auto_inc);
+	ulint innobase_get_autoinc(ulonglong* value);
+	ulint innobase_update_autoinc(ulonglong	auto_inc);
+	ulint innobase_initialize_autoinc();
 	dict_index_t* innobase_get_index(uint keynr);
  	ulonglong innobase_get_int_col_max_value(const Field* field);
 
 	/* Init values for the class: */
  public:
 	ha_innobase(handlerton *hton, TABLE_SHARE *table_arg);
-	~ha_innobase() {}
+	~ha_innobase();
 	/*
 	  Get the row type from the storage engine.  If this method returns
 	  ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used.
 	*/
 	enum row_type get_row_type() const;
 
-	const char* table_type() const { return("InnoDB");}
-	const char *index_type(uint key_number) { return "BTREE"; }
+	const char* table_type() const;
+	const char* index_type(uint key_number);
 	const char** bas_ext() const;
 	Table_flags table_flags() const;
-	ulong index_flags(uint idx, uint part, bool all_parts) const
-	{
-	  return (HA_READ_NEXT |
-		  HA_READ_PREV |
-		  HA_READ_ORDER |
-		  HA_READ_RANGE |
-		  HA_KEYREAD_ONLY);
-	}
-	uint max_supported_keys()	   const { return MAX_KEY; }
-				/* An InnoDB page must store >= 2 keys;
-				a secondary key record must also contain the
-				primary key value:
-				max key length is therefore set to slightly
-				less than 1 / 4 of page size which is 16 kB;
-				but currently MySQL does not work with keys
-				whose size is > MAX_KEY_LENGTH */
-	uint max_supported_key_length() const { return 3500; }
+	ulong index_flags(uint idx, uint part, bool all_parts) const;
+	uint max_supported_keys() const;
+	uint max_supported_key_length() const;
 	uint max_supported_key_part_length() const;
-	const key_map *keys_to_use_for_scanning() { return &key_map_full; }
+	const key_map* keys_to_use_for_scanning();
 
 	int open(const char *name, int mode, uint test_if_locked);
 	int close(void);
@@ -185,7 +184,7 @@ class ha_innobase: public handler
 
 	virtual bool get_error_message(int error, String *buf);
 
-	uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; }
+	uint8 table_cache_type();
 	/*
 	  ask handler about permission to cache table during query registration
 	*/
@@ -195,8 +194,14 @@ class ha_innobase: public handler
 					   ulonglong *engine_data);
 	static char *get_mysql_bin_log_name();
 	static ulonglong get_mysql_bin_log_pos();
-	bool primary_key_is_clustered() { return true; }
+	bool primary_key_is_clustered();
 	int cmp_ref(const uchar *ref1, const uchar *ref2);
+	/** Fast index creation (smart ALTER TABLE) @see handler0alter.cc @{ */
+	int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys);
+	int prepare_drop_index(TABLE *table_arg, uint *key_num,
+			       uint num_of_keys);
+	int final_drop_index(TABLE *table_arg);
+	/** @} */
 	bool check_if_incompatible_data(HA_CREATE_INFO *info,
 					uint table_changes);
 };
@@ -253,3 +258,27 @@ int thd_binlog_format(const MYSQL_THD thd);
 */
 void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
 }
+
+typedef struct trx_struct trx_t;
+/********************************************************************//**
+@file handler/ha_innodb.h
+Converts an InnoDB error code to a MySQL error code and also tells to MySQL
+about a possible transaction rollback inside InnoDB caused by a lock wait
+timeout or a deadlock.
+@return	MySQL error code */
+extern "C"
+int
+convert_error_code_to_mysql(
+/*========================*/
+	int		error,	/*!< in: InnoDB error code */
+	ulint		flags,	/*!< in: InnoDB table flags, or 0 */
+	MYSQL_THD	thd);	/*!< in: user thread handle or NULL */
+
+/*********************************************************************//**
+Allocates an InnoDB transaction for a MySQL handler object.
+@return	InnoDB transaction handle */
+extern "C"
+trx_t*
+innobase_trx_allocate(
+/*==================*/
+	MYSQL_THD	thd);	/*!< in: user thread handle */
diff --git a/storage/innodb_plugin/handler/handler0alter.cc b/storage/innodb_plugin/handler/handler0alter.cc
new file mode 100644
index 00000000000..d1f64a1985c
--- /dev/null
+++ b/storage/innodb_plugin/handler/handler0alter.cc
@@ -0,0 +1,1216 @@
+/*****************************************************************************
+
+Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file handler/handler0alter.cc
+Smart ALTER TABLE
+*******************************************************/
+
+#include <mysql_priv.h>
+#include <mysqld_error.h>
+
+extern "C" {
+#include "log0log.h"
+#include "row0merge.h"
+#include "srv0srv.h"
+#include "trx0trx.h"
+#include "trx0roll.h"
+#include "ha_prototypes.h"
+#include "handler0alter.h"
+}
+
+#include "ha_innodb.h"
+
+/*************************************************************//**
+Copies an InnoDB column to a MySQL field.  This function is
+adapted from row_sel_field_store_in_mysql_format(). */
+static
+void
+innobase_col_to_mysql(
+/*==================*/
+	const dict_col_t*	col,	/*!< in: InnoDB column */
+	const uchar*		data,	/*!< in: InnoDB column data */
+	ulint			len,	/*!< in: length of data, in bytes */
+	Field*			field)	/*!< in/out: MySQL field */
+{
+	uchar*	ptr;
+	uchar*	dest	= field->ptr;
+	ulint	flen	= field->pack_length();
+
+	switch (col->mtype) {
+	case DATA_INT:
+		ut_ad(len == flen);
+
+		/* Convert integer data from Innobase to little-endian
+		format, sign bit restored to normal */
+
+		for (ptr = dest + len; ptr != dest; ) {
+			*--ptr = *data++;
+		}
+
+		if (!(field->flags & UNSIGNED_FLAG)) {
+			((byte*) dest)[len - 1] ^= 0x80;
+		}
+
+		break;
+
+	case DATA_VARCHAR:
+	case DATA_VARMYSQL:
+	case DATA_BINARY:
+		field->reset();
+
+		if (field->type() == MYSQL_TYPE_VARCHAR) {
+			/* This is a >= 5.0.3 type true VARCHAR. Store the
+			length of the data to the first byte or the first
+			two bytes of dest. */
+
+			dest = row_mysql_store_true_var_len(
+				dest, len, flen - field->key_length());
+		}
+
+		/* Copy the actual data */
+		memcpy(dest, data, len);
+		break;
+
+	case DATA_BLOB:
+		/* Store a pointer to the BLOB buffer to dest: the BLOB was
+		already copied to the buffer in row_sel_store_mysql_rec */
+
+		row_mysql_store_blob_ref(dest, flen, data, len);
+		break;
+
+#ifdef UNIV_DEBUG
+	case DATA_MYSQL:
+		ut_ad(flen >= len);
+		ut_ad(col->mbmaxlen >= col->mbminlen);
+		ut_ad(col->mbmaxlen > col->mbminlen || flen == len);
+		memcpy(dest, data, len);
+		break;
+
+	default:
+	case DATA_SYS_CHILD:
+	case DATA_SYS:
+		/* These column types should never be shipped to MySQL. */
+		ut_ad(0);
+
+	case DATA_CHAR:
+	case DATA_FIXBINARY:
+	case DATA_FLOAT:
+	case DATA_DOUBLE:
+	case DATA_DECIMAL:
+		/* Above are the valid column types for MySQL data. */
+		ut_ad(flen == len);
+#else /* UNIV_DEBUG */
+	default:
+#endif /* UNIV_DEBUG */
+		memcpy(dest, data, len);
+	}
+}
+
+/*************************************************************//**
+Copies an InnoDB record to table->record[0]. */
+extern "C" UNIV_INTERN
+void
+innobase_rec_to_mysql(
+/*==================*/
+	TABLE*			table,		/*!< in/out: MySQL table */
+	const rec_t*		rec,		/*!< in: record */
+	const dict_index_t*	index,		/*!< in: index */
+	const ulint*		offsets)	/*!< in: rec_get_offsets(
+						rec, index, ...) */
+{
+	uint	n_fields	= table->s->fields;
+	uint	i;
+
+	ut_ad(n_fields == dict_table_get_n_user_cols(index->table));
+
+	for (i = 0; i < n_fields; i++) {
+		Field*		field	= table->field[i];
+		ulint		ipos;
+		ulint		ilen;
+		const uchar*	ifield;
+
+		field->reset();
+
+		ipos = dict_index_get_nth_col_pos(index, i);
+
+		if (UNIV_UNLIKELY(ipos == ULINT_UNDEFINED)) {
+null_field:
+			field->set_null();
+			continue;
+		}
+
+		ifield = rec_get_nth_field(rec, offsets, ipos, &ilen);
+
+		/* Assign the NULL flag */
+		if (ilen == UNIV_SQL_NULL) {
+			ut_ad(field->real_maybe_null());
+			goto null_field;
+		}
+
+		field->set_notnull();
+
+		innobase_col_to_mysql(
+			dict_field_get_col(
+				dict_index_get_nth_field(index, ipos)),
+			ifield, ilen, field);
+	}
+}
+
+/*************************************************************//**
+Resets table->record[0]. */
+extern "C" UNIV_INTERN
+void
+innobase_rec_reset(
+/*===============*/
+	TABLE*			table)		/*!< in/out: MySQL table */
+{
+	uint	n_fields	= table->s->fields;
+	uint	i;
+
+	for (i = 0; i < n_fields; i++) {
+		table->field[i]->set_default();
+	}
+}
+
+/******************************************************************//**
+Removes the filename encoding of a database and table name. */
+static
+void
+innobase_convert_tablename(
+/*=======================*/
+	char*	s)	/*!< in: identifier; out: decoded identifier */
+{
+	uint	errors;
+
+	char*	slash = strchr(s, '/');
+
+	if (slash) {
+		char*	t;
+		/* Temporarily replace the '/' with NUL. */
+		*slash = 0;
+		/* Convert the database name. */
+		strconvert(&my_charset_filename, s, system_charset_info,
+			   s, slash - s + 1, &errors);
+
+		t = s + strlen(s);
+		ut_ad(slash >= t);
+		/* Append a  '.' after the database name. */
+		*t++ = '.';
+		slash++;
+		/* Convert the table name. */
+		strconvert(&my_charset_filename, slash, system_charset_info,
+			   t, slash - t + strlen(slash), &errors);
+	} else {
+		strconvert(&my_charset_filename, s,
+			   system_charset_info, s, strlen(s), &errors);
+	}
+}
+
+/*******************************************************************//**
+This function checks that index keys are sensible.
+@return	0 or error number */
+static
+int
+innobase_check_index_keys(
+/*======================*/
+	const KEY*	key_info,	/*!< in: Indexes to be created */
+	ulint		num_of_keys)	/*!< in: Number of indexes to
+					be created */
+{
+	ulint		key_num;
+
+	ut_ad(key_info);
+	ut_ad(num_of_keys);
+
+	for (key_num = 0; key_num < num_of_keys; key_num++) {
+		const KEY&	key = key_info[key_num];
+
+		/* Check that the same index name does not appear
+		twice in indexes to be created. */
+
+		for (ulint i = 0; i < key_num; i++) {
+			const KEY&	key2 = key_info[i];
+
+			if (0 == strcmp(key.name, key2.name)) {
+				sql_print_error("InnoDB: key name `%s` appears"
+						" twice in CREATE INDEX\n",
+						key.name);
+
+				return(ER_WRONG_NAME_FOR_INDEX);
+			}
+		}
+
+		/* Check that MySQL does not try to create a column
+		prefix index field on an inappropriate data type and
+		that the same colum does not appear twice in the index. */
+
+		for (ulint i = 0; i < key.key_parts; i++) {
+			const KEY_PART_INFO&	key_part1
+				= key.key_part[i];
+			const Field*		field
+				= key_part1.field;
+			ibool			is_unsigned;
+
+			switch (get_innobase_type_from_mysql_type(
+					&is_unsigned, field)) {
+			default:
+				break;
+			case DATA_INT:
+			case DATA_FLOAT:
+			case DATA_DOUBLE:
+			case DATA_DECIMAL:
+				if (field->type() == MYSQL_TYPE_VARCHAR) {
+					if (key_part1.length
+					    >= field->pack_length()
+					    - ((Field_varstring*) field)
+					    ->length_bytes) {
+						break;
+					}
+				} else {
+					if (key_part1.length
+					    >= field->pack_length()) {
+						break;
+					}
+				}
+
+				sql_print_error("InnoDB: MySQL is trying to"
+						" create a column prefix"
+						" index field on an"
+						" inappropriate data type."
+						" column `%s`,"
+						" index `%s`.\n",
+						field->field_name,
+						key.name);
+				return(ER_WRONG_KEY_COLUMN);
+			}
+
+			for (ulint j = 0; j < i; j++) {
+				const KEY_PART_INFO&	key_part2
+					= key.key_part[j];
+
+				if (strcmp(key_part1.field->field_name,
+					   key_part2.field->field_name)) {
+					continue;
+				}
+
+				sql_print_error("InnoDB: column `%s`"
+						" is not allowed to occur"
+						" twice in index `%s`.\n",
+						key_part1.field->field_name,
+						key.name);
+				return(ER_WRONG_KEY_COLUMN);
+			}
+		}
+	}
+
+	return(0);
+}
+
+/*******************************************************************//**
+Create index field definition for key part */
+static
+void
+innobase_create_index_field_def(
+/*============================*/
+	KEY_PART_INFO*		key_part,	/*!< in: MySQL key definition */
+	mem_heap_t*		heap,		/*!< in: memory heap */
+	merge_index_field_t*	index_field)	/*!< out: index field
+						definition for key_part */
+{
+	Field*		field;
+	ibool		is_unsigned;
+	ulint		col_type;
+
+	DBUG_ENTER("innobase_create_index_field_def");
+
+	ut_ad(key_part);
+	ut_ad(index_field);
+
+	field = key_part->field;
+	ut_a(field);
+
+	col_type = get_innobase_type_from_mysql_type(&is_unsigned, field);
+
+	if (DATA_BLOB == col_type
+	    || (key_part->length < field->pack_length()
+		&& field->type() != MYSQL_TYPE_VARCHAR)
+	    || (field->type() == MYSQL_TYPE_VARCHAR
+		&& key_part->length < field->pack_length()
+			- ((Field_varstring*)field)->length_bytes)) {
+
+		index_field->prefix_len = key_part->length;
+	} else {
+		index_field->prefix_len = 0;
+	}
+
+	index_field->field_name = mem_heap_strdup(heap, field->field_name);
+
+	DBUG_VOID_RETURN;
+}
+
+/*******************************************************************//**
+Create index definition for key */
+static
+void
+innobase_create_index_def(
+/*======================*/
+	KEY*			key,		/*!< in: key definition */
+	bool			new_primary,	/*!< in: TRUE=generating
+						a new primary key
+						on the table */
+	bool			key_primary,	/*!< in: TRUE if this key
+						is a primary key */
+	merge_index_def_t*	index,		/*!< out: index definition */
+	mem_heap_t*		heap)		/*!< in: heap where memory
+						is allocated */
+{
+	ulint	i;
+	ulint	len;
+	ulint	n_fields = key->key_parts;
+	char*	index_name;
+
+	DBUG_ENTER("innobase_create_index_def");
+
+	index->fields = (merge_index_field_t*) mem_heap_alloc(
+		heap, n_fields * sizeof *index->fields);
+
+	index->ind_type = 0;
+	index->n_fields = n_fields;
+	len = strlen(key->name) + 1;
+	index->name = index_name = (char*) mem_heap_alloc(heap,
+							  len + !new_primary);
+
+	if (UNIV_LIKELY(!new_primary)) {
+		*index_name++ = TEMP_INDEX_PREFIX;
+	}
+
+	memcpy(index_name, key->name, len);
+
+	if (key->flags & HA_NOSAME) {
+		index->ind_type |= DICT_UNIQUE;
+	}
+
+	if (key_primary) {
+		index->ind_type |= DICT_CLUSTERED;
+	}
+
+	for (i = 0; i < n_fields; i++) {
+		innobase_create_index_field_def(&key->key_part[i], heap,
+						&index->fields[i]);
+	}
+
+	DBUG_VOID_RETURN;
+}
+
+/*******************************************************************//**
+Copy index field definition */
+static
+void
+innobase_copy_index_field_def(
+/*==========================*/
+	const dict_field_t*	field,		/*!< in: definition to copy */
+	merge_index_field_t*	index_field)	/*!< out: copied definition */
+{
+	DBUG_ENTER("innobase_copy_index_field_def");
+	DBUG_ASSERT(field != NULL);
+	DBUG_ASSERT(index_field != NULL);
+
+	index_field->field_name = field->name;
+	index_field->prefix_len = field->prefix_len;
+
+	DBUG_VOID_RETURN;
+}
+
+/*******************************************************************//**
+Copy index definition for the index */
+static
+void
+innobase_copy_index_def(
+/*====================*/
+	const dict_index_t*	index,	/*!< in: index definition to copy */
+	merge_index_def_t*	new_index,/*!< out: Index definition */
+	mem_heap_t*		heap)	/*!< in: heap where allocated */
+{
+	ulint	n_fields;
+	ulint	i;
+
+	DBUG_ENTER("innobase_copy_index_def");
+
+	/* Note that we take only those fields that user defined to be
+	in the index.  In the internal representation more colums were
+	added and those colums are not copied .*/
+
+	n_fields = index->n_user_defined_cols;
+
+	new_index->fields = (merge_index_field_t*) mem_heap_alloc(
+		heap, n_fields * sizeof *new_index->fields);
+
+	/* When adding a PRIMARY KEY, we may convert a previous
+	clustered index to a secondary index (UNIQUE NOT NULL). */
+	new_index->ind_type = index->type & ~DICT_CLUSTERED;
+	new_index->n_fields = n_fields;
+	new_index->name = index->name;
+
+	for (i = 0; i < n_fields; i++) {
+		innobase_copy_index_field_def(&index->fields[i],
+					      &new_index->fields[i]);
+	}
+
+	DBUG_VOID_RETURN;
+}
+
+/*******************************************************************//**
+Create an index table where indexes are ordered as follows:
+
+IF a new primary key is defined for the table THEN
+
+	1) New primary key
+	2) Original secondary indexes
+	3) New secondary indexes
+
+ELSE
+
+	1) All new indexes in the order they arrive from MySQL
+
+ENDIF
+
+
+@return	key definitions or NULL */
+static
+merge_index_def_t*
+innobase_create_key_def(
+/*====================*/
+	trx_t*		trx,		/*!< in: trx */
+	const dict_table_t*table,		/*!< in: table definition */
+	mem_heap_t*	heap,		/*!< in: heap where space for key
+					definitions are allocated */
+	KEY*		key_info,	/*!< in: Indexes to be created */
+	ulint&		n_keys)		/*!< in/out: Number of indexes to
+					be created */
+{
+	ulint			i = 0;
+	merge_index_def_t*	indexdef;
+	merge_index_def_t*	indexdefs;
+	bool			new_primary;
+
+	DBUG_ENTER("innobase_create_key_def");
+
+	indexdef = indexdefs = (merge_index_def_t*)
+		mem_heap_alloc(heap, sizeof *indexdef
+			       * (n_keys + UT_LIST_GET_LEN(table->indexes)));
+
+	/* If there is a primary key, it is always the first index
+	defined for the table. */
+
+	new_primary = !my_strcasecmp(system_charset_info,
+				     key_info->name, "PRIMARY");
+
+	/* If there is a UNIQUE INDEX consisting entirely of NOT NULL
+	columns, MySQL will treat it as a PRIMARY KEY unless the
+	table already has one. */
+
+	if (!new_primary && (key_info->flags & HA_NOSAME)
+	    && row_table_got_default_clust_index(table)) {
+		uint	key_part = key_info->key_parts;
+
+		new_primary = TRUE;
+
+		while (key_part--) {
+			if (key_info->key_part[key_part].key_type
+			    & FIELDFLAG_MAYBE_NULL) {
+				new_primary = FALSE;
+				break;
+			}
+		}
+	}
+
+	if (new_primary) {
+		const dict_index_t*	index;
+
+		/* Create the PRIMARY key index definition */
+		innobase_create_index_def(&key_info[i++], TRUE, TRUE,
+					  indexdef++, heap);
+
+		row_mysql_lock_data_dictionary(trx);
+
+		index = dict_table_get_first_index(table);
+
+		/* Copy the index definitions of the old table.  Skip
+		the old clustered index if it is a generated clustered
+		index or a PRIMARY KEY.  If the clustered index is a
+		UNIQUE INDEX, it must be converted to a secondary index. */
+
+		if (dict_index_get_nth_col(index, 0)->mtype == DATA_SYS
+		    || !my_strcasecmp(system_charset_info,
+				      index->name, "PRIMARY")) {
+			index = dict_table_get_next_index(index);
+		}
+
+		while (index) {
+			innobase_copy_index_def(index, indexdef++, heap);
+			index = dict_table_get_next_index(index);
+		}
+
+		row_mysql_unlock_data_dictionary(trx);
+	}
+
+	/* Create definitions for added secondary indexes. */
+
+	while (i < n_keys) {
+		innobase_create_index_def(&key_info[i++], new_primary, FALSE,
+					  indexdef++, heap);
+	}
+
+	n_keys = indexdef - indexdefs;
+
+	DBUG_RETURN(indexdefs);
+}
+
+/*******************************************************************//**
+Create a temporary tablename using query id, thread id, and id
+@return	temporary tablename */
+static
+char*
+innobase_create_temporary_tablename(
+/*================================*/
+	mem_heap_t*	heap,		/*!< in: memory heap */
+	char		id,		/*!< in: identifier [0-9a-zA-Z] */
+	const char*     table_name)	/*!< in: table name */
+{
+	char*			name;
+	ulint			len;
+	static const char	suffix[] = "@0023 "; /* "# " */
+
+	len = strlen(table_name);
+
+	name = (char*) mem_heap_alloc(heap, len + sizeof suffix);
+	memcpy(name, table_name, len);
+	memcpy(name + len, suffix, sizeof suffix);
+	name[len + (sizeof suffix - 2)] = id;
+
+	return(name);
+}
+
+/*******************************************************************//**
+Create indexes.
+@return	0 or error number */
+UNIV_INTERN
+int
+ha_innobase::add_index(
+/*===================*/
+	TABLE*	table,		/*!< in: Table where indexes are created */
+	KEY*	key_info,	/*!< in: Indexes to be created */
+	uint	num_of_keys)	/*!< in: Number of indexes to be created */
+{
+	dict_index_t**	index;		/*!< Index to be created */
+	dict_table_t*	innodb_table;	/*!< InnoDB table in dictionary */
+	dict_table_t*	indexed_table;	/*!< Table where indexes are created */
+	merge_index_def_t* index_defs;	/*!< Index definitions */
+	mem_heap_t*     heap;		/*!< Heap for index definitions */
+	trx_t*		trx;		/*!< Transaction */
+	ulint		num_of_idx;
+	ulint		num_created	= 0;
+	ibool		dict_locked	= FALSE;
+	ulint		new_primary;
+	ulint		error;
+
+	DBUG_ENTER("ha_innobase::add_index");
+	ut_a(table);
+	ut_a(key_info);
+	ut_a(num_of_keys);
+
+	if (srv_created_new_raw || srv_force_recovery) {
+		DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+	}
+
+	update_thd();
+
+	heap = mem_heap_create(1024);
+
+	/* In case MySQL calls this in the middle of a SELECT query, release
+	possible adaptive hash latch to avoid deadlocks of threads. */
+	trx_search_latch_release_if_reserved(prebuilt->trx);
+	trx_start_if_not_started(prebuilt->trx);
+
+	/* Create a background transaction for the operations on
+	the data dictionary tables. */
+	trx = innobase_trx_allocate(user_thd);
+	trx_start_if_not_started(trx);
+
+	innodb_table = indexed_table
+		= dict_table_get(prebuilt->table->name, FALSE);
+
+	/* Check that index keys are sensible */
+
+	error = innobase_check_index_keys(key_info, num_of_keys);
+
+	if (UNIV_UNLIKELY(error)) {
+err_exit:
+		mem_heap_free(heap);
+		trx_general_rollback_for_mysql(trx, FALSE, NULL);
+		trx_free_for_mysql(trx);
+		trx_commit_for_mysql(prebuilt->trx);
+		DBUG_RETURN(error);
+	}
+
+	/* Create table containing all indexes to be built in this
+	alter table add index so that they are in the correct order
+	in the table. */
+
+	num_of_idx = num_of_keys;
+
+	index_defs = innobase_create_key_def(
+		trx, innodb_table, heap, key_info, num_of_idx);
+
+	new_primary = DICT_CLUSTERED & index_defs[0].ind_type;
+
+	/* Allocate memory for dictionary index definitions */
+
+	index = (dict_index_t**) mem_heap_alloc(
+		heap, num_of_idx * sizeof *index);
+
+	/* Flag this transaction as a dictionary operation, so that
+	the data dictionary will be locked in crash recovery. */
+	trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+
+	/* Acquire a lock on the table before creating any indexes. */
+	error = row_merge_lock_table(prebuilt->trx, innodb_table,
+				     new_primary ? LOCK_X : LOCK_S);
+
+	if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+
+		goto error_handling;
+	}
+
+	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
+	or lock waits can happen in it during an index create operation. */
+
+	row_mysql_lock_data_dictionary(trx);
+	dict_locked = TRUE;
+
+	/* If a new primary key is defined for the table we need
+	to drop the original table and rebuild all indexes. */
+
+	if (UNIV_UNLIKELY(new_primary)) {
+		/* This transaction should be the only one
+		operating on the table. */
+		ut_a(innodb_table->n_mysql_handles_opened == 1);
+
+		char*	new_table_name = innobase_create_temporary_tablename(
+			heap, '1', innodb_table->name);
+
+		/* Clone the table. */
+		trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+		indexed_table = row_merge_create_temporary_table(
+			new_table_name, index_defs, innodb_table, trx);
+
+		if (!indexed_table) {
+
+			switch (trx->error_state) {
+			case DB_TABLESPACE_ALREADY_EXISTS:
+			case DB_DUPLICATE_KEY:
+				innobase_convert_tablename(new_table_name);
+				my_error(HA_ERR_TABLE_EXIST, MYF(0),
+					 new_table_name);
+				error = HA_ERR_TABLE_EXIST;
+				break;
+			default:
+				error = convert_error_code_to_mysql(
+					trx->error_state, innodb_table->flags,
+					user_thd);
+			}
+
+			row_mysql_unlock_data_dictionary(trx);
+			goto err_exit;
+		}
+
+		trx->table_id = indexed_table->id;
+	}
+
+	/* Create the indexes in SYS_INDEXES and load into dictionary. */
+
+	for (ulint i = 0; i < num_of_idx; i++) {
+
+		index[i] = row_merge_create_index(trx, indexed_table,
+						  &index_defs[i]);
+
+		if (!index[i]) {
+			error = trx->error_state;
+			goto error_handling;
+		}
+
+		num_created++;
+	}
+
+	ut_ad(error == DB_SUCCESS);
+
+	/* Commit the data dictionary transaction in order to release
+	the table locks on the system tables.  Unfortunately, this
+	means that if MySQL crashes while creating a new primary key
+	inside row_merge_build_indexes(), indexed_table will not be
+	dropped on crash recovery.  Thus, it will become orphaned. */
+	trx_commit_for_mysql(trx);
+
+	row_mysql_unlock_data_dictionary(trx);
+	dict_locked = FALSE;
+
+	ut_a(trx->n_active_thrs == 0);
+	ut_a(UT_LIST_GET_LEN(trx->signals) == 0);
+
+	if (UNIV_UNLIKELY(new_primary)) {
+		/* A primary key is to be built.  Acquire an exclusive
+		table lock also on the table that is being created. */
+		ut_ad(indexed_table != innodb_table);
+
+		error = row_merge_lock_table(prebuilt->trx, indexed_table,
+					     LOCK_X);
+
+		if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+
+			goto error_handling;
+		}
+	}
+
+	/* Read the clustered index of the table and build indexes
+	based on this information using temporary files and merge sort. */
+	error = row_merge_build_indexes(prebuilt->trx,
+					innodb_table, indexed_table,
+					index, num_of_idx, table);
+
+error_handling:
+#ifdef UNIV_DEBUG
+	/* TODO: At the moment we can't handle the following statement
+	in our debugging code below:
+
+	alter table t drop index b, add index (b);
+
+	The fix will have to parse the SQL and note that the index
+	being added has the same name as the the one being dropped and
+	ignore that in the dup index check.*/
+	//dict_table_check_for_dup_indexes(prebuilt->table);
+#endif
+
+	/* After an error, remove all those index definitions from the
+	dictionary which were defined. */
+
+	switch (error) {
+		const char*	old_name;
+		char*		tmp_name;
+	case DB_SUCCESS:
+		ut_a(!dict_locked);
+		row_mysql_lock_data_dictionary(trx);
+		dict_locked = TRUE;
+
+		if (!new_primary) {
+			error = row_merge_rename_indexes(trx, indexed_table);
+
+			if (error != DB_SUCCESS) {
+				row_merge_drop_indexes(trx, indexed_table,
+						       index, num_created);
+			}
+
+			goto convert_error;
+		}
+
+		/* If a new primary key was defined for the table and
+		there was no error at this point, we can now rename
+		the old table as a temporary table, rename the new
+		temporary table as the old table and drop the old table. */
+		old_name = innodb_table->name;
+		tmp_name = innobase_create_temporary_tablename(heap, '2',
+							       old_name);
+
+		error = row_merge_rename_tables(innodb_table, indexed_table,
+						tmp_name, trx);
+
+		if (error != DB_SUCCESS) {
+
+			row_merge_drop_table(trx, indexed_table);
+
+			switch (error) {
+			case DB_TABLESPACE_ALREADY_EXISTS:
+			case DB_DUPLICATE_KEY:
+				innobase_convert_tablename(tmp_name);
+				my_error(HA_ERR_TABLE_EXIST, MYF(0), tmp_name);
+				error = HA_ERR_TABLE_EXIST;
+				break;
+			default:
+				goto convert_error;
+			}
+			break;
+		}
+
+		trx_commit_for_mysql(prebuilt->trx);
+		row_prebuilt_free(prebuilt, TRUE);
+		prebuilt = row_create_prebuilt(indexed_table);
+
+		indexed_table->n_mysql_handles_opened++;
+
+		error = row_merge_drop_table(trx, innodb_table);
+		goto convert_error;
+
+	case DB_TOO_BIG_RECORD:
+		my_error(HA_ERR_TO_BIG_ROW, MYF(0));
+		goto error;
+	case DB_PRIMARY_KEY_IS_NULL:
+		my_error(ER_PRIMARY_CANT_HAVE_NULL, MYF(0));
+		/* fall through */
+	case DB_DUPLICATE_KEY:
+error:
+		prebuilt->trx->error_info = NULL;
+		/* fall through */
+	default:
+		if (new_primary) {
+			row_merge_drop_table(trx, indexed_table);
+		} else {
+			if (!dict_locked) {
+				row_mysql_lock_data_dictionary(trx);
+				dict_locked = TRUE;
+			}
+
+			row_merge_drop_indexes(trx, indexed_table,
+					       index, num_created);
+		}
+
+convert_error:
+		error = convert_error_code_to_mysql(error,
+						    innodb_table->flags,
+						    user_thd);
+	}
+
+	mem_heap_free(heap);
+	trx_commit_for_mysql(trx);
+	if (prebuilt->trx) {
+		trx_commit_for_mysql(prebuilt->trx);
+	}
+
+	if (dict_locked) {
+		row_mysql_unlock_data_dictionary(trx);
+	}
+
+	trx_free_for_mysql(trx);
+
+	/* There might be work for utility threads.*/
+	srv_active_wake_master_thread();
+
+	DBUG_RETURN(error);
+}
+
+/*******************************************************************//**
+Prepare to drop some indexes of a table.
+@return	0 or error number */
+UNIV_INTERN
+int
+ha_innobase::prepare_drop_index(
+/*============================*/
+	TABLE*	table,		/*!< in: Table where indexes are dropped */
+	uint*	key_num,	/*!< in: Key nums to be dropped */
+	uint	num_of_keys)	/*!< in: Number of keys to be dropped */
+{
+	trx_t*		trx;
+	int		err = 0;
+	uint 		n_key;
+
+	DBUG_ENTER("ha_innobase::prepare_drop_index");
+	ut_ad(table);
+	ut_ad(key_num);
+	ut_ad(num_of_keys);
+	if (srv_created_new_raw || srv_force_recovery) {
+		DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+	}
+
+	update_thd();
+
+	trx_search_latch_release_if_reserved(prebuilt->trx);
+	trx = prebuilt->trx;
+
+	/* Test and mark all the indexes to be dropped */
+
+	row_mysql_lock_data_dictionary(trx);
+
+	/* Check that none of the indexes have previously been flagged
+	for deletion. */
+	{
+		const dict_index_t*	index
+			= dict_table_get_first_index(prebuilt->table);
+		do {
+			ut_a(!index->to_be_dropped);
+			index = dict_table_get_next_index(index);
+		} while (index);
+	}
+
+	for (n_key = 0; n_key < num_of_keys; n_key++) {
+		const KEY*	key;
+		dict_index_t*	index;
+
+		key = table->key_info + key_num[n_key];
+		index = dict_table_get_index_on_name_and_min_id(
+			prebuilt->table, key->name);
+
+		if (!index) {
+			sql_print_error("InnoDB could not find key n:o %u "
+					"with name %s for table %s",
+					key_num[n_key],
+					key ? key->name : "NULL",
+					prebuilt->table->name);
+
+			err = HA_ERR_KEY_NOT_FOUND;
+			goto func_exit;
+		}
+
+		/* Refuse to drop the clustered index.  It would be
+		better to automatically generate a clustered index,
+		but mysql_alter_table() will call this method only
+		after ha_innobase::add_index(). */
+
+		if (dict_index_is_clust(index)) {
+			my_error(ER_REQUIRES_PRIMARY_KEY, MYF(0));
+			err = -1;
+			goto func_exit;
+		}
+
+		index->to_be_dropped = TRUE;
+	}
+
+	/* If FOREIGN_KEY_CHECK = 1 you may not drop an index defined
+	for a foreign key constraint because InnoDB requires that both
+	tables contain indexes for the constraint.  Note that CREATE
+	INDEX id ON table does a CREATE INDEX and DROP INDEX, and we
+	can ignore here foreign keys because a new index for the
+	foreign key has already been created.
+
+	We check for the foreign key constraints after marking the
+	candidate indexes for deletion, because when we check for an
+	equivalent foreign index we don't want to select an index that
+	is later deleted. */
+
+	if (trx->check_foreigns
+	    && thd_sql_command(user_thd) != SQLCOM_CREATE_INDEX) {
+		dict_index_t*	index;
+
+		for (index = dict_table_get_first_index(prebuilt->table);
+		     index;
+		     index = dict_table_get_next_index(index)) {
+			dict_foreign_t*	foreign;
+
+			if (!index->to_be_dropped) {
+
+				continue;
+			}
+
+			/* Check if the index is referenced. */
+			foreign = dict_table_get_referenced_constraint(
+				prebuilt->table, index);
+
+			if (foreign) {
+index_needed:
+				trx_set_detailed_error(
+					trx,
+					"Index needed in foreign key "
+					"constraint");
+
+				trx->error_info = index;
+
+				err = HA_ERR_DROP_INDEX_FK;
+				break;
+			} else {
+				/* Check if this index references some
+				other table */
+				foreign = dict_table_get_foreign_constraint(
+					prebuilt->table, index);
+
+				if (foreign) {
+					ut_a(foreign->foreign_index == index);
+
+					/* Search for an equivalent index that
+					the foreign key constraint could use
+					if this index were to be deleted. */
+					if (!dict_foreign_find_equiv_index(
+						foreign)) {
+
+						goto index_needed;
+					}
+				}
+			}
+		}
+	} else if (thd_sql_command(user_thd) == SQLCOM_CREATE_INDEX) {
+		/* This is a drop of a foreign key constraint index that
+		was created by MySQL when the constraint was added.  MySQL
+		does this when the user creates an index explicitly which
+		can be used in place of the automatically generated index. */
+
+		dict_index_t*	index;
+
+		for (index = dict_table_get_first_index(prebuilt->table);
+		     index;
+		     index = dict_table_get_next_index(index)) {
+			dict_foreign_t*	foreign;
+
+			if (!index->to_be_dropped) {
+
+				continue;
+			}
+
+			/* Check if this index references some other table */
+			foreign = dict_table_get_foreign_constraint(
+				prebuilt->table, index);
+
+			if (foreign == NULL) {
+
+				continue;
+			}
+
+			ut_a(foreign->foreign_index == index);
+
+			/* Search for an equivalent index that the
+			foreign key constraint could use if this index
+			were to be deleted. */
+
+			if (!dict_foreign_find_equiv_index(foreign)) {
+				trx_set_detailed_error(
+					trx,
+					"Index needed in foreign key "
+					"constraint");
+
+				trx->error_info = foreign->foreign_index;
+
+				err = HA_ERR_DROP_INDEX_FK;
+				break;
+			}
+		}
+	}
+
+func_exit:
+	if (err) {
+		/* Undo our changes since there was some sort of error. */
+		dict_index_t*	index
+			= dict_table_get_first_index(prebuilt->table);
+
+		do {
+			index->to_be_dropped = FALSE;
+			index = dict_table_get_next_index(index);
+		} while (index);
+	}
+
+	row_mysql_unlock_data_dictionary(trx);
+
+	DBUG_RETURN(err);
+}
+
+/*******************************************************************//**
+Drop the indexes that were passed to a successful prepare_drop_index().
+@return	0 or error number */
+UNIV_INTERN
+int
+ha_innobase::final_drop_index(
+/*==========================*/
+	TABLE*	table)		/*!< in: Table where indexes are dropped */
+{
+	dict_index_t*	index;		/*!< Index to be dropped */
+	trx_t*		trx;		/*!< Transaction */
+	int		err;
+
+	DBUG_ENTER("ha_innobase::final_drop_index");
+	ut_ad(table);
+
+	if (srv_created_new_raw || srv_force_recovery) {
+		DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+	}
+
+	update_thd();
+
+	trx_search_latch_release_if_reserved(prebuilt->trx);
+	trx_start_if_not_started(prebuilt->trx);
+
+	/* Create a background transaction for the operations on
+	the data dictionary tables. */
+	trx = innobase_trx_allocate(user_thd);
+	trx_start_if_not_started(trx);
+
+	/* Flag this transaction as a dictionary operation, so that
+	the data dictionary will be locked in crash recovery. */
+	trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+
+	/* Lock the table exclusively, to ensure that no active
+	transaction depends on an index that is being dropped. */
+	err = convert_error_code_to_mysql(
+		row_merge_lock_table(prebuilt->trx, prebuilt->table, LOCK_X),
+		prebuilt->table->flags, user_thd);
+
+	row_mysql_lock_data_dictionary(trx);
+
+	if (UNIV_UNLIKELY(err)) {
+
+		/* Unmark the indexes to be dropped. */
+		for (index = dict_table_get_first_index(prebuilt->table);
+		     index; index = dict_table_get_next_index(index)) {
+
+			index->to_be_dropped = FALSE;
+		}
+
+		goto func_exit;
+	}
+
+	/* Drop indexes marked to be dropped */
+
+	index = dict_table_get_first_index(prebuilt->table);
+
+	while (index) {
+		dict_index_t*	next_index;
+
+		next_index = dict_table_get_next_index(index);
+
+		if (index->to_be_dropped) {
+
+			row_merge_drop_index(index, prebuilt->table, trx);
+		}
+
+		index = next_index;
+	}
+
+	/* Check that all flagged indexes were dropped. */
+	for (index = dict_table_get_first_index(prebuilt->table);
+	     index; index = dict_table_get_next_index(index)) {
+		ut_a(!index->to_be_dropped);
+	}
+
+#ifdef UNIV_DEBUG
+	dict_table_check_for_dup_indexes(prebuilt->table);
+#endif
+
+func_exit:
+	trx_commit_for_mysql(trx);
+	trx_commit_for_mysql(prebuilt->trx);
+	row_mysql_unlock_data_dictionary(trx);
+
+	/* Flush the log to reduce probability that the .frm files and
+	the InnoDB data dictionary get out-of-sync if the user runs
+	with innodb_flush_log_at_trx_commit = 0 */
+
+	log_buffer_flush_to_disk();
+
+	trx_free_for_mysql(trx);
+
+	/* Tell the InnoDB server that there might be work for
+	utility threads: */
+
+	srv_active_wake_master_thread();
+
+	DBUG_RETURN(err);
+}
diff --git a/storage/innodb_plugin/handler/handler0vars.h b/storage/innodb_plugin/handler/handler0vars.h
new file mode 100644
index 00000000000..e0f8f75e34d
--- /dev/null
+++ b/storage/innodb_plugin/handler/handler0vars.h
@@ -0,0 +1,69 @@
+/*****************************************************************************
+
+Copyright (c) 2008, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file handler/handler0vars.h
+This file contains accessor functions for dynamic plugin on Windows.
+***********************************************************************/
+
+#if defined __WIN__ && defined MYSQL_DYNAMIC_PLUGIN
+/*******************************************************************//**
+This is a list of externals that can not be resolved by delay loading.
+They have to be resolved indirectly via their addresses in the .map file.
+All of them are external variables. */
+extern	CHARSET_INFO*		wdl_my_charset_bin;
+extern	CHARSET_INFO*		wdl_my_charset_latin1;
+extern	CHARSET_INFO*		wdl_my_charset_filename;
+extern	CHARSET_INFO**		wdl_system_charset_info;
+extern	CHARSET_INFO**		wdl_default_charset_info;
+extern	CHARSET_INFO**		wdl_all_charsets;
+extern	system_variables*	wdl_global_system_variables;
+extern	char*			wdl_mysql_real_data_home;
+extern	char**			wdl_mysql_data_home;
+extern	char**			wdl_tx_isolation_names;
+extern	char**			wdl_binlog_format_names;
+extern	char*			wdl_reg_ext;
+extern	pthread_mutex_t*	wdl_LOCK_thread_count;
+extern	key_map*		wdl_key_map_full;
+extern	MY_TMPDIR*		wdl_mysql_tmpdir_list;
+extern	bool*			wdl_mysqld_embedded;
+extern	uint*			wdl_lower_case_table_names;
+extern	ulong*			wdl_specialflag;
+extern	int*			wdl_my_umask;
+
+#define my_charset_bin		(*wdl_my_charset_bin)
+#define my_charset_latin1	(*wdl_my_charset_latin1)
+#define my_charset_filename	(*wdl_my_charset_filename)
+#define system_charset_info	(*wdl_system_charset_info)
+#define default_charset_info	(*wdl_default_charset_info)
+#define all_charsets		(wdl_all_charsets)
+#define global_system_variables	(*wdl_global_system_variables)
+#define mysql_real_data_home	(wdl_mysql_real_data_home)
+#define mysql_data_home		(*wdl_mysql_data_home)
+#define tx_isolation_names	(wdl_tx_isolation_names)
+#define binlog_format_names	(wdl_binlog_format_names)
+#define reg_ext			(wdl_reg_ext)
+#define LOCK_thread_count	(*wdl_LOCK_thread_count)
+#define key_map_full		(*wdl_key_map_full)
+#define mysql_tmpdir_list	(*wdl_mysql_tmpdir_list)
+#define mysqld_embedded		(*wdl_mysqld_embedded)
+#define lower_case_table_names	(*wdl_lower_case_table_names)
+#define specialflag		(*wdl_specialflag)
+#define my_umask		(*wdl_my_umask)
+
+#endif
diff --git a/storage/innodb_plugin/handler/i_s.cc b/storage/innodb_plugin/handler/i_s.cc
new file mode 100644
index 00000000000..c0d488d1c49
--- /dev/null
+++ b/storage/innodb_plugin/handler/i_s.cc
@@ -0,0 +1,1576 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file handler/i_s.cc
+InnoDB INFORMATION SCHEMA tables interface to MySQL.
+
+Created July 18, 2007 Vasil Dimov
+*******************************************************/
+
+#include <mysql_priv.h>
+#include <mysqld_error.h>
+
+#include <m_ctype.h>
+#include <hash.h>
+#include <myisampack.h>
+#include <mysys_err.h>
+#include <my_sys.h>
+#include "i_s.h"
+#include <mysql/plugin.h>
+
+extern "C" {
+#include "trx0i_s.h"
+#include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */
+#include "buf0buddy.h" /* for i_s_cmpmem */
+#include "buf0buf.h" /* for buf_pool and PAGE_ZIP_MIN_SIZE */
+#include "ha_prototypes.h" /* for innobase_convert_name() */
+#include "srv0start.h" /* for srv_was_started */
+}
+
+static const char plugin_author[] = "Innobase Oy";
+
+#define OK(expr)		\
+	if ((expr) != 0) {	\
+		DBUG_RETURN(1);	\
+	}
+
+#define RETURN_IF_INNODB_NOT_STARTED(plugin_name)			\
+do {									\
+	if (!srv_was_started) {						\
+		push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,	\
+				    ER_CANT_FIND_SYSTEM_REC,		\
+				    "InnoDB: SELECTing from "		\
+				    "INFORMATION_SCHEMA.%s but "	\
+				    "the InnoDB storage engine "	\
+				    "is not installed", plugin_name);	\
+		DBUG_RETURN(0);						\
+	}								\
+} while (0)
+
+#if !defined __STRICT_ANSI__ && defined __GNUC__ && (__GNUC__) > 2 && !defined __INTEL_COMPILER
+#define STRUCT_FLD(name, value)	name: value
+#else
+#define STRUCT_FLD(name, value)	value
+#endif
+
+static const ST_FIELD_INFO END_OF_ST_FIELD_INFO =
+	{STRUCT_FLD(field_name,		NULL),
+	 STRUCT_FLD(field_length,	0),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_NULL),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)};
+
+/*
+Use the following types mapping:
+
+C type	ST_FIELD_INFO::field_type
+---------------------------------
+long			MYSQL_TYPE_LONGLONG
+(field_length=MY_INT64_NUM_DECIMAL_DIGITS)
+
+long unsigned		MYSQL_TYPE_LONGLONG
+(field_length=MY_INT64_NUM_DECIMAL_DIGITS, field_flags=MY_I_S_UNSIGNED)
+
+char*			MYSQL_TYPE_STRING
+(field_length=n)
+
+float			MYSQL_TYPE_FLOAT
+(field_length=0 is ignored)
+
+void*			MYSQL_TYPE_LONGLONG
+(field_length=MY_INT64_NUM_DECIMAL_DIGITS, field_flags=MY_I_S_UNSIGNED)
+
+boolean (if else)	MYSQL_TYPE_LONG
+(field_length=1)
+
+time_t			MYSQL_TYPE_DATETIME
+(field_length=0 ignored)
+---------------------------------
+*/
+
+/* XXX these are defined in mysql_priv.h inside #ifdef MYSQL_SERVER */
+bool schema_table_store_record(THD *thd, TABLE *table);
+void localtime_to_TIME(MYSQL_TIME *to, struct tm *from);
+bool check_global_access(THD *thd, ulong want_access);
+
+/*******************************************************************//**
+Common function to fill any of the dynamic tables:
+INFORMATION_SCHEMA.innodb_trx
+INFORMATION_SCHEMA.innodb_locks
+INFORMATION_SCHEMA.innodb_lock_waits
+@return	0 on success */
+static
+int
+trx_i_s_common_fill_table(
+/*======================*/
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	COND*		cond);	/*!< in: condition (not used) */
+
+/*******************************************************************//**
+Unbind a dynamic INFORMATION_SCHEMA table.
+@return	0 on success */
+static
+int
+i_s_common_deinit(
+/*==============*/
+	void*	p);	/*!< in/out: table schema object */
+
+/*******************************************************************//**
+Auxiliary function to store time_t value in MYSQL_TYPE_DATETIME
+field.
+@return	0 on success */
+static
+int
+field_store_time_t(
+/*===============*/
+	Field*	field,	/*!< in/out: target field for storage */
+	time_t	time)	/*!< in: value to store */
+{
+	MYSQL_TIME	my_time;
+	struct tm	tm_time;
+
+#if 0
+	/* use this if you are sure that `variables' and `time_zone'
+	are always initialized */
+	thd->variables.time_zone->gmt_sec_to_TIME(
+		&my_time, (my_time_t) time);
+#else
+	localtime_r(&time, &tm_time);
+	localtime_to_TIME(&my_time, &tm_time);
+	my_time.time_type = MYSQL_TIMESTAMP_DATETIME;
+#endif
+
+	return(field->store_time(&my_time, MYSQL_TIMESTAMP_DATETIME));
+}
+
+/*******************************************************************//**
+Auxiliary function to store char* value in MYSQL_TYPE_STRING field.
+@return	0 on success */
+static
+int
+field_store_string(
+/*===============*/
+	Field*		field,	/*!< in/out: target field for storage */
+	const char*	str)	/*!< in: NUL-terminated utf-8 string,
+				or NULL */
+{
+	int	ret;
+
+	if (str != NULL) {
+
+		ret = field->store(str, strlen(str),
+				   system_charset_info);
+		field->set_notnull();
+	} else {
+
+		ret = 0; /* success */
+		field->set_null();
+	}
+
+	return(ret);
+}
+
+/*******************************************************************//**
+Auxiliary function to store ulint value in MYSQL_TYPE_LONGLONG field.
+If the value is ULINT_UNDEFINED then the field it set to NULL.
+@return	0 on success */
+static
+int
+field_store_ulint(
+/*==============*/
+	Field*	field,	/*!< in/out: target field for storage */
+	ulint	n)	/*!< in: value to store */
+{
+	int	ret;
+
+	if (n != ULINT_UNDEFINED) {
+
+		ret = field->store(n);
+		field->set_notnull();
+	} else {
+
+		ret = 0; /* success */
+		field->set_null();
+	}
+
+	return(ret);
+}
+
+/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_trx */
+static ST_FIELD_INFO	innodb_trx_fields_info[] =
+{
+#define IDX_TRX_ID		0
+	{STRUCT_FLD(field_name,		"trx_id"),
+	 STRUCT_FLD(field_length,	TRX_ID_MAX_LEN + 1),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_TRX_STATE		1
+	{STRUCT_FLD(field_name,		"trx_state"),
+	 STRUCT_FLD(field_length,	TRX_QUE_STATE_STR_MAX_LEN + 1),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_TRX_STARTED		2
+	{STRUCT_FLD(field_name,		"trx_started"),
+	 STRUCT_FLD(field_length,	0),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_DATETIME),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_TRX_REQUESTED_LOCK_ID	3
+	{STRUCT_FLD(field_name,		"trx_requested_lock_id"),
+	 STRUCT_FLD(field_length,	TRX_I_S_LOCK_ID_MAX_LEN + 1),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_MAYBE_NULL),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_TRX_WAIT_STARTED	4
+	{STRUCT_FLD(field_name,		"trx_wait_started"),
+	 STRUCT_FLD(field_length,	0),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_DATETIME),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_MAYBE_NULL),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_TRX_WEIGHT		5
+	{STRUCT_FLD(field_name,		"trx_weight"),
+	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_TRX_MYSQL_THREAD_ID	6
+	{STRUCT_FLD(field_name,		"trx_mysql_thread_id"),
+	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_TRX_QUERY		7
+	{STRUCT_FLD(field_name,		"trx_query"),
+	 STRUCT_FLD(field_length,	TRX_I_S_TRX_QUERY_MAX_LEN),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_MAYBE_NULL),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	END_OF_ST_FIELD_INFO
+};
+
+/*******************************************************************//**
+Read data from cache buffer and fill the INFORMATION_SCHEMA.innodb_trx
+table with it.
+@return	0 on success */
+static
+int
+fill_innodb_trx_from_cache(
+/*=======================*/
+	trx_i_s_cache_t*	cache,	/*!< in: cache to read from */
+	THD*			thd,	/*!< in: used to call
+					schema_table_store_record() */
+	TABLE*			table)	/*!< in/out: fill this table */
+{
+	Field**	fields;
+	ulint	rows_num;
+	char	lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1];
+	ulint	i;
+
+	DBUG_ENTER("fill_innodb_trx_from_cache");
+
+	fields = table->field;
+
+	rows_num = trx_i_s_cache_get_rows_used(cache,
+					       I_S_INNODB_TRX);
+
+	for (i = 0; i < rows_num; i++) {
+
+		i_s_trx_row_t*	row;
+		char		trx_id[TRX_ID_MAX_LEN + 1];
+
+		row = (i_s_trx_row_t*)
+			trx_i_s_cache_get_nth_row(
+				cache, I_S_INNODB_TRX, i);
+
+		/* trx_id */
+		ut_snprintf(trx_id, sizeof(trx_id), TRX_ID_FMT, row->trx_id);
+		OK(field_store_string(fields[IDX_TRX_ID], trx_id));
+
+		/* trx_state */
+		OK(field_store_string(fields[IDX_TRX_STATE],
+				      row->trx_state));
+
+		/* trx_started */
+		OK(field_store_time_t(fields[IDX_TRX_STARTED],
+				      (time_t) row->trx_started));
+
+		/* trx_requested_lock_id */
+		/* trx_wait_started */
+		if (row->trx_wait_started != 0) {
+
+			OK(field_store_string(
+				   fields[IDX_TRX_REQUESTED_LOCK_ID],
+				   trx_i_s_create_lock_id(
+					   row->requested_lock_row,
+					   lock_id, sizeof(lock_id))));
+			/* field_store_string() sets it no notnull */
+
+			OK(field_store_time_t(
+				   fields[IDX_TRX_WAIT_STARTED],
+				   (time_t) row->trx_wait_started));
+			fields[IDX_TRX_WAIT_STARTED]->set_notnull();
+		} else {
+
+			fields[IDX_TRX_REQUESTED_LOCK_ID]->set_null();
+			fields[IDX_TRX_WAIT_STARTED]->set_null();
+		}
+
+		/* trx_weight */
+		OK(fields[IDX_TRX_WEIGHT]->store((longlong) row->trx_weight,
+						 true));
+
+		/* trx_mysql_thread_id */
+		OK(fields[IDX_TRX_MYSQL_THREAD_ID]->store(
+			   row->trx_mysql_thread_id));
+
+		/* trx_query */
+		OK(field_store_string(fields[IDX_TRX_QUERY],
+				      row->trx_query));
+
+		OK(schema_table_store_record(thd, table));
+	}
+
+	DBUG_RETURN(0);
+}
+
+/*******************************************************************//**
+Bind the dynamic table INFORMATION_SCHEMA.innodb_trx
+@return	0 on success */
+static
+int
+innodb_trx_init(
+/*============*/
+	void*	p)	/*!< in/out: table schema object */
+{
+	ST_SCHEMA_TABLE*	schema;
+
+	DBUG_ENTER("innodb_trx_init");
+
+	schema = (ST_SCHEMA_TABLE*) p;
+
+	schema->fields_info = innodb_trx_fields_info;
+	schema->fill_table = trx_i_s_common_fill_table;
+
+	DBUG_RETURN(0);
+}
+
+static struct st_mysql_information_schema	i_s_info =
+{
+	MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION
+};
+
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_trx =
+{
+	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
+	/* int */
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+	/* pointer to type-specific plugin descriptor */
+	/* void* */
+	STRUCT_FLD(info, &i_s_info),
+
+	/* plugin name */
+	/* const char* */
+	STRUCT_FLD(name, "INNODB_TRX"),
+
+	/* plugin author (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(author, plugin_author),
+
+	/* general descriptive text (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(descr, "InnoDB transactions"),
+
+	/* the plugin license (PLUGIN_LICENSE_XXX) */
+	/* int */
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+	/* the function to invoke when plugin is loaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(init, innodb_trx_init),
+
+	/* the function to invoke when plugin is unloaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(deinit, i_s_common_deinit),
+
+	/* plugin version (for SHOW PLUGINS) */
+	/* unsigned int */
+	STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+	/* struct st_mysql_show_var* */
+	STRUCT_FLD(status_vars, NULL),
+
+	/* struct st_mysql_sys_var** */
+	STRUCT_FLD(system_vars, NULL),
+
+	/* reserved for dependency checking */
+	/* void* */
+	STRUCT_FLD(__reserved1, NULL)
+};
+
+/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_locks */
+static ST_FIELD_INFO	innodb_locks_fields_info[] =
+{
+#define IDX_LOCK_ID		0
+	{STRUCT_FLD(field_name,		"lock_id"),
+	 STRUCT_FLD(field_length,	TRX_I_S_LOCK_ID_MAX_LEN + 1),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_LOCK_TRX_ID		1
+	{STRUCT_FLD(field_name,		"lock_trx_id"),
+	 STRUCT_FLD(field_length,	TRX_ID_MAX_LEN + 1),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_LOCK_MODE		2
+	{STRUCT_FLD(field_name,		"lock_mode"),
+	 /* S[,GAP] X[,GAP] IS[,GAP] IX[,GAP] AUTO_INC UNKNOWN */
+	 STRUCT_FLD(field_length,	32),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_LOCK_TYPE		3
+	{STRUCT_FLD(field_name,		"lock_type"),
+	 STRUCT_FLD(field_length,	32 /* RECORD|TABLE|UNKNOWN */),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_LOCK_TABLE		4
+	{STRUCT_FLD(field_name,		"lock_table"),
+	 STRUCT_FLD(field_length,	1024),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_LOCK_INDEX		5
+	{STRUCT_FLD(field_name,		"lock_index"),
+	 STRUCT_FLD(field_length,	1024),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_MAYBE_NULL),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_LOCK_SPACE		6
+	{STRUCT_FLD(field_name,		"lock_space"),
+	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_LOCK_PAGE		7
+	{STRUCT_FLD(field_name,		"lock_page"),
+	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_LOCK_REC		8
+	{STRUCT_FLD(field_name,		"lock_rec"),
+	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_LOCK_DATA		9
+	{STRUCT_FLD(field_name,		"lock_data"),
+	 STRUCT_FLD(field_length,	TRX_I_S_LOCK_DATA_MAX_LEN),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_MAYBE_NULL),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	END_OF_ST_FIELD_INFO
+};
+
+/*******************************************************************//**
+Read data from cache buffer and fill the INFORMATION_SCHEMA.innodb_locks
+table with it.
+@return	0 on success */
+static
+int
+fill_innodb_locks_from_cache(
+/*=========================*/
+	trx_i_s_cache_t*	cache,	/*!< in: cache to read from */
+	THD*			thd,	/*!< in: MySQL client connection */
+	TABLE*			table)	/*!< in/out: fill this table */
+{
+	Field**	fields;
+	ulint	rows_num;
+	char	lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1];
+	ulint	i;
+
+	DBUG_ENTER("fill_innodb_locks_from_cache");
+
+	fields = table->field;
+
+	rows_num = trx_i_s_cache_get_rows_used(cache,
+					       I_S_INNODB_LOCKS);
+
+	for (i = 0; i < rows_num; i++) {
+
+		i_s_locks_row_t*	row;
+
+		/* note that the decoded database or table name is
+		never expected to be longer than NAME_LEN;
+		NAME_LEN for database name
+		2 for surrounding quotes around database name
+		NAME_LEN for table name
+		2 for surrounding quotes around table name
+		1 for the separating dot (.)
+		9 for the #mysql50# prefix */
+		char			buf[2 * NAME_LEN + 14];
+		const char*		bufend;
+
+		char			lock_trx_id[TRX_ID_MAX_LEN + 1];
+
+		row = (i_s_locks_row_t*)
+			trx_i_s_cache_get_nth_row(
+				cache, I_S_INNODB_LOCKS, i);
+
+		/* lock_id */
+		trx_i_s_create_lock_id(row, lock_id, sizeof(lock_id));
+		OK(field_store_string(fields[IDX_LOCK_ID],
+				      lock_id));
+
+		/* lock_trx_id */
+		ut_snprintf(lock_trx_id, sizeof(lock_trx_id),
+			    TRX_ID_FMT, row->lock_trx_id);
+		OK(field_store_string(fields[IDX_LOCK_TRX_ID], lock_trx_id));
+
+		/* lock_mode */
+		OK(field_store_string(fields[IDX_LOCK_MODE],
+				      row->lock_mode));
+
+		/* lock_type */
+		OK(field_store_string(fields[IDX_LOCK_TYPE],
+				      row->lock_type));
+
+		/* lock_table */
+		bufend = innobase_convert_name(buf, sizeof(buf),
+					       row->lock_table,
+					       strlen(row->lock_table),
+					       thd, TRUE);
+		OK(fields[IDX_LOCK_TABLE]->store(buf, bufend - buf,
+						 system_charset_info));
+
+		/* lock_index */
+		if (row->lock_index != NULL) {
+
+			bufend = innobase_convert_name(buf, sizeof(buf),
+						       row->lock_index,
+						       strlen(row->lock_index),
+						       thd, FALSE);
+			OK(fields[IDX_LOCK_INDEX]->store(buf, bufend - buf,
+							 system_charset_info));
+			fields[IDX_LOCK_INDEX]->set_notnull();
+		} else {
+
+			fields[IDX_LOCK_INDEX]->set_null();
+		}
+
+		/* lock_space */
+		OK(field_store_ulint(fields[IDX_LOCK_SPACE],
+				     row->lock_space));
+
+		/* lock_page */
+		OK(field_store_ulint(fields[IDX_LOCK_PAGE],
+				     row->lock_page));
+
+		/* lock_rec */
+		OK(field_store_ulint(fields[IDX_LOCK_REC],
+				     row->lock_rec));
+
+		/* lock_data */
+		OK(field_store_string(fields[IDX_LOCK_DATA],
+				      row->lock_data));
+
+		OK(schema_table_store_record(thd, table));
+	}
+
+	DBUG_RETURN(0);
+}
+
+/*******************************************************************//**
+Bind the dynamic table INFORMATION_SCHEMA.innodb_locks
+@return	0 on success */
+static
+int
+innodb_locks_init(
+/*==============*/
+	void*	p)	/*!< in/out: table schema object */
+{
+	ST_SCHEMA_TABLE*	schema;
+
+	DBUG_ENTER("innodb_locks_init");
+
+	schema = (ST_SCHEMA_TABLE*) p;
+
+	schema->fields_info = innodb_locks_fields_info;
+	schema->fill_table = trx_i_s_common_fill_table;
+
+	DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_locks =
+{
+	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
+	/* int */
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+	/* pointer to type-specific plugin descriptor */
+	/* void* */
+	STRUCT_FLD(info, &i_s_info),
+
+	/* plugin name */
+	/* const char* */
+	STRUCT_FLD(name, "INNODB_LOCKS"),
+
+	/* plugin author (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(author, plugin_author),
+
+	/* general descriptive text (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(descr, "InnoDB conflicting locks"),
+
+	/* the plugin license (PLUGIN_LICENSE_XXX) */
+	/* int */
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+	/* the function to invoke when plugin is loaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(init, innodb_locks_init),
+
+	/* the function to invoke when plugin is unloaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(deinit, i_s_common_deinit),
+
+	/* plugin version (for SHOW PLUGINS) */
+	/* unsigned int */
+	STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+	/* struct st_mysql_show_var* */
+	STRUCT_FLD(status_vars, NULL),
+
+	/* struct st_mysql_sys_var** */
+	STRUCT_FLD(system_vars, NULL),
+
+	/* reserved for dependency checking */
+	/* void* */
+	STRUCT_FLD(__reserved1, NULL)
+};
+
+/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_lock_waits */
+static ST_FIELD_INFO	innodb_lock_waits_fields_info[] =
+{
+#define IDX_REQUESTING_TRX_ID	0
+	{STRUCT_FLD(field_name,		"requesting_trx_id"),
+	 STRUCT_FLD(field_length,	TRX_ID_MAX_LEN + 1),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_REQUESTED_LOCK_ID	1
+	{STRUCT_FLD(field_name,		"requested_lock_id"),
+	 STRUCT_FLD(field_length,	TRX_I_S_LOCK_ID_MAX_LEN + 1),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_BLOCKING_TRX_ID	2
+	{STRUCT_FLD(field_name,		"blocking_trx_id"),
+	 STRUCT_FLD(field_length,	TRX_ID_MAX_LEN + 1),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define IDX_BLOCKING_LOCK_ID	3
+	{STRUCT_FLD(field_name,		"blocking_lock_id"),
+	 STRUCT_FLD(field_length,	TRX_I_S_LOCK_ID_MAX_LEN + 1),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	END_OF_ST_FIELD_INFO
+};
+
+/*******************************************************************//**
+Read data from cache buffer and fill the
+INFORMATION_SCHEMA.innodb_lock_waits table with it.
+@return	0 on success */
+static
+int
+fill_innodb_lock_waits_from_cache(
+/*==============================*/
+	trx_i_s_cache_t*	cache,	/*!< in: cache to read from */
+	THD*			thd,	/*!< in: used to call
+					schema_table_store_record() */
+	TABLE*			table)	/*!< in/out: fill this table */
+{
+	Field**	fields;
+	ulint	rows_num;
+	char	requested_lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1];
+	char	blocking_lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1];
+	ulint	i;
+
+	DBUG_ENTER("fill_innodb_lock_waits_from_cache");
+
+	fields = table->field;
+
+	rows_num = trx_i_s_cache_get_rows_used(cache,
+					       I_S_INNODB_LOCK_WAITS);
+
+	for (i = 0; i < rows_num; i++) {
+
+		i_s_lock_waits_row_t*	row;
+
+		char	requesting_trx_id[TRX_ID_MAX_LEN + 1];
+		char	blocking_trx_id[TRX_ID_MAX_LEN + 1];
+
+		row = (i_s_lock_waits_row_t*)
+			trx_i_s_cache_get_nth_row(
+				cache, I_S_INNODB_LOCK_WAITS, i);
+
+		/* requesting_trx_id */
+		ut_snprintf(requesting_trx_id, sizeof(requesting_trx_id),
+			    TRX_ID_FMT, row->requested_lock_row->lock_trx_id);
+		OK(field_store_string(fields[IDX_REQUESTING_TRX_ID],
+				      requesting_trx_id));
+
+		/* requested_lock_id */
+		OK(field_store_string(
+			   fields[IDX_REQUESTED_LOCK_ID],
+			   trx_i_s_create_lock_id(
+				   row->requested_lock_row,
+				   requested_lock_id,
+				   sizeof(requested_lock_id))));
+
+		/* blocking_trx_id */
+		ut_snprintf(blocking_trx_id, sizeof(blocking_trx_id),
+			    TRX_ID_FMT, row->blocking_lock_row->lock_trx_id);
+		OK(field_store_string(fields[IDX_BLOCKING_TRX_ID],
+				      blocking_trx_id));
+
+		/* blocking_lock_id */
+		OK(field_store_string(
+			   fields[IDX_BLOCKING_LOCK_ID],
+			   trx_i_s_create_lock_id(
+				   row->blocking_lock_row,
+				   blocking_lock_id,
+				   sizeof(blocking_lock_id))));
+
+		OK(schema_table_store_record(thd, table));
+	}
+
+	DBUG_RETURN(0);
+}
+
+/*******************************************************************//**
+Bind the dynamic table INFORMATION_SCHEMA.innodb_lock_waits
+@return	0 on success */
+static
+int
+innodb_lock_waits_init(
+/*===================*/
+	void*	p)	/*!< in/out: table schema object */
+{
+	ST_SCHEMA_TABLE*	schema;
+
+	DBUG_ENTER("innodb_lock_waits_init");
+
+	schema = (ST_SCHEMA_TABLE*) p;
+
+	schema->fields_info = innodb_lock_waits_fields_info;
+	schema->fill_table = trx_i_s_common_fill_table;
+
+	DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_lock_waits =
+{
+	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
+	/* int */
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+	/* pointer to type-specific plugin descriptor */
+	/* void* */
+	STRUCT_FLD(info, &i_s_info),
+
+	/* plugin name */
+	/* const char* */
+	STRUCT_FLD(name, "INNODB_LOCK_WAITS"),
+
+	/* plugin author (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(author, "Innobase Oy"),
+
+	/* general descriptive text (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(descr, "InnoDB which lock is blocking which"),
+
+	/* the plugin license (PLUGIN_LICENSE_XXX) */
+	/* int */
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+	/* the function to invoke when plugin is loaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(init, innodb_lock_waits_init),
+
+	/* the function to invoke when plugin is unloaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(deinit, i_s_common_deinit),
+
+	/* plugin version (for SHOW PLUGINS) */
+	/* unsigned int */
+	STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+	/* struct st_mysql_show_var* */
+	STRUCT_FLD(status_vars, NULL),
+
+	/* struct st_mysql_sys_var** */
+	STRUCT_FLD(system_vars, NULL),
+
+	/* reserved for dependency checking */
+	/* void* */
+	STRUCT_FLD(__reserved1, NULL)
+};
+
+/*******************************************************************//**
+Common function to fill any of the dynamic tables:
+INFORMATION_SCHEMA.innodb_trx
+INFORMATION_SCHEMA.innodb_locks
+INFORMATION_SCHEMA.innodb_lock_waits
+@return	0 on success */
+static
+int
+trx_i_s_common_fill_table(
+/*======================*/
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	COND*		cond)	/*!< in: condition (not used) */
+{
+	const char*		table_name;
+	int			ret;
+	trx_i_s_cache_t*	cache;
+
+	DBUG_ENTER("trx_i_s_common_fill_table");
+
+	/* deny access to non-superusers */
+	if (check_global_access(thd, PROCESS_ACL)) {
+
+		DBUG_RETURN(0);
+	}
+
+	/* minimize the number of places where global variables are
+	referenced */
+	cache = trx_i_s_cache;
+
+	/* which table we have to fill? */
+	table_name = tables->schema_table_name;
+	/* or table_name = tables->schema_table->table_name; */
+
+	RETURN_IF_INNODB_NOT_STARTED(table_name);
+
+	/* update the cache */
+	trx_i_s_cache_start_write(cache);
+	trx_i_s_possibly_fetch_data_into_cache(cache);
+	trx_i_s_cache_end_write(cache);
+
+	if (trx_i_s_cache_is_truncated(cache)) {
+
+		/* XXX show warning to user if possible */
+		fprintf(stderr, "Warning: data in %s truncated due to "
+			"memory limit of %d bytes\n", table_name,
+			TRX_I_S_MEM_LIMIT);
+	}
+
+	ret = 0;
+
+	trx_i_s_cache_start_read(cache);
+
+	if (innobase_strcasecmp(table_name, "innodb_trx") == 0) {
+
+		if (fill_innodb_trx_from_cache(
+			cache, thd, tables->table) != 0) {
+
+			ret = 1;
+		}
+
+	} else if (innobase_strcasecmp(table_name, "innodb_locks") == 0) {
+
+		if (fill_innodb_locks_from_cache(
+			cache, thd, tables->table) != 0) {
+
+			ret = 1;
+		}
+
+	} else if (innobase_strcasecmp(table_name, "innodb_lock_waits") == 0) {
+
+		if (fill_innodb_lock_waits_from_cache(
+			cache, thd, tables->table) != 0) {
+
+			ret = 1;
+		}
+
+	} else {
+
+		/* huh! what happened!? */
+		fprintf(stderr,
+			"InnoDB: trx_i_s_common_fill_table() was "
+			"called to fill unknown table: %s.\n"
+			"This function only knows how to fill "
+			"innodb_trx, innodb_locks and "
+			"innodb_lock_waits tables.\n", table_name);
+
+		ret = 1;
+	}
+
+	trx_i_s_cache_end_read(cache);
+
+#if 0
+	DBUG_RETURN(ret);
+#else
+	/* if this function returns something else than 0 then a
+	deadlock occurs between the mysqld server and mysql client,
+	see http://bugs.mysql.com/29900 ; when that bug is resolved
+	we can enable the DBUG_RETURN(ret) above */
+	DBUG_RETURN(0);
+#endif
+}
+
+/* Fields of the dynamic table information_schema.innodb_cmp. */
+static ST_FIELD_INFO	i_s_cmp_fields_info[] =
+{
+	{STRUCT_FLD(field_name,		"page_size"),
+	 STRUCT_FLD(field_length,	5),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		"Compressed Page Size"),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"compress_ops"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		"Total Number of Compressions"),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"compress_ops_ok"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		"Total Number of"
+					" Successful Compressions"),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"compress_time"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		"Total Duration of Compressions,"
+		    " in Seconds"),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"uncompress_ops"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		"Total Number of Decompressions"),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"uncompress_time"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		"Total Duration of Decompressions,"
+		    " in Seconds"),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	END_OF_ST_FIELD_INFO
+};
+
+
+/*******************************************************************//**
+Fill the dynamic table information_schema.innodb_cmp or
+innodb_cmp_reset.
+@return	0 on success, 1 on failure */
+static
+int
+i_s_cmp_fill_low(
+/*=============*/
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	COND*		cond,	/*!< in: condition (ignored) */
+	ibool		reset)	/*!< in: TRUE=reset cumulated counts */
+{
+	TABLE*	table	= (TABLE *) tables->table;
+	int	status	= 0;
+
+	DBUG_ENTER("i_s_cmp_fill_low");
+
+	/* deny access to non-superusers */
+	if (check_global_access(thd, PROCESS_ACL)) {
+
+		DBUG_RETURN(0);
+	}
+
+	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+
+	for (uint i = 0; i < PAGE_ZIP_NUM_SSIZE - 1; i++) {
+		page_zip_stat_t*	zip_stat = &page_zip_stat[i];
+
+		table->field[0]->store(PAGE_ZIP_MIN_SIZE << i);
+
+		/* The cumulated counts are not protected by any
+		mutex.  Thus, some operation in page0zip.c could
+		increment a counter between the time we read it and
+		clear it.  We could introduce mutex protection, but it
+		could cause a measureable performance hit in
+		page0zip.c. */
+		table->field[1]->store(zip_stat->compressed);
+		table->field[2]->store(zip_stat->compressed_ok);
+		table->field[3]->store(
+			(ulong) (zip_stat->compressed_usec / 1000000));
+		table->field[4]->store(zip_stat->decompressed);
+		table->field[5]->store(
+			(ulong) (zip_stat->decompressed_usec / 1000000));
+
+		if (reset) {
+			memset(zip_stat, 0, sizeof *zip_stat);
+		}
+
+		if (schema_table_store_record(thd, table)) {
+			status = 1;
+			break;
+		}
+	}
+
+	DBUG_RETURN(status);
+}
+
+/*******************************************************************//**
+Fill the dynamic table information_schema.innodb_cmp.
+@return	0 on success, 1 on failure */
+static
+int
+i_s_cmp_fill(
+/*=========*/
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	COND*		cond)	/*!< in: condition (ignored) */
+{
+	return(i_s_cmp_fill_low(thd, tables, cond, FALSE));
+}
+
+/*******************************************************************//**
+Fill the dynamic table information_schema.innodb_cmp_reset.
+@return	0 on success, 1 on failure */
+static
+int
+i_s_cmp_reset_fill(
+/*===============*/
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	COND*		cond)	/*!< in: condition (ignored) */
+{
+	return(i_s_cmp_fill_low(thd, tables, cond, TRUE));
+}
+
+/*******************************************************************//**
+Bind the dynamic table information_schema.innodb_cmp.
+@return	0 on success */
+static
+int
+i_s_cmp_init(
+/*=========*/
+	void*	p)	/*!< in/out: table schema object */
+{
+	DBUG_ENTER("i_s_cmp_init");
+	ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
+
+	schema->fields_info = i_s_cmp_fields_info;
+	schema->fill_table = i_s_cmp_fill;
+
+	DBUG_RETURN(0);
+}
+
+/*******************************************************************//**
+Bind the dynamic table information_schema.innodb_cmp_reset.
+@return	0 on success */
+static
+int
+i_s_cmp_reset_init(
+/*===============*/
+	void*	p)	/*!< in/out: table schema object */
+{
+	DBUG_ENTER("i_s_cmp_reset_init");
+	ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
+
+	schema->fields_info = i_s_cmp_fields_info;
+	schema->fill_table = i_s_cmp_reset_fill;
+
+	DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_cmp =
+{
+	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
+	/* int */
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+	/* pointer to type-specific plugin descriptor */
+	/* void* */
+	STRUCT_FLD(info, &i_s_info),
+
+	/* plugin name */
+	/* const char* */
+	STRUCT_FLD(name, "INNODB_CMP"),
+
+	/* plugin author (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(author, plugin_author),
+
+	/* general descriptive text (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(descr, "Statistics for the InnoDB compression"),
+
+	/* the plugin license (PLUGIN_LICENSE_XXX) */
+	/* int */
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+	/* the function to invoke when plugin is loaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(init, i_s_cmp_init),
+
+	/* the function to invoke when plugin is unloaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(deinit, i_s_common_deinit),
+
+	/* plugin version (for SHOW PLUGINS) */
+	/* unsigned int */
+	STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+	/* struct st_mysql_show_var* */
+	STRUCT_FLD(status_vars, NULL),
+
+	/* struct st_mysql_sys_var** */
+	STRUCT_FLD(system_vars, NULL),
+
+	/* reserved for dependency checking */
+	/* void* */
+	STRUCT_FLD(__reserved1, NULL)
+};
+
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_cmp_reset =
+{
+	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
+	/* int */
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+	/* pointer to type-specific plugin descriptor */
+	/* void* */
+	STRUCT_FLD(info, &i_s_info),
+
+	/* plugin name */
+	/* const char* */
+	STRUCT_FLD(name, "INNODB_CMP_RESET"),
+
+	/* plugin author (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(author, plugin_author),
+
+	/* general descriptive text (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(descr, "Statistics for the InnoDB compression;"
+		   " reset cumulated counts"),
+
+	/* the plugin license (PLUGIN_LICENSE_XXX) */
+	/* int */
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+	/* the function to invoke when plugin is loaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(init, i_s_cmp_reset_init),
+
+	/* the function to invoke when plugin is unloaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(deinit, i_s_common_deinit),
+
+	/* plugin version (for SHOW PLUGINS) */
+	/* unsigned int */
+	STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+	/* struct st_mysql_show_var* */
+	STRUCT_FLD(status_vars, NULL),
+
+	/* struct st_mysql_sys_var** */
+	STRUCT_FLD(system_vars, NULL),
+
+	/* reserved for dependency checking */
+	/* void* */
+	STRUCT_FLD(__reserved1, NULL)
+};
+
+/* Fields of the dynamic table information_schema.innodb_cmpmem. */
+static ST_FIELD_INFO	i_s_cmpmem_fields_info[] =
+{
+	{STRUCT_FLD(field_name,		"page_size"),
+	 STRUCT_FLD(field_length,	5),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		"Buddy Block Size"),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"pages_used"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		"Currently in Use"),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"pages_free"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		"Currently Available"),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"relocation_ops"),
+	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		"Total Number of Relocations"),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"relocation_time"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		"Total Duration of Relocations,"
+		    " in Seconds"),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	END_OF_ST_FIELD_INFO
+};
+
+/*******************************************************************//**
+Fill the dynamic table information_schema.innodb_cmpmem or
+innodb_cmpmem_reset.
+@return	0 on success, 1 on failure */
+static
+int
+i_s_cmpmem_fill_low(
+/*================*/
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	COND*		cond,	/*!< in: condition (ignored) */
+	ibool		reset)	/*!< in: TRUE=reset cumulated counts */
+{
+	TABLE*	table	= (TABLE *) tables->table;
+	int	status	= 0;
+
+	DBUG_ENTER("i_s_cmpmem_fill_low");
+
+	/* deny access to non-superusers */
+	if (check_global_access(thd, PROCESS_ACL)) {
+
+		DBUG_RETURN(0);
+	}
+
+	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+
+	buf_pool_mutex_enter();
+
+	for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
+		buf_buddy_stat_t*	buddy_stat = &buf_buddy_stat[x];
+
+		table->field[0]->store(BUF_BUDDY_LOW << x);
+		table->field[1]->store(buddy_stat->used);
+		table->field[2]->store(UNIV_LIKELY(x < BUF_BUDDY_SIZES)
+				       ? UT_LIST_GET_LEN(buf_pool->zip_free[x])
+				       : 0);
+		table->field[3]->store((longlong) buddy_stat->relocated, true);
+		table->field[4]->store(
+			(ulong) (buddy_stat->relocated_usec / 1000000));
+
+		if (reset) {
+			/* This is protected by buf_pool_mutex. */
+			buddy_stat->relocated = 0;
+			buddy_stat->relocated_usec = 0;
+		}
+
+		if (schema_table_store_record(thd, table)) {
+			status = 1;
+			break;
+		}
+	}
+
+	buf_pool_mutex_exit();
+	DBUG_RETURN(status);
+}
+
+/*******************************************************************//**
+Fill the dynamic table information_schema.innodb_cmpmem.
+@return	0 on success, 1 on failure */
+static
+int
+i_s_cmpmem_fill(
+/*============*/
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	COND*		cond)	/*!< in: condition (ignored) */
+{
+	return(i_s_cmpmem_fill_low(thd, tables, cond, FALSE));
+}
+
+/*******************************************************************//**
+Fill the dynamic table information_schema.innodb_cmpmem_reset.
+@return	0 on success, 1 on failure */
+static
+int
+i_s_cmpmem_reset_fill(
+/*==================*/
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	COND*		cond)	/*!< in: condition (ignored) */
+{
+	return(i_s_cmpmem_fill_low(thd, tables, cond, TRUE));
+}
+
+/*******************************************************************//**
+Bind the dynamic table information_schema.innodb_cmpmem.
+@return	0 on success */
+static
+int
+i_s_cmpmem_init(
+/*============*/
+	void*	p)	/*!< in/out: table schema object */
+{
+	DBUG_ENTER("i_s_cmpmem_init");
+	ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
+
+	schema->fields_info = i_s_cmpmem_fields_info;
+	schema->fill_table = i_s_cmpmem_fill;
+
+	DBUG_RETURN(0);
+}
+
+/*******************************************************************//**
+Bind the dynamic table information_schema.innodb_cmpmem_reset.
+@return	0 on success */
+static
+int
+i_s_cmpmem_reset_init(
+/*==================*/
+	void*	p)	/*!< in/out: table schema object */
+{
+	DBUG_ENTER("i_s_cmpmem_reset_init");
+	ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
+
+	schema->fields_info = i_s_cmpmem_fields_info;
+	schema->fill_table = i_s_cmpmem_reset_fill;
+
+	DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_cmpmem =
+{
+	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
+	/* int */
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+	/* pointer to type-specific plugin descriptor */
+	/* void* */
+	STRUCT_FLD(info, &i_s_info),
+
+	/* plugin name */
+	/* const char* */
+	STRUCT_FLD(name, "INNODB_CMPMEM"),
+
+	/* plugin author (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(author, plugin_author),
+
+	/* general descriptive text (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(descr, "Statistics for the InnoDB compressed buffer pool"),
+
+	/* the plugin license (PLUGIN_LICENSE_XXX) */
+	/* int */
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+	/* the function to invoke when plugin is loaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(init, i_s_cmpmem_init),
+
+	/* the function to invoke when plugin is unloaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(deinit, i_s_common_deinit),
+
+	/* plugin version (for SHOW PLUGINS) */
+	/* unsigned int */
+	STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+	/* struct st_mysql_show_var* */
+	STRUCT_FLD(status_vars, NULL),
+
+	/* struct st_mysql_sys_var** */
+	STRUCT_FLD(system_vars, NULL),
+
+	/* reserved for dependency checking */
+	/* void* */
+	STRUCT_FLD(__reserved1, NULL)
+};
+
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_cmpmem_reset =
+{
+	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
+	/* int */
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+	/* pointer to type-specific plugin descriptor */
+	/* void* */
+	STRUCT_FLD(info, &i_s_info),
+
+	/* plugin name */
+	/* const char* */
+	STRUCT_FLD(name, "INNODB_CMPMEM_RESET"),
+
+	/* plugin author (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(author, plugin_author),
+
+	/* general descriptive text (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(descr, "Statistics for the InnoDB compressed buffer pool;"
+		   " reset cumulated counts"),
+
+	/* the plugin license (PLUGIN_LICENSE_XXX) */
+	/* int */
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+	/* the function to invoke when plugin is loaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(init, i_s_cmpmem_reset_init),
+
+	/* the function to invoke when plugin is unloaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(deinit, i_s_common_deinit),
+
+	/* plugin version (for SHOW PLUGINS) */
+	/* unsigned int */
+	STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+	/* struct st_mysql_show_var* */
+	STRUCT_FLD(status_vars, NULL),
+
+	/* struct st_mysql_sys_var** */
+	STRUCT_FLD(system_vars, NULL),
+
+	/* reserved for dependency checking */
+	/* void* */
+	STRUCT_FLD(__reserved1, NULL)
+};
+
+/*******************************************************************//**
+Unbind a dynamic INFORMATION_SCHEMA table.
+@return	0 on success */
+static
+int
+i_s_common_deinit(
+/*==============*/
+	void*	p)	/*!< in/out: table schema object */
+{
+	DBUG_ENTER("i_s_common_deinit");
+
+	/* Do nothing */
+
+	DBUG_RETURN(0);
+}
diff --git a/storage/innodb_plugin/handler/i_s.h b/storage/innodb_plugin/handler/i_s.h
new file mode 100644
index 00000000000..402c88bbedb
--- /dev/null
+++ b/storage/innodb_plugin/handler/i_s.h
@@ -0,0 +1,37 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file handler/i_s.h
+InnoDB INFORMATION SCHEMA tables interface to MySQL.
+
+Created July 18, 2007 Vasil Dimov
+*******************************************************/
+
+#ifndef i_s_h
+#define i_s_h
+
+extern struct st_mysql_plugin	i_s_innodb_trx;
+extern struct st_mysql_plugin	i_s_innodb_locks;
+extern struct st_mysql_plugin	i_s_innodb_lock_waits;
+extern struct st_mysql_plugin	i_s_innodb_cmp;
+extern struct st_mysql_plugin	i_s_innodb_cmp_reset;
+extern struct st_mysql_plugin	i_s_innodb_cmpmem;
+extern struct st_mysql_plugin	i_s_innodb_cmpmem_reset;
+
+#endif /* i_s_h */
diff --git a/storage/innodb_plugin/handler/mysql_addons.cc b/storage/innodb_plugin/handler/mysql_addons.cc
new file mode 100644
index 00000000000..eae1fe9fbc2
--- /dev/null
+++ b/storage/innodb_plugin/handler/mysql_addons.cc
@@ -0,0 +1,42 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file handler/mysql_addons.cc
+This file contains functions that need to be added to
+MySQL code but have not been added yet.
+
+Whenever you add a function here submit a MySQL bug
+report (feature request) with the implementation. Then
+write the bug number in the comment before the
+function in this file.
+
+When MySQL commits the function it can be deleted from
+here. In a perfect world this file exists but is empty.
+
+Created November 07, 2007 Vasil Dimov
+*******************************************************/
+
+#ifndef MYSQL_SERVER
+#define MYSQL_SERVER
+#endif /* MYSQL_SERVER */
+
+#include <mysql_priv.h>
+
+#include "mysql_addons.h"
+#include "univ.i"
diff --git a/storage/innodb_plugin/handler/win_delay_loader.cc b/storage/innodb_plugin/handler/win_delay_loader.cc
new file mode 100644
index 00000000000..9b92f6a9cf2
--- /dev/null
+++ b/storage/innodb_plugin/handler/win_delay_loader.cc
@@ -0,0 +1,1024 @@
+/*****************************************************************************
+
+Copyright (c) 2008, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file handler/win_delay_loader.cc
+This file contains functions that implement the delay loader on Windows.
+
+This is a customized version of delay loader with limited functionalities.
+It does not support:
+
+* (manual) unloading
+* multiple delay loaded DLLs
+* multiple loading of the same DLL
+
+This delay loader is used only by the InnoDB plugin. Other components (DLLs)
+can still use the default delay loader, provided by MSVC.
+
+Several acronyms used by Microsoft:
+ * IAT: import address table
+ * INT: import name table
+ * RVA: Relative Virtual Address
+
+See http://msdn.microsoft.com/en-us/magazine/bb985992.aspx for details of
+PE format.
+***********************************************************************/
+#if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN)
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+# include <delayimp.h>
+# include <mysql_priv.h>
+
+extern "C" {
+# include "univ.i"
+# include "hash0hash.h"
+}
+
+/*******************************************************************//**
+This following contains a list of externals that can not be resolved by
+delay loading. They have to be resolved indirectly via their addresses
+in the .map file. All of them are external variables. */
+CHARSET_INFO*		wdl_my_charset_bin;
+CHARSET_INFO*		wdl_my_charset_latin1;
+CHARSET_INFO*		wdl_my_charset_filename;
+CHARSET_INFO**		wdl_system_charset_info;
+CHARSET_INFO**		wdl_default_charset_info;
+CHARSET_INFO**		wdl_all_charsets;
+system_variables*	wdl_global_system_variables;
+char*			wdl_mysql_real_data_home;
+char**			wdl_mysql_data_home;
+char**			wdl_tx_isolation_names;
+char**			wdl_binlog_format_names;
+char*			wdl_reg_ext;
+pthread_mutex_t*	wdl_LOCK_thread_count;
+key_map*		wdl_key_map_full;
+MY_TMPDIR*		wdl_mysql_tmpdir_list;
+bool*			wdl_mysqld_embedded;
+uint*			wdl_lower_case_table_names;
+ulong*			wdl_specialflag;
+int*			wdl_my_umask;
+
+/*******************************************************************//**
+The preferred load-address defined in PE (portable executable format). */
+#if defined(_M_IA64)
+#pragma section(".base", long, read)
+extern "C"
+__declspec(allocate(".base"))
+const IMAGE_DOS_HEADER __ImageBase;
+#else
+extern "C"
+const IMAGE_DOS_HEADER __ImageBase;
+#endif
+
+/*******************************************************************//**
+A template function for converting a relative address (RVA) to an
+absolute address (VA). This is due to the pointers in the delay
+descriptor (ImgDelayDescr in delayimp.h) have been changed from
+VAs to RVAs to work on both 32- and 64-bit platforms.
+@return	absolute virtual address */
+template <class X>
+X PFromRva(
+/*=======*/
+	RVA	rva)	/*!< in: relative virtual address */
+{
+	return X(PBYTE(&__ImageBase) + rva);
+}
+
+/*******************************************************************//**
+Convert to the old format for convenience. The structure as well as its
+element names follow the definition of ImgDelayDescr in delayimp.h. */
+struct InternalImgDelayDescr
+{
+	DWORD		grAttrs;	/*!< attributes */
+	LPCSTR		szName;		/*!< pointer to dll name */
+	HMODULE*	phmod;		/*!< address of module handle */
+	PImgThunkData	pIAT;		/*!< address of the IAT */
+	PCImgThunkData	pINT;		/*!< address of the INT */
+	PCImgThunkData	pBoundIAT;	/*!< address of the optional bound IAT */
+	PCImgThunkData	pUnloadIAT;	/*!< address of optional copy of
+					   original IAT */
+	DWORD		dwTimeStamp;	/*!< 0 if not bound,
+					   otherwise date/time stamp of DLL
+					   bound to (Old BIND) */
+};
+
+typedef struct map_hash_chain_struct	map_hash_chain_t;
+
+struct map_hash_chain_struct {
+	char*			symbol;	/*!< pointer to a symbol */
+	ulint			value;	/*!< address of the symbol */
+	map_hash_chain_t*	next;	/*!< pointer to the next cell
+					in the same folder. */
+	map_hash_chain_t*	chain;	/*!< a linear chain used for
+					cleanup. */
+};
+
+static HMODULE				my_hmod = 0;
+static struct hash_table_struct*	m_htbl = NULL ;
+static map_hash_chain_t*		chain_header = NULL;
+static ibool				wdl_init = FALSE;
+const ulint				MAP_HASH_CELLS_NUM = 10000;
+
+#ifndef DBUG_OFF
+/*******************************************************************//**
+In the dynamic plugin, it is required to call the following dbug functions
+in the server:
+	_db_pargs_
+	_db_doprnt_
+	_db_enter_
+	_db_return_
+	_db_dump_
+
+The plugin will get those function pointers during the initialization. */
+typedef void (__cdecl* pfn_db_enter_)(
+	const char*	_func_,
+	const char*	_file_,
+	uint		_line_,
+	const char**	_sfunc_,
+	const char**	_sfile_,
+	uint*		_slevel_,
+	char***);
+
+typedef void (__cdecl* pfn_db_return_)(
+	uint		_line_,
+	const char**	_sfunc_,
+	const char**	_sfile_,
+	uint*		_slevel_);
+
+typedef void (__cdecl* pfn_db_pargs_)(
+	uint		_line_,
+	const char*	keyword);
+
+typedef void (__cdecl* pfn_db_doprnt_)(
+	const char*	format,
+	...);
+
+typedef void (__cdecl* pfn_db_dump_)(
+	uint			_line_,
+	const char*		keyword,
+	const unsigned char*	memory,
+	size_t			length);
+
+static pfn_db_enter_	wdl_db_enter_;
+static pfn_db_return_	wdl_db_return_;
+static pfn_db_pargs_	wdl_db_pargs_;
+static pfn_db_doprnt_	wdl_db_doprnt_;
+static pfn_db_dump_	wdl_db_dump_;
+#endif /* !DBUG_OFF */
+
+/*************************************************************//**
+Creates a hash table with >= n array cells. The actual number of cells is
+chosen to be a prime number slightly bigger than n.
+
+This is the same function as hash_create in hash0hash.c, except the
+memory allocation. This function is invoked before the engine is
+initialized, and buffer pools are not ready yet.
+@return	own: created hash table */
+static
+hash_table_t*
+wdl_hash_create(
+/*============*/
+	ulint	n)	/*!< in: number of array cells */
+{
+	hash_cell_t*	array;
+	ulint		prime;
+	hash_table_t*	table;
+
+	prime = ut_find_prime(n);
+
+	table = (hash_table_t*) malloc(sizeof(hash_table_t));
+	if (table == NULL) {
+		return(NULL);
+	}
+
+	array = (hash_cell_t*) malloc(sizeof(hash_cell_t) * prime);
+	if (array == NULL) {
+		free(table);
+		return(NULL);
+	}
+
+	table->array = array;
+	table->n_cells = prime;
+	table->n_mutexes = 0;
+	table->mutexes = NULL;
+	table->heaps = NULL;
+	table->heap = NULL;
+	table->magic_n = HASH_TABLE_MAGIC_N;
+
+	/* Initialize the cell array */
+	hash_table_clear(table);
+
+	return(table);
+}
+
+/*************************************************************//**
+Frees a hash table. */
+static
+void
+wdl_hash_table_free(
+/*================*/
+	hash_table_t*	table)	/*!< in, own: hash table */
+{
+	ut_a(table != NULL);
+	ut_a(table->mutexes == NULL);
+
+	free(table->array);
+	free(table);
+}
+
+/*******************************************************************//**
+Function for calculating the count of imports given the base of the IAT.
+@return	number of imports */
+static
+ulint
+wdl_import_count(
+/*=============*/
+	PCImgThunkData	pitd_base)	/*!< in: base of the IAT */
+{
+	ulint		ret = 0;
+	PCImgThunkData	pitd = pitd_base;
+
+	while (pitd->u1.Function) {
+		pitd++;
+		ret++;
+	}
+
+	return(ret);
+}
+
+/*******************************************************************//**
+Read Mapfile to a hashtable for faster access
+@return	TRUE if the mapfile is loaded successfully. */
+static
+ibool
+wdl_load_mapfile(
+/*=============*/
+	const char*	filename)	/*!< in: name of the mapfile. */
+{
+	FILE*		fp;
+	const size_t	nSize = 256;
+	char		tmp_buf[nSize];
+	char*		func_name;
+	char*		func_addr;
+	ulint		load_addr = 0;
+	ibool		valid_load_addr = FALSE;
+#ifdef _WIN64
+	const char*	tmp_string = " Preferred load address is %16llx";
+#else
+	const char*	tmp_string = " Preferred load address is %08x";
+#endif
+
+	fp = fopen(filename, "r");
+	if (fp == NULL) {
+
+		return(FALSE);
+	}
+
+	/* Check whether to create the hashtable */
+	if (m_htbl == NULL) {
+
+		m_htbl = wdl_hash_create(MAP_HASH_CELLS_NUM);
+
+		if (m_htbl == NULL) {
+
+			fclose(fp);
+			return(FALSE);
+		}
+	}
+
+	/* Search start of symbol list and get the preferred load address */
+	while (fgets(tmp_buf, sizeof(tmp_buf), fp)) {
+
+		if (sscanf(tmp_buf, tmp_string, &load_addr) == 1) {
+
+			valid_load_addr = TRUE;
+		}
+
+		if (strstr(tmp_buf, "Rva+Base") != NULL) {
+
+			break;
+		}
+	}
+
+	if (valid_load_addr == FALSE) {
+
+		/* No "Preferred load address", the map file is wrong. */
+		fclose(fp);
+		return(FALSE);
+	}
+
+	/* Read symbol list */
+	while (fgets(tmp_buf, sizeof(tmp_buf), fp))
+	{
+		map_hash_chain_t*	map_cell;
+		ulint			map_fold;
+
+		if (*tmp_buf == 0) {
+
+			continue;
+		}
+
+		func_name = strtok(tmp_buf, " ");
+		func_name = strtok(NULL, " ");
+		func_addr = strtok(NULL, " ");
+
+		if (func_name && func_addr) {
+
+			ut_snprintf(tmp_buf, nSize, "0x%s", func_addr);
+			if (*func_name == '_') {
+
+				func_name++;
+			}
+
+			map_cell = (map_hash_chain_t*)
+				   malloc(sizeof(map_hash_chain_t));
+			if (map_cell == NULL) {
+				return(FALSE);
+			}
+
+			/* Chain all cells together */
+			map_cell->chain = chain_header;
+			chain_header = map_cell;
+
+			map_cell->symbol = strdup(func_name);
+			map_cell->value = (ulint) _strtoui64(tmp_buf, NULL, 0)
+					  - load_addr;
+			map_fold = ut_fold_string(map_cell->symbol);
+
+			HASH_INSERT(map_hash_chain_t,
+				    next,
+				    m_htbl,
+				    map_fold,
+				    map_cell);
+		}
+	}
+
+	fclose(fp);
+
+	return(TRUE);
+}
+
+/*************************************************************//**
+Cleanup.during DLL unload */
+static
+void
+wdl_cleanup(void)
+/*=============*/
+{
+	while (chain_header != NULL) {
+		map_hash_chain_t*	tmp;
+
+		tmp = chain_header->chain;
+		free(chain_header->symbol);
+		free(chain_header);
+		chain_header = tmp;
+	}
+
+	if (m_htbl != NULL) {
+
+		wdl_hash_table_free(m_htbl);
+	}
+}
+
+/*******************************************************************//**
+Load the mapfile mysqld.map.
+@return	the module handle */
+static
+HMODULE
+wdl_get_mysqld_mapfile(void)
+/*========================*/
+{
+	char	file_name[MAX_PATH];
+	char*	ext;
+	ulint	err;
+
+	if (my_hmod == 0) {
+
+		size_t	nSize = MAX_PATH - strlen(".map") -1;
+
+		/* First find out the name of current executable */
+		my_hmod = GetModuleHandle(NULL);
+		if (my_hmod == 0) {
+
+			return(my_hmod);
+		}
+
+		err = GetModuleFileName(my_hmod, file_name, nSize);
+		if (err == 0) {
+
+			my_hmod = 0;
+			return(my_hmod);
+		}
+
+		ext = strrchr(file_name, '.');
+		if (ext != NULL) {
+
+			*ext = 0;
+			strcat(file_name, ".map");
+
+			err = wdl_load_mapfile(file_name);
+			if (err == 0) {
+
+				my_hmod = 0;
+			}
+		} else {
+
+			my_hmod = 0;
+		}
+	}
+
+	return(my_hmod);
+}
+
+/*******************************************************************//**
+Retrieves the address of an exported function. It follows the convention
+of GetProcAddress().
+@return	address of exported function. */
+static
+FARPROC
+wdl_get_procaddr_from_map(
+/*======================*/
+	HANDLE		m_handle,	/*!< in: module handle */
+	const char*	import_proc)	/*!< in: procedure name */
+{
+	map_hash_chain_t*	hash_chain;
+	ulint			map_fold;
+
+	map_fold = ut_fold_string(import_proc);
+	HASH_SEARCH(
+		next,
+		m_htbl,
+		map_fold,
+		map_hash_chain_t*,
+		hash_chain,
+		,
+		(ut_strcmp(hash_chain->symbol, import_proc) == 0));
+
+	if (hash_chain == NULL) {
+
+#ifdef _WIN64
+		/* On Win64, the leading '_' may not be taken out. In this
+		case, search again without the leading '_'. */
+		if (*import_proc == '_') {
+
+			import_proc++;
+		}
+
+		map_fold = ut_fold_string(import_proc);
+		HASH_SEARCH(
+			next,
+			m_htbl,
+			map_fold,
+			map_hash_chain_t*,
+			hash_chain,
+			,
+			(ut_strcmp(hash_chain->symbol, import_proc) == 0));
+
+		if (hash_chain == NULL) {
+#endif
+			if (wdl_init == TRUE) {
+
+				sql_print_error(
+					"InnoDB: the procedure pointer of %s"
+					" is not found.",
+					import_proc);
+			}
+
+			return(0);
+#ifdef _WIN64
+		}
+#endif
+	}
+
+	return((FARPROC) ((ulint) m_handle + hash_chain->value));
+}
+
+/*******************************************************************//**
+Retrieves the address of an exported variable.
+Note: It does not follow the Windows call convention FARPROC.
+@return	address of exported variable. */
+static
+void*
+wdl_get_varaddr_from_map(
+/*=====================*/
+	HANDLE		m_handle,		/*!< in: module handle */
+	const char*	import_variable)	/*!< in: variable name */
+{
+	map_hash_chain_t*	hash_chain;
+	ulint			map_fold;
+
+	map_fold = ut_fold_string(import_variable);
+	HASH_SEARCH(
+		next,
+		m_htbl,
+		map_fold,
+		map_hash_chain_t*,
+		hash_chain,
+		,
+		(ut_strcmp(hash_chain->symbol, import_variable) == 0));
+
+	if (hash_chain == NULL) {
+
+#ifdef _WIN64
+		/* On Win64, the leading '_' may not be taken out. In this
+		case, search again without the leading '_'. */
+		if (*import_variable == '_') {
+
+			import_variable++;
+		}
+
+		map_fold = ut_fold_string(import_variable);
+		HASH_SEARCH(
+			next,
+			m_htbl,
+			map_fold,
+			map_hash_chain_t*,
+			hash_chain,
+			,
+			(ut_strcmp(hash_chain->symbol, import_variable) == 0));
+
+		if (hash_chain == NULL) {
+#endif
+			if (wdl_init == TRUE) {
+
+				sql_print_error(
+					"InnoDB: the variable address of %s"
+					" is not found.",
+					import_variable);
+			}
+
+			return(0);
+#ifdef _WIN64
+		}
+#endif
+	}
+
+	return((void*) ((ulint) m_handle + hash_chain->value));
+}
+
+/*******************************************************************//**
+Bind all unresolved external variables from the MySQL executable.
+@return	TRUE if successful */
+static
+bool
+wdl_get_external_variables(void)
+/*============================*/
+{
+	HMODULE	hmod = wdl_get_mysqld_mapfile();
+
+	if (hmod == 0) {
+
+		return(FALSE);
+	}
+
+#define GET_SYM(sym, var, type)					\
+	var = (type*) wdl_get_varaddr_from_map(hmod, sym);	\
+	if (var == NULL) return(FALSE)
+#ifdef _WIN64
+#define GET_SYM2(sym1, sym2, var, type)				\
+	var = (type*) wdl_get_varaddr_from_map(hmod, sym1);	\
+	if (var == NULL) return(FALSE)
+#else
+#define GET_SYM2(sym1, sym2, var, type)				\
+	var = (type*) wdl_get_varaddr_from_map(hmod, sym2);	\
+	if (var == NULL) return(FALSE)
+#endif // (_WIN64)
+#define GET_C_SYM(sym, type) GET_SYM(#sym, wdl_##sym, type)
+#define GET_PROC_ADDR(sym)					\
+	wdl##sym = (pfn##sym) wdl_get_procaddr_from_map(hmod, #sym)
+
+	GET_C_SYM(my_charset_bin, CHARSET_INFO);
+	GET_C_SYM(my_charset_latin1, CHARSET_INFO);
+	GET_C_SYM(my_charset_filename, CHARSET_INFO);
+	GET_C_SYM(default_charset_info, CHARSET_INFO*);
+	GET_C_SYM(all_charsets, CHARSET_INFO*);
+	GET_C_SYM(my_umask, int);
+
+	GET_SYM("?global_system_variables@@3Usystem_variables@@A",
+		wdl_global_system_variables, struct system_variables);
+	GET_SYM("?mysql_real_data_home@@3PADA",
+		wdl_mysql_real_data_home, char);
+	GET_SYM("?reg_ext@@3PADA", wdl_reg_ext, char);
+	GET_SYM("?LOCK_thread_count@@3U_RTL_CRITICAL_SECTION@@A",
+		wdl_LOCK_thread_count, pthread_mutex_t);
+	GET_SYM("?key_map_full@@3V?$Bitmap@$0EA@@@A",
+		wdl_key_map_full, key_map);
+	GET_SYM("?mysql_tmpdir_list@@3Ust_my_tmpdir@@A",
+		wdl_mysql_tmpdir_list, MY_TMPDIR);
+	GET_SYM("?mysqld_embedded@@3_NA",
+		wdl_mysqld_embedded, bool);
+	GET_SYM("?lower_case_table_names@@3IA",
+		wdl_lower_case_table_names, uint);
+	GET_SYM("?specialflag@@3KA", wdl_specialflag, ulong);
+
+	GET_SYM2("?system_charset_info@@3PEAUcharset_info_st@@EA",
+		 "?system_charset_info@@3PAUcharset_info_st@@A",
+		 wdl_system_charset_info, CHARSET_INFO*);
+	GET_SYM2("?mysql_data_home@@3PEADEA",
+		 "?mysql_data_home@@3PADA",
+		 wdl_mysql_data_home, char*);
+	GET_SYM2("?tx_isolation_names@@3PAPEBDA",
+		 "?tx_isolation_names@@3PAPBDA",
+		 wdl_tx_isolation_names, char*);
+	GET_SYM2("?binlog_format_names@@3PAPEBDA",
+		 "?binlog_format_names@@3PAPBDA",
+		 wdl_binlog_format_names, char*);
+
+#ifndef DBUG_OFF
+	GET_PROC_ADDR(_db_enter_);
+	GET_PROC_ADDR(_db_return_);
+	GET_PROC_ADDR(_db_pargs_);
+	GET_PROC_ADDR(_db_doprnt_);
+	GET_PROC_ADDR(_db_dump_);
+
+	/* If any of the dbug functions is not available, just make them
+	all invalid. This is the case when working with a non-debug
+	version of the server. */
+	if (wdl_db_enter_ == NULL || wdl_db_return_ == NULL
+	    || wdl_db_pargs_ == NULL || wdl_db_doprnt_ == NULL
+	    || wdl_db_dump_ == NULL) {
+
+		wdl_db_enter_ = NULL;
+		wdl_db_return_ = NULL;
+		wdl_db_pargs_ = NULL;
+		wdl_db_doprnt_ = NULL;
+		wdl_db_dump_ = NULL;
+	}
+#endif /* !DBUG_OFF */
+
+	wdl_init = TRUE;
+	return(TRUE);
+
+#undef GET_SYM
+#undef GET_SYM2
+#undef GET_C_SYM
+#undef GET_PROC_ADDR
+}
+
+/*******************************************************************//**
+The DLL Delayed Loading Helper Function for resolving externals.
+
+The function may fail due to one of the three reasons:
+
+* Invalid parameter, which happens if the attributes in pidd aren't
+  specified correctly.
+* Failed to load the map file mysqld.map.
+* Failed to find an external name in the map file mysqld.map.
+
+Note: this function is called by run-time as well as __HrLoadAllImportsForDll.
+So, it has to follow Windows call convention.
+@return	the address of the imported function */
+extern "C"
+FARPROC WINAPI
+__delayLoadHelper2(
+/*===============*/
+	PCImgDelayDescr	pidd,		/*!< in: a const pointer to a
+					ImgDelayDescr, see delayimp.h. */
+	FARPROC*	iat_entry)	/*!< in/out: A pointer to the slot in
+					the delay load import address table
+					to be updated with the address of the
+					imported function. */
+{
+	ulint		iIAT, iINT;
+	HMODULE		hmod;
+	PCImgThunkData	pitd;
+	FARPROC		fun = NULL;
+
+	/* Set up data used for the hook procs  */
+	InternalImgDelayDescr	idd = {
+				pidd->grAttrs,
+				PFromRva<LPCSTR>(pidd->rvaDLLName),
+				PFromRva<HMODULE*>(pidd->rvaHmod),
+				PFromRva<PImgThunkData>(pidd->rvaIAT),
+				PFromRva<PCImgThunkData>(pidd->rvaINT),
+				PFromRva<PCImgThunkData>(pidd->rvaBoundIAT),
+				PFromRva<PCImgThunkData>(pidd->rvaUnloadIAT),
+				pidd->dwTimeStamp
+	};
+
+	DelayLoadInfo		dli = {
+				sizeof(DelayLoadInfo),
+				pidd,
+				iat_entry,
+				idd.szName,
+				{0},
+				0,
+				0,
+				0
+	};
+
+	/* Check the Delay Load Attributes, log an error of invalid
+	parameter, which happens if the attributes in pidd are not
+	specified correctly. */
+	if ((idd.grAttrs & dlattrRva) == 0) {
+
+		sql_print_error("InnoDB: invalid parameter for delay loader.");
+		return(0);
+	}
+
+	hmod = *idd.phmod;
+
+	/* Calculate the index for the IAT entry in the import address table.
+	The INT entries are ordered the same as the IAT entries so the
+	calculation can be done on the IAT side. */
+	iIAT = (PCImgThunkData) iat_entry - idd.pIAT;
+	iINT = iIAT;
+
+	pitd = &(idd.pINT[iINT]);
+
+	dli.dlp.fImportByName = !IMAGE_SNAP_BY_ORDINAL(pitd->u1.Ordinal);
+
+	if (dli.dlp.fImportByName) {
+
+		dli.dlp.szProcName = (LPCSTR) (PFromRva<PIMAGE_IMPORT_BY_NAME>
+			((RVA) ((UINT_PTR) pitd->u1.AddressOfData))->Name);
+	} else {
+
+		dli.dlp.dwOrdinal = (ulint) IMAGE_ORDINAL(pitd->u1.Ordinal);
+	}
+
+	/* Now, load the mapfile, if it has not been done yet */
+	if (hmod == 0) {
+
+		hmod = wdl_get_mysqld_mapfile();
+	}
+
+	if (hmod == 0) {
+		/* LoadLibrary failed. */
+		PDelayLoadInfo	rgpdli[1] = {&dli};
+
+		dli.dwLastError = ::GetLastError();
+
+		sql_print_error(
+			"InnoDB: failed to load mysqld.map with error %d.",
+			dli.dwLastError);
+
+		return(0);
+	}
+
+	/* Store the library handle. */
+	idd.phmod = &hmod;
+
+	/* Go for the procedure now. */
+	dli.hmodCur = hmod;
+
+	if (pidd->rvaBoundIAT && pidd->dwTimeStamp) {
+
+		/* Bound imports exist, check the timestamp from the target
+		image */
+		PIMAGE_NT_HEADERS	pinh;
+
+		pinh = (PIMAGE_NT_HEADERS) ((byte*) hmod
+				+ ((PIMAGE_DOS_HEADER) hmod)->e_lfanew);
+
+		if (pinh->Signature == IMAGE_NT_SIGNATURE
+		    && pinh->FileHeader.TimeDateStamp == idd.dwTimeStamp
+		    && (DWORD) hmod == pinh->OptionalHeader.ImageBase) {
+
+			/* We have a decent address in the bound IAT. */
+			fun = (FARPROC) (UINT_PTR)
+					idd.pBoundIAT[iIAT].u1.Function;
+
+			if (fun) {
+
+				*iat_entry = fun;
+				return(fun);
+			}
+		}
+	}
+
+	fun = wdl_get_procaddr_from_map(hmod, dli.dlp.szProcName);
+
+	if (fun == 0) {
+
+		return(0);
+	}
+
+	*iat_entry = fun;
+	return(fun);
+}
+
+/*******************************************************************//**
+Unload a DLL that was delay loaded. This function is called by run-time.
+@return TRUE is returned if the DLL is found and the IAT matches the
+original one. */
+extern "C"
+BOOL WINAPI
+__FUnloadDelayLoadedDLL2(
+/*=====================*/
+	LPCSTR	module_name)	/*!< in: DLL name */
+{
+	return(TRUE);
+}
+
+/**************************************************************//**
+Load all imports from a DLL that was specified with the /delayload linker
+option.
+Note: this function is called by run-time. So, it has to follow Windows call
+convention.
+@return	S_OK if the DLL matches, otherwise ERROR_MOD_NOT_FOUND is returned. */
+extern "C"
+HRESULT WINAPI
+__HrLoadAllImportsForDll(
+/*=====================*/
+	LPCSTR	module_name)	/*!< in: DLL name */
+{
+	PIMAGE_NT_HEADERS	img;
+	PCImgDelayDescr		pidd;
+	IMAGE_DATA_DIRECTORY*	image_data;
+	LPCSTR			current_module;
+	HRESULT			ret = ERROR_MOD_NOT_FOUND;
+	HMODULE			hmod = (HMODULE) &__ImageBase;
+
+	img = (PIMAGE_NT_HEADERS) ((byte*) hmod
+				   + ((PIMAGE_DOS_HEADER) hmod)->e_lfanew);
+	image_data =
+	 &img->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT];
+
+	/* Scan the delay load IAT/INT for the DLL */
+	if (image_data->Size) {
+
+		pidd = PFromRva<PCImgDelayDescr>(image_data->VirtualAddress);
+
+		/* Check all of the listed DLLs we want to load. */
+		while (pidd->rvaDLLName) {
+
+			current_module = PFromRva<LPCSTR>(pidd->rvaDLLName);
+
+			if (stricmp(module_name, current_module) == 0) {
+
+				/* Found it, break out with pidd and
+				current_module set appropriately */
+				break;
+			}
+
+			/* To the next delay import descriptor */
+			pidd++;
+		}
+
+		if (pidd->rvaDLLName) {
+
+			/* Found a matching DLL, now process it. */
+			FARPROC*	iat_entry;
+			size_t		count;
+
+			iat_entry = PFromRva<FARPROC*>(pidd->rvaIAT);
+			count = wdl_import_count((PCImgThunkData) iat_entry);
+
+			/* now load all the imports from the DLL */
+			while (count > 0) {
+
+				/* No need to check the return value */
+				__delayLoadHelper2(pidd, iat_entry);
+				iat_entry++;
+				count--;
+			}
+
+			ret = S_OK;
+		}
+	}
+
+	return ret;
+}
+
+/**************************************************************//**
+The main function of a DLL
+@return	TRUE if the call succeeds */
+BOOL
+WINAPI
+DllMain(
+/*====*/
+	HINSTANCE	hinstDLL,	/*!< in: handle to the DLL module */
+	DWORD		fdwReason,	/*!< Reason code that indicates why the
+					DLL entry-point function is being
+					called.*/
+	LPVOID		lpvReserved)	/*!< in: additional parameter based on
+					fdwReason */
+{
+	BOOL	success = TRUE;
+
+	switch (fdwReason) {
+
+	case DLL_PROCESS_ATTACH:
+		success = wdl_get_external_variables();
+		break;
+
+	case DLL_PROCESS_DETACH:
+		wdl_cleanup();
+		break;
+	}
+
+	return(success);
+}
+
+#ifndef DBUG_OFF
+/**************************************************************//**
+Process entry point to user function. It makes the call to _db_enter_
+in mysqld.exe. The DBUG functions are defined in my_dbug.h. */
+extern "C" UNIV_INTERN
+void
+_db_enter_(
+	const char*	_func_,		/*!< in: current function name */
+	const char*	_file_,		/*!< in: current file name */
+	uint		_line_,		/*!< in: current source line number */
+	const char**	_sfunc_,	/*!< out: previous _func_ */
+	const char**	_sfile_,	/*!< out: previous _file_ */
+	uint*		_slevel_,	/*!< out: previous nesting level */
+	char***		_sframep_)	/*!< out: previous frame pointer */
+{
+	if (wdl_db_enter_ != NULL) {
+
+		wdl_db_enter_(_func_, _file_, _line_, _sfunc_, _sfile_,
+			      _slevel_, _sframep_);
+	}
+}
+
+/**************************************************************//**
+Process exit from user function. It makes the call to _db_return_()
+in the server. */
+extern "C" UNIV_INTERN
+void
+_db_return_(
+	uint		_line_,		/*!< in: current source line number */
+	const char**	_sfunc_,	/*!< out: previous _func_ */
+	const char**	_sfile_,	/*!< out: previous _file_ */
+	uint*		_slevel_)	/*!< out: previous level */
+{
+	if (wdl_db_return_ != NULL) {
+
+		wdl_db_return_(_line_, _sfunc_, _sfile_, _slevel_);
+	}
+}
+
+/**************************************************************//**
+Log arguments for subsequent use. It makes the call to _db_pargs_()
+in the server. */
+extern "C" UNIV_INTERN
+void
+_db_pargs_(
+	uint		_line_,		/*!< in: current source line number */
+	const char*	keyword)	/*!< in: keyword for current macro */
+{
+	if (wdl_db_pargs_ != NULL) {
+
+		wdl_db_pargs_(_line_, keyword);
+	}
+}
+
+/**************************************************************//**
+Handle print of debug lines. It saves the text into a buffer first,
+then makes the call to _db_doprnt_() in the server. The text is
+truncated to the size of buffer. */
+extern "C" UNIV_INTERN
+void
+_db_doprnt_(
+	const char*	format,		/*!< in: the format string */
+	...)				/*!< in: list of arguments */
+{
+	va_list		argp;
+	char		buffer[512];
+
+	if (wdl_db_doprnt_ != NULL) {
+
+		va_start(argp, format);
+		/* it is ok to ignore the trunction. */
+		_vsnprintf(buffer, sizeof(buffer), format, argp);
+		wdl_db_doprnt_(buffer);
+		va_end(argp);
+	}
+}
+
+/**************************************************************//**
+Dump a string in hex. It makes the call to _db_dump_() in the server. */
+extern "C" UNIV_INTERN
+void
+_db_dump_(
+	uint			_line_,		/*!< in: current source line
+						number */
+	const char*		keyword,	/*!< in: keyword list */
+	const unsigned char*	memory,		/*!< in: memory to dump */
+	size_t			length)		/*!< in: bytes to dump */
+{
+	if (wdl_db_dump_ != NULL) {
+
+		wdl_db_dump_(_line_, keyword, memory, length);
+	}
+}
+
+#endif /* !DBUG_OFF */
+#endif /* defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) */
diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innodb_plugin/ibuf/ibuf0ibuf.c
similarity index 60%
rename from storage/innobase/ibuf/ibuf0ibuf.c
rename to storage/innodb_plugin/ibuf/ibuf0ibuf.c
index d54a3378993..37c68391477 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.c
+++ b/storage/innodb_plugin/ibuf/ibuf0ibuf.c
@@ -1,17 +1,44 @@
-/******************************************************
-Insert buffer
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file ibuf/ibuf0ibuf.c
+Insert buffer
 
 Created 7/19/1997 Heikki Tuuri
 *******************************************************/
 
 #include "ibuf0ibuf.h"
 
+/** Number of bits describing a single page */
+#define IBUF_BITS_PER_PAGE	4
+#if IBUF_BITS_PER_PAGE % 2
+# error "IBUF_BITS_PER_PAGE must be an even number!"
+#endif
+/** The start address for an insert buffer bitmap page bitmap */
+#define IBUF_BITMAP		PAGE_DATA
+
 #ifdef UNIV_NONINL
 #include "ibuf0ibuf.ic"
 #endif
 
+#ifndef UNIV_HOTBACKUP
+
 #include "buf0buf.h"
 #include "buf0rea.h"
 #include "fsp0fsp.h"
@@ -134,39 +161,45 @@ level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
 it uses synchronous aio, it can access any pages, as long as it obeys the
 access order rules. */
 
-/* Buffer pool size per the maximum insert buffer size */
+/** Buffer pool size per the maximum insert buffer size */
 #define IBUF_POOL_SIZE_PER_MAX_SIZE	2
 
-/* The insert buffer control structure */
-ibuf_t*	ibuf			= NULL;
+/** Table name for the insert buffer. */
+#define IBUF_TABLE_NAME		"SYS_IBUF_TABLE"
 
-static ulint ibuf_rnd		= 986058871;
+/** Operations that can currently be buffered. */
+UNIV_INTERN ibuf_use_t	ibuf_use		= IBUF_USE_INSERT;
 
-ulint	ibuf_flush_count	= 0;
+/** The insert buffer control structure */
+UNIV_INTERN ibuf_t*	ibuf			= NULL;
 
-#ifdef UNIV_IBUF_DEBUG
-/* Dimensions for the ibuf_count array */
-#define IBUF_COUNT_N_SPACES	500
-#define IBUF_COUNT_N_PAGES	2000
+/** Counter for ibuf_should_try() */
+UNIV_INTERN ulint	ibuf_flush_count	= 0;
 
-/* Buffered entry counts for file pages, used in debugging */
+#ifdef UNIV_IBUF_COUNT_DEBUG
+/** Number of tablespaces in the ibuf_counts array */
+#define IBUF_COUNT_N_SPACES	4
+/** Number of pages within each tablespace in the ibuf_counts array */
+#define IBUF_COUNT_N_PAGES	130000
+
+/** Buffered entry counts for file pages, used in debugging */
 static ulint	ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
 
-/**********************************************************************
+/******************************************************************//**
 Checks that the indexes to ibuf_counts[][] are within limits. */
 UNIV_INLINE
 void
 ibuf_count_check(
 /*=============*/
-	ulint	space_id,	/* in: space identifier */
-	ulint	page_no)	/* in: page number */
+	ulint	space_id,	/*!< in: space identifier */
+	ulint	page_no)	/*!< in: page number */
 {
 	if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
 		return;
 	}
 
 	fprintf(stderr,
-		"InnoDB: UNIV_IBUF_DEBUG limits space_id and page_no\n"
+		"InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n"
 		"InnoDB: and breaks crash recovery.\n"
 		"InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
 		"InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
@@ -176,52 +209,52 @@ ibuf_count_check(
 }
 #endif
 
-/* The start address for an insert buffer bitmap page bitmap */
-#define IBUF_BITMAP		PAGE_DATA
+/** @name Offsets to the per-page bits in the insert buffer bitmap */
+/* @{ */
+#define	IBUF_BITMAP_FREE	0	/*!< Bits indicating the
+					amount of free space */
+#define IBUF_BITMAP_BUFFERED	2	/*!< TRUE if there are buffered
+					changes for the page */
+#define IBUF_BITMAP_IBUF	3	/*!< TRUE if page is a part of
+					the ibuf tree, excluding the
+					root page, or is in the free
+					list of the ibuf */
+/* @} */
 
-/* Offsets in bits for the bits describing a single page in the bitmap */
-#define	IBUF_BITMAP_FREE	0
-#define IBUF_BITMAP_BUFFERED	2
-#define IBUF_BITMAP_IBUF	3	/* TRUE if page is a part of the ibuf
-					tree, excluding the root page, or is
-					in the free list of the ibuf */
-
-/* Number of bits describing a single page */
-#define IBUF_BITS_PER_PAGE	4
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE must be an even number!"
-#endif
-
-/* The mutex used to block pessimistic inserts to ibuf trees */
+/** The mutex used to block pessimistic inserts to ibuf trees */
 static mutex_t	ibuf_pessimistic_insert_mutex;
 
-/* The mutex protecting the insert buffer structs */
+/** The mutex protecting the insert buffer structs */
 static mutex_t	ibuf_mutex;
 
-/* The mutex protecting the insert buffer bitmaps */
+/** The mutex protecting the insert buffer bitmaps */
 static mutex_t	ibuf_bitmap_mutex;
 
-/* The area in pages from which contract looks for page numbers for merge */
+/** The area in pages from which contract looks for page numbers for merge */
 #define	IBUF_MERGE_AREA			8
 
-/* Inside the merge area, pages which have at most 1 per this number less
+/** Inside the merge area, pages which have at most 1 per this number less
 buffered entries compared to maximum volume that can buffered for a single
 page are merged along with the page whose buffer became full */
 #define IBUF_MERGE_THRESHOLD		4
 
-/* In ibuf_contract at most this number of pages is read to memory in one
+/** In ibuf_contract at most this number of pages is read to memory in one
 batch, in order to merge the entries for them in the insert buffer */
 #define	IBUF_MAX_N_PAGES_MERGED		IBUF_MERGE_AREA
 
-/* If the combined size of the ibuf trees exceeds ibuf->max_size by this
+/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
 many pages, we start to contract it in connection to inserts there, using
 non-synchronous contract */
 #define IBUF_CONTRACT_ON_INSERT_NON_SYNC	0
 
-/* Same as above, but use synchronous contract */
+/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
+many pages, we start to contract it in connection to inserts there, using
+synchronous contract */
 #define IBUF_CONTRACT_ON_INSERT_SYNC		5
 
-/* Same as above, but no insert is done, only contract is called */
+/** If the combined size of the ibuf trees exceeds ibuf->max_size by
+this many pages, we start to contract it synchronous contract, but do
+not insert */
 #define IBUF_CONTRACT_DO_NOT_INSERT		10
 
 /* TODO: how to cope with drop table if there are records in the insert
@@ -230,15 +263,7 @@ because ibuf merge is done to a page when it is read in, and it is
 still physically like the index page even if the index would have been
 dropped! So, there seems to be no problem. */
 
-/**********************************************************************
-Validates the ibuf data structures when the caller owns ibuf_mutex. */
-
-ibool
-ibuf_validate_low(void);
-/*===================*/
-			/* out: TRUE if ok */
-
-/**********************************************************************
+/******************************************************************//**
 Sets the flag in the current OS thread local storage denoting that it is
 inside an insert buffer routine. */
 UNIV_INLINE
@@ -255,7 +280,7 @@ ibuf_enter(void)
 	*ptr = TRUE;
 }
 
-/**********************************************************************
+/******************************************************************//**
 Sets the flag in the current OS thread local storage denoting that it is
 exiting an insert buffer routine. */
 UNIV_INLINE
@@ -272,97 +297,90 @@ ibuf_exit(void)
 	*ptr = FALSE;
 }
 
-/**********************************************************************
+/******************************************************************//**
 Returns TRUE if the current OS thread is performing an insert buffer
-routine. */
+routine.
 
+For instance, a read-ahead of non-ibuf pages is forbidden by threads
+that are executing an insert buffer routine.
+@return TRUE if inside an insert buffer routine */
+UNIV_INTERN
 ibool
 ibuf_inside(void)
 /*=============*/
-		/* out: TRUE if inside an insert buffer routine: for instance,
-		a read-ahead of non-ibuf pages is then forbidden */
 {
 	return(*thr_local_get_in_ibuf_field());
 }
 
-/**********************************************************************
-Gets the ibuf header page and x-latches it. */
+/******************************************************************//**
+Gets the ibuf header page and x-latches it.
+@return	insert buffer header page */
 static
 page_t*
 ibuf_header_page_get(
 /*=================*/
-			/* out: insert buffer header page */
-	ulint	space,	/* in: space id */
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
-	page_t*	page;
-
-	ut_a(space == 0);
+	buf_block_t*	block;
 
 	ut_ad(!ibuf_inside());
 
-	page = buf_page_get(space, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
+	block = buf_page_get(
+		IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_IBUF_HEADER);
 
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_IBUF_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
-
-	return(page);
+	return(buf_block_get_frame(block));
 }
 
-/**********************************************************************
-Gets the root page and x-latches it. */
+/******************************************************************//**
+Gets the root page and x-latches it.
+@return	insert buffer tree root page */
 static
 page_t*
 ibuf_tree_root_get(
 /*===============*/
-				/* out: insert buffer tree root page */
-	ibuf_data_t*	data,	/* in: ibuf data */
-	ulint		space,	/* in: space id */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
-	page_t*	page;
+	buf_block_t*	block;
 
-	ut_a(space == 0);
 	ut_ad(ibuf_inside());
 
-	mtr_x_lock(dict_index_get_lock(data->index), mtr);
+	mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
 
-	page = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH,
-			    mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
+	block = buf_page_get(
+		IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr);
 
-	return(page);
+	buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+
+	return(buf_block_get_frame(block));
 }
 
-#ifdef UNIV_IBUF_DEBUG
-/**********************************************************************
-Gets the ibuf count for a given page. */
-
+#ifdef UNIV_IBUF_COUNT_DEBUG
+/******************************************************************//**
+Gets the ibuf count for a given page.
+@return number of entries in the insert buffer currently buffered for
+this page */
+UNIV_INTERN
 ulint
 ibuf_count_get(
 /*===========*/
-			/* out: number of entries in the insert buffer
-			currently buffered for this page */
-	ulint	space,	/* in: space id */
-	ulint	page_no)/* in: page number */
+	ulint	space,	/*!< in: space id */
+	ulint	page_no)/*!< in: page number */
 {
 	ibuf_count_check(space, page_no);
 
 	return(ibuf_counts[space][page_no]);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Sets the ibuf count for a given page. */
 static
 void
 ibuf_count_set(
 /*===========*/
-	ulint	space,	/* in: space id */
-	ulint	page_no,/* in: page number */
-	ulint	val)	/* in: value to set */
+	ulint	space,	/*!< in: space id */
+	ulint	page_no,/*!< in: page number */
+	ulint	val)	/*!< in: value to set */
 {
 	ibuf_count_check(space, page_no);
 	ut_a(val < UNIV_PAGE_SIZE);
@@ -371,16 +389,50 @@ ibuf_count_set(
 }
 #endif
 
-/**********************************************************************
+/******************************************************************//**
+Updates the size information of the ibuf, assuming the segment size has not
+changed. */
+static
+void
+ibuf_size_update(
+/*=============*/
+	const page_t*	root,	/*!< in: ibuf tree root */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ut_ad(mutex_own(&ibuf_mutex));
+
+	ibuf->free_list_len = flst_get_len(root + PAGE_HEADER
+					   + PAGE_BTR_IBUF_FREE_LIST, mtr);
+
+	ibuf->height = 1 + btr_page_get_level(root, mtr);
+
+	/* the '1 +' is the ibuf header page */
+	ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
+
+	ibuf->empty = page_get_n_recs(root) == 0;
+}
+
+/******************************************************************//**
 Creates the insert buffer data structure at a database startup and initializes
 the data structures for the insert buffer. */
-
+UNIV_INTERN
 void
 ibuf_init_at_db_start(void)
 /*=======================*/
 {
+	page_t*		root;
+	mtr_t		mtr;
+	dict_table_t*	table;
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	ulint		n_used;
+	page_t*		header_page;
+	ulint		error;
+
 	ibuf = mem_alloc(sizeof(ibuf_t));
 
+	memset(ibuf, 0, sizeof(*ibuf));
+
 	/* Note that also a pessimistic delete can sometimes make a B-tree
 	grow in size, as the references on the upper levels of the tree can
 	change */
@@ -388,10 +440,6 @@ ibuf_init_at_db_start(void)
 	ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
 		/ IBUF_POOL_SIZE_PER_MAX_SIZE;
 
-	UT_LIST_INIT(ibuf->data_list);
-
-	ibuf->size = 0;
-
 	mutex_create(&ibuf_pessimistic_insert_mutex,
 		     SYNC_IBUF_PESS_INSERT_MUTEX);
 
@@ -399,88 +447,13 @@ ibuf_init_at_db_start(void)
 
 	mutex_create(&ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
 
-	fil_ibuf_init_at_db_start();
-}
-
-/**********************************************************************
-Updates the size information in an ibuf data, assuming the segment size has
-not changed. */
-static
-void
-ibuf_data_sizes_update(
-/*===================*/
-	ibuf_data_t*	data,	/* in: ibuf data struct */
-	page_t*		root,	/* in: ibuf tree root */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ulint	old_size;
-
-	ut_ad(mutex_own(&ibuf_mutex));
-
-	old_size = data->size;
-
-	data->free_list_len = flst_get_len(root + PAGE_HEADER
-					   + PAGE_BTR_IBUF_FREE_LIST, mtr);
-
-	data->height = 1 + btr_page_get_level(root, mtr);
-
-	data->size = data->seg_size - (1 + data->free_list_len);
-	/* the '1 +' is the ibuf header page */
-	ut_ad(data->size < data->seg_size);
-
-	if (page_get_n_recs(root) == 0) {
-
-		data->empty = TRUE;
-	} else {
-		data->empty = FALSE;
-	}
-
-	ut_ad(ibuf->size + data->size >= old_size);
-
-	ibuf->size = ibuf->size + data->size - old_size;
-
-#if 0
-	fprintf(stderr, "ibuf size %lu, space ibuf size %lu\n",
-		ibuf->size, data->size);
-#endif
-}
-
-/**********************************************************************
-Creates the insert buffer data struct for a single tablespace. Reads the
-root page of the insert buffer tree in the tablespace. This function can
-be called only after the dictionary system has been initialized, as this
-creates also the insert buffer table and index into this tablespace. */
-
-ibuf_data_t*
-ibuf_data_init_for_space(
-/*=====================*/
-			/* out, own: ibuf data struct, linked to the list
-			in ibuf control structure */
-	ulint	space)	/* in: space id */
-{
-	ibuf_data_t*	data;
-	page_t*		root;
-	page_t*		header_page;
-	mtr_t		mtr;
-	char*		buf;
-	mem_heap_t*	heap;
-	dict_table_t*	table;
-	dict_index_t*	index;
-	ulint		n_used;
-
-	ut_a(space == 0);
-
-	data = mem_alloc(sizeof(ibuf_data_t));
-
-	data->space = space;
-
 	mtr_start(&mtr);
 
 	mutex_enter(&ibuf_mutex);
 
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr);
 
-	header_page = ibuf_header_page_get(space, &mtr);
+	header_page = ibuf_header_page_get(&mtr);
 
 	fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
 			      &n_used, &mtr);
@@ -488,29 +461,20 @@ ibuf_data_init_for_space(
 
 	ut_ad(n_used >= 2);
 
-	data->seg_size = n_used;
+	ibuf->seg_size = n_used;
 
-	root = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH,
-			    &mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(root, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
+	{
+		buf_block_t*	block;
 
-	data->size = 0;
-	data->n_inserts = 0;
-	data->n_merges = 0;
-	data->n_merged_recs = 0;
+		block = buf_page_get(
+			IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO,
+			RW_X_LATCH, &mtr);
+		buf_block_dbg_add_level(block, SYNC_TREE_NODE);
 
-	ibuf_data_sizes_update(data, root, &mtr);
-	/*
-	if (!data->empty) {
-	fprintf(stderr,
-	"InnoDB: index entries found in the insert buffer\n");
-	} else {
-	fprintf(stderr,
-	"InnoDB: insert buffer empty\n");
+		root = buf_block_get_frame(block);
 	}
-	*/
+
+	ibuf_size_update(root, &mtr);
 	mutex_exit(&ibuf_mutex);
 
 	mtr_commit(&mtr);
@@ -518,104 +482,105 @@ ibuf_data_init_for_space(
 	ibuf_exit();
 
 	heap = mem_heap_create(450);
-	buf = mem_heap_alloc(heap, 50);
 
-	sprintf(buf, "SYS_IBUF_TABLE_%lu", (ulong) space);
-	/* use old-style record format for the insert buffer */
-	table = dict_mem_table_create(buf, space, 2, 0);
+	/* Use old-style record format for the insert buffer. */
+	table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0);
 
-	dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_BINARY, 0, 0);
-	dict_mem_table_add_col(table, heap, "TYPES", DATA_BINARY, 0, 0);
+	dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
 
-	table->id = ut_dulint_add(DICT_IBUF_ID_MIN, space);
+	table->id = ut_dulint_add(DICT_IBUF_ID_MIN, IBUF_SPACE_ID);
 
 	dict_table_add_to_cache(table, heap);
 	mem_heap_free(heap);
 
 	index = dict_mem_index_create(
-		buf, "CLUST_IND", space,
-		DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 2);
+		IBUF_TABLE_NAME, "CLUST_IND",
+		IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1);
 
-	dict_mem_index_add_field(index, "PAGE_NO", 0);
-	dict_mem_index_add_field(index, "TYPES", 0);
+	dict_mem_index_add_field(index, "DUMMY_COLUMN", 0);
 
-	index->id = ut_dulint_add(DICT_IBUF_ID_MIN, space);
+	index->id = ut_dulint_add(DICT_IBUF_ID_MIN, IBUF_SPACE_ID);
 
-	dict_index_add_to_cache(table, index, FSP_IBUF_TREE_ROOT_PAGE_NO);
+	error = dict_index_add_to_cache(table, index,
+					FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE);
+	ut_a(error == DB_SUCCESS);
 
-	data->index = dict_table_get_first_index(table);
-
-	mutex_enter(&ibuf_mutex);
-
-	UT_LIST_ADD_LAST(data_list, ibuf->data_list, data);
-
-	mutex_exit(&ibuf_mutex);
-
-	return(data);
+	ibuf->index = dict_table_get_first_index(table);
 }
-
-/*************************************************************************
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
 Initializes an ibuf bitmap page. */
-
+UNIV_INTERN
 void
 ibuf_bitmap_page_init(
 /*==================*/
-	page_t*	page,	/* in: bitmap page */
-	mtr_t*	mtr)	/* in: mtr */
+	buf_block_t*	block,	/*!< in: bitmap page */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
-	ulint	bit_offset;
+	page_t*	page;
 	ulint	byte_offset;
+	ulint	zip_size = buf_block_get_zip_size(block);
+
+	ut_a(ut_is_2pow(zip_size));
+
+	page = buf_block_get_frame(block);
+	fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
 
 	/* Write all zeros to the bitmap */
 
-	bit_offset = XDES_DESCRIBED_PER_PAGE * IBUF_BITS_PER_PAGE;
-
-	byte_offset = bit_offset / 8 + 1;
-	/* better: byte_offset = UT_BITS_IN_BYTES(bit_offset); */
-
-	fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
+	if (!zip_size) {
+		byte_offset = UT_BITS_IN_BYTES(UNIV_PAGE_SIZE
+					       * IBUF_BITS_PER_PAGE);
+	} else {
+		byte_offset = UT_BITS_IN_BYTES(zip_size * IBUF_BITS_PER_PAGE);
+	}
 
 	memset(page + IBUF_BITMAP, 0, byte_offset);
 
 	/* The remaining area (up to the page trailer) is uninitialized. */
 
+#ifndef UNIV_HOTBACKUP
 	mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
+#endif /* !UNIV_HOTBACKUP */
 }
 
-/*************************************************************************
-Parses a redo log record of an ibuf bitmap page init. */
-
+/*********************************************************************//**
+Parses a redo log record of an ibuf bitmap page init.
+@return	end of log record or NULL */
+UNIV_INTERN
 byte*
 ibuf_parse_bitmap_init(
 /*===================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr __attribute__((unused)), /* in: buffer end */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr)	/* in: mtr or NULL */
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr __attribute__((unused)), /*!< in: buffer end */
+	buf_block_t*	block,	/*!< in: block or NULL */
+	mtr_t*		mtr)	/*!< in: mtr or NULL */
 {
 	ut_ad(ptr && end_ptr);
 
-	if (page) {
-		ibuf_bitmap_page_init(page, mtr);
+	if (block) {
+		ibuf_bitmap_page_init(block, mtr);
 	}
 
 	return(ptr);
 }
-
-/************************************************************************
-Gets the desired bits for a given page from a bitmap page. */
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Gets the desired bits for a given page from a bitmap page.
+@return	value of bits */
 UNIV_INLINE
 ulint
 ibuf_bitmap_page_get_bits(
 /*======================*/
-			/* out: value of bits */
-	page_t*	page,	/* in: bitmap page */
-	ulint	page_no,/* in: page whose bits to get */
-	ulint	bit,	/* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
-	mtr_t*	mtr __attribute__((unused)))	/* in: mtr containing an
-						x-latch to the bitmap
-						page */
+	const page_t*	page,	/*!< in: bitmap page */
+	ulint		page_no,/*!< in: page whose bits to get */
+	ulint		zip_size,/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint		bit,	/*!< in: IBUF_BITMAP_FREE,
+				IBUF_BITMAP_BUFFERED, ... */
+	mtr_t*		mtr __attribute__((unused)))
+				/*!< in: mtr containing an
+				x-latch to the bitmap page */
 {
 	ulint	byte_offset;
 	ulint	bit_offset;
@@ -626,11 +591,16 @@ ibuf_bitmap_page_get_bits(
 #if IBUF_BITS_PER_PAGE % 2
 # error "IBUF_BITS_PER_PAGE % 2 != 0"
 #endif
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(ut_is_2pow(zip_size));
+	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
 
-	bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE
-		+ bit;
+	if (!zip_size) {
+		bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
+			+ bit;
+	} else {
+		bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
+			+ bit;
+	}
 
 	byte_offset = bit_offset / 8;
 	bit_offset = bit_offset % 8;
@@ -650,17 +620,19 @@ ibuf_bitmap_page_get_bits(
 	return(value);
 }
 
-/************************************************************************
+/********************************************************************//**
 Sets the desired bit for a given page in a bitmap page. */
 static
 void
 ibuf_bitmap_page_set_bits(
 /*======================*/
-	page_t*	page,	/* in: bitmap page */
-	ulint	page_no,/* in: page whose bits to set */
-	ulint	bit,	/* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
-	ulint	val,	/* in: value to set */
-	mtr_t*	mtr)	/* in: mtr containing an x-latch to the bitmap page */
+	page_t*	page,	/*!< in: bitmap page */
+	ulint	page_no,/*!< in: page whose bits to set */
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	bit,	/*!< in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
+	ulint	val,	/*!< in: value to set */
+	mtr_t*	mtr)	/*!< in: mtr containing an x-latch to the bitmap page */
 {
 	ulint	byte_offset;
 	ulint	bit_offset;
@@ -670,15 +642,20 @@ ibuf_bitmap_page_set_bits(
 #if IBUF_BITS_PER_PAGE % 2
 # error "IBUF_BITS_PER_PAGE % 2 != 0"
 #endif
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_IBUF_DEBUG
+	ut_ad(ut_is_2pow(zip_size));
+	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
+#ifdef UNIV_IBUF_COUNT_DEBUG
 	ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
-	     || (0 == ibuf_count_get(buf_frame_get_space_id(page),
+	     || (0 == ibuf_count_get(page_get_space_id(page),
 				     page_no)));
 #endif
-	bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE
-		+ bit;
+	if (!zip_size) {
+		bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
+			+ bit;
+	} else {
+		bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
+			+ bit;
+	}
 
 	byte_offset = bit_offset / 8;
 	bit_offset = bit_offset % 8;
@@ -702,48 +679,55 @@ ibuf_bitmap_page_set_bits(
 			 MLOG_1BYTE, mtr);
 }
 
-/************************************************************************
-Calculates the bitmap page number for a given page number. */
+/********************************************************************//**
+Calculates the bitmap page number for a given page number.
+@return	the bitmap page number where the file page is mapped */
 UNIV_INLINE
 ulint
 ibuf_bitmap_page_no_calc(
 /*=====================*/
-				/* out: the bitmap page number where
-				the file page is mapped */
-	ulint	page_no)	/* in: tablespace page number */
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	page_no)	/*!< in: tablespace page number */
 {
-	return(FSP_IBUF_BITMAP_OFFSET
-	       + XDES_DESCRIBED_PER_PAGE
-	       * (page_no / XDES_DESCRIBED_PER_PAGE));
+	ut_ad(ut_is_2pow(zip_size));
+
+	if (!zip_size) {
+		return(FSP_IBUF_BITMAP_OFFSET
+		       + (page_no & ~(UNIV_PAGE_SIZE - 1)));
+	} else {
+		return(FSP_IBUF_BITMAP_OFFSET
+		       + (page_no & ~(zip_size - 1)));
+	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Gets the ibuf bitmap page where the bits describing a given file page are
-stored. */
+stored.
+@return bitmap page where the file page is mapped, that is, the bitmap
+page containing the descriptor bits for the file page; the bitmap page
+is x-latched */
 static
 page_t*
 ibuf_bitmap_get_map_page(
 /*=====================*/
-			/* out: bitmap page where the file page is mapped,
-			that is, the bitmap page containing the descriptor
-			bits for the file page; the bitmap page is
-			x-latched */
-	ulint	space,	/* in: space id of the file page */
-	ulint	page_no,/* in: page number of the file page */
-	mtr_t*	mtr)	/* in: mtr */
+	ulint	space,	/*!< in: space id of the file page */
+	ulint	page_no,/*!< in: page number of the file page */
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
-	page_t*	page;
+	buf_block_t*	block;
 
-	page = buf_page_get(space, ibuf_bitmap_page_no_calc(page_no),
-			    RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_IBUF_BITMAP);
-#endif /* UNIV_SYNC_DEBUG */
+	block = buf_page_get(space, zip_size,
+			     ibuf_bitmap_page_no_calc(zip_size, page_no),
+			     RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
 
-	return(page);
+	return(buf_block_get_frame(block));
 }
 
-/****************************************************************************
+/************************************************************************//**
 Sets the free bits of the page in the ibuf bitmap. This is done in a separate
 mini-transaction, hence this operation does not restrict further work to only
 ibuf bitmap operations, which would result if the latch to the bitmap page
@@ -752,184 +736,229 @@ UNIV_INLINE
 void
 ibuf_set_free_bits_low(
 /*===================*/
-	ulint	type,	/* in: index type */
-	page_t*	page,	/* in: index page; free bit is set if the index is
-			non-clustered and page level is 0 */
-	ulint	val,	/* in: value to set: < 4 */
-	mtr_t*	mtr)	/* in: mtr */
+	ulint			zip_size,/*!< in: compressed page size in bytes;
+					0 for uncompressed pages */
+	const buf_block_t*	block,	/*!< in: index page; free bits are set if
+					the index is non-clustered and page
+					level is 0 */
+	ulint			val,	/*!< in: value to set: < 4 */
+	mtr_t*			mtr)	/*!< in/out: mtr */
 {
 	page_t*	bitmap_page;
+	ulint	space;
+	ulint	page_no;
 
-	if (type & DICT_CLUSTERED) {
+	if (!page_is_leaf(buf_block_get_frame(block))) {
 
 		return;
 	}
 
-	if (btr_page_get_level_low(page) != 0) {
-
-		return;
-	}
-
-	bitmap_page = ibuf_bitmap_get_map_page(
-		buf_frame_get_space_id(page),
-		buf_frame_get_page_no(page), mtr);
+	space = buf_block_get_space(block);
+	page_no = buf_block_get_page_no(block);
+	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
 #ifdef UNIV_IBUF_DEBUG
 # if 0
 	fprintf(stderr,
-		"Setting page no %lu free bits to %lu should be %lu\n",
-		buf_frame_get_page_no(page), val,
-		ibuf_index_page_calc_free(page));
+		"Setting space %lu page %lu free bits to %lu should be %lu\n",
+		space, page_no, val,
+		ibuf_index_page_calc_free(zip_size, block));
 # endif
 
-	ut_a(val <= ibuf_index_page_calc_free(page));
+	ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
 #endif /* UNIV_IBUF_DEBUG */
-	ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page),
+	ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
 				  IBUF_BITMAP_FREE, val, mtr);
-
 }
 
-/****************************************************************************
+/************************************************************************//**
 Sets the free bit of the page in the ibuf bitmap. This is done in a separate
 mini-transaction, hence this operation does not restrict further work to only
 ibuf bitmap operations, which would result if the latch to the bitmap page
 were kept. */
-
+UNIV_INTERN
 void
-ibuf_set_free_bits(
-/*===============*/
-	ulint	type,	/* in: index type */
-	page_t*	page,	/* in: index page; free bit is set if the index is
-			non-clustered and page level is 0 */
-	ulint	val,	/* in: value to set: < 4 */
-	ulint	max_val)/* in: ULINT_UNDEFINED or a maximum value which
-			the bits must have before setting; this is for
-			debugging */
+ibuf_set_free_bits_func(
+/*====================*/
+	buf_block_t*	block,	/*!< in: index page of a non-clustered index;
+				free bit is reset if page level is 0 */
+#ifdef UNIV_IBUF_DEBUG
+	ulint		max_val,/*!< in: ULINT_UNDEFINED or a maximum
+				value which the bits must have before
+				setting; this is for debugging */
+#endif /* UNIV_IBUF_DEBUG */
+	ulint		val)	/*!< in: value to set: < 4 */
 {
 	mtr_t	mtr;
+	page_t*	page;
 	page_t*	bitmap_page;
+	ulint	space;
+	ulint	page_no;
+	ulint	zip_size;
 
-	if (type & DICT_CLUSTERED) {
+	page = buf_block_get_frame(block);
 
-		return;
-	}
-
-	if (btr_page_get_level_low(page) != 0) {
+	if (!page_is_leaf(page)) {
 
 		return;
 	}
 
 	mtr_start(&mtr);
 
-	bitmap_page = ibuf_bitmap_get_map_page(
-		buf_frame_get_space_id(page), buf_frame_get_page_no(page),
-		&mtr);
+	space = buf_block_get_space(block);
+	page_no = buf_block_get_page_no(block);
+	zip_size = buf_block_get_zip_size(block);
+	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr);
 
-	if (max_val != ULINT_UNDEFINED) {
 #ifdef UNIV_IBUF_DEBUG
+	if (max_val != ULINT_UNDEFINED) {
 		ulint	old_val;
 
 		old_val = ibuf_bitmap_page_get_bits(
-			bitmap_page, buf_frame_get_page_no(page),
+			bitmap_page, page_no, zip_size,
 			IBUF_BITMAP_FREE, &mtr);
 # if 0
 		if (old_val != max_val) {
 			fprintf(stderr,
 				"Ibuf: page %lu old val %lu max val %lu\n",
-				buf_frame_get_page_no(page),
+				page_get_page_no(page),
 				old_val, max_val);
 		}
 # endif
 
 		ut_a(old_val <= max_val);
-#endif
 	}
-#ifdef UNIV_IBUF_DEBUG
 # if 0
 	fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
-		buf_frame_get_page_no(page), val,
-		ibuf_index_page_calc_free(page));
+		page_get_page_no(page), val,
+		ibuf_index_page_calc_free(zip_size, block));
 # endif
 
-	ut_a(val <= ibuf_index_page_calc_free(page));
-#endif
-	ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page),
+	ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
+#endif /* UNIV_IBUF_DEBUG */
+	ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
 				  IBUF_BITMAP_FREE, val, &mtr);
 	mtr_commit(&mtr);
 }
 
-/****************************************************************************
+/************************************************************************//**
 Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict further
-work to only ibuf bitmap operations, which would result if the latch to the
-bitmap page were kept. */
-
-void
-ibuf_reset_free_bits_with_type(
-/*===========================*/
-	ulint	type,	/* in: index type */
-	page_t*	page)	/* in: index page; free bits are set to 0 if the index
-			is non-clustered and non-unique and the page level is
-			0 */
-{
-	ibuf_set_free_bits(type, page, 0, ULINT_UNDEFINED);
-}
-
-/****************************************************************************
-Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict further
-work to solely ibuf bitmap operations, which would result if the latch to
-the bitmap page were kept. */
-
+separate mini-transaction, hence this operation does not restrict
+further work to only ibuf bitmap operations, which would result if the
+latch to the bitmap page were kept.  NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page.  It is safe
+to decrement or reset the bits in the bitmap in a mini-transaction
+that is committed before the mini-transaction that affects the free
+space. */
+UNIV_INTERN
 void
 ibuf_reset_free_bits(
 /*=================*/
-	dict_index_t*	index,	/* in: index */
-	page_t*		page)	/* in: index page; free bits are set to 0 if
-				the index is non-clustered and non-unique and
-				the page level is 0 */
+	buf_block_t*	block)	/*!< in: index page; free bits are set to 0
+				if the index is a non-clustered
+				non-unique, and page level is 0 */
 {
-	ibuf_set_free_bits(index->type, page, 0, ULINT_UNDEFINED);
+	ibuf_set_free_bits(block, 0, ULINT_UNDEFINED);
 }
 
-/**************************************************************************
-Updates the free bits for a page to reflect the present state. Does this
-in the mtr given, which means that the latching order rules virtually prevent
-any further operations for this OS thread until mtr is committed. */
-
+/**********************************************************************//**
+Updates the free bits for an uncompressed page to reflect the present
+state.  Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed.  NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page.  It is safe
+to set the free bits in the same mini-transaction that updated the
+page. */
+UNIV_INTERN
 void
 ibuf_update_free_bits_low(
 /*======================*/
-	dict_index_t*	index,		/* in: index */
-	page_t*		page,		/* in: index page */
-	ulint		max_ins_size,	/* in: value of maximum insert size
-					with reorganize before the latest
-					operation performed to the page */
-	mtr_t*		mtr)		/* in: mtr */
+	const buf_block_t*	block,		/*!< in: index page */
+	ulint			max_ins_size,	/*!< in: value of
+						maximum insert size
+						with reorganize before
+						the latest operation
+						performed to the page */
+	mtr_t*			mtr)		/*!< in/out: mtr */
 {
 	ulint	before;
 	ulint	after;
 
-	before = ibuf_index_page_calc_free_bits(max_ins_size);
+	ut_a(!buf_block_get_page_zip(block));
 
-	after = ibuf_index_page_calc_free(page);
+	before = ibuf_index_page_calc_free_bits(0, max_ins_size);
 
+	after = ibuf_index_page_calc_free(0, block);
+
+	/* This approach cannot be used on compressed pages, since the
+	computed value of "before" often does not match the current
+	state of the bitmap.  This is because the free space may
+	increase or decrease when a compressed page is reorganized. */
 	if (before != after) {
-		ibuf_set_free_bits_low(index->type, page, after, mtr);
+		ibuf_set_free_bits_low(0, block, after, mtr);
 	}
 }
 
-/**************************************************************************
-Updates the free bits for the two pages to reflect the present state. Does
-this in the mtr given, which means that the latching order rules virtually
-prevent any further operations until mtr is committed. */
+/**********************************************************************//**
+Updates the free bits for a compressed page to reflect the present
+state.  Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed.  NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page.  It is safe
+to set the free bits in the same mini-transaction that updated the
+page. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_zip(
+/*======================*/
+	buf_block_t*	block,	/*!< in/out: index page */
+	mtr_t*		mtr)	/*!< in/out: mtr */
+{
+	page_t*	bitmap_page;
+	ulint	space;
+	ulint	page_no;
+	ulint	zip_size;
+	ulint	after;
 
+	space = buf_block_get_space(block);
+	page_no = buf_block_get_page_no(block);
+	zip_size = buf_block_get_zip_size(block);
+
+	ut_a(page_is_leaf(buf_block_get_frame(block)));
+	ut_a(zip_size);
+
+	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
+
+	after = ibuf_index_page_calc_free_zip(zip_size, block);
+
+	if (after == 0) {
+		/* We move the page to the front of the buffer pool LRU list:
+		the purpose of this is to prevent those pages to which we
+		cannot make inserts using the insert buffer from slipping
+		out of the buffer pool */
+
+		buf_page_make_young(&block->page);
+	}
+
+	ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
+				  IBUF_BITMAP_FREE, after, mtr);
+}
+
+/**********************************************************************//**
+Updates the free bits for the two pages to reflect the present state.
+Does this in the mtr given, which means that the latching order rules
+virtually prevent any further operations until mtr is committed.
+NOTE: The free bits in the insert buffer bitmap must never exceed the
+free space on a page.  It is safe to set the free bits in the same
+mini-transaction that updated the pages. */
+UNIV_INTERN
 void
 ibuf_update_free_bits_for_two_pages_low(
 /*====================================*/
-	dict_index_t*	index,	/* in: index */
-	page_t*		page1,	/* in: index page */
-	page_t*		page2,	/* in: index page */
-	mtr_t*		mtr)	/* in: mtr */
+	ulint		zip_size,/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	buf_block_t*	block1,	/*!< in: index page */
+	buf_block_t*	block2,	/*!< in: index page */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ulint	state;
 
@@ -939,115 +968,93 @@ ibuf_update_free_bits_for_two_pages_low(
 
 	mutex_enter(&ibuf_bitmap_mutex);
 
-	state = ibuf_index_page_calc_free(page1);
+	state = ibuf_index_page_calc_free(zip_size, block1);
 
-	ibuf_set_free_bits_low(index->type, page1, state, mtr);
+	ibuf_set_free_bits_low(zip_size, block1, state, mtr);
 
-	state = ibuf_index_page_calc_free(page2);
+	state = ibuf_index_page_calc_free(zip_size, block2);
 
-	ibuf_set_free_bits_low(index->type, page2, state, mtr);
+	ibuf_set_free_bits_low(zip_size, block2, state, mtr);
 
 	mutex_exit(&ibuf_bitmap_mutex);
 }
 
-/**************************************************************************
-Returns TRUE if the page is one of the fixed address ibuf pages. */
+/**********************************************************************//**
+Returns TRUE if the page is one of the fixed address ibuf pages.
+@return	TRUE if a fixed address ibuf i/o page */
 UNIV_INLINE
 ibool
 ibuf_fixed_addr_page(
 /*=================*/
-			/* out: TRUE if a fixed address ibuf i/o page */
-	ulint	space,	/* in: space id */
-	ulint	page_no)/* in: page number */
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	page_no)/*!< in: page number */
 {
-	return((space == 0 && page_no == IBUF_TREE_ROOT_PAGE_NO)
-	       || ibuf_bitmap_page(page_no));
+	return((space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO)
+	       || ibuf_bitmap_page(zip_size, page_no));
 }
 
-/***************************************************************************
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
-
+/***********************************************************************//**
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==TRUE.
+@return	TRUE if level 2 or level 3 page */
+UNIV_INTERN
 ibool
 ibuf_page(
 /*======*/
-			/* out: TRUE if level 2 or level 3 page */
-	ulint	space,	/* in: space id */
-	ulint	page_no)/* in: page number */
-{
-	page_t*	bitmap_page;
-	mtr_t	mtr;
-	ibool	ret;
-
-	if (recv_no_ibuf_operations) {
-		/* Recovery is running: no ibuf operations should be
-		performed */
-
-		return(FALSE);
-	}
-
-	if (ibuf_fixed_addr_page(space, page_no)) {
-
-		return(TRUE);
-	}
-
-	if (space != 0) {
-		/* Currently we only have an ibuf tree in space 0 */
-
-		return(FALSE);
-	}
-
-	ut_ad(fil_space_get_type(space) == FIL_TABLESPACE);
-
-	mtr_start(&mtr);
-
-	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
-
-	ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
-					&mtr);
-	mtr_commit(&mtr);
-
-	return(ret);
-}
-
-/***************************************************************************
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
-
-ibool
-ibuf_page_low(
-/*==========*/
-			/* out: TRUE if level 2 or level 3 page */
-	ulint	space,	/* in: space id */
-	ulint	page_no,/* in: page number */
-	mtr_t*	mtr)	/* in: mtr which will contain an x-latch to the
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint	page_no,/*!< in: page number */
+	mtr_t*	mtr)	/*!< in: mtr which will contain an x-latch to the
 			bitmap page if the page is not one of the fixed
-			address ibuf pages */
+			address ibuf pages, or NULL, in which case a new
+			transaction is created. */
 {
-	page_t*	bitmap_page;
 	ibool	ret;
+	mtr_t	local_mtr;
+	page_t*	bitmap_page;
 
-	if (ibuf_fixed_addr_page(space, page_no)) {
+	ut_ad(!recv_no_ibuf_operations);
+
+	if (ibuf_fixed_addr_page(space, zip_size, page_no)) {
 
 		return(TRUE);
+	} else if (space != IBUF_SPACE_ID) {
+
+		return(FALSE);
 	}
 
-	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, mtr);
+	ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE);
+
+	if (mtr == NULL) {
+		mtr = &local_mtr;
+		mtr_start(mtr);
+	}
+
+	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
+
+	ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
+					IBUF_BITMAP_IBUF, mtr);
+
+	if (mtr == &local_mtr) {
+		mtr_commit(mtr);
+	}
 
-	ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
-					mtr);
 	return(ret);
 }
 
-/************************************************************************
-Returns the page number field of an ibuf record. */
+/********************************************************************//**
+Returns the page number field of an ibuf record.
+@return	page number */
 static
 ulint
 ibuf_rec_get_page_no(
 /*=================*/
-			/* out: page number */
-	rec_t*	rec)	/* in: ibuf record */
+	const rec_t*	rec)	/*!< in: ibuf record */
 {
-	byte*	field;
-	ulint	len;
+	const byte*	field;
+	ulint		len;
 
 	ut_ad(ibuf_inside());
 	ut_ad(rec_get_n_fields_old(rec) > 2);
@@ -1071,18 +1078,18 @@ ibuf_rec_get_page_no(
 	return(mach_read_from_4(field));
 }
 
-/************************************************************************
+/********************************************************************//**
 Returns the space id field of an ibuf record. For < 4.1.x format records
-returns 0. */
+returns 0.
+@return	space id */
 static
 ulint
 ibuf_rec_get_space(
 /*===============*/
-			/* out: space id */
-	rec_t*	rec)	/* in: ibuf record */
+	const rec_t*	rec)	/*!< in: ibuf record */
 {
-	byte*	field;
-	ulint	len;
+	const byte*	field;
+	ulint		len;
 
 	ut_ad(ibuf_inside());
 	ut_ad(rec_get_n_fields_old(rec) > 2);
@@ -1105,16 +1112,16 @@ ibuf_rec_get_space(
 	return(0);
 }
 
-/************************************************************************
+/********************************************************************//**
 Creates a dummy index for inserting a record to a non-clustered index.
-*/
+
+@return	dummy index */
 static
 dict_index_t*
 ibuf_dummy_index_create(
 /*====================*/
-				/* out: dummy index */
-	ulint		n,	/* in: number of fields */
-	ibool		comp)	/* in: TRUE=use compact record format */
+	ulint		n,	/*!< in: number of fields */
+	ibool		comp)	/*!< in: TRUE=use compact record format */
 {
 	dict_table_t*	table;
 	dict_index_t*	index;
@@ -1133,32 +1140,31 @@ ibuf_dummy_index_create(
 
 	return(index);
 }
-/************************************************************************
+/********************************************************************//**
 Add a column to the dummy index */
 static
 void
 ibuf_dummy_index_add_col(
 /*=====================*/
-	dict_index_t*	index,	/* in: dummy index */
-	dtype_t*	type,	/* in: the data type of the column */
-	ulint		len)	/* in: length of the column */
+	dict_index_t*	index,	/*!< in: dummy index */
+	const dtype_t*	type,	/*!< in: the data type of the column */
+	ulint		len)	/*!< in: length of the column */
 {
 	ulint	i	= index->table->n_def;
 	dict_mem_table_add_col(index->table, NULL, NULL,
 			       dtype_get_mtype(type),
 			       dtype_get_prtype(type),
 			       dtype_get_len(type));
-	dict_index_add_col(index, index->table, (dict_col_t*)
+	dict_index_add_col(index, index->table,
 			   dict_table_get_nth_col(index->table, i), len);
 }
-/************************************************************************
-Deallocates a dummy index for inserting a record to a non-clustered index.
-*/
+/********************************************************************//**
+Deallocates a dummy index for inserting a record to a non-clustered index. */
 static
 void
 ibuf_dummy_index_free(
 /*==================*/
-	dict_index_t*	index)	/* in: dummy index */
+	dict_index_t*	index)	/*!< in, own: dummy index */
 {
 	dict_table_t*	table = index->table;
 
@@ -1166,28 +1172,79 @@ ibuf_dummy_index_free(
 	dict_mem_table_free(table);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Builds the entry to insert into a non-clustered index when we have the
-corresponding record in an ibuf index. */
+corresponding record in an ibuf index.
+
+NOTE that as we copy pointers to fields in ibuf_rec, the caller must
+hold a latch to the ibuf_rec page as long as the entry is used!
+
+@return own: entry to insert to a non-clustered index */
+UNIV_INLINE
+dtuple_t*
+ibuf_build_entry_pre_4_1_x(
+/*=======================*/
+	const rec_t*	ibuf_rec,	/*!< in: record in an insert buffer */
+	mem_heap_t*	heap,		/*!< in: heap where built */
+	dict_index_t**	pindex)		/*!< out, own: dummy index that
+					describes the entry */
+{
+	ulint		i;
+	ulint		len;
+	const byte*	types;
+	dtuple_t*	tuple;
+	ulint		n_fields;
+
+	ut_a(trx_doublewrite_must_reset_space_ids);
+	ut_a(!trx_sys_multiple_tablespace_format);
+
+	n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
+	tuple = dtuple_create(heap, n_fields);
+	types = rec_get_nth_field_old(ibuf_rec, 1, &len);
+
+	ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+
+	for (i = 0; i < n_fields; i++) {
+		const byte*	data;
+		dfield_t*	field;
+
+		field = dtuple_get_nth_field(tuple, i);
+
+		data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
+
+		dfield_set_data(field, data, len);
+
+		dtype_read_for_order_and_null_size(
+			dfield_get_type(field),
+			types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+	}
+
+	*pindex = ibuf_dummy_index_create(n_fields, FALSE);
+
+	return(tuple);
+}
+
+/*********************************************************************//**
+Builds the entry to insert into a non-clustered index when we have the
+corresponding record in an ibuf index.
+
+NOTE that as we copy pointers to fields in ibuf_rec, the caller must
+hold a latch to the ibuf_rec page as long as the entry is used!
+
+@return own: entry to insert to a non-clustered index */
 static
 dtuple_t*
 ibuf_build_entry_from_ibuf_rec(
 /*===========================*/
-					/* out, own: entry to insert to
-					a non-clustered index; NOTE that
-					as we copy pointers to fields in
-					ibuf_rec, the caller must hold a
-					latch to the ibuf_rec page as long
-					as the entry is used! */
-	rec_t*		ibuf_rec,	/* in: record in an insert buffer */
-	mem_heap_t*	heap,		/* in: heap where built */
-	dict_index_t**	pindex)		/* out, own: dummy index that
+	const rec_t*	ibuf_rec,	/*!< in: record in an insert buffer */
+	mem_heap_t*	heap,		/*!< in: heap where built */
+	dict_index_t**	pindex)		/*!< out, own: dummy index that
 					describes the entry */
 {
 	dtuple_t*	tuple;
 	dfield_t*	field;
 	ulint		n_fields;
-	byte*		types;
+	const byte*	types;
 	const byte*	data;
 	ulint		len;
 	ulint		i;
@@ -1198,29 +1255,7 @@ ibuf_build_entry_from_ibuf_rec(
 	if (len > 1) {
 		/* This a < 4.1.x format record */
 
-		ut_a(trx_doublewrite_must_reset_space_ids);
-		ut_a(!trx_sys_multiple_tablespace_format);
-
-		n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
-		tuple = dtuple_create(heap, n_fields);
-		types = rec_get_nth_field_old(ibuf_rec, 1, &len);
-
-		ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
-
-		for (i = 0; i < n_fields; i++) {
-			field = dtuple_get_nth_field(tuple, i);
-
-			data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
-
-			dfield_set_data(field, data, len);
-
-			dtype_read_for_order_and_null_size(
-				dfield_get_type(field),
-				types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
-		}
-
-		*pindex = ibuf_dummy_index_create(n_fields, FALSE);
-		return(tuple);
+		return(ibuf_build_entry_pre_4_1_x(ibuf_rec, heap, pindex));
 	}
 
 	/* This a >= 4.1.x format record */
@@ -1262,29 +1297,38 @@ ibuf_build_entry_from_ibuf_rec(
 		ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
 	}
 
+	/* Prevent an ut_ad() failure in page_zip_write_rec() by
+	adding system columns to the dummy table pointed to by the
+	dummy secondary index.  The insert buffer is only used for
+	secondary indexes, whose records never contain any system
+	columns, such as DB_TRX_ID. */
+	ut_d(dict_table_add_system_columns(index->table, index->table->heap));
+
 	*pindex = index;
+
 	return(tuple);
 }
 
-/************************************************************************
+/********************************************************************//**
 Returns the space taken by a stored non-clustered index entry if converted to
-an index record. */
+an index record.
+@return size of index record in bytes + an upper limit of the space
+taken in the page directory */
 static
 ulint
 ibuf_rec_get_volume(
 /*================*/
-			/* out: size of index record in bytes + an upper
-			limit of the space taken in the page directory */
-	rec_t*	ibuf_rec)/* in: ibuf record */
+	const rec_t*	ibuf_rec)/*!< in: ibuf record */
 {
-	dtype_t	dtype;
-	ibool	new_format	= FALSE;
-	ulint	data_size	= 0;
-	ulint	n_fields;
-	byte*	types;
-	byte*	data;
-	ulint	len;
-	ulint	i;
+	dtype_t		dtype;
+	ibool		new_format	= FALSE;
+	ulint		data_size	= 0;
+	ulint		n_fields;
+	const byte*	types;
+	const byte*	data;
+	ulint		len;
+	ulint		i;
+	ulint		comp;
 
 	ut_ad(ibuf_inside());
 	ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
@@ -1302,6 +1346,7 @@ ibuf_rec_get_volume(
 		types = rec_get_nth_field_old(ibuf_rec, 1, &len);
 
 		ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+		comp = 0;
 	} else {
 		/* >= 4.1.x format record */
 
@@ -1310,15 +1355,17 @@ ibuf_rec_get_volume(
 
 		types = rec_get_nth_field_old(ibuf_rec, 3, &len);
 
-		ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1);
-		if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
+		comp = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
+
+		ut_a(comp <= 1);
+		if (comp) {
 			/* compact record format */
 			ulint		volume;
 			dict_index_t*	dummy_index;
 			mem_heap_t*	heap = mem_heap_create(500);
 			dtuple_t*	entry = ibuf_build_entry_from_ibuf_rec(
 				ibuf_rec, heap, &dummy_index);
-			volume = rec_get_converted_size(dummy_index, entry);
+			volume = rec_get_converted_size(dummy_index, entry, 0);
 			ibuf_dummy_index_free(dummy_index);
 			mem_heap_free(heap);
 			return(volume + page_dir_calc_reserved_space(1));
@@ -1345,37 +1392,38 @@ ibuf_rec_get_volume(
 		}
 
 		if (len == UNIV_SQL_NULL) {
-			data_size += dtype_get_sql_null_size(&dtype);
+			data_size += dtype_get_sql_null_size(&dtype, comp);
 		} else {
 			data_size += len;
 		}
 	}
 
-	return(data_size + rec_get_converted_extra_size(data_size, n_fields)
+	return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
 	       + page_dir_calc_reserved_space(1));
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Builds the tuple to insert to an ibuf tree when we have an entry for a
-non-clustered index. */
+non-clustered index.
+
+NOTE that the original entry must be kept because we copy pointers to
+its fields.
+
+@return	own: entry to insert into an ibuf index tree */
 static
 dtuple_t*
 ibuf_entry_build(
 /*=============*/
-				/* out, own: entry to insert into an ibuf
-				index tree; NOTE that the original entry
-				must be kept because we copy pointers to its
-				fields */
-	dict_index_t*	index,	/* in: non-clustered index */
-	dtuple_t*	entry,	/* in: entry for a non-clustered index */
-	ulint		space,	/* in: space id */
-	ulint		page_no,/* in: index page number where entry should
+	dict_index_t*	index,	/*!< in: non-clustered index */
+	const dtuple_t*	entry,	/*!< in: entry for a non-clustered index */
+	ulint		space,	/*!< in: space id */
+	ulint		page_no,/*!< in: index page number where entry should
 				be inserted */
-	mem_heap_t*	heap)	/* in: heap into which to build */
+	mem_heap_t*	heap)	/*!< in: heap into which to build */
 {
 	dtuple_t*	tuple;
 	dfield_t*	field;
-	dfield_t*	entry_field;
+	const dfield_t*	entry_field;
 	ulint		n_fields;
 	byte*		buf;
 	byte*		buf2;
@@ -1461,12 +1509,13 @@ ibuf_entry_build(
 #ifdef UNIV_DEBUG
 		if (fixed_len) {
 			/* dict_index_add_col() should guarantee these */
-			ut_ad(fixed_len <= (ulint) entry_field->type.len);
+			ut_ad(fixed_len <= (ulint)
+			      dfield_get_type(entry_field)->len);
 			if (ifield->prefix_len) {
 				ut_ad(ifield->prefix_len == fixed_len);
 			} else {
-				ut_ad(fixed_len
-				      == (ulint) entry_field->type.len);
+				ut_ad(fixed_len == (ulint)
+				      dfield_get_type(entry_field)->len);
 			}
 		}
 #endif /* UNIV_DEBUG */
@@ -1494,17 +1543,17 @@ ibuf_entry_build(
 	return(tuple);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Builds a search tuple used to search buffered inserts for an index page.
-This is for < 4.1.x format records */
+This is for < 4.1.x format records
+@return	own: search tuple */
 static
 dtuple_t*
 ibuf_search_tuple_build(
 /*====================*/
-				/* out, own: search tuple */
-	ulint		space,	/* in: space id */
-	ulint		page_no,/* in: index page number */
-	mem_heap_t*	heap)	/* in: heap into which to build */
+	ulint		space,	/*!< in: space id */
+	ulint		page_no,/*!< in: index page number */
+	mem_heap_t*	heap)	/*!< in: heap into which to build */
 {
 	dtuple_t*	tuple;
 	dfield_t*	field;
@@ -1531,17 +1580,17 @@ ibuf_search_tuple_build(
 	return(tuple);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Builds a search tuple used to search buffered inserts for an index page.
-This is for >= 4.1.x format records. */
+This is for >= 4.1.x format records.
+@return	own: search tuple */
 static
 dtuple_t*
 ibuf_new_search_tuple_build(
 /*========================*/
-				/* out, own: search tuple */
-	ulint		space,	/* in: space id */
-	ulint		page_no,/* in: index page number */
-	mem_heap_t*	heap)	/* in: heap into which to build */
+	ulint		space,	/*!< in: space id */
+	ulint		page_no,/*!< in: index page number */
+	mem_heap_t*	heap)	/*!< in: heap into which to build */
 {
 	dtuple_t*	tuple;
 	dfield_t*	field;
@@ -1586,15 +1635,14 @@ ibuf_new_search_tuple_build(
 	return(tuple);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if there are enough pages in the free list of the ibuf tree that we
-dare to start a pessimistic insert to the insert buffer. */
+dare to start a pessimistic insert to the insert buffer.
+@return	TRUE if enough free pages in list */
 UNIV_INLINE
 ibool
-ibuf_data_enough_free_for_insert(
-/*=============================*/
-				/* out: TRUE if enough free pages in list */
-	ibuf_data_t*	data)	/* in: ibuf data for the space */
+ibuf_data_enough_free_for_insert(void)
+/*==================================*/
 {
 	ut_ad(mutex_own(&ibuf_mutex));
 
@@ -1604,57 +1652,49 @@ ibuf_data_enough_free_for_insert(
 	inserts buffered for pages that we read to the buffer pool, without
 	any risk of running out of free space in the insert buffer. */
 
-	if (data->free_list_len >= data->size / 2 + 3 * data->height) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
+	return(ibuf->free_list_len >= (ibuf->size / 2) + 3 * ibuf->height);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if there are enough pages in the free list of the ibuf tree that we
-should remove them and free to the file space management. */
+should remove them and free to the file space management.
+@return	TRUE if enough free pages in list */
 UNIV_INLINE
 ibool
-ibuf_data_too_much_free(
-/*====================*/
-				/* out: TRUE if enough free pages in list */
-	ibuf_data_t*	data)	/* in: ibuf data for the space */
+ibuf_data_too_much_free(void)
+/*=========================*/
 {
 	ut_ad(mutex_own(&ibuf_mutex));
 
-	return(data->free_list_len >= 3 + data->size / 2 + 3 * data->height);
+	return(ibuf->free_list_len >= 3 + (ibuf->size / 2) + 3 * ibuf->height);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Allocates a new page from the ibuf file segment and adds it to the free
-list. */
+list.
+@return	DB_SUCCESS, or DB_STRONG_FAIL if no space left */
 static
 ulint
-ibuf_add_free_page(
-/*===============*/
-					/* out: DB_SUCCESS, or DB_STRONG_FAIL
-					if no space left */
-	ulint		space,		/* in: space id */
-	ibuf_data_t*	ibuf_data)	/* in: ibuf data for the space */
+ibuf_add_free_page(void)
+/*====================*/
 {
 	mtr_t	mtr;
 	page_t*	header_page;
+	ulint	flags;
+	ulint	zip_size;
 	ulint	page_no;
 	page_t*	page;
 	page_t*	root;
 	page_t*	bitmap_page;
 
-	ut_a(space == 0);
-
 	mtr_start(&mtr);
 
 	/* Acquire the fsp latch before the ibuf header, obeying the latching
 	order */
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
+	zip_size = dict_table_flags_to_zip_size(flags);
 
-	header_page = ibuf_header_page_get(space, &mtr);
+	header_page = ibuf_header_page_get(&mtr);
 
 	/* Allocate a new page: NOTE that if the page has been a part of a
 	non-clustered index which has subsequently been dropped, then the
@@ -1666,26 +1706,33 @@ ibuf_add_free_page(
 	of a deadlock. This is the reason why we created a special ibuf
 	header page apart from the ibuf tree. */
 
-	page_no = fseg_alloc_free_page(header_page + IBUF_HEADER
-				       + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
-				       &mtr);
+	page_no = fseg_alloc_free_page(
+		header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
+		&mtr);
+
 	if (page_no == FIL_NULL) {
 		mtr_commit(&mtr);
 
 		return(DB_STRONG_FAIL);
 	}
 
-	page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
+	{
+		buf_block_t*	block;
 
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
+		block = buf_page_get(
+			IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
+
+		buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
+
+
+		page = buf_block_get_frame(block);
+	}
 
 	ibuf_enter();
 
 	mutex_enter(&ibuf_mutex);
 
-	root = ibuf_tree_root_get(ibuf_data, space, &mtr);
+	root = ibuf_tree_root_get(&mtr);
 
 	/* Add the page to the free list and update the ibuf size data */
 
@@ -1695,16 +1742,18 @@ ibuf_add_free_page(
 	mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST,
 			 MLOG_2BYTES, &mtr);
 
-	ibuf_data->seg_size++;
-	ibuf_data->free_list_len++;
+	ibuf->seg_size++;
+	ibuf->free_list_len++;
 
 	/* Set the bit indicating that this page is now an ibuf tree page
 	(level 2 page) */
 
-	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
+	bitmap_page = ibuf_bitmap_get_map_page(
+		IBUF_SPACE_ID, page_no, zip_size, &mtr);
+
+	ibuf_bitmap_page_set_bits(
+		bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, TRUE, &mtr);
 
-	ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
-				  TRUE, &mtr);
 	mtr_commit(&mtr);
 
 	mutex_exit(&ibuf_mutex);
@@ -1714,32 +1763,31 @@ ibuf_add_free_page(
 	return(DB_SUCCESS);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Removes a page from the free list and frees it to the fsp system. */
 static
 void
-ibuf_remove_free_page(
-/*==================*/
-	ulint		space,		/* in: space id */
-	ibuf_data_t*	ibuf_data)	/* in: ibuf data for the space */
+ibuf_remove_free_page(void)
+/*=======================*/
 {
 	mtr_t	mtr;
 	mtr_t	mtr2;
 	page_t*	header_page;
+	ulint	flags;
+	ulint	zip_size;
 	ulint	page_no;
 	page_t*	page;
 	page_t*	root;
 	page_t*	bitmap_page;
 
-	ut_a(space == 0);
-
 	mtr_start(&mtr);
 
 	/* Acquire the fsp latch before the ibuf header, obeying the latching
 	order */
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
+	mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
+	zip_size = dict_table_flags_to_zip_size(flags);
 
-	header_page = ibuf_header_page_get(space, &mtr);
+	header_page = ibuf_header_page_get(&mtr);
 
 	/* Prevent pessimistic inserts to insert buffer trees for a while */
 	mutex_enter(&ibuf_pessimistic_insert_mutex);
@@ -1748,7 +1796,7 @@ ibuf_remove_free_page(
 
 	mutex_enter(&ibuf_mutex);
 
-	if (!ibuf_data_too_much_free(ibuf_data)) {
+	if (!ibuf_data_too_much_free()) {
 
 		mutex_exit(&ibuf_mutex);
 
@@ -1763,11 +1811,10 @@ ibuf_remove_free_page(
 
 	mtr_start(&mtr2);
 
-	root = ibuf_tree_root_get(ibuf_data, space, &mtr2);
+	root = ibuf_tree_root_get(&mtr2);
 
 	page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
-				&mtr2)
-		.page;
+				&mtr2).page;
 
 	/* NOTE that we must release the latch on the ibuf tree root
 	because in fseg_free_page we access level 1 pages, and the root
@@ -1785,45 +1832,54 @@ ibuf_remove_free_page(
 	page from it. */
 
 	fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
-		       space, page_no, &mtr);
+		       IBUF_SPACE_ID, page_no, &mtr);
+
 #ifdef UNIV_DEBUG_FILE_ACCESSES
-	buf_page_reset_file_page_was_freed(space, page_no);
+	buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no);
 #endif
+
 	ibuf_enter();
 
 	mutex_enter(&ibuf_mutex);
 
-	root = ibuf_tree_root_get(ibuf_data, space, &mtr);
+	root = ibuf_tree_root_get(&mtr);
 
 	ut_ad(page_no == flst_get_last(root + PAGE_HEADER
-				       + PAGE_BTR_IBUF_FREE_LIST, &mtr)
-	      .page);
+				       + PAGE_BTR_IBUF_FREE_LIST, &mtr).page);
 
-	page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
+	{
+		buf_block_t*	block;
 
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
+		block = buf_page_get(
+			IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
+
+		buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+
+
+		page = buf_block_get_frame(block);
+	}
 
 	/* Remove the page from the free list and update the ibuf size data */
 
 	flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
 		    page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
 
-	ibuf_data->seg_size--;
-	ibuf_data->free_list_len--;
+	ibuf->seg_size--;
+	ibuf->free_list_len--;
 
 	mutex_exit(&ibuf_pessimistic_insert_mutex);
 
 	/* Set the bit indicating that this page is no more an ibuf tree page
 	(level 2 page) */
 
-	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
+	bitmap_page = ibuf_bitmap_get_map_page(
+		IBUF_SPACE_ID, page_no, zip_size, &mtr);
+
+	ibuf_bitmap_page_set_bits(
+		bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr);
 
-	ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
-				  FALSE, &mtr);
 #ifdef UNIV_DEBUG_FILE_ACCESSES
-	buf_page_set_file_page_was_freed(space, page_no);
+	buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no);
 #endif
 	mtr_commit(&mtr);
 
@@ -1832,45 +1888,34 @@ ibuf_remove_free_page(
 	ibuf_exit();
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Frees excess pages from the ibuf free list. This function is called when an OS
 thread calls fsp services to allocate a new file segment, or a new page to a
 file segment, and the thread did not own the fsp latch before this call. */
-
+UNIV_INTERN
 void
-ibuf_free_excess_pages(
-/*===================*/
-	ulint	space)	/* in: space id */
+ibuf_free_excess_pages(void)
+/*========================*/
 {
-	ibuf_data_t*	ibuf_data;
 	ulint		i;
 
-	if (space != 0) {
-		fprintf(stderr,
-			"InnoDB: Error: calling ibuf_free_excess_pages"
-			" for space %lu\n", (ulong) space);
-		return;
-	}
-
 #ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(fil_space_get_latch(space), RW_LOCK_EX));
+	ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL),
+			  RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
-	ut_ad(rw_lock_get_x_lock_count(fil_space_get_latch(space)) == 1);
+
+	ut_ad(rw_lock_get_x_lock_count(
+		fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1);
+
 	ut_ad(!ibuf_inside());
 
 	/* NOTE: We require that the thread did not own the latch before,
 	because then we know that we can obey the correct latching order
 	for ibuf latches */
 
-	ibuf_data = fil_space_get_ibuf_data(space);
-
-	if (ibuf_data == NULL) {
-		/* Not yet initialized */
-
-#if 0 /* defined UNIV_DEBUG */
-		fprintf(stderr,
-			"Ibuf for space %lu not yet initialized\n", space);
-#endif
+	if (!ibuf) {
+		/* Not yet initialized; not sure if this is possible, but
+		does no harm to check for it. */
 
 		return;
 	}
@@ -1882,7 +1927,7 @@ ibuf_free_excess_pages(
 
 		mutex_enter(&ibuf_mutex);
 
-		if (!ibuf_data_too_much_free(ibuf_data)) {
+		if (!ibuf_data_too_much_free()) {
 
 			mutex_exit(&ibuf_mutex);
 
@@ -1891,32 +1936,32 @@ ibuf_free_excess_pages(
 
 		mutex_exit(&ibuf_mutex);
 
-		ibuf_remove_free_page(space, ibuf_data);
+		ibuf_remove_free_page();
 	}
 }
 
-/*************************************************************************
-Reads page numbers from a leaf in an ibuf tree. */
+/*********************************************************************//**
+Reads page numbers from a leaf in an ibuf tree.
+@return a lower limit for the combined volume of records which will be
+merged */
 static
 ulint
 ibuf_get_merge_page_nos(
 /*====================*/
-				/* out: a lower limit for the combined volume
-				of records which will be merged */
-	ibool		contract,/* in: TRUE if this function is called to
+	ibool		contract,/*!< in: TRUE if this function is called to
 				contract the tree, FALSE if this is called
 				when a single page becomes full and we look
 				if it pays to read also nearby pages */
-	rec_t*		rec,	/* in: record from which we read up and down
+	rec_t*		rec,	/*!< in: record from which we read up and down
 				in the chain of records */
-	ulint*		space_ids,/* in/out: space id's of the pages */
-	ib_longlong*	space_versions,/* in/out: tablespace version
+	ulint*		space_ids,/*!< in/out: space id's of the pages */
+	ib_int64_t*	space_versions,/*!< in/out: tablespace version
 				timestamps; used to prevent reading in old
 				pages after DISCARD + IMPORT tablespace */
-	ulint*		page_nos,/* in/out: buffer for at least
+	ulint*		page_nos,/*!< in/out: buffer for at least
 				IBUF_MAX_N_PAGES_MERGED many page numbers;
 				the page numbers are in an ascending order */
-	ulint*		n_stored)/* out: number of page numbers stored to
+	ulint*		n_stored)/*!< out: number of page numbers stored to
 				page_nos in this function */
 {
 	ulint	prev_page_no;
@@ -1966,8 +2011,8 @@ ibuf_get_merge_page_nos(
 		rec_space_id = ibuf_rec_get_space(rec);
 
 		if (rec_space_id != first_space_id
-		    || rec_page_no / IBUF_MERGE_AREA
-		    != first_page_no / IBUF_MERGE_AREA) {
+		    || (rec_page_no / IBUF_MERGE_AREA)
+		    != (first_page_no / IBUF_MERGE_AREA)) {
 
 			break;
 		}
@@ -2068,81 +2113,40 @@ ibuf_get_merge_page_nos(
 	return(sum_volumes);
 }
 
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. */
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
 static
 ulint
 ibuf_contract_ext(
 /*==============*/
-			/* out: a lower limit for the combined size in bytes
-			of entries which will be merged from ibuf trees to the
-			pages read, 0 if ibuf is empty */
-	ulint*	n_pages,/* out: number of pages to which merged */
-	ibool	sync)	/* in: TRUE if the caller wants to wait for the
+	ulint*	n_pages,/*!< out: number of pages to which merged */
+	ibool	sync)	/*!< in: TRUE if the caller wants to wait for the
 			issued read with the highest tablespace address
 			to complete */
 {
-	ulint		rnd_pos;
-	ibuf_data_t*	data;
 	btr_pcur_t	pcur;
-	ulint		space;
-	ibool		all_trees_empty;
 	ulint		page_nos[IBUF_MAX_N_PAGES_MERGED];
 	ulint		space_ids[IBUF_MAX_N_PAGES_MERGED];
-	ib_longlong	space_versions[IBUF_MAX_N_PAGES_MERGED];
+	ib_int64_t	space_versions[IBUF_MAX_N_PAGES_MERGED];
 	ulint		n_stored;
 	ulint		sum_sizes;
 	mtr_t		mtr;
 
 	*n_pages = 0;
-loop:
 	ut_ad(!ibuf_inside());
 
 	mutex_enter(&ibuf_mutex);
 
-	ut_ad(ibuf_validate_low());
+	if (ibuf->empty) {
+ibuf_is_empty:
+		mutex_exit(&ibuf_mutex);
 
-	/* Choose an ibuf tree at random (though there really is only one tree
-	in the current implementation) */
-	ibuf_rnd += 865558671;
-
-	rnd_pos = ibuf_rnd % ibuf->size;
-
-	all_trees_empty = TRUE;
-
-	data = UT_LIST_GET_FIRST(ibuf->data_list);
-
-	for (;;) {
-		if (!data->empty) {
-			all_trees_empty = FALSE;
-
-			if (rnd_pos < data->size) {
-
-				break;
-			}
-
-			rnd_pos -= data->size;
-		}
-
-		data = UT_LIST_GET_NEXT(data_list, data);
-
-		if (data == NULL) {
-			if (all_trees_empty) {
-				mutex_exit(&ibuf_mutex);
-
-				return(0);
-			}
-
-			data = UT_LIST_GET_FIRST(ibuf->data_list);
-		}
+		return(0);
 	}
 
-	ut_ad(data);
-
-	space = data->index->space;
-
-	ut_a(space == 0);	/* We currently only have an ibuf tree in
-				space 0 */
 	mtr_start(&mtr);
 
 	ibuf_enter();
@@ -2150,22 +2154,23 @@ loop:
 	/* Open a cursor to a randomly chosen leaf of the tree, at a random
 	position within the leaf */
 
-	btr_pcur_open_at_rnd_pos(data->index, BTR_SEARCH_LEAF, &pcur, &mtr);
+	btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr);
 
-	if (0 == page_get_n_recs(btr_pcur_get_page(&pcur))) {
+	if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
+		/* When the ibuf tree is emptied completely, the last record
+		is removed using an optimistic delete and ibuf_size_update
+		is not called, causing ibuf->empty to remain FALSE. If we do
+		not reset it to TRUE here then database shutdown will hang
+		in the loop in ibuf_contract_for_n_pages. */
 
-		/* This tree is empty */
-
-		data->empty = TRUE;
+		ibuf->empty = TRUE;
 
 		ibuf_exit();
 
 		mtr_commit(&mtr);
 		btr_pcur_close(&pcur);
 
-		mutex_exit(&ibuf_mutex);
-
-		goto loop;
+		goto ibuf_is_empty;
 	}
 
 	mutex_exit(&ibuf_mutex);
@@ -2189,16 +2194,16 @@ loop:
 	return(sum_sizes + 1);
 }
 
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. */
-
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+UNIV_INTERN
 ulint
 ibuf_contract(
 /*==========*/
-			/* out: a lower limit for the combined size in bytes
-			of entries which will be merged from ibuf trees to the
-			pages read, 0 if ibuf is empty */
-	ibool	sync)	/* in: TRUE if the caller wants to wait for the
+	ibool	sync)	/*!< in: TRUE if the caller wants to wait for the
 			issued read with the highest tablespace address
 			to complete */
 {
@@ -2207,19 +2212,19 @@ ibuf_contract(
 	return(ibuf_contract_ext(&n_pages, sync));
 }
 
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. */
-
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+UNIV_INTERN
 ulint
 ibuf_contract_for_n_pages(
 /*======================*/
-			/* out: a lower limit for the combined size in bytes
-			of entries which will be merged from ibuf trees to the
-			pages read, 0 if ibuf is empty */
-	ibool	sync,	/* in: TRUE if the caller wants to wait for the
+	ibool	sync,	/*!< in: TRUE if the caller wants to wait for the
 			issued read with the highest tablespace address
 			to complete */
-	ulint	n_pages)/* in: try to read at least this many pages to
+	ulint	n_pages)/*!< in: try to read at least this many pages to
 			the buffer pool and merge the ibuf contents to
 			them */
 {
@@ -2242,13 +2247,13 @@ ibuf_contract_for_n_pages(
 	return(sum_bytes);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Contract insert buffer trees after insert if they are too big. */
 UNIV_INLINE
 void
 ibuf_contract_after_insert(
 /*=======================*/
-	ulint	entry_size)	/* in: size of a record which was inserted
+	ulint	entry_size)	/*!< in: size of a record which was inserted
 				into an ibuf tree */
 {
 	ibool	sync;
@@ -2283,26 +2288,24 @@ ibuf_contract_after_insert(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Gets an upper limit for the combined size of entries buffered in the insert
-buffer for a given page. */
-
+buffer for a given page.
+@return upper limit for the volume of buffered inserts for the index
+page, in bytes; UNIV_PAGE_SIZE, if the entries for the index page span
+several pages in the insert buffer */
+static
 ulint
 ibuf_get_volume_buffered(
 /*=====================*/
-				/* out: upper limit for the volume of
-				buffered inserts for the index page, in bytes;
-				we may also return UNIV_PAGE_SIZE, if the
-				entries for the index page span on several
-				pages in the insert buffer */
-	btr_pcur_t*	pcur,	/* in: pcur positioned at a place in an
+	btr_pcur_t*	pcur,	/*!< in: pcur positioned at a place in an
 				insert buffer tree where we would insert an
 				entry for the index page whose number is
 				page_no, latch mode has to be BTR_MODIFY_PREV
 				or BTR_MODIFY_TREE */
-	ulint		space,	/* in: space id */
-	ulint		page_no,/* in: page number of an index page */
-	mtr_t*		mtr)	/* in: mtr */
+	ulint		space,	/*!< in: space id */
+	ulint		page_no,/*!< in: page number of an index page */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ulint	volume;
 	rec_t*	rec;
@@ -2323,8 +2326,7 @@ ibuf_get_volume_buffered(
 	volume = 0;
 
 	rec = btr_pcur_get_rec(pcur);
-
-	page = buf_frame_align(rec);
+	page = page_align(rec);
 
 	if (page_rec_is_supremum(rec)) {
 		rec = page_rec_get_prev(rec);
@@ -2356,16 +2358,23 @@ ibuf_get_volume_buffered(
 		goto count_later;
 	}
 
-	prev_page = buf_page_get(0, prev_page_no, RW_X_LATCH, mtr);
+	{
+		buf_block_t*	block;
+
+		block = buf_page_get(
+			IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH, mtr);
+
+		buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+
+
+		prev_page = buf_block_get_frame(block);
+	}
+
 #ifdef UNIV_BTR_DEBUG
 	ut_a(btr_page_get_next(prev_page, mtr)
-	     == buf_frame_get_page_no(page));
+	     == page_get_page_no(page));
 #endif /* UNIV_BTR_DEBUG */
 
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(prev_page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
-
 	rec = page_get_supremum_rec(prev_page);
 	rec = page_rec_get_prev(rec);
 
@@ -2423,15 +2432,21 @@ count_later:
 		return(volume);
 	}
 
-	next_page = buf_page_get(0, next_page_no, RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
-	ut_a(btr_page_get_prev(next_page, mtr)
-	     == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
+	{
+		buf_block_t*	block;
 
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(next_page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
+		block = buf_page_get(
+			IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH, mtr);
+
+		buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+
+
+		next_page = buf_block_get_frame(block);
+	}
+
+#ifdef UNIV_BTR_DEBUG
+	ut_a(btr_page_get_prev(next_page, mtr) == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
 
 	rec = page_get_infimum_rec(next_page);
 	rec = page_rec_get_next(rec);
@@ -2456,37 +2471,33 @@ count_later:
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Reads the biggest tablespace id from the high end of the insert buffer
 tree and updates the counter in fil_system. */
-
+UNIV_INTERN
 void
 ibuf_update_max_tablespace_id(void)
 /*===============================*/
 {
 	ulint		max_space_id;
-	rec_t*		rec;
-	byte*		field;
+	const rec_t*	rec;
+	const byte*	field;
 	ulint		len;
-	ibuf_data_t*	ibuf_data;
-	dict_index_t*	ibuf_index;
 	btr_pcur_t	pcur;
 	mtr_t		mtr;
 
-	ibuf_data = fil_space_get_ibuf_data(0);
-
-	ibuf_index = ibuf_data->index;
-	ut_a(!dict_table_is_comp(ibuf_index->table));
+	ut_a(!dict_table_is_comp(ibuf->index->table));
 
 	ibuf_enter();
 
 	mtr_start(&mtr);
 
-	btr_pcur_open_at_index_side(FALSE, ibuf_index, BTR_SEARCH_LEAF,
-				    &pcur, TRUE, &mtr);
+	btr_pcur_open_at_index_side(
+		FALSE, ibuf->index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+
 	btr_pcur_move_to_prev(&pcur, &mtr);
 
-	if (btr_pcur_is_before_first_on_page(&pcur, &mtr)) {
+	if (btr_pcur_is_before_first_on_page(&pcur)) {
 		/* The tree is empty */
 
 		max_space_id = 0;
@@ -2508,24 +2519,26 @@ ibuf_update_max_tablespace_id(void)
 	fil_set_max_space_id_if_bigger(max_space_id);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Makes an index insert to the insert buffer, instead of directly to the disk
-page, if this is possible. */
+page, if this is possible.
+@return	DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
 static
 ulint
 ibuf_insert_low(
 /*============*/
-				/* out: DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
-	ulint		mode,	/* in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
-	dtuple_t*	entry,	/* in: index entry to insert */
-	dict_index_t*	index,	/* in: index where to insert; must not be
+	ulint		mode,	/*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
+	const dtuple_t*	entry,	/*!< in: index entry to insert */
+	ulint		entry_size,
+				/*!< in: rec_get_converted_size(index, entry) */
+	dict_index_t*	index,	/*!< in: index where to insert; must not be
 				unique or clustered */
-	ulint		space,	/* in: space id where to insert */
-	ulint		page_no,/* in: page number where to insert */
-	que_thr_t*	thr)	/* in: query thread */
+	ulint		space,	/*!< in: space id where to insert */
+	ulint		zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint		page_no,/*!< in: page number where to insert */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	big_rec_t*	dummy_big_rec;
-	ulint		entry_size;
 	btr_pcur_t	pcur;
 	btr_cur_t*	cursor;
 	dtuple_t*	ibuf_entry;
@@ -2534,33 +2547,25 @@ ibuf_insert_low(
 	rec_t*		ins_rec;
 	ibool		old_bit_value;
 	page_t*		bitmap_page;
-	ibuf_data_t*	ibuf_data;
-	dict_index_t*	ibuf_index;
 	page_t*		root;
 	ulint		err;
 	ibool		do_merge;
 	ulint		space_ids[IBUF_MAX_N_PAGES_MERGED];
-	ib_longlong	space_versions[IBUF_MAX_N_PAGES_MERGED];
+	ib_int64_t	space_versions[IBUF_MAX_N_PAGES_MERGED];
 	ulint		page_nos[IBUF_MAX_N_PAGES_MERGED];
 	ulint		n_stored;
 	ulint		bits;
 	mtr_t		mtr;
 	mtr_t		bitmap_mtr;
 
-	ut_a(!(index->type & DICT_CLUSTERED));
+	ut_a(!dict_index_is_clust(index));
 	ut_ad(dtuple_check_typed(entry));
+	ut_ad(ut_is_2pow(zip_size));
 
 	ut_a(trx_sys_multiple_tablespace_format);
 
 	do_merge = FALSE;
 
-	/* Currently the insert buffer of space 0 takes care of inserts to all
-	tablespaces */
-
-	ibuf_data = fil_space_get_ibuf_data(0);
-
-	ibuf_index = ibuf_data->index;
-
 	mutex_enter(&ibuf_mutex);
 
 	if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
@@ -2587,7 +2592,7 @@ ibuf_insert_low(
 
 		mutex_enter(&ibuf_mutex);
 
-		while (!ibuf_data_enough_free_for_insert(ibuf_data)) {
+		while (!ibuf_data_enough_free_for_insert()) {
 
 			mutex_exit(&ibuf_mutex);
 
@@ -2595,7 +2600,7 @@ ibuf_insert_low(
 
 			mutex_exit(&ibuf_pessimistic_insert_mutex);
 
-			err = ibuf_add_free_page(0, ibuf_data);
+			err = ibuf_add_free_page();
 
 			if (err == DB_STRONG_FAIL) {
 
@@ -2612,8 +2617,6 @@ ibuf_insert_low(
 		ibuf_enter();
 	}
 
-	entry_size = rec_get_converted_size(index, entry);
-
 	heap = mem_heap_create(512);
 
 	/* Build the entry which contains the space id and the page number as
@@ -2628,18 +2631,19 @@ ibuf_insert_low(
 
 	mtr_start(&mtr);
 
-	btr_pcur_open(ibuf_index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
+	btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
 
 	/* Find out the volume of already buffered inserts for the same index
 	page */
 	buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr);
 
-#ifdef UNIV_IBUF_DEBUG
+#ifdef UNIV_IBUF_COUNT_DEBUG
 	ut_a((buffered == 0) || ibuf_count_get(space, page_no));
 #endif
 	mtr_start(&bitmap_mtr);
 
-	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &bitmap_mtr);
+	bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
+					       zip_size, &bitmap_mtr);
 
 	/* We check if the index page is suitable for buffered entries */
 
@@ -2652,11 +2656,11 @@ ibuf_insert_low(
 		goto function_exit;
 	}
 
-	bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
+	bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
 					 IBUF_BITMAP_FREE, &bitmap_mtr);
 
 	if (buffered + entry_size + page_dir_calc_reserved_space(1)
-	    > ibuf_index_page_calc_free_from_bits(bits)) {
+	    > ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
 		mtr_commit(&bitmap_mtr);
 
 		/* It may not fit */
@@ -2673,11 +2677,12 @@ ibuf_insert_low(
 	/* Set the bitmap bit denoting that the insert buffer contains
 	buffered entries for this index page, if the bit is not set yet */
 
-	old_bit_value = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
-						  IBUF_BITMAP_BUFFERED,
-						  &bitmap_mtr);
+	old_bit_value = ibuf_bitmap_page_get_bits(
+		bitmap_page, page_no, zip_size,
+		IBUF_BITMAP_BUFFERED, &bitmap_mtr);
+
 	if (!old_bit_value) {
-		ibuf_bitmap_page_set_bits(bitmap_page, page_no,
+		ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
 					  IBUF_BITMAP_BUFFERED, TRUE,
 					  &bitmap_mtr);
 	}
@@ -2689,12 +2694,11 @@ ibuf_insert_low(
 	if (mode == BTR_MODIFY_PREV) {
 		err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
 						ibuf_entry, &ins_rec,
-						&dummy_big_rec, thr,
-						&mtr);
+						&dummy_big_rec, 0, thr, &mtr);
 		if (err == DB_SUCCESS) {
 			/* Update the page max trx id field */
-			page_update_max_trx_id(buf_frame_align(ins_rec),
-					       thr_get_trx(thr)->id);
+			page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
+					       thr_get_trx(thr)->id, &mtr);
 		}
 	} else {
 		ut_ad(mode == BTR_MODIFY_TREE);
@@ -2704,25 +2708,24 @@ ibuf_insert_low(
 		which would cause the x-latching of the root after that to
 		break the latching order. */
 
-		root = ibuf_tree_root_get(ibuf_data, 0, &mtr);
+		root = ibuf_tree_root_get(&mtr);
 
 		err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
 						 | BTR_NO_UNDO_LOG_FLAG,
 						 cursor,
 						 ibuf_entry, &ins_rec,
-						 &dummy_big_rec, thr,
-						 &mtr);
+						 &dummy_big_rec, 0, thr, &mtr);
 		if (err == DB_SUCCESS) {
 			/* Update the page max trx id field */
-			page_update_max_trx_id(buf_frame_align(ins_rec),
-					       thr_get_trx(thr)->id);
+			page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
+					       thr_get_trx(thr)->id, &mtr);
 		}
 
-		ibuf_data_sizes_update(ibuf_data, root, &mtr);
+		ibuf_size_update(root, &mtr);
 	}
 
 function_exit:
-#ifdef UNIV_IBUF_DEBUG
+#ifdef UNIV_IBUF_COUNT_DEBUG
 	if (err == DB_SUCCESS) {
 		fprintf(stderr,
 			"Incrementing ibuf count of space %lu page %lu\n"
@@ -2734,7 +2737,6 @@ function_exit:
 	}
 #endif
 	if (mode == BTR_MODIFY_TREE) {
-		ut_ad(ibuf_validate_low());
 
 		mutex_exit(&ibuf_mutex);
 		mutex_exit(&ibuf_pessimistic_insert_mutex);
@@ -2746,17 +2748,17 @@ function_exit:
 
 	mem_heap_free(heap);
 
-	mutex_enter(&ibuf_mutex);
-
 	if (err == DB_SUCCESS) {
-		ibuf_data->empty = FALSE;
-		ibuf_data->n_inserts++;
-	}
+		mutex_enter(&ibuf_mutex);
 
-	mutex_exit(&ibuf_mutex);
+		ibuf->empty = FALSE;
+		ibuf->n_inserts++;
 
-	if ((mode == BTR_MODIFY_TREE) && (err == DB_SUCCESS)) {
-		ibuf_contract_after_insert(entry_size);
+		mutex_exit(&ibuf_mutex);
+
+		if (mode == BTR_MODIFY_TREE) {
+			ibuf_contract_after_insert(entry_size);
+		}
 	}
 
 	if (do_merge) {
@@ -2770,39 +2772,56 @@ function_exit:
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Makes an index insert to the insert buffer, instead of directly to the disk
 page, if this is possible. Does not do insert if the index is clustered
-or unique. */
-
+or unique.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 ibuf_insert(
 /*========*/
-				/* out: TRUE if success */
-	dtuple_t*	entry,	/* in: index entry to insert */
-	dict_index_t*	index,	/* in: index where to insert */
-	ulint		space,	/* in: space id where to insert */
-	ulint		page_no,/* in: page number where to insert */
-	que_thr_t*	thr)	/* in: query thread */
+	const dtuple_t*	entry,	/*!< in: index entry to insert */
+	dict_index_t*	index,	/*!< in: index where to insert */
+	ulint		space,	/*!< in: space id where to insert */
+	ulint		zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint		page_no,/*!< in: page number where to insert */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	ulint	err;
+	ulint	entry_size;
 
 	ut_a(trx_sys_multiple_tablespace_format);
 	ut_ad(dtuple_check_typed(entry));
+	ut_ad(ut_is_2pow(zip_size));
 
-	ut_a(!(index->type & DICT_CLUSTERED));
+	ut_a(!dict_index_is_clust(index));
 
-	if (rec_get_converted_size(index, entry)
+	switch (UNIV_EXPECT(ibuf_use, IBUF_USE_INSERT)) {
+	case IBUF_USE_NONE:
+		return(FALSE);
+	case IBUF_USE_INSERT:
+		goto do_insert;
+	case IBUF_USE_COUNT:
+		break;
+	}
+
+	ut_error; /* unknown value of ibuf_use */
+
+do_insert:
+	entry_size = rec_get_converted_size(index, entry, 0);
+
+	if (entry_size
 	    >= (page_get_free_space_of_empty(dict_table_is_comp(index->table))
 		/ 2)) {
 		return(FALSE);
 	}
 
-	err = ibuf_insert_low(BTR_MODIFY_PREV, entry, index, space, page_no,
-			      thr);
+	err = ibuf_insert_low(BTR_MODIFY_PREV, entry, entry_size,
+			      index, space, zip_size, page_no, thr);
 	if (err == DB_FAIL) {
-		err = ibuf_insert_low(BTR_MODIFY_TREE, entry, index, space,
-				      page_no, thr);
+		err = ibuf_insert_low(BTR_MODIFY_TREE, entry, entry_size,
+				      index, space, zip_size, page_no, thr);
 	}
 
 	if (err == DB_SUCCESS) {
@@ -2819,21 +2838,22 @@ ibuf_insert(
 	}
 }
 
-/************************************************************************
+/********************************************************************//**
 During merge, inserts to an index page a secondary index entry extracted
 from the insert buffer. */
 static
 void
 ibuf_insert_to_index_page(
 /*======================*/
-	dtuple_t*	entry,	/* in: buffered entry to insert */
-	page_t*		page,	/* in: index page where the buffered entry
+	dtuple_t*	entry,	/*!< in: buffered entry to insert */
+	buf_block_t*	block,	/*!< in/out: index page where the buffered entry
 				should be placed */
-	dict_index_t*	index,	/* in: record descriptor */
-	mtr_t*		mtr)	/* in: mtr */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	page_cur_t	page_cur;
 	ulint		low_match;
+	page_t*		page		= buf_block_get_frame(block);
 	rec_t*		rec;
 	page_t*		bitmap_page;
 	ulint		old_bits;
@@ -2859,7 +2879,7 @@ ibuf_insert_to_index_page(
 		      "InnoDB: but the number of fields does not match!\n",
 		      stderr);
 dump:
-		buf_page_print(page);
+		buf_page_print(page, 0);
 
 		dtuple_print(stderr, entry);
 
@@ -2874,97 +2894,108 @@ dump:
 		return;
 	}
 
-	low_match = page_cur_search(page, index, entry,
+	low_match = page_cur_search(block, index, entry,
 				    PAGE_CUR_LE, &page_cur);
 
 	if (low_match == dtuple_get_n_fields(entry)) {
+		page_zip_des_t*	page_zip;
+
 		rec = page_cur_get_rec(&page_cur);
+		page_zip = buf_block_get_page_zip(block);
 
-		btr_cur_del_unmark_for_ibuf(rec, mtr);
+		btr_cur_del_unmark_for_ibuf(rec, page_zip, mtr);
 	} else {
-		rec = page_cur_tuple_insert(&page_cur, entry, index, mtr);
+		rec = page_cur_tuple_insert(&page_cur, entry, index, 0, mtr);
 
-		if (rec == NULL) {
-			/* If the record did not fit, reorganize */
+		if (UNIV_LIKELY(rec != NULL)) {
+			return;
+		}
 
-			btr_page_reorganize(page, index, mtr);
+		/* If the record did not fit, reorganize */
 
-			page_cur_search(page, index, entry,
-					PAGE_CUR_LE, &page_cur);
+		btr_page_reorganize(block, index, mtr);
+		page_cur_search(block, index, entry, PAGE_CUR_LE, &page_cur);
 
-			/* This time the record must fit */
-			if (UNIV_UNLIKELY(!page_cur_tuple_insert(
-						  &page_cur, entry, index,
-						  mtr))) {
+		/* This time the record must fit */
+		if (UNIV_UNLIKELY
+		    (!page_cur_tuple_insert(&page_cur, entry, index,
+					    0, mtr))) {
+			ulint	space;
+			ulint	page_no;
+			ulint	zip_size;
 
-				ut_print_timestamp(stderr);
+			ut_print_timestamp(stderr);
 
-				fprintf(stderr,
-					"  InnoDB: Error: Insert buffer insert"
-					" fails; page free %lu,"
-					" dtuple size %lu\n",
-					(ulong) page_get_max_insert_size(
-						page, 1),
-					(ulong) rec_get_converted_size(
-						index, entry));
-				fputs("InnoDB: Cannot insert index record ",
-				      stderr);
-				dtuple_print(stderr, entry);
-				fputs("\nInnoDB: The table where"
-				      " this index record belongs\n"
-				      "InnoDB: is now probably corrupt."
-				      " Please run CHECK TABLE on\n"
-				      "InnoDB: that table.\n", stderr);
+			fprintf(stderr,
+				"  InnoDB: Error: Insert buffer insert"
+				" fails; page free %lu,"
+				" dtuple size %lu\n",
+				(ulong) page_get_max_insert_size(
+					page, 1),
+				(ulong) rec_get_converted_size(
+					index, entry, 0));
+			fputs("InnoDB: Cannot insert index record ",
+			      stderr);
+			dtuple_print(stderr, entry);
+			fputs("\nInnoDB: The table where"
+			      " this index record belongs\n"
+			      "InnoDB: is now probably corrupt."
+			      " Please run CHECK TABLE on\n"
+			      "InnoDB: that table.\n", stderr);
 
-				bitmap_page = ibuf_bitmap_get_map_page(
-					buf_frame_get_space_id(page),
-					buf_frame_get_page_no(page),
-					mtr);
-				old_bits = ibuf_bitmap_page_get_bits(
-					bitmap_page,
-					buf_frame_get_page_no(page),
-					IBUF_BITMAP_FREE, mtr);
+			space = page_get_space_id(page);
+			zip_size = buf_block_get_zip_size(block);
+			page_no = page_get_page_no(page);
 
-				fprintf(stderr, "InnoDB: Bitmap bits %lu\n",
-					(ulong) old_bits);
+			bitmap_page = ibuf_bitmap_get_map_page(
+				space, page_no, zip_size, mtr);
+			old_bits = ibuf_bitmap_page_get_bits(
+				bitmap_page, page_no, zip_size,
+				IBUF_BITMAP_FREE, mtr);
 
-				fputs("InnoDB: Submit a detailed bug report"
-				      " to http://bugs.mysql.com\n", stderr);
-			}
+			fprintf(stderr,
+				"InnoDB: space %lu, page %lu,"
+				" zip_size %lu, bitmap bits %lu\n",
+				(ulong) space, (ulong) page_no,
+				(ulong) zip_size, (ulong) old_bits);
+
+			fputs("InnoDB: Submit a detailed bug report"
+			      " to http://bugs.mysql.com\n", stderr);
 		}
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Deletes from ibuf the record on which pcur is positioned. If we have to
 resort to a pessimistic delete, this function commits mtr and closes
-the cursor. */
+the cursor.
+@return	TRUE if mtr was committed and pcur closed in this operation */
 static
 ibool
 ibuf_delete_rec(
 /*============*/
-				/* out: TRUE if mtr was committed and pcur
-				closed in this operation */
-	ulint		space,	/* in: space id */
-	ulint		page_no,/* in: index page number where the record
+	ulint		space,	/*!< in: space id */
+	ulint		page_no,/*!< in: index page number where the record
 				should belong */
-	btr_pcur_t*	pcur,	/* in: pcur positioned on the record to
+	btr_pcur_t*	pcur,	/*!< in: pcur positioned on the record to
 				delete, having latch mode BTR_MODIFY_LEAF */
-	dtuple_t*	search_tuple,
-				/* in: search tuple for entries of page_no */
-	mtr_t*		mtr)	/* in: mtr */
+	const dtuple_t*	search_tuple,
+				/*!< in: search tuple for entries of page_no */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ibool		success;
-	ibuf_data_t*	ibuf_data;
 	page_t*		root;
 	ulint		err;
 
 	ut_ad(ibuf_inside());
+	ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
+	ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
+	ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
 
 	success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
 
 	if (success) {
-#ifdef UNIV_IBUF_DEBUG
+#ifdef UNIV_IBUF_COUNT_DEBUG
 		fprintf(stderr,
 			"Decrementing ibuf count of space %lu page %lu\n"
 			"from %lu by 1\n", space, page_no,
@@ -2975,16 +3006,15 @@ ibuf_delete_rec(
 		return(FALSE);
 	}
 
+	ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
+	ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
+	ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
+
 	/* We have to resort to a pessimistic delete from ibuf */
 	btr_pcur_store_position(pcur, mtr);
 
 	btr_pcur_commit_specify_mtr(pcur, mtr);
 
-	/* Currently the insert buffer of space 0 takes care of inserts to all
-	tablespaces */
-
-	ibuf_data = fil_space_get_ibuf_data(0);
-
 	mutex_enter(&ibuf_mutex);
 
 	mtr_start(mtr);
@@ -2992,7 +3022,7 @@ ibuf_delete_rec(
 	success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr);
 
 	if (!success) {
-		if (fil_space_get_version(space) == -1) {
+		if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
 			/* The tablespace has been dropped.  It is possible
 			that another thread has deleted the insert buffer
 			entry.  Do not complain. */
@@ -3018,7 +3048,7 @@ ibuf_delete_rec(
 		btr_pcur_commit_specify_mtr(pcur, mtr);
 
 		fputs("InnoDB: Validating insert buffer tree:\n", stderr);
-		if (!btr_validate_index(ibuf_data->index, NULL)) {
+		if (!btr_validate_index(ibuf->index, NULL)) {
 			ut_error;
 		}
 
@@ -3028,18 +3058,16 @@ ibuf_delete_rec(
 		goto func_exit;
 	}
 
-	root = ibuf_tree_root_get(ibuf_data, 0, mtr);
+	root = ibuf_tree_root_get(mtr);
 
 	btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
-				   FALSE, mtr);
+				   RB_NONE, mtr);
 	ut_a(err == DB_SUCCESS);
 
-#ifdef UNIV_IBUF_DEBUG
+#ifdef UNIV_IBUF_COUNT_DEBUG
 	ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
 #endif
-	ibuf_data_sizes_update(ibuf_data, root, mtr);
-
-	ut_ad(ibuf_validate_low());
+	ibuf_size_update(root, mtr);
 
 commit_and_exit:
 	btr_pcur_commit_specify_mtr(pcur, mtr);
@@ -3052,53 +3080,74 @@ func_exit:
 	return(TRUE);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 When an index page is read from a disk to the buffer pool, this function
 inserts to the page the possible index entries buffered in the insert buffer.
 The entries are deleted from the insert buffer. If the page is not read, but
 created in the buffer pool, this function deletes its buffered entries from
 the insert buffer; there can exist entries for such a page if the page
 belonged to an index which subsequently was dropped. */
-
+UNIV_INTERN
 void
 ibuf_merge_or_delete_for_page(
 /*==========================*/
-	page_t*	page,	/* in: if page has been read from disk, pointer to
-			the page x-latched, else NULL */
-	ulint	space,	/* in: space id of the index page */
-	ulint	page_no,/* in: page number of the index page */
-	ibool	update_ibuf_bitmap)/* in: normally this is set to TRUE, but if
-			we have deleted or are deleting the tablespace, then we
-			naturally do not want to update a non-existent bitmap
-			page */
+	buf_block_t*	block,	/*!< in: if page has been read from
+				disk, pointer to the page x-latched,
+				else NULL */
+	ulint		space,	/*!< in: space id of the index page */
+	ulint		page_no,/*!< in: page number of the index page */
+	ulint		zip_size,/*!< in: compressed page size in bytes,
+				or 0 */
+	ibool		update_ibuf_bitmap)/*!< in: normally this is set
+				to TRUE, but if we have deleted or are
+				deleting the tablespace, then we
+				naturally do not want to update a
+				non-existent bitmap page */
 {
 	mem_heap_t*	heap;
 	btr_pcur_t	pcur;
-	dtuple_t*	entry;
 	dtuple_t*	search_tuple;
-	rec_t*		ibuf_rec;
-	buf_block_t*	block;
-	page_t*		bitmap_page;
-	ibuf_data_t*	ibuf_data;
 	ulint		n_inserts;
 #ifdef UNIV_IBUF_DEBUG
 	ulint		volume;
 #endif
+	page_zip_des_t*	page_zip		= NULL;
 	ibool		tablespace_being_deleted = FALSE;
 	ibool		corruption_noticed	= FALSE;
 	mtr_t		mtr;
 
-	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
+	ut_ad(!block || buf_block_get_space(block) == space);
+	ut_ad(!block || buf_block_get_page_no(block) == page_no);
+	ut_ad(!block || buf_block_get_zip_size(block) == zip_size);
 
-		return;
-	}
-
-	if (ibuf_fixed_addr_page(space, page_no) || fsp_descr_page(page_no)
+	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE
 	    || trx_sys_hdr_page(space, page_no)) {
 		return;
 	}
 
-	if (update_ibuf_bitmap) {
+	/* We cannot refer to zip_size in the following, because
+	zip_size is passed as ULINT_UNDEFINED (it is unknown) when
+	buf_read_ibuf_merge_pages() is merging (discarding) changes
+	for a dropped tablespace.  When block != NULL or
+	update_ibuf_bitmap is specified, the zip_size must be known.
+	That is why we will repeat the check below, with zip_size in
+	place of 0.  Passing zip_size as 0 assumes that the
+	uncompressed page size always is a power-of-2 multiple of the
+	compressed page size. */
+
+	if (ibuf_fixed_addr_page(space, 0, page_no)
+	    || fsp_descr_page(0, page_no)) {
+		return;
+	}
+
+	if (UNIV_LIKELY(update_ibuf_bitmap)) {
+		ut_a(ut_is_2pow(zip_size));
+
+		if (ibuf_fixed_addr_page(space, zip_size, page_no)
+		    || fsp_descr_page(zip_size, page_no)) {
+			return;
+		}
+
 		/* If the following returns FALSE, we get the counter
 		incremented, and must decrement it when we leave this
 		function. When the counter is > 0, that prevents tablespace
@@ -3106,38 +3155,42 @@ ibuf_merge_or_delete_for_page(
 
 		tablespace_being_deleted = fil_inc_pending_ibuf_merges(space);
 
-		if (tablespace_being_deleted) {
+		if (UNIV_UNLIKELY(tablespace_being_deleted)) {
 			/* Do not try to read the bitmap page from space;
 			just delete the ibuf records for the page */
 
-			page = NULL;
+			block = NULL;
 			update_ibuf_bitmap = FALSE;
-		}
-	}
+		} else {
+			page_t*	bitmap_page;
 
-	if (update_ibuf_bitmap) {
-		mtr_start(&mtr);
-		bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
+			mtr_start(&mtr);
 
-		if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
-					       IBUF_BITMAP_BUFFERED, &mtr)) {
-			/* No inserts buffered for this page */
-			mtr_commit(&mtr);
+			bitmap_page = ibuf_bitmap_get_map_page(
+				space, page_no, zip_size, &mtr);
 
-			if (!tablespace_being_deleted) {
-				fil_decr_pending_ibuf_merges(space);
+			if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
+						       zip_size,
+						       IBUF_BITMAP_BUFFERED,
+						       &mtr)) {
+				/* No inserts buffered for this page */
+				mtr_commit(&mtr);
+
+				if (!tablespace_being_deleted) {
+					fil_decr_pending_ibuf_merges(space);
+				}
+
+				return;
 			}
-
-			return;
+			mtr_commit(&mtr);
 		}
-		mtr_commit(&mtr);
+	} else if (block
+		   && (ibuf_fixed_addr_page(space, zip_size, page_no)
+		      || fsp_descr_page(zip_size, page_no))) {
+
+		return;
 	}
 
-	/* Currently the insert buffer of space 0 takes care of inserts to all
-	tablespaces */
-
-	ibuf_data = fil_space_get_ibuf_data(0);
-
 	ibuf_enter();
 
 	heap = mem_heap_create(512);
@@ -3150,16 +3203,20 @@ ibuf_merge_or_delete_for_page(
 							   heap);
 	}
 
-	if (page) {
+	if (block) {
 		/* Move the ownership of the x-latch on the page to this OS
 		thread, so that we can acquire a second x-latch on it. This
 		is needed for the insert operations to the index page to pass
 		the debug checks. */
 
-		block = buf_block_align(page);
 		rw_lock_x_lock_move_ownership(&(block->lock));
+		page_zip = buf_block_get_page_zip(block);
 
-		if (fil_page_get_type(page) != FIL_PAGE_INDEX) {
+		if (UNIV_UNLIKELY(fil_page_get_type(block->frame)
+				  != FIL_PAGE_INDEX)
+		    || UNIV_UNLIKELY(!page_is_leaf(block->frame))) {
+
+			page_t*	bitmap_page;
 
 			corruption_noticed = TRUE;
 
@@ -3171,14 +3228,14 @@ ibuf_merge_or_delete_for_page(
 			      stderr);
 
 			bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
-							       &mtr);
-			buf_page_print(bitmap_page);
+							       zip_size, &mtr);
+			buf_page_print(bitmap_page, 0);
 
 			mtr_commit(&mtr);
 
 			fputs("\nInnoDB: Dump of the page:\n", stderr);
 
-			buf_page_print(page);
+			buf_page_print(block->frame, 0);
 
 			fprintf(stderr,
 				"InnoDB: Error: corruption in the tablespace."
@@ -3186,7 +3243,7 @@ ibuf_merge_or_delete_for_page(
 				"InnoDB: buffer records to page n:o %lu"
 				" though the page\n"
 				"InnoDB: type is %lu, which is"
-				" not an index page!\n"
+				" not an index leaf page!\n"
 				"InnoDB: We try to resolve the problem"
 				" by skipping the insert buffer\n"
 				"InnoDB: merge for this page."
@@ -3196,7 +3253,8 @@ ibuf_merge_or_delete_for_page(
 				"InnoDB: Please submit a detailed bug report"
 				" to http://bugs.mysql.com\n\n",
 				(ulong) page_no,
-				(ulong) fil_page_get_type(page));
+				(ulong)
+				fil_page_get_type(block->frame));
 		}
 	}
 
@@ -3207,65 +3265,76 @@ ibuf_merge_or_delete_for_page(
 loop:
 	mtr_start(&mtr);
 
-	if (page) {
-		ibool success = buf_page_get_known_nowait(RW_X_LATCH, page,
-							  BUF_KEEP_OLD,
-							  __FILE__, __LINE__,
-							  &mtr);
+	if (block) {
+		ibool success;
+
+		success = buf_page_get_known_nowait(
+			RW_X_LATCH, block,
+			BUF_KEEP_OLD, __FILE__, __LINE__, &mtr);
+
 		ut_a(success);
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
+
+		buf_block_dbg_add_level(block, SYNC_TREE_NODE);
 	}
 
 	/* Position pcur in the insert buffer at the first entry for this
 	index page */
-	btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE,
-				  BTR_MODIFY_LEAF, &pcur, &mtr);
-	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+	btr_pcur_open_on_user_rec(
+		ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
+		&pcur, &mtr);
+
+	if (!btr_pcur_is_on_user_rec(&pcur)) {
 		ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
 
 		goto reset_bit;
 	}
 
 	for (;;) {
-		ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr));
+		rec_t*	rec;
 
-		ibuf_rec = btr_pcur_get_rec(&pcur);
+		ut_ad(btr_pcur_is_on_user_rec(&pcur));
+
+		rec = btr_pcur_get_rec(&pcur);
 
 		/* Check if the entry is for this index page */
-		if (ibuf_rec_get_page_no(ibuf_rec) != page_no
-		    || ibuf_rec_get_space(ibuf_rec) != space) {
-			if (page) {
-				page_header_reset_last_insert(page, &mtr);
+		if (ibuf_rec_get_page_no(rec) != page_no
+		    || ibuf_rec_get_space(rec) != space) {
+
+			if (block) {
+				page_header_reset_last_insert(
+					block->frame, page_zip, &mtr);
 			}
+
 			goto reset_bit;
 		}
 
-		if (corruption_noticed) {
+		if (UNIV_UNLIKELY(corruption_noticed)) {
 			fputs("InnoDB: Discarding record\n ", stderr);
-			rec_print_old(stderr, ibuf_rec);
-			fputs("\n from the insert buffer!\n\n", stderr);
-		} else if (page) {
+			rec_print_old(stderr, rec);
+			fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
+		} else if (block) {
 			/* Now we have at pcur a record which should be
 			inserted to the index page; NOTE that the call below
-			copies pointers to fields in ibuf_rec, and we must
-			keep the latch to the ibuf_rec page until the
+			copies pointers to fields in rec, and we must
+			keep the latch to the rec page until the
 			insertion is finished! */
+			dtuple_t*	entry;
+			trx_id_t	max_trx_id;
 			dict_index_t*	dummy_index;
-			dulint		max_trx_id = page_get_max_trx_id(
-				buf_frame_align(ibuf_rec));
-			page_update_max_trx_id(page, max_trx_id);
+
+			max_trx_id = page_get_max_trx_id(page_align(rec));
+			page_update_max_trx_id(block, page_zip, max_trx_id,
+					       &mtr);
 
 			entry = ibuf_build_entry_from_ibuf_rec(
-				ibuf_rec, heap, &dummy_index);
+				rec, heap, &dummy_index);
 #ifdef UNIV_IBUF_DEBUG
-			volume += rec_get_converted_size(dummy_index, entry)
+			volume += rec_get_converted_size(dummy_index, entry, 0)
 				+ page_dir_calc_reserved_space(1);
 			ut_a(volume <= 4 * UNIV_PAGE_SIZE
 			     / IBUF_PAGE_SIZE_PER_FREE_SPACE);
 #endif
-			ibuf_insert_to_index_page(entry, page,
+			ibuf_insert_to_index_page(entry, block,
 						  dummy_index, &mtr);
 			ibuf_dummy_index_free(dummy_index);
 		}
@@ -3279,9 +3348,7 @@ loop:
 			we start from the beginning again */
 
 			goto loop;
-		}
-
-		if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) {
+		} else if (btr_pcur_is_after_last_on_page(&pcur)) {
 			mtr_commit(&mtr);
 			btr_pcur_close(&pcur);
 
@@ -3290,39 +3357,38 @@ loop:
 	}
 
 reset_bit:
-#ifdef UNIV_IBUF_DEBUG
+#ifdef UNIV_IBUF_COUNT_DEBUG
 	if (ibuf_count_get(space, page_no) > 0) {
 		/* btr_print_tree(ibuf_data->index->tree, 100);
 		ibuf_print(); */
 	}
 #endif
-	if (update_ibuf_bitmap) {
-		bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
-		ibuf_bitmap_page_set_bits(bitmap_page, page_no,
-					  IBUF_BITMAP_BUFFERED, FALSE, &mtr);
-		if (page) {
+	if (UNIV_LIKELY(update_ibuf_bitmap)) {
+		page_t*	bitmap_page;
+
+		bitmap_page = ibuf_bitmap_get_map_page(
+			space, page_no, zip_size, &mtr);
+
+		ibuf_bitmap_page_set_bits(
+			bitmap_page, page_no, zip_size,
+			IBUF_BITMAP_BUFFERED, FALSE, &mtr);
+
+		if (block) {
 			ulint old_bits = ibuf_bitmap_page_get_bits(
-				bitmap_page, page_no, IBUF_BITMAP_FREE, &mtr);
-			ulint new_bits = ibuf_index_page_calc_free(page);
-#if 0 /* defined UNIV_IBUF_DEBUG */
-			fprintf(stderr, "Old bits %lu new bits %lu"
-				" max size %lu\n",
-				old_bits, new_bits,
-				page_get_max_insert_size_after_reorganize(
-					page, 1));
-#endif
+				bitmap_page, page_no, zip_size,
+				IBUF_BITMAP_FREE, &mtr);
+
+			ulint new_bits = ibuf_index_page_calc_free(
+				zip_size, block);
+
 			if (old_bits != new_bits) {
-				ibuf_bitmap_page_set_bits(bitmap_page, page_no,
-							  IBUF_BITMAP_FREE,
-							  new_bits, &mtr);
+				ibuf_bitmap_page_set_bits(
+					bitmap_page, page_no, zip_size,
+					IBUF_BITMAP_FREE, new_bits, &mtr);
 			}
 		}
 	}
-#if 0 /* defined UNIV_IBUF_DEBUG */
-	fprintf(stderr,
-		"Ibuf merge %lu records volume %lu to page no %lu\n",
-		n_inserts, volume, page_no);
-#endif
+
 	mtr_commit(&mtr);
 	btr_pcur_close(&pcur);
 	mem_heap_free(heap);
@@ -3330,8 +3396,8 @@ reset_bit:
 	/* Protect our statistics keeping from race conditions */
 	mutex_enter(&ibuf_mutex);
 
-	ibuf_data->n_merges++;
-	ibuf_data->n_merged_recs += n_inserts;
+	ibuf->n_merges++;
+	ibuf->n_merged_recs += n_inserts;
 
 	mutex_exit(&ibuf_mutex);
 
@@ -3341,21 +3407,22 @@ reset_bit:
 	}
 
 	ibuf_exit();
-#ifdef UNIV_IBUF_DEBUG
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
 	ut_a(ibuf_count_get(space, page_no) == 0);
 #endif
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Deletes all entries in the insert buffer for a given space id. This is used
 in DISCARD TABLESPACE and IMPORT TABLESPACE.
 NOTE: this does not update the page free bitmaps in the space. The space will
 become CORRUPT when you call this function! */
-
+UNIV_INTERN
 void
 ibuf_delete_for_discarded_space(
 /*============================*/
-	ulint	space)	/* in: space id */
+	ulint	space)	/*!< in: space id */
 {
 	mem_heap_t*	heap;
 	btr_pcur_t	pcur;
@@ -3363,15 +3430,9 @@ ibuf_delete_for_discarded_space(
 	rec_t*		ibuf_rec;
 	ulint		page_no;
 	ibool		closed;
-	ibuf_data_t*	ibuf_data;
 	ulint		n_inserts;
 	mtr_t		mtr;
 
-	/* Currently the insert buffer of space 0 takes care of inserts to all
-	tablespaces */
-
-	ibuf_data = fil_space_get_ibuf_data(0);
-
 	heap = mem_heap_create(512);
 
 	/* Use page number 0 to build the search tuple so that we get the
@@ -3387,16 +3448,18 @@ loop:
 
 	/* Position pcur in the insert buffer at the first entry for the
 	space */
-	btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE,
-				  BTR_MODIFY_LEAF, &pcur, &mtr);
-	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+	btr_pcur_open_on_user_rec(
+		ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
+		&pcur, &mtr);
+
+	if (!btr_pcur_is_on_user_rec(&pcur)) {
 		ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
 
 		goto leave_loop;
 	}
 
 	for (;;) {
-		ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr));
+		ut_ad(btr_pcur_is_on_user_rec(&pcur));
 
 		ibuf_rec = btr_pcur_get_rec(&pcur);
 
@@ -3422,7 +3485,7 @@ loop:
 			goto loop;
 		}
 
-		if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) {
+		if (btr_pcur_is_after_last_on_page(&pcur)) {
 			mtr_commit(&mtr);
 			btr_pcur_close(&pcur);
 
@@ -3439,77 +3502,41 @@ leave_loop:
 	/* Protect our statistics keeping from race conditions */
 	mutex_enter(&ibuf_mutex);
 
-	ibuf_data->n_merges++;
-	ibuf_data->n_merged_recs += n_inserts;
+	ibuf->n_merges++;
+	ibuf->n_merged_recs += n_inserts;
 
 	mutex_exit(&ibuf_mutex);
-	/*
-	fprintf(stderr,
-	"InnoDB: Discarded %lu ibuf entries for space %lu\n",
-	(ulong) n_inserts, (ulong) space);
-	*/
+
 	ibuf_exit();
 
 	mem_heap_free(heap);
 }
 
-
-/**********************************************************************
-Validates the ibuf data structures when the caller owns ibuf_mutex. */
-
-ibool
-ibuf_validate_low(void)
-/*===================*/
-			/* out: TRUE if ok */
-{
-	ibuf_data_t*	data;
-	ulint		sum_sizes;
-
-	ut_ad(mutex_own(&ibuf_mutex));
-
-	sum_sizes = 0;
-
-	data = UT_LIST_GET_FIRST(ibuf->data_list);
-
-	while (data) {
-		sum_sizes += data->size;
-
-		data = UT_LIST_GET_NEXT(data_list, data);
-	}
-
-	ut_a(sum_sizes == ibuf->size);
-
-	return(TRUE);
-}
-
-/**********************************************************************
-Looks if the insert buffer is empty. */
-
+/******************************************************************//**
+Looks if the insert buffer is empty.
+@return	TRUE if empty */
+UNIV_INTERN
 ibool
 ibuf_is_empty(void)
 /*===============*/
-			/* out: TRUE if empty */
 {
-	ibuf_data_t*	data;
 	ibool		is_empty;
-	page_t*		root;
+	const page_t*	root;
 	mtr_t		mtr;
 
 	ibuf_enter();
 
 	mutex_enter(&ibuf_mutex);
 
-	data = UT_LIST_GET_FIRST(ibuf->data_list);
-
 	mtr_start(&mtr);
 
-	root = ibuf_tree_root_get(data, 0, &mtr);
+	root = ibuf_tree_root_get(&mtr);
 
 	if (page_get_n_recs(root) == 0) {
 
 		is_empty = TRUE;
 
-		if (data->empty == FALSE) {
+		if (ibuf->empty == FALSE) {
 			fprintf(stderr,
 				"InnoDB: Warning: insert buffer tree is empty"
 				" but the data struct does not\n"
@@ -3518,15 +3545,13 @@ ibuf_is_empty(void)
 				"InnoDB: run to completion.\n");
 		}
 	} else {
-		ut_a(data->empty == FALSE);
+		ut_a(ibuf->empty == FALSE);
 
 		is_empty = FALSE;
 	}
 
 	mtr_commit(&mtr);
 
-	ut_a(data->space == 0);
-
 	mutex_exit(&ibuf_mutex);
 
 	ibuf_exit();
@@ -3534,47 +3559,45 @@ ibuf_is_empty(void)
 	return(is_empty);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Prints info of ibuf. */
-
+UNIV_INTERN
 void
 ibuf_print(
 /*=======*/
-	FILE*	file)	/* in: file where to print */
+	FILE*	file)	/*!< in: file where to print */
 {
-	ibuf_data_t*	data;
-#ifdef UNIV_IBUF_DEBUG
+#ifdef UNIV_IBUF_COUNT_DEBUG
 	ulint		i;
+	ulint		j;
 #endif
 
 	mutex_enter(&ibuf_mutex);
 
-	data = UT_LIST_GET_FIRST(ibuf->data_list);
-
-	while (data) {
-		fprintf(file,
-			"Ibuf: size %lu, free list len %lu, seg size %lu,\n"
-			"%lu inserts, %lu merged recs, %lu merges\n",
-			(ulong) data->size,
-			(ulong) data->free_list_len,
-			(ulong) data->seg_size,
-			(ulong) data->n_inserts,
-			(ulong) data->n_merged_recs,
-			(ulong) data->n_merges);
-#ifdef UNIV_IBUF_DEBUG
-		for (i = 0; i < IBUF_COUNT_N_PAGES; i++) {
-			if (ibuf_count_get(data->space, i) > 0) {
+	fprintf(file,
+		"Ibuf: size %lu, free list len %lu, seg size %lu,\n"
+		"%lu inserts, %lu merged recs, %lu merges\n",
+		(ulong) ibuf->size,
+		(ulong) ibuf->free_list_len,
+		(ulong) ibuf->seg_size,
+		(ulong) ibuf->n_inserts,
+		(ulong) ibuf->n_merged_recs,
+		(ulong) ibuf->n_merges);
+#ifdef UNIV_IBUF_COUNT_DEBUG
+	for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
+		for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
+			ulint	count = ibuf_count_get(i, j);
 
+			if (count > 0) {
 				fprintf(stderr,
-					"Ibuf count for page %lu is %lu\n",
-					(ulong) i,
-					(ulong)
-					ibuf_count_get(data->space, i));
+					"Ibuf count for space/page %lu/%lu"
+					" is %lu\n",
+					(ulong) i, (ulong) j, (ulong) count);
 			}
 		}
-#endif
-		data = UT_LIST_GET_NEXT(data_list, data);
 	}
+#endif /* UNIV_IBUF_COUNT_DEBUG */
 
 	mutex_exit(&ibuf_mutex);
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/btr0btr.h b/storage/innodb_plugin/include/btr0btr.h
new file mode 100644
index 00000000000..d5c8258513c
--- /dev/null
+++ b/storage/innodb_plugin/include/btr0btr.h
@@ -0,0 +1,509 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0btr.h
+The B-tree
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0btr_h
+#define btr0btr_h
+
+#include "univ.i"
+
+#include "dict0dict.h"
+#include "data0data.h"
+#include "page0cur.h"
+#include "mtr0mtr.h"
+#include "btr0types.h"
+
+#ifndef UNIV_HOTBACKUP
+/** Maximum record size which can be stored on a page, without using the
+special big record storage structure */
+#define	BTR_PAGE_MAX_REC_SIZE	(UNIV_PAGE_SIZE / 2 - 200)
+
+/** @brief Maximum depth of a B-tree in InnoDB.
+
+Note that this isn't a maximum as such; none of the tree operations
+avoid producing trees bigger than this. It is instead a "max depth
+that other code must work with", useful for e.g.  fixed-size arrays
+that must store some information about each level in a tree. In other
+words: if a B-tree with bigger depth than this is encountered, it is
+not acceptable for it to lead to mysterious memory corruption, but it
+is acceptable for the program to die with a clear assert failure. */
+#define BTR_MAX_LEVELS		100
+
+/** Latching modes for btr_cur_search_to_nth_level(). */
+enum btr_latch_mode {
+	/** Search a record on a leaf page and S-latch it. */
+	BTR_SEARCH_LEAF = RW_S_LATCH,
+	/** (Prepare to) modify a record on a leaf page and X-latch it. */
+	BTR_MODIFY_LEAF	= RW_X_LATCH,
+	/** Obtain no latches. */
+	BTR_NO_LATCHES = RW_NO_LATCH,
+	/** Start modifying the entire B-tree. */
+	BTR_MODIFY_TREE = 33,
+	/** Continue modifying the entire B-tree. */
+	BTR_CONT_MODIFY_TREE = 34,
+	/** Search the previous record. */
+	BTR_SEARCH_PREV = 35,
+	/** Modify the previous record. */
+	BTR_MODIFY_PREV = 36
+};
+
+/** If this is ORed to btr_latch_mode, it means that the search tuple
+will be inserted to the index, at the searched position */
+#define BTR_INSERT		512
+
+/** This flag ORed to btr_latch_mode says that we do the search in query
+optimization */
+#define BTR_ESTIMATE		1024
+
+/** This flag ORed to btr_latch_mode says that we can ignore possible
+UNIQUE definition on secondary indexes when we decide if we can use
+the insert buffer to speed up inserts */
+#define BTR_IGNORE_SEC_UNIQUE	2048
+
+/**************************************************************//**
+Gets the root node of a tree and x-latches it.
+@return	root page, x-latched */
+UNIV_INTERN
+page_t*
+btr_root_get(
+/*=========*/
+	dict_index_t*	index,	/*!< in: index tree */
+	mtr_t*		mtr);	/*!< in: mtr */
+/**************************************************************//**
+Gets a buffer page and declares its latching order level. */
+UNIV_INLINE
+buf_block_t*
+btr_block_get(
+/*==========*/
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number */
+	ulint	mode,		/*!< in: latch mode */
+	mtr_t*	mtr);		/*!< in: mtr */
+/**************************************************************//**
+Gets a buffer page and declares its latching order level. */
+UNIV_INLINE
+page_t*
+btr_page_get(
+/*=========*/
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number */
+	ulint	mode,		/*!< in: latch mode */
+	mtr_t*	mtr);		/*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
+/**************************************************************//**
+Gets the index id field of a page.
+@return	index id */
+UNIV_INLINE
+dulint
+btr_page_get_index_id(
+/*==================*/
+	const page_t*	page);	/*!< in: index page */
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Gets the node level field in an index page.
+@return	level, leaf level == 0 */
+UNIV_INLINE
+ulint
+btr_page_get_level_low(
+/*===================*/
+	const page_t*	page);	/*!< in: index page */
+/********************************************************//**
+Gets the node level field in an index page.
+@return	level, leaf level == 0 */
+UNIV_INLINE
+ulint
+btr_page_get_level(
+/*===============*/
+	const page_t*	page,	/*!< in: index page */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Gets the next index page number.
+@return	next page number */
+UNIV_INLINE
+ulint
+btr_page_get_next(
+/*==============*/
+	const page_t*	page,	/*!< in: index page */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Gets the previous index page number.
+@return	prev page number */
+UNIV_INLINE
+ulint
+btr_page_get_prev(
+/*==============*/
+	const page_t*	page,	/*!< in: index page */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/*************************************************************//**
+Gets pointer to the previous user record in the tree. It is assumed
+that the caller has appropriate latches on the page and its neighbor.
+@return	previous user record, NULL if there is none */
+UNIV_INTERN
+rec_t*
+btr_get_prev_user_rec(
+/*==================*/
+	rec_t*	rec,	/*!< in: record on leaf level */
+	mtr_t*	mtr);	/*!< in: mtr holding a latch on the page, and if
+			needed, also to the previous page */
+/*************************************************************//**
+Gets pointer to the next user record in the tree. It is assumed
+that the caller has appropriate latches on the page and its neighbor.
+@return	next user record, NULL if there is none */
+UNIV_INTERN
+rec_t*
+btr_get_next_user_rec(
+/*==================*/
+	rec_t*	rec,	/*!< in: record on leaf level */
+	mtr_t*	mtr);	/*!< in: mtr holding a latch on the page, and if
+			needed, also to the next page */
+/**************************************************************//**
+Releases the latch on a leaf page and bufferunfixes it. */
+UNIV_INLINE
+void
+btr_leaf_page_release(
+/*==================*/
+	buf_block_t*	block,		/*!< in: buffer block */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF or
+					BTR_MODIFY_LEAF */
+	mtr_t*		mtr);		/*!< in: mtr */
+/**************************************************************//**
+Gets the child node file address in a node pointer.
+@return	child node address */
+UNIV_INLINE
+ulint
+btr_node_ptr_get_child_page_no(
+/*===========================*/
+	const rec_t*	rec,	/*!< in: node pointer record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/************************************************************//**
+Creates the root node for a new index tree.
+@return	page number of the created root, FIL_NULL if did not succeed */
+UNIV_INTERN
+ulint
+btr_create(
+/*=======*/
+	ulint		type,	/*!< in: type of the index */
+	ulint		space,	/*!< in: space where created */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	dulint		index_id,/*!< in: index id */
+	dict_index_t*	index,	/*!< in: index */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/************************************************************//**
+Frees a B-tree except the root page, which MUST be freed after this
+by calling btr_free_root. */
+UNIV_INTERN
+void
+btr_free_but_not_root(
+/*==================*/
+	ulint	space,		/*!< in: space where created */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	root_page_no);	/*!< in: root page number */
+/************************************************************//**
+Frees the B-tree root page. Other tree MUST already have been freed. */
+UNIV_INTERN
+void
+btr_free_root(
+/*==========*/
+	ulint	space,		/*!< in: space where created */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	root_page_no,	/*!< in: root page number */
+	mtr_t*	mtr);		/*!< in: a mini-transaction which has already
+				been started */
+/*************************************************************//**
+Makes tree one level higher by splitting the root, and inserts
+the tuple. It is assumed that mtr contains an x-latch on the tree.
+NOTE that the operation of this function must always succeed,
+we cannot reverse it: therefore enough free disk space must be
+guaranteed to be available before this function is called.
+@return	inserted record */
+UNIV_INTERN
+rec_t*
+btr_root_raise_and_insert(
+/*======================*/
+	btr_cur_t*	cursor,	/*!< in: cursor at which to insert: must be
+				on the root page; when the function returns,
+				the cursor is positioned on the predecessor
+				of the inserted record */
+	const dtuple_t*	tuple,	/*!< in: tuple to insert */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*************************************************************//**
+Reorganizes an index page.
+IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
+page of a non-clustered index, the caller must update the insert
+buffer free bits in the same mini-transaction in such a way that the
+modification will be redo-logged.
+@return	TRUE on success, FALSE on failure */
+UNIV_INTERN
+ibool
+btr_page_reorganize(
+/*================*/
+	buf_block_t*	block,	/*!< in: page to be reorganized */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*************************************************************//**
+Decides if the page should be split at the convergence point of
+inserts converging to left.
+@return	TRUE if split recommended */
+UNIV_INTERN
+ibool
+btr_page_get_split_rec_to_left(
+/*===========================*/
+	btr_cur_t*	cursor,	/*!< in: cursor at which to insert */
+	rec_t**		split_rec);/*!< out: if split recommended,
+				the first record on upper half page,
+				or NULL if tuple should be first */
+/*************************************************************//**
+Decides if the page should be split at the convergence point of
+inserts converging to right.
+@return	TRUE if split recommended */
+UNIV_INTERN
+ibool
+btr_page_get_split_rec_to_right(
+/*============================*/
+	btr_cur_t*	cursor,	/*!< in: cursor at which to insert */
+	rec_t**		split_rec);/*!< out: if split recommended,
+				the first record on upper half page,
+				or NULL if tuple should be first */
+/*************************************************************//**
+Splits an index page to halves and inserts the tuple. It is assumed
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
+released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore enough
+free disk space (2 pages) must be guaranteed to be available before
+this function is called.
+
+@return inserted record */
+UNIV_INTERN
+rec_t*
+btr_page_split_and_insert(
+/*======================*/
+	btr_cur_t*	cursor,	/*!< in: cursor at which to insert; when the
+				function returns, the cursor is positioned
+				on the predecessor of the inserted record */
+	const dtuple_t*	tuple,	/*!< in: tuple to insert */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*******************************************************//**
+Inserts a data tuple to a tree on a non-leaf level. It is assumed
+that mtr holds an x-latch on the tree. */
+UNIV_INTERN
+void
+btr_insert_on_non_leaf_level(
+/*=========================*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		level,	/*!< in: level, must be > 0 */
+	dtuple_t*	tuple,	/*!< in: the record to be inserted */
+	mtr_t*		mtr);	/*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
+/****************************************************************//**
+Sets a record as the predefined minimum record. */
+UNIV_INTERN
+void
+btr_set_min_rec_mark(
+/*=================*/
+	rec_t*	rec,	/*!< in/out: record */
+	mtr_t*	mtr);	/*!< in: mtr */
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Deletes on the upper level the node pointer to a page. */
+UNIV_INTERN
+void
+btr_node_ptr_delete(
+/*================*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: page whose node pointer is deleted */
+	mtr_t*		mtr);	/*!< in: mtr */
+#ifdef UNIV_DEBUG
+/************************************************************//**
+Checks that the node pointer to a page is appropriate.
+@return	TRUE */
+UNIV_INTERN
+ibool
+btr_check_node_ptr(
+/*===============*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: index page */
+	mtr_t*		mtr);	/*!< in: mtr */
+#endif /* UNIV_DEBUG */
+/*************************************************************//**
+Tries to merge the page first to the left immediate brother if such a
+brother exists, and the node pointers to the current page and to the
+brother reside on the same page. If the left brother does not satisfy these
+conditions, looks at the right brother. If the page is the only one on that
+level lifts the records of the page to the father page, thus reducing the
+tree height. It is assumed that mtr holds an x-latch on the tree and on the
+page. If cursor is on the leaf level, mtr must also hold x-latches to
+the brothers, if they exist.
+@return	TRUE on success */
+UNIV_INTERN
+ibool
+btr_compress(
+/*=========*/
+	btr_cur_t*	cursor,	/*!< in: cursor on the page to merge or lift;
+				the page must not be empty: in record delete
+				use btr_discard_page if the page would become
+				empty */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*************************************************************//**
+Discards a page from a B-tree. This is used to remove the last record from
+a B-tree page: the whole page must be removed at the same time. This cannot
+be used for the root page, which is allowed to be empty. */
+UNIV_INTERN
+void
+btr_discard_page(
+/*=============*/
+	btr_cur_t*	cursor,	/*!< in: cursor on the page to discard: not on
+				the root page */
+	mtr_t*		mtr);	/*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
+/****************************************************************//**
+Parses the redo log record for setting an index record as the predefined
+minimum record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_parse_set_min_rec_mark(
+/*=======================*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	ulint	comp,	/*!< in: nonzero=compact page format */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr);	/*!< in: mtr or NULL */
+/***********************************************************//**
+Parses a redo log record of reorganizing a page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_parse_page_reorganize(
+/*======================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	buf_block_t*	block,	/*!< in: page to be reorganized, or NULL */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+#ifndef UNIV_HOTBACKUP
+/**************************************************************//**
+Gets the number of pages in a B-tree.
+@return	number of pages */
+UNIV_INTERN
+ulint
+btr_get_size(
+/*=========*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		flag);	/*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
+/**************************************************************//**
+Allocates a new file page to be used in an index tree. NOTE: we assume
+that the caller has made the reservation for free extents!
+@return	new allocated block, x-latched; NULL if out of space */
+UNIV_INTERN
+buf_block_t*
+btr_page_alloc(
+/*===========*/
+	dict_index_t*	index,		/*!< in: index tree */
+	ulint		hint_page_no,	/*!< in: hint of a good page */
+	byte		file_direction,	/*!< in: direction where a possible
+					page split is made */
+	ulint		level,		/*!< in: level where the page is placed
+					in the tree */
+	mtr_t*		mtr);		/*!< in: mtr */
+/**************************************************************//**
+Frees a file page used in an index tree. NOTE: cannot free field external
+storage pages because the page must contain info on its level. */
+UNIV_INTERN
+void
+btr_page_free(
+/*==========*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
+	mtr_t*		mtr);	/*!< in: mtr */
+/**************************************************************//**
+Frees a file page used in an index tree. Can be used also to BLOB
+external storage pages, because the page level 0 can be given as an
+argument. */
+UNIV_INTERN
+void
+btr_page_free_low(
+/*==============*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
+	ulint		level,	/*!< in: page level */
+	mtr_t*		mtr);	/*!< in: mtr */
+#ifdef UNIV_BTR_PRINT
+/*************************************************************//**
+Prints size info of a B-tree. */
+UNIV_INTERN
+void
+btr_print_size(
+/*===========*/
+	dict_index_t*	index);	/*!< in: index tree */
+/**************************************************************//**
+Prints directories and other info of all nodes in the index. */
+UNIV_INTERN
+void
+btr_print_index(
+/*============*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		width);	/*!< in: print this many entries from start
+				and end */
+#endif /* UNIV_BTR_PRINT */
+/************************************************************//**
+Checks the size and number of fields in a record based on the definition of
+the index.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+btr_index_rec_validate(
+/*===================*/
+	const rec_t*		rec,		/*!< in: index record */
+	const dict_index_t*	index,		/*!< in: index */
+	ibool			dump_on_error);	/*!< in: TRUE if the function
+						should print hex dump of record
+						and page on error */
+/**************************************************************//**
+Checks the consistency of an index tree.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+btr_validate_index(
+/*===============*/
+	dict_index_t*	index,	/*!< in: index */
+	trx_t*		trx);	/*!< in: transaction or NULL */
+
+#define BTR_N_LEAF_PAGES	1
+#define BTR_TOTAL_SIZE		2
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "btr0btr.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/btr0btr.ic b/storage/innodb_plugin/include/btr0btr.ic
new file mode 100644
index 00000000000..2259d22c9a6
--- /dev/null
+++ b/storage/innodb_plugin/include/btr0btr.ic
@@ -0,0 +1,310 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0btr.ic
+The B-tree
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+#ifndef UNIV_HOTBACKUP
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "page0zip.h"
+
+#define BTR_MAX_NODE_LEVEL	50	/*!< Maximum B-tree page level
+					(not really a hard limit).
+					Used in debug assertions
+					in btr_page_set_level and
+					btr_page_get_level_low */
+
+/**************************************************************//**
+Gets a buffer page and declares its latching order level. */
+UNIV_INLINE
+buf_block_t*
+btr_block_get(
+/*==========*/
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number */
+	ulint	mode,		/*!< in: latch mode */
+	mtr_t*	mtr)		/*!< in: mtr */
+{
+	buf_block_t*	block;
+
+	block = buf_page_get(space, zip_size, page_no, mode, mtr);
+
+	if (mode != RW_NO_LATCH) {
+
+		buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+	}
+
+	return(block);
+}
+
+/**************************************************************//**
+Gets a buffer page and declares its latching order level. */
+UNIV_INLINE
+page_t*
+btr_page_get(
+/*=========*/
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number */
+	ulint	mode,		/*!< in: latch mode */
+	mtr_t*	mtr)		/*!< in: mtr */
+{
+	return(buf_block_get_frame(btr_block_get(space, zip_size, page_no,
+						 mode, mtr)));
+}
+
+/**************************************************************//**
+Sets the index id field of a page. */
+UNIV_INLINE
+void
+btr_page_set_index_id(
+/*==================*/
+	page_t*		page,	/*!< in: page to be created */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	dulint		id,	/*!< in: index id */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), id);
+		page_zip_write_header(page_zip,
+				      page + (PAGE_HEADER + PAGE_INDEX_ID),
+				      8, mtr);
+	} else {
+		mlog_write_dulint(page + (PAGE_HEADER + PAGE_INDEX_ID),
+				  id, mtr);
+	}
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/**************************************************************//**
+Gets the index id field of a page.
+@return	index id */
+UNIV_INLINE
+dulint
+btr_page_get_index_id(
+/*==================*/
+	const page_t*	page)	/*!< in: index page */
+{
+	return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID));
+}
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Gets the node level field in an index page.
+@return	level, leaf level == 0 */
+UNIV_INLINE
+ulint
+btr_page_get_level_low(
+/*===================*/
+	const page_t*	page)	/*!< in: index page */
+{
+	ulint	level;
+
+	ut_ad(page);
+
+	level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
+
+	ut_ad(level <= BTR_MAX_NODE_LEVEL);
+
+	return(level);
+}
+
+/********************************************************//**
+Gets the node level field in an index page.
+@return	level, leaf level == 0 */
+UNIV_INLINE
+ulint
+btr_page_get_level(
+/*===============*/
+	const page_t*	page,	/*!< in: index page */
+	mtr_t*		mtr __attribute__((unused)))
+				/*!< in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+
+	return(btr_page_get_level_low(page));
+}
+
+/********************************************************//**
+Sets the node level field in an index page. */
+UNIV_INLINE
+void
+btr_page_set_level(
+/*===============*/
+	page_t*		page,	/*!< in: index page */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	ulint		level,	/*!< in: level, leaf level == 0 */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+	ut_ad(level <= BTR_MAX_NODE_LEVEL);
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		mach_write_to_2(page + (PAGE_HEADER + PAGE_LEVEL), level);
+		page_zip_write_header(page_zip,
+				      page + (PAGE_HEADER + PAGE_LEVEL),
+				      2, mtr);
+	} else {
+		mlog_write_ulint(page + (PAGE_HEADER + PAGE_LEVEL), level,
+				 MLOG_2BYTES, mtr);
+	}
+}
+
+/********************************************************//**
+Gets the next index page number.
+@return	next page number */
+UNIV_INLINE
+ulint
+btr_page_get_next(
+/*==============*/
+	const page_t*	page,	/*!< in: index page */
+	mtr_t*		mtr __attribute__((unused)))
+				/*!< in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX));
+
+	return(mach_read_from_4(page + FIL_PAGE_NEXT));
+}
+
+/********************************************************//**
+Sets the next index page field. */
+UNIV_INLINE
+void
+btr_page_set_next(
+/*==============*/
+	page_t*		page,	/*!< in: index page */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	ulint		next,	/*!< in: next page number */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		mach_write_to_4(page + FIL_PAGE_NEXT, next);
+		page_zip_write_header(page_zip, page + FIL_PAGE_NEXT, 4, mtr);
+	} else {
+		mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr);
+	}
+}
+
+/********************************************************//**
+Gets the previous index page number.
+@return	prev page number */
+UNIV_INLINE
+ulint
+btr_page_get_prev(
+/*==============*/
+	const page_t*	page,	/*!< in: index page */
+	mtr_t*	mtr __attribute__((unused))) /*!< in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+
+	return(mach_read_from_4(page + FIL_PAGE_PREV));
+}
+
+/********************************************************//**
+Sets the previous index page field. */
+UNIV_INLINE
+void
+btr_page_set_prev(
+/*==============*/
+	page_t*		page,	/*!< in: index page */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	ulint		prev,	/*!< in: previous page number */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		mach_write_to_4(page + FIL_PAGE_PREV, prev);
+		page_zip_write_header(page_zip, page + FIL_PAGE_PREV, 4, mtr);
+	} else {
+		mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr);
+	}
+}
+
+/**************************************************************//**
+Gets the child node file address in a node pointer.
+@return	child node address */
+UNIV_INLINE
+ulint
+btr_node_ptr_get_child_page_no(
+/*===========================*/
+	const rec_t*	rec,	/*!< in: node pointer record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	const byte*	field;
+	ulint		len;
+	ulint		page_no;
+
+	ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
+
+	/* The child address is in the last field */
+	field = rec_get_nth_field(rec, offsets,
+				  rec_offs_n_fields(offsets) - 1, &len);
+
+	ut_ad(len == 4);
+
+	page_no = mach_read_from_4(field);
+
+	if (UNIV_UNLIKELY(page_no == 0)) {
+		fprintf(stderr,
+			"InnoDB: a nonsensical page number 0"
+			" in a node ptr record at offset %lu\n",
+			(ulong) page_offset(rec));
+		buf_page_print(page_align(rec), 0);
+	}
+
+	return(page_no);
+}
+
+/**************************************************************//**
+Releases the latches on a leaf page and bufferunfixes it. */
+UNIV_INLINE
+void
+btr_leaf_page_release(
+/*==================*/
+	buf_block_t*	block,		/*!< in: buffer block */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF or
+					BTR_MODIFY_LEAF */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
+	ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY));
+
+	mtr_memo_release(mtr, block,
+			 latch_mode == BTR_SEARCH_LEAF
+			 ? MTR_MEMO_PAGE_S_FIX
+			 : MTR_MEMO_PAGE_X_FIX);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h
new file mode 100644
index 00000000000..b2d43ae3254
--- /dev/null
+++ b/storage/innodb_plugin/include/btr0cur.h
@@ -0,0 +1,753 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0cur.h
+The index tree cursor
+
+Created 10/16/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0cur_h
+#define btr0cur_h
+
+#include "univ.i"
+#include "dict0dict.h"
+#include "page0cur.h"
+#include "btr0types.h"
+
+/* Mode flags for btr_cur operations; these can be ORed */
+#define BTR_NO_UNDO_LOG_FLAG	1	/* do no undo logging */
+#define BTR_NO_LOCKING_FLAG	2	/* do no record lock checking */
+#define BTR_KEEP_SYS_FLAG	4	/* sys fields will be found from the
+					update vector or inserted entry */
+
+#ifndef UNIV_HOTBACKUP
+#include "que0types.h"
+#include "row0types.h"
+#include "ha0ha.h"
+
+#define BTR_CUR_ADAPT
+#define BTR_CUR_HASH_ADAPT
+
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Returns the page cursor component of a tree cursor.
+@return	pointer to page cursor component */
+UNIV_INLINE
+page_cur_t*
+btr_cur_get_page_cur(
+/*=================*/
+	const btr_cur_t*	cursor);/*!< in: tree cursor */
+#else /* UNIV_DEBUG */
+# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur)
+#endif /* UNIV_DEBUG */
+/*********************************************************//**
+Returns the buffer block on which the tree cursor is positioned.
+@return	pointer to buffer block */
+UNIV_INLINE
+buf_block_t*
+btr_cur_get_block(
+/*==============*/
+	btr_cur_t*	cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Returns the record pointer of a tree cursor.
+@return	pointer to record */
+UNIV_INLINE
+rec_t*
+btr_cur_get_rec(
+/*============*/
+	btr_cur_t*	cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Returns the compressed page on which the tree cursor is positioned.
+@return	pointer to compressed page, or NULL if the page is not compressed */
+UNIV_INLINE
+page_zip_des_t*
+btr_cur_get_page_zip(
+/*=================*/
+	btr_cur_t*	cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Invalidates a tree cursor by setting record pointer to NULL. */
+UNIV_INLINE
+void
+btr_cur_invalidate(
+/*===============*/
+	btr_cur_t*	cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Returns the page of a tree cursor.
+@return	pointer to page */
+UNIV_INLINE
+page_t*
+btr_cur_get_page(
+/*=============*/
+	btr_cur_t*	cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Returns the index of a cursor.
+@return	index */
+UNIV_INLINE
+dict_index_t*
+btr_cur_get_index(
+/*==============*/
+	btr_cur_t*	cursor);/*!< in: B-tree cursor */
+/*********************************************************//**
+Positions a tree cursor at a given record. */
+UNIV_INLINE
+void
+btr_cur_position(
+/*=============*/
+	dict_index_t*	index,	/*!< in: index */
+	rec_t*		rec,	/*!< in: record in tree */
+	buf_block_t*	block,	/*!< in: buffer block of rec */
+	btr_cur_t*	cursor);/*!< in: cursor */
+/********************************************************************//**
+Searches an index tree and positions a tree cursor on a given level.
+NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
+to node pointer page number fields on the upper levels of the tree!
+Note that if mode is PAGE_CUR_LE, which is used in inserts, then
+cursor->up_match and cursor->low_match both will have sensible values.
+If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */
+UNIV_INTERN
+void
+btr_cur_search_to_nth_level(
+/*========================*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		level,	/*!< in: the tree level of search */
+	const dtuple_t*	tuple,	/*!< in: data tuple; NOTE: n_fields_cmp in
+				tuple must be set so that it cannot get
+				compared to the node ptr page number field! */
+	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be PAGE_CUR_LE,
+				not PAGE_CUR_GE, as the latter may end up on
+				the previous page of the record! Inserts
+				should always be made using PAGE_CUR_LE to
+				search the position! */
+	ulint		latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
+				BTR_INSERT and BTR_ESTIMATE;
+				cursor->left_block is used to store a pointer
+				to the left neighbor page, in the cases
+				BTR_SEARCH_PREV and BTR_MODIFY_PREV;
+				NOTE that if has_search_latch
+				is != 0, we maybe do not have a latch set
+				on the cursor page, we assume
+				the caller uses his search latch
+				to protect the record! */
+	btr_cur_t*	cursor, /*!< in/out: tree cursor; the cursor page is
+				s- or x-latched, but see also above! */
+	ulint		has_search_latch,/*!< in: latch mode the caller
+				currently has on btr_search_latch:
+				RW_S_LATCH, or 0 */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*****************************************************************//**
+Opens a cursor at either end of an index. */
+UNIV_INTERN
+void
+btr_cur_open_at_index_side(
+/*=======================*/
+	ibool		from_left,	/*!< in: TRUE if open to the low end,
+					FALSE if to the high end */
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: latch mode */
+	btr_cur_t*	cursor,		/*!< in: cursor */
+	mtr_t*		mtr);		/*!< in: mtr */
+/**********************************************************************//**
+Positions a cursor at a randomly chosen position within a B-tree. */
+UNIV_INTERN
+void
+btr_cur_open_at_rnd_pos(
+/*====================*/
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_cur_t*	cursor,		/*!< in/out: B-tree cursor */
+	mtr_t*		mtr);		/*!< in: mtr */
+/*************************************************************//**
+Tries to perform an insert to a page in an index tree, next to cursor.
+It is assumed that mtr holds an x-latch on the page. The operation does
+not succeed if there is too little space on the page. If there is just
+one record on the page, the insert will always succeed; this is to
+prevent trying to split a page with just one record.
+@return	DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
+UNIV_INTERN
+ulint
+btr_cur_optimistic_insert(
+/*======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags: if not
+				zero, the parameters index and thr should be
+				specified */
+	btr_cur_t*	cursor,	/*!< in: cursor on page after which to insert;
+				cursor stays valid */
+	dtuple_t*	entry,	/*!< in/out: entry to insert */
+	rec_t**		rec,	/*!< out: pointer to inserted record if
+				succeed */
+	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
+				be stored externally by the caller, or
+				NULL */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	que_thr_t*	thr,	/*!< in: query thread or NULL */
+	mtr_t*		mtr);	/*!< in: mtr; if this function returns
+				DB_SUCCESS on a leaf page of a secondary
+				index in a compressed tablespace, the
+				mtr must be committed before latching
+				any further pages */
+/*************************************************************//**
+Performs an insert on a page of an index tree. It is assumed that mtr
+holds an x-latch on the tree and on the cursor page. If the insert is
+made on the leaf level, to avoid deadlocks, mtr must also own x-latches
+to brothers of page, if those brothers exist.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
+ulint
+btr_cur_pessimistic_insert(
+/*=======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags: if not
+				zero, the parameter thr should be
+				specified; if no undo logging is specified,
+				then the caller must have reserved enough
+				free extents in the file space so that the
+				insertion will certainly succeed */
+	btr_cur_t*	cursor,	/*!< in: cursor after which to insert;
+				cursor stays valid */
+	dtuple_t*	entry,	/*!< in/out: entry to insert */
+	rec_t**		rec,	/*!< out: pointer to inserted record if
+				succeed */
+	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
+				be stored externally by the caller, or
+				NULL */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	que_thr_t*	thr,	/*!< in: query thread or NULL */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*************************************************************//**
+Updates a record when the update causes no size changes in its fields.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
+ulint
+btr_cur_update_in_place(
+/*====================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/*!< in: cursor on the record to update;
+				cursor stays valid and positioned on the
+				same record */
+	const upd_t*	update,	/*!< in: update vector */
+	ulint		cmpl_info,/*!< in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr);	/*!< in: mtr; must be committed before
+				latching any further pages */
+/*************************************************************//**
+Tries to update a record on a page in an index tree. It is assumed that mtr
+holds an x-latch on the page. The operation does not succeed if there is too
+little space on the page or if the update would result in too empty a page,
+so that tree compression is recommended.
+@return DB_SUCCESS, or DB_OVERFLOW if the updated record does not fit,
+DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
+there is not enough space left on the compressed page */
+UNIV_INTERN
+ulint
+btr_cur_optimistic_update(
+/*======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/*!< in: cursor on the record to update;
+				cursor stays valid and positioned on the
+				same record */
+	const upd_t*	update,	/*!< in: update vector; this must also
+				contain trx id and roll ptr fields */
+	ulint		cmpl_info,/*!< in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr);	/*!< in: mtr; must be committed before
+				latching any further pages */
+/*************************************************************//**
+Performs an update of a record on a page of a tree. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. If the
+update is made on the leaf level, to avoid deadlocks, mtr must also
+own x-latches to brothers of page, if those brothers exist.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+btr_cur_pessimistic_update(
+/*=======================*/
+	ulint		flags,	/*!< in: undo logging, locking, and rollback
+				flags */
+	btr_cur_t*	cursor,	/*!< in: cursor on the record to update */
+	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
+	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
+				be stored externally by the caller, or NULL */
+	const upd_t*	update,	/*!< in: update vector; this is allowed also
+				contain trx id and roll ptr fields, but
+				the values in update vector have no effect */
+	ulint		cmpl_info,/*!< in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr);	/*!< in: mtr; must be committed before
+				latching any further pages */
+/***********************************************************//**
+Marks a clustered index record deleted. Writes an undo log record to
+undo log on this delete marking. Writes in the trx id field the id
+of the deleting transaction, and in the roll ptr field pointer to the
+undo log record created.
+@return	DB_SUCCESS, DB_LOCK_WAIT, or error number */
+UNIV_INTERN
+ulint
+btr_cur_del_mark_set_clust_rec(
+/*===========================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/*!< in: cursor */
+	ibool		val,	/*!< in: value to set */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr);	/*!< in: mtr */
+/***********************************************************//**
+Sets a secondary index record delete mark to TRUE or FALSE.
+@return	DB_SUCCESS, DB_LOCK_WAIT, or error number */
+UNIV_INTERN
+ulint
+btr_cur_del_mark_set_sec_rec(
+/*=========================*/
+	ulint		flags,	/*!< in: locking flag */
+	btr_cur_t*	cursor,	/*!< in: cursor */
+	ibool		val,	/*!< in: value to set */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr);	/*!< in: mtr */
+/***********************************************************//**
+Clear a secondary index record's delete mark.  This function is only
+used by the insert buffer insert merge mechanism. */
+UNIV_INTERN
+void
+btr_cur_del_unmark_for_ibuf(
+/*========================*/
+	rec_t*		rec,		/*!< in/out: record to delete unmark */
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page
+					corresponding to rec, or NULL
+					when the tablespace is
+					uncompressed */
+	mtr_t*		mtr);		/*!< in: mtr */
+/*************************************************************//**
+Tries to compress a page of the tree if it seems useful. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. To avoid
+deadlocks, mtr must also own x-latches to brothers of page, if those
+brothers exist. NOTE: it is assumed that the caller has reserved enough
+free extents so that the compression will always succeed if done!
+@return	TRUE if compression occurred */
+UNIV_INTERN
+ibool
+btr_cur_compress_if_useful(
+/*=======================*/
+	btr_cur_t*	cursor,	/*!< in: cursor on the page to compress;
+				cursor does not stay valid if compression
+				occurs */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*******************************************************//**
+Removes the record on which the tree cursor is positioned. It is assumed
+that the mtr has an x-latch on the page where the cursor is positioned,
+but no latch on the whole tree.
+@return	TRUE if success, i.e., the page did not become too empty */
+UNIV_INTERN
+ibool
+btr_cur_optimistic_delete(
+/*======================*/
+	btr_cur_t*	cursor,	/*!< in: cursor on the record to delete;
+				cursor stays valid: if deletion succeeds,
+				on function exit it points to the successor
+				of the deleted record */
+	mtr_t*		mtr);	/*!< in: mtr; if this function returns
+				TRUE on a leaf page of a secondary
+				index, the mtr must be committed
+				before latching any further pages */
+/*************************************************************//**
+Removes the record on which the tree cursor is positioned. Tries
+to compress the page if its fillfactor drops below a threshold
+or if it is the only page on the level. It is assumed that mtr holds
+an x-latch on the tree and on the cursor page. To avoid deadlocks,
+mtr must also own x-latches to brothers of page, if those brothers
+exist.
+@return	TRUE if compression occurred */
+UNIV_INTERN
+ibool
+btr_cur_pessimistic_delete(
+/*=======================*/
+	ulint*		err,	/*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+				the latter may occur because we may have
+				to update node pointers on upper levels,
+				and in the case of variable length keys
+				these may actually grow in size */
+	ibool		has_reserved_extents, /*!< in: TRUE if the
+				caller has already reserved enough free
+				extents so that he knows that the operation
+				will succeed */
+	btr_cur_t*	cursor,	/*!< in: cursor on the record to delete;
+				if compression does not occur, the cursor
+				stays valid: it points to successor of
+				deleted record on function exit */
+	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
+	mtr_t*		mtr);	/*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Parses a redo log record of updating a record in-place.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_cur_parse_update_in_place(
+/*==========================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	page_t*		page,	/*!< in/out: page or NULL */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	dict_index_t*	index);	/*!< in: index corresponding to page */
+/****************************************************************//**
+Parses the redo log record for delete marking or unmarking of a clustered
+index record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_cur_parse_del_mark_set_clust_rec(
+/*=================================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	page_t*		page,	/*!< in/out: page or NULL */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	dict_index_t*	index);	/*!< in: index corresponding to page */
+/****************************************************************//**
+Parses the redo log record for delete marking or unmarking of a secondary
+index record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_cur_parse_del_mark_set_sec_rec(
+/*===============================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	page_t*		page,	/*!< in/out: page or NULL */
+	page_zip_des_t*	page_zip);/*!< in/out: compressed page, or NULL */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Estimates the number of rows in a given index range.
+@return	estimated number of rows */
+UNIV_INTERN
+ib_int64_t
+btr_estimate_n_rows_in_range(
+/*=========================*/
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	tuple1,	/*!< in: range start, may also be empty tuple */
+	ulint		mode1,	/*!< in: search mode for range start */
+	const dtuple_t*	tuple2,	/*!< in: range end, may also be empty tuple */
+	ulint		mode2);	/*!< in: search mode for range end */
+/*******************************************************************//**
+Estimates the number of different key values in a given index, for
+each n-column prefix of the index where n <= dict_index_get_n_unique(index).
+The estimates are stored in the array index->stat_n_diff_key_vals. */
+UNIV_INTERN
+void
+btr_estimate_number_of_different_key_vals(
+/*======================================*/
+	dict_index_t*	index);	/*!< in: index */
+/*******************************************************************//**
+Marks not updated extern fields as not-owned by this record. The ownership
+is transferred to the updated record which is inserted elsewhere in the
+index tree. In purge only the owner of externally stored field is allowed
+to free the field. */
+UNIV_INTERN
+void
+btr_cur_mark_extern_inherited_fields(
+/*=================================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
+				part will be updated, or NULL */
+	rec_t*		rec,	/*!< in/out: record in a clustered index */
+	dict_index_t*	index,	/*!< in: index of the page */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	const upd_t*	update,	/*!< in: update vector */
+	mtr_t*		mtr);	/*!< in: mtr, or NULL if not logged */
+/*******************************************************************//**
+The complement of the previous function: in an update entry may inherit
+some externally stored fields from a record. We must mark them as inherited
+in entry, so that they are not freed in a rollback. */
+UNIV_INTERN
+void
+btr_cur_mark_dtuple_inherited_extern(
+/*=================================*/
+	dtuple_t*	entry,		/*!< in/out: updated entry to be
+					inserted to clustered index */
+	const upd_t*	update);	/*!< in: update vector */
+/*******************************************************************//**
+Marks all extern fields in a dtuple as owned by the record. */
+UNIV_INTERN
+void
+btr_cur_unmark_dtuple_extern_fields(
+/*================================*/
+	dtuple_t*	entry);		/*!< in/out: clustered index entry */
+/*******************************************************************//**
+Stores the fields in big_rec_vec to the tablespace and puts pointers to
+them in rec.  The extern flags in rec will have to be set beforehand.
+The fields are stored on pages allocated from leaf node
+file segment of the index tree.
+@return	DB_SUCCESS or error */
+UNIV_INTERN
+ulint
+btr_store_big_rec_extern_fields(
+/*============================*/
+	dict_index_t*	index,		/*!< in: index of rec; the index tree
+					MUST be X-latched */
+	buf_block_t*	rec_block,	/*!< in/out: block containing rec */
+	rec_t*		rec,		/*!< in: record */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index);
+					the "external storage" flags in offsets
+					will not correspond to rec when
+					this function returns */
+	big_rec_t*	big_rec_vec,	/*!< in: vector containing fields
+					to be stored externally */
+	mtr_t*		local_mtr);	/*!< in: mtr containing the latch to
+					rec and to the tree */
+/*******************************************************************//**
+Frees the space in an externally stored field to the file space
+management if the field in data is owned the externally stored field,
+in a rollback we may have the additional condition that the field must
+not be inherited. */
+UNIV_INTERN
+void
+btr_free_externally_stored_field(
+/*=============================*/
+	dict_index_t*	index,		/*!< in: index of the data, the index
+					tree MUST be X-latched; if the tree
+					height is 1, then also the root page
+					must be X-latched! (this is relevant
+					in the case this function is called
+					from purge where 'data' is located on
+					an undo log page, not an index
+					page) */
+	byte*		field_ref,	/*!< in/out: field reference */
+	const rec_t*	rec,		/*!< in: record containing field_ref, for
+					page_zip_write_blob_ptr(), or NULL */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index),
+					or NULL */
+	page_zip_des_t*	page_zip,	/*!< in: compressed page corresponding
+					to rec, or NULL if rec == NULL */
+	ulint		i,		/*!< in: field number of field_ref;
+					ignored if rec == NULL */
+	enum trx_rb_ctx	rb_ctx,		/*!< in: rollback context */
+	mtr_t*		local_mtr);	/*!< in: mtr containing the latch to
+					data an an X-latch to the index
+					tree */
+/*******************************************************************//**
+Copies the prefix of an externally stored field of a record.  The
+clustered index record must be protected by a lock or a page latch.
+@return the length of the copied field, or 0 if the column was being
+or has been deleted */
+UNIV_INTERN
+ulint
+btr_copy_externally_stored_field_prefix(
+/*====================================*/
+	byte*		buf,	/*!< out: the field, or a prefix of it */
+	ulint		len,	/*!< in: length of buf, in bytes */
+	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
+				zero for uncompressed BLOBs */
+	const byte*	data,	/*!< in: 'internally' stored part of the
+				field containing also the reference to
+				the external part; must be protected by
+				a lock or a page latch */
+	ulint		local_len);/*!< in: length of data, in bytes */
+/*******************************************************************//**
+Copies an externally stored field of a record to mem heap.
+@return	the field copied to heap */
+UNIV_INTERN
+byte*
+btr_rec_copy_externally_stored_field(
+/*=================================*/
+	const rec_t*	rec,	/*!< in: record in a clustered index;
+				must be protected by a lock or a page latch */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
+				zero for uncompressed BLOBs */
+	ulint		no,	/*!< in: field number */
+	ulint*		len,	/*!< out: length of the field */
+	mem_heap_t*	heap);	/*!< in: mem heap */
+/*******************************************************************//**
+Flags the data tuple fields that are marked as extern storage in the
+update vector.  We use this function to remember which fields we must
+mark as extern storage in a record inserted for an update.
+@return	number of flagged external columns */
+UNIV_INTERN
+ulint
+btr_push_update_extern_fields(
+/*==========================*/
+	dtuple_t*	tuple,	/*!< in/out: data tuple */
+	const upd_t*	update,	/*!< in: update vector */
+	mem_heap_t*	heap)	/*!< in: memory heap */
+	__attribute__((nonnull));
+
+/*######################################################################*/
+
+/** In the pessimistic delete, if the page data size drops below this
+limit, merging it to a neighbor is tried */
+#define BTR_CUR_PAGE_COMPRESS_LIMIT	(UNIV_PAGE_SIZE / 2)
+
+/** A slot in the path array. We store here info on a search path down the
+tree. Each slot contains data on a single level of the tree. */
+
+typedef struct btr_path_struct	btr_path_t;
+struct btr_path_struct{
+	ulint	nth_rec;	/*!< index of the record
+				where the page cursor stopped on
+				this level (index in alphabetical
+				order); value ULINT_UNDEFINED
+				denotes array end */
+	ulint	n_recs;		/*!< number of records on the page */
+};
+
+#define BTR_PATH_ARRAY_N_SLOTS	250	/*!< size of path array (in slots) */
+
+/** Values for the flag documenting the used search method */
+enum btr_cur_method {
+	BTR_CUR_HASH = 1,	/*!< successful shortcut using
+				the hash index */
+	BTR_CUR_HASH_FAIL,	/*!< failure using hash, success using
+				binary search: the misleading hash
+				reference is stored in the field
+				hash_node, and might be necessary to
+				update */
+	BTR_CUR_BINARY,		/*!< success using the binary search */
+	BTR_CUR_INSERT_TO_IBUF,	/*!< performed the intended insert to
+				the insert buffer */
+};
+
+/** The tree cursor: the definition appears here only for the compiler
+to know struct size! */
+struct btr_cur_struct {
+	dict_index_t*	index;		/*!< index where positioned */
+	page_cur_t	page_cur;	/*!< page cursor */
+	buf_block_t*	left_block;	/*!< this field is used to store
+					a pointer to the left neighbor
+					page, in the cases
+					BTR_SEARCH_PREV and
+					BTR_MODIFY_PREV */
+	/*------------------------------*/
+	que_thr_t*	thr;		/*!< this field is only used
+					when btr_cur_search_to_nth_level
+					is called for an index entry
+					insertion: the calling query
+					thread is passed here to be
+					used in the insert buffer */
+	/*------------------------------*/
+	/** The following fields are used in
+	btr_cur_search_to_nth_level to pass information: */
+	/* @{ */
+	enum btr_cur_method	flag;	/*!< Search method used */
+	ulint		tree_height;	/*!< Tree height if the search is done
+					for a pessimistic insert or update
+					operation */
+	ulint		up_match;	/*!< If the search mode was PAGE_CUR_LE,
+					the number of matched fields to the
+					the first user record to the right of
+					the cursor record after
+					btr_cur_search_to_nth_level;
+					for the mode PAGE_CUR_GE, the matched
+					fields to the first user record AT THE
+					CURSOR or to the right of it;
+					NOTE that the up_match and low_match
+					values may exceed the correct values
+					for comparison to the adjacent user
+					record if that record is on a
+					different leaf page! (See the note in
+					row_ins_duplicate_key.) */
+	ulint		up_bytes;	/*!< number of matched bytes to the
+					right at the time cursor positioned;
+					only used internally in searches: not
+					defined after the search */
+	ulint		low_match;	/*!< if search mode was PAGE_CUR_LE,
+					the number of matched fields to the
+					first user record AT THE CURSOR or
+					to the left of it after
+					btr_cur_search_to_nth_level;
+					NOT defined for PAGE_CUR_GE or any
+					other search modes; see also the NOTE
+					in up_match! */
+	ulint		low_bytes;	/*!< number of matched bytes to the
+					right at the time cursor positioned;
+					only used internally in searches: not
+					defined after the search */
+	ulint		n_fields;	/*!< prefix length used in a hash
+					search if hash_node != NULL */
+	ulint		n_bytes;	/*!< hash prefix bytes if hash_node !=
+					NULL */
+	ulint		fold;		/*!< fold value used in the search if
+					flag is BTR_CUR_HASH */
+	/*------------------------------*/
+	/* @} */
+	btr_path_t*	path_arr;	/*!< in estimating the number of
+					rows in range, we store in this array
+					information of the path through
+					the tree */
+};
+
+/** If pessimistic delete fails because of lack of file space, there
+is still a good change of success a little later.  Try this many
+times. */
+#define BTR_CUR_RETRY_DELETE_N_TIMES	100
+/** If pessimistic delete fails because of lack of file space, there
+is still a good change of success a little later.  Sleep this many
+microseconds between retries. */
+#define BTR_CUR_RETRY_SLEEP_TIME	50000
+
+/** The reference in a field for which data is stored on a different page.
+The reference is at the end of the 'locally' stored part of the field.
+'Locally' means storage in the index record.
+We store locally a long enough prefix of each column so that we can determine
+the ordering parts of each index record without looking into the externally
+stored part. */
+/*-------------------------------------- @{ */
+#define BTR_EXTERN_SPACE_ID		0	/*!< space id where stored */
+#define BTR_EXTERN_PAGE_NO		4	/*!< page no where stored */
+#define BTR_EXTERN_OFFSET		8	/*!< offset of BLOB header
+						on that page */
+#define BTR_EXTERN_LEN			12	/*!< 8 bytes containing the
+						length of the externally
+						stored part of the BLOB.
+						The 2 highest bits are
+						reserved to the flags below. */
+/*-------------------------------------- @} */
+/* #define BTR_EXTERN_FIELD_REF_SIZE	20 // moved to btr0types.h */
+
+/** The most significant bit of BTR_EXTERN_LEN (i.e., the most
+significant bit of the byte at smallest address) is set to 1 if this
+field does not 'own' the externally stored field; only the owner field
+is allowed to free the field in purge! */
+#define BTR_EXTERN_OWNER_FLAG		128
+/** If the second most significant bit of BTR_EXTERN_LEN (i.e., the
+second most significant bit of the byte at smallest address) is 1 then
+it means that the externally stored field was inherited from an
+earlier version of the row.  In rollback we are not allowed to free an
+inherited external field. */
+#define BTR_EXTERN_INHERITED_FLAG	64
+
+/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
+extern ulint	btr_cur_n_non_sea;
+/** Number of successful adaptive hash index lookups in
+btr_cur_search_to_nth_level(). */
+extern ulint	btr_cur_n_sea;
+/** Old value of btr_cur_n_non_sea.  Copied by
+srv_refresh_innodb_monitor_stats().  Referenced by
+srv_printf_innodb_monitor(). */
+extern ulint	btr_cur_n_non_sea_old;
+/** Old value of btr_cur_n_sea.  Copied by
+srv_refresh_innodb_monitor_stats().  Referenced by
+srv_printf_innodb_monitor(). */
+extern ulint	btr_cur_n_sea_old;
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "btr0cur.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/btr0cur.ic b/storage/innodb_plugin/include/btr0cur.ic
new file mode 100644
index 00000000000..280583f6ccf
--- /dev/null
+++ b/storage/innodb_plugin/include/btr0cur.ic
@@ -0,0 +1,200 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0cur.ic
+The index tree cursor
+
+Created 10/16/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef UNIV_HOTBACKUP
+#include "btr0btr.h"
+
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Returns the page cursor component of a tree cursor.
+@return	pointer to page cursor component */
+UNIV_INLINE
+page_cur_t*
+btr_cur_get_page_cur(
+/*=================*/
+	const btr_cur_t*	cursor)	/*!< in: tree cursor */
+{
+	return(&((btr_cur_t*) cursor)->page_cur);
+}
+#endif /* UNIV_DEBUG */
+/*********************************************************//**
+Returns the buffer block on which the tree cursor is positioned.
+@return	pointer to buffer block */
+UNIV_INLINE
+buf_block_t*
+btr_cur_get_block(
+/*==============*/
+	btr_cur_t*	cursor)	/*!< in: tree cursor */
+{
+	return(page_cur_get_block(btr_cur_get_page_cur(cursor)));
+}
+
+/*********************************************************//**
+Returns the record pointer of a tree cursor.
+@return	pointer to record */
+UNIV_INLINE
+rec_t*
+btr_cur_get_rec(
+/*============*/
+	btr_cur_t*	cursor)	/*!< in: tree cursor */
+{
+	return(page_cur_get_rec(&(cursor->page_cur)));
+}
+
+/*********************************************************//**
+Returns the compressed page on which the tree cursor is positioned.
+@return	pointer to compressed page, or NULL if the page is not compressed */
+UNIV_INLINE
+page_zip_des_t*
+btr_cur_get_page_zip(
+/*=================*/
+	btr_cur_t*	cursor)	/*!< in: tree cursor */
+{
+	return(buf_block_get_page_zip(btr_cur_get_block(cursor)));
+}
+
+/*********************************************************//**
+Invalidates a tree cursor by setting record pointer to NULL. */
+UNIV_INLINE
+void
+btr_cur_invalidate(
+/*===============*/
+	btr_cur_t*	cursor)	/*!< in: tree cursor */
+{
+	page_cur_invalidate(&(cursor->page_cur));
+}
+
+/*********************************************************//**
+Returns the page of a tree cursor.
+@return	pointer to page */
+UNIV_INLINE
+page_t*
+btr_cur_get_page(
+/*=============*/
+	btr_cur_t*	cursor)	/*!< in: tree cursor */
+{
+	return(page_align(page_cur_get_rec(&(cursor->page_cur))));
+}
+
+/*********************************************************//**
+Returns the index of a cursor.
+@return	index */
+UNIV_INLINE
+dict_index_t*
+btr_cur_get_index(
+/*==============*/
+	btr_cur_t*	cursor)	/*!< in: B-tree cursor */
+{
+	return(cursor->index);
+}
+
+/*********************************************************//**
+Positions a tree cursor at a given record. */
+UNIV_INLINE
+void
+btr_cur_position(
+/*=============*/
+	dict_index_t*	index,	/*!< in: index */
+	rec_t*		rec,	/*!< in: record in tree */
+	buf_block_t*	block,	/*!< in: buffer block of rec */
+	btr_cur_t*	cursor)	/*!< out: cursor */
+{
+	ut_ad(page_align(rec) == block->frame);
+
+	page_cur_position(rec, block, btr_cur_get_page_cur(cursor));
+
+	cursor->index = index;
+}
+
+/*********************************************************************//**
+Checks if compressing an index page where a btr cursor is placed makes
+sense.
+@return	TRUE if compression is recommended */
+UNIV_INLINE
+ibool
+btr_cur_compress_recommendation(
+/*============================*/
+	btr_cur_t*	cursor,	/*!< in: btr cursor */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	page_t*		page;
+
+	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+				MTR_MEMO_PAGE_X_FIX));
+
+	page = btr_cur_get_page(cursor);
+
+	if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
+	    || ((btr_page_get_next(page, mtr) == FIL_NULL)
+		&& (btr_page_get_prev(page, mtr) == FIL_NULL))) {
+
+		/* The page fillfactor has dropped below a predefined
+		minimum value OR the level in the B-tree contains just
+		one page: we recommend compression if this is not the
+		root page. */
+
+		return(dict_index_get_page(cursor->index)
+		       != page_get_page_no(page));
+	}
+
+	return(FALSE);
+}
+
+/*********************************************************************//**
+Checks if the record on which the cursor is placed can be deleted without
+making tree compression necessary (or, recommended).
+@return	TRUE if can be deleted without recommended compression */
+UNIV_INLINE
+ibool
+btr_cur_can_delete_without_compress(
+/*================================*/
+	btr_cur_t*	cursor,	/*!< in: btr cursor */
+	ulint		rec_size,/*!< in: rec_get_size(btr_cur_get_rec(cursor))*/
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	page_t*		page;
+
+	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+				MTR_MEMO_PAGE_X_FIX));
+
+	page = btr_cur_get_page(cursor);
+
+	if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT)
+	    || ((btr_page_get_next(page, mtr) == FIL_NULL)
+		&& (btr_page_get_prev(page, mtr) == FIL_NULL))
+	    || (page_get_n_recs(page) < 2)) {
+
+		/* The page fillfactor will drop below a predefined
+		minimum value, OR the level in the B-tree contains just
+		one page, OR the page will become empty: we recommend
+		compression if this is not the root page. */
+
+		return(dict_index_get_page(cursor->index)
+		       == page_get_page_no(page));
+	}
+
+	return(TRUE);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innodb_plugin/include/btr0pcur.h
similarity index 53%
rename from storage/innobase/include/btr0pcur.h
rename to storage/innodb_plugin/include/btr0pcur.h
index ee40e905544..12b1375d8b7 100644
--- a/storage/innobase/include/btr0pcur.h
+++ b/storage/innodb_plugin/include/btr0pcur.h
@@ -1,7 +1,24 @@
-/******************************************************
-The index tree persistent cursor
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0pcur.h
+The index tree persistent cursor
 
 Created 2/23/1996 Heikki Tuuri
 *******************************************************/
@@ -29,164 +46,162 @@ of a scroll cursor easier */
 #define BTR_PCUR_BEFORE_FIRST_IN_TREE	4	/* in an empty tree */
 #define BTR_PCUR_AFTER_LAST_IN_TREE	5	/* in an empty tree */
 
-/******************************************************************
-Allocates memory for a persistent cursor object and initializes the cursor. */
-
+/**************************************************************//**
+Allocates memory for a persistent cursor object and initializes the cursor.
+@return	own: persistent cursor */
+UNIV_INTERN
 btr_pcur_t*
 btr_pcur_create_for_mysql(void);
 /*============================*/
-				/* out, own: persistent cursor */
-/******************************************************************
+/**************************************************************//**
 Frees the memory for a persistent cursor object. */
-
+UNIV_INTERN
 void
 btr_pcur_free_for_mysql(
 /*====================*/
-	btr_pcur_t*	cursor);	/* in, own: persistent cursor */
-/******************************************************************
+	btr_pcur_t*	cursor);	/*!< in, own: persistent cursor */
+/**************************************************************//**
 Copies the stored position of a pcur to another pcur. */
-
+UNIV_INTERN
 void
 btr_pcur_copy_stored_position(
 /*==========================*/
-	btr_pcur_t*	pcur_receive,	/* in: pcur which will receive the
+	btr_pcur_t*	pcur_receive,	/*!< in: pcur which will receive the
 					position info */
-	btr_pcur_t*	pcur_donate);	/* in: pcur from which the info is
+	btr_pcur_t*	pcur_donate);	/*!< in: pcur from which the info is
 					copied */
-/******************************************************************
+/**************************************************************//**
 Sets the old_rec_buf field to NULL. */
 UNIV_INLINE
 void
 btr_pcur_init(
 /*==========*/
-	btr_pcur_t*	pcur);	/* in: persistent cursor */
-/******************************************************************
+	btr_pcur_t*	pcur);	/*!< in: persistent cursor */
+/**************************************************************//**
 Initializes and opens a persistent cursor to an index tree. It should be
 closed with btr_pcur_close. */
 UNIV_INLINE
 void
 btr_pcur_open(
 /*==========*/
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	tuple,	/* in: tuple on which search done */
-	ulint		mode,	/* in: PAGE_CUR_L, ...;
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
+	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be
 				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
 				may end up on the previous page from the
 				record! */
-	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ... */
-	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
-	mtr_t*		mtr);	/* in: mtr */
-/******************************************************************
+	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
+	mtr_t*		mtr);	/*!< in: mtr */
+/**************************************************************//**
 Opens an persistent cursor to an index tree without initializing the
 cursor. */
 UNIV_INLINE
 void
 btr_pcur_open_with_no_init(
 /*=======================*/
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	tuple,	/* in: tuple on which search done */
-	ulint		mode,	/* in: PAGE_CUR_L, ...;
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
+	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be
 				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
 				may end up on the previous page of the
 				record! */
-	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ...;
+	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
 				NOTE that if has_search_latch != 0 then
 				we maybe do not acquire a latch on the cursor
 				page, but assume that the caller uses his
 				btr search latch to protect the record! */
-	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
-	ulint		has_search_latch,/* in: latch mode the caller
+	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
+	ulint		has_search_latch,/*!< in: latch mode the caller
 				currently has on btr_search_latch:
 				RW_S_LATCH, or 0 */
-	mtr_t*		mtr);	/* in: mtr */
-/*********************************************************************
+	mtr_t*		mtr);	/*!< in: mtr */
+/*****************************************************************//**
 Opens a persistent cursor at either end of an index. */
 UNIV_INLINE
 void
 btr_pcur_open_at_index_side(
 /*========================*/
-	ibool		from_left,	/* in: TRUE if open to the low end,
+	ibool		from_left,	/*!< in: TRUE if open to the low end,
 					FALSE if to the high end */
-	dict_index_t*	index,		/* in: index */
-	ulint		latch_mode,	/* in: latch mode */
-	btr_pcur_t*	pcur,		/* in: cursor */
-	ibool		do_init,	/* in: TRUE if should be initialized */
-	mtr_t*		mtr);		/* in: mtr */
-/******************************************************************
-Gets the up_match value for a pcur after a search. */
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: latch mode */
+	btr_pcur_t*	pcur,		/*!< in: cursor */
+	ibool		do_init,	/*!< in: TRUE if should be initialized */
+	mtr_t*		mtr);		/*!< in: mtr */
+/**************************************************************//**
+Gets the up_match value for a pcur after a search.
+@return number of matched fields at the cursor or to the right if
+search mode was PAGE_CUR_GE, otherwise undefined */
 UNIV_INLINE
 ulint
 btr_pcur_get_up_match(
 /*==================*/
-				/* out: number of matched fields at the cursor
-				or to the right if search mode was PAGE_CUR_GE,
-				otherwise undefined */
-	btr_pcur_t*	cursor); /* in: memory buffer for persistent cursor */
-/******************************************************************
-Gets the low_match value for a pcur after a search. */
+	btr_pcur_t*	cursor); /*!< in: memory buffer for persistent cursor */
+/**************************************************************//**
+Gets the low_match value for a pcur after a search.
+@return number of matched fields at the cursor or to the right if
+search mode was PAGE_CUR_LE, otherwise undefined */
 UNIV_INLINE
 ulint
 btr_pcur_get_low_match(
 /*===================*/
-				/* out: number of matched fields at the cursor
-				or to the right if search mode was PAGE_CUR_LE,
-				otherwise undefined */
-	btr_pcur_t*	cursor); /* in: memory buffer for persistent cursor */
-/******************************************************************
+	btr_pcur_t*	cursor); /*!< in: memory buffer for persistent cursor */
+/**************************************************************//**
 If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
 user record satisfying the search condition, in the case PAGE_CUR_L or
 PAGE_CUR_LE, on the last user record. If no such user record exists, then
 in the first case sets the cursor after last in tree, and in the latter case
 before first in tree. The latching mode must be BTR_SEARCH_LEAF or
 BTR_MODIFY_LEAF. */
-
+UNIV_INTERN
 void
 btr_pcur_open_on_user_rec(
 /*======================*/
-	dict_index_t*	index,		/* in: index */
-	dtuple_t*	tuple,		/* in: tuple on which search done */
-	ulint		mode,		/* in: PAGE_CUR_L, ... */
-	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF or
+	dict_index_t*	index,		/*!< in: index */
+	const dtuple_t*	tuple,		/*!< in: tuple on which search done */
+	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF or
 					BTR_MODIFY_LEAF */
-	btr_pcur_t*	cursor,		/* in: memory buffer for persistent
+	btr_pcur_t*	cursor,		/*!< in: memory buffer for persistent
 					cursor */
-	mtr_t*		mtr);		/* in: mtr */
-/**************************************************************************
+	mtr_t*		mtr);		/*!< in: mtr */
+/**********************************************************************//**
 Positions a cursor at a randomly chosen position within a B-tree. */
 UNIV_INLINE
 void
 btr_pcur_open_at_rnd_pos(
 /*=====================*/
-	dict_index_t*	index,		/* in: index */
-	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
-	btr_pcur_t*	cursor,		/* in/out: B-tree pcur */
-	mtr_t*		mtr);		/* in: mtr */
-/******************************************************************
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor,		/*!< in/out: B-tree pcur */
+	mtr_t*		mtr);		/*!< in: mtr */
+/**************************************************************//**
 Frees the possible old_rec_buf buffer of a persistent cursor and sets the
 latch mode of the persistent cursor to BTR_NO_LATCHES. */
 UNIV_INLINE
 void
 btr_pcur_close(
 /*===========*/
-	btr_pcur_t*	cursor);	/* in: persistent cursor */
-/******************************************************************
+	btr_pcur_t*	cursor);	/*!< in: persistent cursor */
+/**************************************************************//**
 The position of the cursor is stored by taking an initial segment of the
 record the cursor is positioned on, before, or after, and copying it to the
 cursor data structure, or just setting a flag if the cursor id before the
 first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
 page where the cursor is positioned must not be empty if the index tree is
 not totally empty! */
-
+UNIV_INTERN
 void
 btr_pcur_store_position(
 /*====================*/
-	btr_pcur_t*	cursor, /* in: persistent cursor */
-	mtr_t*		mtr);	/* in: mtr */
-/******************************************************************
+	btr_pcur_t*	cursor, /*!< in: persistent cursor */
+	mtr_t*		mtr);	/*!< in: mtr */
+/**************************************************************//**
 Restores the stored position of a persistent cursor bufferfixing the page and
 obtaining the specified latches. If the cursor position was saved when the
 (1) cursor was positioned on a user record: this function restores the position
@@ -197,56 +212,54 @@ infimum;
 (3) cursor was positioned on the page supremum: restores to the first record
 GREATER than the user record which was the predecessor of the supremum.
 (4) cursor was positioned before the first or after the last in an empty tree:
-restores to before first or after the last in the tree. */
-
+restores to before first or after the last in the tree.
+@return TRUE if the cursor position was stored when it was on a user
+record and it can be restored on a user record whose ordering fields
+are identical to the ones of the original user record */
+UNIV_INTERN
 ibool
 btr_pcur_restore_position(
 /*======================*/
-					/* out: TRUE if the cursor position
-					was stored when it was on a user record
-					and it can be restored on a user record
-					whose ordering fields are identical to
-					the ones of the original user record */
-	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
-	btr_pcur_t*	cursor,		/* in: detached persistent cursor */
-	mtr_t*		mtr);		/* in: mtr */
-/******************************************************************
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor,		/*!< in: detached persistent cursor */
+	mtr_t*		mtr);		/*!< in: mtr */
+/**************************************************************//**
 If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
 releases the page latch and bufferfix reserved by the cursor.
 NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes
 made by the current mini-transaction to the data protected by the
 cursor latch, as then the latch must not be released until mtr_commit. */
-
+UNIV_INTERN
 void
 btr_pcur_release_leaf(
 /*==================*/
-	btr_pcur_t*	cursor, /* in: persistent cursor */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************
-Gets the rel_pos field for a cursor whose position has been stored. */
+	btr_pcur_t*	cursor, /*!< in: persistent cursor */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
+Gets the rel_pos field for a cursor whose position has been stored.
+@return	BTR_PCUR_ON, ... */
 UNIV_INLINE
 ulint
 btr_pcur_get_rel_pos(
 /*=================*/
-				/* out: BTR_PCUR_ON, ... */
-	btr_pcur_t*	cursor);/* in: persistent cursor */
-/*************************************************************
+	const btr_pcur_t*	cursor);/*!< in: persistent cursor */
+/*********************************************************//**
 Sets the mtr field for a pcur. */
 UNIV_INLINE
 void
 btr_pcur_set_mtr(
 /*=============*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr);	/* in, own: mtr */
-/*************************************************************
-Gets the mtr field for a pcur. */
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
+	mtr_t*		mtr);	/*!< in, own: mtr */
+/*********************************************************//**
+Gets the mtr field for a pcur.
+@return	mtr */
 UNIV_INLINE
 mtr_t*
 btr_pcur_get_mtr(
 /*=============*/
-				/* out: mtr */
-	btr_pcur_t*	cursor);	/* in: persistent cursor */
-/******************************************************************
+	btr_pcur_t*	cursor);	/*!< in: persistent cursor */
+/**************************************************************//**
 Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
 that is, the cursor becomes detached. If there have been modifications
 to the page where pcur is positioned, this can be used instead of
@@ -256,80 +269,77 @@ UNIV_INLINE
 void
 btr_pcur_commit(
 /*============*/
-	btr_pcur_t*	pcur);	/* in: persistent cursor */
-/******************************************************************
+	btr_pcur_t*	pcur);	/*!< in: persistent cursor */
+/**************************************************************//**
 Differs from btr_pcur_commit in that we can specify the mtr to commit. */
 UNIV_INLINE
 void
 btr_pcur_commit_specify_mtr(
 /*========================*/
-	btr_pcur_t*	pcur,	/* in: persistent cursor */
-	mtr_t*		mtr);	/* in: mtr to commit */
-/******************************************************************
-Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */
+	btr_pcur_t*	pcur,	/*!< in: persistent cursor */
+	mtr_t*		mtr);	/*!< in: mtr to commit */
+/**************************************************************//**
+Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES.
+@return	TRUE if detached */
 UNIV_INLINE
 ibool
 btr_pcur_is_detached(
 /*=================*/
-				/* out: TRUE if detached */
-	btr_pcur_t*	pcur);	/* in: persistent cursor */
-/*************************************************************
+	btr_pcur_t*	pcur);	/*!< in: persistent cursor */
+/*********************************************************//**
 Moves the persistent cursor to the next record in the tree. If no records are
-left, the cursor stays 'after last in tree'. */
+left, the cursor stays 'after last in tree'.
+@return	TRUE if the cursor was not after last in tree */
 UNIV_INLINE
 ibool
 btr_pcur_move_to_next(
 /*==================*/
-				/* out: TRUE if the cursor was not after last
-				in tree */
-	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
 				function may release the page latch */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
 Moves the persistent cursor to the previous record in the tree. If no records
-are left, the cursor stays 'before first in tree'. */
-
+are left, the cursor stays 'before first in tree'.
+@return	TRUE if the cursor was not before first in tree */
+UNIV_INTERN
 ibool
 btr_pcur_move_to_prev(
 /*==================*/
-				/* out: TRUE if the cursor was not before first
-				in tree */
-	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
 				function may release the page latch */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
 Moves the persistent cursor to the last record on the same page. */
 UNIV_INLINE
 void
 btr_pcur_move_to_last_on_page(
 /*==========================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
 Moves the persistent cursor to the next user record in the tree. If no user
-records are left, the cursor ends up 'after last in tree'. */
+records are left, the cursor ends up 'after last in tree'.
+@return	TRUE if the cursor moved forward, ending on a user record */
 UNIV_INLINE
 ibool
 btr_pcur_move_to_next_user_rec(
 /*===========================*/
-				/* out: TRUE if the cursor moved forward,
-				ending on a user record */
-	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
 				function may release the page latch */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
 Moves the persistent cursor to the first record on the next page.
 Releases the latch on the current page, and bufferunfixes it.
 Note that there must not be modifications on the current page,
 as then the x-latch can be released only in mtr_commit. */
-
+UNIV_INTERN
 void
 btr_pcur_move_to_next_page(
 /*=======================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor; must be on the
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; must be on the
 				last record of the current page */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
 Moves the persistent cursor backward if it is on the first record
 of the page. Releases the latch on the current page, and bufferunfixes
 it. Note that to prevent a possible deadlock, the operation first
@@ -339,113 +349,121 @@ The alphabetical position of the cursor is guaranteed to be sensible
 on return, but it may happen that the cursor is not positioned on the
 last record of any page, because the structure of the tree may have
 changed while the cursor had no latches. */
-
+UNIV_INTERN
 void
 btr_pcur_move_backward_from_page(
 /*=============================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor, must be on the
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor, must be on the
 				first record of the current page */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************
-Returns the btr cursor component of a persistent cursor. */
+	mtr_t*		mtr);	/*!< in: mtr */
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Returns the btr cursor component of a persistent cursor.
+@return	pointer to btr cursor component */
 UNIV_INLINE
 btr_cur_t*
 btr_pcur_get_btr_cur(
 /*=================*/
-				/* out: pointer to btr cursor component */
-	btr_pcur_t*	cursor);	/* in: persistent cursor */
-/*************************************************************
-Returns the page cursor component of a persistent cursor. */
+	const btr_pcur_t*	cursor);	/*!< in: persistent cursor */
+/*********************************************************//**
+Returns the page cursor component of a persistent cursor.
+@return	pointer to page cursor component */
 UNIV_INLINE
 page_cur_t*
 btr_pcur_get_page_cur(
 /*==================*/
-				/* out: pointer to page cursor component */
-	btr_pcur_t*	cursor);	/* in: persistent cursor */
-/*************************************************************
-Returns the page of a persistent cursor. */
+	const btr_pcur_t*	cursor);	/*!< in: persistent cursor */
+#else /* UNIV_DEBUG */
+# define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur)
+# define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur)
+#endif /* UNIV_DEBUG */
+/*********************************************************//**
+Returns the page of a persistent cursor.
+@return	pointer to the page */
 UNIV_INLINE
 page_t*
 btr_pcur_get_page(
 /*==============*/
-				/* out: pointer to the page */
-	btr_pcur_t*	cursor);/* in: persistent cursor */
-/*************************************************************
-Returns the record of a persistent cursor. */
+	btr_pcur_t*	cursor);/*!< in: persistent cursor */
+/*********************************************************//**
+Returns the buffer block of a persistent cursor.
+@return	pointer to the block */
+UNIV_INLINE
+buf_block_t*
+btr_pcur_get_block(
+/*===============*/
+	btr_pcur_t*	cursor);/*!< in: persistent cursor */
+/*********************************************************//**
+Returns the record of a persistent cursor.
+@return	pointer to the record */
 UNIV_INLINE
 rec_t*
 btr_pcur_get_rec(
 /*=============*/
-				/* out: pointer to the record */
-	btr_pcur_t*	cursor);/* in: persistent cursor */
-/*************************************************************
+	btr_pcur_t*	cursor);/*!< in: persistent cursor */
+/*********************************************************//**
 Checks if the persistent cursor is on a user record. */
 UNIV_INLINE
 ibool
 btr_pcur_is_on_user_rec(
 /*====================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************
+	const btr_pcur_t*	cursor);/*!< in: persistent cursor */
+/*********************************************************//**
 Checks if the persistent cursor is after the last user record on
 a page. */
 UNIV_INLINE
 ibool
 btr_pcur_is_after_last_on_page(
 /*===========================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************
+	const btr_pcur_t*	cursor);/*!< in: persistent cursor */
+/*********************************************************//**
 Checks if the persistent cursor is before the first user record on
 a page. */
 UNIV_INLINE
 ibool
 btr_pcur_is_before_first_on_page(
 /*=============================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************
+	const btr_pcur_t*	cursor);/*!< in: persistent cursor */
+/*********************************************************//**
 Checks if the persistent cursor is before the first user record in
 the index tree. */
 UNIV_INLINE
 ibool
 btr_pcur_is_before_first_in_tree(
 /*=============================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
 Checks if the persistent cursor is after the last user record in
 the index tree. */
 UNIV_INLINE
 ibool
 btr_pcur_is_after_last_in_tree(
 /*===========================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
 Moves the persistent cursor to the next record on the same page. */
 UNIV_INLINE
 void
 btr_pcur_move_to_next_on_page(
 /*==========================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr);	/* in: mtr */
-/*************************************************************
+	btr_pcur_t*	cursor);/*!< in/out: persistent cursor */
+/*********************************************************//**
 Moves the persistent cursor to the previous record on the same page. */
 UNIV_INLINE
 void
 btr_pcur_move_to_prev_on_page(
 /*==========================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr);	/* in: mtr */
+	btr_pcur_t*	cursor);/*!< in/out: persistent cursor */
 
 
 /* The persistent B-tree cursor structure. This is used mainly for SQL
 selects, updates, and deletes. */
 
 struct btr_pcur_struct{
-	btr_cur_t	btr_cur;	/* a B-tree cursor */
-	ulint		latch_mode;	/* see TODO note below!
+	btr_cur_t	btr_cur;	/*!< a B-tree cursor */
+	ulint		latch_mode;	/*!< see TODO note below!
 					BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
 					BTR_MODIFY_TREE, or BTR_NO_LATCHES,
 					depending on the latching state of
@@ -456,29 +474,28 @@ struct btr_pcur_struct{
 					detached; it can be restored to
 					attached if the old position was
 					stored in old_rec */
-	ulint		old_stored;	/* BTR_PCUR_OLD_STORED
+	ulint		old_stored;	/*!< BTR_PCUR_OLD_STORED
 					or BTR_PCUR_OLD_NOT_STORED */
-	rec_t*		old_rec;	/* if cursor position is stored,
+	rec_t*		old_rec;	/*!< if cursor position is stored,
 					contains an initial segment of the
 					latest record cursor was positioned
 					either on, before, or after */
-	ulint		old_n_fields;	/* number of fields in old_rec */
-	ulint		rel_pos;	/* BTR_PCUR_ON, BTR_PCUR_BEFORE, or
+	ulint		old_n_fields;	/*!< number of fields in old_rec */
+	ulint		rel_pos;	/*!< BTR_PCUR_ON, BTR_PCUR_BEFORE, or
 					BTR_PCUR_AFTER, depending on whether
 					cursor was on, before, or after the
 					old_rec record */
 	buf_block_t*	block_when_stored;/* buffer block when the position was
-					stored; note that if AWE is on, frames
-					may move */
-	dulint		modify_clock;	/* the modify clock value of the
+					stored */
+	ib_uint64_t	modify_clock;	/*!< the modify clock value of the
 					buffer block when the cursor position
 					was stored */
-	ulint		pos_state;	/* see TODO note below!
+	ulint		pos_state;	/*!< see TODO note below!
 					BTR_PCUR_IS_POSITIONED,
 					BTR_PCUR_WAS_POSITIONED,
 					BTR_PCUR_NOT_POSITIONED */
-	ulint		search_mode;	/* PAGE_CUR_G, ... */
-	trx_t*		trx_if_known;	/* the transaction, if we know it;
+	ulint		search_mode;	/*!< PAGE_CUR_G, ... */
+	trx_t*		trx_if_known;	/*!< the transaction, if we know it;
 					otherwise this field is not defined;
 					can ONLY BE USED in error prints in
 					fatal assertion failures! */
@@ -486,12 +503,12 @@ struct btr_pcur_struct{
 	/* NOTE that the following fields may possess dynamically allocated
 	memory which should be freed if not needed anymore! */
 
-	mtr_t*		mtr;		/* NULL, or this field may contain
+	mtr_t*		mtr;		/*!< NULL, or this field may contain
 					a mini-transaction which holds the
 					latch on the cursor page */
-	byte*		old_rec_buf;	/* NULL, or a dynamically allocated
+	byte*		old_rec_buf;	/*!< NULL, or a dynamically allocated
 					buffer for old_rec */
-	ulint		buf_size;	/* old_rec_buf size if old_rec_buf
+	ulint		buf_size;	/*!< old_rec_buf size if old_rec_buf
 					is not NULL */
 };
 
diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innodb_plugin/include/btr0pcur.ic
similarity index 58%
rename from storage/innobase/include/btr0pcur.ic
rename to storage/innodb_plugin/include/btr0pcur.ic
index 66462530716..0ca7223f861 100644
--- a/storage/innobase/include/btr0pcur.ic
+++ b/storage/innodb_plugin/include/btr0pcur.ic
@@ -1,20 +1,37 @@
-/******************************************************
-The index tree persistent cursor
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0pcur.ic
+The index tree persistent cursor
 
 Created 2/23/1996 Heikki Tuuri
 *******************************************************/
 
 
-/*************************************************************
-Gets the rel_pos field for a cursor whose position has been stored. */
+/*********************************************************//**
+Gets the rel_pos field for a cursor whose position has been stored.
+@return	BTR_PCUR_ON, ... */
 UNIV_INLINE
 ulint
 btr_pcur_get_rel_pos(
 /*=================*/
-				/* out: BTR_PCUR_ON, ... */
-	btr_pcur_t*	cursor)	/* in: persistent cursor */
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
 {
 	ut_ad(cursor);
 	ut_ad(cursor->old_rec);
@@ -25,97 +42,112 @@ btr_pcur_get_rel_pos(
 	return(cursor->rel_pos);
 }
 
-/*************************************************************
+/*********************************************************//**
 Sets the mtr field for a pcur. */
 UNIV_INLINE
 void
 btr_pcur_set_mtr(
 /*=============*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr)	/* in, own: mtr */
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
+	mtr_t*		mtr)	/*!< in, own: mtr */
 {
 	ut_ad(cursor);
 
 	cursor->mtr = mtr;
 }
 
-/*************************************************************
-Gets the mtr field for a pcur. */
+/*********************************************************//**
+Gets the mtr field for a pcur.
+@return	mtr */
 UNIV_INLINE
 mtr_t*
 btr_pcur_get_mtr(
 /*=============*/
-				/* out: mtr */
-	btr_pcur_t*	cursor)	/* in: persistent cursor */
+	btr_pcur_t*	cursor)	/*!< in: persistent cursor */
 {
 	ut_ad(cursor);
 
 	return(cursor->mtr);
 }
 
-/*************************************************************
-Returns the btr cursor component of a persistent cursor. */
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Returns the btr cursor component of a persistent cursor.
+@return	pointer to btr cursor component */
 UNIV_INLINE
 btr_cur_t*
 btr_pcur_get_btr_cur(
 /*=================*/
-				/* out: pointer to btr cursor component */
-	btr_pcur_t*	cursor)	/* in: persistent cursor */
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
 {
-	return(&(cursor->btr_cur));
+	const btr_cur_t*	btr_cur = &cursor->btr_cur;
+	return((btr_cur_t*) btr_cur);
 }
 
-/*************************************************************
-Returns the page cursor component of a persistent cursor. */
+/*********************************************************//**
+Returns the page cursor component of a persistent cursor.
+@return	pointer to page cursor component */
 UNIV_INLINE
 page_cur_t*
 btr_pcur_get_page_cur(
 /*==================*/
-				/* out: pointer to page cursor component */
-	btr_pcur_t*	cursor)	/* in: persistent cursor */
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
 {
-	return(btr_cur_get_page_cur(&(cursor->btr_cur)));
+	return(btr_cur_get_page_cur(btr_pcur_get_btr_cur(cursor)));
 }
-
-/*************************************************************
-Returns the page of a persistent cursor. */
+#endif /* UNIV_DEBUG */
+/*********************************************************//**
+Returns the page of a persistent cursor.
+@return	pointer to the page */
 UNIV_INLINE
 page_t*
 btr_pcur_get_page(
 /*==============*/
-				/* out: pointer to the page */
-	btr_pcur_t*	cursor)	/* in: persistent cursor */
+	btr_pcur_t*	cursor)	/*!< in: persistent cursor */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 
-	return(page_cur_get_page(btr_pcur_get_page_cur(cursor)));
+	return(btr_cur_get_page(btr_pcur_get_btr_cur(cursor)));
 }
 
-/*************************************************************
-Returns the record of a persistent cursor. */
+/*********************************************************//**
+Returns the buffer block of a persistent cursor.
+@return	pointer to the block */
+UNIV_INLINE
+buf_block_t*
+btr_pcur_get_block(
+/*===============*/
+	btr_pcur_t*	cursor)	/*!< in: persistent cursor */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+
+	return(btr_cur_get_block(btr_pcur_get_btr_cur(cursor)));
+}
+
+/*********************************************************//**
+Returns the record of a persistent cursor.
+@return	pointer to the record */
 UNIV_INLINE
 rec_t*
 btr_pcur_get_rec(
 /*=============*/
-				/* out: pointer to the record */
-	btr_pcur_t*	cursor)	/* in: persistent cursor */
+	btr_pcur_t*	cursor)	/*!< in: persistent cursor */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
-	return(page_cur_get_rec(btr_pcur_get_page_cur(cursor)));
+	return(btr_cur_get_rec(btr_pcur_get_btr_cur(cursor)));
 }
 
-/******************************************************************
-Gets the up_match value for a pcur after a search. */
+/**************************************************************//**
+Gets the up_match value for a pcur after a search.
+@return number of matched fields at the cursor or to the right if
+search mode was PAGE_CUR_GE, otherwise undefined */
 UNIV_INLINE
 ulint
 btr_pcur_get_up_match(
 /*==================*/
-				/* out: number of matched fields at the cursor
-				or to the right if search mode was PAGE_CUR_GE,
-				otherwise undefined */
-	btr_pcur_t*	cursor) /* in: memory buffer for persistent cursor */
+	btr_pcur_t*	cursor) /*!< in: memory buffer for persistent cursor */
 {
 	btr_cur_t*	btr_cursor;
 
@@ -129,16 +161,15 @@ btr_pcur_get_up_match(
 	return(btr_cursor->up_match);
 }
 
-/******************************************************************
-Gets the low_match value for a pcur after a search. */
+/**************************************************************//**
+Gets the low_match value for a pcur after a search.
+@return number of matched fields at the cursor or to the right if
+search mode was PAGE_CUR_LE, otherwise undefined */
 UNIV_INLINE
 ulint
 btr_pcur_get_low_match(
 /*===================*/
-				/* out: number of matched fields at the cursor
-				or to the right if search mode was PAGE_CUR_LE,
-				otherwise undefined */
-	btr_pcur_t*	cursor) /* in: memory buffer for persistent cursor */
+	btr_pcur_t*	cursor) /*!< in: memory buffer for persistent cursor */
 {
 	btr_cur_t*	btr_cursor;
 
@@ -151,54 +182,49 @@ btr_pcur_get_low_match(
 	return(btr_cursor->low_match);
 }
 
-/*************************************************************
+/*********************************************************//**
 Checks if the persistent cursor is after the last user record on
 a page. */
 UNIV_INLINE
 ibool
 btr_pcur_is_after_last_on_page(
 /*===========================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr)	/* in: mtr */
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
 {
-	UT_NOT_USED(mtr);
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
 	return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
 }
 
-/*************************************************************
+/*********************************************************//**
 Checks if the persistent cursor is before the first user record on
 a page. */
 UNIV_INLINE
 ibool
 btr_pcur_is_before_first_on_page(
 /*=============================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr)	/* in: mtr */
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
 {
-	UT_NOT_USED(mtr);
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
 	return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
 }
 
-/*************************************************************
+/*********************************************************//**
 Checks if the persistent cursor is on a user record. */
 UNIV_INLINE
 ibool
 btr_pcur_is_on_user_rec(
 /*====================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr)	/* in: mtr */
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
-	if ((btr_pcur_is_before_first_on_page(cursor, mtr))
-	    || (btr_pcur_is_after_last_on_page(cursor, mtr))) {
+	if (btr_pcur_is_before_first_on_page(cursor)
+	    || btr_pcur_is_after_last_on_page(cursor)) {
 
 		return(FALSE);
 	}
@@ -206,15 +232,15 @@ btr_pcur_is_on_user_rec(
 	return(TRUE);
 }
 
-/*************************************************************
+/*********************************************************//**
 Checks if the persistent cursor is before the first user record in
 the index tree. */
 UNIV_INLINE
 ibool
 btr_pcur_is_before_first_in_tree(
 /*=============================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr)	/* in: mtr */
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
@@ -227,15 +253,15 @@ btr_pcur_is_before_first_in_tree(
 	return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
 }
 
-/*************************************************************
+/*********************************************************//**
 Checks if the persistent cursor is after the last user record in
 the index tree. */
 UNIV_INLINE
 ibool
 btr_pcur_is_after_last_in_tree(
 /*===========================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr)	/* in: mtr */
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
@@ -248,16 +274,14 @@ btr_pcur_is_after_last_in_tree(
 	return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
 }
 
-/*************************************************************
+/*********************************************************//**
 Moves the persistent cursor to the next record on the same page. */
 UNIV_INLINE
 void
 btr_pcur_move_to_next_on_page(
 /*==========================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr)	/* in: mtr */
+	btr_pcur_t*	cursor)	/*!< in/out: persistent cursor */
 {
-	UT_NOT_USED(mtr);
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
@@ -266,16 +290,14 @@ btr_pcur_move_to_next_on_page(
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 }
 
-/*************************************************************
+/*********************************************************//**
 Moves the persistent cursor to the previous record on the same page. */
 UNIV_INLINE
 void
 btr_pcur_move_to_prev_on_page(
 /*==========================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr)	/* in: mtr */
+	btr_pcur_t*	cursor)	/*!< in/out: persistent cursor */
 {
-	UT_NOT_USED(mtr);
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
@@ -284,42 +306,41 @@ btr_pcur_move_to_prev_on_page(
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 }
 
-/*************************************************************
+/*********************************************************//**
 Moves the persistent cursor to the last record on the same page. */
 UNIV_INLINE
 void
 btr_pcur_move_to_last_on_page(
 /*==========================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr)	/* in: mtr */
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	UT_NOT_USED(mtr);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
-	page_cur_set_after_last(buf_frame_align(btr_pcur_get_rec(cursor)),
+	page_cur_set_after_last(btr_pcur_get_block(cursor),
 				btr_pcur_get_page_cur(cursor));
 
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 }
 
-/*************************************************************
+/*********************************************************//**
 Moves the persistent cursor to the next user record in the tree. If no user
-records are left, the cursor ends up 'after last in tree'. */
+records are left, the cursor ends up 'after last in tree'.
+@return	TRUE if the cursor moved forward, ending on a user record */
 UNIV_INLINE
 ibool
 btr_pcur_move_to_next_user_rec(
 /*===========================*/
-				/* out: TRUE if the cursor moved forward,
-				ending on a user record */
-	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
 				function may release the page latch */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 loop:
-	if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+	if (btr_pcur_is_after_last_on_page(cursor)) {
 
 		if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
 
@@ -328,10 +349,10 @@ loop:
 
 		btr_pcur_move_to_next_page(cursor, mtr);
 	} else {
-		btr_pcur_move_to_next_on_page(cursor, mtr);
+		btr_pcur_move_to_next_on_page(cursor);
 	}
 
-	if (btr_pcur_is_on_user_rec(cursor, mtr)) {
+	if (btr_pcur_is_on_user_rec(cursor)) {
 
 		return(TRUE);
 	}
@@ -339,25 +360,24 @@ loop:
 	goto loop;
 }
 
-/*************************************************************
+/*********************************************************//**
 Moves the persistent cursor to the next record in the tree. If no records are
-left, the cursor stays 'after last in tree'. */
+left, the cursor stays 'after last in tree'.
+@return	TRUE if the cursor was not after last in tree */
 UNIV_INLINE
 ibool
 btr_pcur_move_to_next(
 /*==================*/
-				/* out: TRUE if the cursor was not after last
-				in tree */
-	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
 				function may release the page latch */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 
-	if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+	if (btr_pcur_is_after_last_on_page(cursor)) {
 
 		if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
 
@@ -369,12 +389,12 @@ btr_pcur_move_to_next(
 		return(TRUE);
 	}
 
-	btr_pcur_move_to_next_on_page(cursor, mtr);
+	btr_pcur_move_to_next_on_page(cursor);
 
 	return(TRUE);
 }
 
-/******************************************************************
+/**************************************************************//**
 Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
 that is, the cursor becomes detached. If there have been modifications
 to the page where pcur is positioned, this can be used instead of
@@ -384,7 +404,7 @@ UNIV_INLINE
 void
 btr_pcur_commit(
 /*============*/
-	btr_pcur_t*	pcur)	/* in: persistent cursor */
+	btr_pcur_t*	pcur)	/*!< in: persistent cursor */
 {
 	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
 
@@ -395,14 +415,14 @@ btr_pcur_commit(
 	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
 }
 
-/******************************************************************
+/**************************************************************//**
 Differs from btr_pcur_commit in that we can specify the mtr to commit. */
 UNIV_INLINE
 void
 btr_pcur_commit_specify_mtr(
 /*========================*/
-	btr_pcur_t*	pcur,	/* in: persistent cursor */
-	mtr_t*		mtr)	/* in: mtr to commit */
+	btr_pcur_t*	pcur,	/*!< in: persistent cursor */
+	mtr_t*		mtr)	/*!< in: mtr to commit */
 {
 	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
 
@@ -413,13 +433,13 @@ btr_pcur_commit_specify_mtr(
 	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
 }
 
-/******************************************************************
+/**************************************************************//**
 Sets the pcur latch mode to BTR_NO_LATCHES. */
 UNIV_INLINE
 void
 btr_pcur_detach(
 /*============*/
-	btr_pcur_t*	pcur)	/* in: persistent cursor */
+	btr_pcur_t*	pcur)	/*!< in: persistent cursor */
 {
 	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
 
@@ -428,14 +448,14 @@ btr_pcur_detach(
 	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
 }
 
-/******************************************************************
-Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */
+/**************************************************************//**
+Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES.
+@return	TRUE if detached */
 UNIV_INLINE
 ibool
 btr_pcur_is_detached(
 /*=================*/
-				/* out: TRUE if detached */
-	btr_pcur_t*	pcur)	/* in: persistent cursor */
+	btr_pcur_t*	pcur)	/*!< in: persistent cursor */
 {
 	if (pcur->latch_mode == BTR_NO_LATCHES) {
 
@@ -445,37 +465,37 @@ btr_pcur_is_detached(
 	return(FALSE);
 }
 
-/******************************************************************
+/**************************************************************//**
 Sets the old_rec_buf field to NULL. */
 UNIV_INLINE
 void
 btr_pcur_init(
 /*==========*/
-	btr_pcur_t*	pcur)	/* in: persistent cursor */
+	btr_pcur_t*	pcur)	/*!< in: persistent cursor */
 {
 	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
 	pcur->old_rec_buf = NULL;
 	pcur->old_rec = NULL;
 }
 
-/******************************************************************
+/**************************************************************//**
 Initializes and opens a persistent cursor to an index tree. It should be
 closed with btr_pcur_close. */
 UNIV_INLINE
 void
 btr_pcur_open(
 /*==========*/
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	tuple,	/* in: tuple on which search done */
-	ulint		mode,	/* in: PAGE_CUR_L, ...;
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
+	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be
 				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
 				may end up on the previous page from the
 				record! */
-	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ... */
-	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
-	mtr_t*		mtr)	/* in: mtr */
+	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	btr_cur_t*	btr_cursor;
 
@@ -497,31 +517,31 @@ btr_pcur_open(
 	cursor->trx_if_known = NULL;
 }
 
-/******************************************************************
+/**************************************************************//**
 Opens an persistent cursor to an index tree without initializing the
 cursor. */
 UNIV_INLINE
 void
 btr_pcur_open_with_no_init(
 /*=======================*/
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	tuple,	/* in: tuple on which search done */
-	ulint		mode,	/* in: PAGE_CUR_L, ...;
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
+	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
 				NOTE that if the search is made using a unique
 				prefix of a record, mode should be
 				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
 				may end up on the previous page of the
 				record! */
-	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ...;
+	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
 				NOTE that if has_search_latch != 0 then
 				we maybe do not acquire a latch on the cursor
 				page, but assume that the caller uses his
 				btr search latch to protect the record! */
-	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
-	ulint		has_search_latch,/* in: latch mode the caller
+	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
+	ulint		has_search_latch,/*!< in: latch mode the caller
 				currently has on btr_search_latch:
 				RW_S_LATCH, or 0 */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	btr_cur_t*	btr_cursor;
 
@@ -541,19 +561,19 @@ btr_pcur_open_with_no_init(
 	cursor->trx_if_known = NULL;
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Opens a persistent cursor at either end of an index. */
 UNIV_INLINE
 void
 btr_pcur_open_at_index_side(
 /*========================*/
-	ibool		from_left,	/* in: TRUE if open to the low end,
+	ibool		from_left,	/*!< in: TRUE if open to the low end,
 					FALSE if to the high end */
-	dict_index_t*	index,		/* in: index */
-	ulint		latch_mode,	/* in: latch mode */
-	btr_pcur_t*	pcur,		/* in: cursor */
-	ibool		do_init,	/* in: TRUE if should be initialized */
-	mtr_t*		mtr)		/* in: mtr */
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: latch mode */
+	btr_pcur_t*	pcur,		/*!< in: cursor */
+	ibool		do_init,	/*!< in: TRUE if should be initialized */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	pcur->latch_mode = latch_mode;
 
@@ -576,16 +596,16 @@ btr_pcur_open_at_index_side(
 	pcur->trx_if_known = NULL;
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Positions a cursor at a randomly chosen position within a B-tree. */
 UNIV_INLINE
 void
 btr_pcur_open_at_rnd_pos(
 /*=====================*/
-	dict_index_t*	index,		/* in: index */
-	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
-	btr_pcur_t*	cursor,		/* in/out: B-tree pcur */
-	mtr_t*		mtr)		/* in: mtr */
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor,		/*!< in/out: B-tree pcur */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	/* Initialize the cursor */
 
@@ -602,14 +622,14 @@ btr_pcur_open_at_rnd_pos(
 	cursor->trx_if_known = NULL;
 }
 
-/******************************************************************
+/**************************************************************//**
 Frees the possible memory heap of a persistent cursor and sets the latch
 mode of the persistent cursor to BTR_NO_LATCHES. */
 UNIV_INLINE
 void
 btr_pcur_close(
 /*===========*/
-	btr_pcur_t*	cursor)	/* in: persistent cursor */
+	btr_pcur_t*	cursor)	/*!< in: persistent cursor */
 {
 	if (cursor->old_rec_buf != NULL) {
 
@@ -620,6 +640,7 @@ btr_pcur_close(
 	}
 
 	cursor->btr_cur.page_cur.rec = NULL;
+	cursor->btr_cur.page_cur.block = NULL;
 	cursor->old_rec = NULL;
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 
diff --git a/storage/innobase/include/btr0sea.h b/storage/innodb_plugin/include/btr0sea.h
similarity index 50%
rename from storage/innobase/include/btr0sea.h
rename to storage/innodb_plugin/include/btr0sea.h
index 6d1c2bb86d3..631b3bd386c 100644
--- a/storage/innobase/include/btr0sea.h
+++ b/storage/innodb_plugin/include/btr0sea.h
@@ -1,7 +1,24 @@
-/************************************************************************
-The index tree adaptive search
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/btr0sea.h
+The index tree adaptive search
 
 Created 2/17/1996 Heikki Tuuri
 *************************************************************************/
@@ -17,195 +34,229 @@ Created 2/17/1996 Heikki Tuuri
 #include "mtr0mtr.h"
 #include "ha0ha.h"
 
-/*********************************************************************
+/*****************************************************************//**
 Creates and initializes the adaptive search system at a database start. */
-
+UNIV_INTERN
 void
 btr_search_sys_create(
 /*==================*/
-	ulint	hash_size);	/* in: hash index hash table size */
-/************************************************************************
-Returns search info for an index. */
+	ulint	hash_size);	/*!< in: hash index hash table size */
+
+/********************************************************************//**
+Disable the adaptive hash search system and empty the index. */
+UNIV_INTERN
+void
+btr_search_disable(void);
+/*====================*/
+/********************************************************************//**
+Enable the adaptive hash search system. */
+UNIV_INTERN
+void
+btr_search_enable(void);
+/*====================*/
+
+/********************************************************************//**
+Returns search info for an index.
+@return	search info; search mutex reserved */
 UNIV_INLINE
 btr_search_t*
 btr_search_get_info(
 /*================*/
-				/* out: search info; search mutex reserved */
-	dict_index_t*	index);	/* in: index */
-/*********************************************************************
-Creates and initializes a search info struct. */
-
+	dict_index_t*	index);	/*!< in: index */
+/*****************************************************************//**
+Creates and initializes a search info struct.
+@return	own: search info struct */
+UNIV_INTERN
 btr_search_t*
 btr_search_info_create(
 /*===================*/
-				/* out, own: search info struct */
-	mem_heap_t*	heap);	/* in: heap where created */
-/*********************************************************************
+	mem_heap_t*	heap);	/*!< in: heap where created */
+/*****************************************************************//**
 Returns the value of ref_count. The value is protected by
-btr_search_latch. */
+btr_search_latch.
+@return	ref_count value. */
+UNIV_INTERN
 ulint
 btr_search_info_get_ref_count(
 /*==========================*/
-				/* out: ref_count value. */
-	btr_search_t*   info);	/* in: search info. */
-/*************************************************************************
+	btr_search_t*   info);	/*!< in: search info. */
+/*********************************************************************//**
 Updates the search info. */
 UNIV_INLINE
 void
 btr_search_info_update(
 /*===================*/
-	dict_index_t*	index,	/* in: index of the cursor */
-	btr_cur_t*	cursor);/* in: cursor which was just positioned */
-/**********************************************************************
+	dict_index_t*	index,	/*!< in: index of the cursor */
+	btr_cur_t*	cursor);/*!< in: cursor which was just positioned */
+/******************************************************************//**
 Tries to guess the right search position based on the hash search info
 of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
 and the function returns TRUE, then cursor->up_match and cursor->low_match
-both have sensible values. */
-
+both have sensible values.
+@return	TRUE if succeeded */
+UNIV_INTERN
 ibool
 btr_search_guess_on_hash(
 /*=====================*/
-					/* out: TRUE if succeeded */
-	dict_index_t*	index,		/* in: index */
-	btr_search_t*	info,		/* in: index search info */
-	dtuple_t*	tuple,		/* in: logical record */
-	ulint		mode,		/* in: PAGE_CUR_L, ... */
-	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
-	btr_cur_t*	cursor,		/* out: tree cursor */
-	ulint		has_search_latch,/* in: latch mode the caller
+	dict_index_t*	index,		/*!< in: index */
+	btr_search_t*	info,		/*!< in: index search info */
+	const dtuple_t*	tuple,		/*!< in: logical record */
+	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_cur_t*	cursor,		/*!< out: tree cursor */
+	ulint		has_search_latch,/*!< in: latch mode the caller
 					currently has on btr_search_latch:
 					RW_S_LATCH, RW_X_LATCH, or 0 */
-	mtr_t*		mtr);		/* in: mtr */
-/************************************************************************
+	mtr_t*		mtr);		/*!< in: mtr */
+/********************************************************************//**
 Moves or deletes hash entries for moved records. If new_page is already hashed,
 then the hash index for page, if any, is dropped. If new_page is not hashed,
 and page is hashed, then a new hash index is built to new_page with the same
 parameters as page (this often happens when a page is split). */
-
+UNIV_INTERN
 void
 btr_search_move_or_delete_hash_entries(
 /*===================================*/
-	page_t*		new_page,	/* in: records are copied
+	buf_block_t*	new_block,	/*!< in: records are copied
 					to this page */
-	page_t*		page,		/* in: index page */
-	dict_index_t*	index);		/* in: record descriptor */
-/************************************************************************
+	buf_block_t*	block,		/*!< in: index page from which
+					records were copied, and the
+					copied records will be deleted
+					from this page */
+	dict_index_t*	index);		/*!< in: record descriptor */
+/********************************************************************//**
 Drops a page hash index. */
-
+UNIV_INTERN
 void
 btr_search_drop_page_hash_index(
 /*============================*/
-	page_t*	page);	/* in: index page, s- or x-latched */
-/************************************************************************
+	buf_block_t*	block);	/*!< in: block containing index page,
+				s- or x-latched, or an index page
+				for which we know that
+				block->buf_fix_count == 0 */
+/********************************************************************//**
 Drops a page hash index when a page is freed from a fseg to the file system.
 Drops possible hash index if the page happens to be in the buffer pool. */
-
+UNIV_INTERN
 void
 btr_search_drop_page_hash_when_freed(
 /*=================================*/
-	ulint	space,		/* in: space id */
-	ulint	page_no);	/* in: page number */
-/************************************************************************
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no);	/*!< in: page number */
+/********************************************************************//**
 Updates the page hash index when a single record is inserted on a page. */
-
+UNIV_INTERN
 void
 btr_search_update_hash_node_on_insert(
 /*==================================*/
-	btr_cur_t*	cursor);/* in: cursor which was positioned to the
+	btr_cur_t*	cursor);/*!< in: cursor which was positioned to the
 				place to insert using btr_cur_search_...,
 				and the new record has been inserted next
 				to the cursor */
-/************************************************************************
+/********************************************************************//**
 Updates the page hash index when a single record is inserted on a page. */
-
+UNIV_INTERN
 void
 btr_search_update_hash_on_insert(
 /*=============================*/
-	btr_cur_t*	cursor);/* in: cursor which was positioned to the
+	btr_cur_t*	cursor);/*!< in: cursor which was positioned to the
 				place to insert using btr_cur_search_...,
 				and the new record has been inserted next
 				to the cursor */
-/************************************************************************
+/********************************************************************//**
 Updates the page hash index when a single record is deleted from a page. */
-
+UNIV_INTERN
 void
 btr_search_update_hash_on_delete(
 /*=============================*/
-	btr_cur_t*	cursor);/* in: cursor which was positioned on the
+	btr_cur_t*	cursor);/*!< in: cursor which was positioned on the
 				record to delete using btr_cur_search_...,
 				the record is not yet deleted */
-/************************************************************************
-Validates the search system. */
-
+/********************************************************************//**
+Validates the search system.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 btr_search_validate(void);
 /*======================*/
-				/* out: TRUE if ok */
 
-/* The search info struct in an index */
+/** Flag: has the search system been enabled?
+Protected by btr_search_latch and btr_search_enabled_mutex. */
+extern char btr_search_enabled;
 
+/** The search info struct in an index */
 struct btr_search_struct{
-	ulint	ref_count;	/* Number of blocks in this index tree
+	ulint	ref_count;	/*!< Number of blocks in this index tree
 				that have search index built
 				i.e. block->index points to this index.
 				Protected by btr_search_latch except
 				when during initialization in
 				btr_search_info_create(). */
 
-	/* The following fields are not protected by any latch.
+	/* @{ The following fields are not protected by any latch.
 	Unfortunately, this means that they must be aligned to
 	the machine word, i.e., they cannot be turned into bit-fields. */
-	page_t*	root_guess;	/* the root page frame when it was last time
+	buf_block_t* root_guess;/*!< the root page frame when it was last time
 				fetched, or NULL */
-	ulint	hash_analysis;	/* when this exceeds BTR_SEARCH_HASH_ANALYSIS,
-				the hash analysis starts; this is reset if no
+	ulint	hash_analysis;	/*!< when this exceeds
+				BTR_SEARCH_HASH_ANALYSIS, the hash
+				analysis starts; this is reset if no
 				success noticed */
-	ibool	last_hash_succ;	/* TRUE if the last search would have
+	ibool	last_hash_succ;	/*!< TRUE if the last search would have
 				succeeded, or did succeed, using the hash
 				index; NOTE that the value here is not exact:
 				it is not calculated for every search, and the
 				calculation itself is not always accurate! */
 	ulint	n_hash_potential;
-				/* number of consecutive searches
+				/*!< number of consecutive searches
 				which would have succeeded, or did succeed,
 				using the hash index;
 				the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */
-	/*----------------------*/
-	ulint	n_fields;	/* recommended prefix length for hash search:
+	/* @} */
+	/*---------------------- @{ */
+	ulint	n_fields;	/*!< recommended prefix length for hash search:
 				number of full fields */
-	ulint	n_bytes;	/* recommended prefix: number of bytes in
-				an incomplete field;
-				see also BTR_PAGE_MAX_REC_SIZE */
-	ibool	left_side;	/* TRUE or FALSE, depending on whether
+	ulint	n_bytes;	/*!< recommended prefix: number of bytes in
+				an incomplete field
+				@see BTR_PAGE_MAX_REC_SIZE */
+	ibool	left_side;	/*!< TRUE or FALSE, depending on whether
 				the leftmost record of several records with
 				the same prefix should be indexed in the
 				hash index */
-	/*----------------------*/
+	/*---------------------- @} */
 #ifdef UNIV_SEARCH_PERF_STAT
-	ulint	n_hash_succ;	/* number of successful hash searches thus
+	ulint	n_hash_succ;	/*!< number of successful hash searches thus
 				far */
-	ulint	n_hash_fail;	/* number of failed hash searches */
-	ulint	n_patt_succ;	/* number of successful pattern searches thus
+	ulint	n_hash_fail;	/*!< number of failed hash searches */
+	ulint	n_patt_succ;	/*!< number of successful pattern searches thus
 				far */
-	ulint	n_searches;	/* number of searches */
+	ulint	n_searches;	/*!< number of searches */
 #endif /* UNIV_SEARCH_PERF_STAT */
 #ifdef UNIV_DEBUG
-	ulint	magic_n;	/* magic number */
+	ulint	magic_n;	/*!< magic number @see BTR_SEARCH_MAGIC_N */
+/** value of btr_search_struct::magic_n, used in assertions */
 # define BTR_SEARCH_MAGIC_N	1112765
 #endif /* UNIV_DEBUG */
 };
 
-/* The hash index system */
-
+/** The hash index system */
 typedef struct btr_search_sys_struct	btr_search_sys_t;
 
+/** The hash index system */
 struct btr_search_sys_struct{
-	hash_table_t*	hash_index;
+	hash_table_t*	hash_index;	/*!< the adaptive hash index,
+					mapping dtuple_fold values
+					to rec_t pointers on index pages */
 };
 
+/** The adaptive hash index */
 extern btr_search_sys_t*	btr_search_sys;
 
-/* The latch protecting the adaptive search system: this latch protects the
+/** @brief The latch protecting the adaptive search system
+
+This latch protects the
 (1) hash index;
 (2) columns of a record to which we have a pointer in the hash index;
 
@@ -216,36 +267,34 @@ but does NOT protect:
 
 Bear in mind (3) and (4) when using the hash index.
 */
-
 extern rw_lock_t*	btr_search_latch_temp;
 
+/** The latch protecting the adaptive search system */
 #define btr_search_latch	(*btr_search_latch_temp)
 
 #ifdef UNIV_SEARCH_PERF_STAT
+/** Number of successful adaptive hash index lookups */
 extern ulint	btr_search_n_succ;
+/** Number of failed adaptive hash index lookups */
 extern ulint	btr_search_n_hash_fail;
 #endif /* UNIV_SEARCH_PERF_STAT */
 
-/* After change in n_fields or n_bytes in info, this many rounds are waited
+/** After change in n_fields or n_bytes in info, this many rounds are waited
 before starting the hash analysis again: this is to save CPU time when there
 is no hope in building a hash index. */
-
 #define BTR_SEARCH_HASH_ANALYSIS	17
 
-/* Limit of consecutive searches for trying a search shortcut on the search
+/** Limit of consecutive searches for trying a search shortcut on the search
 pattern */
-
 #define BTR_SEARCH_ON_PATTERN_LIMIT	3
 
-/* Limit of consecutive searches for trying a search shortcut using the hash
-index */
-
+/** Limit of consecutive searches for trying a search shortcut using
+the hash index */
 #define BTR_SEARCH_ON_HASH_LIMIT	3
 
-/* We do this many searches before trying to keep the search latch over calls
-from MySQL. If we notice someone waiting for the latch, we again set this
-much timeout. This is to reduce contention. */
-
+/** We do this many searches before trying to keep the search latch
+over calls from MySQL. If we notice someone waiting for the latch, we
+again set this much timeout. This is to reduce contention. */
 #define BTR_SEA_TIMEOUT			10000
 
 #ifndef UNIV_NONINL
diff --git a/storage/innodb_plugin/include/btr0sea.ic b/storage/innodb_plugin/include/btr0sea.ic
new file mode 100644
index 00000000000..beadeeb8d02
--- /dev/null
+++ b/storage/innodb_plugin/include/btr0sea.ic
@@ -0,0 +1,84 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/btr0sea.ic
+The index tree adaptive search
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "dict0mem.h"
+#include "btr0cur.h"
+#include "buf0buf.h"
+
+/*********************************************************************//**
+Updates the search info. */
+UNIV_INTERN
+void
+btr_search_info_update_slow(
+/*========================*/
+	btr_search_t*	info,	/*!< in/out: search info */
+	btr_cur_t*	cursor);/*!< in: cursor which was just positioned */
+
+/********************************************************************//**
+Returns search info for an index.
+@return	search info; search mutex reserved */
+UNIV_INLINE
+btr_search_t*
+btr_search_get_info(
+/*================*/
+	dict_index_t*	index)	/*!< in: index */
+{
+	ut_ad(index);
+
+	return(index->search_info);
+}
+
+/*********************************************************************//**
+Updates the search info. */
+UNIV_INLINE
+void
+btr_search_info_update(
+/*===================*/
+	dict_index_t*	index,	/*!< in: index of the cursor */
+	btr_cur_t*	cursor)	/*!< in: cursor which was just positioned */
+{
+	btr_search_t*	info;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	info = btr_search_get_info(index);
+
+	info->hash_analysis++;
+
+	if (info->hash_analysis < BTR_SEARCH_HASH_ANALYSIS) {
+
+		/* Do nothing */
+
+		return;
+
+	}
+
+	ut_ad(cursor->flag != BTR_CUR_HASH);
+
+	btr_search_info_update_slow(info, cursor);
+}
diff --git a/storage/innodb_plugin/include/btr0types.h b/storage/innodb_plugin/include/btr0types.h
new file mode 100644
index 00000000000..ef4a6b04b34
--- /dev/null
+++ b/storage/innodb_plugin/include/btr0types.h
@@ -0,0 +1,51 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/btr0types.h
+The index tree general types
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#ifndef btr0types_h
+#define btr0types_h
+
+#include "univ.i"
+
+#include "rem0types.h"
+#include "page0types.h"
+
+/** Persistent cursor */
+typedef struct btr_pcur_struct		btr_pcur_t;
+/** B-tree cursor */
+typedef struct btr_cur_struct		btr_cur_t;
+/** B-tree search information for the adaptive hash index */
+typedef struct btr_search_struct	btr_search_t;
+
+/** The size of a reference to data stored on a different page.
+The reference is stored at the end of the prefix of the field
+in the index record. */
+#define BTR_EXTERN_FIELD_REF_SIZE	20
+
+/** A BLOB field reference full of zero, for use in assertions and tests.
+Initially, BLOB field references are set to zero, in
+dtuple_convert_big_rec(). */
+extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
+
+#endif
diff --git a/storage/innodb_plugin/include/buf0buddy.h b/storage/innodb_plugin/include/buf0buddy.h
new file mode 100644
index 00000000000..7648950d5d1
--- /dev/null
+++ b/storage/innodb_plugin/include/buf0buddy.h
@@ -0,0 +1,90 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0buddy.h
+Binary buddy allocator for compressed pages
+
+Created December 2006 by Marko Makela
+*******************************************************/
+
+#ifndef buf0buddy_h
+#define buf0buddy_h
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE
+#endif
+
+#include "univ.i"
+#include "buf0types.h"
+
+/**********************************************************************//**
+Allocate a block.  The thread calling this function must hold
+buf_pool_mutex and must not hold buf_pool_zip_mutex or any
+block->mutex.  The buf_pool_mutex may only be released and reacquired
+if lru != NULL.  This function should only be used for allocating
+compressed page frames or control blocks (buf_page_t).  Allocated
+control blocks must be properly initialized immediately after
+buf_buddy_alloc() has returned the memory, before releasing
+buf_pool_mutex.
+@return	allocated block, possibly NULL if lru == NULL */
+UNIV_INLINE
+void*
+buf_buddy_alloc(
+/*============*/
+	ulint	size,	/*!< in: block size, up to UNIV_PAGE_SIZE */
+	ibool*	lru)	/*!< in: pointer to a variable that will be assigned
+			TRUE if storage was allocated from the LRU list
+			and buf_pool_mutex was temporarily released,
+			or NULL if the LRU list should not be used */
+	__attribute__((malloc));
+
+/**********************************************************************//**
+Release a block. */
+UNIV_INLINE
+void
+buf_buddy_free(
+/*===========*/
+	void*	buf,	/*!< in: block to be freed, must not be
+			pointed to by the buffer pool */
+	ulint	size)	/*!< in: block size, up to UNIV_PAGE_SIZE */
+	__attribute__((nonnull));
+
+/** Statistics of buddy blocks of a given size. */
+struct buf_buddy_stat_struct {
+	/** Number of blocks allocated from the buddy system. */
+	ulint		used;
+	/** Number of blocks relocated by the buddy system. */
+	ib_uint64_t	relocated;
+	/** Total duration of block relocations, in microseconds. */
+	ib_uint64_t	relocated_usec;
+};
+
+/** Statistics of buddy blocks of a given size. */
+typedef struct buf_buddy_stat_struct buf_buddy_stat_t;
+
+/** Statistics of the buddy system, indexed by block size.
+Protected by buf_pool_mutex. */
+extern buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];
+
+#ifndef UNIV_NONINL
+# include "buf0buddy.ic"
+#endif
+
+#endif /* buf0buddy_h */
diff --git a/storage/innodb_plugin/include/buf0buddy.ic b/storage/innodb_plugin/include/buf0buddy.ic
new file mode 100644
index 00000000000..c419a2374d9
--- /dev/null
+++ b/storage/innodb_plugin/include/buf0buddy.ic
@@ -0,0 +1,127 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0buddy.ic
+Binary buddy allocator for compressed pages
+
+Created December 2006 by Marko Makela
+*******************************************************/
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE
+#endif
+
+#include "buf0buf.h"
+#include "buf0buddy.h"
+#include "ut0ut.h"
+#include "sync0sync.h"
+
+/**********************************************************************//**
+Allocate a block.  The thread calling this function must hold
+buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
+The buf_pool_mutex may only be released and reacquired if lru != NULL.
+@return	allocated block, possibly NULL if lru==NULL */
+UNIV_INTERN
+void*
+buf_buddy_alloc_low(
+/*================*/
+	ulint	i,	/*!< in: index of buf_pool->zip_free[],
+			or BUF_BUDDY_SIZES */
+	ibool*	lru)	/*!< in: pointer to a variable that will be assigned
+			TRUE if storage was allocated from the LRU list
+			and buf_pool_mutex was temporarily released,
+			or NULL if the LRU list should not be used */
+	__attribute__((malloc));
+
+/**********************************************************************//**
+Deallocate a block. */
+UNIV_INTERN
+void
+buf_buddy_free_low(
+/*===============*/
+	void*	buf,	/*!< in: block to be freed, must not be
+			pointed to by the buffer pool */
+	ulint	i)	/*!< in: index of buf_pool->zip_free[],
+			or BUF_BUDDY_SIZES */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Get the index of buf_pool->zip_free[] for a given block size.
+@return	index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */
+UNIV_INLINE
+ulint
+buf_buddy_get_slot(
+/*===============*/
+	ulint	size)	/*!< in: block size */
+{
+	ulint	i;
+	ulint	s;
+
+	for (i = 0, s = BUF_BUDDY_LOW; s < size; i++, s <<= 1) {
+	}
+
+	ut_ad(i <= BUF_BUDDY_SIZES);
+	return(i);
+}
+
+/**********************************************************************//**
+Allocate a block.  The thread calling this function must hold
+buf_pool_mutex and must not hold buf_pool_zip_mutex or any
+block->mutex.  The buf_pool_mutex may only be released and reacquired
+if lru != NULL.  This function should only be used for allocating
+compressed page frames or control blocks (buf_page_t).  Allocated
+control blocks must be properly initialized immediately after
+buf_buddy_alloc() has returned the memory, before releasing
+buf_pool_mutex.
+@return	allocated block, possibly NULL if lru == NULL */
+UNIV_INLINE
+void*
+buf_buddy_alloc(
+/*============*/
+	ulint	size,	/*!< in: block size, up to UNIV_PAGE_SIZE */
+	ibool*	lru)	/*!< in: pointer to a variable that will be assigned
+			TRUE if storage was allocated from the LRU list
+			and buf_pool_mutex was temporarily released,
+			or NULL if the LRU list should not be used */
+{
+	ut_ad(buf_pool_mutex_own());
+
+	return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
+}
+
+/**********************************************************************//**
+Deallocate a block. */
+UNIV_INLINE
+void
+buf_buddy_free(
+/*===========*/
+	void*	buf,	/*!< in: block to be freed, must not be
+			pointed to by the buffer pool */
+	ulint	size)	/*!< in: block size, up to UNIV_PAGE_SIZE */
+{
+	ut_ad(buf_pool_mutex_own());
+
+	buf_buddy_free_low(buf, buf_buddy_get_slot(size));
+}
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE	UNIV_INLINE_ORIGINAL
+#endif
diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h
new file mode 100644
index 00000000000..65ad42c895a
--- /dev/null
+++ b/storage/innodb_plugin/include/buf0buf.h
@@ -0,0 +1,1531 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0buf.h
+The database buffer pool high-level routines
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0buf_h
+#define buf0buf_h
+
+#include "univ.i"
+#include "fil0fil.h"
+#include "mtr0types.h"
+#include "buf0types.h"
+#include "hash0hash.h"
+#include "ut0byte.h"
+#include "page0types.h"
+#ifndef UNIV_HOTBACKUP
+#include "os0proc.h"
+
+/** @name Modes for buf_page_get_gen */
+/* @{ */
+#define BUF_GET			10	/*!< get always */
+#define	BUF_GET_IF_IN_POOL	11	/*!< get if in pool */
+#define BUF_GET_NO_LATCH	14	/*!< get and bufferfix, but
+					set no latch; we have
+					separated this case, because
+					it is error-prone programming
+					not to set a latch, and it
+					should be used with care */
+/* @} */
+/** @name Modes for buf_page_get_known_nowait */
+/* @{ */
+#define BUF_MAKE_YOUNG	51		/*!< Move the block to the
+					start of the LRU list if there
+					is a danger that the block
+					would drift out of the buffer
+					pool*/
+#define BUF_KEEP_OLD	52		/*!< Preserve the current LRU
+					position of the block. */
+/* @} */
+
+extern buf_pool_t*	buf_pool;	/*!< The buffer pool of the database */
+#ifdef UNIV_DEBUG
+extern ibool		buf_debug_prints;/*!< If this is set TRUE, the program
+					prints info whenever read or flush
+					occurs */
+#endif /* UNIV_DEBUG */
+extern ulint srv_buf_pool_write_requests; /*!< variable to count write request
+					  issued */
+#else /* !UNIV_HOTBACKUP */
+extern buf_block_t*	back_block1;	/*!< first block, for --apply-log */
+extern buf_block_t*	back_block2;	/*!< second block, for page reorganize */
+#endif /* !UNIV_HOTBACKUP */
+
+/** Magic value to use instead of checksums when they are disabled */
+#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
+
+/** @brief States of a control block
+@see buf_page_struct
+
+The enumeration values must be 0..7. */
+enum buf_page_state {
+	BUF_BLOCK_ZIP_FREE = 0,		/*!< contains a free
+					compressed page */
+	BUF_BLOCK_ZIP_PAGE,		/*!< contains a clean
+					compressed page */
+	BUF_BLOCK_ZIP_DIRTY,		/*!< contains a compressed
+					page that is in the
+					buf_pool->flush_list */
+
+	BUF_BLOCK_NOT_USED,		/*!< is in the free list;
+					must be after the BUF_BLOCK_ZIP_
+					constants for compressed-only pages
+					@see buf_block_state_valid() */
+	BUF_BLOCK_READY_FOR_USE,	/*!< when buf_LRU_get_free_block
+					returns a block, it is in this state */
+	BUF_BLOCK_FILE_PAGE,		/*!< contains a buffered file page */
+	BUF_BLOCK_MEMORY,		/*!< contains some main memory
+					object */
+	BUF_BLOCK_REMOVE_HASH		/*!< hash index should be removed
+					before putting to the free list */
+};
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Creates the buffer pool.
+@return	own: buf_pool object, NULL if not enough memory or error */
+UNIV_INTERN
+buf_pool_t*
+buf_pool_init(void);
+/*===============*/
+/********************************************************************//**
+Frees the buffer pool at shutdown.  This must not be invoked before
+freeing all mutexes. */
+UNIV_INTERN
+void
+buf_pool_free(void);
+/*===============*/
+
+/********************************************************************//**
+Drops the adaptive hash index.  To prevent a livelock, this function
+is only to be called while holding btr_search_latch and while
+btr_search_enabled == FALSE. */
+UNIV_INTERN
+void
+buf_pool_drop_hash_index(void);
+/*==========================*/
+
+/********************************************************************//**
+Relocate a buffer control block.  Relocates the block on the LRU list
+and in buf_pool->page_hash.  Does not relocate bpage->list.
+The caller must take care of relocating bpage->list. */
+UNIV_INTERN
+void
+buf_relocate(
+/*=========*/
+	buf_page_t*	bpage,	/*!< in/out: control block being relocated;
+				buf_page_get_state(bpage) must be
+				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
+	buf_page_t*	dpage)	/*!< in/out: destination control block */
+	__attribute__((nonnull));
+/********************************************************************//**
+Resizes the buffer pool. */
+UNIV_INTERN
+void
+buf_pool_resize(void);
+/*=================*/
+/*********************************************************************//**
+Gets the current size of buffer buf_pool in bytes.
+@return	size in bytes */
+UNIV_INLINE
+ulint
+buf_pool_get_curr_size(void);
+/*========================*/
+/********************************************************************//**
+Gets the smallest oldest_modification lsn for any page in the pool. Returns
+zero if all modified pages have been flushed to disk.
+@return	oldest modification in pool, zero if none */
+UNIV_INLINE
+ib_uint64_t
+buf_pool_get_oldest_modification(void);
+/*==================================*/
+/********************************************************************//**
+Allocates a buffer block.
+@return	own: the allocated block, in state BUF_BLOCK_MEMORY */
+UNIV_INLINE
+buf_block_t*
+buf_block_alloc(
+/*============*/
+	ulint	zip_size);	/*!< in: compressed page size in bytes,
+				or 0 if uncompressed tablespace */
+/********************************************************************//**
+Frees a buffer block which does not contain a file page. */
+UNIV_INLINE
+void
+buf_block_free(
+/*===========*/
+	buf_block_t*	block);	/*!< in, own: block to be freed */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Copies contents of a buffer frame to a given buffer.
+@return	buf */
+UNIV_INLINE
+byte*
+buf_frame_copy(
+/*===========*/
+	byte*			buf,	/*!< in: buffer to copy to */
+	const buf_frame_t*	frame);	/*!< in: buffer frame */
+#ifndef UNIV_HOTBACKUP
+/**************************************************************//**
+NOTE! The following macros should be used instead of buf_page_get_gen,
+to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
+in LA! */
+#define buf_page_get(SP, ZS, OF, LA, MTR)	 buf_page_get_gen(\
+				SP, ZS, OF, LA, NULL,\
+				BUF_GET, __FILE__, __LINE__, MTR)
+/**************************************************************//**
+Use these macros to bufferfix a page with no latching. Remember not to
+read the contents of the page unless you know it is safe. Do not modify
+the contents of the page! We have separated this case, because it is
+error-prone programming not to set a latch, and it should be used
+with care. */
+#define buf_page_get_with_no_latch(SP, ZS, OF, MTR)	   buf_page_get_gen(\
+				SP, ZS, OF, RW_NO_LATCH, NULL,\
+				BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
+/**************************************************************//**
+NOTE! The following macros should be used instead of
+buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
+RW_X_LATCH are allowed as LA! */
+#define buf_page_optimistic_get(LA, BL, MC, MTR)			     \
+	buf_page_optimistic_get_func(LA, BL, MC, __FILE__, __LINE__, MTR)
+/********************************************************************//**
+This is the general function used to get optimistic access to a database
+page.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+buf_page_optimistic_get_func(
+/*=========================*/
+	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
+	buf_block_t*	block,	/*!< in: guessed block */
+	ib_uint64_t	modify_clock,/*!< in: modify clock value if mode is
+				..._GUESS_ON_CLOCK */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr);	/*!< in: mini-transaction */
+/********************************************************************//**
+This is used to get access to a known database page, when no waiting can be
+done.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+buf_page_get_known_nowait(
+/*======================*/
+	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
+	buf_block_t*	block,	/*!< in: the known page */
+	ulint		mode,	/*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr);	/*!< in: mini-transaction */
+
+/*******************************************************************//**
+Given a tablespace id and page number tries to get that page. If the
+page is not in the buffer pool it is not loaded and NULL is returned.
+Suitable for using when holding the kernel mutex. */
+UNIV_INTERN
+const buf_block_t*
+buf_page_try_get_func(
+/*==================*/
+	ulint		space_id,/*!< in: tablespace id */
+	ulint		page_no,/*!< in: page number */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr);	/*!< in: mini-transaction */
+
+/** Tries to get a page. If the page is not in the buffer pool it is
+not loaded.  Suitable for using when holding the kernel mutex.
+@param space_id	in: tablespace id
+@param page_no	in: page number
+@param mtr	in: mini-transaction
+@return		the page if in buffer pool, NULL if not */
+#define buf_page_try_get(space_id, page_no, mtr)	\
+	buf_page_try_get_func(space_id, page_no, __FILE__, __LINE__, mtr);
+
+/********************************************************************//**
+Get read access to a compressed page (usually of type
+FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
+The page must be released with buf_page_release_zip().
+NOTE: the page is not protected by any latch.  Mutual exclusion has to
+be implemented at a higher level.  In other words, all possible
+accesses to a given page through this function must be protected by
+the same set of mutexes or latches.
+@return	pointer to the block, or NULL if not compressed */
+UNIV_INTERN
+buf_page_t*
+buf_page_get_zip(
+/*=============*/
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size */
+	ulint		offset);/*!< in: page number */
+/********************************************************************//**
+This is the general function used to get access to a database page.
+@return	pointer to the block or NULL */
+UNIV_INTERN
+buf_block_t*
+buf_page_get_gen(
+/*=============*/
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint		offset,	/*!< in: page number */
+	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
+	buf_block_t*	guess,	/*!< in: guessed block or NULL */
+	ulint		mode,	/*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
+				BUF_GET_NO_LATCH */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr);	/*!< in: mini-transaction */
+/********************************************************************//**
+Initializes a page to the buffer buf_pool. The page is usually not read
+from a file even if it cannot be found in the buffer buf_pool. This is one
+of the functions which perform to a block a state transition NOT_USED =>
+FILE_PAGE (the other is buf_page_get_gen).
+@return	pointer to the block, page bufferfixed */
+UNIV_INTERN
+buf_block_t*
+buf_page_create(
+/*============*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset,	/*!< in: offset of the page within space in units of
+			a page */
+	ulint	zip_size,/*!< in: compressed page size, or 0 */
+	mtr_t*	mtr);	/*!< in: mini-transaction handle */
+#else /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
+UNIV_INTERN
+void
+buf_page_init_for_backup_restore(
+/*=============================*/
+	ulint		space,	/*!< in: space id */
+	ulint		offset,	/*!< in: offset of the page within space
+				in units of a page */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	buf_block_t*	block);	/*!< in: block to init */
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Releases a compressed-only page acquired with buf_page_get_zip(). */
+UNIV_INLINE
+void
+buf_page_release_zip(
+/*=================*/
+	buf_page_t*	bpage);		/*!< in: buffer block */
+/********************************************************************//**
+Decrements the bufferfix count of a buffer control block and releases
+a latch, if specified. */
+UNIV_INLINE
+void
+buf_page_release(
+/*=============*/
+	buf_block_t*	block,		/*!< in: buffer block */
+	ulint		rw_latch,	/*!< in: RW_S_LATCH, RW_X_LATCH,
+					RW_NO_LATCH */
+	mtr_t*		mtr);		/*!< in: mtr */
+/********************************************************************//**
+Moves a page to the start of the buffer pool LRU list. This high-level
+function can be used to prevent an important page from from slipping out of
+the buffer pool. */
+UNIV_INTERN
+void
+buf_page_make_young(
+/*================*/
+	buf_page_t*	bpage);	/*!< in: buffer block of a file page */
+/********************************************************************//**
+Returns TRUE if the page can be found in the buffer pool hash table.
+
+NOTE that it is possible that the page is not yet read from disk,
+though.
+
+@return	TRUE if found in the page hash table */
+UNIV_INLINE
+ibool
+buf_page_peek(
+/*==========*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset);/*!< in: page number */
+/********************************************************************//**
+Resets the check_index_page_at_flush field of a page if found in the buffer
+pool. */
+UNIV_INTERN
+void
+buf_reset_check_index_page_at_flush(
+/*================================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset);/*!< in: page number */
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+/********************************************************************//**
+Sets file_page_was_freed TRUE if the page is found in the buffer pool.
+This function should be called when we free a file page and want the
+debug version to check that it is not accessed any more unless
+reallocated.
+@return	control block if found in page hash table, otherwise NULL */
+UNIV_INTERN
+buf_page_t*
+buf_page_set_file_page_was_freed(
+/*=============================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset);/*!< in: page number */
+/********************************************************************//**
+Sets file_page_was_freed FALSE if the page is found in the buffer pool.
+This function should be called when we free a file page and want the
+debug version to check that it is not accessed any more unless
+reallocated.
+@return	control block if found in page hash table, otherwise NULL */
+UNIV_INTERN
+buf_page_t*
+buf_page_reset_file_page_was_freed(
+/*===============================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset);	/*!< in: page number */
+#endif /* UNIV_DEBUG_FILE_ACCESSES */
+/********************************************************************//**
+Reads the freed_page_clock of a buffer block.
+@return	freed_page_clock */
+UNIV_INLINE
+ulint
+buf_page_get_freed_page_clock(
+/*==========================*/
+	const buf_page_t*	bpage)	/*!< in: block */
+	__attribute__((pure));
+/********************************************************************//**
+Reads the freed_page_clock of a buffer block.
+@return	freed_page_clock */
+UNIV_INLINE
+ulint
+buf_block_get_freed_page_clock(
+/*===========================*/
+	const buf_block_t*	block)	/*!< in: block */
+	__attribute__((pure));
+
+/********************************************************************//**
+Recommends a move of a block to the start of the LRU list if there is danger
+of dropping from the buffer pool. NOTE: does not reserve the buffer pool
+mutex.
+@return	TRUE if should be made younger */
+UNIV_INLINE
+ibool
+buf_page_peek_if_too_old(
+/*=====================*/
+	const buf_page_t*	bpage);	/*!< in: block to make younger */
+/********************************************************************//**
+Returns the current state of is_hashed of a page. FALSE if the page is
+not in the pool. NOTE that this operation does not fix the page in the
+pool if it is found there.
+@return	TRUE if page hash index is built in search system */
+UNIV_INTERN
+ibool
+buf_page_peek_if_search_hashed(
+/*===========================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset);/*!< in: page number */
+/********************************************************************//**
+Gets the youngest modification log sequence number for a frame.
+Returns zero if not file page or no modification occurred yet.
+@return	newest modification to page */
+UNIV_INLINE
+ib_uint64_t
+buf_page_get_newest_modification(
+/*=============================*/
+	const buf_page_t*	bpage);	/*!< in: block containing the
+					page frame */
+/********************************************************************//**
+Increments the modify clock of a frame by 1. The caller must (1) own the
+buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
+on the block. */
+UNIV_INLINE
+void
+buf_block_modify_clock_inc(
+/*=======================*/
+	buf_block_t*	block);	/*!< in: block */
+/********************************************************************//**
+Returns the value of the modify clock. The caller must have an s-lock
+or x-lock on the block.
+@return	value */
+UNIV_INLINE
+ib_uint64_t
+buf_block_get_modify_clock(
+/*=======================*/
+	buf_block_t*	block);	/*!< in: block */
+#else /* !UNIV_HOTBACKUP */
+# define buf_block_modify_clock_inc(block) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Calculates a page checksum which is stored to the page when it is written
+to a file. Note that we must be careful to calculate the same value
+on 32-bit and 64-bit architectures.
+@return	checksum */
+UNIV_INTERN
+ulint
+buf_calc_page_new_checksum(
+/*=======================*/
+	const byte*	page);	/*!< in: buffer page */
+/********************************************************************//**
+In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
+looked at the first few bytes of the page. This calculates that old
+checksum.
+NOTE: we must first store the new formula checksum to
+FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
+because this takes that field as an input!
+@return	checksum */
+UNIV_INTERN
+ulint
+buf_calc_page_old_checksum(
+/*=======================*/
+	const byte*	 page);	/*!< in: buffer page */
+/********************************************************************//**
+Checks if a page is corrupt.
+@return	TRUE if corrupted */
+UNIV_INTERN
+ibool
+buf_page_is_corrupted(
+/*==================*/
+	const byte*	read_buf,	/*!< in: a database page */
+	ulint		zip_size);	/*!< in: size of compressed page;
+					0 for uncompressed pages */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Gets the space id, page offset, and byte offset within page of a
+pointer pointing to a buffer frame containing a file page. */
+UNIV_INLINE
+void
+buf_ptr_get_fsp_addr(
+/*=================*/
+	const void*	ptr,	/*!< in: pointer to a buffer frame */
+	ulint*		space,	/*!< out: space id */
+	fil_addr_t*	addr);	/*!< out: page offset and byte offset */
+/**********************************************************************//**
+Gets the hash value of a block. This can be used in searches in the
+lock hash table.
+@return	lock hash value */
+UNIV_INLINE
+ulint
+buf_block_get_lock_hash_val(
+/*========================*/
+	const buf_block_t*	block)	/*!< in: block */
+	__attribute__((pure));
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Finds a block in the buffer pool that points to a
+given compressed page.
+@return	buffer block pointing to the compressed page, or NULL */
+UNIV_INTERN
+buf_block_t*
+buf_pool_contains_zip(
+/*==================*/
+	const void*	data);	/*!< in: pointer to compressed page */
+#endif /* UNIV_DEBUG */
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/*********************************************************************//**
+Validates the buffer pool data structure.
+@return	TRUE */
+UNIV_INTERN
+ibool
+buf_validate(void);
+/*==============*/
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/*********************************************************************//**
+Prints info of the buffer pool data structure. */
+UNIV_INTERN
+void
+buf_print(void);
+/*============*/
+#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Prints a page to stderr. */
+UNIV_INTERN
+void
+buf_page_print(
+/*===========*/
+	const byte*	read_buf,	/*!< in: a database page */
+	ulint		zip_size);	/*!< in: compressed page size, or
+					0 for uncompressed pages */
+/********************************************************************//**
+Decompress a block.
+@return	TRUE if successful */
+UNIV_INTERN
+ibool
+buf_zip_decompress(
+/*===============*/
+	buf_block_t*	block,	/*!< in/out: block */
+	ibool		check);	/*!< in: TRUE=verify the page checksum */
+#ifndef UNIV_HOTBACKUP
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Returns the number of latched pages in the buffer pool.
+@return	number of latched pages */
+UNIV_INTERN
+ulint
+buf_get_latched_pages_number(void);
+/*==============================*/
+#endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Returns the number of pending buf pool ios.
+@return	number of pending I/O operations */
+UNIV_INTERN
+ulint
+buf_get_n_pending_ios(void);
+/*=======================*/
+/*********************************************************************//**
+Prints info of the buffer i/o. */
+UNIV_INTERN
+void
+buf_print_io(
+/*=========*/
+	FILE*	file);	/*!< in: file where to print */
+/*********************************************************************//**
+Returns the ratio in percents of modified pages in the buffer pool /
+database pages in the buffer pool.
+@return	modified page percentage ratio */
+UNIV_INTERN
+ulint
+buf_get_modified_ratio_pct(void);
+/*============================*/
+/**********************************************************************//**
+Refreshes the statistics used to print per-second averages. */
+UNIV_INTERN
+void
+buf_refresh_io_stats(void);
+/*======================*/
+/*********************************************************************//**
+Asserts that all file pages in the buffer are in a replaceable state.
+@return	TRUE */
+UNIV_INTERN
+ibool
+buf_all_freed(void);
+/*===============*/
+/*********************************************************************//**
+Checks that there currently are no pending i/o-operations for the buffer
+pool.
+@return	TRUE if there is no pending i/o */
+UNIV_INTERN
+ibool
+buf_pool_check_no_pending_io(void);
+/*==============================*/
+/*********************************************************************//**
+Invalidates the file pages in the buffer pool when an archive recovery is
+completed. All the file pages buffered must be in a replaceable state when
+this function is called: not latched and not modified. */
+UNIV_INTERN
+void
+buf_pool_invalidate(void);
+/*=====================*/
+#endif /* !UNIV_HOTBACKUP */
+
+/*========================================================================
+--------------------------- LOWER LEVEL ROUTINES -------------------------
+=========================================================================*/
+
+#ifdef UNIV_SYNC_DEBUG
+/*********************************************************************//**
+Adds latch level info for the rw-lock protecting the buffer frame. This
+should be called in the debug version after a successful latching of a
+page if we know the latching order level of the acquired latch. */
+UNIV_INLINE
+void
+buf_block_dbg_add_level(
+/*====================*/
+	buf_block_t*	block,	/*!< in: buffer page
+				where we have acquired latch */
+	ulint		level);	/*!< in: latching order level */
+#else /* UNIV_SYNC_DEBUG */
+# define buf_block_dbg_add_level(block, level) /* nothing */
+#endif /* UNIV_SYNC_DEBUG */
+/*********************************************************************//**
+Gets the state of a block.
+@return	state */
+UNIV_INLINE
+enum buf_page_state
+buf_page_get_state(
+/*===============*/
+	const buf_page_t*	bpage);	/*!< in: pointer to the control block */
+/*********************************************************************//**
+Gets the state of a block.
+@return	state */
+UNIV_INLINE
+enum buf_page_state
+buf_block_get_state(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Sets the state of a block. */
+UNIV_INLINE
+void
+buf_page_set_state(
+/*===============*/
+	buf_page_t*		bpage,	/*!< in/out: pointer to control block */
+	enum buf_page_state	state);	/*!< in: state */
+/*********************************************************************//**
+Sets the state of a block. */
+UNIV_INLINE
+void
+buf_block_set_state(
+/*================*/
+	buf_block_t*		block,	/*!< in/out: pointer to control block */
+	enum buf_page_state	state);	/*!< in: state */
+/*********************************************************************//**
+Determines if a block is mapped to a tablespace.
+@return	TRUE if mapped */
+UNIV_INLINE
+ibool
+buf_page_in_file(
+/*=============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to control block */
+	__attribute__((pure));
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Determines if a block should be on unzip_LRU list.
+@return	TRUE if block belongs to unzip_LRU */
+UNIV_INLINE
+ibool
+buf_page_belongs_to_unzip_LRU(
+/*==========================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Determine the approximate LRU list position of a block.
+@return	LRU list position */
+UNIV_INLINE
+ulint
+buf_page_get_LRU_position(
+/*======================*/
+	const buf_page_t*	bpage)	/*!< in: control block */
+	__attribute__((pure));
+
+/*********************************************************************//**
+Gets the mutex of a block.
+@return	pointer to mutex protecting bpage */
+UNIV_INLINE
+mutex_t*
+buf_page_get_mutex(
+/*===============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to control block */
+	__attribute__((pure));
+
+/*********************************************************************//**
+Get the flush type of a page.
+@return	flush type */
+UNIV_INLINE
+enum buf_flush
+buf_page_get_flush_type(
+/*====================*/
+	const buf_page_t*	bpage)	/*!< in: buffer page */
+	__attribute__((pure));
+/*********************************************************************//**
+Set the flush type of a page. */
+UNIV_INLINE
+void
+buf_page_set_flush_type(
+/*====================*/
+	buf_page_t*	bpage,		/*!< in: buffer page */
+	enum buf_flush	flush_type);	/*!< in: flush type */
+/*********************************************************************//**
+Map a block to a file page. */
+UNIV_INLINE
+void
+buf_block_set_file_page(
+/*====================*/
+	buf_block_t*		block,	/*!< in/out: pointer to control block */
+	ulint			space,	/*!< in: tablespace id */
+	ulint			page_no);/*!< in: page number */
+/*********************************************************************//**
+Gets the io_fix state of a block.
+@return	io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_page_get_io_fix(
+/*================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the io_fix state of a block.
+@return	io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_block_get_io_fix(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Sets the io_fix state of a block. */
+UNIV_INLINE
+void
+buf_page_set_io_fix(
+/*================*/
+	buf_page_t*	bpage,	/*!< in/out: control block */
+	enum buf_io_fix	io_fix);/*!< in: io_fix state */
+/*********************************************************************//**
+Sets the io_fix state of a block. */
+UNIV_INLINE
+void
+buf_block_set_io_fix(
+/*=================*/
+	buf_block_t*	block,	/*!< in/out: control block */
+	enum buf_io_fix	io_fix);/*!< in: io_fix state */
+
+/********************************************************************//**
+Determine if a buffer block can be relocated in memory.  The block
+can be dirty, but it must not be I/O-fixed or bufferfixed. */
+UNIV_INLINE
+ibool
+buf_page_can_relocate(
+/*==================*/
+	const buf_page_t*	bpage)	/*!< control block being relocated */
+	__attribute__((pure));
+
+/*********************************************************************//**
+Determine if a block has been flagged old.
+@return	TRUE if old */
+UNIV_INLINE
+ibool
+buf_page_is_old(
+/*============*/
+	const buf_page_t*	bpage)	/*!< in: control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Flag a block old. */
+UNIV_INLINE
+void
+buf_page_set_old(
+/*=============*/
+	buf_page_t*	bpage,	/*!< in/out: control block */
+	ibool		old);	/*!< in: old */
+/*********************************************************************//**
+Determine if a block has been accessed in the buffer pool.
+@return	TRUE if accessed */
+UNIV_INLINE
+ibool
+buf_page_is_accessed(
+/*=================*/
+	const buf_page_t*	bpage)	/*!< in: control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Flag a block accessed. */
+UNIV_INLINE
+void
+buf_page_set_accessed(
+/*==================*/
+	buf_page_t*	bpage,		/*!< in/out: control block */
+	ibool		accessed);	/*!< in: accessed */
+/*********************************************************************//**
+Gets the buf_block_t handle of a buffered file block if an uncompressed
+page frame exists, or NULL.
+@return	control block, or NULL */
+UNIV_INLINE
+buf_block_t*
+buf_page_get_block(
+/*===============*/
+	buf_page_t*	bpage)	/*!< in: control block, or NULL */
+	__attribute__((pure));
+#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets a pointer to the memory frame of a block.
+@return	pointer to the frame */
+UNIV_INLINE
+buf_frame_t*
+buf_block_get_frame(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+#else /* UNIV_DEBUG */
+# define buf_block_get_frame(block) (block)->frame
+#endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Gets the space id of a block.
+@return	space id */
+UNIV_INLINE
+ulint
+buf_page_get_space(
+/*===============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the space id of a block.
+@return	space id */
+UNIV_INLINE
+ulint
+buf_block_get_space(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the page number of a block.
+@return	page number */
+UNIV_INLINE
+ulint
+buf_page_get_page_no(
+/*=================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the page number of a block.
+@return	page number */
+UNIV_INLINE
+ulint
+buf_block_get_page_no(
+/*==================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the compressed page size of a block.
+@return	compressed page size, or 0 */
+UNIV_INLINE
+ulint
+buf_page_get_zip_size(
+/*==================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the compressed page size of a block.
+@return	compressed page size, or 0 */
+UNIV_INLINE
+ulint
+buf_block_get_zip_size(
+/*===================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the compressed page descriptor corresponding to an uncompressed page
+if applicable. */
+#define buf_block_get_page_zip(block) \
+	(UNIV_LIKELY_NULL((block)->page.zip.data) ? &(block)->page.zip : NULL)
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Gets the block to whose frame the pointer is pointing to.
+@return	pointer to block, never NULL */
+UNIV_INTERN
+buf_block_t*
+buf_block_align(
+/*============*/
+	const byte*	ptr);	/*!< in: pointer to a frame */
+/********************************************************************//**
+Find out if a pointer belongs to a buf_block_t. It can be a pointer to
+the buf_block_t itself or a member of it
+@return	TRUE if ptr belongs to a buf_block_t struct */
+UNIV_INTERN
+ibool
+buf_pointer_is_block_field(
+/*=======================*/
+	const void*		ptr);	/*!< in: pointer not
+					dereferenced */
+/** Find out if a pointer corresponds to a buf_block_t::mutex.
+@param m	in: mutex candidate
+@return		TRUE if m is a buf_block_t::mutex */
+#define buf_pool_is_block_mutex(m)			\
+	buf_pointer_is_block_field((const void*)(m))
+/** Find out if a pointer corresponds to a buf_block_t::lock.
+@param l	in: rw-lock candidate
+@return		TRUE if l is a buf_block_t::lock */
+#define buf_pool_is_block_lock(l)			\
+	buf_pointer_is_block_field((const void*)(l))
+
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+/*********************************************************************//**
+Gets the compressed page descriptor corresponding to an uncompressed page
+if applicable.
+@return	compressed page descriptor, or NULL */
+UNIV_INLINE
+const page_zip_des_t*
+buf_frame_get_page_zip(
+/*===================*/
+	const byte*	ptr);	/*!< in: pointer to the page */
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+/********************************************************************//**
+Function which inits a page for read to the buffer buf_pool. If the page is
+(1) already in buf_pool, or
+(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
+(3) if the space is deleted or being deleted,
+then this function does nothing.
+Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
+on the buffer frame. The io-handler must take care that the flag is cleared
+and the lock released later.
+@return	pointer to the block or NULL */
+UNIV_INTERN
+buf_page_t*
+buf_page_init_for_read(
+/*===================*/
+	ulint*		err,	/*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
+	ulint		mode,	/*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size, or 0 */
+	ibool		unzip,	/*!< in: TRUE=request uncompressed page */
+	ib_int64_t	tablespace_version,/*!< in: prevents reading from a wrong
+				version of the tablespace in case we have done
+				DISCARD + IMPORT */
+	ulint		offset);/*!< in: page number */
+/********************************************************************//**
+Completes an asynchronous read or write request of a file page to or from
+the buffer pool. */
+UNIV_INTERN
+void
+buf_page_io_complete(
+/*=================*/
+	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
+/********************************************************************//**
+Calculates a folded value of a file page address to use in the page hash
+table.
+@return	the folded value */
+UNIV_INLINE
+ulint
+buf_page_address_fold(
+/*==================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: offset of the page within space */
+	__attribute__((const));
+/******************************************************************//**
+Returns the control block of a file page, NULL if not found.
+@return	block, NULL if not found */
+UNIV_INLINE
+buf_page_t*
+buf_page_hash_get(
+/*==============*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset);/*!< in: offset of the page within space */
+/******************************************************************//**
+Returns the control block of a file page, NULL if not found
+or an uncompressed page frame does not exist.
+@return	block, NULL if not found */
+UNIV_INLINE
+buf_block_t*
+buf_block_hash_get(
+/*===============*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset);/*!< in: offset of the page within space */
+/*******************************************************************//**
+Increments the pool clock by one and returns its new value. Remember that
+in the 32 bit version the clock wraps around at 4 billion!
+@return	new clock value */
+UNIV_INLINE
+ulint
+buf_pool_clock_tic(void);
+/*====================*/
+/*********************************************************************//**
+Gets the current length of the free list of buffer blocks.
+@return	length of the free list */
+UNIV_INTERN
+ulint
+buf_get_free_list_len(void);
+/*=======================*/
+#endif /* !UNIV_HOTBACKUP */
+
+
+/** The common buffer control block structure
+for compressed and uncompressed frames */
+
+struct buf_page_struct{
+	/** @name General fields
+	None of these bit-fields must be modified without holding
+	buf_page_get_mutex() [buf_block_struct::mutex or
+	buf_pool_zip_mutex], since they can be stored in the same
+	machine word.  Some of these fields are additionally protected
+	by buf_pool_mutex. */
+	/* @{ */
+
+	unsigned	space:32;	/*!< tablespace id; also protected
+					by buf_pool_mutex. */
+	unsigned	offset:32;	/*!< page number; also protected
+					by buf_pool_mutex. */
+
+	unsigned	state:3;	/*!< state of the control block; also
+					protected by buf_pool_mutex.
+					State transitions from
+					BUF_BLOCK_READY_FOR_USE to
+					BUF_BLOCK_MEMORY need not be
+					protected by buf_page_get_mutex().
+					@see enum buf_page_state */
+#ifndef UNIV_HOTBACKUP
+	unsigned	flush_type:2;	/*!< if this block is currently being
+					flushed to disk, this tells the
+					flush_type.
+					@see enum buf_flush */
+	unsigned	accessed:1;	/*!< TRUE if the page has been accessed
+					while in the buffer pool: read-ahead
+					may read in pages which have not been
+					accessed yet; a thread is allowed to
+					read this for heuristic purposes
+					without holding any mutex or latch */
+	unsigned	io_fix:2;	/*!< type of pending I/O operation;
+					also protected by buf_pool_mutex
+					@see enum buf_io_fix */
+	unsigned	buf_fix_count:24;/*!< count of how manyfold this block
+					is currently bufferfixed */
+	/* @} */
+#endif /* !UNIV_HOTBACKUP */
+	page_zip_des_t	zip;		/*!< compressed page; zip.data
+					(but not the data it points to) is
+					also protected by buf_pool_mutex */
+#ifndef UNIV_HOTBACKUP
+	buf_page_t*	hash;		/*!< node used in chaining to
+					buf_pool->page_hash or
+					buf_pool->zip_hash */
+#ifdef UNIV_DEBUG
+	ibool		in_page_hash;	/*!< TRUE if in buf_pool->page_hash */
+	ibool		in_zip_hash;	/*!< TRUE if in buf_pool->zip_hash */
+#endif /* UNIV_DEBUG */
+
+	/** @name Page flushing fields
+	All these are protected by buf_pool_mutex. */
+	/* @{ */
+
+	UT_LIST_NODE_T(buf_page_t) list;
+					/*!< based on state, this is a
+					list node, protected only by
+					buf_pool_mutex, in one of the
+					following lists in buf_pool:
+
+					- BUF_BLOCK_NOT_USED:	free
+					- BUF_BLOCK_FILE_PAGE:	flush_list
+					- BUF_BLOCK_ZIP_DIRTY:	flush_list
+					- BUF_BLOCK_ZIP_PAGE:	zip_clean
+					- BUF_BLOCK_ZIP_FREE:	zip_free[] */
+#ifdef UNIV_DEBUG
+	ibool		in_flush_list;	/*!< TRUE if in buf_pool->flush_list;
+					when buf_pool_mutex is free, the
+					following should hold: in_flush_list
+					== (state == BUF_BLOCK_FILE_PAGE
+					    || state == BUF_BLOCK_ZIP_DIRTY) */
+	ibool		in_free_list;	/*!< TRUE if in buf_pool->free; when
+					buf_pool_mutex is free, the following
+					should hold: in_free_list
+					== (state == BUF_BLOCK_NOT_USED) */
+#endif /* UNIV_DEBUG */
+	ib_uint64_t	newest_modification;
+					/*!< log sequence number of
+					the youngest modification to
+					this block, zero if not
+					modified */
+	ib_uint64_t	oldest_modification;
+					/*!< log sequence number of
+					the START of the log entry
+					written of the oldest
+					modification to this block
+					which has not yet been flushed
+					on disk; zero if all
+					modifications are on disk */
+	/* @} */
+	/** @name LRU replacement algorithm fields
+	These fields are protected by buf_pool_mutex only (not
+	buf_pool_zip_mutex or buf_block_struct::mutex). */
+	/* @{ */
+
+	UT_LIST_NODE_T(buf_page_t) LRU;
+					/*!< node of the LRU list */
+#ifdef UNIV_DEBUG
+	ibool		in_LRU_list;	/*!< TRUE if the page is in
+					the LRU list; used in
+					debugging */
+#endif /* UNIV_DEBUG */
+	unsigned	old:1;		/*!< TRUE if the block is in the old
+					blocks in the LRU list */
+	unsigned	LRU_position:31;/*!< value which monotonically
+					decreases (or may stay
+					constant if old==TRUE) toward
+					the end of the LRU list, if
+					buf_pool->ulint_clock has not
+					wrapped around: NOTE that this
+					value can only be used in
+					heuristic algorithms, because
+					of the possibility of a
+					wrap-around! */
+	unsigned	freed_page_clock:32;/*!< the value of
+					buf_pool->freed_page_clock
+					when this block was the last
+					time put to the head of the
+					LRU list; a thread is allowed
+					to read this for heuristic
+					purposes without holding any
+					mutex or latch */
+	/* @} */
+# ifdef UNIV_DEBUG_FILE_ACCESSES
+	ibool		file_page_was_freed;
+					/*!< this is set to TRUE when fsp
+					frees a page in buffer pool */
+# endif /* UNIV_DEBUG_FILE_ACCESSES */
+#endif /* !UNIV_HOTBACKUP */
+};
+
+/** The buffer control block structure */
+
+struct buf_block_struct{
+
+	/** @name General fields */
+	/* @{ */
+
+	buf_page_t	page;		/*!< page information; this must
+					be the first field, so that
+					buf_pool->page_hash can point
+					to buf_page_t or buf_block_t */
+	byte*		frame;		/*!< pointer to buffer frame which
+					is of size UNIV_PAGE_SIZE, and
+					aligned to an address divisible by
+					UNIV_PAGE_SIZE */
+#ifndef UNIV_HOTBACKUP
+	UT_LIST_NODE_T(buf_block_t) unzip_LRU;
+					/*!< node of the decompressed LRU list;
+					a block is in the unzip_LRU list
+					if page.state == BUF_BLOCK_FILE_PAGE
+					and page.zip.data != NULL */
+#ifdef UNIV_DEBUG
+	ibool		in_unzip_LRU_list;/*!< TRUE if the page is in the
+					decompressed LRU list;
+					used in debugging */
+#endif /* UNIV_DEBUG */
+	mutex_t		mutex;		/*!< mutex protecting this block:
+					state (also protected by the buffer
+					pool mutex), io_fix, buf_fix_count,
+					and accessed; we introduce this new
+					mutex in InnoDB-5.1 to relieve
+					contention on the buffer pool mutex */
+	rw_lock_t	lock;		/*!< read-write lock of the buffer
+					frame */
+	unsigned	lock_hash_val:32;/*!< hashed value of the page address
+					in the record lock hash table */
+	unsigned	check_index_page_at_flush:1;
+					/*!< TRUE if we know that this is
+					an index page, and want the database
+					to check its consistency before flush;
+					note that there may be pages in the
+					buffer pool which are index pages,
+					but this flag is not set because
+					we do not keep track of all pages */
+	/* @} */
+	/** @name Optimistic search field */
+	/* @{ */
+
+	ib_uint64_t	modify_clock;	/*!< this clock is incremented every
+					time a pointer to a record on the
+					page may become obsolete; this is
+					used in the optimistic cursor
+					positioning: if the modify clock has
+					not changed, we know that the pointer
+					is still valid; this field may be
+					changed if the thread (1) owns the
+					pool mutex and the page is not
+					bufferfixed, or (2) the thread has an
+					x-latch on the block */
+	/* @} */
+	/** @name Hash search fields (unprotected)
+	NOTE that these fields are NOT protected by any semaphore! */
+	/* @{ */
+
+	ulint		n_hash_helps;	/*!< counter which controls building
+					of a new hash index for the page */
+	ulint		n_fields;	/*!< recommended prefix length for hash
+					search: number of full fields */
+	ulint		n_bytes;	/*!< recommended prefix: number of bytes
+					in an incomplete field */
+	ibool		left_side;	/*!< TRUE or FALSE, depending on
+					whether the leftmost record of several
+					records with the same prefix should be
+					indexed in the hash index */
+	/* @} */
+
+	/** @name Hash search fields
+	These 6 fields may only be modified when we have
+	an x-latch on btr_search_latch AND
+	- we are holding an s-latch or x-latch on buf_block_struct::lock or
+	- we know that buf_block_struct::buf_fix_count == 0.
+
+	An exception to this is when we init or create a page
+	in the buffer pool in buf0buf.c. */
+
+	/* @{ */
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	ulint		n_pointers;	/*!< used in debugging: the number of
+					pointers in the adaptive hash index
+					pointing to this frame */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	unsigned	is_hashed:1;	/*!< TRUE if hash index has
+					already been built on this
+					page; note that it does not
+					guarantee that the index is
+					complete, though: there may
+					have been hash collisions,
+					record deletions, etc. */
+	unsigned	curr_n_fields:10;/*!< prefix length for hash indexing:
+					number of full fields */
+	unsigned	curr_n_bytes:15;/*!< number of bytes in hash
+					indexing */
+	unsigned	curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
+	dict_index_t*	index;		/*!< Index for which the adaptive
+					hash index has been created. */
+	/* @} */
+# ifdef UNIV_SYNC_DEBUG
+	/** @name Debug fields */
+	/* @{ */
+	rw_lock_t	debug_latch;	/*!< in the debug version, each thread
+					which bufferfixes the block acquires
+					an s-latch here; so we can use the
+					debug utilities in sync0rw */
+	/* @} */
+# endif
+#endif /* !UNIV_HOTBACKUP */
+};
+
+/** Check if a buf_block_t object is in a valid state
+@param block	buffer block
+@return		TRUE if valid */
+#define buf_block_state_valid(block)				\
+(buf_block_get_state(block) >= BUF_BLOCK_NOT_USED		\
+ && (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH))
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Compute the hash fold value for blocks in buf_pool->zip_hash. */
+/* @{ */
+#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
+#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
+#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
+/* @} */
+
+/** @brief The buffer pool structure.
+
+NOTE! The definition appears here only for other modules of this
+directory (buf) to see it. Do not use from outside! */
+
+struct buf_pool_struct{
+
+	/** @name General fields */
+	/* @{ */
+
+	ulint		n_chunks;	/*!< number of buffer pool chunks */
+	buf_chunk_t*	chunks;		/*!< buffer pool chunks */
+	ulint		curr_size;	/*!< current pool size in pages */
+	hash_table_t*	page_hash;	/*!< hash table of buf_page_t or
+					buf_block_t file pages,
+					buf_page_in_file() == TRUE,
+					indexed by (space_id, offset) */
+	hash_table_t*	zip_hash;	/*!< hash table of buf_block_t blocks
+					whose frames are allocated to the
+					zip buddy system,
+					indexed by block->frame */
+	ulint		n_pend_reads;	/*!< number of pending read operations */
+	ulint		n_pend_unzip;	/*!< number of pending decompressions */
+
+	time_t		last_printout_time; /*!< when buf_print was last time
+					called */
+	ulint		n_pages_read;	/*!< number read operations */
+	ulint		n_pages_written;/*!< number write operations */
+	ulint		n_pages_created;/*!< number of pages created
+					in the pool with no read */
+	ulint		n_page_gets;	/*!< number of page gets performed;
+					also successful searches through
+					the adaptive hash index are
+					counted as page gets; this field
+					is NOT protected by the buffer
+					pool mutex */
+	ulint		n_page_gets_old;/*!< n_page_gets when buf_print was
+					last time called: used to calculate
+					hit rate */
+	ulint		n_pages_read_old;/*!< n_pages_read when buf_print was
+					last time called */
+	ulint		n_pages_written_old;/*!< number write operations */
+	ulint		n_pages_created_old;/*!< number of pages created in
+					the pool with no read */
+	/* @} */
+	/** @name Page flushing algorithm fields */
+	/* @{ */
+
+	UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
+					/*!< base node of the modified block
+					list */
+	ibool		init_flush[BUF_FLUSH_N_TYPES];
+					/*!< this is TRUE when a flush of the
+					given type is being initialized */
+	ulint		n_flush[BUF_FLUSH_N_TYPES];
+					/*!< this is the number of pending
+					writes in the given flush type */
+	os_event_t	no_flush[BUF_FLUSH_N_TYPES];
+					/*!< this is in the set state
+					when there is no flush batch
+					of the given type running */
+	ulint		ulint_clock;	/*!< a sequence number used to count
+					time. NOTE! This counter wraps
+					around at 4 billion (if ulint ==
+					32 bits)! */
+	ulint		freed_page_clock;/*!< a sequence number used
+					to count the number of buffer
+					blocks removed from the end of
+					the LRU list; NOTE that this
+					counter may wrap around at 4
+					billion! A thread is allowed
+					to read this for heuristic
+					purposes without holding any
+					mutex or latch */
+	ulint		LRU_flush_ended;/*!< when an LRU flush ends for a page,
+					this is incremented by one; this is
+					set to zero when a buffer block is
+					allocated */
+
+	/* @} */
+	/** @name LRU replacement algorithm fields */
+	/* @{ */
+
+	UT_LIST_BASE_NODE_T(buf_page_t) free;
+					/*!< base node of the free
+					block list */
+	UT_LIST_BASE_NODE_T(buf_page_t) LRU;
+					/*!< base node of the LRU list */
+	buf_page_t*	LRU_old;	/*!< pointer to the about 3/8 oldest
+					blocks in the LRU list; NULL if LRU
+					length less than BUF_LRU_OLD_MIN_LEN;
+					NOTE: when LRU_old != NULL, its length
+					should always equal LRU_old_len */
+	ulint		LRU_old_len;	/*!< length of the LRU list from
+					the block to which LRU_old points
+					onward, including that block;
+					see buf0lru.c for the restrictions
+					on this value; not defined if
+					LRU_old == NULL;
+					NOTE: LRU_old_len must be adjusted
+					whenever LRU_old shrinks or grows! */
+
+	UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
+					/*!< base node of the
+					unzip_LRU list */
+
+	/* @} */
+	/** @name Buddy allocator fields
+	The buddy allocator is used for allocating compressed page
+	frames and buf_page_t descriptors of blocks that exist
+	in the buffer pool only in compressed form. */
+	/* @{ */
+	UT_LIST_BASE_NODE_T(buf_page_t)	zip_clean;
+					/*!< unmodified compressed pages */
+	UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES];
+					/*!< buddy free lists */
+#if BUF_BUDDY_HIGH != UNIV_PAGE_SIZE
+# error "BUF_BUDDY_HIGH != UNIV_PAGE_SIZE"
+#endif
+#if BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE
+# error "BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE"
+#endif
+	/* @} */
+};
+
+/** mutex protecting the buffer pool struct and control blocks, except the
+read-write lock in them */
+extern mutex_t	buf_pool_mutex;
+/** mutex protecting the control blocks of compressed-only pages
+(of type buf_page_t, not buf_block_t) */
+extern mutex_t	buf_pool_zip_mutex;
+
+/** @name Accessors for buf_pool_mutex.
+Use these instead of accessing buf_pool_mutex directly. */
+/* @{ */
+
+/** Test if buf_pool_mutex is owned. */
+#define buf_pool_mutex_own() mutex_own(&buf_pool_mutex)
+/** Acquire the buffer pool mutex. */
+#define buf_pool_mutex_enter() do {		\
+	ut_ad(!mutex_own(&buf_pool_zip_mutex));	\
+	mutex_enter(&buf_pool_mutex);		\
+} while (0)
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/** Flag to forbid the release of the buffer pool mutex.
+Protected by buf_pool_mutex. */
+extern ulint	buf_pool_mutex_exit_forbidden;
+/** Forbid the release of the buffer pool mutex. */
+# define buf_pool_mutex_exit_forbid() do {	\
+	ut_ad(buf_pool_mutex_own());		\
+	buf_pool_mutex_exit_forbidden++;	\
+} while (0)
+/** Allow the release of the buffer pool mutex. */
+# define buf_pool_mutex_exit_allow() do {	\
+	ut_ad(buf_pool_mutex_own());		\
+	ut_a(buf_pool_mutex_exit_forbidden);	\
+	buf_pool_mutex_exit_forbidden--;	\
+} while (0)
+/** Release the buffer pool mutex. */
+# define buf_pool_mutex_exit() do {		\
+	ut_a(!buf_pool_mutex_exit_forbidden);	\
+	mutex_exit(&buf_pool_mutex);		\
+} while (0)
+#else
+/** Forbid the release of the buffer pool mutex. */
+# define buf_pool_mutex_exit_forbid() ((void) 0)
+/** Allow the release of the buffer pool mutex. */
+# define buf_pool_mutex_exit_allow() ((void) 0)
+/** Release the buffer pool mutex. */
+# define buf_pool_mutex_exit() mutex_exit(&buf_pool_mutex)
+#endif
+#endif /* !UNIV_HOTBACKUP */
+/* @} */
+
+/**********************************************************************
+Let us list the consistency conditions for different control block states.
+
+NOT_USED:	is in free list, not in LRU list, not in flush list, nor
+		page hash table
+READY_FOR_USE:	is not in free list, LRU list, or flush list, nor page
+		hash table
+MEMORY:		is not in free list, LRU list, or flush list, nor page
+		hash table
+FILE_PAGE:	space and offset are defined, is in page hash table
+		if io_fix == BUF_IO_WRITE,
+			pool: no_flush[flush_type] is in reset state,
+			pool: n_flush[flush_type] > 0
+
+		(1) if buf_fix_count == 0, then
+			is in LRU list, not in free list
+			is in flush list,
+				if and only if oldest_modification > 0
+			is x-locked,
+				if and only if io_fix == BUF_IO_READ
+			is s-locked,
+				if and only if io_fix == BUF_IO_WRITE
+
+		(2) if buf_fix_count > 0, then
+			is not in LRU list, not in free list
+			is in flush list,
+				if and only if oldest_modification > 0
+			if io_fix == BUF_IO_READ,
+				is x-locked
+			if io_fix == BUF_IO_WRITE,
+				is s-locked
+
+State transitions:
+
+NOT_USED => READY_FOR_USE
+READY_FOR_USE => MEMORY
+READY_FOR_USE => FILE_PAGE
+MEMORY => NOT_USED
+FILE_PAGE => NOT_USED	NOTE: This transition is allowed if and only if
+				(1) buf_fix_count == 0,
+				(2) oldest_modification == 0, and
+				(3) io_fix == 0.
+*/
+
+#ifndef UNIV_NONINL
+#include "buf0buf.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/buf0buf.ic b/storage/innodb_plugin/include/buf0buf.ic
new file mode 100644
index 00000000000..17064342116
--- /dev/null
+++ b/storage/innodb_plugin/include/buf0buf.ic
@@ -0,0 +1,1066 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0buf.ic
+The database buffer buf_pool
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "mtr0mtr.h"
+#ifndef UNIV_HOTBACKUP
+#include "buf0flu.h"
+#include "buf0lru.h"
+#include "buf0rea.h"
+
+/********************************************************************//**
+Reads the freed_page_clock of a buffer block.
+@return	freed_page_clock */
+UNIV_INLINE
+ulint
+buf_page_get_freed_page_clock(
+/*==========================*/
+	const buf_page_t*	bpage)	/*!< in: block */
+{
+	/* This is sometimes read without holding buf_pool_mutex. */
+	return(bpage->freed_page_clock);
+}
+
+/********************************************************************//**
+Reads the freed_page_clock of a buffer block.
+@return	freed_page_clock */
+UNIV_INLINE
+ulint
+buf_block_get_freed_page_clock(
+/*===========================*/
+	const buf_block_t*	block)	/*!< in: block */
+{
+	return(buf_page_get_freed_page_clock(&block->page));
+}
+
+/********************************************************************//**
+Recommends a move of a block to the start of the LRU list if there is danger
+of dropping from the buffer pool. NOTE: does not reserve the buffer pool
+mutex.
+@return	TRUE if should be made younger */
+UNIV_INLINE
+ibool
+buf_page_peek_if_too_old(
+/*=====================*/
+	const buf_page_t*	bpage)	/*!< in: block to make younger */
+{
+	return(buf_pool->freed_page_clock
+	       >= buf_page_get_freed_page_clock(bpage)
+	       + 1 + (buf_pool->curr_size / 4));
+}
+
+/*********************************************************************//**
+Gets the current size of buffer buf_pool in bytes.
+@return	size in bytes */
+UNIV_INLINE
+ulint
+buf_pool_get_curr_size(void)
+/*========================*/
+{
+	return(buf_pool->curr_size * UNIV_PAGE_SIZE);
+}
+
+/********************************************************************//**
+Gets the smallest oldest_modification lsn for any page in the pool. Returns
+zero if all modified pages have been flushed to disk.
+@return	oldest modification in pool, zero if none */
+UNIV_INLINE
+ib_uint64_t
+buf_pool_get_oldest_modification(void)
+/*==================================*/
+{
+	buf_page_t*	bpage;
+	ib_uint64_t	lsn;
+
+	buf_pool_mutex_enter();
+
+	bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+
+	if (bpage == NULL) {
+		lsn = 0;
+	} else {
+		ut_ad(bpage->in_flush_list);
+		lsn = bpage->oldest_modification;
+	}
+
+	buf_pool_mutex_exit();
+
+	/* The returned answer may be out of date: the flush_list can
+	change after the mutex has been released. */
+
+	return(lsn);
+}
+
+/*******************************************************************//**
+Increments the buf_pool clock by one and returns its new value. Remember
+that in the 32 bit version the clock wraps around at 4 billion!
+@return	new clock value */
+UNIV_INLINE
+ulint
+buf_pool_clock_tic(void)
+/*====================*/
+{
+	ut_ad(buf_pool_mutex_own());
+
+	buf_pool->ulint_clock++;
+
+	return(buf_pool->ulint_clock);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
+Gets the state of a block.
+@return	state */
+UNIV_INLINE
+enum buf_page_state
+buf_page_get_state(
+/*===============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+{
+	enum buf_page_state	state = (enum buf_page_state) bpage->state;
+
+#ifdef UNIV_DEBUG
+	switch (state) {
+	case BUF_BLOCK_ZIP_FREE:
+	case BUF_BLOCK_ZIP_PAGE:
+	case BUF_BLOCK_ZIP_DIRTY:
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_FILE_PAGE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		break;
+	default:
+		ut_error;
+	}
+#endif /* UNIV_DEBUG */
+
+	return(state);
+}
+/*********************************************************************//**
+Gets the state of a block.
+@return	state */
+UNIV_INLINE
+enum buf_page_state
+buf_block_get_state(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
+	return(buf_page_get_state(&block->page));
+}
+/*********************************************************************//**
+Sets the state of a block. */
+UNIV_INLINE
+void
+buf_page_set_state(
+/*===============*/
+	buf_page_t*		bpage,	/*!< in/out: pointer to control block */
+	enum buf_page_state	state)	/*!< in: state */
+{
+#ifdef UNIV_DEBUG
+	enum buf_page_state	old_state	= buf_page_get_state(bpage);
+
+	switch (old_state) {
+	case BUF_BLOCK_ZIP_FREE:
+		ut_error;
+		break;
+	case BUF_BLOCK_ZIP_PAGE:
+		ut_a(state == BUF_BLOCK_ZIP_DIRTY);
+		break;
+	case BUF_BLOCK_ZIP_DIRTY:
+		ut_a(state == BUF_BLOCK_ZIP_PAGE);
+		break;
+	case BUF_BLOCK_NOT_USED:
+		ut_a(state == BUF_BLOCK_READY_FOR_USE);
+		break;
+	case BUF_BLOCK_READY_FOR_USE:
+		ut_a(state == BUF_BLOCK_MEMORY
+		     || state == BUF_BLOCK_FILE_PAGE
+		     || state == BUF_BLOCK_NOT_USED);
+		break;
+	case BUF_BLOCK_MEMORY:
+		ut_a(state == BUF_BLOCK_NOT_USED);
+		break;
+	case BUF_BLOCK_FILE_PAGE:
+		ut_a(state == BUF_BLOCK_NOT_USED
+		     || state == BUF_BLOCK_REMOVE_HASH);
+		break;
+	case BUF_BLOCK_REMOVE_HASH:
+		ut_a(state == BUF_BLOCK_MEMORY);
+		break;
+	}
+#endif /* UNIV_DEBUG */
+	bpage->state = state;
+	ut_ad(buf_page_get_state(bpage) == state);
+}
+
+/*********************************************************************//**
+Sets the state of a block. */
+UNIV_INLINE
+void
+buf_block_set_state(
+/*================*/
+	buf_block_t*		block,	/*!< in/out: pointer to control block */
+	enum buf_page_state	state)	/*!< in: state */
+{
+	buf_page_set_state(&block->page, state);
+}
+
+/*********************************************************************//**
+Determines if a block is mapped to a tablespace.
+@return	TRUE if mapped */
+UNIV_INLINE
+ibool
+buf_page_in_file(
+/*=============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to control block */
+{
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_ZIP_FREE:
+		/* This is a free page in buf_pool->zip_free[].
+		Such pages should only be accessed by the buddy allocator. */
+		ut_error;
+		break;
+	case BUF_BLOCK_ZIP_PAGE:
+	case BUF_BLOCK_ZIP_DIRTY:
+	case BUF_BLOCK_FILE_PAGE:
+		return(TRUE);
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		break;
+	}
+
+	return(FALSE);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Determines if a block should be on unzip_LRU list.
+@return	TRUE if block belongs to unzip_LRU */
+UNIV_INLINE
+ibool
+buf_page_belongs_to_unzip_LRU(
+/*==========================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to control block */
+{
+	ut_ad(buf_page_in_file(bpage));
+
+	return(bpage->zip.data
+	       && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+}
+
+/*********************************************************************//**
+Determine the approximate LRU list position of a block.
+@return	LRU list position */
+UNIV_INLINE
+ulint
+buf_page_get_LRU_position(
+/*======================*/
+	const buf_page_t*	bpage)	/*!< in: control block */
+{
+	ut_ad(buf_page_in_file(bpage));
+	ut_ad(buf_pool_mutex_own());
+
+	return(bpage->LRU_position);
+}
+
+/*********************************************************************//**
+Gets the mutex of a block.
+@return	pointer to mutex protecting bpage */
+UNIV_INLINE
+mutex_t*
+buf_page_get_mutex(
+/*===============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to control block */
+{
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_ZIP_FREE:
+		ut_error;
+		return(NULL);
+	case BUF_BLOCK_ZIP_PAGE:
+	case BUF_BLOCK_ZIP_DIRTY:
+		return(&buf_pool_zip_mutex);
+	default:
+		return(&((buf_block_t*) bpage)->mutex);
+	}
+}
+
+/*********************************************************************//**
+Get the flush type of a page.
+@return	flush type */
+UNIV_INLINE
+enum buf_flush
+buf_page_get_flush_type(
+/*====================*/
+	const buf_page_t*	bpage)	/*!< in: buffer page */
+{
+	enum buf_flush	flush_type = (enum buf_flush) bpage->flush_type;
+
+#ifdef UNIV_DEBUG
+	switch (flush_type) {
+	case BUF_FLUSH_LRU:
+	case BUF_FLUSH_SINGLE_PAGE:
+	case BUF_FLUSH_LIST:
+		return(flush_type);
+	case BUF_FLUSH_N_TYPES:
+		break;
+	}
+	ut_error;
+#endif /* UNIV_DEBUG */
+	return(flush_type);
+}
+/*********************************************************************//**
+Set the flush type of a page. */
+UNIV_INLINE
+void
+buf_page_set_flush_type(
+/*====================*/
+	buf_page_t*	bpage,		/*!< in: buffer page */
+	enum buf_flush	flush_type)	/*!< in: flush type */
+{
+	bpage->flush_type = flush_type;
+	ut_ad(buf_page_get_flush_type(bpage) == flush_type);
+}
+
+/*********************************************************************//**
+Map a block to a file page. */
+UNIV_INLINE
+void
+buf_block_set_file_page(
+/*====================*/
+	buf_block_t*		block,	/*!< in/out: pointer to control block */
+	ulint			space,	/*!< in: tablespace id */
+	ulint			page_no)/*!< in: page number */
+{
+	buf_block_set_state(block, BUF_BLOCK_FILE_PAGE);
+	block->page.space = space;
+	block->page.offset = page_no;
+}
+
+/*********************************************************************//**
+Gets the io_fix state of a block.
+@return	io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_page_get_io_fix(
+/*================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+{
+	enum buf_io_fix	io_fix = (enum buf_io_fix) bpage->io_fix;
+#ifdef UNIV_DEBUG
+	switch (io_fix) {
+	case BUF_IO_NONE:
+	case BUF_IO_READ:
+	case BUF_IO_WRITE:
+		return(io_fix);
+	}
+	ut_error;
+#endif /* UNIV_DEBUG */
+	return(io_fix);
+}
+
+/*********************************************************************//**
+Gets the io_fix state of a block.
+@return	io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_block_get_io_fix(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
+	return(buf_page_get_io_fix(&block->page));
+}
+
+/*********************************************************************//**
+Sets the io_fix state of a block. */
+UNIV_INLINE
+void
+buf_page_set_io_fix(
+/*================*/
+	buf_page_t*	bpage,	/*!< in/out: control block */
+	enum buf_io_fix	io_fix)	/*!< in: io_fix state */
+{
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+	bpage->io_fix = io_fix;
+	ut_ad(buf_page_get_io_fix(bpage) == io_fix);
+}
+
+/*********************************************************************//**
+Sets the io_fix state of a block. */
+UNIV_INLINE
+void
+buf_block_set_io_fix(
+/*=================*/
+	buf_block_t*	block,	/*!< in/out: control block */
+	enum buf_io_fix	io_fix)	/*!< in: io_fix state */
+{
+	buf_page_set_io_fix(&block->page, io_fix);
+}
+
+/********************************************************************//**
+Determine if a buffer block can be relocated in memory.  The block
+can be dirty, but it must not be I/O-fixed or bufferfixed. */
+UNIV_INLINE
+ibool
+buf_page_can_relocate(
+/*==================*/
+	const buf_page_t*	bpage)	/*!< control block being relocated */
+{
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+	ut_ad(buf_page_in_file(bpage));
+	ut_ad(bpage->in_LRU_list);
+
+	return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
+	       && bpage->buf_fix_count == 0);
+}
+
+/*********************************************************************//**
+Determine if a block has been flagged old.
+@return	TRUE if old */
+UNIV_INLINE
+ibool
+buf_page_is_old(
+/*============*/
+	const buf_page_t*	bpage)	/*!< in: control block */
+{
+	ut_ad(buf_page_in_file(bpage));
+	ut_ad(buf_pool_mutex_own());
+
+	return(bpage->old);
+}
+
+/*********************************************************************//**
+Flag a block old. */
+UNIV_INLINE
+void
+buf_page_set_old(
+/*=============*/
+	buf_page_t*	bpage,	/*!< in/out: control block */
+	ibool		old)	/*!< in: old */
+{
+	ut_a(buf_page_in_file(bpage));
+	ut_ad(buf_pool_mutex_own());
+	ut_ad(bpage->in_LRU_list);
+
+#ifdef UNIV_LRU_DEBUG
+	if (UT_LIST_GET_PREV(LRU, bpage) && UT_LIST_GET_NEXT(LRU, bpage)
+	    && UT_LIST_GET_PREV(LRU, bpage)->old
+	    == UT_LIST_GET_NEXT(LRU, bpage)->old) {
+		ut_a(UT_LIST_GET_PREV(LRU, bpage)->old == old);
+	}
+#endif /* UNIV_LRU_DEBUG */
+
+	bpage->old = old;
+}
+
+/*********************************************************************//**
+Determine if a block has been accessed in the buffer pool.
+@return	TRUE if accessed */
+UNIV_INLINE
+ibool
+buf_page_is_accessed(
+/*=================*/
+	const buf_page_t*	bpage)	/*!< in: control block */
+{
+	ut_ad(buf_page_in_file(bpage));
+
+	return(bpage->accessed);
+}
+
+/*********************************************************************//**
+Flag a block accessed. */
+UNIV_INLINE
+void
+buf_page_set_accessed(
+/*==================*/
+	buf_page_t*	bpage,		/*!< in/out: control block */
+	ibool		accessed)	/*!< in: accessed */
+{
+	ut_a(buf_page_in_file(bpage));
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+	bpage->accessed = accessed;
+}
+
+/*********************************************************************//**
+Gets the buf_block_t handle of a buffered file block if an uncompressed
+page frame exists, or NULL.
+@return	control block, or NULL */
+UNIV_INLINE
+buf_block_t*
+buf_page_get_block(
+/*===============*/
+	buf_page_t*	bpage)	/*!< in: control block, or NULL */
+{
+	if (UNIV_LIKELY(bpage != NULL)) {
+		ut_ad(buf_page_in_file(bpage));
+
+		if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
+			return((buf_block_t*) bpage);
+		}
+	}
+
+	return(NULL);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets a pointer to the memory frame of a block.
+@return	pointer to the frame */
+UNIV_INLINE
+buf_frame_t*
+buf_block_get_frame(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
+	ut_ad(block);
+
+	switch (buf_block_get_state(block)) {
+	case BUF_BLOCK_ZIP_FREE:
+	case BUF_BLOCK_ZIP_PAGE:
+	case BUF_BLOCK_ZIP_DIRTY:
+	case BUF_BLOCK_NOT_USED:
+		ut_error;
+		break;
+	case BUF_BLOCK_FILE_PAGE:
+# ifndef UNIV_HOTBACKUP
+		ut_a(block->page.buf_fix_count > 0);
+# endif /* !UNIV_HOTBACKUP */
+		/* fall through */
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		goto ok;
+	}
+	ut_error;
+ok:
+	return((buf_frame_t*) block->frame);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Gets the space id of a block.
+@return	space id */
+UNIV_INLINE
+ulint
+buf_page_get_space(
+/*===============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+{
+	ut_ad(bpage);
+	ut_a(buf_page_in_file(bpage));
+
+	return(bpage->space);
+}
+
+/*********************************************************************//**
+Gets the space id of a block.
+@return	space id */
+UNIV_INLINE
+ulint
+buf_block_get_space(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
+	ut_ad(block);
+	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+	return(block->page.space);
+}
+
+/*********************************************************************//**
+Gets the page number of a block.
+@return	page number */
+UNIV_INLINE
+ulint
+buf_page_get_page_no(
+/*=================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+{
+	ut_ad(bpage);
+	ut_a(buf_page_in_file(bpage));
+
+	return(bpage->offset);
+}
+
+/*********************************************************************//**
+Gets the page number of a block.
+@return	page number */
+UNIV_INLINE
+ulint
+buf_block_get_page_no(
+/*==================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
+	ut_ad(block);
+	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+	return(block->page.offset);
+}
+
+/*********************************************************************//**
+Gets the compressed page size of a block.
+@return	compressed page size, or 0 */
+UNIV_INLINE
+ulint
+buf_page_get_zip_size(
+/*==================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+{
+	return(bpage->zip.ssize ? 512 << bpage->zip.ssize : 0);
+}
+
+/*********************************************************************//**
+Gets the compressed page size of a block.
+@return	compressed page size, or 0 */
+UNIV_INLINE
+ulint
+buf_block_get_zip_size(
+/*===================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
+	return(block->page.zip.ssize ? 512 << block->page.zip.ssize : 0);
+}
+
+#ifndef UNIV_HOTBACKUP
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+/*********************************************************************//**
+Gets the compressed page descriptor corresponding to an uncompressed page
+if applicable.
+@return	compressed page descriptor, or NULL */
+UNIV_INLINE
+const page_zip_des_t*
+buf_frame_get_page_zip(
+/*===================*/
+	const byte*	ptr)	/*!< in: pointer to the page */
+{
+	return(buf_block_get_page_zip(buf_block_align(ptr)));
+}
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Gets the space id, page offset, and byte offset within page of a
+pointer pointing to a buffer frame containing a file page. */
+UNIV_INLINE
+void
+buf_ptr_get_fsp_addr(
+/*=================*/
+	const void*	ptr,	/*!< in: pointer to a buffer frame */
+	ulint*		space,	/*!< out: space id */
+	fil_addr_t*	addr)	/*!< out: page offset and byte offset */
+{
+	const page_t*	page = (const page_t*) ut_align_down(ptr,
+							     UNIV_PAGE_SIZE);
+
+	*space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+	addr->page = mach_read_from_4(page + FIL_PAGE_OFFSET);
+	addr->boffset = ut_align_offset(ptr, UNIV_PAGE_SIZE);
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Gets the hash value of the page the pointer is pointing to. This can be used
+in searches in the lock hash table.
+@return	lock hash value */
+UNIV_INLINE
+ulint
+buf_block_get_lock_hash_val(
+/*========================*/
+	const buf_block_t*	block)	/*!< in: block */
+{
+	return(block->lock_hash_val);
+}
+
+/********************************************************************//**
+Allocates a buffer block.
+@return	own: the allocated block, in state BUF_BLOCK_MEMORY */
+UNIV_INLINE
+buf_block_t*
+buf_block_alloc(
+/*============*/
+	ulint	zip_size)	/*!< in: compressed page size in bytes,
+				or 0 if uncompressed tablespace */
+{
+	buf_block_t*	block;
+
+	block = buf_LRU_get_free_block(zip_size);
+
+	buf_block_set_state(block, BUF_BLOCK_MEMORY);
+
+	return(block);
+}
+
+/********************************************************************//**
+Frees a buffer block which does not contain a file page. */
+UNIV_INLINE
+void
+buf_block_free(
+/*===========*/
+	buf_block_t*	block)	/*!< in, own: block to be freed */
+{
+	buf_pool_mutex_enter();
+
+	mutex_enter(&block->mutex);
+
+	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+
+	buf_LRU_block_free_non_file_page(block);
+
+	mutex_exit(&block->mutex);
+
+	buf_pool_mutex_exit();
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
+Copies contents of a buffer frame to a given buffer.
+@return	buf */
+UNIV_INLINE
+byte*
+buf_frame_copy(
+/*===========*/
+	byte*			buf,	/*!< in: buffer to copy to */
+	const buf_frame_t*	frame)	/*!< in: buffer frame */
+{
+	ut_ad(buf && frame);
+
+	ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
+
+	return(buf);
+}
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Calculates a folded value of a file page address to use in the page hash
+table.
+@return	the folded value */
+UNIV_INLINE
+ulint
+buf_page_address_fold(
+/*==================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: offset of the page within space */
+{
+	return((space << 20) + space + offset);
+}
+
+/********************************************************************//**
+Gets the youngest modification log sequence number for a frame.
+Returns zero if not file page or no modification occurred yet.
+@return	newest modification to page */
+UNIV_INLINE
+ib_uint64_t
+buf_page_get_newest_modification(
+/*=============================*/
+	const buf_page_t*	bpage)	/*!< in: block containing the
+					page frame */
+{
+	ib_uint64_t	lsn;
+	mutex_t*	block_mutex = buf_page_get_mutex(bpage);
+
+	mutex_enter(block_mutex);
+
+	if (buf_page_in_file(bpage)) {
+		lsn = bpage->newest_modification;
+	} else {
+		lsn = 0;
+	}
+
+	mutex_exit(block_mutex);
+
+	return(lsn);
+}
+
+/********************************************************************//**
+Increments the modify clock of a frame by 1. The caller must (1) own the
+buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
+on the block. */
+UNIV_INLINE
+void
+buf_block_modify_clock_inc(
+/*=======================*/
+	buf_block_t*	block)	/*!< in: block */
+{
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad((buf_pool_mutex_own()
+	       && (block->page.buf_fix_count == 0))
+	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+#endif /* UNIV_SYNC_DEBUG */
+
+	block->modify_clock++;
+}
+
+/********************************************************************//**
+Returns the value of the modify clock. The caller must have an s-lock
+or x-lock on the block.
+@return	value */
+UNIV_INLINE
+ib_uint64_t
+buf_block_get_modify_clock(
+/*=======================*/
+	buf_block_t*	block)	/*!< in: block */
+{
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+#endif /* UNIV_SYNC_DEBUG */
+
+	return(block->modify_clock);
+}
+
+/*******************************************************************//**
+Increments the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc_func(
+/*=======================*/
+#ifdef UNIV_SYNC_DEBUG
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line */
+#endif /* UNIV_SYNC_DEBUG */
+	buf_block_t*	block)	/*!< in/out: block to bufferfix */
+{
+#ifdef UNIV_SYNC_DEBUG
+	ibool	ret;
+
+	ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line);
+	ut_a(ret);
+#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(mutex_own(&block->mutex));
+
+	block->page.buf_fix_count++;
+}
+#ifdef UNIV_SYNC_DEBUG
+/** Increments the bufferfix count.
+@param b	in/out: block to bufferfix
+@param f	in: file name where requested
+@param l	in: line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
+#else /* UNIV_SYNC_DEBUG */
+/** Increments the bufferfix count.
+@param b	in/out: block to bufferfix
+@param f	in: file name where requested
+@param l	in: line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
+#endif /* UNIV_SYNC_DEBUG */
+
+/*******************************************************************//**
+Decrements the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_buf_fix_dec(
+/*==================*/
+	buf_block_t*	block)	/*!< in/out: block to bufferunfix */
+{
+	ut_ad(mutex_own(&block->mutex));
+
+	block->page.buf_fix_count--;
+#ifdef UNIV_SYNC_DEBUG
+	rw_lock_s_unlock(&block->debug_latch);
+#endif
+}
+
+/******************************************************************//**
+Returns the control block of a file page, NULL if not found.
+@return	block, NULL if not found */
+UNIV_INLINE
+buf_page_t*
+buf_page_hash_get(
+/*==============*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: offset of the page within space */
+{
+	buf_page_t*	bpage;
+	ulint		fold;
+
+	ut_ad(buf_pool);
+	ut_ad(buf_pool_mutex_own());
+
+	/* Look for the page in the hash table */
+
+	fold = buf_page_address_fold(space, offset);
+
+	HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage,
+		    ut_ad(bpage->in_page_hash && !bpage->in_zip_hash
+			  && buf_page_in_file(bpage)),
+		    bpage->space == space && bpage->offset == offset);
+	if (bpage) {
+		ut_a(buf_page_in_file(bpage));
+		ut_ad(bpage->in_page_hash);
+		ut_ad(!bpage->in_zip_hash);
+		UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+	}
+
+	return(bpage);
+}
+
+/******************************************************************//**
+Returns the control block of a file page, NULL if not found
+or an uncompressed page frame does not exist.
+@return	block, NULL if not found */
+UNIV_INLINE
+buf_block_t*
+buf_block_hash_get(
+/*===============*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: offset of the page within space */
+{
+	return(buf_page_get_block(buf_page_hash_get(space, offset)));
+}
+
+/********************************************************************//**
+Returns TRUE if the page can be found in the buffer pool hash table.
+
+NOTE that it is possible that the page is not yet read from disk,
+though.
+
+@return	TRUE if found in the page hash table */
+UNIV_INLINE
+ibool
+buf_page_peek(
+/*==========*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: page number */
+{
+	const buf_page_t*	bpage;
+
+	buf_pool_mutex_enter();
+
+	bpage = buf_page_hash_get(space, offset);
+
+	buf_pool_mutex_exit();
+
+	return(bpage != NULL);
+}
+
+/********************************************************************//**
+Releases a compressed-only page acquired with buf_page_get_zip(). */
+UNIV_INLINE
+void
+buf_page_release_zip(
+/*=================*/
+	buf_page_t*	bpage)		/*!< in: buffer block */
+{
+	buf_block_t*	block;
+
+	ut_ad(bpage);
+	ut_a(bpage->buf_fix_count > 0);
+
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_ZIP_PAGE:
+	case BUF_BLOCK_ZIP_DIRTY:
+		mutex_enter(&buf_pool_zip_mutex);
+		bpage->buf_fix_count--;
+		mutex_exit(&buf_pool_zip_mutex);
+		return;
+	case BUF_BLOCK_FILE_PAGE:
+		block = (buf_block_t*) bpage;
+		mutex_enter(&block->mutex);
+#ifdef UNIV_SYNC_DEBUG
+		rw_lock_s_unlock(&block->debug_latch);
+#endif
+		bpage->buf_fix_count--;
+		mutex_exit(&block->mutex);
+		return;
+	case BUF_BLOCK_ZIP_FREE:
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		break;
+	}
+
+	ut_error;
+}
+
+/********************************************************************//**
+Decrements the bufferfix count of a buffer control block and releases
+a latch, if specified. */
+UNIV_INLINE
+void
+buf_page_release(
+/*=============*/
+	buf_block_t*	block,		/*!< in: buffer block */
+	ulint		rw_latch,	/*!< in: RW_S_LATCH, RW_X_LATCH,
+					RW_NO_LATCH */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	ut_ad(block);
+
+	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+	ut_a(block->page.buf_fix_count > 0);
+
+	if (rw_latch == RW_X_LATCH && mtr->modifications) {
+		buf_pool_mutex_enter();
+		buf_flush_note_modification(block, mtr);
+		buf_pool_mutex_exit();
+	}
+
+	mutex_enter(&block->mutex);
+
+#ifdef UNIV_SYNC_DEBUG
+	rw_lock_s_unlock(&(block->debug_latch));
+#endif
+	block->page.buf_fix_count--;
+
+	mutex_exit(&block->mutex);
+
+	if (rw_latch == RW_S_LATCH) {
+		rw_lock_s_unlock(&(block->lock));
+	} else if (rw_latch == RW_X_LATCH) {
+		rw_lock_x_unlock(&(block->lock));
+	}
+}
+
+#ifdef UNIV_SYNC_DEBUG
+/*********************************************************************//**
+Adds latch level info for the rw-lock protecting the buffer frame. This
+should be called in the debug version after a successful latching of a
+page if we know the latching order level of the acquired latch. */
+UNIV_INLINE
+void
+buf_block_dbg_add_level(
+/*====================*/
+	buf_block_t*	block,	/*!< in: buffer page
+				where we have acquired latch */
+	ulint		level)	/*!< in: latching order level */
+{
+	sync_thread_add_level(&block->lock, level);
+}
+#endif /* UNIV_SYNC_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/buf0flu.h b/storage/innodb_plugin/include/buf0flu.h
new file mode 100644
index 00000000000..6c751852f54
--- /dev/null
+++ b/storage/innodb_plugin/include/buf0flu.h
@@ -0,0 +1,191 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0flu.h
+The database buffer pool flush algorithm
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0flu_h
+#define buf0flu_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#ifndef UNIV_HOTBACKUP
+#include "mtr0types.h"
+#include "buf0types.h"
+
+/********************************************************************//**
+Remove a block from the flush list of modified blocks. */
+UNIV_INTERN
+void
+buf_flush_remove(
+/*=============*/
+	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
+/********************************************************************//**
+Updates the flush system data structures when a write is completed. */
+UNIV_INTERN
+void
+buf_flush_write_complete(
+/*=====================*/
+	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
+/*********************************************************************//**
+Flushes pages from the end of the LRU list if there is too small
+a margin of replaceable pages there. */
+UNIV_INTERN
+void
+buf_flush_free_margin(void);
+/*=======================*/
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Initializes a page for writing to the tablespace. */
+UNIV_INTERN
+void
+buf_flush_init_for_writing(
+/*=======================*/
+	byte*		page,		/*!< in/out: page */
+	void*		page_zip_,	/*!< in/out: compressed page, or NULL */
+	ib_uint64_t	newest_lsn);	/*!< in: newest modification lsn
+					to the page */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the LRU list or flush_list.
+NOTE 1: in the case of an LRU flush the calling thread may own latches to
+pages: to avoid deadlocks, this function must be written so that it cannot
+end up waiting for these latches! NOTE 2: in the case of a flush list flush,
+the calling thread is not allowed to own any latches on pages!
+@return number of blocks for which the write request was queued;
+ULINT_UNDEFINED if there was a flush of the same type already running */
+UNIV_INTERN
+ulint
+buf_flush_batch(
+/*============*/
+	enum buf_flush	flush_type,	/*!< in: BUF_FLUSH_LRU or
+					BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
+					then the caller must not own any
+					latches on pages */
+	ulint		min_n,		/*!< in: wished minimum mumber of blocks
+					flushed (it is not guaranteed that the
+					actual number is that big, though) */
+	ib_uint64_t	lsn_limit);	/*!< in the case BUF_FLUSH_LIST all
+					blocks whose oldest_modification is
+					smaller than this should be flushed
+					(if their number does not exceed
+					min_n), otherwise ignored */
+/******************************************************************//**
+Waits until a flush batch of the given type ends */
+UNIV_INTERN
+void
+buf_flush_wait_batch_end(
+/*=====================*/
+	enum buf_flush	type);	/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+/********************************************************************//**
+This function should be called at a mini-transaction commit, if a page was
+modified in it. Puts the block to the list of modified blocks, if it not
+already in it. */
+UNIV_INLINE
+void
+buf_flush_note_modification(
+/*========================*/
+	buf_block_t*	block,	/*!< in: block which is modified */
+	mtr_t*		mtr);	/*!< in: mtr */
+/********************************************************************//**
+This function should be called when recovery has modified a buffer page. */
+UNIV_INLINE
+void
+buf_flush_recv_note_modification(
+/*=============================*/
+	buf_block_t*	block,		/*!< in: block which is modified */
+	ib_uint64_t	start_lsn,	/*!< in: start lsn of the first mtr in a
+					set of mtr's */
+	ib_uint64_t	end_lsn);	/*!< in: end lsn of the last mtr in the
+					set of mtr's */
+/********************************************************************//**
+Returns TRUE if the file page block is immediately suitable for replacement,
+i.e., transition FILE_PAGE => NOT_USED allowed.
+@return	TRUE if can replace immediately */
+UNIV_INTERN
+ibool
+buf_flush_ready_for_replace(
+/*========================*/
+	buf_page_t*	bpage);	/*!< in: buffer control block, must be
+				buf_page_in_file(bpage) and in the LRU list */
+
+/** @brief Statistics for selecting flush rate based on redo log
+generation speed.
+
+These statistics are generated for heuristics used in estimating the
+rate at which we should flush the dirty blocks to avoid bursty IO
+activity. Note that the rate of flushing not only depends on how many
+dirty pages we have in the buffer pool but it is also a fucntion of
+how much redo the workload is generating and at what rate. */
+
+struct buf_flush_stat_struct
+{
+	ib_uint64_t	redo;		/**< amount of redo generated. */
+	ulint		n_flushed;	/**< number of pages flushed. */
+};
+
+/** Statistics for selecting flush rate of dirty pages. */
+typedef struct buf_flush_stat_struct buf_flush_stat_t;
+/*********************************************************************
+Update the historical stats that we are collecting for flush rate
+heuristics at the end of each interval. */
+UNIV_INTERN
+void
+buf_flush_stat_update(void);
+/*=======================*/
+/*********************************************************************
+Determines the fraction of dirty pages that need to be flushed based
+on the speed at which we generate redo log. Note that if redo log
+is generated at significant rate without a corresponding increase
+in the number of dirty pages (for example, an in-memory workload)
+it can cause IO bursts of flushing. This function implements heuristics
+to avoid this burstiness.
+@return	number of dirty pages to be flushed / second */
+UNIV_INTERN
+ulint
+buf_flush_get_desired_flush_rate(void);
+/*==================================*/
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/******************************************************************//**
+Validates the flush list.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+buf_flush_validate(void);
+/*====================*/
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+/** When buf_flush_free_margin is called, it tries to make this many blocks
+available to replacement in the free list and at the end of the LRU list (to
+make sure that a read-ahead batch can be read efficiently in a single
+sweep). */
+#define BUF_FLUSH_FREE_BLOCK_MARGIN	(5 + BUF_READ_AHEAD_AREA)
+/** Extra margin to apply above BUF_FLUSH_FREE_BLOCK_MARGIN */
+#define BUF_FLUSH_EXTRA_MARGIN		(BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "buf0flu.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/buf0flu.ic b/storage/innodb_plugin/include/buf0flu.ic
new file mode 100644
index 00000000000..c90cd59e4b6
--- /dev/null
+++ b/storage/innodb_plugin/include/buf0flu.ic
@@ -0,0 +1,123 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0flu.ic
+The database buffer pool flush algorithm
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef UNIV_HOTBACKUP
+#include "buf0buf.h"
+#include "mtr0mtr.h"
+
+/********************************************************************//**
+Inserts a modified block into the flush list. */
+UNIV_INTERN
+void
+buf_flush_insert_into_flush_list(
+/*=============================*/
+	buf_block_t*	block);	/*!< in/out: block which is modified */
+/********************************************************************//**
+Inserts a modified block into the flush list in the right sorted position.
+This function is used by recovery, because there the modifications do not
+necessarily come in the order of lsn's. */
+UNIV_INTERN
+void
+buf_flush_insert_sorted_into_flush_list(
+/*====================================*/
+	buf_block_t*	block);	/*!< in/out: block which is modified */
+
+/********************************************************************//**
+This function should be called at a mini-transaction commit, if a page was
+modified in it. Puts the block to the list of modified blocks, if it is not
+already in it. */
+UNIV_INLINE
+void
+buf_flush_note_modification(
+/*========================*/
+	buf_block_t*	block,	/*!< in: block which is modified */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ut_ad(block);
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+	ut_ad(block->page.buf_fix_count > 0);
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(buf_pool_mutex_own());
+
+	ut_ad(mtr->start_lsn != 0);
+	ut_ad(mtr->modifications);
+	ut_ad(block->page.newest_modification <= mtr->end_lsn);
+
+	block->page.newest_modification = mtr->end_lsn;
+
+	if (!block->page.oldest_modification) {
+
+		block->page.oldest_modification = mtr->start_lsn;
+		ut_ad(block->page.oldest_modification != 0);
+
+		buf_flush_insert_into_flush_list(block);
+	} else {
+		ut_ad(block->page.oldest_modification <= mtr->start_lsn);
+	}
+
+	++srv_buf_pool_write_requests;
+}
+
+/********************************************************************//**
+This function should be called when recovery has modified a buffer page. */
+UNIV_INLINE
+void
+buf_flush_recv_note_modification(
+/*=============================*/
+	buf_block_t*	block,		/*!< in: block which is modified */
+	ib_uint64_t	start_lsn,	/*!< in: start lsn of the first mtr in a
+					set of mtr's */
+	ib_uint64_t	end_lsn)	/*!< in: end lsn of the last mtr in the
+					set of mtr's */
+{
+	ut_ad(block);
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+	ut_ad(block->page.buf_fix_count > 0);
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	buf_pool_mutex_enter();
+
+	ut_ad(block->page.newest_modification <= end_lsn);
+
+	block->page.newest_modification = end_lsn;
+
+	if (!block->page.oldest_modification) {
+
+		block->page.oldest_modification = start_lsn;
+
+		ut_ad(block->page.oldest_modification != 0);
+
+		buf_flush_insert_sorted_into_flush_list(block);
+	} else {
+		ut_ad(block->page.oldest_modification <= start_lsn);
+	}
+
+	buf_pool_mutex_exit();
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/buf0lru.h b/storage/innodb_plugin/include/buf0lru.h
new file mode 100644
index 00000000000..463aca0982c
--- /dev/null
+++ b/storage/innodb_plugin/include/buf0lru.h
@@ -0,0 +1,263 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0lru.h
+The database buffer pool LRU replacement algorithm
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0lru_h
+#define buf0lru_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "buf0types.h"
+
+/** The return type of buf_LRU_free_block() */
+enum buf_lru_free_block_status {
+	/** freed */
+	BUF_LRU_FREED = 0,
+	/** not freed because the caller asked to remove the
+	uncompressed frame but the control block cannot be
+	relocated */
+	BUF_LRU_CANNOT_RELOCATE,
+	/** not freed because of some other reason */
+	BUF_LRU_NOT_FREED
+};
+
+/******************************************************************//**
+Tries to remove LRU flushed blocks from the end of the LRU list and put them
+to the free list. This is beneficial for the efficiency of the insert buffer
+operation, as flushed pages from non-unique non-clustered indexes are here
+taken out of the buffer pool, and their inserts redirected to the insert
+buffer. Otherwise, the flushed blocks could get modified again before read
+operations need new buffer blocks, and the i/o work done in flushing would be
+wasted. */
+UNIV_INTERN
+void
+buf_LRU_try_free_flushed_blocks(void);
+/*==================================*/
+/******************************************************************//**
+Returns TRUE if less than 25 % of the buffer pool is available. This can be
+used in heuristics to prevent huge transactions eating up the whole buffer
+pool for their locks.
+@return	TRUE if less than 25 % of buffer pool left */
+UNIV_INTERN
+ibool
+buf_LRU_buf_pool_running_out(void);
+/*==============================*/
+
+/*#######################################################################
+These are low-level functions
+#########################################################################*/
+
+/** Minimum LRU list length for which the LRU_old pointer is defined */
+#define BUF_LRU_OLD_MIN_LEN	80
+
+/** Maximum LRU list search length in buf_flush_LRU_recommendation() */
+#define BUF_LRU_FREE_SEARCH_LEN		(5 + 2 * BUF_READ_AHEAD_AREA)
+
+/******************************************************************//**
+Invalidates all pages belonging to a given tablespace when we are deleting
+the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
+what guarantees that it will not try to read in pages after this operation has
+completed? */
+UNIV_INTERN
+void
+buf_LRU_invalidate_tablespace(
+/*==========================*/
+	ulint	id);	/*!< in: space id */
+/******************************************************************//**
+Gets the minimum LRU_position field for the blocks in an initial segment
+(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
+guaranteed to be precise, because the ulint_clock may wrap around.
+@return	the limit; zero if could not determine it */
+UNIV_INTERN
+ulint
+buf_LRU_get_recent_limit(void);
+/*==========================*/
+/********************************************************************//**
+Insert a compressed block into buf_pool->zip_clean in the LRU order. */
+UNIV_INTERN
+void
+buf_LRU_insert_zip_clean(
+/*=====================*/
+	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
+
+/******************************************************************//**
+Try to free a block.  If bpage is a descriptor of a compressed-only
+page, the descriptor object will be freed as well.
+
+NOTE: If this function returns BUF_LRU_FREED, it will not temporarily
+release buf_pool_mutex.  Furthermore, the page frame will no longer be
+accessible via bpage.
+
+The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
+release these two mutexes after the call.  No other
+buf_page_get_mutex() may be held when calling this function.
+@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
+BUF_LRU_NOT_FREED otherwise. */
+UNIV_INTERN
+enum buf_lru_free_block_status
+buf_LRU_free_block(
+/*===============*/
+	buf_page_t*	bpage,	/*!< in: block to be freed */
+	ibool		zip,	/*!< in: TRUE if should remove also the
+				compressed page of an uncompressed page */
+	ibool*		buf_pool_mutex_released);
+				/*!< in: pointer to a variable that will
+				be assigned TRUE if buf_pool_mutex
+				was temporarily released, or NULL */
+/******************************************************************//**
+Try to free a replaceable block.
+@return	TRUE if found and freed */
+UNIV_INTERN
+ibool
+buf_LRU_search_and_free_block(
+/*==========================*/
+	ulint	n_iterations);	/*!< in: how many times this has been called
+				repeatedly without result: a high value means
+				that we should search farther; if
+				n_iterations < 10, then we search
+				n_iterations / 10 * buf_pool->curr_size
+				pages from the end of the LRU list; if
+				n_iterations < 5, then we will also search
+				n_iterations / 5 of the unzip_LRU list. */
+/******************************************************************//**
+Returns a free block from the buf_pool.  The block is taken off the
+free list.  If it is empty, returns NULL.
+@return	a free control block, or NULL if the buf_block->free list is empty */
+UNIV_INTERN
+buf_block_t*
+buf_LRU_get_free_only(void);
+/*=======================*/
+/******************************************************************//**
+Returns a free block from the buf_pool. The block is taken off the
+free list. If it is empty, blocks are moved from the end of the
+LRU list to the free list.
+@return	the free control block, in state BUF_BLOCK_READY_FOR_USE */
+UNIV_INTERN
+buf_block_t*
+buf_LRU_get_free_block(
+/*===================*/
+	ulint	zip_size);	/*!< in: compressed page size in bytes,
+				or 0 if uncompressed tablespace */
+
+/******************************************************************//**
+Puts a block back to the free list. */
+UNIV_INTERN
+void
+buf_LRU_block_free_non_file_page(
+/*=============================*/
+	buf_block_t*	block);	/*!< in: block, must not contain a file page */
+/******************************************************************//**
+Adds a block to the LRU list. */
+UNIV_INTERN
+void
+buf_LRU_add_block(
+/*==============*/
+	buf_page_t*	bpage,	/*!< in: control block */
+	ibool		old);	/*!< in: TRUE if should be put to the old
+				blocks in the LRU list, else put to the
+				start; if the LRU list is very short, added to
+				the start regardless of this parameter */
+/******************************************************************//**
+Adds a block to the LRU list of decompressed zip pages. */
+UNIV_INTERN
+void
+buf_unzip_LRU_add_block(
+/*====================*/
+	buf_block_t*	block,	/*!< in: control block */
+	ibool		old);	/*!< in: TRUE if should be put to the end
+				of the list, else put to the start */
+/******************************************************************//**
+Moves a block to the start of the LRU list. */
+UNIV_INTERN
+void
+buf_LRU_make_block_young(
+/*=====================*/
+	buf_page_t*	bpage);	/*!< in: control block */
+/******************************************************************//**
+Moves a block to the end of the LRU list. */
+UNIV_INTERN
+void
+buf_LRU_make_block_old(
+/*===================*/
+	buf_page_t*	bpage);	/*!< in: control block */
+/********************************************************************//**
+Update the historical stats that we are collecting for LRU eviction
+policy at the end of each interval. */
+UNIV_INTERN
+void
+buf_LRU_stat_update(void);
+/*=====================*/
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
+Validates the LRU list.
+@return	TRUE */
+UNIV_INTERN
+ibool
+buf_LRU_validate(void);
+/*==================*/
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
+Prints the LRU list. */
+UNIV_INTERN
+void
+buf_LRU_print(void);
+/*===============*/
+#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+/** @brief Statistics for selecting the LRU list for eviction.
+
+These statistics are not 'of' LRU but 'for' LRU.  We keep count of I/O
+and page_zip_decompress() operations.  Based on the statistics we decide
+if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */
+struct buf_LRU_stat_struct
+{
+	ulint	io;	/**< Counter of buffer pool I/O operations. */
+	ulint	unzip;	/**< Counter of page_zip_decompress operations. */
+};
+
+/** Statistics for selecting the LRU list for eviction. */
+typedef struct buf_LRU_stat_struct buf_LRU_stat_t;
+
+/** Current operation counters.  Not protected by any mutex.
+Cleared by buf_LRU_stat_update(). */
+extern buf_LRU_stat_t	buf_LRU_stat_cur;
+
+/** Running sum of past values of buf_LRU_stat_cur.
+Updated by buf_LRU_stat_update().  Protected by buf_pool_mutex. */
+extern buf_LRU_stat_t	buf_LRU_stat_sum;
+
+/********************************************************************//**
+Increments the I/O counter in buf_LRU_stat_cur. */
+#define buf_LRU_stat_inc_io() buf_LRU_stat_cur.io++
+/********************************************************************//**
+Increments the page_zip_decompress() counter in buf_LRU_stat_cur. */
+#define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++
+
+#ifndef UNIV_NONINL
+#include "buf0lru.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/buf0lru.ic b/storage/innodb_plugin/include/buf0lru.ic
new file mode 100644
index 00000000000..556f45d987f
--- /dev/null
+++ b/storage/innodb_plugin/include/buf0lru.ic
@@ -0,0 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0lru.ic
+The database buffer replacement algorithm
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
diff --git a/storage/innobase/include/buf0rea.h b/storage/innodb_plugin/include/buf0rea.h
similarity index 50%
rename from storage/innobase/include/buf0rea.h
rename to storage/innodb_plugin/include/buf0rea.h
index e4620172860..b4d25e6fde0 100644
--- a/storage/innobase/include/buf0rea.h
+++ b/storage/innodb_plugin/include/buf0rea.h
@@ -1,7 +1,24 @@
-/******************************************************
-The database buffer read
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0rea.h
+The database buffer read
 
 Created 11/5/1995 Heikki Tuuri
 *******************************************************/
@@ -12,21 +29,22 @@ Created 11/5/1995 Heikki Tuuri
 #include "univ.i"
 #include "buf0types.h"
 
-/************************************************************************
+/********************************************************************//**
 High-level function which reads a page asynchronously from a file to the
 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread. Does a random read-ahead if it seems
-sensible. */
-
+sensible.
+@return number of page read requests issued: this can be greater than
+1 if read-ahead occurred */
+UNIV_INTERN
 ulint
 buf_read_page(
 /*==========*/
-			/* out: number of page read requests issued: this can
-			be > 1 if read-ahead occurred */
-	ulint	space,	/* in: space id */
-	ulint	offset);/* in: page number */
-/************************************************************************
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint	offset);/*!< in: page number */
+/********************************************************************//**
 Applies linear read-ahead if in the buf_pool the page is a border page of
 a linear read-ahead area and all the pages in the area have been accessed.
 Does not read any page if the read-ahead mechanism is not activated. Note
@@ -48,57 +66,74 @@ function must be written such that it cannot end up waiting for these
 latches!
 NOTE 3: the calling thread must want access to the page given: this rule is
 set to prevent unintended read-aheads performed by ibuf routines, a situation
-which could result in a deadlock if the OS does not support asynchronous io. */
-
+which could result in a deadlock if the OS does not support asynchronous io.
+@return	number of page read requests issued */
+UNIV_INTERN
 ulint
 buf_read_ahead_linear(
 /*==================*/
-			/* out: number of page read requests issued */
-	ulint	space,	/* in: space id */
-	ulint	offset);/* in: page number of a page; NOTE: the current thread
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint	offset);/*!< in: page number of a page; NOTE: the current thread
 			must want access to this page (see NOTE 3 above) */
-/************************************************************************
+/********************************************************************//**
 Issues read requests for pages which the ibuf module wants to read in, in
 order to contract the insert buffer tree. Technically, this function is like
 a read-ahead function. */
-
+UNIV_INTERN
 void
 buf_read_ibuf_merge_pages(
 /*======================*/
-	ibool	sync,		/* in: TRUE if the caller wants this function
-				to wait for the highest address page to get
-				read in, before this function returns */
-	ulint*	space_ids,	/* in: array of space ids */
-	ib_longlong* space_versions,/* in: the spaces must have this version
-				number (timestamp), otherwise we discard the
-				read; we use this to cancel reads if
-				DISCARD + IMPORT may have changed the
-				tablespace size */
-	ulint*	page_nos,	/* in: array of page numbers to read, with the
-				highest page number the last in the array */
-	ulint	n_stored);	/* in: number of page numbers in the array */
-/************************************************************************
+	ibool		sync,		/*!< in: TRUE if the caller
+					wants this function to wait
+					for the highest address page
+					to get read in, before this
+					function returns */
+	const ulint*	space_ids,	/*!< in: array of space ids */
+	const ib_int64_t* space_versions,/*!< in: the spaces must have
+					this version number
+					(timestamp), otherwise we
+					discard the read; we use this
+					to cancel reads if DISCARD +
+					IMPORT may have changed the
+					tablespace size */
+	const ulint*	page_nos,	/*!< in: array of page numbers
+					to read, with the highest page
+					number the last in the
+					array */
+	ulint		n_stored);	/*!< in: number of elements
+					in the arrays */
+/********************************************************************//**
 Issues read requests for pages which recovery wants to read in. */
-
+UNIV_INTERN
 void
 buf_read_recv_pages(
 /*================*/
-	ibool	sync,		/* in: TRUE if the caller wants this function
-				to wait for the highest address page to get
-				read in, before this function returns */
-	ulint	space,		/* in: space id */
-	ulint*	page_nos,	/* in: array of page numbers to read, with the
-				highest page number the last in the array */
-	ulint	n_stored);	/* in: number of page numbers in the array */
+	ibool		sync,		/*!< in: TRUE if the caller
+					wants this function to wait
+					for the highest address page
+					to get read in, before this
+					function returns */
+	ulint		space,		/*!< in: space id */
+	ulint		zip_size,	/*!< in: compressed page size in
+					bytes, or 0 */
+	const ulint*	page_nos,	/*!< in: array of page numbers
+					to read, with the highest page
+					number the last in the
+					array */
+	ulint		n_stored);	/*!< in: number of page numbers
+					in the array */
 
-/* The size in pages of the area which the read-ahead algorithms read if
+/** The size in pages of the area which the read-ahead algorithms read if
 invoked */
-
 #define	BUF_READ_AHEAD_AREA					\
 	ut_min(64, ut_2_power_up(buf_pool->curr_size / 32))
 
-/* Modes used in read-ahead */
+/** @name Modes used in read-ahead @{ */
+/** read only pages belonging to the insert buffer tree */
 #define BUF_READ_IBUF_PAGES_ONLY	131
+/** read any page */
 #define BUF_READ_ANY_PAGE		132
+/* @} */
 
 #endif
diff --git a/storage/innodb_plugin/include/buf0types.h b/storage/innodb_plugin/include/buf0types.h
new file mode 100644
index 00000000000..e7167d716a0
--- /dev/null
+++ b/storage/innodb_plugin/include/buf0types.h
@@ -0,0 +1,80 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0types.h
+The database buffer pool global types for the directory
+
+Created 11/17/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0types_h
+#define buf0types_h
+
+/** Buffer page (uncompressed or compressed) */
+typedef	struct buf_page_struct		buf_page_t;
+/** Buffer block for which an uncompressed page exists */
+typedef	struct buf_block_struct		buf_block_t;
+/** Buffer pool chunk comprising buf_block_t */
+typedef struct buf_chunk_struct		buf_chunk_t;
+/** Buffer pool comprising buf_chunk_t */
+typedef	struct buf_pool_struct		buf_pool_t;
+
+/** A buffer frame. @see page_t */
+typedef	byte	buf_frame_t;
+
+/** Flags for flush types */
+enum buf_flush {
+	BUF_FLUSH_LRU = 0,		/*!< flush via the LRU list */
+	BUF_FLUSH_SINGLE_PAGE,		/*!< flush a single page */
+	BUF_FLUSH_LIST,			/*!< flush via the flush list
+					of dirty blocks */
+	BUF_FLUSH_N_TYPES		/*!< index of last element + 1  */
+};
+
+/** Flags for io_fix types */
+enum buf_io_fix {
+	BUF_IO_NONE = 0,		/**< no pending I/O */
+	BUF_IO_READ,			/**< read pending */
+	BUF_IO_WRITE			/**< write pending */
+};
+
+/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
+/* @{ */
+#if UNIV_WORD_SIZE <= 4 /* 32-bit system */
+/** Base-2 logarithm of the smallest buddy block size */
+# define BUF_BUDDY_LOW_SHIFT	6
+#else /* 64-bit system */
+/** Base-2 logarithm of the smallest buddy block size */
+# define BUF_BUDDY_LOW_SHIFT	7
+#endif
+#define BUF_BUDDY_LOW		(1 << BUF_BUDDY_LOW_SHIFT)
+					/*!< minimum block size in the binary
+					buddy system; must be at least
+					sizeof(buf_page_t) */
+#define BUF_BUDDY_SIZES		(UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
+					/*!< number of buddy sizes */
+
+/** twice the maximum block size of the buddy system;
+the underlying memory is aligned by this amount:
+this must be equal to UNIV_PAGE_SIZE */
+#define BUF_BUDDY_HIGH	(BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
+/* @} */
+
+#endif
+
diff --git a/storage/innodb_plugin/include/data0data.h b/storage/innodb_plugin/include/data0data.h
new file mode 100644
index 00000000000..f9fce3f3657
--- /dev/null
+++ b/storage/innodb_plugin/include/data0data.h
@@ -0,0 +1,483 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/data0data.h
+SQL data field and tuple
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef data0data_h
+#define data0data_h
+
+#include "univ.i"
+
+#include "data0types.h"
+#include "data0type.h"
+#include "mem0mem.h"
+#include "dict0types.h"
+
+/** Storage for overflow data in a big record, that is, a clustered
+index record which needs external storage of data fields */
+typedef struct big_rec_struct		big_rec_t;
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets pointer to the type struct of SQL data field.
+@return	pointer to the type struct */
+UNIV_INLINE
+dtype_t*
+dfield_get_type(
+/*============*/
+	const dfield_t*	field);	/*!< in: SQL data field */
+/*********************************************************************//**
+Gets pointer to the data in a field.
+@return	pointer to data */
+UNIV_INLINE
+void*
+dfield_get_data(
+/*============*/
+	const dfield_t* field);	/*!< in: field */
+#else /* UNIV_DEBUG */
+# define dfield_get_type(field) (&(field)->type)
+# define dfield_get_data(field) ((field)->data)
+#endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Sets the type struct of SQL data field. */
+UNIV_INLINE
+void
+dfield_set_type(
+/*============*/
+	dfield_t*	field,	/*!< in: SQL data field */
+	dtype_t*	type);	/*!< in: pointer to data type struct */
+/*********************************************************************//**
+Gets length of field data.
+@return	length of data; UNIV_SQL_NULL if SQL null data */
+UNIV_INLINE
+ulint
+dfield_get_len(
+/*===========*/
+	const dfield_t* field);	/*!< in: field */
+/*********************************************************************//**
+Sets length in a field. */
+UNIV_INLINE
+void
+dfield_set_len(
+/*===========*/
+	dfield_t*	field,	/*!< in: field */
+	ulint		len);	/*!< in: length or UNIV_SQL_NULL */
+/*********************************************************************//**
+Determines if a field is SQL NULL
+@return	nonzero if SQL null data */
+UNIV_INLINE
+ulint
+dfield_is_null(
+/*===========*/
+	const dfield_t* field);	/*!< in: field */
+/*********************************************************************//**
+Determines if a field is externally stored
+@return	nonzero if externally stored */
+UNIV_INLINE
+ulint
+dfield_is_ext(
+/*==========*/
+	const dfield_t* field);	/*!< in: field */
+/*********************************************************************//**
+Sets the "external storage" flag */
+UNIV_INLINE
+void
+dfield_set_ext(
+/*===========*/
+	dfield_t*	field);	/*!< in/out: field */
+/*********************************************************************//**
+Sets pointer to the data and length in a field. */
+UNIV_INLINE
+void
+dfield_set_data(
+/*============*/
+	dfield_t*	field,	/*!< in: field */
+	const void*	data,	/*!< in: data */
+	ulint		len);	/*!< in: length or UNIV_SQL_NULL */
+/*********************************************************************//**
+Sets a data field to SQL NULL. */
+UNIV_INLINE
+void
+dfield_set_null(
+/*============*/
+	dfield_t*	field);	/*!< in/out: field */
+/**********************************************************************//**
+Writes an SQL null field full of zeros. */
+UNIV_INLINE
+void
+data_write_sql_null(
+/*================*/
+	byte*	data,	/*!< in: pointer to a buffer of size len */
+	ulint	len);	/*!< in: SQL null size in bytes */
+/*********************************************************************//**
+Copies the data and len fields. */
+UNIV_INLINE
+void
+dfield_copy_data(
+/*=============*/
+	dfield_t*	field1,	/*!< out: field to copy to */
+	const dfield_t*	field2);/*!< in: field to copy from */
+/*********************************************************************//**
+Copies a data field to another. */
+UNIV_INLINE
+void
+dfield_copy(
+/*========*/
+	dfield_t*	field1,	/*!< out: field to copy to */
+	const dfield_t*	field2);/*!< in: field to copy from */
+/*********************************************************************//**
+Copies the data pointed to by a data field. */
+UNIV_INLINE
+void
+dfield_dup(
+/*=======*/
+	dfield_t*	field,	/*!< in/out: data field */
+	mem_heap_t*	heap);	/*!< in: memory heap where allocated */
+/*********************************************************************//**
+Tests if data length and content is equal for two dfields.
+@return	TRUE if equal */
+UNIV_INLINE
+ibool
+dfield_datas_are_binary_equal(
+/*==========================*/
+	const dfield_t*	field1,	/*!< in: field */
+	const dfield_t*	field2);/*!< in: field */
+/*********************************************************************//**
+Tests if dfield data length and content is equal to the given.
+@return	TRUE if equal */
+UNIV_INTERN
+ibool
+dfield_data_is_binary_equal(
+/*========================*/
+	const dfield_t*	field,	/*!< in: field */
+	ulint		len,	/*!< in: data length or UNIV_SQL_NULL */
+	const byte*	data);	/*!< in: data */
+/*********************************************************************//**
+Gets number of fields in a data tuple.
+@return	number of fields */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields(
+/*================*/
+	const dtuple_t*	tuple);	/*!< in: tuple */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets nth field of a tuple.
+@return	nth field */
+UNIV_INLINE
+dfield_t*
+dtuple_get_nth_field(
+/*=================*/
+	const dtuple_t*	tuple,	/*!< in: tuple */
+	ulint		n);	/*!< in: index of field */
+#else /* UNIV_DEBUG */
+# define dtuple_get_nth_field(tuple, n) ((tuple)->fields + (n))
+#endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Gets info bits in a data tuple.
+@return	info bits */
+UNIV_INLINE
+ulint
+dtuple_get_info_bits(
+/*=================*/
+	const dtuple_t*	tuple);	/*!< in: tuple */
+/*********************************************************************//**
+Sets info bits in a data tuple. */
+UNIV_INLINE
+void
+dtuple_set_info_bits(
+/*=================*/
+	dtuple_t*	tuple,		/*!< in: tuple */
+	ulint		info_bits);	/*!< in: info bits */
+/*********************************************************************//**
+Gets number of fields used in record comparisons.
+@return	number of fields used in comparisons in rem0cmp.* */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields_cmp(
+/*====================*/
+	const dtuple_t*	tuple);	/*!< in: tuple */
+/*********************************************************************//**
+Gets number of fields used in record comparisons. */
+UNIV_INLINE
+void
+dtuple_set_n_fields_cmp(
+/*====================*/
+	dtuple_t*	tuple,		/*!< in: tuple */
+	ulint		n_fields_cmp);	/*!< in: number of fields used in
+					comparisons in rem0cmp.* */
+/**********************************************************//**
+Creates a data tuple to a memory heap. The default value for number
+of fields used in record comparisons for this tuple is n_fields.
+@return	own: created tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_create(
+/*==========*/
+	mem_heap_t*	heap,	/*!< in: memory heap where the tuple
+				is created */
+	ulint		n_fields); /*!< in: number of fields */
+
+/**********************************************************//**
+Wrap data fields in a tuple. The default value for number
+of fields used in record comparisons for this tuple is n_fields.
+@return	data tuple */
+UNIV_INLINE
+const dtuple_t*
+dtuple_from_fields(
+/*===============*/
+	dtuple_t*	tuple,		/*!< in: storage for data tuple */
+	const dfield_t*	fields,		/*!< in: fields */
+	ulint		n_fields);	/*!< in: number of fields */
+
+/*********************************************************************//**
+Sets number of fields used in a tuple. Normally this is set in
+dtuple_create, but if you want later to set it smaller, you can use this. */
+UNIV_INTERN
+void
+dtuple_set_n_fields(
+/*================*/
+	dtuple_t*	tuple,		/*!< in: tuple */
+	ulint		n_fields);	/*!< in: number of fields */
+/*********************************************************************//**
+Copies a data tuple to another.  This is a shallow copy; if a deep copy
+is desired, dfield_dup() will have to be invoked on each field.
+@return	own: copy of tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_copy(
+/*========*/
+	const dtuple_t*	tuple,	/*!< in: tuple to copy from */
+	mem_heap_t*	heap);	/*!< in: memory heap
+				where the tuple is created */
+/**********************************************************//**
+The following function returns the sum of data lengths of a tuple. The space
+occupied by the field structs or the tuple struct is not counted.
+@return	sum of data lens */
+UNIV_INLINE
+ulint
+dtuple_get_data_size(
+/*=================*/
+	const dtuple_t*	tuple,	/*!< in: typed data tuple */
+	ulint		comp);	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+/*********************************************************************//**
+Computes the number of externally stored fields in a data tuple.
+@return	number of fields */
+UNIV_INLINE
+ulint
+dtuple_get_n_ext(
+/*=============*/
+	const dtuple_t*	tuple);	/*!< in: tuple */
+/************************************************************//**
+Compare two data tuples, respecting the collation of character fields.
+@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively,
+than tuple2 */
+UNIV_INTERN
+int
+dtuple_coll_cmp(
+/*============*/
+	const dtuple_t*	tuple1,	/*!< in: tuple 1 */
+	const dtuple_t*	tuple2);/*!< in: tuple 2 */
+/************************************************************//**
+Folds a prefix given as the number of fields of a tuple.
+@return	the folded value */
+UNIV_INLINE
+ulint
+dtuple_fold(
+/*========*/
+	const dtuple_t*	tuple,	/*!< in: the tuple */
+	ulint		n_fields,/*!< in: number of complete fields to fold */
+	ulint		n_bytes,/*!< in: number of bytes to fold in an
+				incomplete last field */
+	dulint		tree_id)/*!< in: index tree id */
+	__attribute__((pure));
+/*******************************************************************//**
+Sets types of fields binary in a tuple. */
+UNIV_INLINE
+void
+dtuple_set_types_binary(
+/*====================*/
+	dtuple_t*	tuple,	/*!< in: data tuple */
+	ulint		n);	/*!< in: number of fields to set */
+/**********************************************************************//**
+Checks if a dtuple contains an SQL null value.
+@return	TRUE if some field is SQL null */
+UNIV_INLINE
+ibool
+dtuple_contains_null(
+/*=================*/
+	const dtuple_t*	tuple);	/*!< in: dtuple */
+/**********************************************************//**
+Checks that a data field is typed. Asserts an error if not.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dfield_check_typed(
+/*===============*/
+	const dfield_t*	field);	/*!< in: data field */
+/**********************************************************//**
+Checks that a data tuple is typed. Asserts an error if not.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dtuple_check_typed(
+/*===============*/
+	const dtuple_t*	tuple);	/*!< in: tuple */
+/**********************************************************//**
+Checks that a data tuple is typed.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dtuple_check_typed_no_assert(
+/*=========================*/
+	const dtuple_t*	tuple);	/*!< in: tuple */
+#ifdef UNIV_DEBUG
+/**********************************************************//**
+Validates the consistency of a tuple which must be complete, i.e,
+all fields must have been set.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dtuple_validate(
+/*============*/
+	const dtuple_t*	tuple);	/*!< in: tuple */
+#endif /* UNIV_DEBUG */
+/*************************************************************//**
+Pretty prints a dfield value according to its data type. */
+UNIV_INTERN
+void
+dfield_print(
+/*=========*/
+	const dfield_t*	dfield);/*!< in: dfield */
+/*************************************************************//**
+Pretty prints a dfield value according to its data type. Also the hex string
+is printed if a string contains non-printable characters. */
+UNIV_INTERN
+void
+dfield_print_also_hex(
+/*==================*/
+	const dfield_t*	dfield);	 /*!< in: dfield */
+/**********************************************************//**
+The following function prints the contents of a tuple. */
+UNIV_INTERN
+void
+dtuple_print(
+/*=========*/
+	FILE*		f,	/*!< in: output stream */
+	const dtuple_t*	tuple);	/*!< in: tuple */
+/**************************************************************//**
+Moves parts of long fields in entry to the big record vector so that
+the size of tuple drops below the maximum record size allowed in the
+database. Moves data only from those fields which are not necessary
+to determine uniquely the insertion place of the tuple in the index.
+@return own: created big record vector, NULL if we are not able to
+shorten the entry enough, i.e., if there are too many fixed-length or
+short fields in entry or the index is clustered */
+UNIV_INTERN
+big_rec_t*
+dtuple_convert_big_rec(
+/*===================*/
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry,	/*!< in/out: index entry */
+	ulint*		n_ext);	/*!< in/out: number of
+				externally stored columns */
+/**************************************************************//**
+Puts back to entry the data stored in vector. Note that to ensure the
+fields in entry can accommodate the data, vector must have been created
+from entry with dtuple_convert_big_rec. */
+UNIV_INTERN
+void
+dtuple_convert_back_big_rec(
+/*========================*/
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry,	/*!< in: entry whose data was put to vector */
+	big_rec_t*	vector);/*!< in, own: big rec vector; it is
+				freed in this function */
+/**************************************************************//**
+Frees the memory in a big rec vector. */
+UNIV_INLINE
+void
+dtuple_big_rec_free(
+/*================*/
+	big_rec_t*	vector);	/*!< in, own: big rec vector; it is
+				freed in this function */
+
+/*######################################################################*/
+
+/** Structure for an SQL data field */
+struct dfield_struct{
+	void*		data;	/*!< pointer to data */
+	unsigned	ext:1;	/*!< TRUE=externally stored, FALSE=local */
+	unsigned	len:32;	/*!< data length; UNIV_SQL_NULL if SQL null */
+	dtype_t		type;	/*!< type of data */
+};
+
+/** Structure for an SQL data tuple of fields (logical record) */
+struct dtuple_struct {
+	ulint		info_bits;	/*!< info bits of an index record:
+					the default is 0; this field is used
+					if an index record is built from
+					a data tuple */
+	ulint		n_fields;	/*!< number of fields in dtuple */
+	ulint		n_fields_cmp;	/*!< number of fields which should
+					be used in comparison services
+					of rem0cmp.*; the index search
+					is performed by comparing only these
+					fields, others are ignored; the
+					default value in dtuple creation is
+					the same value as n_fields */
+	dfield_t*	fields;		/*!< fields */
+	UT_LIST_NODE_T(dtuple_t) tuple_list;
+					/*!< data tuples can be linked into a
+					list using this field */
+#ifdef UNIV_DEBUG
+	ulint		magic_n;	/*!< magic number, used in
+					debug assertions */
+/** Value of dtuple_struct::magic_n */
+# define		DATA_TUPLE_MAGIC_N	65478679
+#endif /* UNIV_DEBUG */
+};
+
+/** A slot for a field in a big rec vector */
+typedef struct big_rec_field_struct	big_rec_field_t;
+/** A slot for a field in a big rec vector */
+struct big_rec_field_struct {
+	ulint		field_no;	/*!< field number in record */
+	ulint		len;		/*!< stored data length, in bytes */
+	const void*	data;		/*!< stored data */
+};
+
+/** Storage format for overflow data in a big record, that is, a
+clustered index record which needs external storage of data fields */
+struct big_rec_struct {
+	mem_heap_t*	heap;		/*!< memory heap from which
+					allocated */
+	ulint		n_fields;	/*!< number of stored fields */
+	big_rec_field_t*fields;		/*!< stored fields */
+};
+
+#ifndef UNIV_NONINL
+#include "data0data.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/data0data.ic b/storage/innodb_plugin/include/data0data.ic
new file mode 100644
index 00000000000..da79aa33702
--- /dev/null
+++ b/storage/innodb_plugin/include/data0data.ic
@@ -0,0 +1,612 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/data0data.ic
+SQL data field and tuple
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "mem0mem.h"
+#include "ut0rnd.h"
+
+#ifdef UNIV_DEBUG
+/** Dummy variable to catch access to uninitialized fields.  In the
+debug version, dtuple_create() will make all fields of dtuple_t point
+to data_error. */
+extern byte data_error;
+
+/*********************************************************************//**
+Gets pointer to the type struct of SQL data field.
+@return	pointer to the type struct */
+UNIV_INLINE
+dtype_t*
+dfield_get_type(
+/*============*/
+	const dfield_t*	field)	/*!< in: SQL data field */
+{
+	ut_ad(field);
+
+	return((dtype_t*) &(field->type));
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Sets the type struct of SQL data field. */
+UNIV_INLINE
+void
+dfield_set_type(
+/*============*/
+	dfield_t*	field,	/*!< in: SQL data field */
+	dtype_t*	type)	/*!< in: pointer to data type struct */
+{
+	ut_ad(field && type);
+
+	field->type = *type;
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets pointer to the data in a field.
+@return	pointer to data */
+UNIV_INLINE
+void*
+dfield_get_data(
+/*============*/
+	const dfield_t* field)	/*!< in: field */
+{
+	ut_ad(field);
+	ut_ad((field->len == UNIV_SQL_NULL)
+	      || (field->data != &data_error));
+
+	return((void*) field->data);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Gets length of field data.
+@return	length of data; UNIV_SQL_NULL if SQL null data */
+UNIV_INLINE
+ulint
+dfield_get_len(
+/*===========*/
+	const dfield_t*	field)	/*!< in: field */
+{
+	ut_ad(field);
+	ut_ad((field->len == UNIV_SQL_NULL)
+	      || (field->data != &data_error));
+
+	return(field->len);
+}
+
+/*********************************************************************//**
+Sets length in a field. */
+UNIV_INLINE
+void
+dfield_set_len(
+/*===========*/
+	dfield_t*	field,	/*!< in: field */
+	ulint		len)	/*!< in: length or UNIV_SQL_NULL */
+{
+	ut_ad(field);
+#ifdef UNIV_VALGRIND_DEBUG
+	if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(field->data, len);
+#endif /* UNIV_VALGRIND_DEBUG */
+
+	field->ext = 0;
+	field->len = len;
+}
+
+/*********************************************************************//**
+Determines if a field is SQL NULL
+@return	nonzero if SQL null data */
+UNIV_INLINE
+ulint
+dfield_is_null(
+/*===========*/
+	const dfield_t* field)	/*!< in: field */
+{
+	ut_ad(field);
+
+	return(field->len == UNIV_SQL_NULL);
+}
+
+/*********************************************************************//**
+Determines if a field is externally stored
+@return	nonzero if externally stored */
+UNIV_INLINE
+ulint
+dfield_is_ext(
+/*==========*/
+	const dfield_t* field)	/*!< in: field */
+{
+	ut_ad(field);
+
+	return(UNIV_UNLIKELY(field->ext));
+}
+
+/*********************************************************************//**
+Sets the "external storage" flag */
+UNIV_INLINE
+void
+dfield_set_ext(
+/*===========*/
+	dfield_t*	field)	/*!< in/out: field */
+{
+	ut_ad(field);
+
+	field->ext = 1;
+}
+
+/*********************************************************************//**
+Sets pointer to the data and length in a field. */
+UNIV_INLINE
+void
+dfield_set_data(
+/*============*/
+	dfield_t*	field,	/*!< in: field */
+	const void*	data,	/*!< in: data */
+	ulint		len)	/*!< in: length or UNIV_SQL_NULL */
+{
+	ut_ad(field);
+
+#ifdef UNIV_VALGRIND_DEBUG
+	if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(data, len);
+#endif /* UNIV_VALGRIND_DEBUG */
+	field->data = (void*) data;
+	field->ext = 0;
+	field->len = len;
+}
+
+/*********************************************************************//**
+Sets a data field to SQL NULL. */
+UNIV_INLINE
+void
+dfield_set_null(
+/*============*/
+	dfield_t*	field)	/*!< in/out: field */
+{
+	dfield_set_data(field, NULL, UNIV_SQL_NULL);
+}
+
+/*********************************************************************//**
+Copies the data and len fields. */
+UNIV_INLINE
+void
+dfield_copy_data(
+/*=============*/
+	dfield_t*	field1,	/*!< out: field to copy to */
+	const dfield_t*	field2)	/*!< in: field to copy from */
+{
+	ut_ad(field1 && field2);
+
+	field1->data = field2->data;
+	field1->len = field2->len;
+	field1->ext = field2->ext;
+}
+
+/*********************************************************************//**
+Copies a data field to another. */
+UNIV_INLINE
+void
+dfield_copy(
+/*========*/
+	dfield_t*	field1,	/*!< out: field to copy to */
+	const dfield_t*	field2)	/*!< in: field to copy from */
+{
+	*field1 = *field2;
+}
+
+/*********************************************************************//**
+Copies the data pointed to by a data field. */
+UNIV_INLINE
+void
+dfield_dup(
+/*=======*/
+	dfield_t*	field,	/*!< in/out: data field */
+	mem_heap_t*	heap)	/*!< in: memory heap where allocated */
+{
+	if (!dfield_is_null(field)) {
+		UNIV_MEM_ASSERT_RW(field->data, field->len);
+		field->data = mem_heap_dup(heap, field->data, field->len);
+	}
+}
+
+/*********************************************************************//**
+Tests if data length and content is equal for two dfields.
+@return	TRUE if equal */
+UNIV_INLINE
+ibool
+dfield_datas_are_binary_equal(
+/*==========================*/
+	const dfield_t*	field1,	/*!< in: field */
+	const dfield_t*	field2)	/*!< in: field */
+{
+	ulint	len;
+
+	len = field1->len;
+
+	return(len == field2->len
+	       && (len == UNIV_SQL_NULL
+		   || !memcmp(field1->data, field2->data, len)));
+}
+
+/*********************************************************************//**
+Gets info bits in a data tuple.
+@return	info bits */
+UNIV_INLINE
+ulint
+dtuple_get_info_bits(
+/*=================*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+{
+	ut_ad(tuple);
+
+	return(tuple->info_bits);
+}
+
+/*********************************************************************//**
+Sets info bits in a data tuple. */
+UNIV_INLINE
+void
+dtuple_set_info_bits(
+/*=================*/
+	dtuple_t*	tuple,		/*!< in: tuple */
+	ulint		info_bits)	/*!< in: info bits */
+{
+	ut_ad(tuple);
+
+	tuple->info_bits = info_bits;
+}
+
+/*********************************************************************//**
+Gets number of fields used in record comparisons.
+@return	number of fields used in comparisons in rem0cmp.* */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields_cmp(
+/*====================*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+{
+	ut_ad(tuple);
+
+	return(tuple->n_fields_cmp);
+}
+
+/*********************************************************************//**
+Sets number of fields used in record comparisons. */
+UNIV_INLINE
+void
+dtuple_set_n_fields_cmp(
+/*====================*/
+	dtuple_t*	tuple,		/*!< in: tuple */
+	ulint		n_fields_cmp)	/*!< in: number of fields used in
+					comparisons in rem0cmp.* */
+{
+	ut_ad(tuple);
+	ut_ad(n_fields_cmp <= tuple->n_fields);
+
+	tuple->n_fields_cmp = n_fields_cmp;
+}
+
+/*********************************************************************//**
+Gets number of fields in a data tuple.
+@return	number of fields */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields(
+/*================*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+{
+	ut_ad(tuple);
+
+	return(tuple->n_fields);
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets nth field of a tuple.
+@return	nth field */
+UNIV_INLINE
+dfield_t*
+dtuple_get_nth_field(
+/*=================*/
+	const dtuple_t*	tuple,	/*!< in: tuple */
+	ulint		n)	/*!< in: index of field */
+{
+	ut_ad(tuple);
+	ut_ad(n < tuple->n_fields);
+
+	return((dfield_t*) tuple->fields + n);
+}
+#endif /* UNIV_DEBUG */
+
+/**********************************************************//**
+Creates a data tuple to a memory heap. The default value for number
+of fields used in record comparisons for this tuple is n_fields.
+@return	own: created tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_create(
+/*==========*/
+	mem_heap_t*	heap,	/*!< in: memory heap where the tuple
+				is created */
+	ulint		n_fields) /*!< in: number of fields */
+{
+	dtuple_t*	tuple;
+
+	ut_ad(heap);
+
+	tuple = (dtuple_t*) mem_heap_alloc(heap, sizeof(dtuple_t)
+					   + n_fields * sizeof(dfield_t));
+	tuple->info_bits = 0;
+	tuple->n_fields = n_fields;
+	tuple->n_fields_cmp = n_fields;
+	tuple->fields = (dfield_t*) &tuple[1];
+
+#ifdef UNIV_DEBUG
+	tuple->magic_n = DATA_TUPLE_MAGIC_N;
+
+	{	/* In the debug version, initialize fields to an error value */
+		ulint	i;
+
+		for (i = 0; i < n_fields; i++) {
+			dfield_t*       field;
+
+			field = dtuple_get_nth_field(tuple, i);
+
+			dfield_set_len(field, UNIV_SQL_NULL);
+			field->data = &data_error;
+			dfield_get_type(field)->mtype = DATA_ERROR;
+		}
+	}
+
+	UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
+#endif
+	return(tuple);
+}
+
+/**********************************************************//**
+Wrap data fields in a tuple. The default value for number
+of fields used in record comparisons for this tuple is n_fields.
+@return	data tuple */
+UNIV_INLINE
+const dtuple_t*
+dtuple_from_fields(
+/*===============*/
+	dtuple_t*	tuple,		/*!< in: storage for data tuple */
+	const dfield_t*	fields,		/*!< in: fields */
+	ulint		n_fields)	/*!< in: number of fields */
+{
+	tuple->info_bits = 0;
+	tuple->n_fields = tuple->n_fields_cmp = n_fields;
+	tuple->fields = (dfield_t*) fields;
+	ut_d(tuple->magic_n = DATA_TUPLE_MAGIC_N);
+
+	return(tuple);
+}
+
+/*********************************************************************//**
+Copies a data tuple to another.  This is a shallow copy; if a deep copy
+is desired, dfield_dup() will have to be invoked on each field.
+@return	own: copy of tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_copy(
+/*========*/
+	const dtuple_t*	tuple,	/*!< in: tuple to copy from */
+	mem_heap_t*	heap)	/*!< in: memory heap
+				where the tuple is created */
+{
+	ulint		n_fields	= dtuple_get_n_fields(tuple);
+	dtuple_t*	new_tuple	= dtuple_create(heap, n_fields);
+	ulint		i;
+
+	for (i = 0; i < n_fields; i++) {
+		dfield_copy(dtuple_get_nth_field(new_tuple, i),
+			    dtuple_get_nth_field(tuple, i));
+	}
+
+	return(new_tuple);
+}
+
+/**********************************************************//**
+The following function returns the sum of data lengths of a tuple. The space
+occupied by the field structs or the tuple struct is not counted. Neither
+is possible space in externally stored parts of the field.
+@return	sum of data lengths */
+UNIV_INLINE
+ulint
+dtuple_get_data_size(
+/*=================*/
+	const dtuple_t*	tuple,	/*!< in: typed data tuple */
+	ulint		comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+{
+	const dfield_t*	field;
+	ulint		n_fields;
+	ulint		len;
+	ulint		i;
+	ulint		sum	= 0;
+
+	ut_ad(tuple);
+	ut_ad(dtuple_check_typed(tuple));
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+
+	n_fields = tuple->n_fields;
+
+	for (i = 0; i < n_fields; i++) {
+		field = dtuple_get_nth_field(tuple,  i);
+		len = dfield_get_len(field);
+
+		if (len == UNIV_SQL_NULL) {
+			len = dtype_get_sql_null_size(dfield_get_type(field),
+						      comp);
+		}
+
+		sum += len;
+	}
+
+	return(sum);
+}
+
+/*********************************************************************//**
+Computes the number of externally stored fields in a data tuple.
+@return	number of externally stored fields */
+UNIV_INLINE
+ulint
+dtuple_get_n_ext(
+/*=============*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+{
+	ulint	n_ext		= 0;
+	ulint	n_fields	= tuple->n_fields;
+	ulint	i;
+
+	ut_ad(tuple);
+	ut_ad(dtuple_check_typed(tuple));
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+
+	for (i = 0; i < n_fields; i++) {
+		n_ext += dtuple_get_nth_field(tuple, i)->ext;
+	}
+
+	return(n_ext);
+}
+
+/*******************************************************************//**
+Sets types of fields binary in a tuple. */
+UNIV_INLINE
+void
+dtuple_set_types_binary(
+/*====================*/
+	dtuple_t*	tuple,	/*!< in: data tuple */
+	ulint		n)	/*!< in: number of fields to set */
+{
+	dtype_t*	dfield_type;
+	ulint		i;
+
+	for (i = 0; i < n; i++) {
+		dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
+		dtype_set(dfield_type, DATA_BINARY, 0, 0);
+	}
+}
+
+/************************************************************//**
+Folds a prefix given as the number of fields of a tuple.
+@return	the folded value */
+UNIV_INLINE
+ulint
+dtuple_fold(
+/*========*/
+	const dtuple_t*	tuple,	/*!< in: the tuple */
+	ulint		n_fields,/*!< in: number of complete fields to fold */
+	ulint		n_bytes,/*!< in: number of bytes to fold in an
+				incomplete last field */
+	dulint		tree_id)/*!< in: index tree id */
+{
+	const dfield_t*	field;
+	ulint		i;
+	const byte*	data;
+	ulint		len;
+	ulint		fold;
+
+	ut_ad(tuple);
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+	ut_ad(dtuple_check_typed(tuple));
+
+	fold = ut_fold_dulint(tree_id);
+
+	for (i = 0; i < n_fields; i++) {
+		field = dtuple_get_nth_field(tuple, i);
+
+		data = (const byte*) dfield_get_data(field);
+		len = dfield_get_len(field);
+
+		if (len != UNIV_SQL_NULL) {
+			fold = ut_fold_ulint_pair(fold,
+						  ut_fold_binary(data, len));
+		}
+	}
+
+	if (n_bytes > 0) {
+		field = dtuple_get_nth_field(tuple, i);
+
+		data = (const byte*) dfield_get_data(field);
+		len = dfield_get_len(field);
+
+		if (len != UNIV_SQL_NULL) {
+			if (len > n_bytes) {
+				len = n_bytes;
+			}
+
+			fold = ut_fold_ulint_pair(fold,
+						  ut_fold_binary(data, len));
+		}
+	}
+
+	return(fold);
+}
+
+/**********************************************************************//**
+Writes an SQL null field full of zeros. */
+UNIV_INLINE
+void
+data_write_sql_null(
+/*================*/
+	byte*	data,	/*!< in: pointer to a buffer of size len */
+	ulint	len)	/*!< in: SQL null size in bytes */
+{
+	memset(data, 0, len);
+}
+
+/**********************************************************************//**
+Checks if a dtuple contains an SQL null value.
+@return	TRUE if some field is SQL null */
+UNIV_INLINE
+ibool
+dtuple_contains_null(
+/*=================*/
+	const dtuple_t*	tuple)	/*!< in: dtuple */
+{
+	ulint	n;
+	ulint	i;
+
+	n = dtuple_get_n_fields(tuple);
+
+	for (i = 0; i < n; i++) {
+		if (dfield_is_null(dtuple_get_nth_field(tuple, i))) {
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/**************************************************************//**
+Frees the memory in a big rec vector. */
+UNIV_INLINE
+void
+dtuple_big_rec_free(
+/*================*/
+	big_rec_t*	vector)	/*!< in, own: big rec vector; it is
+				freed in this function */
+{
+	mem_heap_free(vector->heap);
+}
diff --git a/storage/innobase/include/data0type.h b/storage/innodb_plugin/include/data0type.h
similarity index 66%
rename from storage/innobase/include/data0type.h
rename to storage/innodb_plugin/include/data0type.h
index e5e9c5076be..a73bed3a9f5 100644
--- a/storage/innobase/include/data0type.h
+++ b/storage/innodb_plugin/include/data0type.h
@@ -1,7 +1,24 @@
-/******************************************************
-Data types
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/data0type.h
+Data types
 
 Created 1/16/1996 Heikki Tuuri
 *******************************************************/
@@ -122,6 +139,8 @@ be less than 256 */
 
 #define	DATA_N_SYS_COLS 3	/* number of system columns defined above */
 
+#define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */
+
 /* Flags ORed to the precise data type */
 #define DATA_NOT_NULL	256	/* this is ORed to the precise type when
 				the column is declared as NOT NULL */
@@ -149,225 +168,240 @@ SQL null*/
 store the charset-collation number; one byte is left unused, though */
 #define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE	6
 
-/*************************************************************************
-Gets the MySQL type code from a dtype. */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Gets the MySQL type code from a dtype.
+@return	MySQL type code; this is NOT an InnoDB type code! */
 UNIV_INLINE
 ulint
 dtype_get_mysql_type(
 /*=================*/
-				/* out: MySQL type code; this is NOT an InnoDB
-				type code! */
-	dtype_t*	type);	/* in: type struct */
-/*************************************************************************
+	const dtype_t*	type);	/*!< in: type struct */
+/*********************************************************************//**
 Determine how many bytes the first n characters of the given string occupy.
 If the string is shorter than n characters, returns the number of bytes
-the characters in the string occupy. */
-
+the characters in the string occupy.
+@return	length of the prefix, in bytes */
+UNIV_INTERN
 ulint
 dtype_get_at_most_n_mbchars(
 /*========================*/
-					/* out: length of the prefix,
-					in bytes */
-	ulint		prtype,		/* in: precise type */
-	ulint		mbminlen,	/* in: minimum length of a
+	ulint		prtype,		/*!< in: precise type */
+	ulint		mbminlen,	/*!< in: minimum length of a
 					multi-byte character */
-	ulint		mbmaxlen,	/* in: maximum length of a
+	ulint		mbmaxlen,	/*!< in: maximum length of a
 					multi-byte character */
-	ulint		prefix_len,	/* in: length of the requested
+	ulint		prefix_len,	/*!< in: length of the requested
 					prefix, in characters, multiplied by
 					dtype_get_mbmaxlen(dtype) */
-	ulint		data_len,	/* in: length of str (in bytes) */
-	const char*	str);		/* in: the string whose prefix
+	ulint		data_len,	/*!< in: length of str (in bytes) */
+	const char*	str);		/*!< in: the string whose prefix
 					length is being determined */
-/*************************************************************************
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
 Checks if a data main type is a string type. Also a BLOB is considered a
-string type. */
-
+string type.
+@return	TRUE if string type */
+UNIV_INTERN
 ibool
 dtype_is_string_type(
 /*=================*/
-			/* out: TRUE if string type */
-	ulint	mtype);	/* in: InnoDB main data type code: DATA_CHAR, ... */
-/*************************************************************************
+	ulint	mtype);	/*!< in: InnoDB main data type code: DATA_CHAR, ... */
+/*********************************************************************//**
 Checks if a type is a binary string type. Note that for tables created with
 < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
-those DATA_BLOB columns this function currently returns FALSE. */
-
+those DATA_BLOB columns this function currently returns FALSE.
+@return	TRUE if binary string type */
+UNIV_INTERN
 ibool
 dtype_is_binary_string_type(
 /*========================*/
-			/* out: TRUE if binary string type */
-	ulint	mtype,	/* in: main data type */
-	ulint	prtype);/* in: precise type */
-/*************************************************************************
+	ulint	mtype,	/*!< in: main data type */
+	ulint	prtype);/*!< in: precise type */
+/*********************************************************************//**
 Checks if a type is a non-binary string type. That is, dtype_is_string_type is
 TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
 with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
-For those DATA_BLOB columns this function currently returns TRUE. */
-
+For those DATA_BLOB columns this function currently returns TRUE.
+@return	TRUE if non-binary string type */
+UNIV_INTERN
 ibool
 dtype_is_non_binary_string_type(
 /*============================*/
-			/* out: TRUE if non-binary string type */
-	ulint	mtype,	/* in: main data type */
-	ulint	prtype);/* in: precise type */
-/*************************************************************************
+	ulint	mtype,	/*!< in: main data type */
+	ulint	prtype);/*!< in: precise type */
+/*********************************************************************//**
 Sets a data type structure. */
 UNIV_INLINE
 void
 dtype_set(
 /*======*/
-	dtype_t*	type,	/* in: type struct to init */
-	ulint		mtype,	/* in: main data type */
-	ulint		prtype,	/* in: precise type */
-	ulint		len);	/* in: precision of type */
-/*************************************************************************
+	dtype_t*	type,	/*!< in: type struct to init */
+	ulint		mtype,	/*!< in: main data type */
+	ulint		prtype,	/*!< in: precise type */
+	ulint		len);	/*!< in: precision of type */
+/*********************************************************************//**
 Copies a data type structure. */
 UNIV_INLINE
 void
 dtype_copy(
 /*=======*/
-	dtype_t*	type1,	/* in: type struct to copy to */
-	const dtype_t*	type2);	/* in: type struct to copy from */
-/*************************************************************************
-Gets the SQL main data type. */
+	dtype_t*	type1,	/*!< in: type struct to copy to */
+	const dtype_t*	type2);	/*!< in: type struct to copy from */
+/*********************************************************************//**
+Gets the SQL main data type.
+@return	SQL main data type */
 UNIV_INLINE
 ulint
 dtype_get_mtype(
 /*============*/
-	dtype_t*	type);
-/*************************************************************************
-Gets the precise data type. */
+	const dtype_t*	type);	/*!< in: data type */
+/*********************************************************************//**
+Gets the precise data type.
+@return	precise data type */
 UNIV_INLINE
 ulint
 dtype_get_prtype(
 /*=============*/
-	dtype_t*	type);
-/*************************************************************************
+	const dtype_t*	type);	/*!< in: data type */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
 Compute the mbminlen and mbmaxlen members of a data type structure. */
 UNIV_INLINE
 void
 dtype_get_mblen(
 /*============*/
-	ulint	mtype,		/* in: main type */
-	ulint	prtype,		/* in: precise type (and collation) */
-	ulint*	mbminlen,	/* out: minimum length of a
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype,		/*!< in: precise type (and collation) */
+	ulint*	mbminlen,	/*!< out: minimum length of a
 				multi-byte character */
-	ulint*	mbmaxlen);	/* out: maximum length of a
+	ulint*	mbmaxlen);	/*!< out: maximum length of a
 				multi-byte character */
-/*************************************************************************
-Gets the MySQL charset-collation code for MySQL string types. */
-
-ulint
-dtype_get_charset_coll_noninline(
-/*=============================*/
-	ulint	prtype);/* in: precise data type */
-/*************************************************************************
-Gets the MySQL charset-collation code for MySQL string types. */
+/*********************************************************************//**
+Gets the MySQL charset-collation code for MySQL string types.
+@return	MySQL charset-collation code */
 UNIV_INLINE
 ulint
 dtype_get_charset_coll(
 /*===================*/
-	ulint	prtype);/* in: precise data type */
-/*************************************************************************
+	ulint	prtype);/*!< in: precise data type */
+/*********************************************************************//**
 Forms a precise type from the < 4.1.2 format precise type plus the
-charset-collation code. */
-
+charset-collation code.
+@return precise type, including the charset-collation code */
+UNIV_INTERN
 ulint
 dtype_form_prtype(
 /*==============*/
-	ulint	old_prtype,	/* in: the MySQL type code and the flags
+	ulint	old_prtype,	/*!< in: the MySQL type code and the flags
 				DATA_BINARY_TYPE etc. */
-	ulint	charset_coll);	/* in: MySQL charset-collation code */
-/*************************************************************************
-Gets the type length. */
+	ulint	charset_coll);	/*!< in: MySQL charset-collation code */
+/*********************************************************************//**
+Determines if a MySQL string type is a subset of UTF-8.  This function
+may return false negatives, in case further character-set collation
+codes are introduced in MySQL later.
+@return	TRUE if a subset of UTF-8 */
+UNIV_INLINE
+ibool
+dtype_is_utf8(
+/*==========*/
+	ulint	prtype);/*!< in: precise data type */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Gets the type length.
+@return	fixed length of the type, in bytes, or 0 if variable-length */
 UNIV_INLINE
 ulint
 dtype_get_len(
 /*==========*/
-	dtype_t*	type);
-/*************************************************************************
-Gets the minimum length of a character, in bytes. */
+	const dtype_t*	type);	/*!< in: data type */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Gets the minimum length of a character, in bytes.
+@return minimum length of a char, in bytes, or 0 if this is not a
+character type */
 UNIV_INLINE
 ulint
 dtype_get_mbminlen(
 /*===============*/
-				/* out: minimum length of a char, in bytes,
-				or 0 if this is not a character type */
-	const dtype_t*	type);	/* in: type */
-/*************************************************************************
-Gets the maximum length of a character, in bytes. */
+	const dtype_t*	type);	/*!< in: type */
+/*********************************************************************//**
+Gets the maximum length of a character, in bytes.
+@return maximum length of a char, in bytes, or 0 if this is not a
+character type */
 UNIV_INLINE
 ulint
 dtype_get_mbmaxlen(
 /*===============*/
-				/* out: maximum length of a char, in bytes,
-				or 0 if this is not a character type */
-	const dtype_t*	type);	/* in: type */
-/*************************************************************************
-Gets the padding character code for the type. */
+	const dtype_t*	type);	/*!< in: type */
+/*********************************************************************//**
+Gets the padding character code for the type.
+@return	padding character code, or ULINT_UNDEFINED if no padding specified */
 UNIV_INLINE
 ulint
 dtype_get_pad_char(
 /*===============*/
-				/* out: padding character code, or
-				ULINT_UNDEFINED if no padding specified */
-	ulint	mtype,		/* in: main type */
-	ulint	prtype);	/* in: precise type */
-/***************************************************************************
-Returns the size of a fixed size data type, 0 if not a fixed size type. */
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype);	/*!< in: precise type */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************************//**
+Returns the size of a fixed size data type, 0 if not a fixed size type.
+@return	fixed size, or 0 */
 UNIV_INLINE
 ulint
 dtype_get_fixed_size_low(
 /*=====================*/
-				/* out: fixed size, or 0 */
-	ulint	mtype,		/* in: main type */
-	ulint	prtype,		/* in: precise type */
-	ulint	len,		/* in: length */
-	ulint	mbminlen,	/* in: minimum length of a multibyte char */
-	ulint	mbmaxlen);	/* in: maximum length of a multibyte char */
-/***************************************************************************
-Returns the minimum size of a data type. */
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype,		/*!< in: precise type */
+	ulint	len,		/*!< in: length */
+	ulint	mbminlen,	/*!< in: minimum length of a multibyte char */
+	ulint	mbmaxlen,	/*!< in: maximum length of a multibyte char */
+	ulint	comp);		/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Returns the minimum size of a data type.
+@return	minimum size */
 UNIV_INLINE
 ulint
 dtype_get_min_size_low(
 /*===================*/
-				/* out: minimum size */
-	ulint	mtype,		/* in: main type */
-	ulint	prtype,		/* in: precise type */
-	ulint	len,		/* in: length */
-	ulint	mbminlen,	/* in: minimum length of a multibyte char */
-	ulint	mbmaxlen);	/* in: maximum length of a multibyte char */
-/***************************************************************************
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype,		/*!< in: precise type */
+	ulint	len,		/*!< in: length */
+	ulint	mbminlen,	/*!< in: minimum length of a multibyte char */
+	ulint	mbmaxlen);	/*!< in: maximum length of a multibyte char */
+/***********************************************************************//**
 Returns the maximum size of a data type. Note: types in system tables may be
-incomplete and return incorrect information. */
+incomplete and return incorrect information.
+@return	maximum size */
 UNIV_INLINE
 ulint
 dtype_get_max_size_low(
 /*===================*/
-				/* out: maximum size */
-	ulint	mtype,		/* in: main type */
-	ulint	len);		/* in: length */
-/***************************************************************************
+	ulint	mtype,		/*!< in: main type */
+	ulint	len);		/*!< in: length */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************************//**
 Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
-For fixed length types it is the fixed length of the type, otherwise 0. */
+For fixed length types it is the fixed length of the type, otherwise 0.
+@return	SQL null storage size in ROW_FORMAT=REDUNDANT */
 UNIV_INLINE
 ulint
 dtype_get_sql_null_size(
 /*====================*/
-				/* out: SQL null storage size
-				in ROW_FORMAT=REDUNDANT */
-	const dtype_t*	type);	/* in: type */
-/**************************************************************************
+	const dtype_t*	type,	/*!< in: type */
+	ulint		comp);	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
 Reads to a type the stored information which determines its alphabetical
 ordering and the storage size of an SQL NULL value. */
 UNIV_INLINE
 void
 dtype_read_for_order_and_null_size(
 /*===============================*/
-	dtype_t*	type,	/* in: type struct */
-	byte*		buf);	/* in: buffer for the stored order info */
-/**************************************************************************
+	dtype_t*	type,	/*!< in: type struct */
+	const byte*	buf);	/*!< in: buffer for the stored order info */
+/**********************************************************************//**
 Stores for a type the information which determines its alphabetical ordering
 and the storage size of an SQL NULL value. This is the >= 4.1.x storage
 format. */
@@ -375,13 +409,13 @@ UNIV_INLINE
 void
 dtype_new_store_for_order_and_null_size(
 /*====================================*/
-	byte*		buf,	/* in: buffer for
+	byte*		buf,	/*!< in: buffer for
 				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
 				bytes where we store the info */
-	dtype_t*	type,	/* in: type struct */
-	ulint		prefix_len);/* in: prefix length to
+	const dtype_t*	type,	/*!< in: type struct */
+	ulint		prefix_len);/*!< in: prefix length to
 				replace type->len, or 0 */
-/**************************************************************************
+/**********************************************************************//**
 Reads to a type the stored information which determines its alphabetical
 ordering and the storage size of an SQL NULL value. This is the 4.1.x storage
 format. */
@@ -389,24 +423,25 @@ UNIV_INLINE
 void
 dtype_new_read_for_order_and_null_size(
 /*===================================*/
-	dtype_t*	type,	/* in: type struct */
-	byte*		buf);	/* in: buffer for stored type order info */
-
-/*************************************************************************
-Validates a data type structure. */
+	dtype_t*	type,	/*!< in: type struct */
+	const byte*	buf);	/*!< in: buffer for stored type order info */
+#endif /* !UNIV_HOTBACKUP */
 
+/*********************************************************************//**
+Validates a data type structure.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 dtype_validate(
 /*===========*/
-				/* out: TRUE if ok */
-	dtype_t*	type);	/* in: type struct to validate */
-/*************************************************************************
+	const dtype_t*	type);	/*!< in: type struct to validate */
+/*********************************************************************//**
 Prints a data type structure. */
-
+UNIV_INTERN
 void
 dtype_print(
 /*========*/
-	dtype_t*	type);	/* in: type */
+	const dtype_t*	type);	/*!< in: type */
 
 /* Structure for an SQL data type.
 If you add fields to this structure, be sure to initialize them everywhere.
@@ -417,8 +452,8 @@ dtype_new_read_for_order_and_null_size()
 sym_tab_add_null_lit() */
 
 struct dtype_struct{
-	unsigned	mtype:8;	/* main data type */
-	unsigned	prtype:24;	/* precise type; MySQL data
+	unsigned	mtype:8;	/*!< main data type */
+	unsigned	prtype:24;	/*!< precise type; MySQL data
 					type, charset code, flags to
 					indicate nullability,
 					signedness, whether this is a
@@ -428,7 +463,7 @@ struct dtype_struct{
 
 	/* the remaining fields do not affect alphabetical ordering: */
 
-	unsigned	len:16;		/* length; for MySQL data this
+	unsigned	len:16;		/*!< length; for MySQL data this
 					is field->pack_length(),
 					except that for a >= 5.0.3
 					type true VARCHAR this is the
@@ -436,11 +471,12 @@ struct dtype_struct{
 					string data (in addition to
 					the string, MySQL uses 1 or 2
 					bytes to store the string length) */
-
-	unsigned	mbminlen:2;	/* minimum length of a
+#ifndef UNIV_HOTBACKUP
+	unsigned	mbminlen:2;	/*!< minimum length of a
 					character, in bytes */
-	unsigned	mbmaxlen:3;	/* maximum length of a
+	unsigned	mbmaxlen:3;	/*!< maximum length of a
 					character, in bytes */
+#endif /* !UNIV_HOTBACKUP */
 };
 
 #ifndef UNIV_NONINL
diff --git a/storage/innobase/include/data0type.ic b/storage/innodb_plugin/include/data0type.ic
similarity index 66%
rename from storage/innobase/include/data0type.ic
rename to storage/innodb_plugin/include/data0type.ic
index ad0f95755d2..240b4288f39 100644
--- a/storage/innobase/include/data0type.ic
+++ b/storage/innodb_plugin/include/data0type.ic
@@ -1,88 +1,112 @@
-/******************************************************
-Data types
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/data0type.ic
+Data types
 
 Created 1/16/1996 Heikki Tuuri
 *******************************************************/
 
 #include "mach0data.h"
-
 #ifndef UNIV_HOTBACKUP
-/**********************************************************************
-Get the variable length bounds of the given character set.
+# include "ha_prototypes.h"
 
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-void
-innobase_get_cset_width(
-/*====================*/
-	ulint	cset,		/* in: MySQL charset-collation code */
-	ulint*	mbminlen,	/* out: minimum length of a char (in bytes) */
-	ulint*	mbmaxlen);	/* out: maximum length of a char (in bytes) */
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************************
-Gets the MySQL charset-collation code for MySQL string types. */
+/*********************************************************************//**
+Gets the MySQL charset-collation code for MySQL string types.
+@return	MySQL charset-collation code */
 UNIV_INLINE
 ulint
 dtype_get_charset_coll(
 /*===================*/
-	ulint	prtype)	/* in: precise data type */
+	ulint	prtype)	/*!< in: precise data type */
 {
 	return((prtype >> 16) & 0xFFUL);
 }
 
-/*************************************************************************
-Gets the MySQL type code from a dtype. */
+/*********************************************************************//**
+Determines if a MySQL string type is a subset of UTF-8.  This function
+may return false negatives, in case further character-set collation
+codes are introduced in MySQL later.
+@return	TRUE if a subset of UTF-8 */
+UNIV_INLINE
+ibool
+dtype_is_utf8(
+/*==========*/
+	ulint	prtype)	/*!< in: precise data type */
+{
+	/* These codes have been copied from strings/ctype-extra.c
+	and strings/ctype-utf8.c. */
+	switch (dtype_get_charset_coll(prtype)) {
+	case 11: /* ascii_general_ci */
+	case 65: /* ascii_bin */
+	case 33: /* utf8_general_ci */
+	case 83: /* utf8_bin */
+	case 254: /* utf8_general_cs */
+			return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*********************************************************************//**
+Gets the MySQL type code from a dtype.
+@return	MySQL type code; this is NOT an InnoDB type code! */
 UNIV_INLINE
 ulint
 dtype_get_mysql_type(
 /*=================*/
-				/* out: MySQL type code; this is NOT an InnoDB
-				type code! */
-	dtype_t*	type)	/* in: type struct */
+	const dtype_t*	type)	/*!< in: type struct */
 {
 	return(type->prtype & 0xFFUL);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Compute the mbminlen and mbmaxlen members of a data type structure. */
 UNIV_INLINE
 void
 dtype_get_mblen(
 /*============*/
-	ulint	mtype,		/* in: main type */
-	ulint	prtype,		/* in: precise type (and collation) */
-	ulint*	mbminlen,	/* out: minimum length of a
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype,		/*!< in: precise type (and collation) */
+	ulint*	mbminlen,	/*!< out: minimum length of a
 				multi-byte character */
-	ulint*	mbmaxlen)	/* out: maximum length of a
+	ulint*	mbmaxlen)	/*!< out: maximum length of a
 				multi-byte character */
 {
 	if (dtype_is_string_type(mtype)) {
-#ifndef UNIV_HOTBACKUP
 		innobase_get_cset_width(dtype_get_charset_coll(prtype),
 					mbminlen, mbmaxlen);
 		ut_ad(*mbminlen <= *mbmaxlen);
 		ut_ad(*mbminlen <= 2); /* mbminlen in dtype_t is 0..3 */
 		ut_ad(*mbmaxlen < 1 << 3); /* mbmaxlen in dtype_t is 0..7 */
-#else /* !UNIV_HOTBACKUP */
-		ut_a(mtype <= DATA_BINARY);
-		*mbminlen = *mbmaxlen = 1;
-#endif /* !UNIV_HOTBACKUP */
 	} else {
 		*mbminlen = *mbmaxlen = 0;
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Compute the mbminlen and mbmaxlen members of a data type structure. */
 UNIV_INLINE
 void
 dtype_set_mblen(
 /*============*/
-	dtype_t*	type)	/* in/out: type */
+	dtype_t*	type)	/*!< in/out: type */
 {
 	ulint	mbminlen;
 	ulint	mbmaxlen;
@@ -93,17 +117,20 @@ dtype_set_mblen(
 
 	ut_ad(dtype_validate(type));
 }
+#else /* !UNIV_HOTBACKUP */
+# define dtype_set_mblen(type) (void) 0
+#endif /* !UNIV_HOTBACKUP */
 
-/*************************************************************************
+/*********************************************************************//**
 Sets a data type structure. */
 UNIV_INLINE
 void
 dtype_set(
 /*======*/
-	dtype_t*	type,	/* in: type struct to init */
-	ulint		mtype,	/* in: main data type */
-	ulint		prtype,	/* in: precise type */
-	ulint		len)	/* in: precision of type */
+	dtype_t*	type,	/*!< in: type struct to init */
+	ulint		mtype,	/*!< in: main data type */
+	ulint		prtype,	/*!< in: precise type */
+	ulint		len)	/*!< in: precision of type */
 {
 	ut_ad(type);
 	ut_ad(mtype <= DATA_MTYPE_MAX);
@@ -115,96 +142,99 @@ dtype_set(
 	dtype_set_mblen(type);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Copies a data type structure. */
 UNIV_INLINE
 void
 dtype_copy(
 /*=======*/
-	dtype_t*	type1,	/* in: type struct to copy to */
-	const dtype_t*	type2)	/* in: type struct to copy from */
+	dtype_t*	type1,	/*!< in: type struct to copy to */
+	const dtype_t*	type2)	/*!< in: type struct to copy from */
 {
 	*type1 = *type2;
 
 	ut_ad(dtype_validate(type1));
 }
 
-/*************************************************************************
-Gets the SQL main data type. */
+/*********************************************************************//**
+Gets the SQL main data type.
+@return	SQL main data type */
 UNIV_INLINE
 ulint
 dtype_get_mtype(
 /*============*/
-	dtype_t*	type)
+	const dtype_t*	type)	/*!< in: data type */
 {
 	ut_ad(type);
 
 	return(type->mtype);
 }
 
-/*************************************************************************
-Gets the precise data type. */
+/*********************************************************************//**
+Gets the precise data type.
+@return	precise data type */
 UNIV_INLINE
 ulint
 dtype_get_prtype(
 /*=============*/
-	dtype_t*	type)
+	const dtype_t*	type)	/*!< in: data type */
 {
 	ut_ad(type);
 
 	return(type->prtype);
 }
 
-/*************************************************************************
-Gets the type length. */
+/*********************************************************************//**
+Gets the type length.
+@return	fixed length of the type, in bytes, or 0 if variable-length */
 UNIV_INLINE
 ulint
 dtype_get_len(
 /*==========*/
-	dtype_t*	type)
+	const dtype_t*	type)	/*!< in: data type */
 {
 	ut_ad(type);
 
 	return(type->len);
 }
 
-/*************************************************************************
-Gets the minimum length of a character, in bytes. */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Gets the minimum length of a character, in bytes.
+@return minimum length of a char, in bytes, or 0 if this is not a
+character type */
 UNIV_INLINE
 ulint
 dtype_get_mbminlen(
 /*===============*/
-				/* out: minimum length of a char, in bytes,
-				or 0 if this is not a character type */
-	const dtype_t*	type)	/* in: type */
+	const dtype_t*	type)	/*!< in: type */
 {
 	ut_ad(type);
 	return(type->mbminlen);
 }
-/*************************************************************************
-Gets the maximum length of a character, in bytes. */
+/*********************************************************************//**
+Gets the maximum length of a character, in bytes.
+@return maximum length of a char, in bytes, or 0 if this is not a
+character type */
 UNIV_INLINE
 ulint
 dtype_get_mbmaxlen(
 /*===============*/
-				/* out: maximum length of a char, in bytes,
-				or 0 if this is not a character type */
-	const dtype_t*	type)	/* in: type */
+	const dtype_t*	type)	/*!< in: type */
 {
 	ut_ad(type);
 	return(type->mbmaxlen);
 }
 
-/*************************************************************************
-Gets the padding character code for a type. */
+/*********************************************************************//**
+Gets the padding character code for a type.
+@return	padding character code, or ULINT_UNDEFINED if no padding specified */
 UNIV_INLINE
 ulint
 dtype_get_pad_char(
 /*===============*/
-				/* out: padding character code, or
-				ULINT_UNDEFINED if no padding specified */
-	ulint	mtype,		/* in: main type */
-	ulint	prtype)		/* in: precise type */
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype)		/*!< in: precise type */
 {
 	switch (mtype) {
 	case DATA_FIXBINARY:
@@ -235,7 +265,7 @@ dtype_get_pad_char(
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Stores for a type the information which determines its alphabetical ordering
 and the storage size of an SQL NULL value. This is the >= 4.1.x storage
 format. */
@@ -243,11 +273,11 @@ UNIV_INLINE
 void
 dtype_new_store_for_order_and_null_size(
 /*====================================*/
-	byte*		buf,	/* in: buffer for
+	byte*		buf,	/*!< in: buffer for
 				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
 				bytes where we store the info */
-	dtype_t*	type,	/* in: type struct */
-	ulint		prefix_len)/* in: prefix length to
+	const dtype_t*	type,	/*!< in: type struct */
+	ulint		prefix_len)/*!< in: prefix length to
 				replace type->len, or 0 */
 {
 #if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
@@ -280,7 +310,7 @@ dtype_new_store_for_order_and_null_size(
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Reads to a type the stored information which determines its alphabetical
 ordering and the storage size of an SQL NULL value. This is the < 4.1.x
 storage format. */
@@ -288,8 +318,8 @@ UNIV_INLINE
 void
 dtype_read_for_order_and_null_size(
 /*===============================*/
-	dtype_t*	type,	/* in: type struct */
-	byte*		buf)	/* in: buffer for stored type order info */
+	dtype_t*	type,	/*!< in: type struct */
+	const byte*	buf)	/*!< in: buffer for stored type order info */
 {
 #if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE
 # error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE"
@@ -309,7 +339,7 @@ dtype_read_for_order_and_null_size(
 	dtype_set_mblen(type);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Reads to a type the stored information which determines its alphabetical
 ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
 storage format. */
@@ -317,8 +347,8 @@ UNIV_INLINE
 void
 dtype_new_read_for_order_and_null_size(
 /*===================================*/
-	dtype_t*	type,	/* in: type struct */
-	byte*		buf)	/* in: buffer for stored type order info */
+	dtype_t*	type,	/*!< in: type struct */
+	const byte*	buf)	/*!< in: buffer for stored type order info */
 {
 	ulint	charset_coll;
 
@@ -339,8 +369,6 @@ dtype_new_read_for_order_and_null_size(
 
 	type->len = mach_read_from_2(buf + 2);
 
-	mach_read_from_2(buf + 4);
-
 	charset_coll = mach_read_from_2(buf + 4) & 0x7fff;
 
 	if (dtype_is_string_type(type->mtype)) {
@@ -360,19 +388,21 @@ dtype_new_read_for_order_and_null_size(
 	}
 	dtype_set_mblen(type);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***************************************************************************
-Returns the size of a fixed size data type, 0 if not a fixed size type. */
+/***********************************************************************//**
+Returns the size of a fixed size data type, 0 if not a fixed size type.
+@return	fixed size, or 0 */
 UNIV_INLINE
 ulint
 dtype_get_fixed_size_low(
 /*=====================*/
-				/* out: fixed size, or 0 */
-	ulint	mtype,		/* in: main type */
-	ulint	prtype,		/* in: precise type */
-	ulint	len,		/* in: length */
-	ulint	mbminlen,	/* in: minimum length of a multibyte char */
-	ulint	mbmaxlen)	/* in: maximum length of a multibyte char */
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype,		/*!< in: precise type */
+	ulint	len,		/*!< in: length */
+	ulint	mbminlen,	/*!< in: minimum length of a multibyte char */
+	ulint	mbmaxlen,	/*!< in: maximum length of a multibyte char */
+	ulint	comp)		/*!< in: nonzero=ROW_FORMAT=COMPACT  */
 {
 	switch (mtype) {
 	case DATA_SYS:
@@ -399,14 +429,12 @@ dtype_get_fixed_size_low(
 	case DATA_DOUBLE:
 		return(len);
 	case DATA_MYSQL:
+#ifndef UNIV_HOTBACKUP
 		if (prtype & DATA_BINARY_TYPE) {
 			return(len);
+		} else if (!comp) {
+			return(len);
 		} else {
-#ifdef UNIV_HOTBACKUP
-			if (mbminlen == mbmaxlen) {
-				return(len);
-			}
-#else /* UNIV_HOTBACKUP */
 			/* We play it safe here and ask MySQL for
 			mbminlen and mbmaxlen.	Although
 			mbminlen and mbmaxlen are
@@ -438,8 +466,10 @@ dtype_get_fixed_size_low(
 			if (mbminlen == mbmaxlen) {
 				return(len);
 			}
-#endif /* !UNIV_HOTBACKUP */
 		}
+#else /* !UNIV_HOTBACKUP */
+		return(len);
+#endif /* !UNIV_HOTBACKUP */
 		/* fall through for variable-length charsets */
 	case DATA_VARCHAR:
 	case DATA_BINARY:
@@ -454,18 +484,19 @@ dtype_get_fixed_size_low(
 	return(0);
 }
 
-/***************************************************************************
-Returns the minimum size of a data type. */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Returns the minimum size of a data type.
+@return	minimum size */
 UNIV_INLINE
 ulint
 dtype_get_min_size_low(
 /*===================*/
-				/* out: minimum size */
-	ulint	mtype,		/* in: main type */
-	ulint	prtype,		/* in: precise type */
-	ulint	len,		/* in: length */
-	ulint	mbminlen,	/* in: minimum length of a multibyte char */
-	ulint	mbmaxlen)	/* in: maximum length of a multibyte char */
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype,		/*!< in: precise type */
+	ulint	len,		/*!< in: length */
+	ulint	mbminlen,	/*!< in: minimum length of a multibyte char */
+	ulint	mbmaxlen)	/*!< in: maximum length of a multibyte char */
 {
 	switch (mtype) {
 	case DATA_SYS:
@@ -513,16 +544,16 @@ dtype_get_min_size_low(
 	return(0);
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Returns the maximum size of a data type. Note: types in system tables may be
-incomplete and return incorrect information. */
+incomplete and return incorrect information.
+@return	maximum size */
 UNIV_INLINE
 ulint
 dtype_get_max_size_low(
 /*===================*/
-				/* out: maximum size */
-	ulint	mtype,		/* in: main type */
-	ulint	len)		/* in: length */
+	ulint	mtype,		/*!< in: main type */
+	ulint	len)		/*!< in: length */
 {
 	switch (mtype) {
 	case DATA_SYS:
@@ -545,18 +576,24 @@ dtype_get_max_size_low(
 
 	return(ULINT_MAX);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***************************************************************************
+/***********************************************************************//**
 Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
-For fixed length types it is the fixed length of the type, otherwise 0. */
+For fixed length types it is the fixed length of the type, otherwise 0.
+@return	SQL null storage size in ROW_FORMAT=REDUNDANT */
 UNIV_INLINE
 ulint
 dtype_get_sql_null_size(
 /*====================*/
-				/* out: SQL null storage size
-				in ROW_FORMAT=REDUNDANT */
-	const dtype_t*	type)	/* in: type */
+	const dtype_t*	type,	/*!< in: type */
+	ulint		comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
 {
+#ifndef UNIV_HOTBACKUP
 	return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
-					type->mbminlen, type->mbmaxlen));
+					type->mbminlen, type->mbmaxlen, comp));
+#else /* !UNIV_HOTBACKUP */
+	return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
+					0, 0, 0));
+#endif /* !UNIV_HOTBACKUP */
 }
diff --git a/storage/innodb_plugin/include/data0types.h b/storage/innodb_plugin/include/data0types.h
new file mode 100644
index 00000000000..04e835bc401
--- /dev/null
+++ b/storage/innodb_plugin/include/data0types.h
@@ -0,0 +1,36 @@
+/*****************************************************************************
+
+Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/data0types.h
+Some type definitions
+
+Created 9/21/2000 Heikki Tuuri
+*************************************************************************/
+
+#ifndef data0types_h
+#define data0types_h
+
+/* SQL data field struct */
+typedef struct dfield_struct	dfield_t;
+
+/* SQL data tuple struct */
+typedef struct dtuple_struct	dtuple_t;
+
+#endif
+
diff --git a/storage/innodb_plugin/include/db0err.h b/storage/innodb_plugin/include/db0err.h
new file mode 100644
index 00000000000..23898583b72
--- /dev/null
+++ b/storage/innodb_plugin/include/db0err.h
@@ -0,0 +1,105 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/db0err.h
+Global error codes for the database
+
+Created 5/24/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef db0err_h
+#define db0err_h
+
+
+enum db_err {
+	DB_SUCCESS = 10,
+
+	/* The following are error codes */
+	DB_ERROR,
+	DB_OUT_OF_MEMORY,
+	DB_OUT_OF_FILE_SPACE,
+	DB_LOCK_WAIT,
+	DB_DEADLOCK,
+	DB_ROLLBACK,
+	DB_DUPLICATE_KEY,
+	DB_QUE_THR_SUSPENDED,
+	DB_MISSING_HISTORY,		/* required history data has been
+					deleted due to lack of space in
+					rollback segment */
+	DB_CLUSTER_NOT_FOUND = 30,
+	DB_TABLE_NOT_FOUND,
+	DB_MUST_GET_MORE_FILE_SPACE,	/* the database has to be stopped
+					and restarted with more file space */
+	DB_TABLE_IS_BEING_USED,
+	DB_TOO_BIG_RECORD,		/* a record in an index would not fit
+					on a compressed page, or it would
+					become bigger than 1/2 free space in
+					an uncompressed page frame */
+	DB_LOCK_WAIT_TIMEOUT,		/* lock wait lasted too long */
+	DB_NO_REFERENCED_ROW,		/* referenced key value not found
+					for a foreign key in an insert or
+					update of a row */
+	DB_ROW_IS_REFERENCED,		/* cannot delete or update a row
+					because it contains a key value
+					which is referenced */
+	DB_CANNOT_ADD_CONSTRAINT,	/* adding a foreign key constraint
+					to a table failed */
+	DB_CORRUPTION,			/* data structure corruption noticed */
+	DB_COL_APPEARS_TWICE_IN_INDEX,	/* InnoDB cannot handle an index
+					where same column appears twice */
+	DB_CANNOT_DROP_CONSTRAINT,	/* dropping a foreign key constraint
+					from a table failed */
+	DB_NO_SAVEPOINT,		/* no savepoint exists with the given
+					name */
+	DB_TABLESPACE_ALREADY_EXISTS,	/* we cannot create a new single-table
+					tablespace because a file of the same
+					name already exists */
+	DB_TABLESPACE_DELETED,		/* tablespace does not exist or is
+					being dropped right now */
+	DB_LOCK_TABLE_FULL,		/* lock structs have exhausted the
+					buffer pool (for big transactions,
+					InnoDB stores the lock structs in the
+					buffer pool) */
+	DB_FOREIGN_DUPLICATE_KEY,	/* foreign key constraints
+					activated by the operation would
+					lead to a duplicate key in some
+					table */
+	DB_TOO_MANY_CONCURRENT_TRXS,	/* when InnoDB runs out of the
+					preconfigured undo slots, this can
+					only happen when there are too many
+					concurrent transactions */
+	DB_UNSUPPORTED,			/* when InnoDB sees any artefact or
+					a feature that it can't recoginize or
+					work with e.g., FT indexes created by
+					a later version of the engine. */
+
+	DB_PRIMARY_KEY_IS_NULL,		/* a column in the PRIMARY KEY
+					was found to be NULL */
+
+	/* The following are partial failure codes */
+	DB_FAIL = 1000,
+	DB_OVERFLOW,
+	DB_UNDERFLOW,
+	DB_STRONG_FAIL,
+	DB_ZIP_OVERFLOW,
+	DB_RECORD_NOT_FOUND = 1500,
+	DB_END_OF_INDEX
+};
+
+#endif
diff --git a/storage/innobase/include/dict0boot.h b/storage/innodb_plugin/include/dict0boot.h
similarity index 70%
rename from storage/innobase/include/dict0boot.h
rename to storage/innodb_plugin/include/dict0boot.h
index cac79410b24..51d37ee98d1 100644
--- a/storage/innobase/include/dict0boot.h
+++ b/storage/innodb_plugin/include/dict0boot.h
@@ -1,7 +1,24 @@
-/******************************************************
-Data dictionary creation and booting
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0boot.h
+Data dictionary creation and booting
 
 Created 4/18/1996 Heikki Tuuri
 *******************************************************/
@@ -20,56 +37,55 @@ Created 4/18/1996 Heikki Tuuri
 
 typedef	byte	dict_hdr_t;
 
-/**************************************************************************
-Gets a pointer to the dictionary header and x-latches its page. */
-
+/**********************************************************************//**
+Gets a pointer to the dictionary header and x-latches its page.
+@return	pointer to the dictionary header, page x-latched */
+UNIV_INTERN
 dict_hdr_t*
 dict_hdr_get(
 /*=========*/
-			/* out: pointer to the dictionary header,
-			page x-latched */
-	mtr_t*	mtr);	/* in: mtr */
-/**************************************************************************
-Returns a new row, table, index, or tree id. */
-
+	mtr_t*	mtr);	/*!< in: mtr */
+/**********************************************************************//**
+Returns a new row, table, index, or tree id.
+@return	the new id */
+UNIV_INTERN
 dulint
 dict_hdr_get_new_id(
 /*================*/
-			/* out: the new id */
-	ulint	type);	/* in: DICT_HDR_ROW_ID, ... */
-/**************************************************************************
-Returns a new row id. */
+	ulint	type);	/*!< in: DICT_HDR_ROW_ID, ... */
+/**********************************************************************//**
+Returns a new row id.
+@return	the new id */
 UNIV_INLINE
 dulint
 dict_sys_get_new_row_id(void);
 /*=========================*/
-			/* out: the new id */
-/**************************************************************************
-Reads a row id from a record or other 6-byte stored form. */
+/**********************************************************************//**
+Reads a row id from a record or other 6-byte stored form.
+@return	row id */
 UNIV_INLINE
 dulint
 dict_sys_read_row_id(
 /*=================*/
-			/* out: row id */
-	byte*	field);	/* in: record field */
-/**************************************************************************
+	byte*	field);	/*!< in: record field */
+/**********************************************************************//**
 Writes a row id to a record or other 6-byte stored form. */
 UNIV_INLINE
 void
 dict_sys_write_row_id(
 /*==================*/
-	byte*	field,	/* in: record field */
-	dulint	row_id);/* in: row id */
-/*********************************************************************
+	byte*	field,	/*!< in: record field */
+	dulint	row_id);/*!< in: row id */
+/*****************************************************************//**
 Initializes the data dictionary memory structures when the database is
 started. This function is also called when the data dictionary is created. */
-
+UNIV_INTERN
 void
 dict_boot(void);
 /*===========*/
-/*********************************************************************
+/*****************************************************************//**
 Creates and initializes the data dictionary at the database creation. */
-
+UNIV_INTERN
 void
 dict_create(void);
 /*=============*/
diff --git a/storage/innobase/include/dict0boot.ic b/storage/innodb_plugin/include/dict0boot.ic
similarity index 50%
rename from storage/innobase/include/dict0boot.ic
rename to storage/innodb_plugin/include/dict0boot.ic
index fe2a9e36653..d5f372e38c4 100644
--- a/storage/innobase/include/dict0boot.ic
+++ b/storage/innodb_plugin/include/dict0boot.ic
@@ -1,27 +1,44 @@
-/******************************************************
-Data dictionary creation and booting
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0boot.ic
+Data dictionary creation and booting
 
 Created 4/18/1996 Heikki Tuuri
 *******************************************************/
 
-/**************************************************************************
+/**********************************************************************//**
 Writes the current value of the row id counter to the dictionary header file
 page. */
-
+UNIV_INTERN
 void
 dict_hdr_flush_row_id(void);
 /*=======================*/
 
 
-/**************************************************************************
-Returns a new row id. */
+/**********************************************************************//**
+Returns a new row id.
+@return	the new id */
 UNIV_INLINE
 dulint
 dict_sys_get_new_row_id(void)
 /*=========================*/
-			/* out: the new id */
 {
 	dulint	id;
 
@@ -41,14 +58,14 @@ dict_sys_get_new_row_id(void)
 	return(id);
 }
 
-/**************************************************************************
-Reads a row id from a record or other 6-byte stored form. */
+/**********************************************************************//**
+Reads a row id from a record or other 6-byte stored form.
+@return	row id */
 UNIV_INLINE
 dulint
 dict_sys_read_row_id(
 /*=================*/
-			/* out: row id */
-	byte*	field)	/* in: record field */
+	byte*	field)	/*!< in: record field */
 {
 #if DATA_ROW_ID_LEN != 6
 # error "DATA_ROW_ID_LEN != 6"
@@ -57,14 +74,14 @@ dict_sys_read_row_id(
 	return(mach_read_from_6(field));
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Writes a row id to a record or other 6-byte stored form. */
 UNIV_INLINE
 void
 dict_sys_write_row_id(
 /*==================*/
-	byte*	field,	/* in: record field */
-	dulint	row_id)	/* in: row id */
+	byte*	field,	/*!< in: record field */
+	dulint	row_id)	/*!< in: row id */
 {
 #if DATA_ROW_ID_LEN != 6
 # error "DATA_ROW_ID_LEN != 6"
diff --git a/storage/innobase/include/dict0crea.h b/storage/innodb_plugin/include/dict0crea.h
similarity index 58%
rename from storage/innobase/include/dict0crea.h
rename to storage/innodb_plugin/include/dict0crea.h
index f0f30481abe..3107d771d88 100644
--- a/storage/innobase/include/dict0crea.h
+++ b/storage/innodb_plugin/include/dict0crea.h
@@ -1,7 +1,24 @@
-/******************************************************
-Database object creation
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0crea.h
+Database object creation
 
 Created 1/8/1996 Heikki Tuuri
 *******************************************************/
@@ -16,90 +33,92 @@ Created 1/8/1996 Heikki Tuuri
 #include "row0types.h"
 #include "mtr0mtr.h"
 
-/*************************************************************************
-Creates a table create graph. */
-
+/*********************************************************************//**
+Creates a table create graph.
+@return	own: table create node */
+UNIV_INTERN
 tab_node_t*
 tab_create_graph_create(
 /*====================*/
-				/* out, own: table create node */
-	dict_table_t*	table,	/* in: table to create, built as a memory data
+	dict_table_t*	table,	/*!< in: table to create, built as a memory data
 				structure */
-	mem_heap_t*	heap);	/* in: heap where created */
-/*************************************************************************
-Creates an index create graph. */
-
+	mem_heap_t*	heap);	/*!< in: heap where created */
+/*********************************************************************//**
+Creates an index create graph.
+@return	own: index create node */
+UNIV_INTERN
 ind_node_t*
 ind_create_graph_create(
 /*====================*/
-				/* out, own: index create node */
-	dict_index_t*	index,	/* in: index to create, built as a memory data
+	dict_index_t*	index,	/*!< in: index to create, built as a memory data
 				structure */
-	mem_heap_t*	heap);	/* in: heap where created */
-/***************************************************************
-Creates a table. This is a high-level function used in SQL execution graphs. */
-
+	mem_heap_t*	heap);	/*!< in: heap where created */
+/***********************************************************//**
+Creates a table. This is a high-level function used in SQL execution graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 dict_create_table_step(
 /*===================*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/***************************************************************
+	que_thr_t*	thr);	/*!< in: query thread */
+/***********************************************************//**
 Creates an index. This is a high-level function used in SQL execution
-graphs. */
-
+graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 dict_create_index_step(
 /*===================*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr);	/* in: query thread */
-/***********************************************************************
-Truncates the index tree associated with a row in SYS_INDEXES table. */
-
+	que_thr_t*	thr);	/*!< in: query thread */
+/*******************************************************************//**
+Truncates the index tree associated with a row in SYS_INDEXES table.
+@return	new root page number, or FIL_NULL on failure */
+UNIV_INTERN
 ulint
 dict_truncate_index_tree(
 /*=====================*/
-				/* out: new root page number, or
-				FIL_NULL on failure */
-	dict_table_t*	table,	/* in: the table the index belongs to */
-	btr_pcur_t*	pcur,	/* in/out: persistent cursor pointing to
+	dict_table_t*	table,	/*!< in: the table the index belongs to */
+	ulint		space,	/*!< in: 0=truncate,
+				nonzero=create the index tree in the
+				given tablespace */
+	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor pointing to
 				record in the clustered index of
 				SYS_INDEXES table. The cursor may be
 				repositioned in this call. */
-	mtr_t*		mtr);	/* in: mtr having the latch
+	mtr_t*		mtr);	/*!< in: mtr having the latch
 				on the record page. The mtr may be
 				committed and restarted in this call. */
-/***********************************************************************
+/*******************************************************************//**
 Drops the index tree associated with a row in SYS_INDEXES table. */
-
+UNIV_INTERN
 void
 dict_drop_index_tree(
 /*=================*/
-	rec_t*	rec,	/* in: record in the clustered index of SYS_INDEXES
-			table */
-	mtr_t*	mtr);	/* in: mtr having the latch on the record page */
-/********************************************************************
+	rec_t*	rec,	/*!< in/out: record in the clustered index
+			of SYS_INDEXES table */
+	mtr_t*	mtr);	/*!< in: mtr having the latch on the record page */
+/****************************************************************//**
 Creates the foreign key constraints system tables inside InnoDB
 at database creation or database start if they are not found or are
-not of the right form. */
-
+not of the right form.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
 ulint
 dict_create_or_check_foreign_constraint_tables(void);
 /*================================================*/
-				/* out: DB_SUCCESS or error code */
-/************************************************************************
+/********************************************************************//**
 Adds foreign key definitions to data dictionary tables in the database. We
 look at table->foreign_list, and also generate names to constraints that were
 not named by the user. A generated constraint has a name of the format
 databasename/tablename_ibfk_<number>, where the numbers start from 1, and are
 given locally for this table, that is, the number is not global, as in the
-old format constraints < 4.0.18 it used to be. */
-
+old format constraints < 4.0.18 it used to be.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 ulint
 dict_create_add_foreigns_to_dictionary(
 /*===================================*/
-				/* out: error code or DB_SUCCESS */
-	ulint		start_id,/* in: if we are actually doing ALTER TABLE
+	ulint		start_id,/*!< in: if we are actually doing ALTER TABLE
 				ADD CONSTRAINT, we want to generate constraint
 				numbers which are bigger than in the table so
 				far; we number the constraints from
@@ -107,15 +126,14 @@ dict_create_add_foreigns_to_dictionary(
 				we are creating a new table, or if the table
 				so far has no constraints for which the name
 				was generated here */
-	dict_table_t*	table,	/* in: table */
-	trx_t*		trx);	/* in: transaction */
-
+	dict_table_t*	table,	/*!< in: table */
+	trx_t*		trx);	/*!< in: transaction */
 
 /* Table create node structure */
 
 struct tab_node_struct{
-	que_common_t	common;	/* node type: QUE_NODE_TABLE_CREATE */
-	dict_table_t*	table;	/* table to create, built as a memory data
+	que_common_t	common;	/*!< node type: QUE_NODE_TABLE_CREATE */
+	dict_table_t*	table;	/*!< table to create, built as a memory data
 				structure with dict_mem_... functions */
 	ins_node_t*	tab_def; /* child node which does the insert of
 				the table definition; the row to be inserted
@@ -128,9 +146,9 @@ struct tab_node_struct{
 				a successful table creation */
 	/*----------------------*/
 	/* Local storage for this graph node */
-	ulint		state;	/* node execution state */
-	ulint		col_no;	/* next column definition to insert */
-	mem_heap_t*	heap;	/* memory heap used as auxiliary storage */
+	ulint		state;	/*!< node execution state */
+	ulint		col_no;	/*!< next column definition to insert */
+	mem_heap_t*	heap;	/*!< memory heap used as auxiliary storage */
 };
 
 /* Table create node states */
@@ -143,8 +161,8 @@ struct tab_node_struct{
 /* Index create node struct */
 
 struct ind_node_struct{
-	que_common_t	common;	/* node type: QUE_NODE_INDEX_CREATE */
-	dict_index_t*	index;	/* index to create, built as a memory data
+	que_common_t	common;	/*!< node type: QUE_NODE_INDEX_CREATE */
+	dict_index_t*	index;	/*!< index to create, built as a memory data
 				structure with dict_mem_... functions */
 	ins_node_t*	ind_def; /* child node which does the insert of
 				the index definition; the row to be inserted
@@ -157,12 +175,12 @@ struct ind_node_struct{
 				a successful index creation */
 	/*----------------------*/
 	/* Local storage for this graph node */
-	ulint		state;	/* node execution state */
+	ulint		state;	/*!< node execution state */
 	ulint		page_no;/* root page number of the index */
-	dict_table_t*	table;	/* table which owns the index */
+	dict_table_t*	table;	/*!< table which owns the index */
 	dtuple_t*	ind_row;/* index definition row built */
 	ulint		field_no;/* next field definition to insert */
-	mem_heap_t*	heap;	/* memory heap used as auxiliary storage */
+	mem_heap_t*	heap;	/*!< memory heap used as auxiliary storage */
 };
 
 /* Index create node states */
diff --git a/storage/innodb_plugin/include/dict0crea.ic b/storage/innodb_plugin/include/dict0crea.ic
new file mode 100644
index 00000000000..c5365ce7489
--- /dev/null
+++ b/storage/innodb_plugin/include/dict0crea.ic
@@ -0,0 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0crea.ic
+Database object creation
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
diff --git a/storage/innodb_plugin/include/dict0dict.h b/storage/innodb_plugin/include/dict0dict.h
new file mode 100644
index 00000000000..b2029699e51
--- /dev/null
+++ b/storage/innodb_plugin/include/dict0dict.h
@@ -0,0 +1,1158 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0dict.h
+Data dictionary system
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0dict_h
+#define dict0dict_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "dict0mem.h"
+#include "data0type.h"
+#include "data0data.h"
+#include "mem0mem.h"
+#include "rem0types.h"
+#include "ut0mem.h"
+#include "ut0lst.h"
+#include "hash0hash.h"
+#include "ut0rnd.h"
+#include "ut0byte.h"
+#include "trx0types.h"
+
+#ifndef UNIV_HOTBACKUP
+# include "sync0sync.h"
+# include "sync0rw.h"
+/******************************************************************//**
+Makes all characters in a NUL-terminated UTF-8 string lower case. */
+UNIV_INTERN
+void
+dict_casedn_str(
+/*============*/
+	char*	a);	/*!< in/out: string to put in lower case */
+/********************************************************************//**
+Get the database name length in a table name.
+@return	database name length */
+UNIV_INTERN
+ulint
+dict_get_db_name_len(
+/*=================*/
+	const char*	name);	/*!< in: table name in the form
+				dbname '/' tablename */
+/********************************************************************//**
+Return the end of table name where we have removed dbname and '/'.
+@return	table name */
+
+const char*
+dict_remove_db_name(
+/*================*/
+	const char*	name);	/*!< in: table name in the form
+				dbname '/' tablename */
+/**********************************************************************//**
+Returns a table object based on table id.
+@return	table, NULL if does not exist */
+UNIV_INTERN
+dict_table_t*
+dict_table_get_on_id(
+/*=================*/
+        dulint  table_id,       /*!< in: table id */
+        trx_t*  trx);           /*!< in: transaction handle */
+/********************************************************************//**
+Decrements the count of open MySQL handles to a table. */
+UNIV_INTERN
+void
+dict_table_decrement_handle_count(
+/*==============================*/
+	dict_table_t*	table,		/*!< in/out: table */
+	ibool		dict_locked);	/*!< in: TRUE=data dictionary locked */
+/**********************************************************************//**
+Inits the data dictionary module. */
+UNIV_INTERN
+void
+dict_init(void);
+/*===========*/
+/********************************************************************//**
+Gets the space id of every table of the data dictionary and makes a linear
+list and a hash table of them to the data dictionary cache. This function
+can be called at database startup if we did not need to do a crash recovery.
+In crash recovery we must scan the space id's from the .ibd files in MySQL
+database directories. */
+UNIV_INTERN
+void
+dict_load_space_id_list(void);
+/*=========================*/
+/*********************************************************************//**
+Gets the column data type. */
+UNIV_INLINE
+void
+dict_col_copy_type(
+/*===============*/
+	const dict_col_t*	col,	/*!< in: column */
+	dtype_t*		type);	/*!< out: data type */
+#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Assert that a column and a data type match.
+@return	TRUE */
+UNIV_INLINE
+ibool
+dict_col_type_assert_equal(
+/*=======================*/
+	const dict_col_t*	col,	/*!< in: column */
+	const dtype_t*		type);	/*!< in: data type */
+#endif /* UNIV_DEBUG */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Returns the minimum size of the column.
+@return	minimum size */
+UNIV_INLINE
+ulint
+dict_col_get_min_size(
+/*==================*/
+	const dict_col_t*	col);	/*!< in: column */
+/***********************************************************************//**
+Returns the maximum size of the column.
+@return	maximum size */
+UNIV_INLINE
+ulint
+dict_col_get_max_size(
+/*==================*/
+	const dict_col_t*	col);	/*!< in: column */
+/***********************************************************************//**
+Returns the size of a fixed size column, 0 if not a fixed size column.
+@return	fixed size, or 0 */
+UNIV_INLINE
+ulint
+dict_col_get_fixed_size(
+/*====================*/
+	const dict_col_t*	col,	/*!< in: column */
+	ulint			comp);	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+/***********************************************************************//**
+Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
+For fixed length types it is the fixed length of the type, otherwise 0.
+@return	SQL null storage size in ROW_FORMAT=REDUNDANT */
+UNIV_INLINE
+ulint
+dict_col_get_sql_null_size(
+/*=======================*/
+	const dict_col_t*	col,	/*!< in: column */
+	ulint			comp);	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+
+/*********************************************************************//**
+Gets the column number.
+@return	col->ind, table column position (starting from 0) */
+UNIV_INLINE
+ulint
+dict_col_get_no(
+/*============*/
+	const dict_col_t*	col);	/*!< in: column */
+/*********************************************************************//**
+Gets the column position in the clustered index. */
+UNIV_INLINE
+ulint
+dict_col_get_clust_pos(
+/*===================*/
+	const dict_col_t*	col,		/*!< in: table column */
+	const dict_index_t*	clust_index);	/*!< in: clustered index */
+/****************************************************************//**
+If the given column name is reserved for InnoDB system columns, return
+TRUE.
+@return	TRUE if name is reserved */
+UNIV_INTERN
+ibool
+dict_col_name_is_reserved(
+/*======================*/
+	const char*	name);	/*!< in: column name */
+/********************************************************************//**
+Acquire the autoinc lock. */
+UNIV_INTERN
+void
+dict_table_autoinc_lock(
+/*====================*/
+	dict_table_t*	table);	/*!< in/out: table */
+/********************************************************************//**
+Unconditionally set the autoinc counter. */
+UNIV_INTERN
+void
+dict_table_autoinc_initialize(
+/*==========================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	ib_uint64_t	value);	/*!< in: next value to assign to a row */
+/********************************************************************//**
+Reads the next autoinc value (== autoinc counter value), 0 if not yet
+initialized.
+@return	value for a new row, or 0 */
+UNIV_INTERN
+ib_uint64_t
+dict_table_autoinc_read(
+/*====================*/
+	const dict_table_t*	table);	/*!< in: table */
+/********************************************************************//**
+Updates the autoinc counter if the value supplied is greater than the
+current value. */
+UNIV_INTERN
+void
+dict_table_autoinc_update_if_greater(
+/*=================================*/
+
+	dict_table_t*	table,	/*!< in/out: table */
+	ib_uint64_t	value);	/*!< in: value which was assigned to a row */
+/********************************************************************//**
+Release the autoinc lock. */
+UNIV_INTERN
+void
+dict_table_autoinc_unlock(
+/*======================*/
+	dict_table_t*	table);	/*!< in/out: table */
+#endif /* !UNIV_HOTBACKUP */
+/**********************************************************************//**
+Adds system columns to a table object. */
+UNIV_INTERN
+void
+dict_table_add_system_columns(
+/*==========================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	mem_heap_t*	heap);	/*!< in: temporary heap */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Adds a table object to the dictionary cache. */
+UNIV_INTERN
+void
+dict_table_add_to_cache(
+/*====================*/
+	dict_table_t*	table,	/*!< in: table */
+	mem_heap_t*	heap);	/*!< in: temporary heap */
+/**********************************************************************//**
+Removes a table object from the dictionary cache. */
+UNIV_INTERN
+void
+dict_table_remove_from_cache(
+/*=========================*/
+	dict_table_t*	table);	/*!< in, own: table */
+/**********************************************************************//**
+Renames a table object.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+dict_table_rename_in_cache(
+/*=======================*/
+	dict_table_t*	table,		/*!< in/out: table */
+	const char*	new_name,	/*!< in: new name */
+	ibool		rename_also_foreigns);/*!< in: in ALTER TABLE we want
+					to preserve the original table name
+					in constraints which reference it */
+/**********************************************************************//**
+Removes an index from the dictionary cache. */
+UNIV_INTERN
+void
+dict_index_remove_from_cache(
+/*=========================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	dict_index_t*	index);	/*!< in, own: index */
+/**********************************************************************//**
+Change the id of a table object in the dictionary cache. This is used in
+DISCARD TABLESPACE. */
+UNIV_INTERN
+void
+dict_table_change_id_in_cache(
+/*==========================*/
+	dict_table_t*	table,	/*!< in/out: table object already in cache */
+	dulint		new_id);/*!< in: new id to set */
+/**********************************************************************//**
+Adds a foreign key constraint object to the dictionary cache. May free
+the object if there already is an object with the same identifier in.
+At least one of foreign table or referenced table must already be in
+the dictionary cache!
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+dict_foreign_add_to_cache(
+/*======================*/
+	dict_foreign_t*	foreign,	/*!< in, own: foreign key constraint */
+	ibool		check_charsets);/*!< in: TRUE=check charset
+					compatibility */
+/*********************************************************************//**
+Check if the index is referenced by a foreign key, if TRUE return the
+matching instance NULL otherwise.
+@return pointer to foreign key struct if index is defined for foreign
+key, otherwise NULL */
+UNIV_INTERN
+dict_foreign_t*
+dict_table_get_referenced_constraint(
+/*=================================*/
+	dict_table_t*	table,	/*!< in: InnoDB table */
+	dict_index_t*	index);	/*!< in: InnoDB index */
+/*********************************************************************//**
+Checks if a table is referenced by foreign keys.
+@return	TRUE if table is referenced by a foreign key */
+UNIV_INTERN
+ibool
+dict_table_is_referenced_by_foreign_key(
+/*====================================*/
+	const dict_table_t*	table);	/*!< in: InnoDB table */
+/**********************************************************************//**
+Replace the index in the foreign key list that matches this index's
+definition with an equivalent index. */
+UNIV_INTERN
+void
+dict_table_replace_index_in_foreign_list(
+/*=====================================*/
+	dict_table_t*	table,  /*!< in/out: table */
+	dict_index_t*	index);	/*!< in: index to be replaced */
+/*********************************************************************//**
+Checks if a index is defined for a foreign key constraint. Index is a part
+of a foreign key constraint if the index is referenced by foreign key
+or index is a foreign key index
+@return pointer to foreign key struct if index is defined for foreign
+key, otherwise NULL */
+UNIV_INTERN
+dict_foreign_t*
+dict_table_get_foreign_constraint(
+/*==============================*/
+	dict_table_t*	table,	/*!< in: InnoDB table */
+	dict_index_t*	index);	/*!< in: InnoDB index */
+/*********************************************************************//**
+Scans a table create SQL string and adds to the data dictionary
+the foreign key constraints declared in the string. This function
+should be called after the indexes for a table have been created.
+Each foreign key constraint must be accompanied with indexes in
+bot participating tables. The indexes are allowed to contain more
+fields than mentioned in the constraint.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+ulint
+dict_create_foreign_constraints(
+/*============================*/
+	trx_t*		trx,		/*!< in: transaction */
+	const char*	sql_string,	/*!< in: table create statement where
+					foreign keys are declared like:
+					FOREIGN KEY (a, b) REFERENCES
+					table2(c, d), table2 can be written
+					also with the database
+					name before it: test.table2; the
+					default database id the database of
+					parameter name */
+	const char*	name,		/*!< in: table full name in the
+					normalized form
+					database_name/table_name */
+	ibool		reject_fks);	/*!< in: if TRUE, fail with error
+					code DB_CANNOT_ADD_CONSTRAINT if
+					any foreign keys are found. */
+/**********************************************************************//**
+Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
+@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
+constraint id does not match */
+UNIV_INTERN
+ulint
+dict_foreign_parse_drop_constraints(
+/*================================*/
+	mem_heap_t*	heap,			/*!< in: heap from which we can
+						allocate memory */
+	trx_t*		trx,			/*!< in: transaction */
+	dict_table_t*	table,			/*!< in: table */
+	ulint*		n,			/*!< out: number of constraints
+						to drop */
+	const char***	constraints_to_drop);	/*!< out: id's of the
+						constraints to drop */
+/**********************************************************************//**
+Returns a table object and optionally increment its MySQL open handle count.
+NOTE! This is a high-level function to be used mainly from outside the
+'dict' directory. Inside this directory dict_table_get_low is usually the
+appropriate function.
+@return	table, NULL if does not exist */
+UNIV_INTERN
+dict_table_t*
+dict_table_get(
+/*===========*/
+	const char*	table_name,	/*!< in: table name */
+	ibool		inc_mysql_count);
+					/*!< in: whether to increment the open
+					handle count on the table */
+/**********************************************************************//**
+Returns a index object, based on table and index id, and memoryfixes it.
+@return	index, NULL if does not exist */
+UNIV_INTERN
+dict_index_t*
+dict_index_get_on_id_low(
+/*=====================*/
+	dict_table_t*	table,		/*!< in: table */
+	dulint		index_id);	/*!< in: index id */
+/**********************************************************************//**
+Checks if a table is in the dictionary cache.
+@return	table, NULL if not found */
+
+UNIV_INLINE
+dict_table_t*
+dict_table_check_if_in_cache_low(
+/*=============================*/
+	const char*	table_name);	/*!< in: table name */
+/**********************************************************************//**
+Gets a table; loads it to the dictionary cache if necessary. A low-level
+function.
+@return	table, NULL if not found */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_low(
+/*===============*/
+	const char*	table_name);	/*!< in: table name */
+/**********************************************************************//**
+Returns a table object based on table id.
+@return	table, NULL if does not exist */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_on_id_low(
+/*=====================*/
+	dulint	table_id);	/*!< in: table id */
+/**********************************************************************//**
+Find an index that is equivalent to the one passed in and is not marked
+for deletion.
+@return	index equivalent to foreign->foreign_index, or NULL */
+UNIV_INTERN
+dict_index_t*
+dict_foreign_find_equiv_index(
+/*==========================*/
+	dict_foreign_t*	foreign);/*!< in: foreign key */
+/**********************************************************************//**
+Returns an index object by matching on the name and column names and
+if more than one index matches return the index with the max id
+@return	matching index, NULL if not found */
+UNIV_INTERN
+dict_index_t*
+dict_table_get_index_by_max_id(
+/*===========================*/
+	dict_table_t*	table,	/*!< in: table */
+	const char*	name,	/*!< in: the index name to find */
+	const char**	columns,/*!< in: array of column names */
+	ulint		n_cols);/*!< in: number of columns */
+/**********************************************************************//**
+Returns a column's name.
+@return column name. NOTE: not guaranteed to stay valid if table is
+modified in any way (columns added, etc.). */
+UNIV_INTERN
+const char*
+dict_table_get_col_name(
+/*====================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			col_nr);/*!< in: column number */
+
+/**********************************************************************//**
+Prints a table definition. */
+UNIV_INTERN
+void
+dict_table_print(
+/*=============*/
+	dict_table_t*	table);	/*!< in: table */
+/**********************************************************************//**
+Prints a table data. */
+UNIV_INTERN
+void
+dict_table_print_low(
+/*=================*/
+	dict_table_t*	table);	/*!< in: table */
+/**********************************************************************//**
+Prints a table data when we know the table name. */
+UNIV_INTERN
+void
+dict_table_print_by_name(
+/*=====================*/
+	const char*	name);	/*!< in: table name */
+/**********************************************************************//**
+Outputs info on foreign keys of a table. */
+UNIV_INTERN
+void
+dict_print_info_on_foreign_keys(
+/*============================*/
+	ibool		create_table_format, /*!< in: if TRUE then print in
+				a format suitable to be inserted into
+				a CREATE TABLE, otherwise in the format
+				of SHOW TABLE STATUS */
+	FILE*		file,	/*!< in: file where to print */
+	trx_t*		trx,	/*!< in: transaction */
+	dict_table_t*	table);	/*!< in: table */
+/**********************************************************************//**
+Outputs info on a foreign key of a table in a format suitable for
+CREATE TABLE. */
+UNIV_INTERN
+void
+dict_print_info_on_foreign_key_in_create_format(
+/*============================================*/
+	FILE*		file,		/*!< in: file where to print */
+	trx_t*		trx,		/*!< in: transaction */
+	dict_foreign_t*	foreign,	/*!< in: foreign key constraint */
+	ibool		add_newline);	/*!< in: whether to add a newline */
+/********************************************************************//**
+Displays the names of the index and the table. */
+UNIV_INTERN
+void
+dict_index_name_print(
+/*==================*/
+	FILE*			file,	/*!< in: output stream */
+	trx_t*			trx,	/*!< in: transaction */
+	const dict_index_t*	index);	/*!< in: index to print */
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the first index on the table (the clustered index).
+@return	index, NULL if none exists */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_first_index(
+/*=======================*/
+	const dict_table_t*	table);	/*!< in: table */
+/********************************************************************//**
+Gets the next index on the table.
+@return	index, NULL if none left */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_next_index(
+/*======================*/
+	const dict_index_t*	index);	/*!< in: index */
+#else /* UNIV_DEBUG */
+# define dict_table_get_first_index(table) UT_LIST_GET_FIRST((table)->indexes)
+# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index)
+#endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Check whether the index is the clustered index.
+@return	nonzero for clustered index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_clust(
+/*================*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((pure));
+/********************************************************************//**
+Check whether the index is unique.
+@return	nonzero for unique index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_unique(
+/*=================*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((pure));
+/********************************************************************//**
+Check whether the index is the insert buffer tree.
+@return	nonzero for insert buffer, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_ibuf(
+/*===============*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((pure));
+/********************************************************************//**
+Check whether the index is a secondary index or the insert buffer tree.
+@return	nonzero for insert buffer, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_sec_or_ibuf(
+/*======================*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((pure));
+
+/********************************************************************//**
+Gets the number of user-defined columns in a table in the dictionary
+cache.
+@return	number of user-defined (e.g., not ROW_ID) columns of a table */
+UNIV_INLINE
+ulint
+dict_table_get_n_user_cols(
+/*=======================*/
+	const dict_table_t*	table);	/*!< in: table */
+/********************************************************************//**
+Gets the number of system columns in a table in the dictionary cache.
+@return	number of system (e.g., ROW_ID) columns of a table */
+UNIV_INLINE
+ulint
+dict_table_get_n_sys_cols(
+/*======================*/
+	const dict_table_t*	table);	/*!< in: table */
+/********************************************************************//**
+Gets the number of all columns (also system) in a table in the dictionary
+cache.
+@return	number of columns of a table */
+UNIV_INLINE
+ulint
+dict_table_get_n_cols(
+/*==================*/
+	const dict_table_t*	table);	/*!< in: table */
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the nth column of a table.
+@return	pointer to column object */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_nth_col(
+/*===================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			pos);	/*!< in: position of column */
+/********************************************************************//**
+Gets the given system column of a table.
+@return	pointer to column object */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_sys_col(
+/*===================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			sys);	/*!< in: DATA_ROW_ID, ... */
+#else /* UNIV_DEBUG */
+#define dict_table_get_nth_col(table, pos) \
+((table)->cols + (pos))
+#define dict_table_get_sys_col(table, sys) \
+((table)->cols + (table)->n_cols + (sys) - DATA_N_SYS_COLS)
+#endif /* UNIV_DEBUG */
+/********************************************************************//**
+Gets the given system column number of a table.
+@return	column number */
+UNIV_INLINE
+ulint
+dict_table_get_sys_col_no(
+/*======================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			sys);	/*!< in: DATA_ROW_ID, ... */
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Returns the minimum data size of an index record.
+@return	minimum data size in bytes */
+UNIV_INLINE
+ulint
+dict_index_get_min_size(
+/*====================*/
+	const dict_index_t*	index);	/*!< in: index */
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Check whether the table uses the compact page format.
+@return	TRUE if table uses the compact page format */
+UNIV_INLINE
+ibool
+dict_table_is_comp(
+/*===============*/
+	const dict_table_t*	table);	/*!< in: table */
+/********************************************************************//**
+Determine the file format of a table.
+@return	file format version */
+UNIV_INLINE
+ulint
+dict_table_get_format(
+/*==================*/
+	const dict_table_t*	table);	/*!< in: table */
+/********************************************************************//**
+Set the file format of a table. */
+UNIV_INLINE
+void
+dict_table_set_format(
+/*==================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	ulint		format);/*!< in: file format version */
+/********************************************************************//**
+Extract the compressed page size from table flags.
+@return	compressed page size, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_flags_to_zip_size(
+/*=========================*/
+	ulint	flags)	/*!< in: flags */
+	__attribute__((const));
+/********************************************************************//**
+Check whether the table uses the compressed compact page format.
+@return	compressed page size, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_zip_size(
+/*================*/
+	const dict_table_t*	table);	/*!< in: table */
+/********************************************************************//**
+Checks if a column is in the ordering columns of the clustered index of a
+table. Column prefixes are treated like whole columns.
+@return	TRUE if the column, or its prefix, is in the clustered key */
+UNIV_INTERN
+ibool
+dict_table_col_in_clustered_key(
+/*============================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			n);	/*!< in: column number */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Copies types of columns contained in table to tuple and sets all
+fields of the tuple to the SQL NULL value.  This function should
+be called right after dtuple_create(). */
+UNIV_INTERN
+void
+dict_table_copy_types(
+/*==================*/
+	dtuple_t*		tuple,	/*!< in/out: data tuple */
+	const dict_table_t*	table);	/*!< in: table */
+/**********************************************************************//**
+Looks for an index with the given id. NOTE that we do not reserve
+the dictionary mutex: this function is for emergency purposes like
+printing info of a corrupt database page!
+@return	index or NULL if not found from cache */
+UNIV_INTERN
+dict_index_t*
+dict_index_find_on_id_low(
+/*======================*/
+	dulint	id);	/*!< in: index id */
+/**********************************************************************//**
+Adds an index to the dictionary cache.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+dict_index_add_to_cache(
+/*====================*/
+	dict_table_t*	table,	/*!< in: table on which the index is */
+	dict_index_t*	index,	/*!< in, own: index; NOTE! The index memory
+				object is freed in this function! */
+	ulint		page_no,/*!< in: root page number of the index */
+	ibool		strict);/*!< in: TRUE=refuse to create the index
+				if records could be too big to fit in
+				an B-tree page */
+/**********************************************************************//**
+Removes an index from the dictionary cache. */
+UNIV_INTERN
+void
+dict_index_remove_from_cache(
+/*=========================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	dict_index_t*	index);	/*!< in, own: index */
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Gets the number of fields in the internal representation of an index,
+including fields added by the dictionary system.
+@return	number of fields */
+UNIV_INLINE
+ulint
+dict_index_get_n_fields(
+/*====================*/
+	const dict_index_t*	index);	/*!< in: an internal
+					representation of index (in
+					the dictionary cache) */
+/********************************************************************//**
+Gets the number of fields in the internal representation of an index
+that uniquely determine the position of an index entry in the index, if
+we do not take multiversioning into account: in the B-tree use the value
+returned by dict_index_get_n_unique_in_tree.
+@return	number of fields */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique(
+/*====================*/
+	const dict_index_t*	index);	/*!< in: an internal representation
+					of index (in the dictionary cache) */
+/********************************************************************//**
+Gets the number of fields in the internal representation of an index
+which uniquely determine the position of an index entry in the index, if
+we also take multiversioning into account.
+@return	number of fields */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique_in_tree(
+/*============================*/
+	const dict_index_t*	index);	/*!< in: an internal representation
+					of index (in the dictionary cache) */
+/********************************************************************//**
+Gets the number of user-defined ordering fields in the index. In the internal
+representation we add the row id to the ordering fields to make all indexes
+unique, but this function returns the number of fields the user defined
+in the index as ordering fields.
+@return	number of fields */
+UNIV_INLINE
+ulint
+dict_index_get_n_ordering_defined_by_user(
+/*======================================*/
+	const dict_index_t*	index);	/*!< in: an internal representation
+					of index (in the dictionary cache) */
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the nth field of an index.
+@return	pointer to field object */
+UNIV_INLINE
+dict_field_t*
+dict_index_get_nth_field(
+/*=====================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			pos);	/*!< in: position of field */
+#else /* UNIV_DEBUG */
+# define dict_index_get_nth_field(index, pos) ((index)->fields + (pos))
+#endif /* UNIV_DEBUG */
+/********************************************************************//**
+Gets pointer to the nth column in an index.
+@return	column */
+UNIV_INLINE
+const dict_col_t*
+dict_index_get_nth_col(
+/*===================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			pos);	/*!< in: position of the field */
+/********************************************************************//**
+Gets the column number of the nth field in an index.
+@return	column number */
+UNIV_INLINE
+ulint
+dict_index_get_nth_col_no(
+/*======================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			pos);	/*!< in: position of the field */
+/********************************************************************//**
+Looks for column n in an index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INTERN
+ulint
+dict_index_get_nth_col_pos(
+/*=======================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			n);	/*!< in: column number */
+/********************************************************************//**
+Returns TRUE if the index contains a column or a prefix of that column.
+@return	TRUE if contains the column or its prefix */
+UNIV_INTERN
+ibool
+dict_index_contains_col_or_prefix(
+/*==============================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			n);	/*!< in: column number */
+/********************************************************************//**
+Looks for a matching field in an index. The column has to be the same. The
+column in index must be complete, or must contain a prefix longer than the
+column in index2. That is, we must be able to construct the prefix in index2
+from the prefix in index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INTERN
+ulint
+dict_index_get_nth_field_pos(
+/*=========================*/
+	const dict_index_t*	index,	/*!< in: index from which to search */
+	const dict_index_t*	index2,	/*!< in: index */
+	ulint			n);	/*!< in: field number in index2 */
+/********************************************************************//**
+Looks for column n position in the clustered index.
+@return	position in internal representation of the clustered index */
+UNIV_INTERN
+ulint
+dict_table_get_nth_col_pos(
+/*=======================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			n);	/*!< in: column number */
+/********************************************************************//**
+Returns the position of a system column in an index.
+@return	position, ULINT_UNDEFINED if not contained */
+UNIV_INLINE
+ulint
+dict_index_get_sys_col_pos(
+/*=======================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			type);	/*!< in: DATA_ROW_ID, ... */
+/*******************************************************************//**
+Adds a column to index. */
+UNIV_INTERN
+void
+dict_index_add_col(
+/*===============*/
+	dict_index_t*		index,		/*!< in/out: index */
+	const dict_table_t*	table,		/*!< in: table */
+	dict_col_t*		col,		/*!< in: column */
+	ulint			prefix_len);	/*!< in: column prefix length */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Copies types of fields contained in index to tuple. */
+UNIV_INTERN
+void
+dict_index_copy_types(
+/*==================*/
+	dtuple_t*		tuple,		/*!< in/out: data tuple */
+	const dict_index_t*	index,		/*!< in: index */
+	ulint			n_fields);	/*!< in: number of
+						field types to copy */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Gets the field column.
+@return	field->col, pointer to the table column */
+UNIV_INLINE
+const dict_col_t*
+dict_field_get_col(
+/*===============*/
+	const dict_field_t*	field);	/*!< in: index field */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Returns an index object if it is found in the dictionary cache.
+Assumes that dict_sys->mutex is already being held.
+@return	index, NULL if not found */
+UNIV_INTERN
+dict_index_t*
+dict_index_get_if_in_cache_low(
+/*===========================*/
+	dulint	index_id);	/*!< in: index id */
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
+Returns an index object if it is found in the dictionary cache.
+@return	index, NULL if not found */
+UNIV_INTERN
+dict_index_t*
+dict_index_get_if_in_cache(
+/*=======================*/
+	dulint	index_id);	/*!< in: index id */
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Checks that a tuple has n_fields_cmp value in a sensible range, so that
+no comparison can occur with the page number field in a node pointer.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dict_index_check_search_tuple(
+/*==========================*/
+	const dict_index_t*	index,	/*!< in: index tree */
+	const dtuple_t*		tuple);	/*!< in: tuple used in a search */
+/**********************************************************************//**
+Check for duplicate index entries in a table [using the index name] */
+UNIV_INTERN
+void
+dict_table_check_for_dup_indexes(
+/*=============================*/
+	const dict_table_t*	table);	/*!< in: Check for dup indexes
+					in this table */
+
+#endif /* UNIV_DEBUG */
+/**********************************************************************//**
+Builds a node pointer out of a physical record and a page number.
+@return	own: node pointer */
+UNIV_INTERN
+dtuple_t*
+dict_index_build_node_ptr(
+/*======================*/
+	const dict_index_t*	index,	/*!< in: index */
+	const rec_t*		rec,	/*!< in: record for which to build node
+					pointer */
+	ulint			page_no,/*!< in: page number to put in node
+					pointer */
+	mem_heap_t*		heap,	/*!< in: memory heap where pointer
+					created */
+	ulint			level);	/*!< in: level of rec in tree:
+					0 means leaf level */
+/**********************************************************************//**
+Copies an initial segment of a physical record, long enough to specify an
+index entry uniquely.
+@return	pointer to the prefix record */
+UNIV_INTERN
+rec_t*
+dict_index_copy_rec_order_prefix(
+/*=============================*/
+	const dict_index_t*	index,	/*!< in: index */
+	const rec_t*		rec,	/*!< in: record for which to
+					copy prefix */
+	ulint*			n_fields,/*!< out: number of fields copied */
+	byte**			buf,	/*!< in/out: memory buffer for the
+					copied prefix, or NULL */
+	ulint*			buf_size);/*!< in/out: buffer size */
+/**********************************************************************//**
+Builds a typed data tuple out of a physical record.
+@return	own: data tuple */
+UNIV_INTERN
+dtuple_t*
+dict_index_build_data_tuple(
+/*========================*/
+	dict_index_t*	index,	/*!< in: index */
+	rec_t*		rec,	/*!< in: record for which to build data tuple */
+	ulint		n_fields,/*!< in: number of data fields */
+	mem_heap_t*	heap);	/*!< in: memory heap where tuple created */
+/*********************************************************************//**
+Gets the space id of the root of the index tree.
+@return	space id */
+UNIV_INLINE
+ulint
+dict_index_get_space(
+/*=================*/
+	const dict_index_t*	index);	/*!< in: index */
+/*********************************************************************//**
+Sets the space id of the root of the index tree. */
+UNIV_INLINE
+void
+dict_index_set_space(
+/*=================*/
+	dict_index_t*	index,	/*!< in/out: index */
+	ulint		space);	/*!< in: space id */
+/*********************************************************************//**
+Gets the page number of the root of the index tree.
+@return	page number */
+UNIV_INLINE
+ulint
+dict_index_get_page(
+/*================*/
+	const dict_index_t*	tree);	/*!< in: index */
+/*********************************************************************//**
+Sets the page number of the root of index tree. */
+UNIV_INLINE
+void
+dict_index_set_page(
+/*================*/
+	dict_index_t*	index,	/*!< in/out: index */
+	ulint		page);	/*!< in: page number */
+/*********************************************************************//**
+Gets the read-write lock of the index tree.
+@return	read-write lock */
+UNIV_INLINE
+rw_lock_t*
+dict_index_get_lock(
+/*================*/
+	dict_index_t*	index);	/*!< in: index */
+/********************************************************************//**
+Returns free space reserved for future updates of records. This is
+relevant only in the case of many consecutive inserts, as updates
+which make the records bigger might fragment the index.
+@return	number of free bytes on page, reserved for updates */
+UNIV_INLINE
+ulint
+dict_index_get_space_reserve(void);
+/*==============================*/
+/*********************************************************************//**
+Calculates the minimum record length in an index. */
+UNIV_INTERN
+ulint
+dict_index_calc_min_rec_len(
+/*========================*/
+	const dict_index_t*	index);	/*!< in: index */
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization. */
+UNIV_INTERN
+void
+dict_update_statistics_low(
+/*=======================*/
+	dict_table_t*	table,		/*!< in/out: table */
+	ibool		has_dict_mutex);/*!< in: TRUE if the caller has the
+					dictionary mutex */
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization. */
+UNIV_INTERN
+void
+dict_update_statistics(
+/*===================*/
+	dict_table_t*	table);	/*!< in/out: table */
+/********************************************************************//**
+Reserves the dictionary system mutex for MySQL. */
+UNIV_INTERN
+void
+dict_mutex_enter_for_mysql(void);
+/*============================*/
+/********************************************************************//**
+Releases the dictionary system mutex for MySQL. */
+UNIV_INTERN
+void
+dict_mutex_exit_for_mysql(void);
+/*===========================*/
+/********************************************************************//**
+Checks if the database name in two table names is the same.
+@return	TRUE if same db name */
+UNIV_INTERN
+ibool
+dict_tables_have_same_db(
+/*=====================*/
+	const char*	name1,	/*!< in: table name in the form
+				dbname '/' tablename */
+	const char*	name2);	/*!< in: table name in the form
+				dbname '/' tablename */
+/*********************************************************************//**
+Removes an index from the cache */
+UNIV_INTERN
+void
+dict_index_remove_from_cache(
+/*=========================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	dict_index_t*	index);	/*!< in, own: index */
+/**********************************************************************//**
+Get index by name
+@return	index, NULL if does not exist */
+UNIV_INTERN
+dict_index_t*
+dict_table_get_index_on_name(
+/*=========================*/
+	dict_table_t*	table,	/*!< in: table */
+	const char*	name);	/*!< in: name of the index to find */
+/**********************************************************************//**
+In case there is more than one index with the same name return the index
+with the min(id).
+@return	index, NULL if does not exist */
+UNIV_INTERN
+dict_index_t*
+dict_table_get_index_on_name_and_min_id(
+/*====================================*/
+	dict_table_t*	table,	/*!< in: table */
+	const char*	name);	/*!< in: name of the index to find */
+/* Buffers for storing detailed information about the latest foreign key
+and unique key errors */
+extern FILE*	dict_foreign_err_file;
+extern mutex_t	dict_foreign_err_mutex; /* mutex protecting the buffers */
+
+/** the dictionary system */
+extern dict_sys_t*	dict_sys;
+/** the data dictionary rw-latch protecting dict_sys */
+extern rw_lock_t	dict_operation_lock;
+
+/* Dictionary system struct */
+struct dict_sys_struct{
+	mutex_t		mutex;		/*!< mutex protecting the data
+					dictionary; protects also the
+					disk-based dictionary system tables;
+					this mutex serializes CREATE TABLE
+					and DROP TABLE, as well as reading
+					the dictionary data for a table from
+					system tables */
+	dulint		row_id;		/*!< the next row id to assign;
+					NOTE that at a checkpoint this
+					must be written to the dict system
+					header and flushed to a file; in
+					recovery this must be derived from
+					the log records */
+	hash_table_t*	table_hash;	/*!< hash table of the tables, based
+					on name */
+	hash_table_t*	table_id_hash;	/*!< hash table of the tables, based
+					on id */
+	UT_LIST_BASE_NODE_T(dict_table_t)
+			table_LRU;	/*!< LRU list of tables */
+	ulint		size;		/*!< varying space in bytes occupied
+					by the data dictionary table and
+					index objects */
+	dict_table_t*	sys_tables;	/*!< SYS_TABLES table */
+	dict_table_t*	sys_columns;	/*!< SYS_COLUMNS table */
+	dict_table_t*	sys_indexes;	/*!< SYS_INDEXES table */
+	dict_table_t*	sys_fields;	/*!< SYS_FIELDS table */
+};
+#endif /* !UNIV_HOTBACKUP */
+
+/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */
+extern dict_index_t*	dict_ind_redundant;
+/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */
+extern dict_index_t*	dict_ind_compact;
+
+/**********************************************************************//**
+Inits dict_ind_redundant and dict_ind_compact. */
+UNIV_INTERN
+void
+dict_ind_init(void);
+/*===============*/
+
+#ifndef UNIV_NONINL
+#include "dict0dict.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innodb_plugin/include/dict0dict.ic
similarity index 50%
rename from storage/innobase/include/dict0dict.ic
rename to storage/innodb_plugin/include/dict0dict.ic
index 7d38cbcd1fa..46e78df8272 100644
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innodb_plugin/include/dict0dict.ic
@@ -1,25 +1,41 @@
-/**********************************************************************
-Data dictionary system
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/dict0dict.ic
+Data dictionary system
 
 Created 1/8/1996 Heikki Tuuri
 ***********************************************************************/
 
-#include "dict0load.h"
-#include "trx0undo.h"
-#include "trx0sys.h"
-#include "rem0types.h"
 #include "data0type.h"
+#ifndef UNIV_HOTBACKUP
+#include "dict0load.h"
+#include "rem0types.h"
 
-/*************************************************************************
+/*********************************************************************//**
 Gets the column data type. */
 UNIV_INLINE
 void
 dict_col_copy_type(
 /*===============*/
-	const dict_col_t*	col,	/* in: column */
-	dtype_t*		type)	/* out: data type */
+	const dict_col_t*	col,	/*!< in: column */
+	dtype_t*		type)	/*!< out: data type */
 {
 	ut_ad(col && type);
 
@@ -29,17 +45,18 @@ dict_col_copy_type(
 	type->mbminlen = col->mbminlen;
 	type->mbmaxlen = col->mbmaxlen;
 }
+#endif /* !UNIV_HOTBACKUP */
 
 #ifdef UNIV_DEBUG
-/*************************************************************************
-Assert that a column and a data type match. */
+/*********************************************************************//**
+Assert that a column and a data type match.
+@return	TRUE */
 UNIV_INLINE
 ibool
 dict_col_type_assert_equal(
 /*=======================*/
-					/* out: TRUE */
-	const dict_col_t*	col,	/* in: column */
-	const dtype_t*		type)	/* in: data type */
+	const dict_col_t*	col,	/*!< in: column */
+	const dtype_t*		type)	/*!< in: data type */
 {
 	ut_ad(col);
 	ut_ad(type);
@@ -47,88 +64,95 @@ dict_col_type_assert_equal(
 	ut_ad(col->mtype == type->mtype);
 	ut_ad(col->prtype == type->prtype);
 	ut_ad(col->len == type->len);
+# ifndef UNIV_HOTBACKUP
 	ut_ad(col->mbminlen == type->mbminlen);
 	ut_ad(col->mbmaxlen == type->mbmaxlen);
+# endif /* !UNIV_HOTBACKUP */
 
 	return(TRUE);
 }
 #endif /* UNIV_DEBUG */
 
-/***************************************************************************
-Returns the minimum size of the column. */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Returns the minimum size of the column.
+@return	minimum size */
 UNIV_INLINE
 ulint
 dict_col_get_min_size(
 /*==================*/
-					/* out: minimum size */
-	const dict_col_t*	col)	/* in: column */
+	const dict_col_t*	col)	/*!< in: column */
 {
 	return(dtype_get_min_size_low(col->mtype, col->prtype, col->len,
 				      col->mbminlen, col->mbmaxlen));
 }
-/***************************************************************************
-Returns the maximum size of the column. */
+/***********************************************************************//**
+Returns the maximum size of the column.
+@return	maximum size */
 UNIV_INLINE
 ulint
 dict_col_get_max_size(
 /*==================*/
-					/* out: maximum size */
-	const dict_col_t*	col)	/* in: column */
+	const dict_col_t*	col)	/*!< in: column */
 {
 	return(dtype_get_max_size_low(col->mtype, col->len));
 }
-/***************************************************************************
-Returns the size of a fixed size column, 0 if not a fixed size column. */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************************//**
+Returns the size of a fixed size column, 0 if not a fixed size column.
+@return	fixed size, or 0 */
 UNIV_INLINE
 ulint
 dict_col_get_fixed_size(
 /*====================*/
-					/* out: fixed size, or 0 */
-	const dict_col_t*	col)	/* in: column */
+	const dict_col_t*	col,	/*!< in: column */
+	ulint			comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
 {
 	return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len,
-					col->mbminlen, col->mbmaxlen));
+					col->mbminlen, col->mbmaxlen, comp));
 }
-/***************************************************************************
+/***********************************************************************//**
 Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
-For fixed length types it is the fixed length of the type, otherwise 0. */
+For fixed length types it is the fixed length of the type, otherwise 0.
+@return	SQL null storage size in ROW_FORMAT=REDUNDANT */
 UNIV_INLINE
 ulint
 dict_col_get_sql_null_size(
 /*=======================*/
-					/* out: SQL null storage size
-					in ROW_FORMAT=REDUNDANT */
-	const dict_col_t*	col)	/* in: column */
+	const dict_col_t*	col,	/*!< in: column */
+	ulint			comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
 {
-	return(dict_col_get_fixed_size(col));
+	return(dict_col_get_fixed_size(col, comp));
 }
 
-/*************************************************************************
-Gets the column number. */
+/*********************************************************************//**
+Gets the column number.
+@return	col->ind, table column position (starting from 0) */
 UNIV_INLINE
 ulint
 dict_col_get_no(
 /*============*/
-	const dict_col_t*	col)
+	const dict_col_t*	col)	/*!< in: column */
 {
 	ut_ad(col);
 
 	return(col->ind);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Gets the column position in the clustered index. */
 UNIV_INLINE
 ulint
 dict_col_get_clust_pos(
 /*===================*/
-	const dict_col_t*	col,		/* in: table column */
-	const dict_index_t*	clust_index)	/* in: clustered index */
+	const dict_col_t*	col,		/*!< in: table column */
+	const dict_index_t*	clust_index)	/*!< in: clustered index */
 {
 	ulint	i;
 
 	ut_ad(col);
-	ut_ad(clust_index && clust_index->type & DICT_CLUSTERED);
+	ut_ad(clust_index);
+	ut_ad(dict_index_is_clust(clust_index));
 
 	for (i = 0; i < clust_index->n_def; i++) {
 		const dict_field_t*	field = &clust_index->fields[i];
@@ -141,46 +165,112 @@ dict_col_get_clust_pos(
 	return(ULINT_UNDEFINED);
 }
 
-/************************************************************************
-Gets the first index on the table (the clustered index). */
+#ifndef UNIV_HOTBACKUP
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the first index on the table (the clustered index).
+@return	index, NULL if none exists */
 UNIV_INLINE
 dict_index_t*
 dict_table_get_first_index(
 /*=======================*/
-				/* out: index, NULL if none exists */
-	dict_table_t*	table)	/* in: table */
+	const dict_table_t*	table)	/*!< in: table */
 {
 	ut_ad(table);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 
-	return(UT_LIST_GET_FIRST(table->indexes));
+	return(UT_LIST_GET_FIRST(((dict_table_t*) table)->indexes));
 }
 
-/************************************************************************
-Gets the next index on the table. */
+/********************************************************************//**
+Gets the next index on the table.
+@return	index, NULL if none left */
 UNIV_INLINE
 dict_index_t*
 dict_table_get_next_index(
 /*======================*/
-				/* out: index, NULL if none left */
-	dict_index_t*	index)	/* in: index */
+	const dict_index_t*	index)	/*!< in: index */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 
-	return(UT_LIST_GET_NEXT(indexes, index));
+	return(UT_LIST_GET_NEXT(indexes, (dict_index_t*) index));
+}
+#endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************************//**
+Check whether the index is the clustered index.
+@return	nonzero for clustered index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_clust(
+/*================*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(UNIV_UNLIKELY(index->type & DICT_CLUSTERED));
+}
+/********************************************************************//**
+Check whether the index is unique.
+@return	nonzero for unique index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_unique(
+/*=================*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(UNIV_UNLIKELY(index->type & DICT_UNIQUE));
 }
 
-/************************************************************************
+/********************************************************************//**
+Check whether the index is the insert buffer tree.
+@return	nonzero for insert buffer, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_ibuf(
+/*===============*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(UNIV_UNLIKELY(index->type & DICT_IBUF));
+}
+
+/********************************************************************//**
+Check whether the index is a secondary index or the insert buffer tree.
+@return	nonzero for insert buffer, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_sec_or_ibuf(
+/*======================*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	ulint	type;
+
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	type = index->type;
+
+	return(UNIV_LIKELY(!(type & DICT_CLUSTERED) || (type & DICT_IBUF)));
+}
+
+/********************************************************************//**
 Gets the number of user-defined columns in a table in the dictionary
-cache. */
+cache.
+@return	number of user-defined (e.g., not ROW_ID) columns of a table */
 UNIV_INLINE
 ulint
 dict_table_get_n_user_cols(
 /*=======================*/
-				/* out: number of user-defined (e.g., not
-				ROW_ID) columns of a table */
-	dict_table_t*	table)	/* in: table */
+	const dict_table_t*	table)	/*!< in: table */
 {
 	ut_ad(table);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
@@ -188,15 +278,14 @@ dict_table_get_n_user_cols(
 	return(table->n_cols - DATA_N_SYS_COLS);
 }
 
-/************************************************************************
-Gets the number of system columns in a table in the dictionary cache. */
+/********************************************************************//**
+Gets the number of system columns in a table in the dictionary cache.
+@return	number of system (e.g., ROW_ID) columns of a table */
 UNIV_INLINE
 ulint
 dict_table_get_n_sys_cols(
 /*======================*/
-				/* out: number of system (e.g.,
-				ROW_ID) columns of a table */
-	dict_table_t*	table __attribute__((unused)))	/* in: table */
+	const dict_table_t*	table __attribute__((unused)))	/*!< in: table */
 {
 	ut_ad(table);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
@@ -205,15 +294,15 @@ dict_table_get_n_sys_cols(
 	return(DATA_N_SYS_COLS);
 }
 
-/************************************************************************
+/********************************************************************//**
 Gets the number of all columns (also system) in a table in the dictionary
-cache. */
+cache.
+@return	number of columns of a table */
 UNIV_INLINE
 ulint
 dict_table_get_n_cols(
 /*==================*/
-				/* out: number of columns of a table */
-	dict_table_t*	table)	/* in: table */
+	const dict_table_t*	table)	/*!< in: table */
 {
 	ut_ad(table);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
@@ -221,34 +310,35 @@ dict_table_get_n_cols(
 	return(table->n_cols);
 }
 
-/************************************************************************
-Gets the nth column of a table. */
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the nth column of a table.
+@return	pointer to column object */
 UNIV_INLINE
-const dict_col_t*
+dict_col_t*
 dict_table_get_nth_col(
 /*===================*/
-					/* out: pointer to column object */
-	const dict_table_t*	table,	/* in: table */
-	ulint			pos)	/* in: position of column */
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			pos)	/*!< in: position of column */
 {
 	ut_ad(table);
 	ut_ad(pos < table->n_def);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 
-	return((table->cols) + pos);
+	return((dict_col_t*) (table->cols) + pos);
 }
 
-/************************************************************************
-Gets the given system column of a table. */
+/********************************************************************//**
+Gets the given system column of a table.
+@return	pointer to column object */
 UNIV_INLINE
-const dict_col_t*
+dict_col_t*
 dict_table_get_sys_col(
 /*===================*/
-					/* out: pointer to column object */
-	const dict_table_t*	table,	/* in: table */
-	ulint			sys)	/* in: DATA_ROW_ID, ... */
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			sys)	/*!< in: DATA_ROW_ID, ... */
 {
-	const dict_col_t*	col;
+	dict_col_t*	col;
 
 	ut_ad(table);
 	ut_ad(sys < DATA_N_SYS_COLS);
@@ -261,16 +351,17 @@ dict_table_get_sys_col(
 
 	return(col);
 }
+#endif /* UNIV_DEBUG */
 
-/************************************************************************
-Gets the given system column number of a table. */
+/********************************************************************//**
+Gets the given system column number of a table.
+@return	column number */
 UNIV_INLINE
 ulint
 dict_table_get_sys_col_no(
 /*======================*/
-				/* out: column number */
-	dict_table_t*	table,	/* in: table */
-	ulint		sys)	/* in: DATA_ROW_ID, ... */
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			sys)	/*!< in: DATA_ROW_ID, ... */
 {
 	ut_ad(table);
 	ut_ad(sys < DATA_N_SYS_COLS);
@@ -279,15 +370,14 @@ dict_table_get_sys_col_no(
 	return(table->n_cols - DATA_N_SYS_COLS + sys);
 }
 
-/************************************************************************
-Check whether the table uses the compact page format. */
+/********************************************************************//**
+Check whether the table uses the compact page format.
+@return	TRUE if table uses the compact page format */
 UNIV_INLINE
 ibool
 dict_table_is_comp(
 /*===============*/
-					/* out: TRUE if table uses the
-					compact page format */
-	const dict_table_t*	table)	/* in: table */
+	const dict_table_t*	table)	/*!< in: table */
 {
 	ut_ad(table);
 
@@ -298,16 +388,81 @@ dict_table_is_comp(
 	return(UNIV_LIKELY(table->flags & DICT_TF_COMPACT));
 }
 
-/************************************************************************
+/********************************************************************//**
+Determine the file format of a table.
+@return	file format version */
+UNIV_INLINE
+ulint
+dict_table_get_format(
+/*==================*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	ut_ad(table);
+
+	return((table->flags & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT);
+}
+
+/********************************************************************//**
+Determine the file format of a table. */
+UNIV_INLINE
+void
+dict_table_set_format(
+/*==================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	ulint		format)	/*!< in: file format version */
+{
+	ut_ad(table);
+
+	table->flags = (table->flags & ~DICT_TF_FORMAT_MASK)
+		| (format << DICT_TF_FORMAT_SHIFT);
+}
+
+/********************************************************************//**
+Extract the compressed page size from table flags.
+@return	compressed page size, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_flags_to_zip_size(
+/*=========================*/
+	ulint	flags)	/*!< in: flags */
+{
+	ulint	zip_size = flags & DICT_TF_ZSSIZE_MASK;
+
+	if (UNIV_UNLIKELY(zip_size)) {
+		zip_size = ((PAGE_ZIP_MIN_SIZE >> 1)
+			 << (zip_size >> DICT_TF_ZSSIZE_SHIFT));
+
+		ut_ad(zip_size <= UNIV_PAGE_SIZE);
+	}
+
+	return(zip_size);
+}
+
+/********************************************************************//**
+Check whether the table uses the compressed compact page format.
+@return	compressed page size, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_zip_size(
+/*================*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	ut_ad(table);
+
+	return(dict_table_flags_to_zip_size(table->flags));
+}
+
+/********************************************************************//**
 Gets the number of fields in the internal representation of an index,
-including fields added by the dictionary system. */
+including fields added by the dictionary system.
+@return	number of fields */
 UNIV_INLINE
 ulint
 dict_index_get_n_fields(
 /*====================*/
-				/* out: number of fields */
-	dict_index_t*	index)	/* in: an internal representation of index
-				(in the dictionary cache) */
+	const dict_index_t*	index)	/*!< in: an internal
+					representation of index (in
+					the dictionary cache) */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -315,18 +470,18 @@ dict_index_get_n_fields(
 	return(index->n_fields);
 }
 
-/************************************************************************
+/********************************************************************//**
 Gets the number of fields in the internal representation of an index
 that uniquely determine the position of an index entry in the index, if
 we do not take multiversioning into account: in the B-tree use the value
-returned by dict_index_get_n_unique_in_tree. */
+returned by dict_index_get_n_unique_in_tree.
+@return	number of fields */
 UNIV_INLINE
 ulint
 dict_index_get_n_unique(
 /*====================*/
-				/* out: number of fields */
-	dict_index_t*	index)	/* in: an internal representation of index
-				(in the dictionary cache) */
+	const dict_index_t*	index)	/*!< in: an internal representation
+					of index (in the dictionary cache) */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -335,23 +490,23 @@ dict_index_get_n_unique(
 	return(index->n_uniq);
 }
 
-/************************************************************************
+/********************************************************************//**
 Gets the number of fields in the internal representation of an index
 which uniquely determine the position of an index entry in the index, if
-we also take multiversioning into account. */
+we also take multiversioning into account.
+@return	number of fields */
 UNIV_INLINE
 ulint
 dict_index_get_n_unique_in_tree(
 /*============================*/
-				/* out: number of fields */
-	dict_index_t*	index)	/* in: an internal representation of index
-				(in the dictionary cache) */
+	const dict_index_t*	index)	/*!< in: an internal representation
+					of index (in the dictionary cache) */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	ut_ad(index->cached);
 
-	if (index->type & DICT_CLUSTERED) {
+	if (dict_index_is_clust(index)) {
 
 		return(dict_index_get_n_unique(index));
 	}
@@ -359,55 +514,56 @@ dict_index_get_n_unique_in_tree(
 	return(dict_index_get_n_fields(index));
 }
 
-/************************************************************************
+/********************************************************************//**
 Gets the number of user-defined ordering fields in the index. In the internal
 representation of clustered indexes we add the row id to the ordering fields
 to make a clustered index unique, but this function returns the number of
-fields the user defined in the index as ordering fields. */
+fields the user defined in the index as ordering fields.
+@return	number of fields */
 UNIV_INLINE
 ulint
 dict_index_get_n_ordering_defined_by_user(
 /*======================================*/
-				/* out: number of fields */
-	dict_index_t*	index)	/* in: an internal representation of index
-				(in the dictionary cache) */
+	const dict_index_t*	index)	/*!< in: an internal representation
+					of index (in the dictionary cache) */
 {
 	return(index->n_user_defined_cols);
 }
 
-/************************************************************************
-Gets the nth field of an index. */
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the nth field of an index.
+@return	pointer to field object */
 UNIV_INLINE
 dict_field_t*
 dict_index_get_nth_field(
 /*=====================*/
-				/* out: pointer to field object */
-	dict_index_t*	index,	/* in: index */
-	ulint		pos)	/* in: position of field */
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			pos)	/*!< in: position of field */
 {
 	ut_ad(index);
 	ut_ad(pos < index->n_def);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 
-	return((index->fields) + pos);
+	return((dict_field_t*) (index->fields) + pos);
 }
+#endif /* UNIV_DEBUG */
 
-/************************************************************************
-Returns the position of a system column in an index. */
+/********************************************************************//**
+Returns the position of a system column in an index.
+@return	position, ULINT_UNDEFINED if not contained */
 UNIV_INLINE
 ulint
 dict_index_get_sys_col_pos(
 /*=======================*/
-				/* out: position, ULINT_UNDEFINED if not
-				contained */
-	dict_index_t*	index,	/* in: index */
-	ulint		type)	/* in: DATA_ROW_ID, ... */
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			type)	/*!< in: DATA_ROW_ID, ... */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	ut_ad(!(index->type & DICT_UNIVERSAL));
 
-	if (index->type & DICT_CLUSTERED) {
+	if (dict_index_is_clust(index)) {
 
 		return(dict_col_get_clust_pos(
 			       dict_table_get_sys_col(index->table, type),
@@ -418,54 +574,75 @@ dict_index_get_sys_col_pos(
 		       index, dict_table_get_sys_col_no(index->table, type)));
 }
 
-/*************************************************************************
-Gets the field column. */
+/*********************************************************************//**
+Gets the field column.
+@return	field->col, pointer to the table column */
 UNIV_INLINE
 const dict_col_t*
 dict_field_get_col(
 /*===============*/
-	const dict_field_t*	field)
+	const dict_field_t*	field)	/*!< in: index field */
 {
 	ut_ad(field);
 
 	return(field->col);
 }
 
-/************************************************************************
-Gets pointer to the nth column in an index. */
+/********************************************************************//**
+Gets pointer to the nth column in an index.
+@return	column */
 UNIV_INLINE
 const dict_col_t*
 dict_index_get_nth_col(
 /*===================*/
-					/* out: column */
-	const dict_index_t*	index,	/* in: index */
-	ulint			pos)	/* in: position of the field */
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			pos)	/*!< in: position of the field */
 {
-	return(dict_field_get_col(dict_index_get_nth_field((dict_index_t*)
-							   index, pos)));
+	return(dict_field_get_col(dict_index_get_nth_field(index, pos)));
 }
 
-/************************************************************************
-Gets the column number the nth field in an index. */
+/********************************************************************//**
+Gets the column number the nth field in an index.
+@return	column number */
 UNIV_INLINE
 ulint
 dict_index_get_nth_col_no(
 /*======================*/
-					/* out: column number */
-	const dict_index_t*	index,	/* in: index */
-	ulint			pos)	/* in: position of the field */
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			pos)	/*!< in: position of the field */
 {
 	return(dict_col_get_no(dict_index_get_nth_col(index, pos)));
 }
 
-/*************************************************************************
-Gets the space id of the root of the index tree. */
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Returns the minimum data size of an index record.
+@return	minimum data size in bytes */
+UNIV_INLINE
+ulint
+dict_index_get_min_size(
+/*====================*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	ulint	n	= dict_index_get_n_fields(index);
+	ulint	size	= 0;
+
+	while (n--) {
+		size += dict_col_get_min_size(dict_index_get_nth_col(index,
+								     n));
+	}
+
+	return(size);
+}
+
+/*********************************************************************//**
+Gets the space id of the root of the index tree.
+@return	space id */
 UNIV_INLINE
 ulint
 dict_index_get_space(
 /*=================*/
-				/* out: space id */
-	dict_index_t*	index)	/* in: index */
+	const dict_index_t*	index)	/*!< in: index */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -473,14 +650,14 @@ dict_index_get_space(
 	return(index->space);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Sets the space id of the root of the index tree. */
 UNIV_INLINE
 void
 dict_index_set_space(
 /*=================*/
-	dict_index_t*	index,	/* in: index */
-	ulint		space)	/* in: space id */
+	dict_index_t*	index,	/*!< in/out: index */
+	ulint		space)	/*!< in: space id */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -488,14 +665,14 @@ dict_index_set_space(
 	index->space = space;
 }
 
-/*************************************************************************
-Gets the page number of the root of the index tree. */
+/*********************************************************************//**
+Gets the page number of the root of the index tree.
+@return	page number */
 UNIV_INLINE
 ulint
 dict_index_get_page(
 /*================*/
-				/* out: page number */
-	dict_index_t*	index)	/* in: index */
+	const dict_index_t*	index)	/*!< in: index */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -503,14 +680,14 @@ dict_index_get_page(
 	return(index->page);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Sets the page number of the root of index tree. */
 UNIV_INLINE
 void
 dict_index_set_page(
 /*================*/
-	dict_index_t*	index,	/* in: index */
-	ulint		page)	/* in: page number */
+	dict_index_t*	index,	/*!< in/out: index */
+	ulint		page)	/*!< in: page number */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -518,29 +695,14 @@ dict_index_set_page(
 	index->page = page;
 }
 
-/*************************************************************************
-Gets the type of the index tree. */
-UNIV_INLINE
-ulint
-dict_index_get_type(
-/*================*/
-				/* out: type */
-	dict_index_t*	index)	/* in: index */
-{
-	ut_ad(index);
-	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
-	return(index->type);
-}
-
-/*************************************************************************
-Gets the read-write lock of the index tree. */
+/*********************************************************************//**
+Gets the read-write lock of the index tree.
+@return	read-write lock */
 UNIV_INLINE
 rw_lock_t*
 dict_index_get_lock(
 /*================*/
-				/* out: read-write lock */
-	dict_index_t*	index)	/* in: index */
+	dict_index_t*	index)	/*!< in: index */
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -548,28 +710,27 @@ dict_index_get_lock(
 	return(&(index->lock));
 }
 
-/************************************************************************
+/********************************************************************//**
 Returns free space reserved for future updates of records. This is
 relevant only in the case of many consecutive inserts, as updates
-which make the records bigger might fragment the index. */
+which make the records bigger might fragment the index.
+@return	number of free bytes on page, reserved for updates */
 UNIV_INLINE
 ulint
 dict_index_get_space_reserve(void)
 /*==============================*/
-				/* out: number of free bytes on page,
-				reserved for updates */
 {
 	return(UNIV_PAGE_SIZE / 16);
 }
 
-/**************************************************************************
-Checks if a table is in the dictionary cache. */
+/**********************************************************************//**
+Checks if a table is in the dictionary cache.
+@return	table, NULL if not found */
 UNIV_INLINE
 dict_table_t*
 dict_table_check_if_in_cache_low(
 /*=============================*/
-					/* out: table, NULL if not found */
-	const char*	table_name)	/* in: table name */
+	const char*	table_name)	/*!< in: table name */
 {
 	dict_table_t*	table;
 	ulint		table_fold;
@@ -580,20 +741,21 @@ dict_table_check_if_in_cache_low(
 	/* Look for the table name in the hash table */
 	table_fold = ut_fold_string(table_name);
 
-	HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, table,
-		    ut_strcmp(table->name, table_name) == 0);
+	HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold,
+		    dict_table_t*, table, ut_ad(table->cached),
+		    !strcmp(table->name, table_name));
 	return(table);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Gets a table; loads it to the dictionary cache if necessary. A low-level
-function. */
+function.
+@return	table, NULL if not found */
 UNIV_INLINE
 dict_table_t*
 dict_table_get_low(
 /*===============*/
-					/* out: table, NULL if not found */
-	const char*	table_name)	/* in: table name */
+	const char*	table_name)	/*!< in: table name */
 {
 	dict_table_t*	table;
 
@@ -606,17 +768,19 @@ dict_table_get_low(
 		table = dict_load_table(table_name);
 	}
 
+	ut_ad(!table || table->cached);
+
 	return(table);
 }
 
-/**************************************************************************
-Returns a table object based on table id. */
+/**********************************************************************//**
+Returns a table object based on table id.
+@return	table, NULL if does not exist */
 UNIV_INLINE
 dict_table_t*
 dict_table_get_on_id_low(
 /*=====================*/
-				/* out: table, NULL if does not exist */
-	dulint	table_id)	/* in: table id */
+	dulint	table_id)	/*!< in: table id */
 {
 	dict_table_t*	table;
 	ulint		fold;
@@ -626,39 +790,17 @@ dict_table_get_on_id_low(
 	/* Look for the table name in the hash table */
 	fold = ut_fold_dulint(table_id);
 
-	HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, table,
-		    ut_dulint_cmp(table->id, table_id) == 0);
+	HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold,
+		    dict_table_t*, table, ut_ad(table->cached),
+		    !ut_dulint_cmp(table->id, table_id));
 	if (table == NULL) {
 		table = dict_load_table_on_id(table_id);
 	}
 
+	ut_ad(!table || table->cached);
+
 	/* TODO: should get the type information from MySQL */
 
 	return(table);
 }
-
-/**************************************************************************
-Returns an index object. */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_index(
-/*=================*/
-				/* out: index, NULL if does not exist */
-	dict_table_t*	table,	/* in: table */
-	const char*	name)	/* in: index name */
-{
-	dict_index_t*	index	= NULL;
-
-	index = dict_table_get_first_index(table);
-
-	while (index != NULL) {
-		if (ut_strcmp(name, index->name) == 0) {
-
-			break;
-		}
-
-		index = dict_table_get_next_index(index);
-	}
-
-	return(index);
-}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/dict0load.h b/storage/innodb_plugin/include/dict0load.h
similarity index 55%
rename from storage/innobase/include/dict0load.h
rename to storage/innodb_plugin/include/dict0load.h
index 7e19c2eb3c0..60b8c1fb632 100644
--- a/storage/innobase/include/dict0load.h
+++ b/storage/innodb_plugin/include/dict0load.h
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0load.h
 Loads to the memory cache database object definitions
 from dictionary tables
 
-(c) 1996 Innobase Oy
-
 Created 4/24/1996 Heikki Tuuri
 *******************************************************/
 
@@ -13,8 +30,9 @@ Created 4/24/1996 Heikki Tuuri
 #include "univ.i"
 #include "dict0types.h"
 #include "ut0byte.h"
+#include "mem0mem.h"
 
-/************************************************************************
+/********************************************************************//**
 In a crash recovery we already have all the tablespace objects created.
 This function compares the space id information in the InnoDB data dictionary
 to what we already read with fil_load_single_table_tablespaces().
@@ -22,72 +40,69 @@ to what we already read with fil_load_single_table_tablespaces().
 In a normal startup, we create the tablespace objects for every table in
 InnoDB's data dictionary, if the corresponding .ibd file exists.
 We also scan the biggest space id, and store it to fil_system. */
-
+UNIV_INTERN
 void
 dict_check_tablespaces_and_store_max_id(
 /*====================================*/
-	ibool	in_crash_recovery);	/* in: are we doing a crash recovery */
-/************************************************************************
-Finds the first table name in the given database. */
-
+	ibool	in_crash_recovery);	/*!< in: are we doing a crash recovery */
+/********************************************************************//**
+Finds the first table name in the given database.
+@return own: table name, NULL if does not exist; the caller must free
+the memory in the string! */
+UNIV_INTERN
 char*
 dict_get_first_table_name_in_db(
 /*============================*/
-				/* out, own: table name, NULL if
-				does not exist; the caller must free
-				the memory in the string! */
-	const char*	name);	/* in: database name which ends to '/' */
-/************************************************************************
+	const char*	name);	/*!< in: database name which ends to '/' */
+/********************************************************************//**
 Loads a table definition and also all its index definitions, and also
 the cluster definition if the table is a member in a cluster. Also loads
 all foreign key constraints where the foreign key is in the table or where
-a foreign key references columns in this table. */
-
+a foreign key references columns in this table.
+@return table, NULL if does not exist; if the table is stored in an
+.ibd file, but the file does not exist, then we set the
+ibd_file_missing flag TRUE in the table object we return */
+UNIV_INTERN
 dict_table_t*
 dict_load_table(
 /*============*/
-				/* out: table, NULL if does not exist;
-				if the table is stored in an .ibd file,
-				but the file does not exist,
-				then we set the ibd_file_missing flag TRUE
-				in the table object we return */
-	const char*	name);	/* in: table name in the
+	const char*	name);	/*!< in: table name in the
 				databasename/tablename format */
-/***************************************************************************
-Loads a table object based on the table id. */
-
+/***********************************************************************//**
+Loads a table object based on the table id.
+@return	table; NULL if table does not exist */
+UNIV_INTERN
 dict_table_t*
 dict_load_table_on_id(
 /*==================*/
-				/* out: table; NULL if table does not exist */
-	dulint	table_id);	/* in: table id */
-/************************************************************************
+	dulint	table_id);	/*!< in: table id */
+/********************************************************************//**
 This function is called when the database is booted.
 Loads system table index definitions except for the clustered index which
 is added to the dictionary cache at booting before calling this function. */
-
+UNIV_INTERN
 void
 dict_load_sys_table(
 /*================*/
-	dict_table_t*	table);	/* in: system table */
-/***************************************************************************
+	dict_table_t*	table);	/*!< in: system table */
+/***********************************************************************//**
 Loads foreign key constraints where the table is either the foreign key
 holder or where the table is referenced by a foreign key. Adds these
 constraints to the data dictionary. Note that we know that the dictionary
 cache already contains all constraints where the other relevant table is
-already in the dictionary cache. */
-
+already in the dictionary cache.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
 ulint
 dict_load_foreigns(
 /*===============*/
-					/* out: DB_SUCCESS or error code */
-	const char*	table_name,	/* in: table name */
-	ibool		check_charsets);/* in: TRUE=check charsets
+	const char*	table_name,	/*!< in: table name */
+	ibool		check_charsets);/*!< in: TRUE=check charsets
 					compatibility */
-/************************************************************************
+/********************************************************************//**
 Prints to the standard output information on all tables found in the data
 dictionary system table. */
-
+UNIV_INTERN
 void
 dict_print(void);
 /*============*/
diff --git a/storage/innodb_plugin/include/dict0load.ic b/storage/innodb_plugin/include/dict0load.ic
new file mode 100644
index 00000000000..ccc16db165b
--- /dev/null
+++ b/storage/innodb_plugin/include/dict0load.ic
@@ -0,0 +1,26 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0load.ic
+Loads to the memory cache database object definitions
+from dictionary tables
+
+Created 4/24/1996 Heikki Tuuri
+*******************************************************/
+
diff --git a/storage/innodb_plugin/include/dict0mem.h b/storage/innodb_plugin/include/dict0mem.h
new file mode 100644
index 00000000000..1ee906fbf57
--- /dev/null
+++ b/storage/innodb_plugin/include/dict0mem.h
@@ -0,0 +1,537 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0mem.h
+Data dictionary memory object creation
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0mem_h
+#define dict0mem_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "data0type.h"
+#include "mem0mem.h"
+#include "rem0types.h"
+#include "btr0types.h"
+#ifndef UNIV_HOTBACKUP
+# include "lock0types.h"
+# include "que0types.h"
+# include "sync0rw.h"
+#endif /* !UNIV_HOTBACKUP */
+#include "ut0mem.h"
+#include "ut0lst.h"
+#include "ut0rnd.h"
+#include "ut0byte.h"
+#include "hash0hash.h"
+#include "trx0types.h"
+
+/** Type flags of an index: OR'ing of the flags is allowed to define a
+combination of types */
+/* @{ */
+#define DICT_CLUSTERED	1	/*!< clustered index */
+#define DICT_UNIQUE	2	/*!< unique index */
+#define	DICT_UNIVERSAL	4	/*!< index which can contain records from any
+				other index */
+#define	DICT_IBUF 	8	/*!< insert buffer tree */
+/* @} */
+
+/** Types for a table object */
+#define DICT_TABLE_ORDINARY		1 /*!< ordinary table */
+#if 0 /* not implemented */
+#define	DICT_TABLE_CLUSTER_MEMBER	2
+#define	DICT_TABLE_CLUSTER		3 /* this means that the table is
+					  really a cluster definition */
+#endif
+
+/** Table flags.  All unused bits must be 0. */
+/* @{ */
+#define DICT_TF_COMPACT			1	/* Compact page format.
+						This must be set for
+						new file formats
+						(later than
+						DICT_TF_FORMAT_51). */
+
+/** Compressed page size (0=uncompressed, up to 15 compressed sizes) */
+/* @{ */
+#define DICT_TF_ZSSIZE_SHIFT		1
+#define DICT_TF_ZSSIZE_MASK		(15 << DICT_TF_ZSSIZE_SHIFT)
+#define DICT_TF_ZSSIZE_MAX (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 1)
+/* @} */
+
+/** File format */
+/* @{ */
+#define DICT_TF_FORMAT_SHIFT		5	/* file format */
+#define DICT_TF_FORMAT_MASK		(127 << DICT_TF_FORMAT_SHIFT)
+#define DICT_TF_FORMAT_51		0	/*!< InnoDB/MySQL up to 5.1 */
+#define DICT_TF_FORMAT_ZIP		1	/*!< InnoDB plugin for 5.1:
+						compressed tables,
+						new BLOB treatment */
+/** Maximum supported file format */
+#define DICT_TF_FORMAT_MAX		DICT_TF_FORMAT_ZIP
+
+#define DICT_TF_BITS			6	/*!< number of flag bits */
+#if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX
+# error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX"
+#endif
+/* @} */
+/* @} */
+
+/**********************************************************************//**
+Creates a table memory object.
+@return	own: table object */
+UNIV_INTERN
+dict_table_t*
+dict_mem_table_create(
+/*==================*/
+	const char*	name,		/*!< in: table name */
+	ulint		space,		/*!< in: space where the clustered index
+					of the table is placed; this parameter
+					is ignored if the table is made
+					a member of a cluster */
+	ulint		n_cols,		/*!< in: number of columns */
+	ulint		flags);		/*!< in: table flags */
+/****************************************************************//**
+Free a table memory object. */
+UNIV_INTERN
+void
+dict_mem_table_free(
+/*================*/
+	dict_table_t*	table);		/*!< in: table */
+/**********************************************************************//**
+Adds a column definition to a table. */
+UNIV_INTERN
+void
+dict_mem_table_add_col(
+/*===================*/
+	dict_table_t*	table,	/*!< in: table */
+	mem_heap_t*	heap,	/*!< in: temporary memory heap, or NULL */
+	const char*	name,	/*!< in: column name, or NULL */
+	ulint		mtype,	/*!< in: main datatype */
+	ulint		prtype,	/*!< in: precise type */
+	ulint		len);	/*!< in: precision */
+/**********************************************************************//**
+Creates an index memory object.
+@return	own: index object */
+UNIV_INTERN
+dict_index_t*
+dict_mem_index_create(
+/*==================*/
+	const char*	table_name,	/*!< in: table name */
+	const char*	index_name,	/*!< in: index name */
+	ulint		space,		/*!< in: space where the index tree is
+					placed, ignored if the index is of
+					the clustered type */
+	ulint		type,		/*!< in: DICT_UNIQUE,
+					DICT_CLUSTERED, ... ORed */
+	ulint		n_fields);	/*!< in: number of fields */
+/**********************************************************************//**
+Adds a field definition to an index. NOTE: does not take a copy
+of the column name if the field is a column. The memory occupied
+by the column name may be released only after publishing the index. */
+UNIV_INTERN
+void
+dict_mem_index_add_field(
+/*=====================*/
+	dict_index_t*	index,		/*!< in: index */
+	const char*	name,		/*!< in: column name */
+	ulint		prefix_len);	/*!< in: 0 or the column prefix length
+					in a MySQL index like
+					INDEX (textcol(25)) */
+/**********************************************************************//**
+Frees an index memory object. */
+UNIV_INTERN
+void
+dict_mem_index_free(
+/*================*/
+	dict_index_t*	index);	/*!< in: index */
+/**********************************************************************//**
+Creates and initializes a foreign constraint memory object.
+@return	own: foreign constraint struct */
+UNIV_INTERN
+dict_foreign_t*
+dict_mem_foreign_create(void);
+/*=========================*/
+
+/** Data structure for a column in a table */
+struct dict_col_struct{
+	/*----------------------*/
+	/** The following are copied from dtype_t,
+	so that all bit-fields can be packed tightly. */
+	/* @{ */
+	unsigned	mtype:8;	/*!< main data type */
+	unsigned	prtype:24;	/*!< precise type; MySQL data
+					type, charset code, flags to
+					indicate nullability,
+					signedness, whether this is a
+					binary string, whether this is
+					a true VARCHAR where MySQL
+					uses 2 bytes to store the length */
+
+	/* the remaining fields do not affect alphabetical ordering: */
+
+	unsigned	len:16;		/*!< length; for MySQL data this
+					is field->pack_length(),
+					except that for a >= 5.0.3
+					type true VARCHAR this is the
+					maximum byte length of the
+					string data (in addition to
+					the string, MySQL uses 1 or 2
+					bytes to store the string length) */
+
+	unsigned	mbminlen:2;	/*!< minimum length of a
+					character, in bytes */
+	unsigned	mbmaxlen:3;	/*!< maximum length of a
+					character, in bytes */
+	/*----------------------*/
+	/* End of definitions copied from dtype_t */
+	/* @} */
+
+	unsigned	ind:10;		/*!< table column position
+					(starting from 0) */
+	unsigned	ord_part:1;	/*!< nonzero if this column
+					appears in the ordering fields
+					of an index */
+};
+
+/** @brief DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
+indexed column length (or indexed prefix length).
+
+It is set to 3*256, so that one can create a column prefix index on
+256 characters of a TEXT or VARCHAR column also in the UTF-8
+charset. In that charset, a character may take at most 3 bytes.  This
+constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
+files would be at risk! */
+#define DICT_MAX_INDEX_COL_LEN		REC_MAX_INDEX_COL_LEN
+
+/** Data structure for a field in an index */
+struct dict_field_struct{
+	dict_col_t*	col;		/*!< pointer to the table column */
+	const char*	name;		/*!< name of the column */
+	unsigned	prefix_len:10;	/*!< 0 or the length of the column
+					prefix in bytes in a MySQL index of
+					type, e.g., INDEX (textcol(25));
+					must be smaller than
+					DICT_MAX_INDEX_COL_LEN; NOTE that
+					in the UTF-8 charset, MySQL sets this
+					to 3 * the prefix len in UTF-8 chars */
+	unsigned	fixed_len:10;	/*!< 0 or the fixed length of the
+					column if smaller than
+					DICT_MAX_INDEX_COL_LEN */
+};
+
+/** Data structure for an index.  Most fields will be
+initialized to 0, NULL or FALSE in dict_mem_index_create(). */
+struct dict_index_struct{
+	dulint		id;	/*!< id of the index */
+	mem_heap_t*	heap;	/*!< memory heap */
+	const char*	name;	/*!< index name */
+	const char*	table_name;/*!< table name */
+	dict_table_t*	table;	/*!< back pointer to table */
+#ifndef UNIV_HOTBACKUP
+	unsigned	space:32;
+				/*!< space where the index tree is placed */
+	unsigned	page:32;/*!< index tree root page number */
+#endif /* !UNIV_HOTBACKUP */
+	unsigned	type:4;	/*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
+				DICT_UNIVERSAL, DICT_IBUF) */
+	unsigned	trx_id_offset:10;/*!< position of the trx id column
+				in a clustered index record, if the fields
+				before it are known to be of a fixed size,
+				0 otherwise */
+	unsigned	n_user_defined_cols:10;
+				/*!< number of columns the user defined to
+				be in the index: in the internal
+				representation we add more columns */
+	unsigned	n_uniq:10;/*!< number of fields from the beginning
+				which are enough to determine an index
+				entry uniquely */
+	unsigned	n_def:10;/*!< number of fields defined so far */
+	unsigned	n_fields:10;/*!< number of fields in the index */
+	unsigned	n_nullable:10;/*!< number of nullable fields */
+	unsigned	cached:1;/*!< TRUE if the index object is in the
+				dictionary cache */
+	unsigned	to_be_dropped:1;
+				/*!< TRUE if this index is marked to be
+				dropped in ha_innobase::prepare_drop_index(),
+				otherwise FALSE */
+	dict_field_t*	fields;	/*!< array of field descriptions */
+#ifndef UNIV_HOTBACKUP
+	UT_LIST_NODE_T(dict_index_t)
+			indexes;/*!< list of indexes of the table */
+	btr_search_t*	search_info; /*!< info used in optimistic searches */
+	/*----------------------*/
+	/** Statistics for query optimization */
+	/* @{ */
+	ib_int64_t*	stat_n_diff_key_vals;
+				/*!< approximate number of different
+				key values for this index, for each
+				n-column prefix where n <=
+				dict_get_n_unique(index); we
+				periodically calculate new
+				estimates */
+	ulint		stat_index_size;
+				/*!< approximate index size in
+				database pages */
+	ulint		stat_n_leaf_pages;
+				/*!< approximate number of leaf pages in the
+				index tree */
+	/* @} */
+	rw_lock_t	lock;	/*!< read-write lock protecting the
+				upper levels of the index tree */
+	ib_uint64_t	trx_id; /*!< id of the transaction that created this
+				index, or 0 if the index existed
+				when InnoDB was started up */
+#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_DEBUG
+	ulint		magic_n;/*!< magic number */
+/** Value of dict_index_struct::magic_n */
+# define DICT_INDEX_MAGIC_N	76789786
+#endif
+};
+
+/** Data structure for a foreign key constraint; an example:
+FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D).  Most fields will be
+initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */
+struct dict_foreign_struct{
+	mem_heap_t*	heap;		/*!< this object is allocated from
+					this memory heap */
+	char*		id;		/*!< id of the constraint as a
+					null-terminated string */
+	unsigned	n_fields:10;	/*!< number of indexes' first fields
+					for which the the foreign key
+					constraint is defined: we allow the
+					indexes to contain more fields than
+					mentioned in the constraint, as long
+					as the first fields are as mentioned */
+	unsigned	type:6;		/*!< 0 or DICT_FOREIGN_ON_DELETE_CASCADE
+					or DICT_FOREIGN_ON_DELETE_SET_NULL */
+	char*		foreign_table_name;/*!< foreign table name */
+	dict_table_t*	foreign_table;	/*!< table where the foreign key is */
+	const char**	foreign_col_names;/*!< names of the columns in the
+					foreign key */
+	char*		referenced_table_name;/*!< referenced table name */
+	dict_table_t*	referenced_table;/*!< table where the referenced key
+					is */
+	const char**	referenced_col_names;/*!< names of the referenced
+					columns in the referenced table */
+	dict_index_t*	foreign_index;	/*!< foreign index; we require that
+					both tables contain explicitly defined
+					indexes for the constraint: InnoDB
+					does not generate new indexes
+					implicitly */
+	dict_index_t*	referenced_index;/*!< referenced index */
+	UT_LIST_NODE_T(dict_foreign_t)
+			foreign_list;	/*!< list node for foreign keys of the
+					table */
+	UT_LIST_NODE_T(dict_foreign_t)
+			referenced_list;/*!< list node for referenced
+					keys of the table */
+};
+
+/** The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
+a foreign key constraint is enforced, therefore RESTRICT just means no flag */
+/* @{ */
+#define DICT_FOREIGN_ON_DELETE_CASCADE	1	/*!< ON DELETE CASCADE */
+#define DICT_FOREIGN_ON_DELETE_SET_NULL	2	/*!< ON UPDATE SET NULL */
+#define DICT_FOREIGN_ON_UPDATE_CASCADE	4	/*!< ON DELETE CASCADE */
+#define DICT_FOREIGN_ON_UPDATE_SET_NULL	8	/*!< ON UPDATE SET NULL */
+#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16	/*!< ON DELETE NO ACTION */
+#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32	/*!< ON UPDATE NO ACTION */
+/* @} */
+
+
+/** Data structure for a database table.  Most fields will be
+initialized to 0, NULL or FALSE in dict_mem_table_create(). */
+struct dict_table_struct{
+	dulint		id;	/*!< id of the table */
+	mem_heap_t*	heap;	/*!< memory heap */
+	const char*	name;	/*!< table name */
+	const char*	dir_path_of_temp_table;/*!< NULL or the directory path
+				where a TEMPORARY table that was explicitly
+				created by a user should be placed if
+				innodb_file_per_table is defined in my.cnf;
+				in Unix this is usually /tmp/..., in Windows
+				temp\... */
+	unsigned	space:32;
+				/*!< space where the clustered index of the
+				table is placed */
+	unsigned	flags:DICT_TF_BITS;/*!< DICT_TF_COMPACT, ... */
+	unsigned	ibd_file_missing:1;
+				/*!< TRUE if this is in a single-table
+				tablespace and the .ibd file is missing; then
+				we must return in ha_innodb.cc an error if the
+				user tries to query such an orphaned table */
+	unsigned	tablespace_discarded:1;
+				/*!< this flag is set TRUE when the user
+				calls DISCARD TABLESPACE on this
+				table, and reset to FALSE in IMPORT
+				TABLESPACE */
+	unsigned	cached:1;/*!< TRUE if the table object has been added
+				to the dictionary cache */
+	unsigned	n_def:10;/*!< number of columns defined so far */
+	unsigned	n_cols:10;/*!< number of columns */
+	dict_col_t*	cols;	/*!< array of column descriptions */
+	const char*	col_names;
+				/*!< Column names packed in a character string
+				"name1\0name2\0...nameN\0".  Until
+				the string contains n_cols, it will be
+				allocated from a temporary heap.  The final
+				string will be allocated from table->heap. */
+#ifndef UNIV_HOTBACKUP
+	hash_node_t	name_hash; /*!< hash chain node */
+	hash_node_t	id_hash; /*!< hash chain node */
+	UT_LIST_BASE_NODE_T(dict_index_t)
+			indexes; /*!< list of indexes of the table */
+	UT_LIST_BASE_NODE_T(dict_foreign_t)
+			foreign_list;/*!< list of foreign key constraints
+				in the table; these refer to columns
+				in other tables */
+	UT_LIST_BASE_NODE_T(dict_foreign_t)
+			referenced_list;/*!< list of foreign key constraints
+				which refer to this table */
+	UT_LIST_NODE_T(dict_table_t)
+			table_LRU; /*!< node of the LRU list of tables */
+	ulint		n_mysql_handles_opened;
+				/*!< count of how many handles MySQL has opened
+				to this table; dropping of the table is
+				NOT allowed until this count gets to zero;
+				MySQL does NOT itself check the number of
+				open handles at drop */
+	ulint		n_foreign_key_checks_running;
+				/*!< count of how many foreign key check
+				operations are currently being performed
+				on the table: we cannot drop the table while
+				there are foreign key checks running on
+				it! */
+	trx_id_t	query_cache_inv_trx_id;
+				/*!< transactions whose trx id is
+				smaller than this number are not
+				allowed to store to the MySQL query
+				cache or retrieve from it; when a trx
+				with undo logs commits, it sets this
+				to the value of the trx id counter for
+				the tables it had an IX lock on */
+	UT_LIST_BASE_NODE_T(lock_t)
+			locks; /*!< list of locks on the table */
+#ifdef UNIV_DEBUG
+	/*----------------------*/
+	ibool		does_not_fit_in_memory;
+				/*!< this field is used to specify in
+				simulations tables which are so big
+				that disk should be accessed: disk
+				access is simulated by putting the
+				thread to sleep for a while; NOTE that
+				this flag is not stored to the data
+				dictionary on disk, and the database
+				will forget about value TRUE if it has
+				to reload the table definition from
+				disk */
+#endif /* UNIV_DEBUG */
+	/*----------------------*/
+	unsigned	big_rows:1;
+				/*!< flag: TRUE if the maximum length of
+				a single row exceeds BIG_ROW_SIZE;
+				initialized in dict_table_add_to_cache() */
+				/** Statistics for query optimization */
+				/* @{ */
+	unsigned	stat_initialized:1; /*!< TRUE if statistics have
+				been calculated the first time
+				after database startup or table creation */
+	ib_int64_t	stat_n_rows;
+				/*!< approximate number of rows in the table;
+				we periodically calculate new estimates */
+	ulint		stat_clustered_index_size;
+				/*!< approximate clustered index size in
+				database pages */
+	ulint		stat_sum_of_other_index_sizes;
+				/*!< other indexes in database pages */
+	ulint		stat_modified_counter;
+				/*!< when a row is inserted, updated,
+				or deleted,
+				we add 1 to this number; we calculate new
+				estimates for the stat_... values for the
+				table and the indexes at an interval of 2 GB
+				or when about 1 / 16 of table has been
+				modified; also when the estimate operation is
+				called for MySQL SHOW TABLE STATUS; the
+				counter is reset to zero at statistics
+				calculation; this counter is not protected by
+				any latch, because this is only used for
+				heuristics */
+				/* @} */
+	/*----------------------*/
+				/**!< The following fields are used by the
+				AUTOINC code.  The actual collection of
+				tables locked during AUTOINC read/write is
+				kept in trx_t. In order to quickly determine
+				whether a transaction has locked the AUTOINC
+				lock we keep a pointer to the transaction
+				here in the autoinc_trx variable. This is to
+				avoid acquiring the kernel mutex and scanning
+				the vector in trx_t.
+
+				When an AUTOINC lock has to wait, the
+				corresponding lock instance is created on
+				the trx lock heap rather than use the
+				pre-allocated instance in autoinc_lock below.*/
+				/* @{ */
+	lock_t*		autoinc_lock;
+				/*!< a buffer for an AUTOINC lock
+				for this table: we allocate the memory here
+				so that individual transactions can get it
+				and release it without a need to allocate
+				space from the lock heap of the trx:
+				otherwise the lock heap would grow rapidly
+				if we do a large insert from a select */
+	mutex_t		autoinc_mutex;
+				/*!< mutex protecting the autoincrement
+				counter */
+	ib_uint64_t	autoinc;/*!< autoinc counter value to give to the
+				next inserted row */
+	ulong		n_waiting_or_granted_auto_inc_locks;
+				/*!< This counter is used to track the number
+				of granted and pending autoinc locks on this
+				table. This value is set after acquiring the
+				kernel mutex but we peek the contents to
+				determine whether other transactions have
+				acquired the AUTOINC lock or not. Of course
+				only one transaction can be granted the
+				lock but there can be multiple waiters. */
+	const trx_t*		autoinc_trx;
+				/*!< The transaction that currently holds the
+				the AUTOINC lock on this table. */
+				/* @} */
+	/*----------------------*/
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+	ulint		magic_n;/*!< magic number */
+/** Value of dict_table_struct::magic_n */
+# define DICT_TABLE_MAGIC_N	76333786
+#endif /* UNIV_DEBUG */
+};
+
+#ifndef UNIV_NONINL
+#include "dict0mem.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/dict0mem.ic b/storage/innodb_plugin/include/dict0mem.ic
new file mode 100644
index 00000000000..c36adb07a18
--- /dev/null
+++ b/storage/innodb_plugin/include/dict0mem.ic
@@ -0,0 +1,26 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/dict0mem.ic
+Data dictionary memory object creation
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+
diff --git a/storage/innodb_plugin/include/dict0types.h b/storage/innodb_plugin/include/dict0types.h
new file mode 100644
index 00000000000..7ad69193cc9
--- /dev/null
+++ b/storage/innodb_plugin/include/dict0types.h
@@ -0,0 +1,48 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0types.h
+Data dictionary global types
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0types_h
+#define dict0types_h
+
+typedef struct dict_sys_struct		dict_sys_t;
+typedef struct dict_col_struct		dict_col_t;
+typedef struct dict_field_struct	dict_field_t;
+typedef struct dict_index_struct	dict_index_t;
+typedef struct dict_table_struct	dict_table_t;
+typedef struct dict_foreign_struct	dict_foreign_t;
+
+/* A cluster object is a table object with the type field set to
+DICT_CLUSTERED */
+
+typedef dict_table_t			dict_cluster_t;
+
+typedef struct ind_node_struct		ind_node_t;
+typedef struct tab_node_struct		tab_node_t;
+
+/* Space id and page no where the dictionary header resides */
+#define	DICT_HDR_SPACE		0	/* the SYSTEM tablespace */
+#define	DICT_HDR_PAGE_NO	FSP_DICT_HDR_PAGE_NO
+
+#endif
diff --git a/storage/innodb_plugin/include/dyn0dyn.h b/storage/innodb_plugin/include/dyn0dyn.h
new file mode 100644
index 00000000000..121a5946ac7
--- /dev/null
+++ b/storage/innodb_plugin/include/dyn0dyn.h
@@ -0,0 +1,188 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dyn0dyn.h
+The dynamically allocated array
+
+Created 2/5/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dyn0dyn_h
+#define dyn0dyn_h
+
+#include "univ.i"
+#include "ut0lst.h"
+#include "mem0mem.h"
+
+/** A block in a dynamically allocated array */
+typedef struct dyn_block_struct		dyn_block_t;
+/** Dynamically allocated array */
+typedef dyn_block_t			dyn_array_t;
+
+
+/** This is the initial 'payload' size of a dynamic array;
+this must be > MLOG_BUF_MARGIN + 30! */
+#define	DYN_ARRAY_DATA_SIZE	512
+
+/*********************************************************************//**
+Initializes a dynamic array.
+@return	initialized dyn array */
+UNIV_INLINE
+dyn_array_t*
+dyn_array_create(
+/*=============*/
+	dyn_array_t*	arr);	/*!< in: pointer to a memory buffer of
+				size sizeof(dyn_array_t) */
+/************************************************************//**
+Frees a dynamic array. */
+UNIV_INLINE
+void
+dyn_array_free(
+/*===========*/
+	dyn_array_t*	arr);	/*!< in: dyn array */
+/*********************************************************************//**
+Makes room on top of a dyn array and returns a pointer to a buffer in it.
+After copying the elements, the caller must close the buffer using
+dyn_array_close.
+@return	pointer to the buffer */
+UNIV_INLINE
+byte*
+dyn_array_open(
+/*===========*/
+	dyn_array_t*	arr,	/*!< in: dynamic array */
+	ulint		size);	/*!< in: size in bytes of the buffer; MUST be
+				smaller than DYN_ARRAY_DATA_SIZE! */
+/*********************************************************************//**
+Closes the buffer returned by dyn_array_open. */
+UNIV_INLINE
+void
+dyn_array_close(
+/*============*/
+	dyn_array_t*	arr,	/*!< in: dynamic array */
+	byte*		ptr);	/*!< in: buffer space from ptr up was not used */
+/*********************************************************************//**
+Makes room on top of a dyn array and returns a pointer to
+the added element. The caller must copy the element to
+the pointer returned.
+@return	pointer to the element */
+UNIV_INLINE
+void*
+dyn_array_push(
+/*===========*/
+	dyn_array_t*	arr,	/*!< in: dynamic array */
+	ulint		size);	/*!< in: size in bytes of the element */
+/************************************************************//**
+Returns pointer to an element in dyn array.
+@return	pointer to element */
+UNIV_INLINE
+void*
+dyn_array_get_element(
+/*==================*/
+	dyn_array_t*	arr,	/*!< in: dyn array */
+	ulint		pos);	/*!< in: position of element as bytes
+				from array start */
+/************************************************************//**
+Returns the size of stored data in a dyn array.
+@return	data size in bytes */
+UNIV_INLINE
+ulint
+dyn_array_get_data_size(
+/*====================*/
+	dyn_array_t*	arr);	/*!< in: dyn array */
+/************************************************************//**
+Gets the first block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_first_block(
+/*======================*/
+	dyn_array_t*	arr);	/*!< in: dyn array */
+/************************************************************//**
+Gets the last block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_last_block(
+/*=====================*/
+	dyn_array_t*	arr);	/*!< in: dyn array */
+/********************************************************************//**
+Gets the next block in a dyn array.
+@return	pointer to next, NULL if end of list */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_next_block(
+/*=====================*/
+	dyn_array_t*	arr,	/*!< in: dyn array */
+	dyn_block_t*	block);	/*!< in: dyn array block */
+/********************************************************************//**
+Gets the number of used bytes in a dyn array block.
+@return	number of bytes used */
+UNIV_INLINE
+ulint
+dyn_block_get_used(
+/*===============*/
+	dyn_block_t*	block);	/*!< in: dyn array block */
+/********************************************************************//**
+Gets pointer to the start of data in a dyn array block.
+@return	pointer to data */
+UNIV_INLINE
+byte*
+dyn_block_get_data(
+/*===============*/
+	dyn_block_t*	block);	/*!< in: dyn array block */
+/********************************************************//**
+Pushes n bytes to a dyn array. */
+UNIV_INLINE
+void
+dyn_push_string(
+/*============*/
+	dyn_array_t*	arr,	/*!< in: dyn array */
+	const byte*	str,	/*!< in: string to write */
+	ulint		len);	/*!< in: string length */
+
+/*#################################################################*/
+
+/** @brief A block in a dynamically allocated array.
+NOTE! Do not access the fields of the struct directly: the definition
+appears here only for the compiler to know its size! */
+struct dyn_block_struct{
+	mem_heap_t*	heap;	/*!< in the first block this is != NULL
+				if dynamic allocation has been needed */
+	ulint		used;	/*!< number of data bytes used in this block;
+				DYN_BLOCK_FULL_FLAG is set when the block
+				becomes full */
+	byte		data[DYN_ARRAY_DATA_SIZE];
+				/*!< storage for array elements */
+	UT_LIST_BASE_NODE_T(dyn_block_t) base;
+				/*!< linear list of dyn blocks: this node is
+				used only in the first block */
+	UT_LIST_NODE_T(dyn_block_t) list;
+				/*!< linear list node: used in all blocks */
+#ifdef UNIV_DEBUG
+	ulint		buf_end;/*!< only in the debug version: if dyn
+				array is opened, this is the buffer
+				end offset, else this is 0 */
+	ulint		magic_n;/*!< magic number (DYN_BLOCK_MAGIC_N) */
+#endif
+};
+
+
+#ifndef UNIV_NONINL
+#include "dyn0dyn.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/dyn0dyn.ic b/storage/innodb_plugin/include/dyn0dyn.ic
similarity index 62%
rename from storage/innobase/include/dyn0dyn.ic
rename to storage/innodb_plugin/include/dyn0dyn.ic
index fcb3c17287a..110e674abff 100644
--- a/storage/innobase/include/dyn0dyn.ic
+++ b/storage/innodb_plugin/include/dyn0dyn.ic
@@ -1,42 +1,61 @@
-/******************************************************
-The dynamically allocated array
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dyn0dyn.ic
+The dynamically allocated array
 
 Created 2/5/1996 Heikki Tuuri
 *******************************************************/
 
+/** Value of dyn_block_struct::magic_n */
 #define DYN_BLOCK_MAGIC_N	375767
+/** Flag for dyn_block_struct::used that indicates a full block */
 #define DYN_BLOCK_FULL_FLAG	0x1000000UL
 
-/****************************************************************
-Adds a new block to a dyn array. */
-
+/************************************************************//**
+Adds a new block to a dyn array.
+@return	created block */
+UNIV_INTERN
 dyn_block_t*
 dyn_array_add_block(
 /*================*/
-				/* out: created block */
-	dyn_array_t*	arr);	/* in: dyn array */
+	dyn_array_t*	arr);	/*!< in: dyn array */
 
 
-/****************************************************************
+/************************************************************//**
 Gets the first block in a dyn array. */
 UNIV_INLINE
 dyn_block_t*
 dyn_array_get_first_block(
 /*======================*/
-	dyn_array_t*	arr)	/* in: dyn array */
+	dyn_array_t*	arr)	/*!< in: dyn array */
 {
 	return(arr);
 }
 
-/****************************************************************
+/************************************************************//**
 Gets the last block in a dyn array. */
 UNIV_INLINE
 dyn_block_t*
 dyn_array_get_last_block(
 /*=====================*/
-	dyn_array_t*	arr)	/* in: dyn array */
+	dyn_array_t*	arr)	/*!< in: dyn array */
 {
 	if (arr->heap == NULL) {
 
@@ -46,15 +65,15 @@ dyn_array_get_last_block(
 	return(UT_LIST_GET_LAST(arr->base));
 }
 
-/************************************************************************
-Gets the next block in a dyn array. */
+/********************************************************************//**
+Gets the next block in a dyn array.
+@return	pointer to next, NULL if end of list */
 UNIV_INLINE
 dyn_block_t*
 dyn_array_get_next_block(
 /*=====================*/
-				/* out: pointer to next, NULL if end of list */
-	dyn_array_t*	arr,	/* in: dyn array */
-	dyn_block_t*	block)	/* in: dyn array block */
+	dyn_array_t*	arr,	/*!< in: dyn array */
+	dyn_block_t*	block)	/*!< in: dyn array block */
 {
 	ut_ad(arr && block);
 
@@ -67,42 +86,42 @@ dyn_array_get_next_block(
 	return(UT_LIST_GET_NEXT(list, block));
 }
 
-/************************************************************************
-Gets the number of used bytes in a dyn array block. */
+/********************************************************************//**
+Gets the number of used bytes in a dyn array block.
+@return	number of bytes used */
 UNIV_INLINE
 ulint
 dyn_block_get_used(
 /*===============*/
-				/* out: number of bytes used */
-	dyn_block_t*	block)	/* in: dyn array block */
+	dyn_block_t*	block)	/*!< in: dyn array block */
 {
 	ut_ad(block);
 
 	return((block->used) & ~DYN_BLOCK_FULL_FLAG);
 }
 
-/************************************************************************
-Gets pointer to the start of data in a dyn array block. */
+/********************************************************************//**
+Gets pointer to the start of data in a dyn array block.
+@return	pointer to data */
 UNIV_INLINE
 byte*
 dyn_block_get_data(
 /*===============*/
-				/* out: pointer to data */
-	dyn_block_t*	block)	/* in: dyn array block */
+	dyn_block_t*	block)	/*!< in: dyn array block */
 {
 	ut_ad(block);
 
 	return(block->data);
 }
 
-/*************************************************************************
-Initializes a dynamic array. */
+/*********************************************************************//**
+Initializes a dynamic array.
+@return	initialized dyn array */
 UNIV_INLINE
 dyn_array_t*
 dyn_array_create(
 /*=============*/
-				/* out: initialized dyn array */
-	dyn_array_t*	arr)	/* in: pointer to a memory buffer of
+	dyn_array_t*	arr)	/*!< in: pointer to a memory buffer of
 				size sizeof(dyn_array_t) */
 {
 	ut_ad(arr);
@@ -120,13 +139,13 @@ dyn_array_create(
 	return(arr);
 }
 
-/****************************************************************
+/************************************************************//**
 Frees a dynamic array. */
 UNIV_INLINE
 void
 dyn_array_free(
 /*===========*/
-	dyn_array_t*	arr)	/* in: dyn array */
+	dyn_array_t*	arr)	/*!< in: dyn array */
 {
 	if (arr->heap != NULL) {
 		mem_heap_free(arr->heap);
@@ -137,16 +156,16 @@ dyn_array_free(
 #endif
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Makes room on top of a dyn array and returns a pointer to the added element.
-The caller must copy the element to the pointer returned. */
+The caller must copy the element to the pointer returned.
+@return	pointer to the element */
 UNIV_INLINE
 void*
 dyn_array_push(
 /*===========*/
-				/* out: pointer to the element */
-	dyn_array_t*	arr,	/* in: dynamic array */
-	ulint		size)	/* in: size in bytes of the element */
+	dyn_array_t*	arr,	/*!< in: dynamic array */
+	ulint		size)	/*!< in: size in bytes of the element */
 {
 	dyn_block_t*	block;
 	ulint		used;
@@ -177,17 +196,17 @@ dyn_array_push(
 	return((block->data) + used);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Makes room on top of a dyn array and returns a pointer to a buffer in it.
 After copying the elements, the caller must close the buffer using
-dyn_array_close. */
+dyn_array_close.
+@return	pointer to the buffer */
 UNIV_INLINE
 byte*
 dyn_array_open(
 /*===========*/
-				/* out: pointer to the buffer */
-	dyn_array_t*	arr,	/* in: dynamic array */
-	ulint		size)	/* in: size in bytes of the buffer; MUST be
+	dyn_array_t*	arr,	/*!< in: dynamic array */
+	ulint		size)	/*!< in: size in bytes of the buffer; MUST be
 				smaller than DYN_ARRAY_DATA_SIZE! */
 {
 	dyn_block_t*	block;
@@ -223,14 +242,14 @@ dyn_array_open(
 	return((block->data) + used);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Closes the buffer returned by dyn_array_open. */
 UNIV_INLINE
 void
 dyn_array_close(
 /*============*/
-	dyn_array_t*	arr,	/* in: dynamic array */
-	byte*		ptr)	/* in: buffer space from ptr up was not used */
+	dyn_array_t*	arr,	/*!< in: dynamic array */
+	byte*		ptr)	/*!< in: buffer space from ptr up was not used */
 {
 	dyn_block_t*	block;
 
@@ -250,15 +269,15 @@ dyn_array_close(
 #endif
 }
 
-/****************************************************************
-Returns pointer to an element in dyn array. */
+/************************************************************//**
+Returns pointer to an element in dyn array.
+@return	pointer to element */
 UNIV_INLINE
 void*
 dyn_array_get_element(
 /*==================*/
-				/* out: pointer to element */
-	dyn_array_t*	arr,	/* in: dyn array */
-	ulint		pos)	/* in: position of element as bytes
+	dyn_array_t*	arr,	/*!< in: dyn array */
+	ulint		pos)	/*!< in: position of element as bytes
 				from array start */
 {
 	dyn_block_t*	block;
@@ -288,14 +307,14 @@ dyn_array_get_element(
 	return(block->data + pos);
 }
 
-/****************************************************************
-Returns the size of stored data in a dyn array. */
+/************************************************************//**
+Returns the size of stored data in a dyn array.
+@return	data size in bytes */
 UNIV_INLINE
 ulint
 dyn_array_get_data_size(
 /*====================*/
-				/* out: data size in bytes */
-	dyn_array_t*	arr)	/* in: dyn array */
+	dyn_array_t*	arr)	/*!< in: dyn array */
 {
 	dyn_block_t*	block;
 	ulint		sum	= 0;
@@ -319,15 +338,15 @@ dyn_array_get_data_size(
 	return(sum);
 }
 
-/************************************************************
+/********************************************************//**
 Pushes n bytes to a dyn array. */
 UNIV_INLINE
 void
 dyn_push_string(
 /*============*/
-	dyn_array_t*	arr,	/* in: dyn array */
-	const byte*	str,	/* in: string to write */
-	ulint		len)	/* in: string length */
+	dyn_array_t*	arr,	/*!< in: dyn array */
+	const byte*	str,	/*!< in: string to write */
+	ulint		len)	/*!< in: string length */
 {
 	ulint	n_copied;
 
diff --git a/storage/innobase/include/eval0eval.h b/storage/innodb_plugin/include/eval0eval.h
similarity index 50%
rename from storage/innobase/include/eval0eval.h
rename to storage/innodb_plugin/include/eval0eval.h
index f950512adfd..60aefd8d453 100644
--- a/storage/innobase/include/eval0eval.h
+++ b/storage/innodb_plugin/include/eval0eval.h
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/eval0eval.h
 SQL evaluator: evaluates simple data structures, like expressions, in
 a query graph
 
-(c) 1997 Innobase Oy
-
 Created 12/29/1997 Heikki Tuuri
 *******************************************************/
 
@@ -15,79 +32,79 @@ Created 12/29/1997 Heikki Tuuri
 #include "pars0sym.h"
 #include "pars0pars.h"
 
-/*********************************************************************
+/*****************************************************************//**
 Free the buffer from global dynamic memory for a value of a que_node,
 if it has been allocated in the above function. The freeing for pushed
 column values is done in sel_col_prefetch_buf_free. */
-
+UNIV_INTERN
 void
 eval_node_free_val_buf(
 /*===================*/
-	que_node_t*	node);	/* in: query graph node */
-/*********************************************************************
+	que_node_t*	node);	/*!< in: query graph node */
+/*****************************************************************//**
 Evaluates a symbol table symbol. */
 UNIV_INLINE
 void
 eval_sym(
 /*=====*/
-	sym_node_t*	sym_node);	/* in: symbol table node */
-/*********************************************************************
+	sym_node_t*	sym_node);	/*!< in: symbol table node */
+/*****************************************************************//**
 Evaluates an expression. */
 UNIV_INLINE
 void
 eval_exp(
 /*=====*/
-	que_node_t*	exp_node);	/* in: expression */
-/*********************************************************************
+	que_node_t*	exp_node);	/*!< in: expression */
+/*****************************************************************//**
 Sets an integer value as the value of an expression node. */
 UNIV_INLINE
 void
 eval_node_set_int_val(
 /*==================*/
-	que_node_t*	node,	/* in: expression node */
-	lint		val);	/* in: value to set */
-/*********************************************************************
-Gets an integer value from an expression node. */
+	que_node_t*	node,	/*!< in: expression node */
+	lint		val);	/*!< in: value to set */
+/*****************************************************************//**
+Gets an integer value from an expression node.
+@return	integer value */
 UNIV_INLINE
 lint
 eval_node_get_int_val(
 /*==================*/
-				/* out: integer value */
-	que_node_t*	node);	/* in: expression node */
-/*********************************************************************
+	que_node_t*	node);	/*!< in: expression node */
+/*****************************************************************//**
 Copies a binary string value as the value of a query graph node. Allocates a
 new buffer if necessary. */
 UNIV_INLINE
 void
 eval_node_copy_and_alloc_val(
 /*=========================*/
-	que_node_t*	node,	/* in: query graph node */
-	byte*		str,	/* in: binary string */
-	ulint		len);	/* in: string length or UNIV_SQL_NULL */
-/*********************************************************************
+	que_node_t*	node,	/*!< in: query graph node */
+	const byte*	str,	/*!< in: binary string */
+	ulint		len);	/*!< in: string length or UNIV_SQL_NULL */
+/*****************************************************************//**
 Copies a query node value to another node. */
 UNIV_INLINE
 void
 eval_node_copy_val(
 /*===============*/
-	que_node_t*	node1,	/* in: node to copy to */
-	que_node_t*	node2);	/* in: node to copy from */
-/*********************************************************************
-Gets a iboolean value from a query node. */
+	que_node_t*	node1,	/*!< in: node to copy to */
+	que_node_t*	node2);	/*!< in: node to copy from */
+/*****************************************************************//**
+Gets a iboolean value from a query node.
+@return	iboolean value */
 UNIV_INLINE
 ibool
 eval_node_get_ibool_val(
 /*====================*/
-				/* out: iboolean value */
-	que_node_t*	node);	/* in: query graph node */
-/*********************************************************************
-Evaluates a comparison node. */
-
+	que_node_t*	node);	/*!< in: query graph node */
+/*****************************************************************//**
+Evaluates a comparison node.
+@return	the result of the comparison */
+UNIV_INTERN
 ibool
 eval_cmp(
 /*=====*/
-					/* out: the result of the comparison */
-	func_node_t*	cmp_node);	/* in: comparison node */
+	func_node_t*	cmp_node);	/*!< in: comparison node */
 
 
 #ifndef UNIV_NONINL
diff --git a/storage/innobase/include/eval0eval.ic b/storage/innodb_plugin/include/eval0eval.ic
similarity index 65%
rename from storage/innobase/include/eval0eval.ic
rename to storage/innodb_plugin/include/eval0eval.ic
index caffa2e0bfd..fe767f39b00 100644
--- a/storage/innobase/include/eval0eval.ic
+++ b/storage/innodb_plugin/include/eval0eval.ic
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/eval0eval.ic
 SQL evaluator: evaluates simple data structures, like expressions, in
 a query graph
 
-(c) 1997 Innobase Oy
-
 Created 12/29/1997 Heikki Tuuri
 *******************************************************/
 
@@ -11,41 +28,41 @@ Created 12/29/1997 Heikki Tuuri
 #include "rem0cmp.h"
 #include "pars0grm.h"
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates a function node. */
-
+UNIV_INTERN
 void
 eval_func(
 /*======*/
-	func_node_t*	func_node);	/* in: function node */
-/*********************************************************************
+	func_node_t*	func_node);	/*!< in: function node */
+/*****************************************************************//**
 Allocate a buffer from global dynamic memory for a value of a que_node.
 NOTE that this memory must be explicitly freed when the query graph is
 freed. If the node already has allocated buffer, that buffer is freed
 here. NOTE that this is the only function where dynamic memory should be
-allocated for a query node val field. */
-
+allocated for a query node val field.
+@return	pointer to allocated buffer */
+UNIV_INTERN
 byte*
 eval_node_alloc_val_buf(
 /*====================*/
-				/* out: pointer to allocated buffer */
-	que_node_t*	node,	/* in: query graph node; sets the val field
+	que_node_t*	node,	/*!< in: query graph node; sets the val field
 				data field to point to the new buffer, and
 				len field equal to size */
-	ulint		size);	/* in: buffer size */
+	ulint		size);	/*!< in: buffer size */
 
 
-/*********************************************************************
-Allocates a new buffer if needed. */
+/*****************************************************************//**
+Allocates a new buffer if needed.
+@return	pointer to buffer */
 UNIV_INLINE
 byte*
 eval_node_ensure_val_buf(
 /*=====================*/
-				/* out: pointer to buffer */
-	que_node_t*	node,	/* in: query graph node; sets the val field
+	que_node_t*	node,	/*!< in: query graph node; sets the val field
 				data field to point to the new buffer, and
 				len field equal to size */
-	ulint		size)	/* in: buffer size */
+	ulint		size)	/*!< in: buffer size */
 {
 	dfield_t*	dfield;
 	byte*		data;
@@ -63,13 +80,13 @@ eval_node_ensure_val_buf(
 	return(data);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates a symbol table symbol. */
 UNIV_INLINE
 void
 eval_sym(
 /*=====*/
-	sym_node_t*	sym_node)	/* in: symbol table node */
+	sym_node_t*	sym_node)	/*!< in: symbol table node */
 {
 
 	ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
@@ -83,13 +100,13 @@ eval_sym(
 	}
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Evaluates an expression. */
 UNIV_INLINE
 void
 eval_exp(
 /*=====*/
-	que_node_t*	exp_node)	/* in: expression */
+	que_node_t*	exp_node)	/*!< in: expression */
 {
 	if (que_node_get_type(exp_node) == QUE_NODE_SYMBOL) {
 
@@ -101,14 +118,14 @@ eval_exp(
 	eval_func(exp_node);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Sets an integer value as the value of an expression node. */
 UNIV_INLINE
 void
 eval_node_set_int_val(
 /*==================*/
-	que_node_t*	node,	/* in: expression node */
-	lint		val)	/* in: value to set */
+	que_node_t*	node,	/*!< in: expression node */
+	lint		val)	/*!< in: value to set */
 {
 	dfield_t*	dfield;
 	byte*		data;
@@ -126,14 +143,14 @@ eval_node_set_int_val(
 	mach_write_to_4(data, (ulint)val);
 }
 
-/*********************************************************************
-Gets an integer non-SQL null value from an expression node. */
+/*****************************************************************//**
+Gets an integer non-SQL null value from an expression node.
+@return	integer value */
 UNIV_INLINE
 lint
 eval_node_get_int_val(
 /*==================*/
-				/* out: integer value */
-	que_node_t*	node)	/* in: expression node */
+	que_node_t*	node)	/*!< in: expression node */
 {
 	dfield_t*	dfield;
 
@@ -144,14 +161,14 @@ eval_node_get_int_val(
 	return((int)mach_read_from_4(dfield_get_data(dfield)));
 }
 
-/*********************************************************************
-Gets a iboolean value from a query node. */
+/*****************************************************************//**
+Gets a iboolean value from a query node.
+@return	iboolean value */
 UNIV_INLINE
 ibool
 eval_node_get_ibool_val(
 /*====================*/
-				/* out: iboolean value */
-	que_node_t*	node)	/* in: query graph node */
+	que_node_t*	node)	/*!< in: query graph node */
 {
 	dfield_t*	dfield;
 	byte*		data;
@@ -165,14 +182,14 @@ eval_node_get_ibool_val(
 	return(mach_read_from_1(data));
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Sets a iboolean value as the value of a function node. */
 UNIV_INLINE
 void
 eval_node_set_ibool_val(
 /*====================*/
-	func_node_t*	func_node,	/* in: function node */
-	ibool		val)		/* in: value to set */
+	func_node_t*	func_node,	/*!< in: function node */
+	ibool		val)		/*!< in: value to set */
 {
 	dfield_t*	dfield;
 	byte*		data;
@@ -192,16 +209,16 @@ eval_node_set_ibool_val(
 	mach_write_to_1(data, val);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Copies a binary string value as the value of a query graph node. Allocates a
 new buffer if necessary. */
 UNIV_INLINE
 void
 eval_node_copy_and_alloc_val(
 /*=========================*/
-	que_node_t*	node,	/* in: query graph node */
-	byte*		str,	/* in: binary string */
-	ulint		len)	/* in: string length or UNIV_SQL_NULL */
+	que_node_t*	node,	/*!< in: query graph node */
+	const byte*	str,	/*!< in: binary string */
+	ulint		len)	/*!< in: string length or UNIV_SQL_NULL */
 {
 	byte*		data;
 
@@ -216,14 +233,14 @@ eval_node_copy_and_alloc_val(
 	ut_memcpy(data, str, len);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Copies a query node value to another node. */
 UNIV_INLINE
 void
 eval_node_copy_val(
 /*===============*/
-	que_node_t*	node1,	/* in: node to copy to */
-	que_node_t*	node2)	/* in: node to copy from */
+	que_node_t*	node1,	/*!< in: node to copy to */
+	que_node_t*	node2)	/*!< in: node to copy from */
 {
 	dfield_t*	dfield2;
 
diff --git a/storage/innodb_plugin/include/eval0proc.h b/storage/innodb_plugin/include/eval0proc.h
new file mode 100644
index 00000000000..13e2e365320
--- /dev/null
+++ b/storage/innodb_plugin/include/eval0proc.h
@@ -0,0 +1,104 @@
+/*****************************************************************************
+
+Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/eval0proc.h
+Executes SQL stored procedures and their control structures
+
+Created 1/20/1998 Heikki Tuuri
+*******************************************************/
+
+#ifndef eval0proc_h
+#define eval0proc_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "pars0sym.h"
+#include "pars0pars.h"
+
+/**********************************************************************//**
+Performs an execution step of a procedure node.
+@return	query thread to run next or NULL */
+UNIV_INLINE
+que_thr_t*
+proc_step(
+/*======*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of an if-statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+if_step(
+/*====*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of a while-statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+while_step(
+/*=======*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of a for-loop node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+for_step(
+/*=====*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of an assignment statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+assign_step(
+/*========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of a procedure call node.
+@return	query thread to run next or NULL */
+UNIV_INLINE
+que_thr_t*
+proc_eval_step(
+/*===========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of an exit statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+exit_step(
+/*======*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of a return-statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+return_step(
+/*========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+
+
+#ifndef UNIV_NONINL
+#include "eval0proc.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/eval0proc.ic b/storage/innodb_plugin/include/eval0proc.ic
new file mode 100644
index 00000000000..c602af0a694
--- /dev/null
+++ b/storage/innodb_plugin/include/eval0proc.ic
@@ -0,0 +1,88 @@
+/*****************************************************************************
+
+Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/eval0proc.ic
+Executes SQL stored procedures and their control structures
+
+Created 1/20/1998 Heikki Tuuri
+*******************************************************/
+
+#include "pars0pars.h"
+#include "que0que.h"
+#include "eval0eval.h"
+
+/**********************************************************************//**
+Performs an execution step of a procedure node.
+@return	query thread to run next or NULL */
+UNIV_INLINE
+que_thr_t*
+proc_step(
+/*======*/
+	que_thr_t*	thr)	/*!< in: query thread */
+{
+	proc_node_t*	node;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+	ut_ad(que_node_get_type(node) == QUE_NODE_PROC);
+
+	if (thr->prev_node == que_node_get_parent(node)) {
+		/* Start execution from the first statement in the statement
+		list */
+
+		thr->run_node = node->stat_list;
+	} else {
+		/* Move to the next statement */
+		ut_ad(que_node_get_next(thr->prev_node) == NULL);
+
+		thr->run_node = NULL;
+	}
+
+	if (thr->run_node == NULL) {
+		thr->run_node = que_node_get_parent(node);
+	}
+
+	return(thr);
+}
+
+/**********************************************************************//**
+Performs an execution step of a procedure call node.
+@return	query thread to run next or NULL */
+UNIV_INLINE
+que_thr_t*
+proc_eval_step(
+/*===========*/
+	que_thr_t*	thr)	/*!< in: query thread */
+{
+	func_node_t*	node;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+	ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
+
+	/* Evaluate the procedure */
+
+	eval_exp(node);
+
+	thr->run_node = que_node_get_parent(node);
+
+	return(thr);
+}
diff --git a/storage/innobase/include/fil0fil.h b/storage/innodb_plugin/include/fil0fil.h
similarity index 57%
rename from storage/innobase/include/fil0fil.h
rename to storage/innodb_plugin/include/fil0fil.h
index 6b8fd4b03d5..a36deaf16ce 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innodb_plugin/include/fil0fil.h
@@ -1,7 +1,24 @@
-/******************************************************
-The low-level file system
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fil0fil.h
+The low-level file system
 
 Created 10/25/1995 Heikki Tuuri
 *******************************************************/
@@ -10,59 +27,62 @@ Created 10/25/1995 Heikki Tuuri
 #define fil0fil_h
 
 #include "univ.i"
+#ifndef UNIV_HOTBACKUP
 #include "sync0rw.h"
+#endif /* !UNIV_HOTBACKUP */
 #include "dict0types.h"
-#include "ibuf0types.h"
 #include "ut0byte.h"
 #include "os0file.h"
 
-/* When mysqld is run, the default directory "." is the mysqld datadir, but in
-ibbackup we must set it explicitly; the patgh must NOT contain the trailing
-'/' or '\' */
+/** When mysqld is run, the default directory "." is the mysqld datadir,
+but in the MySQL Embedded Server Library and ibbackup it is not the default
+directory, and we must set the base file path explicitly */
 extern const char*	fil_path_to_mysql_datadir;
 
-/* Initial size of a single-table tablespace in pages */
+/** Initial size of a single-table tablespace in pages */
 #define FIL_IBD_FILE_INITIAL_SIZE	4
 
-/* 'null' (undefined) page offset in the context of file spaces */
+/** 'null' (undefined) page offset in the context of file spaces */
 #define	FIL_NULL	ULINT32_UNDEFINED
 
 /* Space address data type; this is intended to be used when
 addresses accurate to a byte are stored in file pages. If the page part
 of the address is FIL_NULL, the address is considered undefined. */
 
-typedef	byte	fil_faddr_t;	/* 'type' definition in C: an address
+typedef	byte	fil_faddr_t;	/*!< 'type' definition in C: an address
 				stored in a file page is a string of bytes */
 #define FIL_ADDR_PAGE	0	/* first in address is the page offset */
 #define	FIL_ADDR_BYTE	4	/* then comes 2-byte byte offset within page*/
 
 #define	FIL_ADDR_SIZE	6	/* address size is 6 bytes */
 
-/* A struct for storing a space address FIL_ADDR, when it is used
+/** A struct for storing a space address FIL_ADDR, when it is used
 in C program data structures. */
 
 typedef struct fil_addr_struct	fil_addr_t;
+/** File space address */
 struct fil_addr_struct{
-	ulint	page;		/* page number within a space */
-	ulint	boffset;	/* byte offset within the page */
+	ulint	page;		/*!< page number within a space */
+	ulint	boffset;	/*!< byte offset within the page */
 };
 
-/* Null file address */
+/** The null file address */
 extern fil_addr_t	fil_addr_null;
 
-/* The byte offsets on a file page for various variables */
-#define FIL_PAGE_SPACE_OR_CHKSUM 0	/* in < MySQL-4.0.14 space id the
+/** The byte offsets on a file page for various variables @{ */
+#define FIL_PAGE_SPACE_OR_CHKSUM 0	/*!< in < MySQL-4.0.14 space id the
 					page belongs to (== 0) but in later
 					versions the 'new' checksum of the
 					page */
-#define FIL_PAGE_OFFSET		4	/* page offset inside space */
-#define FIL_PAGE_PREV		8	/* if there is a 'natural' predecessor
-					of the page, its offset.
-					Otherwise FIL_NULL.
-					This field is not set on BLOB pages,
-					which are stored as a singly-linked
-					list.  See also FIL_PAGE_NEXT. */
-#define FIL_PAGE_NEXT		12	/* if there is a 'natural' successor
+#define FIL_PAGE_OFFSET		4	/*!< page offset inside space */
+#define FIL_PAGE_PREV		8	/*!< if there is a 'natural'
+					predecessor of the page, its
+					offset.  Otherwise FIL_NULL.
+					This field is not set on BLOB
+					pages, which are stored as a
+					singly-linked list.  See also
+					FIL_PAGE_NEXT. */
+#define FIL_PAGE_NEXT		12	/*!< if there is a 'natural' successor
 					of the page, its offset.
 					Otherwise FIL_NULL.
 					B-tree index pages
@@ -72,9 +92,9 @@ extern fil_addr_t	fil_addr_null;
 					FIL_PAGE_PREV and FIL_PAGE_NEXT
 					in the collation order of the
 					smallest user record on each page. */
-#define FIL_PAGE_LSN		16	/* lsn of the end of the newest
+#define FIL_PAGE_LSN		16	/*!< lsn of the end of the newest
 					modification log record to the page */
-#define	FIL_PAGE_TYPE		24	/* file page type: FIL_PAGE_INDEX,...,
+#define	FIL_PAGE_TYPE		24	/*!< file page type: FIL_PAGE_INDEX,...,
 					2 bytes.
 
 					The contents of this field can only
@@ -89,224 +109,246 @@ extern fil_addr_t	fil_addr_null;
 					MySQL/InnoDB 5.1.7 or later, the
 					contents of this field is valid
 					for all uncompressed pages. */
-#define FIL_PAGE_FILE_FLUSH_LSN	26	/* this is only defined for the
+#define FIL_PAGE_FILE_FLUSH_LSN	26	/*!< this is only defined for the
 					first page in a data file: the file
 					has been flushed to disk at least up
 					to this lsn */
-#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID  34 /* starting from 4.1.x this
+#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID  34 /*!< starting from 4.1.x this
 					contains the space id of the page */
-#define FIL_PAGE_DATA		38	/* start of the data on the page */
-
-/* File page trailer */
-#define FIL_PAGE_END_LSN_OLD_CHKSUM 8	/* the low 4 bytes of this are used
+#define FIL_PAGE_DATA		38	/*!< start of the data on the page */
+/* @} */
+/** File page trailer @{ */
+#define FIL_PAGE_END_LSN_OLD_CHKSUM 8	/*!< the low 4 bytes of this are used
 					to store the page checksum, the
 					last 4 bytes should be identical
 					to the last 4 bytes of FIL_PAGE_LSN */
-#define FIL_PAGE_DATA_END	8
+#define FIL_PAGE_DATA_END	8	/*!< size of the page trailer */
+/* @} */
 
-/* File page types (values of FIL_PAGE_TYPE) */
-#define FIL_PAGE_INDEX		17855	/* B-tree node */
-#define FIL_PAGE_UNDO_LOG	2	/* Undo log page */
-#define FIL_PAGE_INODE		3	/* Index node */
-#define FIL_PAGE_IBUF_FREE_LIST	4	/* Insert buffer free list */
+/** File page types (values of FIL_PAGE_TYPE) @{ */
+#define FIL_PAGE_INDEX		17855	/*!< B-tree node */
+#define FIL_PAGE_UNDO_LOG	2	/*!< Undo log page */
+#define FIL_PAGE_INODE		3	/*!< Index node */
+#define FIL_PAGE_IBUF_FREE_LIST	4	/*!< Insert buffer free list */
 /* File page types introduced in MySQL/InnoDB 5.1.7 */
-#define FIL_PAGE_TYPE_ALLOCATED	0	/* Freshly allocated page */
-#define FIL_PAGE_IBUF_BITMAP	5	/* Insert buffer bitmap */
-#define FIL_PAGE_TYPE_SYS	6	/* System page */
-#define FIL_PAGE_TYPE_TRX_SYS	7	/* Transaction system data */
-#define FIL_PAGE_TYPE_FSP_HDR	8	/* File space header */
-#define FIL_PAGE_TYPE_XDES	9	/* Extent descriptor page */
-#define FIL_PAGE_TYPE_BLOB	10	/* Uncompressed BLOB page */
+#define FIL_PAGE_TYPE_ALLOCATED	0	/*!< Freshly allocated page */
+#define FIL_PAGE_IBUF_BITMAP	5	/*!< Insert buffer bitmap */
+#define FIL_PAGE_TYPE_SYS	6	/*!< System page */
+#define FIL_PAGE_TYPE_TRX_SYS	7	/*!< Transaction system data */
+#define FIL_PAGE_TYPE_FSP_HDR	8	/*!< File space header */
+#define FIL_PAGE_TYPE_XDES	9	/*!< Extent descriptor page */
+#define FIL_PAGE_TYPE_BLOB	10	/*!< Uncompressed BLOB page */
+#define FIL_PAGE_TYPE_ZBLOB	11	/*!< First compressed BLOB page */
+#define FIL_PAGE_TYPE_ZBLOB2	12	/*!< Subsequent compressed BLOB page */
+/* @} */
 
-/* Space types */
-#define FIL_TABLESPACE		501
-#define FIL_LOG			502
+/** Space types @{ */
+#define FIL_TABLESPACE		501	/*!< tablespace */
+#define FIL_LOG			502	/*!< redo log */
+/* @} */
 
+/** The number of fsyncs done to the log */
 extern ulint	fil_n_log_flushes;
 
+/** Number of pending redo log flushes */
 extern ulint	fil_n_pending_log_flushes;
+/** Number of pending tablespace flushes */
 extern ulint	fil_n_pending_tablespace_flushes;
 
 
-/***********************************************************************
-Returns the version number of a tablespace, -1 if not found. */
-
-ib_longlong
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Returns the version number of a tablespace, -1 if not found.
+@return version number, -1 if the tablespace does not exist in the
+memory cache */
+UNIV_INTERN
+ib_int64_t
 fil_space_get_version(
 /*==================*/
-			/* out: version number, -1 if the tablespace does not
-			exist in the memory cache */
-	ulint	id);	/* in: space id */
-/***********************************************************************
-Returns the latch of a file space. */
-
+	ulint	id);	/*!< in: space id */
+/*******************************************************************//**
+Returns the latch of a file space.
+@return	latch protecting storage allocation */
+UNIV_INTERN
 rw_lock_t*
 fil_space_get_latch(
 /*================*/
-			/* out: latch protecting storage allocation */
-	ulint	id);	/* in: space id */
-/***********************************************************************
-Returns the type of a file space. */
-
+	ulint	id,	/*!< in: space id */
+	ulint*	zip_size);/*!< out: compressed page size, or
+			0 for uncompressed tablespaces */
+/*******************************************************************//**
+Returns the type of a file space.
+@return	FIL_TABLESPACE or FIL_LOG */
+UNIV_INTERN
 ulint
 fil_space_get_type(
 /*===============*/
-			/* out: FIL_TABLESPACE or FIL_LOG */
-	ulint	id);	/* in: space id */
-/***********************************************************************
-Returns the ibuf data of a file space. */
-
-ibuf_data_t*
-fil_space_get_ibuf_data(
-/*====================*/
-			/* out: ibuf data for this space */
-	ulint	id);	/* in: space id */
-/***********************************************************************
+	ulint	id);	/*!< in: space id */
+#endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
 Appends a new file to the chain of files of a space. File must be closed. */
-
+UNIV_INTERN
 void
 fil_node_create(
 /*============*/
-	const char*	name,	/* in: file name (file must be closed) */
-	ulint		size,	/* in: file size in database blocks, rounded
+	const char*	name,	/*!< in: file name (file must be closed) */
+	ulint		size,	/*!< in: file size in database blocks, rounded
 				downwards to an integer */
-	ulint		id,	/* in: space id where to append */
-	ibool		is_raw);/* in: TRUE if a raw device or
+	ulint		id,	/*!< in: space id where to append */
+	ibool		is_raw);/*!< in: TRUE if a raw device or
 				a raw disk partition */
-/********************************************************************
+#ifdef UNIV_LOG_ARCHIVE
+/****************************************************************//**
 Drops files from the start of a file space, so that its size is cut by
 the amount given. */
-
+UNIV_INTERN
 void
 fil_space_truncate_start(
 /*=====================*/
-	ulint	id,		/* in: space id */
-	ulint	trunc_len);	/* in: truncate by this much; it is an error
+	ulint	id,		/*!< in: space id */
+	ulint	trunc_len);	/*!< in: truncate by this much; it is an error
 				if this does not equal to the combined size of
 				some initial files in the space */
-/***********************************************************************
+#endif /* UNIV_LOG_ARCHIVE */
+/*******************************************************************//**
 Creates a space memory object and puts it to the 'fil system' hash table. If
-there is an error, prints an error message to the .err log. */
-
+there is an error, prints an error message to the .err log.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_space_create(
 /*=============*/
-				/* out: TRUE if success */
-	const char*	name,	/* in: space name */
-	ulint		id,	/* in: space id */
-	ulint		purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */
-/***********************************************************************
+	const char*	name,	/*!< in: space name */
+	ulint		id,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size, or
+				0 for uncompressed tablespaces */
+	ulint		purpose);/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
+/*******************************************************************//**
 Frees a space object from a the tablespace memory cache. Closes the files in
-the chain but does not delete them. */
-
+the chain but does not delete them.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_space_free(
 /*===========*/
-			/* out: TRUE if success */
-	ulint	id);	/* in: space id */
-/***********************************************************************
+	ulint	id);	/*!< in: space id */
+/*******************************************************************//**
 Returns the size of the space in pages. The tablespace must be cached in the
-memory cache. */
-
+memory cache.
+@return	space size, 0 if space not found */
+UNIV_INTERN
 ulint
 fil_space_get_size(
 /*===============*/
-			/* out: space size, 0 if space not found */
-	ulint	id);	/* in: space id */
-/***********************************************************************
+	ulint	id);	/*!< in: space id */
+/*******************************************************************//**
+Returns the flags of the space. The tablespace must be cached
+in the memory cache.
+@return	flags, ULINT_UNDEFINED if space not found */
+UNIV_INTERN
+ulint
+fil_space_get_flags(
+/*================*/
+	ulint	id);	/*!< in: space id */
+/*******************************************************************//**
+Returns the compressed page size of the space, or 0 if the space
+is not compressed. The tablespace must be cached in the memory cache.
+@return	compressed page size, ULINT_UNDEFINED if space not found */
+UNIV_INTERN
+ulint
+fil_space_get_zip_size(
+/*===================*/
+	ulint	id);	/*!< in: space id */
+/*******************************************************************//**
 Checks if the pair space, page_no refers to an existing page in a tablespace
-file space. The tablespace must be cached in the memory cache. */
-
+file space. The tablespace must be cached in the memory cache.
+@return	TRUE if the address is meaningful */
+UNIV_INTERN
 ibool
 fil_check_adress_in_tablespace(
 /*===========================*/
-			/* out: TRUE if the address is meaningful */
-	ulint	id,	/* in: space id */
-	ulint	page_no);/* in: page number */
-/********************************************************************
+	ulint	id,	/*!< in: space id */
+	ulint	page_no);/*!< in: page number */
+/****************************************************************//**
 Initializes the tablespace memory cache. */
-
+UNIV_INTERN
 void
 fil_init(
 /*=====*/
-	ulint	max_n_open);	/* in: max number of open files */
-/***********************************************************************
+	ulint	hash_size,	/*!< in: hash table size */
+	ulint	max_n_open);	/*!< in: max number of open files */
+/*******************************************************************//**
 Opens all log files and system tablespace data files. They stay open until the
 database server shutdown. This should be called at a server startup after the
 space objects for the log and the system tablespace have been created. The
 purpose of this operation is to make sure we never run out of file descriptors
 if we need to read from the insert buffer or to write to the log. */
-
+UNIV_INTERN
 void
 fil_open_log_and_system_tablespace_files(void);
 /*==========================================*/
-/***********************************************************************
+/*******************************************************************//**
 Closes all open files. There must not be any pending i/o's or not flushed
 modifications in the files. */
-
+UNIV_INTERN
 void
 fil_close_all_files(void);
 /*=====================*/
-/***********************************************************************
+/*******************************************************************//**
 Sets the max tablespace id counter if the given number is bigger than the
 previous value. */
-
+UNIV_INTERN
 void
 fil_set_max_space_id_if_bigger(
 /*===========================*/
-	ulint	max_id);/* in: maximum known id */
-/********************************************************************
-Initializes the ibuf data structure for space 0 == the system tablespace.
-This can be called after the file space headers have been created and the
-dictionary system has been initialized. */
-
-void
-fil_ibuf_init_at_db_start(void);
-/*===========================*/
-/********************************************************************
+	ulint	max_id);/*!< in: maximum known id */
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
 Writes the flushed lsn and the latest archived log number to the page
-header of the first page of each data file in the system tablespace. */
-
+header of the first page of each data file in the system tablespace.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
 ulint
 fil_write_flushed_lsn_to_data_files(
 /*================================*/
-				/* out: DB_SUCCESS or error number */
-	dulint	lsn,		/* in: lsn to write */
-	ulint	arch_log_no);	/* in: latest archived log file number */
-/***********************************************************************
+	ib_uint64_t	lsn,		/*!< in: lsn to write */
+	ulint		arch_log_no);	/*!< in: latest archived log
+					file number */
+/*******************************************************************//**
 Reads the flushed lsn and arch no fields from a data file at database
 startup. */
-
+UNIV_INTERN
 void
 fil_read_flushed_lsn_and_arch_log_no(
 /*=================================*/
-	os_file_t data_file,		/* in: open data file */
-	ibool	one_read_already,	/* in: TRUE if min and max parameters
-					below already contain sensible data */
+	os_file_t	data_file,		/*!< in: open data file */
+	ibool		one_read_already,	/*!< in: TRUE if min and max
+						parameters below already
+						contain sensible data */
 #ifdef UNIV_LOG_ARCHIVE
-	ulint*	min_arch_log_no,	/* in/out: */
-	ulint*	max_arch_log_no,	/* in/out: */
+	ulint*		min_arch_log_no,	/*!< in/out: */
+	ulint*		max_arch_log_no,	/*!< in/out: */
 #endif /* UNIV_LOG_ARCHIVE */
-	dulint*	min_flushed_lsn,	/* in/out: */
-	dulint*	max_flushed_lsn);	/* in/out: */
-/***********************************************************************
+	ib_uint64_t*	min_flushed_lsn,	/*!< in/out: */
+	ib_uint64_t*	max_flushed_lsn);	/*!< in/out: */
+/*******************************************************************//**
 Increments the count of pending insert buffer page merges, if space is not
-being deleted. */
-
+being deleted.
+@return	TRUE if being deleted, and ibuf merges should be skipped */
+UNIV_INTERN
 ibool
 fil_inc_pending_ibuf_merges(
 /*========================*/
-			/* out: TRUE if being deleted, and ibuf merges should
-			be skipped */
-	ulint	id);	/* in: space id */
-/***********************************************************************
+	ulint	id);	/*!< in: space id */
+/*******************************************************************//**
 Decrements the count of pending insert buffer page merges. */
-
+UNIV_INTERN
 void
 fil_decr_pending_ibuf_merges(
 /*=========================*/
-	ulint	id);	/* in: space id */
-/***********************************************************************
+	ulint	id);	/*!< in: space id */
+#endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
 Parses the body of a log record written about an .ibd file operation. That is,
 the log record part after the standard (type, space id, page no) header of the
 log record.
@@ -317,88 +359,91 @@ at that path does not exist yet. If the database directory for the file to be
 created does not exist, then we create the directory, too.
 
 Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
-datadir that we should use in replaying the file operations. */
-
+datadir that we should use in replaying the file operations.
+@return end of log record, or NULL if the record was not completely
+contained between ptr and end_ptr */
+UNIV_INTERN
 byte*
 fil_op_log_parse_or_replay(
 /*=======================*/
-				/* out: end of log record, or NULL if the
-				record was not completely contained between
-				ptr and end_ptr */
-	byte*	ptr,		/* in: buffer containing the log record body,
+	byte*	ptr,		/*!< in: buffer containing the log record body,
 				or an initial segment of it, if the record does
 				not fir completely between ptr and end_ptr */
-	byte*	end_ptr,	/* in: buffer end */
-	ulint	type,		/* in: the type of this log record */
-	ibool	do_replay,	/* in: TRUE if we want to replay the
-				operation, and not just parse the log record */
-	ulint	space_id);	/* in: if do_replay is TRUE, the space id of
-				the tablespace in question; otherwise
-				ignored */
-/***********************************************************************
+	byte*	end_ptr,	/*!< in: buffer end */
+	ulint	type,		/*!< in: the type of this log record */
+	ulint	space_id,	/*!< in: the space id of the tablespace in
+				question, or 0 if the log record should
+				only be parsed but not replayed */
+	ulint	log_flags);	/*!< in: redo log flags
+				(stored in the page number parameter) */
+/*******************************************************************//**
 Deletes a single-table tablespace. The tablespace must be cached in the
-memory cache. */
-
+memory cache.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_delete_tablespace(
 /*==================*/
-			/* out: TRUE if success */
-	ulint	id);	/* in: space id */
-/***********************************************************************
+	ulint	id);	/*!< in: space id */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
 Discards a single-table tablespace. The tablespace must be cached in the
 memory cache. Discarding is like deleting a tablespace, but
 1) we do not drop the table from the data dictionary;
 2) we remove all insert buffer entries for the tablespace immediately; in DROP
 TABLE they are only removed gradually in the background;
 3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
-as it originally had. */
-
+as it originally had.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_discard_tablespace(
 /*===================*/
-			/* out: TRUE if success */
-	ulint	id);	/* in: space id */
-/***********************************************************************
+	ulint	id);	/*!< in: space id */
+#endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
 Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache. */
-
+tablespace memory cache.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_rename_tablespace(
 /*==================*/
-					/* out: TRUE if success */
-	const char*	old_name,	/* in: old table name in the standard
+	const char*	old_name,	/*!< in: old table name in the standard
 					databasename/tablename format of
 					InnoDB, or NULL if we do the rename
 					based on the space id only */
-	ulint		id,		/* in: space id */
-	const char*	new_name);	/* in: new table name in the standard
+	ulint		id,		/*!< in: space id */
+	const char*	new_name);	/*!< in: new table name in the standard
 					databasename/tablename format
 					of InnoDB */
 
-/***********************************************************************
+/*******************************************************************//**
 Creates a new single-table tablespace to a database directory of MySQL.
 Database directories are under the 'datadir' of MySQL. The datadir is the
 directory of a running mysqld program. We can refer to it by simply the
 path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
-dir of the mysqld server. */
-
+dir of the mysqld server.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
 ulint
 fil_create_new_single_table_tablespace(
 /*===================================*/
-					/* out: DB_SUCCESS or error code */
-	ulint*		space_id,	/* in/out: space id; if this is != 0,
+	ulint*		space_id,	/*!< in/out: space id; if this is != 0,
 					then this is an input parameter,
 					otherwise output */
-	const char*	tablename,	/* in: the table name in the usual
+	const char*	tablename,	/*!< in: the table name in the usual
 					databasename/tablename format
 					of InnoDB, or a dir path to a temp
 					table */
-	ibool		is_temp,	/* in: TRUE if a table created with
+	ibool		is_temp,	/*!< in: TRUE if a table created with
 					CREATE TEMPORARY TABLE */
-	ulint		size);		/* in: the initial size of the
+	ulint		flags,		/*!< in: tablespace flags */
+	ulint		size);		/*!< in: the initial size of the
 					tablespace file in pages,
 					must be >= FIL_IBD_FILE_INITIAL_SIZE */
-/************************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
 Tries to open a single-table tablespace and optionally checks the space id is
 right in it. If does not succeed, prints an error message to the .err log. This
 function is used to open a tablespace when we start up mysqld, and also in
@@ -406,23 +451,24 @@ IMPORT TABLESPACE.
 NOTE that we assume this operation is used either at the database startup
 or under the protection of the dictionary mutex, so that two users cannot
 race here. This operation does not leave the file associated with the
-tablespace open, but closes it after we have looked at the space id in it. */
-
+tablespace open, but closes it after we have looked at the space id in it.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_open_single_table_tablespace(
 /*=============================*/
-					/* out: TRUE if success */
-	ibool		check_space_id,	/* in: should we check that the space
+	ibool		check_space_id,	/*!< in: should we check that the space
 					id in the file is right; we assume
 					that this function runs much faster
 					if no check is made, since accessing
 					the file inode probably is much
 					faster (the OS caches them) than
 					accessing the first page of the file */
-	ulint		id,		/* in: space id */
-	const char*	name);		/* in: table name in the
+	ulint		id,		/*!< in: space id */
+	ulint		flags,		/*!< in: tablespace flags */
+	const char*	name);		/*!< in: table name in the
 					databasename/tablename format */
-/************************************************************************
+/********************************************************************//**
 It is possible, though very improbable, that the lsn's in the tablespace to be
 imported have risen above the current system lsn, if a lengthy purge, ibuf
 merge, or rollback was performed on a backup taken with ibbackup. If that is
@@ -430,150 +476,149 @@ the case, reset page lsn's in the file. We assume that mysqld was shut down
 after it performed these cleanup operations on the .ibd file, so that it at
 the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
 first page of the .ibd file, and we can determine whether we need to reset the
-lsn's just by looking at that flush lsn. */
-
+lsn's just by looking at that flush lsn.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 fil_reset_too_high_lsns(
 /*====================*/
-					/* out: TRUE if success */
-	const char*	name,		/* in: table name in the
+	const char*	name,		/*!< in: table name in the
 					databasename/tablename format */
-	dulint		current_lsn);	/* in: reset lsn's if the lsn stamped
+	ib_uint64_t	current_lsn);	/*!< in: reset lsn's if the lsn stamped
 					to FIL_PAGE_FILE_FLUSH_LSN in the
 					first page is too high */
-/************************************************************************
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
 At the server startup, if we need crash recovery, scans the database
 directories under the MySQL datadir, looking for .ibd files. Those files are
 single-table tablespaces. We need to know the space id in each of them so that
 we know into which file we should look to check the contents of a page stored
 in the doublewrite buffer, also to know where to apply log records where the
-space id is != 0. */
-
+space id is != 0.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
 ulint
 fil_load_single_table_tablespaces(void);
 /*===================================*/
-			/* out: DB_SUCCESS or error number */
-/************************************************************************
+/********************************************************************//**
 If we need crash recovery, and we have called
 fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
 we can call this function to print an error message of orphaned .ibd files
 for which there is not a data dictionary entry with a matching table name
 and space id. */
-
+UNIV_INTERN
 void
 fil_print_orphaned_tablespaces(void);
 /*================================*/
-/***********************************************************************
+/*******************************************************************//**
 Returns TRUE if a single-table tablespace does not exist in the memory cache,
-or is being deleted there. */
-
+or is being deleted there.
+@return	TRUE if does not exist or is being\ deleted */
+UNIV_INTERN
 ibool
 fil_tablespace_deleted_or_being_deleted_in_mem(
 /*===========================================*/
-				/* out: TRUE if does not exist or is being\
-				deleted */
-	ulint		id,	/* in: space id */
-	ib_longlong	version);/* in: tablespace_version should be this; if
+	ulint		id,	/*!< in: space id */
+	ib_int64_t	version);/*!< in: tablespace_version should be this; if
 				you pass -1 as the value of this, then this
 				parameter is ignored */
-/***********************************************************************
-Returns TRUE if a single-table tablespace exists in the memory cache. */
-
+/*******************************************************************//**
+Returns TRUE if a single-table tablespace exists in the memory cache.
+@return	TRUE if exists */
+UNIV_INTERN
 ibool
 fil_tablespace_exists_in_mem(
 /*=========================*/
-			/* out: TRUE if exists */
-	ulint	id);	/* in: space id */
-/***********************************************************************
+	ulint	id);	/*!< in: space id */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
 Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
 cache. Note that if we have not done a crash recovery at the database startup,
-there may be many tablespaces which are not yet in the memory cache. */
-
+there may be many tablespaces which are not yet in the memory cache.
+@return	TRUE if a matching tablespace exists in the memory cache */
+UNIV_INTERN
 ibool
 fil_space_for_table_exists_in_mem(
 /*==============================*/
-					/* out: TRUE if a matching tablespace
-					exists in the memory cache */
-	ulint		id,		/* in: space id */
-	const char*	name,		/* in: table name in the standard
+	ulint		id,		/*!< in: space id */
+	const char*	name,		/*!< in: table name in the standard
 					'databasename/tablename' format or
 					the dir path to a temp table */
-	ibool		is_temp,	/* in: TRUE if created with CREATE
+	ibool		is_temp,	/*!< in: TRUE if created with CREATE
 					TEMPORARY TABLE */
-	ibool		mark_space,	/* in: in crash recovery, at database
+	ibool		mark_space,	/*!< in: in crash recovery, at database
 					startup we mark all spaces which have
 					an associated table in the InnoDB
 					data dictionary, so that
 					we can print a warning about orphaned
 					tablespaces */
 	ibool		print_error_if_does_not_exist);
-					/* in: print detailed error
+					/*!< in: print detailed error
 					information to the .err log if a
 					matching tablespace is not found from
 					memory */
-/**************************************************************************
-Tries to extend a data file so that it would accommodate the number of pages
-given. The tablespace must be cached in the memory cache. If the space is big
-enough already, does nothing. */
-
-ibool
-fil_extend_space_to_desired_size(
-/*=============================*/
-				/* out: TRUE if success */
-	ulint*	actual_size,	/* out: size of the space after extension;
-				if we ran out of disk space this may be lower
-				than the desired size */
-	ulint	space_id,	/* in: space id */
-	ulint	size_after_extend);/* in: desired size in pages after the
-				extension; if the current space size is bigger
-				than this already, the function does nothing */
-#ifdef UNIV_HOTBACKUP
-/************************************************************************
+#else /* !UNIV_HOTBACKUP */
+/********************************************************************//**
 Extends all tablespaces to the size stored in the space header. During the
 ibbackup --apply-log phase we extended the spaces on-demand so that log records
 could be appllied, but that may have left spaces still too small compared to
 the size stored in the space header. */
-
+UNIV_INTERN
 void
 fil_extend_tablespaces_to_stored_len(void);
 /*======================================*/
-#endif
-/***********************************************************************
-Tries to reserve free extents in a file space. */
-
+#endif /* !UNIV_HOTBACKUP */
+/**********************************************************************//**
+Tries to extend a data file so that it would accommodate the number of pages
+given. The tablespace must be cached in the memory cache. If the space is big
+enough already, does nothing.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+fil_extend_space_to_desired_size(
+/*=============================*/
+	ulint*	actual_size,	/*!< out: size of the space after extension;
+				if we ran out of disk space this may be lower
+				than the desired size */
+	ulint	space_id,	/*!< in: space id */
+	ulint	size_after_extend);/*!< in: desired size in pages after the
+				extension; if the current space size is bigger
+				than this already, the function does nothing */
+/*******************************************************************//**
+Tries to reserve free extents in a file space.
+@return	TRUE if succeed */
+UNIV_INTERN
 ibool
 fil_space_reserve_free_extents(
 /*===========================*/
-				/* out: TRUE if succeed */
-	ulint	id,		/* in: space id */
-	ulint	n_free_now,	/* in: number of free extents now */
-	ulint	n_to_reserve);	/* in: how many one wants to reserve */
-/***********************************************************************
+	ulint	id,		/*!< in: space id */
+	ulint	n_free_now,	/*!< in: number of free extents now */
+	ulint	n_to_reserve);	/*!< in: how many one wants to reserve */
+/*******************************************************************//**
 Releases free extents in a file space. */
-
+UNIV_INTERN
 void
 fil_space_release_free_extents(
 /*===========================*/
-	ulint	id,		/* in: space id */
-	ulint	n_reserved);	/* in: how many one reserved */
-/***********************************************************************
+	ulint	id,		/*!< in: space id */
+	ulint	n_reserved);	/*!< in: how many one reserved */
+/*******************************************************************//**
 Gets the number of reserved extents. If the database is silent, this number
 should be zero. */
-
+UNIV_INTERN
 ulint
 fil_space_get_n_reserved_extents(
 /*=============================*/
-	ulint	id);		/* in: space id */
-/************************************************************************
-Reads or writes data. This operation is asynchronous (aio). */
-
+	ulint	id);		/*!< in: space id */
+/********************************************************************//**
+Reads or writes data. This operation is asynchronous (aio).
+@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
+i/o on a tablespace which does not exist */
+UNIV_INTERN
 ulint
 fil_io(
 /*===*/
-				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
-				if we are trying to do i/o on a tablespace
-				which does not exist */
-	ulint	type,		/* in: OS_FILE_READ or OS_FILE_WRITE,
+	ulint	type,		/*!< in: OS_FILE_READ or OS_FILE_WRITE,
 				ORed to OS_FILE_LOG, if a log i/o
 				and ORed to OS_AIO_SIMULATED_WAKE_LATER
 				if simulated aio and we want to post a
@@ -582,133 +627,98 @@ fil_io(
 				because i/os are not actually handled until
 				all have been posted: use with great
 				caution! */
-	ibool	sync,		/* in: TRUE if synchronous aio is desired */
-	ulint	space_id,	/* in: space id */
-	ulint	block_offset,	/* in: offset in number of blocks */
-	ulint	byte_offset,	/* in: remainder of offset in bytes; in
+	ibool	sync,		/*!< in: TRUE if synchronous aio is desired */
+	ulint	space_id,	/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	block_offset,	/*!< in: offset in number of blocks */
+	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in
 				aio this must be divisible by the OS block
 				size */
-	ulint	len,		/* in: how many bytes to read or write; this
+	ulint	len,		/*!< in: how many bytes to read or write; this
 				must not cross a file boundary; in aio this
 				must be a block size multiple */
-	void*	buf,		/* in/out: buffer where to store read data
+	void*	buf,		/*!< in/out: buffer where to store read data
 				or from where to write; in aio this must be
 				appropriately aligned */
-	void*	message);	/* in: message for aio handler if non-sync
+	void*	message);	/*!< in: message for aio handler if non-sync
 				aio used, else ignored */
-/************************************************************************
-Reads data from a space to a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space. */
-
-ulint
-fil_read(
-/*=====*/
-				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
-				if we are trying to do i/o on a tablespace
-				which does not exist */
-	ibool	sync,		/* in: TRUE if synchronous aio is desired */
-	ulint	space_id,	/* in: space id */
-	ulint	block_offset,	/* in: offset in number of blocks */
-	ulint	byte_offset,	/* in: remainder of offset in bytes; in aio
-				this must be divisible by the OS block size */
-	ulint	len,		/* in: how many bytes to read; this must not
-				cross a file boundary; in aio this must be a
-				block size multiple */
-	void*	buf,		/* in/out: buffer where to store data read;
-				in aio this must be appropriately aligned */
-	void*	message);	/* in: message for aio handler if non-sync
-				aio used, else ignored */
-/************************************************************************
-Writes data to a space from a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space. */
-
-ulint
-fil_write(
-/*======*/
-				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
-				if we are trying to do i/o on a tablespace
-				which does not exist */
-	ibool	sync,		/* in: TRUE if synchronous aio is desired */
-	ulint	space_id,	/* in: space id */
-	ulint	block_offset,	/* in: offset in number of blocks */
-	ulint	byte_offset,	/* in: remainder of offset in bytes; in aio
-				this must be divisible by the OS block size */
-	ulint	len,		/* in: how many bytes to write; this must
-				not cross a file boundary; in aio this must
-				be a block size multiple */
-	void*	buf,		/* in: buffer from which to write; in aio
-				this must be appropriately aligned */
-	void*	message);	/* in: message for aio handler if non-sync
-				aio used, else ignored */
-/**************************************************************************
+/**********************************************************************//**
 Waits for an aio operation to complete. This function is used to write the
 handler for completed requests. The aio array of pending requests is divided
 into segments (see os0file.c for more info). The thread specifies which
 segment it wants to wait for. */
-
+UNIV_INTERN
 void
 fil_aio_wait(
 /*=========*/
-	ulint	segment);	/* in: the number of the segment in the aio
+	ulint	segment);	/*!< in: the number of the segment in the aio
 				array to wait for */
-/**************************************************************************
+/**********************************************************************//**
 Flushes to disk possible writes cached by the OS. If the space does not exist
 or is being dropped, does not do anything. */
-
+UNIV_INTERN
 void
 fil_flush(
 /*======*/
-	ulint	space_id);	/* in: file space id (this can be a group of
+	ulint	space_id);	/*!< in: file space id (this can be a group of
 				log files or a tablespace of the database) */
-/**************************************************************************
+/**********************************************************************//**
 Flushes to disk writes in file spaces of the given type possibly cached by
 the OS. */
-
+UNIV_INTERN
 void
 fil_flush_file_spaces(
 /*==================*/
-	ulint	purpose);	/* in: FIL_TABLESPACE, FIL_LOG */
-/**********************************************************************
-Checks the consistency of the tablespace cache. */
-
+	ulint	purpose);	/*!< in: FIL_TABLESPACE, FIL_LOG */
+/******************************************************************//**
+Checks the consistency of the tablespace cache.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 fil_validate(void);
 /*==============*/
-			/* out: TRUE if ok */
-/************************************************************************
-Returns TRUE if file address is undefined. */
-
+/********************************************************************//**
+Returns TRUE if file address is undefined.
+@return	TRUE if undefined */
+UNIV_INTERN
 ibool
 fil_addr_is_null(
 /*=============*/
-				/* out: TRUE if undefined */
-	fil_addr_t	addr);	/* in: address */
-/************************************************************************
-Accessor functions for a file page */
-
+	fil_addr_t	addr);	/*!< in: address */
+/********************************************************************//**
+Get the predecessor of a file page.
+@return	FIL_PAGE_PREV */
+UNIV_INTERN
 ulint
-fil_page_get_prev(byte*	page);
+fil_page_get_prev(
+/*==============*/
+	const byte*	page);	/*!< in: file page */
+/********************************************************************//**
+Get the successor of a file page.
+@return	FIL_PAGE_NEXT */
+UNIV_INTERN
 ulint
-fil_page_get_next(byte*	page);
-/*************************************************************************
+fil_page_get_next(
+/*==============*/
+	const byte*	page);	/*!< in: file page */
+/*********************************************************************//**
 Sets the file page type. */
-
+UNIV_INTERN
 void
 fil_page_set_type(
 /*==============*/
-	byte*	page,	/* in: file page */
-	ulint	type);	/* in: type */
-/*************************************************************************
-Gets the file page type. */
-
+	byte*	page,	/*!< in/out: file page */
+	ulint	type);	/*!< in: type */
+/*********************************************************************//**
+Gets the file page type.
+@return type; NOTE that if the type has not been written to page, the
+return value not defined */
+UNIV_INTERN
 ulint
 fil_page_get_type(
 /*==============*/
-			/* out: type; NOTE that if the type has not been
-			written to page, the return value not defined */
-	byte*	page);	/* in: file page */
+	const byte*	page);	/*!< in: file page */
 
 
 typedef	struct fil_space_struct	fil_space_t;
diff --git a/storage/innodb_plugin/include/fsp0fsp.h b/storage/innodb_plugin/include/fsp0fsp.h
new file mode 100644
index 00000000000..5f7dc58eedc
--- /dev/null
+++ b/storage/innodb_plugin/include/fsp0fsp.h
@@ -0,0 +1,359 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fsp0fsp.h
+File space management
+
+Created 12/18/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef fsp0fsp_h
+#define fsp0fsp_h
+
+#include "univ.i"
+
+#include "mtr0mtr.h"
+#include "fut0lst.h"
+#include "ut0byte.h"
+#include "page0types.h"
+#include "fsp0types.h"
+
+/**********************************************************************//**
+Initializes the file space system. */
+UNIV_INTERN
+void
+fsp_init(void);
+/*==========*/
+/**********************************************************************//**
+Gets the current free limit of the system tablespace.  The free limit
+means the place of the first page which has never been put to the the
+free list for allocation.  The space above that address is initialized
+to zero.  Sets also the global variable log_fsp_current_free_limit.
+@return	free limit in megabytes */
+UNIV_INTERN
+ulint
+fsp_header_get_free_limit(void);
+/*===========================*/
+/**********************************************************************//**
+Gets the size of the system tablespace from the tablespace header.  If
+we do not have an auto-extending data file, this should be equal to
+the size of the data files.  If there is an auto-extending data file,
+this can be smaller.
+@return	size in pages */
+UNIV_INTERN
+ulint
+fsp_header_get_tablespace_size(void);
+/*================================*/
+/**********************************************************************//**
+Reads the file space size stored in the header page.
+@return	tablespace size stored in the space header */
+UNIV_INTERN
+ulint
+fsp_get_size_low(
+/*=============*/
+	page_t*	page);	/*!< in: header page (page 0 in the tablespace) */
+/**********************************************************************//**
+Reads the space id from the first page of a tablespace.
+@return	space id, ULINT UNDEFINED if error */
+UNIV_INTERN
+ulint
+fsp_header_get_space_id(
+/*====================*/
+	const page_t*	page);	/*!< in: first page of a tablespace */
+/**********************************************************************//**
+Reads the space flags from the first page of a tablespace.
+@return	flags */
+UNIV_INTERN
+ulint
+fsp_header_get_flags(
+/*=================*/
+	const page_t*	page);	/*!< in: first page of a tablespace */
+/**********************************************************************//**
+Reads the compressed page size from the first page of a tablespace.
+@return	compressed page size in bytes, or 0 if uncompressed */
+UNIV_INTERN
+ulint
+fsp_header_get_zip_size(
+/*====================*/
+	const page_t*	page);	/*!< in: first page of a tablespace */
+/**********************************************************************//**
+Writes the space id and compressed page size to a tablespace header.
+This function is used past the buffer pool when we in fil0fil.c create
+a new single-table tablespace. */
+UNIV_INTERN
+void
+fsp_header_init_fields(
+/*===================*/
+	page_t*	page,		/*!< in/out: first page in the space */
+	ulint	space_id,	/*!< in: space id */
+	ulint	flags);		/*!< in: tablespace flags (FSP_SPACE_FLAGS):
+				0, or table->flags if newer than COMPACT */
+/**********************************************************************//**
+Initializes the space header of a new created space and creates also the
+insert buffer tree root if space == 0. */
+UNIV_INTERN
+void
+fsp_header_init(
+/*============*/
+	ulint	space,		/*!< in: space id */
+	ulint	size,		/*!< in: current size in blocks */
+	mtr_t*	mtr);		/*!< in: mini-transaction handle */
+/**********************************************************************//**
+Increases the space size field of a space. */
+UNIV_INTERN
+void
+fsp_header_inc_size(
+/*================*/
+	ulint	space,	/*!< in: space id */
+	ulint	size_inc,/*!< in: size increment in pages */
+	mtr_t*	mtr);	/*!< in: mini-transaction handle */
+/**********************************************************************//**
+Creates a new segment.
+@return the block where the segment header is placed, x-latched, NULL
+if could not create segment because of lack of space */
+UNIV_INTERN
+buf_block_t*
+fseg_create(
+/*========*/
+	ulint	space,	/*!< in: space id */
+	ulint	page,	/*!< in: page where the segment header is placed: if
+			this is != 0, the page must belong to another segment,
+			if this is 0, a new page will be allocated and it
+			will belong to the created segment */
+	ulint	byte_offset, /*!< in: byte offset of the created segment header
+			on the page */
+	mtr_t*	mtr);	/*!< in: mtr */
+/**********************************************************************//**
+Creates a new segment.
+@return the block where the segment header is placed, x-latched, NULL
+if could not create segment because of lack of space */
+UNIV_INTERN
+buf_block_t*
+fseg_create_general(
+/*================*/
+	ulint	space,	/*!< in: space id */
+	ulint	page,	/*!< in: page where the segment header is placed: if
+			this is != 0, the page must belong to another segment,
+			if this is 0, a new page will be allocated and it
+			will belong to the created segment */
+	ulint	byte_offset, /*!< in: byte offset of the created segment header
+			on the page */
+	ibool	has_done_reservation, /*!< in: TRUE if the caller has already
+			done the reservation for the pages with
+			fsp_reserve_free_extents (at least 2 extents: one for
+			the inode and the other for the segment) then there is
+			no need to do the check for this individual
+			operation */
+	mtr_t*	mtr);	/*!< in: mtr */
+/**********************************************************************//**
+Calculates the number of pages reserved by a segment, and how many pages are
+currently used.
+@return	number of reserved pages */
+UNIV_INTERN
+ulint
+fseg_n_reserved_pages(
+/*==================*/
+	fseg_header_t*	header,	/*!< in: segment header */
+	ulint*		used,	/*!< out: number of pages used (<= reserved) */
+	mtr_t*		mtr);	/*!< in: mtr handle */
+/**********************************************************************//**
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize
+file space fragmentation.
+@return	the allocated page offset FIL_NULL if no page could be allocated */
+UNIV_INTERN
+ulint
+fseg_alloc_free_page(
+/*=================*/
+	fseg_header_t*	seg_header, /*!< in: segment header */
+	ulint		hint,	/*!< in: hint of which page would be desirable */
+	byte		direction, /*!< in: if the new page is needed because
+				of an index page split, and records are
+				inserted there in order, into which
+				direction they go alphabetically: FSP_DOWN,
+				FSP_UP, FSP_NO_DIR */
+	mtr_t*		mtr);	/*!< in: mtr handle */
+/**********************************************************************//**
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize file space
+fragmentation.
+@return	allocated page offset, FIL_NULL if no page could be allocated */
+UNIV_INTERN
+ulint
+fseg_alloc_free_page_general(
+/*=========================*/
+	fseg_header_t*	seg_header,/*!< in: segment header */
+	ulint		hint,	/*!< in: hint of which page would be desirable */
+	byte		direction,/*!< in: if the new page is needed because
+				of an index page split, and records are
+				inserted there in order, into which
+				direction they go alphabetically: FSP_DOWN,
+				FSP_UP, FSP_NO_DIR */
+	ibool		has_done_reservation, /*!< in: TRUE if the caller has
+				already done the reservation for the page
+				with fsp_reserve_free_extents, then there
+				is no need to do the check for this individual
+				page */
+	mtr_t*		mtr);	/*!< in: mtr handle */
+/**********************************************************************//**
+Reserves free pages from a tablespace. All mini-transactions which may
+use several pages from the tablespace should call this function beforehand
+and reserve enough free extents so that they certainly will be able
+to do their operation, like a B-tree page split, fully. Reservations
+must be released with function fil_space_release_free_extents!
+
+The alloc_type below has the following meaning: FSP_NORMAL means an
+operation which will probably result in more space usage, like an
+insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
+deleting rows, then this allocation will in the long run result in
+less space usage (after a purge); FSP_CLEANING means allocation done
+in a physical record delete (like in a purge) or other cleaning operation
+which will result in less space usage in the long run. We prefer the latter
+two types of allocation: when space is scarce, FSP_NORMAL allocations
+will not succeed, but the latter two allocations will succeed, if possible.
+The purpose is to avoid dead end where the database is full but the
+user cannot free any space because these freeing operations temporarily
+reserve some space.
+
+Single-table tablespaces whose size is < 32 pages are a special case. In this
+function we would liberally reserve several 64 page extents for every page
+split or merge in a B-tree. But we do not want to waste disk space if the table
+only occupies < 32 pages. That is why we apply different rules in that special
+case, just ensuring that there are 3 free pages available.
+@return	TRUE if we were able to make the reservation */
+UNIV_INTERN
+ibool
+fsp_reserve_free_extents(
+/*=====================*/
+	ulint*	n_reserved,/*!< out: number of extents actually reserved; if we
+			return TRUE and the tablespace size is < 64 pages,
+			then this can be 0, otherwise it is n_ext */
+	ulint	space,	/*!< in: space id */
+	ulint	n_ext,	/*!< in: number of extents to reserve */
+	ulint	alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
+	mtr_t*	mtr);	/*!< in: mtr */
+/**********************************************************************//**
+This function should be used to get information on how much we still
+will be able to insert new data to the database without running out the
+tablespace. Only free extents are taken into account and we also subtract
+the safety margin required by the above function fsp_reserve_free_extents.
+@return	available space in kB */
+UNIV_INTERN
+ullint
+fsp_get_available_space_in_free_extents(
+/*====================================*/
+	ulint	space);	/*!< in: space id */
+/**********************************************************************//**
+Frees a single page of a segment. */
+UNIV_INTERN
+void
+fseg_free_page(
+/*===========*/
+	fseg_header_t*	seg_header, /*!< in: segment header */
+	ulint		space,	/*!< in: space id */
+	ulint		page,	/*!< in: page offset */
+	mtr_t*		mtr);	/*!< in: mtr handle */
+/**********************************************************************//**
+Frees part of a segment. This function can be used to free a segment
+by repeatedly calling this function in different mini-transactions.
+Doing the freeing in a single mini-transaction might result in
+too big a mini-transaction.
+@return	TRUE if freeing completed */
+UNIV_INTERN
+ibool
+fseg_free_step(
+/*===========*/
+	fseg_header_t*	header,	/*!< in, own: segment header; NOTE: if the header
+				resides on the first page of the frag list
+				of the segment, this pointer becomes obsolete
+				after the last freeing step */
+	mtr_t*		mtr);	/*!< in: mtr */
+/**********************************************************************//**
+Frees part of a segment. Differs from fseg_free_step because this function
+leaves the header page unfreed.
+@return	TRUE if freeing completed, except the header page */
+UNIV_INTERN
+ibool
+fseg_free_step_not_header(
+/*======================*/
+	fseg_header_t*	header,	/*!< in: segment header which must reside on
+				the first fragment page of the segment */
+	mtr_t*		mtr);	/*!< in: mtr */
+/***********************************************************************//**
+Checks if a page address is an extent descriptor page address.
+@return	TRUE if a descriptor page */
+UNIV_INLINE
+ibool
+fsp_descr_page(
+/*===========*/
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	page_no);/*!< in: page number */
+/***********************************************************//**
+Parses a redo log record of a file page init.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+fsp_parse_init_file_page(
+/*=====================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr, /*!< in: buffer end */
+	buf_block_t*	block);	/*!< in: block or NULL */
+/*******************************************************************//**
+Validates the file space system and its segments.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+fsp_validate(
+/*=========*/
+	ulint	space);	/*!< in: space id */
+/*******************************************************************//**
+Prints info of a file space. */
+UNIV_INTERN
+void
+fsp_print(
+/*======*/
+	ulint	space);	/*!< in: space id */
+#ifdef UNIV_DEBUG
+/*******************************************************************//**
+Validates a segment.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+fseg_validate(
+/*==========*/
+	fseg_header_t*	header, /*!< in: segment header */
+	mtr_t*		mtr);	/*!< in: mtr */
+#endif /* UNIV_DEBUG */
+#ifdef UNIV_BTR_PRINT
+/*******************************************************************//**
+Writes info of a segment. */
+UNIV_INTERN
+void
+fseg_print(
+/*=======*/
+	fseg_header_t*	header, /*!< in: segment header */
+	mtr_t*		mtr);	/*!< in: mtr */
+#endif /* UNIV_BTR_PRINT */
+
+#ifndef UNIV_NONINL
+#include "fsp0fsp.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/fsp0fsp.ic b/storage/innodb_plugin/include/fsp0fsp.ic
new file mode 100644
index 00000000000..434c370b527
--- /dev/null
+++ b/storage/innodb_plugin/include/fsp0fsp.ic
@@ -0,0 +1,45 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fsp0fsp.ic
+File space management
+
+Created 12/18/1995 Heikki Tuuri
+*******************************************************/
+
+/***********************************************************************//**
+Checks if a page address is an extent descriptor page address.
+@return	TRUE if a descriptor page */
+UNIV_INLINE
+ibool
+fsp_descr_page(
+/*===========*/
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	page_no)/*!< in: page number */
+{
+	ut_ad(ut_is_2pow(zip_size));
+
+	if (!zip_size) {
+		return(UNIV_UNLIKELY((page_no & (UNIV_PAGE_SIZE - 1))
+				     == FSP_XDES_OFFSET));
+	}
+
+	return(UNIV_UNLIKELY((page_no & (zip_size - 1)) == FSP_XDES_OFFSET));
+}
diff --git a/storage/innodb_plugin/include/fsp0types.h b/storage/innodb_plugin/include/fsp0types.h
new file mode 100644
index 00000000000..496081c2346
--- /dev/null
+++ b/storage/innodb_plugin/include/fsp0types.h
@@ -0,0 +1,110 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************
+@file include/fsp0types.h
+File space management types
+
+Created May 26, 2009 Vasil Dimov
+*******************************************************/
+
+#ifndef fsp0types_h
+#define fsp0types_h
+
+#include "univ.i"
+
+#include "fil0fil.h" /* for FIL_PAGE_DATA */
+
+/** @name Flags for inserting records in order
+If records are inserted in order, there are the following
+flags to tell this (their type is made byte for the compiler
+to warn if direction and hint parameters are switched in
+fseg_alloc_free_page) */
+/* @{ */
+#define	FSP_UP		((byte)111)	/*!< alphabetically upwards */
+#define	FSP_DOWN	((byte)112)	/*!< alphabetically downwards */
+#define	FSP_NO_DIR	((byte)113)	/*!< no order */
+/* @} */
+
+/** File space extent size (one megabyte) in pages */
+#define	FSP_EXTENT_SIZE		(1 << (20 - UNIV_PAGE_SIZE_SHIFT))
+
+/** On a page of any file segment, data may be put starting from this
+offset */
+#define FSEG_PAGE_DATA		FIL_PAGE_DATA
+
+/** @name File segment header
+The file segment header points to the inode describing the file segment. */
+/* @{ */
+/** Data type for file segment header */
+typedef	byte	fseg_header_t;
+
+#define FSEG_HDR_SPACE		0	/*!< space id of the inode */
+#define FSEG_HDR_PAGE_NO	4	/*!< page number of the inode */
+#define FSEG_HDR_OFFSET		8	/*!< byte offset of the inode */
+
+#define FSEG_HEADER_SIZE	10	/*!< Length of the file system
+					header, in bytes */
+/* @} */
+
+/** Flags for fsp_reserve_free_extents @{ */
+#define FSP_NORMAL	1000000
+#define	FSP_UNDO	2000000
+#define FSP_CLEANING	3000000
+/* @} */
+
+/* Number of pages described in a single descriptor page: currently each page
+description takes less than 1 byte; a descriptor page is repeated every
+this many file pages */
+/* #define XDES_DESCRIBED_PER_PAGE		UNIV_PAGE_SIZE */
+/* This has been replaced with either UNIV_PAGE_SIZE or page_zip->size. */
+
+/** @name The space low address page map
+The pages at FSP_XDES_OFFSET and FSP_IBUF_BITMAP_OFFSET are repeated
+every XDES_DESCRIBED_PER_PAGE pages in every tablespace. */
+/* @{ */
+/*--------------------------------------*/
+#define FSP_XDES_OFFSET			0	/* !< extent descriptor */
+#define FSP_IBUF_BITMAP_OFFSET		1	/* !< insert buffer bitmap */
+				/* The ibuf bitmap pages are the ones whose
+				page number is the number above plus a
+				multiple of XDES_DESCRIBED_PER_PAGE */
+
+#define FSP_FIRST_INODE_PAGE_NO		2	/*!< in every tablespace */
+				/* The following pages exist
+				in the system tablespace (space 0). */
+#define FSP_IBUF_HEADER_PAGE_NO		3	/*!< insert buffer
+						header page, in
+						tablespace 0 */
+#define FSP_IBUF_TREE_ROOT_PAGE_NO	4	/*!< insert buffer
+						B-tree root page in
+						tablespace 0 */
+				/* The ibuf tree root page number in
+				tablespace 0; its fseg inode is on the page
+				number FSP_FIRST_INODE_PAGE_NO */
+#define FSP_TRX_SYS_PAGE_NO		5	/*!< transaction
+						system header, in
+						tablespace 0 */
+#define	FSP_FIRST_RSEG_PAGE_NO		6	/*!< first rollback segment
+						page, in tablespace 0 */
+#define FSP_DICT_HDR_PAGE_NO		7	/*!< data dictionary header
+						page, in tablespace 0 */
+/*--------------------------------------*/
+/* @} */
+
+#endif /* fsp0types_h */
diff --git a/storage/innodb_plugin/include/fut0fut.h b/storage/innodb_plugin/include/fut0fut.h
new file mode 100644
index 00000000000..dce20b3bad6
--- /dev/null
+++ b/storage/innodb_plugin/include/fut0fut.h
@@ -0,0 +1,55 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fut0fut.h
+File-based utilities
+
+Created 12/13/1995 Heikki Tuuri
+***********************************************************************/
+
+
+#ifndef fut0fut_h
+#define fut0fut_h
+
+#include "univ.i"
+
+#include "fil0fil.h"
+#include "mtr0mtr.h"
+
+/********************************************************************//**
+Gets a pointer to a file address and latches the page.
+@return pointer to a byte in a frame; the file page in the frame is
+bufferfixed and latched */
+UNIV_INLINE
+byte*
+fut_get_ptr(
+/*========*/
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	fil_addr_t	addr,	/*!< in: file address */
+	ulint		rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
+	mtr_t*		mtr);	/*!< in: mtr handle */
+
+#ifndef UNIV_NONINL
+#include "fut0fut.ic"
+#endif
+
+#endif
+
diff --git a/storage/innodb_plugin/include/fut0fut.ic b/storage/innodb_plugin/include/fut0fut.ic
new file mode 100644
index 00000000000..0b52719a055
--- /dev/null
+++ b/storage/innodb_plugin/include/fut0fut.ic
@@ -0,0 +1,56 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fut0fut.ic
+File-based utilities
+
+Created 12/13/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "sync0rw.h"
+#include "buf0buf.h"
+
+/********************************************************************//**
+Gets a pointer to a file address and latches the page.
+@return pointer to a byte in a frame; the file page in the frame is
+bufferfixed and latched */
+UNIV_INLINE
+byte*
+fut_get_ptr(
+/*========*/
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	fil_addr_t	addr,	/*!< in: file address */
+	ulint		rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
+	mtr_t*		mtr)	/*!< in: mtr handle */
+{
+	buf_block_t*	block;
+	byte*		ptr;
+
+	ut_ad(addr.boffset < UNIV_PAGE_SIZE);
+	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+
+	block = buf_page_get(space, zip_size, addr.page, rw_latch, mtr);
+	ptr = buf_block_get_frame(block) + addr.boffset;
+
+	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+
+	return(ptr);
+}
diff --git a/storage/innodb_plugin/include/fut0lst.h b/storage/innodb_plugin/include/fut0lst.h
new file mode 100644
index 00000000000..fe024c2498f
--- /dev/null
+++ b/storage/innodb_plugin/include/fut0lst.h
@@ -0,0 +1,217 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fut0lst.h
+File-based list utilities
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef fut0lst_h
+#define fut0lst_h
+
+#include "univ.i"
+
+#include "fil0fil.h"
+#include "mtr0mtr.h"
+
+
+/* The C 'types' of base node and list node: these should be used to
+write self-documenting code. Of course, the sizeof macro cannot be
+applied to these types! */
+
+typedef	byte	flst_base_node_t;
+typedef	byte	flst_node_t;
+
+/* The physical size of a list base node in bytes */
+#define	FLST_BASE_NODE_SIZE	(4 + 2 * FIL_ADDR_SIZE)
+
+/* The physical size of a list node in bytes */
+#define	FLST_NODE_SIZE		(2 * FIL_ADDR_SIZE)
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Initializes a list base node. */
+UNIV_INLINE
+void
+flst_init(
+/*======*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Adds a node as the last node in a list. */
+UNIV_INTERN
+void
+flst_add_last(
+/*==========*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node,	/*!< in: node to add */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Adds a node as the first node in a list. */
+UNIV_INTERN
+void
+flst_add_first(
+/*===========*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node,	/*!< in: node to add */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Inserts a node after another in a list. */
+UNIV_INTERN
+void
+flst_insert_after(
+/*==============*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node1,	/*!< in: node to insert after */
+	flst_node_t*		node2,	/*!< in: node to add */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Inserts a node before another in a list. */
+UNIV_INTERN
+void
+flst_insert_before(
+/*===============*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node2,	/*!< in: node to insert */
+	flst_node_t*		node3,	/*!< in: node to insert before */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Removes a node. */
+UNIV_INTERN
+void
+flst_remove(
+/*========*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node2,	/*!< in: node to remove */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Cuts off the tail of the list, including the node given. The number of
+nodes which will be removed must be provided by the caller, as this function
+does not measure the length of the tail. */
+UNIV_INTERN
+void
+flst_cut_end(
+/*=========*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node2,	/*!< in: first node to remove */
+	ulint			n_nodes,/*!< in: number of nodes to remove,
+					must be >= 1 */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Cuts off the tail of the list, not including the given node. The number of
+nodes which will be removed must be provided by the caller, as this function
+does not measure the length of the tail. */
+UNIV_INTERN
+void
+flst_truncate_end(
+/*==============*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node2,	/*!< in: first node not to remove */
+	ulint			n_nodes,/*!< in: number of nodes to remove */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list length.
+@return	length */
+UNIV_INLINE
+ulint
+flst_get_len(
+/*=========*/
+	const flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list first node address.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_get_first(
+/*===========*/
+	const flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list last node address.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_get_last(
+/*==========*/
+	const flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list next node address.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_get_next_addr(
+/*===============*/
+	const flst_node_t*	node,	/*!< in: pointer to node */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list prev node address.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_get_prev_addr(
+/*===============*/
+	const flst_node_t*	node,	/*!< in: pointer to node */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Writes a file address. */
+UNIV_INLINE
+void
+flst_write_addr(
+/*============*/
+	fil_faddr_t*	faddr,	/*!< in: pointer to file faddress */
+	fil_addr_t	addr,	/*!< in: file address */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Reads a file address.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_read_addr(
+/*===========*/
+	const fil_faddr_t*	faddr,	/*!< in: pointer to file faddress */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Validates a file-based list.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+flst_validate(
+/*==========*/
+	const flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	mtr_t*			mtr1);	/*!< in: mtr */
+/********************************************************************//**
+Prints info of a file-based list. */
+UNIV_INTERN
+void
+flst_print(
+/*=======*/
+	const flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	mtr_t*			mtr);	/*!< in: mtr */
+
+
+#ifndef UNIV_NONINL
+#include "fut0lst.ic"
+#endif
+
+#endif /* !UNIV_HOTBACKUP */
+
+#endif
diff --git a/storage/innobase/include/fut0lst.ic b/storage/innodb_plugin/include/fut0lst.ic
similarity index 52%
rename from storage/innobase/include/fut0lst.ic
rename to storage/innodb_plugin/include/fut0lst.ic
index 6c7e863b078..dcd13c61871 100644
--- a/storage/innobase/include/fut0lst.ic
+++ b/storage/innodb_plugin/include/fut0lst.ic
@@ -1,7 +1,24 @@
-/**********************************************************************
-File-based list utilities
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fut0lst.ic
+File-based list utilities
 
 Created 11/28/1995 Heikki Tuuri
 ***********************************************************************/
@@ -26,34 +43,35 @@ Created 11/28/1995 Heikki Tuuri
 				last element of the list; undefined
 				if empty list */
 
-/************************************************************************
+/********************************************************************//**
 Writes a file address. */
 UNIV_INLINE
 void
 flst_write_addr(
 /*============*/
-	fil_faddr_t*	faddr,	/* in: pointer to file faddress */
-	fil_addr_t	addr,	/* in: file address */
-	mtr_t*		mtr)	/* in: mini-transaction handle */
+	fil_faddr_t*	faddr,	/*!< in: pointer to file faddress */
+	fil_addr_t	addr,	/*!< in: file address */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
 {
 	ut_ad(faddr && mtr);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(faddr),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, faddr, MTR_MEMO_PAGE_X_FIX));
+	ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
+	ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
 
 	mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr);
 	mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset,
 			 MLOG_2BYTES, mtr);
 }
 
-/************************************************************************
-Reads a file address. */
+/********************************************************************//**
+Reads a file address.
+@return	file address */
 UNIV_INLINE
 fil_addr_t
 flst_read_addr(
 /*===========*/
-				/* out: file address */
-	fil_faddr_t*	faddr,	/* in: pointer to file faddress */
-	mtr_t*		mtr)	/* in: mini-transaction handle */
+	const fil_faddr_t*	faddr,	/*!< in: pointer to file faddress */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	fil_addr_t	addr;
 
@@ -62,86 +80,88 @@ flst_read_addr(
 	addr.page = mtr_read_ulint(faddr + FIL_ADDR_PAGE, MLOG_4BYTES, mtr);
 	addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES,
 				      mtr);
+	ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
+	ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
 	return(addr);
 }
 
-/************************************************************************
+/********************************************************************//**
 Initializes a list base node. */
 UNIV_INLINE
 void
 flst_init(
 /*======*/
-	flst_base_node_t*	base,	/* in: pointer to base node */
-	mtr_t*			mtr)	/* in: mini-transaction handle */
+	flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+
 	mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr);
 	flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
 	flst_write_addr(base + FLST_LAST, fil_addr_null, mtr);
 }
 
-/************************************************************************
-Gets list length. */
+/********************************************************************//**
+Gets list length.
+@return	length */
 UNIV_INLINE
 ulint
 flst_get_len(
 /*=========*/
-					/* out: length */
-	flst_base_node_t*	base,	/* in: pointer to base node */
-	mtr_t*			mtr)	/* in: mini-transaction handle */
+	const flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr));
 }
 
-/************************************************************************
-Gets list first node address. */
+/********************************************************************//**
+Gets list first node address.
+@return	file address */
 UNIV_INLINE
 fil_addr_t
 flst_get_first(
 /*===========*/
-					/* out: file address */
-	flst_base_node_t*	base,	/* in: pointer to base node */
-	mtr_t*			mtr)	/* in: mini-transaction handle */
+	const flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	return(flst_read_addr(base + FLST_FIRST, mtr));
 }
 
-/************************************************************************
-Gets list last node address. */
+/********************************************************************//**
+Gets list last node address.
+@return	file address */
 UNIV_INLINE
 fil_addr_t
 flst_get_last(
 /*==========*/
-					/* out: file address */
-	flst_base_node_t*	base,	/* in: pointer to base node */
-	mtr_t*			mtr)	/* in: mini-transaction handle */
+	const flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	return(flst_read_addr(base + FLST_LAST, mtr));
 }
 
-/************************************************************************
-Gets list next node address. */
+/********************************************************************//**
+Gets list next node address.
+@return	file address */
 UNIV_INLINE
 fil_addr_t
 flst_get_next_addr(
 /*===============*/
-				/* out: file address */
-	flst_node_t*	node,	/* in: pointer to node */
-	mtr_t*		mtr)	/* in: mini-transaction handle */
+	const flst_node_t*	node,	/*!< in: pointer to node */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	return(flst_read_addr(node + FLST_NEXT, mtr));
 }
 
-/************************************************************************
-Gets list prev node address. */
+/********************************************************************//**
+Gets list prev node address.
+@return	file address */
 UNIV_INLINE
 fil_addr_t
 flst_get_prev_addr(
 /*===============*/
-				/* out: file address */
-	flst_node_t*	node,	/* in: pointer to node */
-	mtr_t*		mtr)	/* in: mini-transaction handle */
+	const flst_node_t*	node,	/*!< in: pointer to node */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
 {
 	return(flst_read_addr(node + FLST_PREV, mtr));
 }
diff --git a/storage/innodb_plugin/include/ha0ha.h b/storage/innodb_plugin/include/ha0ha.h
new file mode 100644
index 00000000000..1ffbd3440aa
--- /dev/null
+++ b/storage/innodb_plugin/include/ha0ha.h
@@ -0,0 +1,241 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ha0ha.h
+The hash table with external chains
+
+Created 8/18/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef ha0ha_h
+#define ha0ha_h
+
+#include "univ.i"
+
+#include "hash0hash.h"
+#include "page0types.h"
+#include "buf0types.h"
+
+/*************************************************************//**
+Looks for an element in a hash table.
+@return pointer to the data of the first hash table node in chain
+having the fold number, NULL if not found */
+UNIV_INLINE
+void*
+ha_search_and_get_data(
+/*===================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: folded value of the searched data */
+/*********************************************************//**
+Looks for an element when we know the pointer to the data and updates
+the pointer to data if found. */
+UNIV_INTERN
+void
+ha_search_and_update_if_found_func(
+/*===============================*/
+	hash_table_t*	table,	/*!< in/out: hash table */
+	ulint		fold,	/*!< in: folded value of the searched data */
+	void*		data,	/*!< in: pointer to the data */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	buf_block_t*	new_block,/*!< in: block containing new_data */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	void*		new_data);/*!< in: new pointer to the data */
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+/** Looks for an element when we know the pointer to the data and
+updates the pointer to data if found.
+@param table		in/out: hash table
+@param fold		in: folded value of the searched data
+@param data		in: pointer to the data
+@param new_block	in: block containing new_data
+@param new_data		in: new pointer to the data */
+# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
+	ha_search_and_update_if_found_func(table,fold,data,new_block,new_data)
+#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+/** Looks for an element when we know the pointer to the data and
+updates the pointer to data if found.
+@param table		in/out: hash table
+@param fold		in: folded value of the searched data
+@param data		in: pointer to the data
+@param new_block	ignored: block containing new_data
+@param new_data		in: new pointer to the data */
+# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
+	ha_search_and_update_if_found_func(table,fold,data,new_data)
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+/*************************************************************//**
+Creates a hash table with at least n array cells.  The actual number
+of cells is chosen to be a prime number slightly bigger than n.
+@return	own: created table */
+UNIV_INTERN
+hash_table_t*
+ha_create_func(
+/*===========*/
+	ulint	n,		/*!< in: number of array cells */
+#ifdef UNIV_SYNC_DEBUG
+	ulint	mutex_level,	/*!< in: level of the mutexes in the latching
+				order: this is used in the debug version */
+#endif /* UNIV_SYNC_DEBUG */
+	ulint	n_mutexes);	/*!< in: number of mutexes to protect the
+				hash table: must be a power of 2, or 0 */
+#ifdef UNIV_SYNC_DEBUG
+/** Creates a hash table.
+@return		own: created table
+@param n_c	in: number of array cells.  The actual number of cells is
+chosen to be a slightly bigger prime number.
+@param level	in: level of the mutexes in the latching order
+@param n_m	in: number of mutexes to protect the hash table;
+		must be a power of 2, or 0 */
+# define ha_create(n_c,n_m,level) ha_create_func(n_c,level,n_m)
+#else /* UNIV_SYNC_DEBUG */
+/** Creates a hash table.
+@return		own: created table
+@param n_c	in: number of array cells.  The actual number of cells is
+chosen to be a slightly bigger prime number.
+@param level	in: level of the mutexes in the latching order
+@param n_m	in: number of mutexes to protect the hash table;
+		must be a power of 2, or 0 */
+# define ha_create(n_c,n_m,level) ha_create_func(n_c,n_m)
+#endif /* UNIV_SYNC_DEBUG */
+
+/*************************************************************//**
+Empties a hash table and frees the memory heaps. */
+UNIV_INTERN
+void
+ha_clear(
+/*=====*/
+	hash_table_t*	table);	/*!< in, own: hash table */
+
+/*************************************************************//**
+Inserts an entry into a hash table. If an entry with the same fold number
+is found, its node is updated to point to the new data, and no new node
+is inserted.
+@return	TRUE if succeed, FALSE if no more memory could be allocated */
+UNIV_INTERN
+ibool
+ha_insert_for_fold_func(
+/*====================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold,	/*!< in: folded value of data; if a node with
+				the same fold value already exists, it is
+				updated to point to the same data, and no new
+				node is created! */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	buf_block_t*	block,	/*!< in: buffer block containing the data */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	void*		data);	/*!< in: data, must not be NULL */
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+/**
+Inserts an entry into a hash table. If an entry with the same fold number
+is found, its node is updated to point to the new data, and no new node
+is inserted.
+@return	TRUE if succeed, FALSE if no more memory could be allocated
+@param t	in: hash table
+@param f	in: folded value of data
+@param b	in: buffer block containing the data
+@param d	in: data, must not be NULL */
+# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,b,d)
+#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+/**
+Inserts an entry into a hash table. If an entry with the same fold number
+is found, its node is updated to point to the new data, and no new node
+is inserted.
+@return	TRUE if succeed, FALSE if no more memory could be allocated
+@param t	in: hash table
+@param f	in: folded value of data
+@param b	ignored: buffer block containing the data
+@param d	in: data, must not be NULL */
+# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,d)
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
+/*********************************************************//**
+Looks for an element when we know the pointer to the data and deletes
+it from the hash table if found.
+@return	TRUE if found */
+UNIV_INLINE
+ibool
+ha_search_and_delete_if_found(
+/*==========================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold,	/*!< in: folded value of the searched data */
+	void*		data);	/*!< in: pointer to the data */
+#ifndef UNIV_HOTBACKUP
+/*****************************************************************//**
+Removes from the chain determined by fold all nodes whose data pointer
+points to the page given. */
+UNIV_INTERN
+void
+ha_remove_all_nodes_to_page(
+/*========================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold,	/*!< in: fold value */
+	const page_t*	page);	/*!< in: buffer page */
+/*************************************************************//**
+Validates a given range of the cells in hash table.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+ha_validate(
+/*========*/
+	hash_table_t*	table,		/*!< in: hash table */
+	ulint		start_index,	/*!< in: start index */
+	ulint		end_index);	/*!< in: end index */
+/*************************************************************//**
+Prints info of a hash table. */
+UNIV_INTERN
+void
+ha_print_info(
+/*==========*/
+	FILE*		file,	/*!< in: file where to print */
+	hash_table_t*	table);	/*!< in: hash table */
+#endif /* !UNIV_HOTBACKUP */
+
+/** The hash table external chain node */
+typedef struct ha_node_struct ha_node_t;
+
+/** The hash table external chain node */
+struct ha_node_struct {
+	ha_node_t*	next;	/*!< next chain node or NULL if none */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	buf_block_t*	block;	/*!< buffer block containing the data, or NULL */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	void*		data;	/*!< pointer to the data */
+	ulint		fold;	/*!< fold value for the data */
+};
+
+#ifndef UNIV_HOTBACKUP
+/** Assert that the current thread is holding the mutex protecting a
+hash bucket corresponding to a fold value.
+@param table	in: hash table
+@param fold	in: fold value */
+# define ASSERT_HASH_MUTEX_OWN(table, fold)				\
+	ut_ad(!(table)->mutexes || mutex_own(hash_get_mutex(table, fold)))
+#else /* !UNIV_HOTBACKUP */
+/** Assert that the current thread is holding the mutex protecting a
+hash bucket corresponding to a fold value.
+@param table	in: hash table
+@param fold	in: fold value */
+# define ASSERT_HASH_MUTEX_OWN(table, fold) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "ha0ha.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/ha0ha.ic b/storage/innodb_plugin/include/ha0ha.ic
new file mode 100644
index 00000000000..734403c4cd9
--- /dev/null
+++ b/storage/innodb_plugin/include/ha0ha.ic
@@ -0,0 +1,220 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/ha0ha.ic
+The hash table with external chains
+
+Created 8/18/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "ut0rnd.h"
+#include "mem0mem.h"
+
+/***********************************************************//**
+Deletes a hash node. */
+UNIV_INTERN
+void
+ha_delete_hash_node(
+/*================*/
+	hash_table_t*	table,		/*!< in: hash table */
+	ha_node_t*	del_node);	/*!< in: node to be deleted */
+
+/******************************************************************//**
+Gets a hash node data.
+@return	pointer to the data */
+UNIV_INLINE
+void*
+ha_node_get_data(
+/*=============*/
+	ha_node_t*	node)	/*!< in: hash chain node */
+{
+	return(node->data);
+}
+
+/******************************************************************//**
+Sets hash node data. */
+UNIV_INLINE
+void
+ha_node_set_data_func(
+/*==================*/
+	ha_node_t*	node,	/*!< in: hash chain node */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	buf_block_t*	block,	/*!< in: buffer block containing the data */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	void*		data)	/*!< in: pointer to the data */
+{
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	node->block = block;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	node->data = data;
+}
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+/** Sets hash node data.
+@param n	in: hash chain node
+@param b	in: buffer block containing the data
+@param d	in: pointer to the data */
+# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,b,d)
+#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+/** Sets hash node data.
+@param n	in: hash chain node
+@param b	in: buffer block containing the data
+@param d	in: pointer to the data */
+# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,d)
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
+/******************************************************************//**
+Gets the next node in a hash chain.
+@return	next node, NULL if none */
+UNIV_INLINE
+ha_node_t*
+ha_chain_get_next(
+/*==============*/
+	ha_node_t*	node)	/*!< in: hash chain node */
+{
+	return(node->next);
+}
+
+/******************************************************************//**
+Gets the first node in a hash chain.
+@return	first node, NULL if none */
+UNIV_INLINE
+ha_node_t*
+ha_chain_get_first(
+/*===============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: fold value determining the chain */
+{
+	return((ha_node_t*)
+	       hash_get_nth_cell(table, hash_calc_hash(fold, table))->node);
+}
+
+/*************************************************************//**
+Looks for an element in a hash table.
+@return pointer to the first hash table node in chain having the fold
+number, NULL if not found */
+UNIV_INLINE
+ha_node_t*
+ha_search(
+/*======*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: folded value of the searched data */
+{
+	ha_node_t*	node;
+
+	ASSERT_HASH_MUTEX_OWN(table, fold);
+
+	node = ha_chain_get_first(table, fold);
+
+	while (node) {
+		if (node->fold == fold) {
+
+			return(node);
+		}
+
+		node = ha_chain_get_next(node);
+	}
+
+	return(NULL);
+}
+
+/*************************************************************//**
+Looks for an element in a hash table.
+@return pointer to the data of the first hash table node in chain
+having the fold number, NULL if not found */
+UNIV_INLINE
+void*
+ha_search_and_get_data(
+/*===================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: folded value of the searched data */
+{
+	ha_node_t*	node;
+
+	ASSERT_HASH_MUTEX_OWN(table, fold);
+
+	node = ha_chain_get_first(table, fold);
+
+	while (node) {
+		if (node->fold == fold) {
+
+			return(node->data);
+		}
+
+		node = ha_chain_get_next(node);
+	}
+
+	return(NULL);
+}
+
+/*********************************************************//**
+Looks for an element when we know the pointer to the data.
+@return	pointer to the hash table node, NULL if not found in the table */
+UNIV_INLINE
+ha_node_t*
+ha_search_with_data(
+/*================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold,	/*!< in: folded value of the searched data */
+	void*		data)	/*!< in: pointer to the data */
+{
+	ha_node_t*	node;
+
+	ASSERT_HASH_MUTEX_OWN(table, fold);
+
+	node = ha_chain_get_first(table, fold);
+
+	while (node) {
+		if (node->data == data) {
+
+			return(node);
+		}
+
+		node = ha_chain_get_next(node);
+	}
+
+	return(NULL);
+}
+
+/*********************************************************//**
+Looks for an element when we know the pointer to the data, and deletes
+it from the hash table, if found.
+@return	TRUE if found */
+UNIV_INLINE
+ibool
+ha_search_and_delete_if_found(
+/*==========================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold,	/*!< in: folded value of the searched data */
+	void*		data)	/*!< in: pointer to the data */
+{
+	ha_node_t*	node;
+
+	ASSERT_HASH_MUTEX_OWN(table, fold);
+
+	node = ha_search_with_data(table, fold, data);
+
+	if (node) {
+		ha_delete_hash_node(table, node);
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
diff --git a/storage/innodb_plugin/include/ha0storage.h b/storage/innodb_plugin/include/ha0storage.h
new file mode 100644
index 00000000000..c30bd840579
--- /dev/null
+++ b/storage/innodb_plugin/include/ha0storage.h
@@ -0,0 +1,140 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ha0storage.h
+Hash storage.
+Provides a data structure that stores chunks of data in
+its own storage, avoiding duplicates.
+
+Created September 22, 2007 Vasil Dimov
+*******************************************************/
+
+#ifndef ha0storage_h
+#define ha0storage_h
+
+#include "univ.i"
+
+/** This value is used by default by ha_storage_create(). More memory
+is allocated later when/if it is needed. */
+#define HA_STORAGE_DEFAULT_HEAP_BYTES	1024
+
+/** This value is used by default by ha_storage_create(). It is a
+constant per ha_storage's lifetime. */
+#define HA_STORAGE_DEFAULT_HASH_CELLS	4096
+
+/** Hash storage */
+typedef struct ha_storage_struct	ha_storage_t;
+
+/*******************************************************************//**
+Creates a hash storage. If any of the parameters is 0, then a default
+value is used.
+@return	own: hash storage */
+UNIV_INLINE
+ha_storage_t*
+ha_storage_create(
+/*==============*/
+	ulint	initial_heap_bytes,	/*!< in: initial heap's size */
+	ulint	initial_hash_cells);	/*!< in: initial number of cells
+					in the hash table */
+
+/*******************************************************************//**
+Copies data into the storage and returns a pointer to the copy. If the
+same data chunk is already present, then pointer to it is returned.
+Data chunks are considered to be equal if len1 == len2 and
+memcmp(data1, data2, len1) == 0. If "data" is not present (and thus
+data_len bytes need to be allocated) and the size of storage is going to
+become more than "memlim" then "data" is not added and NULL is returned.
+To disable this behavior "memlim" can be set to 0, which stands for
+"no limit".
+@return	pointer to the copy */
+UNIV_INTERN
+const void*
+ha_storage_put_memlim(
+/*==================*/
+	ha_storage_t*	storage,	/*!< in/out: hash storage */
+	const void*	data,		/*!< in: data to store */
+	ulint		data_len,	/*!< in: data length */
+	ulint		memlim);	/*!< in: memory limit to obey */
+
+/*******************************************************************//**
+Same as ha_storage_put_memlim() but without memory limit.
+@param storage	in/out: hash storage
+@param data	in: data to store
+@param data_len	in: data length
+@return		pointer to the copy of the string */
+#define ha_storage_put(storage, data, data_len)	\
+	ha_storage_put_memlim((storage), (data), (data_len), 0)
+
+/*******************************************************************//**
+Copies string into the storage and returns a pointer to the copy. If the
+same string is already present, then pointer to it is returned.
+Strings are considered to be equal if strcmp(str1, str2) == 0.
+@param storage	in/out: hash storage
+@param str	in: string to put
+@return		pointer to the copy of the string */
+#define ha_storage_put_str(storage, str)	\
+	((const char*) ha_storage_put((storage), (str), strlen(str) + 1))
+
+/*******************************************************************//**
+Copies string into the storage and returns a pointer to the copy obeying
+a memory limit.
+If the same string is already present, then pointer to it is returned.
+Strings are considered to be equal if strcmp(str1, str2) == 0.
+@param storage	in/out: hash storage
+@param str	in: string to put
+@param memlim	in: memory limit to obey
+@return		pointer to the copy of the string */
+#define ha_storage_put_str_memlim(storage, str, memlim)	\
+	((const char*) ha_storage_put_memlim((storage), (str),	\
+					     strlen(str) + 1, (memlim)))
+
+/*******************************************************************//**
+Empties a hash storage, freeing memory occupied by data chunks.
+This invalidates any pointers previously returned by ha_storage_put().
+The hash storage is not invalidated itself and can be used again. */
+UNIV_INLINE
+void
+ha_storage_empty(
+/*=============*/
+	ha_storage_t**	storage);	/*!< in/out: hash storage */
+
+/*******************************************************************//**
+Frees a hash storage and everything it contains, it cannot be used after
+this call.
+This invalidates any pointers previously returned by ha_storage_put(). */
+UNIV_INLINE
+void
+ha_storage_free(
+/*============*/
+	ha_storage_t*	storage);	/*!< in, own: hash storage */
+
+/*******************************************************************//**
+Gets the size of the memory used by a storage.
+@return	bytes used */
+UNIV_INLINE
+ulint
+ha_storage_get_size(
+/*================*/
+	const ha_storage_t*	storage);	/*!< in: hash storage */
+
+#ifndef UNIV_NONINL
+#include "ha0storage.ic"
+#endif
+
+#endif /* ha0storage_h */
diff --git a/storage/innodb_plugin/include/ha0storage.ic b/storage/innodb_plugin/include/ha0storage.ic
new file mode 100644
index 00000000000..5acbf82f005
--- /dev/null
+++ b/storage/innodb_plugin/include/ha0storage.ic
@@ -0,0 +1,148 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ha0storage.ic
+Hash storage.
+Provides a data structure that stores chunks of data in
+its own storage, avoiding duplicates.
+
+Created September 24, 2007 Vasil Dimov
+*******************************************************/
+
+#include "univ.i"
+#include "ha0storage.h"
+#include "hash0hash.h"
+#include "mem0mem.h"
+
+/** Hash storage for strings */
+struct ha_storage_struct {
+	mem_heap_t*	heap;	/*!< memory heap from which memory is
+				allocated */
+	hash_table_t*	hash;	/*!< hash table used to avoid
+				duplicates */
+};
+
+/** Objects of this type are stored in ha_storage_t */
+typedef struct ha_storage_node_struct ha_storage_node_t;
+/** Objects of this type are stored in ha_storage_struct */
+struct ha_storage_node_struct {
+	ulint			data_len;/*!< length of the data */
+	const void*		data;	/*!< pointer to data */
+	ha_storage_node_t*	next;	/*!< next node in hash chain */
+};
+
+/*******************************************************************//**
+Creates a hash storage. If any of the parameters is 0, then a default
+value is used.
+@return	own: hash storage */
+UNIV_INLINE
+ha_storage_t*
+ha_storage_create(
+/*==============*/
+	ulint	initial_heap_bytes,	/*!< in: initial heap's size */
+	ulint	initial_hash_cells)	/*!< in: initial number of cells
+					in the hash table */
+{
+	ha_storage_t*	storage;
+	mem_heap_t*	heap;
+
+	if (initial_heap_bytes == 0) {
+
+		initial_heap_bytes = HA_STORAGE_DEFAULT_HEAP_BYTES;
+	}
+
+	if (initial_hash_cells == 0) {
+
+		initial_hash_cells = HA_STORAGE_DEFAULT_HASH_CELLS;
+	}
+
+	/* we put "storage" within "storage->heap" */
+
+	heap = mem_heap_create(sizeof(ha_storage_t)
+			       + initial_heap_bytes);
+
+	storage = (ha_storage_t*) mem_heap_alloc(heap,
+						 sizeof(ha_storage_t));
+
+	storage->heap = heap;
+	storage->hash = hash_create(initial_hash_cells);
+
+	return(storage);
+}
+
+/*******************************************************************//**
+Empties a hash storage, freeing memory occupied by data chunks.
+This invalidates any pointers previously returned by ha_storage_put().
+The hash storage is not invalidated itself and can be used again. */
+UNIV_INLINE
+void
+ha_storage_empty(
+/*=============*/
+	ha_storage_t**	storage)	/*!< in/out: hash storage */
+{
+	ha_storage_t	temp_storage;
+
+	temp_storage.heap = (*storage)->heap;
+	temp_storage.hash = (*storage)->hash;
+
+	hash_table_clear(temp_storage.hash);
+	mem_heap_empty(temp_storage.heap);
+
+	*storage = (ha_storage_t*) mem_heap_alloc(temp_storage.heap,
+						  sizeof(ha_storage_t));
+
+	(*storage)->heap = temp_storage.heap;
+	(*storage)->hash = temp_storage.hash;
+}
+
+/*******************************************************************//**
+Frees a hash storage and everything it contains, it cannot be used after
+this call.
+This invalidates any pointers previously returned by ha_storage_put(). */
+UNIV_INLINE
+void
+ha_storage_free(
+/*============*/
+	ha_storage_t*	storage)	/*!< in, own: hash storage */
+{
+	/* order is important because the pointer storage->hash is
+	within the heap */
+	hash_table_free(storage->hash);
+	mem_heap_free(storage->heap);
+}
+
+/*******************************************************************//**
+Gets the size of the memory used by a storage.
+@return	bytes used */
+UNIV_INLINE
+ulint
+ha_storage_get_size(
+/*================*/
+	const ha_storage_t*	storage)	/*!< in: hash storage */
+{
+	ulint	ret;
+
+	ret = mem_heap_get_size(storage->heap);
+
+	/* this assumes hash->heap and hash->heaps are NULL */
+	ret += sizeof(hash_table_t);
+	ret += sizeof(hash_cell_t) * hash_get_n_cells(storage->hash);
+
+	return(ret);
+}
diff --git a/storage/innodb_plugin/include/ha_prototypes.h b/storage/innodb_plugin/include/ha_prototypes.h
new file mode 100644
index 00000000000..e8789d1638b
--- /dev/null
+++ b/storage/innodb_plugin/include/ha_prototypes.h
@@ -0,0 +1,283 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ha_prototypes.h
+Prototypes for global functions in ha_innodb.cc that are called by
+InnoDB C code
+
+Created 5/11/2006 Osku Salerma
+************************************************************************/
+
+#ifndef HA_INNODB_PROTOTYPES_H
+#define HA_INNODB_PROTOTYPES_H
+
+#include "trx0types.h"
+#include "m_ctype.h" /* CHARSET_INFO */
+
+/*********************************************************************//**
+Wrapper around MySQL's copy_and_convert function.
+@return	number of bytes copied to 'to' */
+UNIV_INTERN
+ulint
+innobase_convert_string(
+/*====================*/
+	void*		to,		/*!< out: converted string */
+	ulint		to_length,	/*!< in: number of bytes reserved
+					for the converted string */
+	CHARSET_INFO*	to_cs,		/*!< in: character set to convert to */
+	const void*	from,		/*!< in: string to convert */
+	ulint		from_length,	/*!< in: number of bytes to convert */
+	CHARSET_INFO*	from_cs,	/*!< in: character set to convert from */
+	uint*		errors);	/*!< out: number of errors encountered
+					during the conversion */
+
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) that is of
+type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
+the result to "buf". The result is converted to "system_charset_info".
+Not more than "buf_size" bytes are written to "buf".
+The result is always NUL-terminated (provided buf_size > 0) and the
+number of bytes that were written to "buf" is returned (including the
+terminating NUL).
+@return	number of bytes that were written */
+UNIV_INTERN
+ulint
+innobase_raw_format(
+/*================*/
+	const char*	data,		/*!< in: raw data */
+	ulint		data_len,	/*!< in: raw data length
+					in bytes */
+	ulint		charset_coll,	/*!< in: charset collation */
+	char*		buf,		/*!< out: output buffer */
+	ulint		buf_size);	/*!< in: output buffer size
+					in bytes */
+
+/*****************************************************************//**
+Invalidates the MySQL query cache for the table. */
+UNIV_INTERN
+void
+innobase_invalidate_query_cache(
+/*============================*/
+	trx_t*		trx,		/*!< in: transaction which
+					modifies the table */
+	const char*	full_name,	/*!< in: concatenation of
+					database name, null char NUL,
+					table name, null char NUL;
+					NOTE that in Windows this is
+					always in LOWER CASE! */
+	ulint		full_name_len);	/*!< in: full name length where
+					also the null chars count */
+
+/*****************************************************************//**
+Convert a table or index name to the MySQL system_charset_info (UTF-8)
+and quote it if needed.
+@return	pointer to the end of buf */
+UNIV_INTERN
+char*
+innobase_convert_name(
+/*==================*/
+	char*		buf,	/*!< out: buffer for converted identifier */
+	ulint		buflen,	/*!< in: length of buf, in bytes */
+	const char*	id,	/*!< in: identifier to convert */
+	ulint		idlen,	/*!< in: length of id, in bytes */
+	void*		thd,	/*!< in: MySQL connection thread, or NULL */
+	ibool		table_id);/*!< in: TRUE=id is a table or database name;
+				FALSE=id is an index name */
+
+/******************************************************************//**
+Returns true if the thread is the replication thread on the slave
+server. Used in srv_conc_enter_innodb() to determine if the thread
+should be allowed to enter InnoDB - the replication thread is treated
+differently than other threads. Also used in
+srv_conc_force_exit_innodb().
+@return	true if thd is the replication thread */
+UNIV_INTERN
+ibool
+thd_is_replication_slave_thread(
+/*============================*/
+	void*	thd);	/*!< in: thread handle (THD*) */
+
+/******************************************************************//**
+Returns true if the transaction this thread is processing has edited
+non-transactional tables. Used by the deadlock detector when deciding
+which transaction to rollback in case of a deadlock - we try to avoid
+rolling back transactions that have edited non-transactional tables.
+@return	true if non-transactional tables have been edited */
+UNIV_INTERN
+ibool
+thd_has_edited_nontrans_tables(
+/*===========================*/
+	void*	thd);	/*!< in: thread handle (THD*) */
+
+/*************************************************************//**
+Prints info of a THD object (== user session thread) to the given file. */
+UNIV_INTERN
+void
+innobase_mysql_print_thd(
+/*=====================*/
+	FILE*	f,		/*!< in: output stream */
+	void*	thd,		/*!< in: pointer to a MySQL THD object */
+	uint	max_query_len);	/*!< in: max query length to print, or 0 to
+				   use the default max length */
+
+/**************************************************************//**
+Converts a MySQL type to an InnoDB type. Note that this function returns
+the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
+VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
+@return	DATA_BINARY, DATA_VARCHAR, ... */
+UNIV_INTERN
+ulint
+get_innobase_type_from_mysql_type(
+/*==============================*/
+	ulint*		unsigned_flag,	/*!< out: DATA_UNSIGNED if an
+					'unsigned type';
+					at least ENUM and SET,
+					and unsigned integer
+					types are 'unsigned types' */
+	const void*	field)		/*!< in: MySQL Field */
+	__attribute__((nonnull));
+
+/*************************************************************//**
+If you want to print a thd that is not associated with the current thread,
+you must call this function before reserving the InnoDB kernel_mutex, to
+protect MySQL from setting thd->query NULL. If you print a thd of the current
+thread, we know that MySQL cannot modify thd->query, and it is not necessary
+to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
+the kernel_mutex. */
+UNIV_INTERN
+void
+innobase_mysql_prepare_print_arbitrary_thd(void);
+/*============================================*/
+
+/*************************************************************//**
+Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
+In the InnoDB latching order, the mutex sits right above the
+kernel_mutex.  In debug builds, we assert that the kernel_mutex is
+released before this function is invoked. */
+UNIV_INTERN
+void
+innobase_mysql_end_print_arbitrary_thd(void);
+/*========================================*/
+
+/******************************************************************//**
+Get the variable length bounds of the given character set. */
+UNIV_INTERN
+void
+innobase_get_cset_width(
+/*====================*/
+	ulint	cset,		/*!< in: MySQL charset-collation code */
+	ulint*	mbminlen,	/*!< out: minimum length of a char (in bytes) */
+	ulint*	mbmaxlen);	/*!< out: maximum length of a char (in bytes) */
+
+/******************************************************************//**
+Compares NUL-terminated UTF-8 strings case insensitively.
+@return	0 if a=b, <0 if a<b, >1 if a>b */
+UNIV_INTERN
+int
+innobase_strcasecmp(
+/*================*/
+	const char*	a,	/*!< in: first string to compare */
+	const char*	b);	/*!< in: second string to compare */
+
+/******************************************************************//**
+Returns true if the thread is executing a SELECT statement.
+@return	true if thd is executing SELECT */
+
+ibool
+thd_is_select(
+/*==========*/
+	const void*	thd);	/*!< in: thread handle (THD*) */
+
+/******************************************************************//**
+Converts an identifier to a table name. */
+UNIV_INTERN
+void
+innobase_convert_from_table_id(
+/*===========================*/
+	struct charset_info_st*	cs,	/*!< in: the 'from' character set */
+	char*			to,	/*!< out: converted identifier */
+	const char*		from,	/*!< in: identifier to convert */
+	ulint			len);	/*!< in: length of 'to', in bytes; should
+					be at least 5 * strlen(to) + 1 */
+/******************************************************************//**
+Converts an identifier to UTF-8. */
+UNIV_INTERN
+void
+innobase_convert_from_id(
+/*=====================*/
+	struct charset_info_st*	cs,	/*!< in: the 'from' character set */
+	char*			to,	/*!< out: converted identifier */
+	const char*		from,	/*!< in: identifier to convert */
+	ulint			len);	/*!< in: length of 'to', in bytes; should
+					be at least 3 * strlen(to) + 1 */
+/******************************************************************//**
+Makes all characters in a NUL-terminated UTF-8 string lower case. */
+UNIV_INTERN
+void
+innobase_casedn_str(
+/*================*/
+	char*	a);	/*!< in/out: string to put in lower case */
+
+/**********************************************************************//**
+Determines the connection character set.
+@return	connection character set */
+struct charset_info_st*
+innobase_get_charset(
+/*=================*/
+	void*	mysql_thd);	/*!< in: MySQL thread handle */
+
+/******************************************************************//**
+This function is used to find the storage length in bytes of the first n
+characters for prefix indexes using a multibyte character set. The function
+finds charset information and returns length of prefix_len characters in the
+index field in bytes.
+@return	number of bytes occupied by the first n characters */
+UNIV_INTERN
+ulint
+innobase_get_at_most_n_mbchars(
+/*===========================*/
+	ulint charset_id,	/*!< in: character set id */
+	ulint prefix_len,	/*!< in: prefix length in bytes of the index
+				(this has to be divided by mbmaxlen to get the
+				number of CHARACTERS n in the prefix) */
+	ulint data_len,		/*!< in: length of the string in bytes */
+	const char* str);	/*!< in: character string */
+
+/******************************************************************//**
+Returns true if the thread supports XA,
+global value of innodb_supports_xa if thd is NULL.
+@return	true if thd supports XA */
+
+ibool
+thd_supports_xa(
+/*============*/
+	void*	thd);	/*!< in: thread handle (THD*), or NULL to query
+			the global innodb_supports_xa */
+
+/******************************************************************//**
+Returns the lock wait timeout for the current connection.
+@return	the lock wait timeout, in seconds */
+
+ulong
+thd_lock_wait_timeout(
+/*==================*/
+	void*	thd);	/*!< in: thread handle (THD*), or NULL to query
+			the global innodb_lock_wait_timeout */
+
+#endif
diff --git a/storage/innodb_plugin/include/handler0alter.h b/storage/innodb_plugin/include/handler0alter.h
new file mode 100644
index 00000000000..985b76f4f50
--- /dev/null
+++ b/storage/innodb_plugin/include/handler0alter.h
@@ -0,0 +1,42 @@
+/*****************************************************************************
+
+Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/handler0alter.h
+Smart ALTER TABLE
+*******************************************************/
+
+/*************************************************************//**
+Copies an InnoDB record to table->record[0]. */
+UNIV_INTERN
+void
+innobase_rec_to_mysql(
+/*==================*/
+	TABLE*			table,		/*!< in/out: MySQL table */
+	const rec_t*		rec,		/*!< in: record */
+	const dict_index_t*	index,		/*!< in: index */
+	const ulint*		offsets);	/*!< in: rec_get_offsets(
+						rec, index, ...) */
+
+/*************************************************************//**
+Resets table->record[0]. */
+UNIV_INTERN
+void
+innobase_rec_reset(
+/*===============*/
+	TABLE*			table);		/*!< in/out: MySQL table */
diff --git a/storage/innobase/include/hash0hash.h b/storage/innodb_plugin/include/hash0hash.h
similarity index 51%
rename from storage/innobase/include/hash0hash.h
rename to storage/innodb_plugin/include/hash0hash.h
index e119a117c94..977cb829f35 100644
--- a/storage/innobase/include/hash0hash.h
+++ b/storage/innodb_plugin/include/hash0hash.h
@@ -1,7 +1,24 @@
-/******************************************************
-The simple hash table utility
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/hash0hash.h
+The simple hash table utility
 
 Created 5/20/1997 Heikki Tuuri
 *******************************************************/
@@ -11,7 +28,9 @@ Created 5/20/1997 Heikki Tuuri
 
 #include "univ.i"
 #include "mem0mem.h"
-#include "sync0sync.h"
+#ifndef UNIV_HOTBACKUP
+# include "sync0sync.h"
+#endif /* !UNIV_HOTBACKUP */
 
 typedef struct hash_table_struct hash_table_t;
 typedef struct hash_cell_struct hash_cell_t;
@@ -21,59 +40,61 @@ typedef void*	hash_node_t;
 /* Fix Bug #13859: symbol collision between imap/mysql */
 #define hash_create hash0_create
 
-/*****************************************************************
+/*************************************************************//**
 Creates a hash table with >= n array cells. The actual number
-of cells is chosen to be a prime number slightly bigger than n. */
-
+of cells is chosen to be a prime number slightly bigger than n.
+@return	own: created table */
+UNIV_INTERN
 hash_table_t*
 hash_create(
 /*========*/
-			/* out, own: created table */
-	ulint	n);	/* in: number of array cells */
-/*****************************************************************
+	ulint	n);	/*!< in: number of array cells */
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
 Creates a mutex array to protect a hash table. */
-
+UNIV_INTERN
 void
 hash_create_mutexes_func(
 /*=====================*/
-	hash_table_t*	table,		/* in: hash table */
+	hash_table_t*	table,		/*!< in: hash table */
 #ifdef UNIV_SYNC_DEBUG
-	ulint		sync_level,	/* in: latching order level of the
+	ulint		sync_level,	/*!< in: latching order level of the
 					mutexes: used in the debug version */
 #endif /* UNIV_SYNC_DEBUG */
-	ulint		n_mutexes);	/* in: number of mutexes */
+	ulint		n_mutexes);	/*!< in: number of mutexes */
 #ifdef UNIV_SYNC_DEBUG
 # define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,level,n)
 #else /* UNIV_SYNC_DEBUG */
 # define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,n)
 #endif /* UNIV_SYNC_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
 
-/*****************************************************************
+/*************************************************************//**
 Frees a hash table. */
-
+UNIV_INTERN
 void
 hash_table_free(
 /*============*/
-	hash_table_t*	table);	/* in, own: hash table */
-/******************************************************************
-Calculates the hash value from a folded value. */
+	hash_table_t*	table);	/*!< in, own: hash table */
+/**************************************************************//**
+Calculates the hash value from a folded value.
+@return	hashed value */
 UNIV_INLINE
 ulint
 hash_calc_hash(
 /*===========*/
-				/* out: hashed value */
-	ulint		fold,	/* in: folded value */
-	hash_table_t*	table);	/* in: hash table */
-/************************************************************************
+	ulint		fold,	/*!< in: folded value */
+	hash_table_t*	table);	/*!< in: hash table */
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
 Assert that the mutex for the table in a hash operation is owned. */
-#ifdef UNIV_SYNC_DEBUG
-# define HASH_ASSERT_OWNED(TABLE, FOLD) \
+# define HASH_ASSERT_OWNED(TABLE, FOLD)					\
 ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));
-#else
+#else /* !UNIV_HOTBACKUP */
 # define HASH_ASSERT_OWNED(TABLE, FOLD)
-#endif
+#endif /* !UNIV_HOTBACKUP */
 
-/***********************************************************************
+/*******************************************************************//**
 Inserts a struct to a hash table. */
 
 #define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\
@@ -90,18 +111,26 @@ do {\
 	if (cell3333->node == NULL) {\
 		cell3333->node = DATA;\
 	} else {\
-		struct3333 = cell3333->node;\
+		struct3333 = (TYPE*) cell3333->node;\
 \
 		while (struct3333->NAME != NULL) {\
 \
-			struct3333 = struct3333->NAME;\
+			struct3333 = (TYPE*) struct3333->NAME;\
 		}\
 \
 		struct3333->NAME = DATA;\
 	}\
 } while (0)
 
-/***********************************************************************
+#ifdef UNIV_HASH_DEBUG
+# define HASH_ASSERT_VALID(DATA) ut_a((void*) (DATA) != (void*) -1)
+# define HASH_INVALIDATE(DATA, NAME) DATA->NAME = (void*) -1
+#else
+# define HASH_ASSERT_VALID(DATA) do {} while (0)
+# define HASH_INVALIDATE(DATA, NAME) do {} while (0)
+#endif
+
+/*******************************************************************//**
 Deletes a struct from a hash table. */
 
 #define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\
@@ -114,67 +143,107 @@ do {\
 	cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
 \
 	if (cell3333->node == DATA) {\
+		HASH_ASSERT_VALID(DATA->NAME);\
 		cell3333->node = DATA->NAME;\
 	} else {\
-		struct3333 = cell3333->node;\
+		struct3333 = (TYPE*) cell3333->node;\
 \
 		while (struct3333->NAME != DATA) {\
 \
-			struct3333 = struct3333->NAME;\
+			struct3333 = (TYPE*) struct3333->NAME;\
 			ut_a(struct3333);\
 		}\
 \
 		struct3333->NAME = DATA->NAME;\
 	}\
+	HASH_INVALIDATE(DATA, NAME);\
 } while (0)
 
-/***********************************************************************
+/*******************************************************************//**
 Gets the first struct in a hash chain, NULL if none. */
 
 #define HASH_GET_FIRST(TABLE, HASH_VAL)\
 	(hash_get_nth_cell(TABLE, HASH_VAL)->node)
 
-/***********************************************************************
+/*******************************************************************//**
 Gets the next struct in a hash chain, NULL if none. */
 
 #define HASH_GET_NEXT(NAME, DATA)	((DATA)->NAME)
 
-/************************************************************************
+/********************************************************************//**
 Looks for a struct in a hash table. */
-#define HASH_SEARCH(NAME, TABLE, FOLD, DATA, TEST)\
+#define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, ASSERTION, TEST)\
 {\
 \
 	HASH_ASSERT_OWNED(TABLE, FOLD)\
 \
-	(DATA) = HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
+	(DATA) = (TYPE) HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
+	HASH_ASSERT_VALID(DATA);\
 \
 	while ((DATA) != NULL) {\
+		ASSERTION;\
 		if (TEST) {\
 			break;\
 		} else {\
-			(DATA) = HASH_GET_NEXT(NAME, DATA);\
+			HASH_ASSERT_VALID(HASH_GET_NEXT(NAME, DATA));\
+			(DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA);\
 		}\
 	}\
 }
 
-/****************************************************************
-Gets the nth cell in a hash table. */
+/********************************************************************//**
+Looks for an item in all hash buckets. */
+#define HASH_SEARCH_ALL(NAME, TABLE, TYPE, DATA, ASSERTION, TEST)	\
+do {									\
+	ulint	i3333;							\
+									\
+	for (i3333 = (TABLE)->n_cells; i3333--; ) {			\
+		(DATA) = (TYPE) HASH_GET_FIRST(TABLE, i3333);		\
+									\
+		while ((DATA) != NULL) {				\
+			HASH_ASSERT_VALID(DATA);			\
+			ASSERTION;					\
+									\
+			if (TEST) {					\
+				break;					\
+			}						\
+									\
+			(DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA);	\
+		}							\
+									\
+		if ((DATA) != NULL) {					\
+			break;						\
+		}							\
+	}								\
+} while (0)
+
+/************************************************************//**
+Gets the nth cell in a hash table.
+@return	pointer to cell */
 UNIV_INLINE
 hash_cell_t*
 hash_get_nth_cell(
 /*==============*/
-				/* out: pointer to cell */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		n);	/* in: cell index */
-/*****************************************************************
-Returns the number of cells in a hash table. */
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		n);	/*!< in: cell index */
+
+/*************************************************************//**
+Clears a hash table so that all the cells become empty. */
+UNIV_INLINE
+void
+hash_table_clear(
+/*=============*/
+	hash_table_t*	table);	/*!< in/out: hash table */
+
+/*************************************************************//**
+Returns the number of cells in a hash table.
+@return	number of cells */
 UNIV_INLINE
 ulint
 hash_get_n_cells(
 /*=============*/
-				/* out: number of cells */
-	hash_table_t*	table);	/* in: table */
-/***********************************************************************
+	hash_table_t*	table);	/*!< in: table */
+/*******************************************************************//**
 Deletes a struct which is stored in the heap of the hash table, and compacts
 the heap. The fold value must be stored in the struct NODE in a field named
 'fold'. */
@@ -233,8 +302,9 @@ do {\
 	mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\
 } while (0)
 
-/********************************************************************
-Move all hash table entries from OLD_TABLE to NEW_TABLE.*/
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Move all hash table entries from OLD_TABLE to NEW_TABLE. */
 
 #define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
 do {\
@@ -258,102 +328,111 @@ do {\
 	}\
 } while (0)
 
-
-/****************************************************************
-Gets the mutex index for a fold value in a hash table. */
+/************************************************************//**
+Gets the mutex index for a fold value in a hash table.
+@return	mutex number */
 UNIV_INLINE
 ulint
 hash_get_mutex_no(
 /*==============*/
-				/* out: mutex number */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold);	/* in: fold */
-/****************************************************************
-Gets the nth heap in a hash table. */
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
+Gets the nth heap in a hash table.
+@return	mem heap */
 UNIV_INLINE
 mem_heap_t*
 hash_get_nth_heap(
 /*==============*/
-				/* out: mem heap */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		i);	/* in: index of the heap */
-/****************************************************************
-Gets the heap for a fold value in a hash table. */
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		i);	/*!< in: index of the heap */
+/************************************************************//**
+Gets the heap for a fold value in a hash table.
+@return	mem heap */
 UNIV_INLINE
 mem_heap_t*
 hash_get_heap(
 /*==========*/
-				/* out: mem heap */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold);	/* in: fold */
-/****************************************************************
-Gets the nth mutex in a hash table. */
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
+Gets the nth mutex in a hash table.
+@return	mutex */
 UNIV_INLINE
 mutex_t*
 hash_get_nth_mutex(
 /*===============*/
-				/* out: mutex */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		i);	/* in: index of the mutex */
-/****************************************************************
-Gets the mutex for a fold value in a hash table. */
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		i);	/*!< in: index of the mutex */
+/************************************************************//**
+Gets the mutex for a fold value in a hash table.
+@return	mutex */
 UNIV_INLINE
 mutex_t*
 hash_get_mutex(
 /*===========*/
-				/* out: mutex */
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold);	/* in: fold */
-/****************************************************************
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
 Reserves the mutex for a fold value in a hash table. */
-
+UNIV_INTERN
 void
 hash_mutex_enter(
 /*=============*/
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold);	/* in: fold */
-/****************************************************************
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
 Releases the mutex for a fold value in a hash table. */
-
+UNIV_INTERN
 void
 hash_mutex_exit(
 /*============*/
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold);	/* in: fold */
-/****************************************************************
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
 Reserves all the mutexes of a hash table, in an ascending order. */
-
+UNIV_INTERN
 void
 hash_mutex_enter_all(
 /*=================*/
-	hash_table_t*	table);	/* in: hash table */
-/****************************************************************
+	hash_table_t*	table);	/*!< in: hash table */
+/************************************************************//**
 Releases all the mutexes of a hash table. */
-
+UNIV_INTERN
 void
 hash_mutex_exit_all(
 /*================*/
-	hash_table_t*	table);	/* in: hash table */
-
+	hash_table_t*	table);	/*!< in: hash table */
+#else /* !UNIV_HOTBACKUP */
+# define hash_get_heap(table, fold)	((table)->heap)
+# define hash_mutex_enter(table, fold)	((void) 0)
+# define hash_mutex_exit(table, fold)	((void) 0)
+#endif /* !UNIV_HOTBACKUP */
 
 struct hash_cell_struct{
-	void*	node;	/* hash chain node, NULL if none */
+	void*	node;	/*!< hash chain node, NULL if none */
 };
 
 /* The hash table structure */
 struct hash_table_struct {
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
 	ibool		adaptive;/* TRUE if this is the hash table of the
 				adaptive hash index */
+# endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
 	ulint		n_cells;/* number of cells in the hash table */
-	hash_cell_t*	array;	/* pointer to cell array */
+	hash_cell_t*	array;	/*!< pointer to cell array */
+#ifndef UNIV_HOTBACKUP
 	ulint		n_mutexes;/* if mutexes != NULL, then the number of
 				mutexes, must be a power of 2 */
 	mutex_t*	mutexes;/* NULL, or an array of mutexes used to
 				protect segments of the hash table */
-	mem_heap_t**	heaps;	/* if this is non-NULL, hash chain nodes for
+	mem_heap_t**	heaps;	/*!< if this is non-NULL, hash chain nodes for
 				external chaining can be allocated from these
 				memory heaps; there are then n_mutexes many of
 				these heaps */
+#endif /* !UNIV_HOTBACKUP */
 	mem_heap_t*	heap;
 	ulint		magic_n;
 };
diff --git a/storage/innodb_plugin/include/hash0hash.ic b/storage/innodb_plugin/include/hash0hash.ic
new file mode 100644
index 00000000000..19da2d50701
--- /dev/null
+++ b/storage/innodb_plugin/include/hash0hash.ic
@@ -0,0 +1,163 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/hash0hash.ic
+The simple hash table utility
+
+Created 5/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "ut0rnd.h"
+
+/************************************************************//**
+Gets the nth cell in a hash table.
+@return	pointer to cell */
+UNIV_INLINE
+hash_cell_t*
+hash_get_nth_cell(
+/*==============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		n)	/*!< in: cell index */
+{
+	ut_ad(n < table->n_cells);
+
+	return(table->array + n);
+}
+
+/*************************************************************//**
+Clears a hash table so that all the cells become empty. */
+UNIV_INLINE
+void
+hash_table_clear(
+/*=============*/
+	hash_table_t*	table)	/*!< in/out: hash table */
+{
+	memset(table->array, 0x0,
+	       table->n_cells * sizeof(*table->array));
+}
+
+/*************************************************************//**
+Returns the number of cells in a hash table.
+@return	number of cells */
+UNIV_INLINE
+ulint
+hash_get_n_cells(
+/*=============*/
+	hash_table_t*	table)	/*!< in: table */
+{
+	return(table->n_cells);
+}
+
+/**************************************************************//**
+Calculates the hash value from a folded value.
+@return	hashed value */
+UNIV_INLINE
+ulint
+hash_calc_hash(
+/*===========*/
+	ulint		fold,	/*!< in: folded value */
+	hash_table_t*	table)	/*!< in: hash table */
+{
+	return(ut_hash_ulint(fold, table->n_cells));
+}
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Gets the mutex index for a fold value in a hash table.
+@return	mutex number */
+UNIV_INLINE
+ulint
+hash_get_mutex_no(
+/*==============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: fold */
+{
+	ut_ad(ut_is_2pow(table->n_mutexes));
+	return(ut_2pow_remainder(hash_calc_hash(fold, table),
+				 table->n_mutexes));
+}
+
+/************************************************************//**
+Gets the nth heap in a hash table.
+@return	mem heap */
+UNIV_INLINE
+mem_heap_t*
+hash_get_nth_heap(
+/*==============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		i)	/*!< in: index of the heap */
+{
+	ut_ad(i < table->n_mutexes);
+
+	return(table->heaps[i]);
+}
+
+/************************************************************//**
+Gets the heap for a fold value in a hash table.
+@return	mem heap */
+UNIV_INLINE
+mem_heap_t*
+hash_get_heap(
+/*==========*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: fold */
+{
+	ulint	i;
+
+	if (table->heap) {
+		return(table->heap);
+	}
+
+	i = hash_get_mutex_no(table, fold);
+
+	return(hash_get_nth_heap(table, i));
+}
+
+/************************************************************//**
+Gets the nth mutex in a hash table.
+@return	mutex */
+UNIV_INLINE
+mutex_t*
+hash_get_nth_mutex(
+/*===============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		i)	/*!< in: index of the mutex */
+{
+	ut_ad(i < table->n_mutexes);
+
+	return(table->mutexes + i);
+}
+
+/************************************************************//**
+Gets the mutex for a fold value in a hash table.
+@return	mutex */
+UNIV_INLINE
+mutex_t*
+hash_get_mutex(
+/*===========*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: fold */
+{
+	ulint	i;
+
+	i = hash_get_mutex_no(table, fold);
+
+	return(hash_get_nth_mutex(table, i));
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/ibuf0ibuf.h b/storage/innodb_plugin/include/ibuf0ibuf.h
new file mode 100644
index 00000000000..21330997df3
--- /dev/null
+++ b/storage/innodb_plugin/include/ibuf0ibuf.h
@@ -0,0 +1,377 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ibuf0ibuf.h
+Insert buffer
+
+Created 7/19/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef ibuf0ibuf_h
+#define ibuf0ibuf_h
+
+#include "univ.i"
+
+#include "mtr0mtr.h"
+#include "dict0mem.h"
+#include "fsp0fsp.h"
+
+#ifndef UNIV_HOTBACKUP
+# include "ibuf0types.h"
+
+/** Combinations of operations that can be buffered.  Because the enum
+values are used for indexing innobase_change_buffering_values[], they
+should start at 0 and there should not be any gaps. */
+typedef enum {
+	IBUF_USE_NONE = 0,
+	IBUF_USE_INSERT,	/* insert */
+
+	IBUF_USE_COUNT		/* number of entries in ibuf_use_t */
+} ibuf_use_t;
+
+/** Operations that can currently be buffered. */
+extern ibuf_use_t	ibuf_use;
+
+/** The insert buffer control structure */
+extern ibuf_t*		ibuf;
+
+/* The purpose of the insert buffer is to reduce random disk access.
+When we wish to insert a record into a non-unique secondary index and
+the B-tree leaf page where the record belongs to is not in the buffer
+pool, we insert the record into the insert buffer B-tree, indexed by
+(space_id, page_no).  When the page is eventually read into the buffer
+pool, we look up the insert buffer B-tree for any modifications to the
+page, and apply these upon the completion of the read operation.  This
+is called the insert buffer merge. */
+
+/* The insert buffer merge must always succeed.  To guarantee this,
+the insert buffer subsystem keeps track of the free space in pages for
+which it can buffer operations.  Two bits per page in the insert
+buffer bitmap indicate the available space in coarse increments.  The
+free bits in the insert buffer bitmap must never exceed the free space
+on a page.  It is safe to decrement or reset the bits in the bitmap in
+a mini-transaction that is committed before the mini-transaction that
+affects the free space.  It is unsafe to increment the bits in a
+separately committed mini-transaction, because in crash recovery, the
+free bits could momentarily be set too high. */
+
+/******************************************************************//**
+Creates the insert buffer data structure at a database startup and
+initializes the data structures for the insert buffer of each tablespace. */
+UNIV_INTERN
+void
+ibuf_init_at_db_start(void);
+/*=======================*/
+/*********************************************************************//**
+Reads the biggest tablespace id from the high end of the insert buffer
+tree and updates the counter in fil_system. */
+UNIV_INTERN
+void
+ibuf_update_max_tablespace_id(void);
+/*===============================*/
+/*********************************************************************//**
+Initializes an ibuf bitmap page. */
+UNIV_INTERN
+void
+ibuf_bitmap_page_init(
+/*==================*/
+	buf_block_t*	block,	/*!< in: bitmap page */
+	mtr_t*		mtr);	/*!< in: mtr */
+/************************************************************************//**
+Resets the free bits of the page in the ibuf bitmap. This is done in a
+separate mini-transaction, hence this operation does not restrict
+further work to only ibuf bitmap operations, which would result if the
+latch to the bitmap page were kept.  NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page.  It is safe
+to decrement or reset the bits in the bitmap in a mini-transaction
+that is committed before the mini-transaction that affects the free
+space. */
+UNIV_INTERN
+void
+ibuf_reset_free_bits(
+/*=================*/
+	buf_block_t*	block);	/*!< in: index page; free bits are set to 0
+				if the index is a non-clustered
+				non-unique, and page level is 0 */
+/************************************************************************//**
+Updates the free bits of an uncompressed page in the ibuf bitmap if
+there is not enough free on the page any more.  This is done in a
+separate mini-transaction, hence this operation does not restrict
+further work to only ibuf bitmap operations, which would result if the
+latch to the bitmap page were kept.  NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page.  It is
+unsafe to increment the bits in a separately committed
+mini-transaction, because in crash recovery, the free bits could
+momentarily be set too high.  It is only safe to use this function for
+decrementing the free bits.  Should more free space become available,
+we must not update the free bits here, because that would break crash
+recovery. */
+UNIV_INLINE
+void
+ibuf_update_free_bits_if_full(
+/*==========================*/
+	buf_block_t*	block,	/*!< in: index page to which we have added new
+				records; the free bits are updated if the
+				index is non-clustered and non-unique and
+				the page level is 0, and the page becomes
+				fuller */
+	ulint		max_ins_size,/*!< in: value of maximum insert size with
+				reorganize before the latest operation
+				performed to the page */
+	ulint		increase);/*!< in: upper limit for the additional space
+				used in the latest operation, if known, or
+				ULINT_UNDEFINED */
+/**********************************************************************//**
+Updates the free bits for an uncompressed page to reflect the present
+state.  Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed.  NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page.  It is safe
+to set the free bits in the same mini-transaction that updated the
+page. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_low(
+/*======================*/
+	const buf_block_t*	block,		/*!< in: index page */
+	ulint			max_ins_size,	/*!< in: value of
+						maximum insert size
+						with reorganize before
+						the latest operation
+						performed to the page */
+	mtr_t*			mtr);		/*!< in/out: mtr */
+/**********************************************************************//**
+Updates the free bits for a compressed page to reflect the present
+state.  Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed.  NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page.  It is safe
+to set the free bits in the same mini-transaction that updated the
+page. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_zip(
+/*======================*/
+	buf_block_t*	block,	/*!< in/out: index page */
+	mtr_t*		mtr);	/*!< in/out: mtr */
+/**********************************************************************//**
+Updates the free bits for the two pages to reflect the present state.
+Does this in the mtr given, which means that the latching order rules
+virtually prevent any further operations until mtr is committed.
+NOTE: The free bits in the insert buffer bitmap must never exceed the
+free space on a page.  It is safe to set the free bits in the same
+mini-transaction that updated the pages. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_for_two_pages_low(
+/*====================================*/
+	ulint		zip_size,/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	buf_block_t*	block1,	/*!< in: index page */
+	buf_block_t*	block2,	/*!< in: index page */
+	mtr_t*		mtr);	/*!< in: mtr */
+/**********************************************************************//**
+A basic partial test if an insert to the insert buffer could be possible and
+recommended. */
+UNIV_INLINE
+ibool
+ibuf_should_try(
+/*============*/
+	dict_index_t*	index,			/*!< in: index where to insert */
+	ulint		ignore_sec_unique);	/*!< in: if != 0, we should
+						ignore UNIQUE constraint on
+						a secondary index when we
+						decide */
+/******************************************************************//**
+Returns TRUE if the current OS thread is performing an insert buffer
+routine.
+
+For instance, a read-ahead of non-ibuf pages is forbidden by threads
+that are executing an insert buffer routine.
+@return TRUE if inside an insert buffer routine */
+UNIV_INTERN
+ibool
+ibuf_inside(void);
+/*=============*/
+/***********************************************************************//**
+Checks if a page address is an ibuf bitmap page (level 3 page) address.
+@return	TRUE if a bitmap page */
+UNIV_INLINE
+ibool
+ibuf_bitmap_page(
+/*=============*/
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	page_no);/*!< in: page number */
+/***********************************************************************//**
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==TRUE.
+@return	TRUE if level 2 or level 3 page */
+UNIV_INTERN
+ibool
+ibuf_page(
+/*======*/
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint	page_no,/*!< in: page number */
+	mtr_t*	mtr);	/*!< in: mtr which will contain an x-latch to the
+			bitmap page if the page is not one of the fixed
+			address ibuf pages, or NULL, in which case a new
+			transaction is created. */
+/***********************************************************************//**
+Frees excess pages from the ibuf free list. This function is called when an OS
+thread calls fsp services to allocate a new file segment, or a new page to a
+file segment, and the thread did not own the fsp latch before this call. */
+UNIV_INTERN
+void
+ibuf_free_excess_pages(void);
+/*========================*/
+/*********************************************************************//**
+Makes an index insert to the insert buffer, instead of directly to the disk
+page, if this is possible. Does not do insert if the index is clustered
+or unique.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+ibuf_insert(
+/*========*/
+	const dtuple_t*	entry,	/*!< in: index entry to insert */
+	dict_index_t*	index,	/*!< in: index where to insert */
+	ulint		space,	/*!< in: space id where to insert */
+	ulint		zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint		page_no,/*!< in: page number where to insert */
+	que_thr_t*	thr);	/*!< in: query thread */
+/*********************************************************************//**
+When an index page is read from a disk to the buffer pool, this function
+inserts to the page the possible index entries buffered in the insert buffer.
+The entries are deleted from the insert buffer. If the page is not read, but
+created in the buffer pool, this function deletes its buffered entries from
+the insert buffer; there can exist entries for such a page if the page
+belonged to an index which subsequently was dropped. */
+UNIV_INTERN
+void
+ibuf_merge_or_delete_for_page(
+/*==========================*/
+	buf_block_t*	block,	/*!< in: if page has been read from
+				disk, pointer to the page x-latched,
+				else NULL */
+	ulint		space,	/*!< in: space id of the index page */
+	ulint		page_no,/*!< in: page number of the index page */
+	ulint		zip_size,/*!< in: compressed page size in bytes,
+				or 0 */
+	ibool		update_ibuf_bitmap);/*!< in: normally this is set
+				to TRUE, but if we have deleted or are
+				deleting the tablespace, then we
+				naturally do not want to update a
+				non-existent bitmap page */
+/*********************************************************************//**
+Deletes all entries in the insert buffer for a given space id. This is used
+in DISCARD TABLESPACE and IMPORT TABLESPACE.
+NOTE: this does not update the page free bitmaps in the space. The space will
+become CORRUPT when you call this function! */
+UNIV_INTERN
+void
+ibuf_delete_for_discarded_space(
+/*============================*/
+	ulint	space);	/*!< in: space id */
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+UNIV_INTERN
+ulint
+ibuf_contract(
+/*==========*/
+	ibool	sync);	/*!< in: TRUE if the caller wants to wait for the
+			issued read with the highest tablespace address
+			to complete */
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+UNIV_INTERN
+ulint
+ibuf_contract_for_n_pages(
+/*======================*/
+	ibool	sync,	/*!< in: TRUE if the caller wants to wait for the
+			issued read with the highest tablespace address
+			to complete */
+	ulint	n_pages);/*!< in: try to read at least this many pages to
+			the buffer pool and merge the ibuf contents to
+			them */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Parses a redo log record of an ibuf bitmap page init.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+ibuf_parse_bitmap_init(
+/*===================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in: block or NULL */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+#ifndef UNIV_HOTBACKUP
+#ifdef UNIV_IBUF_COUNT_DEBUG
+/******************************************************************//**
+Gets the ibuf count for a given page.
+@return number of entries in the insert buffer currently buffered for
+this page */
+UNIV_INTERN
+ulint
+ibuf_count_get(
+/*===========*/
+	ulint	space,	/*!< in: space id */
+	ulint	page_no);/*!< in: page number */
+#endif
+/******************************************************************//**
+Looks if the insert buffer is empty.
+@return	TRUE if empty */
+UNIV_INTERN
+ibool
+ibuf_is_empty(void);
+/*===============*/
+/******************************************************************//**
+Prints info of ibuf. */
+UNIV_INTERN
+void
+ibuf_print(
+/*=======*/
+	FILE*	file);	/*!< in: file where to print */
+
+#define IBUF_HEADER_PAGE_NO	FSP_IBUF_HEADER_PAGE_NO
+#define IBUF_TREE_ROOT_PAGE_NO	FSP_IBUF_TREE_ROOT_PAGE_NO
+
+#endif /* !UNIV_HOTBACKUP */
+
+/* The ibuf header page currently contains only the file segment header
+for the file segment from which the pages for the ibuf tree are allocated */
+#define IBUF_HEADER		PAGE_DATA
+#define	IBUF_TREE_SEG_HEADER	0	/* fseg header for ibuf tree */
+
+/* The insert buffer tree itself is always located in space 0. */
+#define IBUF_SPACE_ID		0
+
+#ifndef UNIV_NONINL
+#include "ibuf0ibuf.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/ibuf0ibuf.ic b/storage/innodb_plugin/include/ibuf0ibuf.ic
new file mode 100644
index 00000000000..15bbe61ab30
--- /dev/null
+++ b/storage/innodb_plugin/include/ibuf0ibuf.ic
@@ -0,0 +1,327 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ibuf0ibuf.ic
+Insert buffer
+
+Created 7/19/1997 Heikki Tuuri
+*******************************************************/
+
+#include "page0page.h"
+#include "page0zip.h"
+#ifndef UNIV_HOTBACKUP
+#include "buf0lru.h"
+
+/** Counter for ibuf_should_try() */
+extern ulint	ibuf_flush_count;
+
+/** An index page must contain at least UNIV_PAGE_SIZE /
+IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free space for ibuf to try to
+buffer inserts to this page.  If there is this much of free space, the
+corresponding bits are set in the ibuf bitmap. */
+#define IBUF_PAGE_SIZE_PER_FREE_SPACE	32
+
+/** Insert buffer struct */
+struct ibuf_struct{
+	ulint		size;		/*!< current size of the ibuf index
+					tree, in pages */
+	ulint		max_size;	/*!< recommended maximum size of the
+					ibuf index tree, in pages */
+	ulint		seg_size;	/*!< allocated pages of the file
+					segment containing ibuf header and
+					tree */
+	ibool		empty;		/*!< after an insert to the ibuf tree
+					is performed, this is set to FALSE,
+					and if a contract operation finds
+					the tree empty, this is set to
+					TRUE */
+	ulint		free_list_len;	/*!< length of the free list */
+	ulint		height;		/*!< tree height */
+	dict_index_t*	index;		/*!< insert buffer index */
+
+	ulint		n_inserts;	/*!< number of inserts made to
+					the insert buffer */
+	ulint		n_merges;	/*!< number of pages merged */
+	ulint		n_merged_recs;	/*!< number of records merged */
+};
+
+/************************************************************************//**
+Sets the free bit of the page in the ibuf bitmap. This is done in a separate
+mini-transaction, hence this operation does not restrict further work to only
+ibuf bitmap operations, which would result if the latch to the bitmap page
+were kept. */
+UNIV_INTERN
+void
+ibuf_set_free_bits_func(
+/*====================*/
+	buf_block_t*	block,	/*!< in: index page of a non-clustered index;
+				free bit is reset if page level is 0 */
+#ifdef UNIV_IBUF_DEBUG
+	ulint		max_val,/*!< in: ULINT_UNDEFINED or a maximum
+				value which the bits must have before
+				setting; this is for debugging */
+#endif /* UNIV_IBUF_DEBUG */
+	ulint		val);	/*!< in: value to set: < 4 */
+#ifdef UNIV_IBUF_DEBUG
+# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,max,v)
+#else /* UNIV_IBUF_DEBUG */
+# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,v)
+#endif /* UNIV_IBUF_DEBUG */
+
+/**********************************************************************//**
+A basic partial test if an insert to the insert buffer could be possible and
+recommended. */
+UNIV_INLINE
+ibool
+ibuf_should_try(
+/*============*/
+	dict_index_t*	index,			/*!< in: index where to insert */
+	ulint		ignore_sec_unique)	/*!< in: if != 0, we should
+						ignore UNIQUE constraint on
+						a secondary index when we
+						decide */
+{
+	if (ibuf_use != IBUF_USE_NONE
+	    && !dict_index_is_clust(index)
+	    && (ignore_sec_unique || !dict_index_is_unique(index))) {
+
+		ibuf_flush_count++;
+
+		if (ibuf_flush_count % 4 == 0) {
+
+			buf_LRU_try_free_flushed_blocks();
+		}
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/***********************************************************************//**
+Checks if a page address is an ibuf bitmap page address.
+@return	TRUE if a bitmap page */
+UNIV_INLINE
+ibool
+ibuf_bitmap_page(
+/*=============*/
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	page_no)/*!< in: page number */
+{
+	ut_ad(ut_is_2pow(zip_size));
+
+	if (!zip_size) {
+		return(UNIV_UNLIKELY((page_no & (UNIV_PAGE_SIZE - 1))
+				     == FSP_IBUF_BITMAP_OFFSET));
+	}
+
+	return(UNIV_UNLIKELY((page_no & (zip_size - 1))
+			     == FSP_IBUF_BITMAP_OFFSET));
+}
+
+/*********************************************************************//**
+Translates the free space on a page to a value in the ibuf bitmap.
+@return	value for ibuf bitmap bits */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free_bits(
+/*===========================*/
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	max_ins_size)	/*!< in: maximum insert size after reorganize
+				for the page */
+{
+	ulint	n;
+	ut_ad(ut_is_2pow(zip_size));
+	ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
+	ut_ad(zip_size <= UNIV_PAGE_SIZE);
+
+	if (zip_size) {
+		n = max_ins_size
+			/ (zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+	} else {
+		n = max_ins_size
+			/ (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+	}
+
+	if (n == 3) {
+		n = 2;
+	}
+
+	if (n > 3) {
+		n = 3;
+	}
+
+	return(n);
+}
+
+/*********************************************************************//**
+Translates the ibuf free bits to the free space on a page in bytes.
+@return	maximum insert size after reorganize for the page */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free_from_bits(
+/*================================*/
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	bits)	/*!< in: value for ibuf bitmap bits */
+{
+	ut_ad(bits < 4);
+	ut_ad(ut_is_2pow(zip_size));
+	ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
+	ut_ad(zip_size <= UNIV_PAGE_SIZE);
+
+	if (zip_size) {
+		if (bits == 3) {
+			return(4 * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+		}
+
+		return(bits * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+	}
+
+	if (bits == 3) {
+		return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+	}
+
+	return(bits * (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE));
+}
+
+/*********************************************************************//**
+Translates the free space on a compressed page to a value in the ibuf bitmap.
+@return	value for ibuf bitmap bits */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free_zip(
+/*==========================*/
+	ulint			zip_size,
+					/*!< in: compressed page size in bytes */
+	const buf_block_t*	block)	/*!< in: buffer block */
+{
+	ulint			max_ins_size;
+	const page_zip_des_t*	page_zip;
+	lint			zip_max_ins;
+
+	ut_ad(zip_size == buf_block_get_zip_size(block));
+	ut_ad(zip_size);
+
+	max_ins_size = page_get_max_insert_size_after_reorganize(
+		buf_block_get_frame(block), 1);
+
+	page_zip = buf_block_get_page_zip(block);
+	zip_max_ins = page_zip_max_ins_size(page_zip,
+					    FALSE/* not clustered */);
+
+	if (UNIV_UNLIKELY(zip_max_ins < 0)) {
+		return(0);
+	} else if (UNIV_LIKELY(max_ins_size > (ulint) zip_max_ins)) {
+		max_ins_size = (ulint) zip_max_ins;
+	}
+
+	return(ibuf_index_page_calc_free_bits(zip_size, max_ins_size));
+}
+
+/*********************************************************************//**
+Translates the free space on a page to a value in the ibuf bitmap.
+@return	value for ibuf bitmap bits */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free(
+/*======================*/
+	ulint			zip_size,/*!< in: compressed page size in bytes;
+					0 for uncompressed pages */
+	const buf_block_t*	block)	/*!< in: buffer block */
+{
+	ut_ad(zip_size == buf_block_get_zip_size(block));
+
+	if (!zip_size) {
+		ulint	max_ins_size;
+
+		max_ins_size = page_get_max_insert_size_after_reorganize(
+			buf_block_get_frame(block), 1);
+
+		return(ibuf_index_page_calc_free_bits(0, max_ins_size));
+	} else {
+		return(ibuf_index_page_calc_free_zip(zip_size, block));
+	}
+}
+
+/************************************************************************//**
+Updates the free bits of an uncompressed page in the ibuf bitmap if
+there is not enough free on the page any more.  This is done in a
+separate mini-transaction, hence this operation does not restrict
+further work to only ibuf bitmap operations, which would result if the
+latch to the bitmap page were kept.  NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page.  It is
+unsafe to increment the bits in a separately committed
+mini-transaction, because in crash recovery, the free bits could
+momentarily be set too high.  It is only safe to use this function for
+decrementing the free bits.  Should more free space become available,
+we must not update the free bits here, because that would break crash
+recovery. */
+UNIV_INLINE
+void
+ibuf_update_free_bits_if_full(
+/*==========================*/
+	buf_block_t*	block,	/*!< in: index page to which we have added new
+				records; the free bits are updated if the
+				index is non-clustered and non-unique and
+				the page level is 0, and the page becomes
+				fuller */
+	ulint		max_ins_size,/*!< in: value of maximum insert size with
+				reorganize before the latest operation
+				performed to the page */
+	ulint		increase)/*!< in: upper limit for the additional space
+				used in the latest operation, if known, or
+				ULINT_UNDEFINED */
+{
+	ulint	before;
+	ulint	after;
+
+	ut_ad(!buf_block_get_page_zip(block));
+
+	before = ibuf_index_page_calc_free_bits(0, max_ins_size);
+
+	if (max_ins_size >= increase) {
+#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE
+# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE"
+#endif
+		after = ibuf_index_page_calc_free_bits(0, max_ins_size
+						       - increase);
+#ifdef UNIV_IBUF_DEBUG
+		ut_a(after <= ibuf_index_page_calc_free(0, block));
+#endif
+	} else {
+		after = ibuf_index_page_calc_free(0, block);
+	}
+
+	if (after == 0) {
+		/* We move the page to the front of the buffer pool LRU list:
+		the purpose of this is to prevent those pages to which we
+		cannot make inserts using the insert buffer from slipping
+		out of the buffer pool */
+
+		buf_page_make_young(&block->page);
+	}
+
+	if (before > after) {
+		ibuf_set_free_bits(block, after, before);
+	}
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/ibuf0types.h b/storage/innodb_plugin/include/ibuf0types.h
new file mode 100644
index 00000000000..55944f879b2
--- /dev/null
+++ b/storage/innodb_plugin/include/ibuf0types.h
@@ -0,0 +1,31 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ibuf0types.h
+Insert buffer global types
+
+Created 7/29/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef ibuf0types_h
+#define ibuf0types_h
+
+typedef	struct ibuf_struct	ibuf_t;
+
+#endif
diff --git a/storage/innobase/include/lock0iter.h b/storage/innodb_plugin/include/lock0iter.h
similarity index 51%
rename from storage/innobase/include/lock0iter.h
rename to storage/innodb_plugin/include/lock0iter.h
index d063a360c1f..25a57c9740c 100644
--- a/storage/innobase/include/lock0iter.h
+++ b/storage/innodb_plugin/include/lock0iter.h
@@ -1,7 +1,24 @@
-/******************************************************
-Lock queue iterator type and function prototypes.
+/*****************************************************************************
 
-(c) 2007 Innobase Oy
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0iter.h
+Lock queue iterator type and function prototypes.
 
 Created July 16, 2007 Vasil Dimov
 *******************************************************/
@@ -13,14 +30,14 @@ Created July 16, 2007 Vasil Dimov
 #include "lock0types.h"
 
 typedef struct lock_queue_iterator_struct {
-	lock_t*	current_lock;
+	const lock_t*	current_lock;
 	/* In case this is a record lock queue (not table lock queue)
 	then bit_no is the record number within the heap in which the
 	record is stored. */
-	ulint	bit_no;
+	ulint		bit_no;
 } lock_queue_iterator_t;
 
-/***********************************************************************
+/*******************************************************************//**
 Initialize lock queue iterator so that it starts to iterate from
 "lock". bit_no specifies the record number within the heap where the
 record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
@@ -29,24 +46,24 @@ record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
    bit_no is calculated in this function by using
    lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
    of a wait lock. */
-
+UNIV_INTERN
 void
 lock_queue_iterator_reset(
 /*======================*/
-	lock_queue_iterator_t*	iter,	/* out: iterator */
-	lock_t*			lock,	/* in: lock to start from */
-	ulint			bit_no);/* in: record number in the
+	lock_queue_iterator_t*	iter,	/*!< out: iterator */
+	const lock_t*		lock,	/*!< in: lock to start from */
+	ulint			bit_no);/*!< in: record number in the
 					heap */
 
-/***********************************************************************
+/*******************************************************************//**
 Gets the previous lock in the lock queue, returns NULL if there are no
 more locks (i.e. the current lock is the first one). The iterator is
-receded (if not-NULL is returned). */
+receded (if not-NULL is returned).
+@return	previous lock or NULL */
 
-lock_t*
+const lock_t*
 lock_queue_iterator_get_prev(
 /*=========================*/
-					/* out: previous lock or NULL */
-	lock_queue_iterator_t*	iter);	/* in/out: iterator */
+	lock_queue_iterator_t*	iter);	/*!< in/out: iterator */
 
 #endif /* lock0iter_h */
diff --git a/storage/innodb_plugin/include/lock0lock.h b/storage/innodb_plugin/include/lock0lock.h
new file mode 100644
index 00000000000..fa5db831d4f
--- /dev/null
+++ b/storage/innodb_plugin/include/lock0lock.h
@@ -0,0 +1,809 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0lock.h
+The transaction lock system
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef lock0lock_h
+#define lock0lock_h
+
+#include "univ.i"
+#include "buf0types.h"
+#include "trx0types.h"
+#include "mtr0types.h"
+#include "rem0types.h"
+#include "dict0types.h"
+#include "que0types.h"
+#include "lock0types.h"
+#include "read0types.h"
+#include "hash0hash.h"
+#include "ut0vec.h"
+
+#ifdef UNIV_DEBUG
+extern ibool	lock_print_waits;
+#endif /* UNIV_DEBUG */
+/* Buffer for storing information about the most recent deadlock error */
+extern FILE*	lock_latest_err_file;
+
+/*********************************************************************//**
+Gets the size of a lock struct.
+@return	size in bytes */
+UNIV_INTERN
+ulint
+lock_get_size(void);
+/*===============*/
+/*********************************************************************//**
+Creates the lock system at database start. */
+UNIV_INTERN
+void
+lock_sys_create(
+/*============*/
+	ulint	n_cells);	/*!< in: number of slots in lock hash table */
+/*********************************************************************//**
+Checks if some transaction has an implicit x-lock on a record in a clustered
+index.
+@return	transaction which has the x-lock, or NULL */
+UNIV_INLINE
+trx_t*
+lock_clust_rec_some_has_impl(
+/*=========================*/
+	const rec_t*	rec,	/*!< in: user record */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets);/*!< in: rec_get_offsets(rec, index) */
+/*********************************************************************//**
+Gets the heap_no of the smallest user record on a page.
+@return	heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
+UNIV_INLINE
+ulint
+lock_get_min_heap_no(
+/*=================*/
+	const buf_block_t*	block);	/*!< in: buffer block */
+/*************************************************************//**
+Updates the lock table when we have reorganized a page. NOTE: we copy
+also the locks set on the infimum of the page; the infimum may carry
+locks if an update of a record is occurring on the page, and its locks
+were temporarily stored on the infimum. */
+UNIV_INTERN
+void
+lock_move_reorganize_page(
+/*======================*/
+	const buf_block_t*	block,	/*!< in: old index page, now
+					reorganized */
+	const buf_block_t*	oblock);/*!< in: copy of the old, not
+					reorganized page */
+/*************************************************************//**
+Moves the explicit locks on user records to another page if a record
+list end is moved to another page. */
+UNIV_INTERN
+void
+lock_move_rec_list_end(
+/*===================*/
+	const buf_block_t*	new_block,	/*!< in: index page to move to */
+	const buf_block_t*	block,		/*!< in: index page */
+	const rec_t*		rec);		/*!< in: record on page: this
+						is the first record moved */
+/*************************************************************//**
+Moves the explicit locks on user records to another page if a record
+list start is moved to another page. */
+UNIV_INTERN
+void
+lock_move_rec_list_start(
+/*=====================*/
+	const buf_block_t*	new_block,	/*!< in: index page to move to */
+	const buf_block_t*	block,		/*!< in: index page */
+	const rec_t*		rec,		/*!< in: record on page:
+						this is the first
+						record NOT copied */
+	const rec_t*		old_end);	/*!< in: old
+						previous-to-last
+						record on new_page
+						before the records
+						were copied */
+/*************************************************************//**
+Updates the lock table when a page is split to the right. */
+UNIV_INTERN
+void
+lock_update_split_right(
+/*====================*/
+	const buf_block_t*	right_block,	/*!< in: right page */
+	const buf_block_t*	left_block);	/*!< in: left page */
+/*************************************************************//**
+Updates the lock table when a page is merged to the right. */
+UNIV_INTERN
+void
+lock_update_merge_right(
+/*====================*/
+	const buf_block_t*	right_block,	/*!< in: right page to
+						which merged */
+	const rec_t*		orig_succ,	/*!< in: original
+						successor of infimum
+						on the right page
+						before merge */
+	const buf_block_t*	left_block);	/*!< in: merged index
+						page which will be
+						discarded */
+/*************************************************************//**
+Updates the lock table when the root page is copied to another in
+btr_root_raise_and_insert. Note that we leave lock structs on the
+root page, even though they do not make sense on other than leaf
+pages: the reason is that in a pessimistic update the infimum record
+of the root page will act as a dummy carrier of the locks of the record
+to be updated. */
+UNIV_INTERN
+void
+lock_update_root_raise(
+/*===================*/
+	const buf_block_t*	block,	/*!< in: index page to which copied */
+	const buf_block_t*	root);	/*!< in: root page */
+/*************************************************************//**
+Updates the lock table when a page is copied to another and the original page
+is removed from the chain of leaf pages, except if page is the root! */
+UNIV_INTERN
+void
+lock_update_copy_and_discard(
+/*=========================*/
+	const buf_block_t*	new_block,	/*!< in: index page to
+						which copied */
+	const buf_block_t*	block);		/*!< in: index page;
+						NOT the root! */
+/*************************************************************//**
+Updates the lock table when a page is split to the left. */
+UNIV_INTERN
+void
+lock_update_split_left(
+/*===================*/
+	const buf_block_t*	right_block,	/*!< in: right page */
+	const buf_block_t*	left_block);	/*!< in: left page */
+/*************************************************************//**
+Updates the lock table when a page is merged to the left. */
+UNIV_INTERN
+void
+lock_update_merge_left(
+/*===================*/
+	const buf_block_t*	left_block,	/*!< in: left page to
+						which merged */
+	const rec_t*		orig_pred,	/*!< in: original predecessor
+						of supremum on the left page
+						before merge */
+	const buf_block_t*	right_block);	/*!< in: merged index page
+						which will be discarded */
+/*************************************************************//**
+Resets the original locks on heir and replaces them with gap type locks
+inherited from rec. */
+UNIV_INTERN
+void
+lock_rec_reset_and_inherit_gap_locks(
+/*=================================*/
+	const buf_block_t*	heir_block,	/*!< in: block containing the
+						record which inherits */
+	const buf_block_t*	block,		/*!< in: block containing the
+						record from which inherited;
+						does NOT reset the locks on
+						this record */
+	ulint			heir_heap_no,	/*!< in: heap_no of the
+						inheriting record */
+	ulint			heap_no);	/*!< in: heap_no of the
+						donating record */
+/*************************************************************//**
+Updates the lock table when a page is discarded. */
+UNIV_INTERN
+void
+lock_update_discard(
+/*================*/
+	const buf_block_t*	heir_block,	/*!< in: index page
+						which will inherit the locks */
+	ulint			heir_heap_no,	/*!< in: heap_no of the record
+						which will inherit the locks */
+	const buf_block_t*	block);		/*!< in: index page
+						which will be discarded */
+/*************************************************************//**
+Updates the lock table when a new user record is inserted. */
+UNIV_INTERN
+void
+lock_update_insert(
+/*===============*/
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec);	/*!< in: the inserted record */
+/*************************************************************//**
+Updates the lock table when a record is removed. */
+UNIV_INTERN
+void
+lock_update_delete(
+/*===============*/
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec);	/*!< in: the record to be removed */
+/*********************************************************************//**
+Stores on the page infimum record the explicit locks of another record.
+This function is used to store the lock state of a record when it is
+updated and the size of the record changes in the update. The record
+is in such an update moved, perhaps to another page. The infimum record
+acts as a dummy carrier record, taking care of lock releases while the
+actual record is being moved. */
+UNIV_INTERN
+void
+lock_rec_store_on_page_infimum(
+/*===========================*/
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec);	/*!< in: record whose lock state
+					is stored on the infimum
+					record of the same page; lock
+					bits are reset on the
+					record */
+/*********************************************************************//**
+Restores the state of explicit lock requests on a single record, where the
+state was stored on the infimum of the page. */
+UNIV_INTERN
+void
+lock_rec_restore_from_page_infimum(
+/*===============================*/
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec,	/*!< in: record whose lock state
+					is restored */
+	const buf_block_t*	donator);/*!< in: page (rec is not
+					necessarily on this page)
+					whose infimum stored the lock
+					state; lock bits are reset on
+					the infimum */
+/*********************************************************************//**
+Returns TRUE if there are explicit record locks on a page.
+@return	TRUE if there are explicit record locks on the page */
+UNIV_INTERN
+ibool
+lock_rec_expl_exist_on_page(
+/*========================*/
+	ulint	space,	/*!< in: space id */
+	ulint	page_no);/*!< in: page number */
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate insert of
+a record. If they do, first tests if the query thread should anyway
+be suspended for some reason; if not, then puts the transaction and
+the query thread to the lock wait state and inserts a waiting request
+for a gap x-lock to the lock queue.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_rec_insert_check_and_lock(
+/*===========================*/
+	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is
+				set, does nothing */
+	const rec_t*	rec,	/*!< in: record after which to insert */
+	buf_block_t*	block,	/*!< in/out: buffer block of rec */
+	dict_index_t*	index,	/*!< in: index */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction */
+	ibool*		inherit);/*!< out: set to TRUE if the new
+				inserted record maybe should inherit
+				LOCK_GAP type locks from the successor
+				record */
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate modify (update,
+delete mark, or delete unmark) of a clustered index record. If they do,
+first tests if the query thread should anyway be suspended for some
+reason; if not, then puts the transaction and the query thread to the
+lock wait state and inserts a waiting request for a record x-lock to the
+lock queue.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_clust_rec_modify_check_and_lock(
+/*=================================*/
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: record which should be
+					modified */
+	dict_index_t*		index,	/*!< in: clustered index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	que_thr_t*		thr);	/*!< in: query thread */
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate modify
+(delete mark or delete unmark) of a secondary index record.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_sec_rec_modify_check_and_lock(
+/*===============================*/
+	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+				bit is set, does nothing */
+	buf_block_t*	block,	/*!< in/out: buffer block of rec */
+	const rec_t*	rec,	/*!< in: record which should be
+				modified; NOTE: as this is a secondary
+				index, we always have to modify the
+				clustered index record first: see the
+				comment below */
+	dict_index_t*	index,	/*!< in: secondary index */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr);	/*!< in/out: mini-transaction */
+/*********************************************************************//**
+Like the counterpart for a clustered index below, but now we read a
+secondary index record.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_sec_rec_read_check_and_lock(
+/*=============================*/
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: user record or page
+					supremum record which should
+					be read or passed over by a
+					read cursor */
+	dict_index_t*		index,	/*!< in: secondary index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	enum lock_mode		mode,	/*!< in: mode of the lock which
+					the read cursor should set on
+					records: LOCK_S or LOCK_X; the
+					latter is possible in
+					SELECT FOR UPDATE */
+	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOCK_REC_NOT_GAP */
+	que_thr_t*		thr);	/*!< in: query thread */
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_clust_rec_read_check_and_lock(
+/*===============================*/
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: user record or page
+					supremum record which should
+					be read or passed over by a
+					read cursor */
+	dict_index_t*		index,	/*!< in: clustered index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	enum lock_mode		mode,	/*!< in: mode of the lock which
+					the read cursor should set on
+					records: LOCK_S or LOCK_X; the
+					latter is possible in
+					SELECT FOR UPDATE */
+	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOCK_REC_NOT_GAP */
+	que_thr_t*		thr);	/*!< in: query thread */
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record. This is an alternative version of
+lock_clust_rec_read_check_and_lock() that does not require the parameter
+"offsets".
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_clust_rec_read_check_and_lock_alt(
+/*===================================*/
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: user record or page
+					supremum record which should
+					be read or passed over by a
+					read cursor */
+	dict_index_t*		index,	/*!< in: clustered index */
+	enum lock_mode		mode,	/*!< in: mode of the lock which
+					the read cursor should set on
+					records: LOCK_S or LOCK_X; the
+					latter is possible in
+					SELECT FOR UPDATE */
+	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOCK_REC_NOT_GAP */
+	que_thr_t*		thr);	/*!< in: query thread */
+/*********************************************************************//**
+Checks that a record is seen in a consistent read.
+@return TRUE if sees, or FALSE if an earlier version of the record
+should be retrieved */
+UNIV_INTERN
+ibool
+lock_clust_rec_cons_read_sees(
+/*==========================*/
+	const rec_t*	rec,	/*!< in: user record which should be read or
+				passed over by a read cursor */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	read_view_t*	view);	/*!< in: consistent read view */
+/*********************************************************************//**
+Checks that a non-clustered index record is seen in a consistent read.
+
+NOTE that a non-clustered index page contains so little information on
+its modifications that also in the case FALSE, the present version of
+rec may be the right, but we must check this from the clustered index
+record.
+
+@return TRUE if certainly sees, or FALSE if an earlier version of the
+clustered index record might be needed */
+UNIV_INTERN
+ulint
+lock_sec_rec_cons_read_sees(
+/*========================*/
+	const rec_t*		rec,	/*!< in: user record which
+					should be read or passed over
+					by a read cursor */
+	const read_view_t*	view);	/*!< in: consistent read view */
+/*********************************************************************//**
+Locks the specified database table in the mode given. If the lock cannot
+be granted immediately, the query thread is put to wait.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+ulint
+lock_table(
+/*=======*/
+	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	dict_table_t*	table,	/*!< in: database table in dictionary cache */
+	enum lock_mode	mode,	/*!< in: lock mode */
+	que_thr_t*	thr);	/*!< in: query thread */
+/*************************************************************//**
+Removes a granted record lock of a transaction from the queue and grants
+locks to other transactions waiting in the queue if they now are entitled
+to a lock. */
+UNIV_INTERN
+void
+lock_rec_unlock(
+/*============*/
+	trx_t*			trx,	/*!< in: transaction that has
+					set a record lock */
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec,	/*!< in: record */
+	enum lock_mode		lock_mode);/*!< in: LOCK_S or LOCK_X */
+/*********************************************************************//**
+Releases transaction locks, and releases possible other transactions waiting
+because of these locks. */
+UNIV_INTERN
+void
+lock_release_off_kernel(
+/*====================*/
+	trx_t*	trx);	/*!< in: transaction */
+/*********************************************************************//**
+Cancels a waiting lock request and releases possible other transactions
+waiting behind it. */
+UNIV_INTERN
+void
+lock_cancel_waiting_and_release(
+/*============================*/
+	lock_t*	lock);	/*!< in: waiting lock request */
+
+/*********************************************************************//**
+Removes locks on a table to be dropped or truncated.
+If remove_also_table_sx_locks is TRUE then table-level S and X locks are
+also removed in addition to other table-level and record-level locks.
+No lock, that is going to be removed, is allowed to be a wait lock. */
+UNIV_INTERN
+void
+lock_remove_all_on_table(
+/*=====================*/
+	dict_table_t*	table,			/*!< in: table to be dropped
+						or truncated */
+	ibool		remove_also_table_sx_locks);/*!< in: also removes
+						table S and X locks */
+
+/*********************************************************************//**
+Calculates the fold value of a page file address: used in inserting or
+searching for a lock in the hash table.
+@return	folded value */
+UNIV_INLINE
+ulint
+lock_rec_fold(
+/*==========*/
+	ulint	space,	/*!< in: space */
+	ulint	page_no)/*!< in: page number */
+	__attribute__((const));
+/*********************************************************************//**
+Calculates the hash value of a page file address: used in inserting or
+searching for a lock in the hash table.
+@return	hashed value */
+UNIV_INLINE
+ulint
+lock_rec_hash(
+/*==========*/
+	ulint	space,	/*!< in: space */
+	ulint	page_no);/*!< in: page number */
+
+/**********************************************************************//**
+Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
+if none found.
+@return bit index == heap number of the record, or ULINT_UNDEFINED if
+none found */
+UNIV_INTERN
+ulint
+lock_rec_find_set_bit(
+/*==================*/
+	const lock_t*	lock);	/*!< in: record lock with at least one
+				bit set */
+
+/*********************************************************************//**
+Gets the source table of an ALTER TABLE transaction.  The table must be
+covered by an IX or IS table lock.
+@return the source table of transaction, if it is covered by an IX or
+IS table lock; dest if there is no source table, and NULL if the
+transaction is locking more than two tables or an inconsistency is
+found */
+UNIV_INTERN
+dict_table_t*
+lock_get_src_table(
+/*===============*/
+	trx_t*		trx,	/*!< in: transaction */
+	dict_table_t*	dest,	/*!< in: destination of ALTER TABLE */
+	enum lock_mode*	mode);	/*!< out: lock mode of the source table */
+/*********************************************************************//**
+Determine if the given table is exclusively "owned" by the given
+transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
+on the table.
+@return TRUE if table is only locked by trx, with LOCK_IX, and
+possibly LOCK_AUTO_INC */
+UNIV_INTERN
+ibool
+lock_is_table_exclusive(
+/*====================*/
+	dict_table_t*	table,	/*!< in: table */
+	trx_t*		trx);	/*!< in: transaction */
+/*********************************************************************//**
+Checks if a lock request lock1 has to wait for request lock2.
+@return	TRUE if lock1 has to wait for lock2 to be removed */
+UNIV_INTERN
+ibool
+lock_has_to_wait(
+/*=============*/
+	const lock_t*	lock1,	/*!< in: waiting lock */
+	const lock_t*	lock2);	/*!< in: another lock; NOTE that it is
+				assumed that this has a lock bit set
+				on the same record as in lock1 if the
+				locks are record locks */
+/*********************************************************************//**
+Checks that a transaction id is sensible, i.e., not in the future.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+lock_check_trx_id_sanity(
+/*=====================*/
+	trx_id_t	trx_id,		/*!< in: trx id */
+	const rec_t*	rec,		/*!< in: user record */
+	dict_index_t*	index,		/*!< in: clustered index */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
+	ibool		has_kernel_mutex);/*!< in: TRUE if the caller owns the
+					kernel mutex */
+/*********************************************************************//**
+Prints info of a table lock. */
+UNIV_INTERN
+void
+lock_table_print(
+/*=============*/
+	FILE*		file,	/*!< in: file where to print */
+	const lock_t*	lock);	/*!< in: table type lock */
+/*********************************************************************//**
+Prints info of a record lock. */
+UNIV_INTERN
+void
+lock_rec_print(
+/*===========*/
+	FILE*		file,	/*!< in: file where to print */
+	const lock_t*	lock);	/*!< in: record type lock */
+/*********************************************************************//**
+Prints info of locks for all transactions. */
+UNIV_INTERN
+void
+lock_print_info_summary(
+/*====================*/
+	FILE*	file);	/*!< in: file where to print */
+/*********************************************************************//**
+Prints info of locks for each transaction. */
+UNIV_INTERN
+void
+lock_print_info_all_transactions(
+/*=============================*/
+	FILE*	file);	/*!< in: file where to print */
+/*********************************************************************//**
+Return approximate number or record locks (bits set in the bitmap) for
+this transaction. Since delete-marked records may be removed, the
+record count will not be precise. */
+UNIV_INTERN
+ulint
+lock_number_of_rows_locked(
+/*=======================*/
+	trx_t*	trx);	/*!< in: transaction */
+/*******************************************************************//**
+Release all the transaction's autoinc locks. */
+UNIV_INTERN
+void
+lock_release_autoinc_locks(
+/*=======================*/
+	trx_t*		trx);		/*!< in/out: transaction */
+
+/*******************************************************************//**
+Gets the type of a lock. Non-inline version for using outside of the
+lock module.
+@return	LOCK_TABLE or LOCK_REC */
+UNIV_INTERN
+ulint
+lock_get_type(
+/*==========*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+Gets the id of the transaction owning a lock.
+@return	transaction id */
+UNIV_INTERN
+ullint
+lock_get_trx_id(
+/*============*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+Gets the mode of a lock in a human readable string.
+The string should not be free()'d or modified.
+@return	lock mode */
+UNIV_INTERN
+const char*
+lock_get_mode_str(
+/*==============*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+Gets the type of a lock in a human readable string.
+The string should not be free()'d or modified.
+@return	lock type */
+UNIV_INTERN
+const char*
+lock_get_type_str(
+/*==============*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+Gets the id of the table on which the lock is.
+@return	id of the table */
+UNIV_INTERN
+ullint
+lock_get_table_id(
+/*==============*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+Gets the name of the table on which the lock is.
+The string should not be free()'d or modified.
+@return	name of the table */
+UNIV_INTERN
+const char*
+lock_get_table_name(
+/*================*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+For a record lock, gets the index on which the lock is.
+@return	index */
+UNIV_INTERN
+const dict_index_t*
+lock_rec_get_index(
+/*===============*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+For a record lock, gets the name of the index on which the lock is.
+The string should not be free()'d or modified.
+@return	name of the index */
+UNIV_INTERN
+const char*
+lock_rec_get_index_name(
+/*====================*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+For a record lock, gets the tablespace number on which the lock is.
+@return	tablespace number */
+UNIV_INTERN
+ulint
+lock_rec_get_space_id(
+/*==================*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+For a record lock, gets the page number on which the lock is.
+@return	page number */
+UNIV_INTERN
+ulint
+lock_rec_get_page_no(
+/*=================*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/** Lock modes and types */
+/* @{ */
+#define LOCK_MODE_MASK	0xFUL	/*!< mask used to extract mode from the
+				type_mode field in a lock */
+/** Lock types */
+/* @{ */
+#define LOCK_TABLE	16	/*!< table lock */
+#define	LOCK_REC	32	/*!< record lock */
+#define LOCK_TYPE_MASK	0xF0UL	/*!< mask used to extract lock type from the
+				type_mode field in a lock */
+#if LOCK_MODE_MASK & LOCK_TYPE_MASK
+# error "LOCK_MODE_MASK & LOCK_TYPE_MASK"
+#endif
+
+#define LOCK_WAIT	256	/*!< Waiting lock flag; when set, it
+				means that the lock has not yet been
+				granted, it is just waiting for its
+				turn in the wait queue */
+/* Precise modes */
+#define LOCK_ORDINARY	0	/*!< this flag denotes an ordinary
+				next-key lock in contrast to LOCK_GAP
+				or LOCK_REC_NOT_GAP */
+#define LOCK_GAP	512	/*!< when this bit is set, it means that the
+				lock holds only on the gap before the record;
+				for instance, an x-lock on the gap does not
+				give permission to modify the record on which
+				the bit is set; locks of this type are created
+				when records are removed from the index chain
+				of records */
+#define LOCK_REC_NOT_GAP 1024	/*!< this bit means that the lock is only on
+				the index record and does NOT block inserts
+				to the gap before the index record; this is
+				used in the case when we retrieve a record
+				with a unique key, and is also used in
+				locking plain SELECTs (not part of UPDATE
+				or DELETE) when the user has set the READ
+				COMMITTED isolation level */
+#define LOCK_INSERT_INTENTION 2048 /*!< this bit is set when we place a waiting
+				gap type record lock request in order to let
+				an insert of an index record to wait until
+				there are no conflicting locks by other
+				transactions on the gap; note that this flag
+				remains set when the waiting lock is granted,
+				or if the lock is inherited to a neighboring
+				record */
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
+# error
+#endif
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_TYPE_MASK
+# error
+#endif
+/* @} */
+
+/** Lock operation struct */
+typedef struct lock_op_struct	lock_op_t;
+/** Lock operation struct */
+struct lock_op_struct{
+	dict_table_t*	table;	/*!< table to be locked */
+	enum lock_mode	mode;	/*!< lock mode */
+};
+
+/** The lock system struct */
+struct lock_sys_struct{
+	hash_table_t*	rec_hash;	/*!< hash table of the record locks */
+};
+
+/** The lock system */
+extern lock_sys_t*	lock_sys;
+
+
+#ifndef UNIV_NONINL
+#include "lock0lock.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/lock0lock.ic b/storage/innodb_plugin/include/lock0lock.ic
new file mode 100644
index 00000000000..014722f51c4
--- /dev/null
+++ b/storage/innodb_plugin/include/lock0lock.ic
@@ -0,0 +1,121 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0lock.ic
+The transaction lock system
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#include "sync0sync.h"
+#include "srv0srv.h"
+#include "dict0dict.h"
+#include "row0row.h"
+#include "trx0sys.h"
+#include "trx0trx.h"
+#include "buf0buf.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "row0vers.h"
+#include "que0que.h"
+#include "btr0cur.h"
+#include "read0read.h"
+#include "log0recv.h"
+
+/*********************************************************************//**
+Calculates the fold value of a page file address: used in inserting or
+searching for a lock in the hash table.
+@return	folded value */
+UNIV_INLINE
+ulint
+lock_rec_fold(
+/*==========*/
+	ulint	space,	/*!< in: space */
+	ulint	page_no)/*!< in: page number */
+{
+	return(ut_fold_ulint_pair(space, page_no));
+}
+
+/*********************************************************************//**
+Calculates the hash value of a page file address: used in inserting or
+searching for a lock in the hash table.
+@return	hashed value */
+UNIV_INLINE
+ulint
+lock_rec_hash(
+/*==========*/
+	ulint	space,	/*!< in: space */
+	ulint	page_no)/*!< in: page number */
+{
+	return(hash_calc_hash(lock_rec_fold(space, page_no),
+			      lock_sys->rec_hash));
+}
+
+/*********************************************************************//**
+Checks if some transaction has an implicit x-lock on a record in a clustered
+index.
+@return	transaction which has the x-lock, or NULL */
+UNIV_INLINE
+trx_t*
+lock_clust_rec_some_has_impl(
+/*=========================*/
+	const rec_t*	rec,	/*!< in: user record */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+	trx_id_t	trx_id;
+
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(page_rec_is_user_rec(rec));
+
+	trx_id = row_get_rec_trx_id(rec, index, offsets);
+
+	if (trx_is_active(trx_id)) {
+		/* The modifying or inserting transaction is active */
+
+		return(trx_get_on_id(trx_id));
+	}
+
+	return(NULL);
+}
+
+/*********************************************************************//**
+Gets the heap_no of the smallest user record on a page.
+@return	heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
+UNIV_INLINE
+ulint
+lock_get_min_heap_no(
+/*=================*/
+	const buf_block_t*	block)	/*!< in: buffer block */
+{
+	const page_t*	page	= block->frame;
+
+	if (page_is_comp(page)) {
+		return(rec_get_heap_no_new(
+			       page
+			       + rec_get_next_offs(page + PAGE_NEW_INFIMUM,
+						   TRUE)));
+	} else {
+		return(rec_get_heap_no_old(
+			       page
+			       + rec_get_next_offs(page + PAGE_OLD_INFIMUM,
+						   FALSE)));
+	}
+}
diff --git a/storage/innodb_plugin/include/lock0priv.h b/storage/innodb_plugin/include/lock0priv.h
new file mode 100644
index 00000000000..287c151b19f
--- /dev/null
+++ b/storage/innodb_plugin/include/lock0priv.h
@@ -0,0 +1,108 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0priv.h
+Lock module internal structures and methods.
+
+Created July 12, 2007 Vasil Dimov
+*******************************************************/
+
+#ifndef lock0priv_h
+#define lock0priv_h
+
+#ifndef LOCK_MODULE_IMPLEMENTATION
+/* If you need to access members of the structures defined in this
+file, please write appropriate functions that retrieve them and put
+those functions in lock/ */
+#error Do not include lock0priv.h outside of the lock/ module
+#endif
+
+#include "univ.i"
+#include "dict0types.h"
+#include "hash0hash.h"
+#include "trx0types.h"
+#include "ut0lst.h"
+
+/** A table lock */
+typedef struct lock_table_struct	lock_table_t;
+/** A table lock */
+struct lock_table_struct {
+	dict_table_t*	table;		/*!< database table in dictionary
+					cache */
+	UT_LIST_NODE_T(lock_t)
+			locks;		/*!< list of locks on the same
+					table */
+};
+
+/** Record lock for a page */
+typedef struct lock_rec_struct		lock_rec_t;
+/** Record lock for a page */
+struct lock_rec_struct {
+	ulint	space;			/*!< space id */
+	ulint	page_no;		/*!< page number */
+	ulint	n_bits;			/*!< number of bits in the lock
+					bitmap; NOTE: the lock bitmap is
+					placed immediately after the
+					lock struct */
+};
+
+/** Lock struct */
+struct lock_struct {
+	trx_t*		trx;		/*!< transaction owning the
+					lock */
+	UT_LIST_NODE_T(lock_t)
+			trx_locks;	/*!< list of the locks of the
+					transaction */
+	ulint		type_mode;	/*!< lock type, mode, LOCK_GAP or
+					LOCK_REC_NOT_GAP,
+					LOCK_INSERT_INTENTION,
+					wait flag, ORed */
+	hash_node_t	hash;		/*!< hash chain node for a record
+					lock */
+	dict_index_t*	index;		/*!< index for a record lock */
+	union {
+		lock_table_t	tab_lock;/*!< table lock */
+		lock_rec_t	rec_lock;/*!< record lock */
+	} un_member;			/*!< lock details */
+};
+
+/*********************************************************************//**
+Gets the type of a lock.
+@return	LOCK_TABLE or LOCK_REC */
+UNIV_INLINE
+ulint
+lock_get_type_low(
+/*==============*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*********************************************************************//**
+Gets the previous record lock set on a record.
+@return	previous lock on the same record, NULL if none exists */
+UNIV_INTERN
+const lock_t*
+lock_rec_get_prev(
+/*==============*/
+	const lock_t*	in_lock,/*!< in: record lock */
+	ulint		heap_no);/*!< in: heap number of the record */
+
+#ifndef UNIV_NONINL
+#include "lock0priv.ic"
+#endif
+
+#endif /* lock0priv_h */
diff --git a/storage/innodb_plugin/include/lock0priv.ic b/storage/innodb_plugin/include/lock0priv.ic
new file mode 100644
index 00000000000..30447c99848
--- /dev/null
+++ b/storage/innodb_plugin/include/lock0priv.ic
@@ -0,0 +1,49 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0priv.ic
+Lock module internal inline methods.
+
+Created July 16, 2007 Vasil Dimov
+*******************************************************/
+
+/* This file contains only methods which are used in
+lock/lock0* files, other than lock/lock0lock.c.
+I.e. lock/lock0lock.c contains more internal inline
+methods but they are used only in that file. */
+
+#ifndef LOCK_MODULE_IMPLEMENTATION
+#error Do not include lock0priv.ic outside of the lock/ module
+#endif
+
+/*********************************************************************//**
+Gets the type of a lock.
+@return	LOCK_TABLE or LOCK_REC */
+UNIV_INLINE
+ulint
+lock_get_type_low(
+/*==============*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_ad(lock);
+
+	return(lock->type_mode & LOCK_TYPE_MASK);
+}
+
+/* vim: set filetype=c: */
diff --git a/storage/innodb_plugin/include/lock0types.h b/storage/innodb_plugin/include/lock0types.h
new file mode 100644
index 00000000000..45f29e90fe9
--- /dev/null
+++ b/storage/innodb_plugin/include/lock0types.h
@@ -0,0 +1,45 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0types.h
+The transaction lock system global types
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef lock0types_h
+#define lock0types_h
+
+#define lock_t ib_lock_t
+typedef struct lock_struct	lock_t;
+typedef struct lock_sys_struct	lock_sys_t;
+
+/* Basic lock modes */
+enum lock_mode {
+	LOCK_IS = 0,	/* intention shared */
+	LOCK_IX,	/* intention exclusive */
+	LOCK_S,		/* shared */
+	LOCK_X,		/* exclusive */
+	LOCK_AUTO_INC,	/* locks the auto-inc counter of a table
+			in an exclusive mode */
+	LOCK_NONE,	/* this is used elsewhere to note consistent read */
+	LOCK_NUM = LOCK_NONE/* number of lock modes */
+};
+
+#endif
diff --git a/storage/innobase/include/log0log.h b/storage/innodb_plugin/include/log0log.h
similarity index 51%
rename from storage/innobase/include/log0log.h
rename to storage/innodb_plugin/include/log0log.h
index 337b9f1e783..059f548a085 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innodb_plugin/include/log0log.h
@@ -1,7 +1,48 @@
-/******************************************************
-Database log
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/log0log.h
+Database log
 
 Created 12/9/1995 Heikki Tuuri
 *******************************************************/
@@ -11,71 +52,83 @@ Created 12/9/1995 Heikki Tuuri
 
 #include "univ.i"
 #include "ut0byte.h"
+#include "ut0lst.h"
+#ifndef UNIV_HOTBACKUP
 #include "sync0sync.h"
 #include "sync0rw.h"
+#endif /* !UNIV_HOTBACKUP */
 
+/** Redo log buffer */
 typedef struct log_struct	log_t;
+/** Redo log group */
 typedef struct log_group_struct	log_group_t;
 
 #ifdef UNIV_DEBUG
+/** Flag: write to log file? */
 extern	ibool	log_do_write;
+/** Flag: enable debug output when writing to the log? */
 extern	ibool	log_debug_writes;
 #else /* UNIV_DEBUG */
+/** Write to log */
 # define log_do_write TRUE
 #endif /* UNIV_DEBUG */
 
-/* Wait modes for log_write_up_to */
+/** Wait modes for log_write_up_to @{ */
 #define LOG_NO_WAIT		91
 #define LOG_WAIT_ONE_GROUP	92
 #define	LOG_WAIT_ALL_GROUPS	93
+/* @} */
+/** Maximum number of log groups in log_group_struct::checkpoint_buf */
 #define LOG_MAX_N_GROUPS	32
 
-/********************************************************************
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
 Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
 so that we know that the limit has been written to a log checkpoint field
 on disk. */
-
+UNIV_INTERN
 void
 log_fsp_current_free_limit_set_and_checkpoint(
 /*==========================================*/
-	ulint	limit);	/* in: limit to set */
-/***********************************************************************
-Calculates where in log files we find a specified lsn. */
-
+	ulint	limit);	/*!< in: limit to set */
+#endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
+Calculates where in log files we find a specified lsn.
+@return	log file number */
+UNIV_INTERN
 ulint
 log_calc_where_lsn_is(
 /*==================*/
-						/* out: log file number */
-	ib_longlong*	log_file_offset,	/* out: offset in that file
+	ib_int64_t*	log_file_offset,	/*!< out: offset in that file
 						(including the header) */
-	dulint		first_header_lsn,	/* in: first log file start
+	ib_uint64_t	first_header_lsn,	/*!< in: first log file start
 						lsn */
-	dulint		lsn,			/* in: lsn whose position to
+	ib_uint64_t	lsn,			/*!< in: lsn whose position to
 						determine */
-	ulint		n_log_files,		/* in: total number of log
+	ulint		n_log_files,		/*!< in: total number of log
 						files */
-	ib_longlong	log_file_size);		/* in: log file size
+	ib_int64_t	log_file_size);		/*!< in: log file size
 						(including the header) */
-/****************************************************************
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
 Writes to the log the string given. The log must be released with
-log_release. */
+log_release.
+@return	end lsn of the log record, zero if did not succeed */
 UNIV_INLINE
-dulint
+ib_uint64_t
 log_reserve_and_write_fast(
 /*=======================*/
-			/* out: end lsn of the log record, ut_dulint_zero if
-			did not succeed */
-	byte*	str,	/* in: string */
-	ulint	len,	/* in: string length */
-	dulint*	start_lsn,/* out: start lsn of the log record */
-	ibool*	success);/* out: TRUE if success */
-/***************************************************************************
+	byte*		str,	/*!< in: string */
+	ulint		len,	/*!< in: string length */
+	ib_uint64_t*	start_lsn,/*!< out: start lsn of the log record */
+	ibool*		success);/*!< out: TRUE if success */
+/***********************************************************************//**
 Releases the log mutex. */
 UNIV_INLINE
 void
 log_release(void);
 /*=============*/
-/***************************************************************************
+/***********************************************************************//**
 Checks if there is need for a log buffer flush or a new checkpoint, and does
 this if yes. Any database operation should call this when it has modified
 more than about 4 pages. NOTE that this function may only be called when the
@@ -84,428 +137,439 @@ UNIV_INLINE
 void
 log_free_check(void);
 /*================*/
-/****************************************************************
+/************************************************************//**
 Opens the log for log_write_low. The log must be closed with log_close and
-released with log_release. */
-
-dulint
+released with log_release.
+@return	start lsn of the log record */
+UNIV_INTERN
+ib_uint64_t
 log_reserve_and_open(
 /*=================*/
-			/* out: start lsn of the log record */
-	ulint	len);	/* in: length of data to be catenated */
-/****************************************************************
+	ulint	len);	/*!< in: length of data to be catenated */
+/************************************************************//**
 Writes to the log the string given. It is assumed that the caller holds the
 log mutex. */
-
+UNIV_INTERN
 void
 log_write_low(
 /*==========*/
-	byte*	str,		/* in: string */
-	ulint	str_len);	/* in: string length */
-/****************************************************************
-Closes the log. */
-
-dulint
+	byte*	str,		/*!< in: string */
+	ulint	str_len);	/*!< in: string length */
+/************************************************************//**
+Closes the log.
+@return	lsn */
+UNIV_INTERN
+ib_uint64_t
 log_close(void);
 /*===========*/
-			/* out: lsn */
-/****************************************************************
-Gets the current lsn. */
+/************************************************************//**
+Gets the current lsn.
+@return	current lsn */
 UNIV_INLINE
-dulint
+ib_uint64_t
 log_get_lsn(void);
 /*=============*/
-			/* out: current lsn */
-/**********************************************************
+/****************************************************************
+Gets the log group capacity. It is OK to read the value without
+holding log_sys->mutex because it is constant.
+@return	log group capacity */
+UNIV_INLINE
+ulint
+log_get_capacity(void);
+/*==================*/
+/******************************************************//**
 Initializes the log. */
-
+UNIV_INTERN
 void
 log_init(void);
 /*==========*/
-/**********************************************************************
+/******************************************************************//**
 Inits a log group to the log system. */
-
+UNIV_INTERN
 void
 log_group_init(
 /*===========*/
-	ulint	id,			/* in: group id */
-	ulint	n_files,		/* in: number of log files */
-	ulint	file_size,		/* in: log file size in bytes */
-	ulint	space_id,		/* in: space id of the file space
+	ulint	id,			/*!< in: group id */
+	ulint	n_files,		/*!< in: number of log files */
+	ulint	file_size,		/*!< in: log file size in bytes */
+	ulint	space_id,		/*!< in: space id of the file space
 					which contains the log files of this
 					group */
-	ulint	archive_space_id);	/* in: space id of the file space
+	ulint	archive_space_id);	/*!< in: space id of the file space
 					which contains some archived log
 					files for this group; currently, only
 					for the first log group this is
 					used */
-/**********************************************************
+/******************************************************//**
 Completes an i/o to a log file. */
-
+UNIV_INTERN
 void
 log_io_complete(
 /*============*/
-	log_group_t*	group);	/* in: log group */
-/**********************************************************
+	log_group_t*	group);	/*!< in: log group */
+/******************************************************//**
 This function is called, e.g., when a transaction wants to commit. It checks
 that the log has been written to the log file up to the last log entry written
 by the transaction. If there is a flush running, it waits and checks if the
 flush flushed enough. If not, starts a new flush. */
-
+UNIV_INTERN
 void
 log_write_up_to(
 /*============*/
-	dulint	lsn,	/* in: log sequence number up to which the log should
-			be written, ut_dulint_max if not specified */
-	ulint	wait,	/* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
-			or LOG_WAIT_ALL_GROUPS */
-	ibool	flush_to_disk);
-			/* in: TRUE if we want the written log also to be
-			flushed to disk */
-/********************************************************************
+	ib_uint64_t	lsn,	/*!< in: log sequence number up to which
+				the log should be written,
+				IB_ULONGLONG_MAX if not specified */
+	ulint		wait,	/*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+				or LOG_WAIT_ALL_GROUPS */
+	ibool		flush_to_disk);
+				/*!< in: TRUE if we want the written log
+				also to be flushed to disk */
+/****************************************************************//**
 Does a syncronous flush of the log buffer to disk. */
-
+UNIV_INTERN
 void
 log_buffer_flush_to_disk(void);
 /*==========================*/
-/********************************************************************
-Flushes the log buffer. Forces it to disk depending on the value of
-the configuration parameter innodb_flush_log_at_trx_commit. */
-
+/****************************************************************//**
+This functions writes the log buffer to the log file and if 'flush'
+is set it forces a flush of the log file as well. This is meant to be
+called from background master thread only as it does not wait for
+the write (+ possible flush) to finish. */
+UNIV_INTERN
 void
-log_buffer_flush_maybe_sync(void);
+log_buffer_sync_in_background(
 /*==========================*/
-/********************************************************************
+	ibool	flush);	/*<! in: flush the logs to disk */
+/****************************************************************//**
 Advances the smallest lsn for which there are unflushed dirty blocks in the
 buffer pool and also may make a new checkpoint. NOTE: this function may only
-be called if the calling thread owns no synchronization objects! */
-
+be called if the calling thread owns no synchronization objects!
+@return FALSE if there was a flush batch of the same type running,
+which means that we could not start this flush batch */
+UNIV_INTERN
 ibool
 log_preflush_pool_modified_pages(
 /*=============================*/
-				/* out: FALSE if there was a flush batch of
-				the same type running, which means that we
-				could not start this flush batch */
-	dulint	new_oldest,	/* in: try to advance oldest_modified_lsn
-				at least to this lsn */
-	ibool	sync);		/* in: TRUE if synchronous operation is
-				desired */
-/**********************************************************
+	ib_uint64_t	new_oldest,	/*!< in: try to advance
+					oldest_modified_lsn at least
+					to this lsn */
+	ibool		sync);		/*!< in: TRUE if synchronous
+					operation is desired */
+/******************************************************//**
 Makes a checkpoint. Note that this function does not flush dirty
 blocks from the buffer pool: it only checks what is lsn of the oldest
 modification in the pool, and writes information about the lsn in
-log files. Use log_make_checkpoint_at to flush also the pool. */
-
+log files. Use log_make_checkpoint_at to flush also the pool.
+@return	TRUE if success, FALSE if a checkpoint write was already running */
+UNIV_INTERN
 ibool
 log_checkpoint(
 /*===========*/
-				/* out: TRUE if success, FALSE if a checkpoint
-				write was already running */
-	ibool	sync,		/* in: TRUE if synchronous operation is
+	ibool	sync,		/*!< in: TRUE if synchronous operation is
 				desired */
-	ibool	write_always);	/* in: the function normally checks if the
+	ibool	write_always);	/*!< in: the function normally checks if the
 				the new checkpoint would have a greater
 				lsn than the previous one: if not, then no
 				physical write is done; by setting this
 				parameter TRUE, a physical write will always be
 				made to log files */
-/********************************************************************
+/****************************************************************//**
 Makes a checkpoint at a given lsn or later. */
-
+UNIV_INTERN
 void
 log_make_checkpoint_at(
 /*===================*/
-	dulint	lsn,		/* in: make a checkpoint at this or a later
-				lsn, if ut_dulint_max, makes a checkpoint at
-				the latest lsn */
-	ibool	write_always);	/* in: the function normally checks if the
-				the new checkpoint would have a greater
-				lsn than the previous one: if not, then no
-				physical write is done; by setting this
-				parameter TRUE, a physical write will always be
-				made to log files */
-/********************************************************************
+	ib_uint64_t	lsn,		/*!< in: make a checkpoint at this or a
+					later lsn, if IB_ULONGLONG_MAX, makes
+					a checkpoint at the latest lsn */
+	ibool		write_always);	/*!< in: the function normally checks if
+					the the new checkpoint would have a
+					greater lsn than the previous one: if
+					not, then no physical write is done;
+					by setting this parameter TRUE, a
+					physical write will always be made to
+					log files */
+/****************************************************************//**
 Makes a checkpoint at the latest lsn and writes it to first page of each
 data file in the database, so that we know that the file spaces contain
 all modifications up to that lsn. This can only be called at database
 shutdown. This function also writes all log in log files to the log archive. */
-
+UNIV_INTERN
 void
 logs_empty_and_mark_files_at_shutdown(void);
 /*=======================================*/
-/**********************************************************
+/******************************************************//**
 Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
-
+UNIV_INTERN
 void
 log_group_read_checkpoint_info(
 /*===========================*/
-	log_group_t*	group,	/* in: log group */
-	ulint		field);	/* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
-/***********************************************************************
+	log_group_t*	group,	/*!< in: log group */
+	ulint		field);	/*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
+/*******************************************************************//**
 Gets info from a checkpoint about a log group. */
-
+UNIV_INTERN
 void
 log_checkpoint_get_nth_group_info(
 /*==============================*/
-	byte*	buf,	/* in: buffer containing checkpoint info */
-	ulint	n,	/* in: nth slot */
-	ulint*	file_no,/* out: archived file number */
-	ulint*	offset);/* out: archived file offset */
-/**********************************************************
+	const byte*	buf,	/*!< in: buffer containing checkpoint info */
+	ulint		n,	/*!< in: nth slot */
+	ulint*		file_no,/*!< out: archived file number */
+	ulint*		offset);/*!< out: archived file offset */
+/******************************************************//**
 Writes checkpoint info to groups. */
-
+UNIV_INTERN
 void
 log_groups_write_checkpoint_info(void);
 /*==================================*/
-/**********************************************************
-Writes info to a buffer of a log group when log files are created in
-backup restoration. */
-
-void
-log_reset_first_header_and_checkpoint(
-/*==================================*/
-	byte*	hdr_buf,/* in: buffer which will be written to the start
-			of the first log file */
-	dulint	start);	/* in: lsn of the start of the first log file;
-			we pretend that there is a checkpoint at
-			start + LOG_BLOCK_HDR_SIZE */
-/************************************************************************
-Starts an archiving operation. */
-
+/********************************************************************//**
+Starts an archiving operation.
+@return	TRUE if succeed, FALSE if an archiving operation was already running */
+UNIV_INTERN
 ibool
 log_archive_do(
 /*===========*/
-			/* out: TRUE if succeed, FALSE if an archiving
-			operation was already running */
-	ibool	sync,	/* in: TRUE if synchronous operation is desired */
-	ulint*	n_bytes);/* out: archive log buffer size, 0 if nothing to
+	ibool	sync,	/*!< in: TRUE if synchronous operation is desired */
+	ulint*	n_bytes);/*!< out: archive log buffer size, 0 if nothing to
 			archive */
-/********************************************************************
+/****************************************************************//**
 Writes the log contents to the archive up to the lsn when this function was
 called, and stops the archiving. When archiving is started again, the archived
 log file numbers start from a number one higher, so that the archiving will
 not write again to the archived log files which exist when this function
-returns. */
-
+returns.
+@return	DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
 ulint
 log_archive_stop(void);
 /*==================*/
-				/* out: DB_SUCCESS or DB_ERROR */
-/********************************************************************
-Starts again archiving which has been stopped. */
-
+/****************************************************************//**
+Starts again archiving which has been stopped.
+@return	DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
 ulint
 log_archive_start(void);
 /*===================*/
-			/* out: DB_SUCCESS or DB_ERROR */
-/********************************************************************
-Stop archiving the log so that a gap may occur in the archived log files. */
-
+/****************************************************************//**
+Stop archiving the log so that a gap may occur in the archived log files.
+@return	DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
 ulint
 log_archive_noarchivelog(void);
 /*==========================*/
-			/* out: DB_SUCCESS or DB_ERROR */
-/********************************************************************
-Start archiving the log so that a gap may occur in the archived log files. */
-
+/****************************************************************//**
+Start archiving the log so that a gap may occur in the archived log files.
+@return	DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
 ulint
 log_archive_archivelog(void);
 /*========================*/
-			/* out: DB_SUCCESS or DB_ERROR */
-/**********************************************************
+/******************************************************//**
 Generates an archived log file name. */
-
+UNIV_INTERN
 void
 log_archived_file_name_gen(
 /*=======================*/
-	char*	buf,	/* in: buffer where to write */
-	ulint	id,	/* in: group id */
-	ulint	file_no);/* in: file number */
-/************************************************************************
+	char*	buf,	/*!< in: buffer where to write */
+	ulint	id,	/*!< in: group id */
+	ulint	file_no);/*!< in: file number */
+#else /* !UNIV_HOTBACKUP */
+/******************************************************//**
+Writes info to a buffer of a log group when log files are created in
+backup restoration. */
+UNIV_INTERN
+void
+log_reset_first_header_and_checkpoint(
+/*==================================*/
+	byte*		hdr_buf,/*!< in: buffer which will be written to the
+				start of the first log file */
+	ib_uint64_t	start);	/*!< in: lsn of the start of the first log file;
+				we pretend that there is a checkpoint at
+				start + LOG_BLOCK_HDR_SIZE */
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
 Checks that there is enough free space in the log to start a new query step.
 Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
 function may only be called if the calling thread owns no synchronization
 objects! */
-
+UNIV_INTERN
 void
 log_check_margins(void);
 /*===================*/
-/**********************************************************
+#ifndef UNIV_HOTBACKUP
+/******************************************************//**
 Reads a specified log segment to a buffer. */
-
+UNIV_INTERN
 void
 log_group_read_log_seg(
 /*===================*/
-	ulint		type,		/* in: LOG_ARCHIVE or LOG_RECOVER */
-	byte*		buf,		/* in: buffer where to read */
-	log_group_t*	group,		/* in: log group */
-	dulint		start_lsn,	/* in: read area start */
-	dulint		end_lsn);	/* in: read area end */
-/**********************************************************
+	ulint		type,		/*!< in: LOG_ARCHIVE or LOG_RECOVER */
+	byte*		buf,		/*!< in: buffer where to read */
+	log_group_t*	group,		/*!< in: log group */
+	ib_uint64_t	start_lsn,	/*!< in: read area start */
+	ib_uint64_t	end_lsn);	/*!< in: read area end */
+/******************************************************//**
 Writes a buffer to a log file group. */
-
+UNIV_INTERN
 void
 log_group_write_buf(
 /*================*/
-	log_group_t*	group,		/* in: log group */
-	byte*		buf,		/* in: buffer */
-	ulint		len,		/* in: buffer len; must be divisible
+	log_group_t*	group,		/*!< in: log group */
+	byte*		buf,		/*!< in: buffer */
+	ulint		len,		/*!< in: buffer len; must be divisible
 					by OS_FILE_LOG_BLOCK_SIZE */
-	dulint		start_lsn,	/* in: start lsn of the buffer; must
+	ib_uint64_t	start_lsn,	/*!< in: start lsn of the buffer; must
 					be divisible by
 					OS_FILE_LOG_BLOCK_SIZE */
-	ulint		new_data_offset);/* in: start offset of new data in
+	ulint		new_data_offset);/*!< in: start offset of new data in
 					buf: this parameter is used to decide
 					if we have to write a new log file
 					header */
-/************************************************************
+/********************************************************//**
 Sets the field values in group to correspond to a given lsn. For this function
 to work, the values must already be correctly initialized to correspond to
 some lsn, for instance, a checkpoint lsn. */
-
+UNIV_INTERN
 void
 log_group_set_fields(
 /*=================*/
-	log_group_t*	group,	/* in: group */
-	dulint		lsn);	/* in: lsn for which the values should be
+	log_group_t*	group,	/*!< in/out: group */
+	ib_uint64_t	lsn);	/*!< in: lsn for which the values should be
 				set */
-/**********************************************************
+/******************************************************//**
 Calculates the data capacity of a log group, when the log file headers are not
-included. */
-
+included.
+@return	capacity in bytes */
+UNIV_INTERN
 ulint
 log_group_get_capacity(
 /*===================*/
-				/* out: capacity in bytes */
-	log_group_t*	group);	/* in: log group */
-/****************************************************************
-Gets a log block flush bit. */
+	const log_group_t*	group);	/*!< in: log group */
+#endif /* !UNIV_HOTBACKUP */
+/************************************************************//**
+Gets a log block flush bit.
+@return	TRUE if this block was the first to be written in a log flush */
 UNIV_INLINE
 ibool
 log_block_get_flush_bit(
 /*====================*/
-				/* out: TRUE if this block was the first
-				to be written in a log flush */
-	byte*	log_block);	/* in: log block */
-/****************************************************************
-Gets a log block number stored in the header. */
+	const byte*	log_block);	/*!< in: log block */
+/************************************************************//**
+Gets a log block number stored in the header.
+@return	log block number stored in the block header */
 UNIV_INLINE
 ulint
 log_block_get_hdr_no(
 /*=================*/
-				/* out: log block number stored in the block
-				header */
-	byte*	log_block);	/* in: log block */
-/****************************************************************
-Gets a log block data length. */
+	const byte*	log_block);	/*!< in: log block */
+/************************************************************//**
+Gets a log block data length.
+@return	log block data length measured as a byte offset from the block start */
 UNIV_INLINE
 ulint
 log_block_get_data_len(
 /*===================*/
-				/* out: log block data length measured as a
-				byte offset from the block start */
-	byte*	log_block);	/* in: log block */
-/****************************************************************
+	const byte*	log_block);	/*!< in: log block */
+/************************************************************//**
 Sets the log block data length. */
 UNIV_INLINE
 void
 log_block_set_data_len(
 /*===================*/
-	byte*	log_block,	/* in: log block */
-	ulint	len);		/* in: data length */
-/****************************************************************
-Calculates the checksum for a log block. */
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	len);		/*!< in: data length */
+/************************************************************//**
+Calculates the checksum for a log block.
+@return	checksum */
 UNIV_INLINE
 ulint
 log_block_calc_checksum(
 /*====================*/
-			/* out: checksum */
-	byte*	block);	/* in: log block */
-/****************************************************************
-Gets a log block checksum field value. */
+	const byte*	block);	/*!< in: log block */
+/************************************************************//**
+Gets a log block checksum field value.
+@return	checksum */
 UNIV_INLINE
 ulint
 log_block_get_checksum(
 /*===================*/
-				/* out: checksum */
-	byte*	log_block);	/* in: log block */
-/****************************************************************
+	const byte*	log_block);	/*!< in: log block */
+/************************************************************//**
 Sets a log block checksum field value. */
 UNIV_INLINE
 void
 log_block_set_checksum(
 /*===================*/
-	byte*	log_block,	/* in: log block */
-	ulint	checksum);	/* in: checksum */
-/****************************************************************
-Gets a log block first mtr log record group offset. */
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	checksum);	/*!< in: checksum */
+/************************************************************//**
+Gets a log block first mtr log record group offset.
+@return first mtr log record group byte offset from the block start, 0
+if none */
 UNIV_INLINE
 ulint
 log_block_get_first_rec_group(
 /*==========================*/
-				/* out: first mtr log record group byte offset
-				from the block start, 0 if none */
-	byte*	log_block);	/* in: log block */
-/****************************************************************
+	const byte*	log_block);	/*!< in: log block */
+/************************************************************//**
 Sets the log block first mtr log record group offset. */
 UNIV_INLINE
 void
 log_block_set_first_rec_group(
 /*==========================*/
-	byte*	log_block,	/* in: log block */
-	ulint	offset);	/* in: offset, 0 if none */
-/****************************************************************
-Gets a log block checkpoint number field (4 lowest bytes). */
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	offset);	/*!< in: offset, 0 if none */
+/************************************************************//**
+Gets a log block checkpoint number field (4 lowest bytes).
+@return	checkpoint no (4 lowest bytes) */
 UNIV_INLINE
 ulint
 log_block_get_checkpoint_no(
 /*========================*/
-				/* out: checkpoint no (4 lowest bytes) */
-	byte*	log_block);	/* in: log block */
-/****************************************************************
+	const byte*	log_block);	/*!< in: log block */
+/************************************************************//**
 Initializes a log block in the log buffer. */
 UNIV_INLINE
 void
 log_block_init(
 /*===========*/
-	byte*	log_block,	/* in: pointer to the log buffer */
-	dulint	lsn);		/* in: lsn within the log block */
-/****************************************************************
+	byte*		log_block,	/*!< in: pointer to the log buffer */
+	ib_uint64_t	lsn);		/*!< in: lsn within the log block */
+/************************************************************//**
 Initializes a log block in the log buffer in the old, < 3.23.52 format, where
 there was no checksum yet. */
 UNIV_INLINE
 void
 log_block_init_in_old_format(
 /*=========================*/
-	byte*	log_block,	/* in: pointer to the log buffer */
-	dulint	lsn);		/* in: lsn within the log block */
-/****************************************************************
-Converts a lsn to a log block number. */
+	byte*		log_block,	/*!< in: pointer to the log buffer */
+	ib_uint64_t	lsn);		/*!< in: lsn within the log block */
+/************************************************************//**
+Converts a lsn to a log block number.
+@return	log block number, it is > 0 and <= 1G */
 UNIV_INLINE
 ulint
 log_block_convert_lsn_to_no(
 /*========================*/
-			/* out: log block number, it is > 0 and <= 1G */
-	dulint	lsn);	/* in: lsn of a byte within the block */
-/**********************************************************
+	ib_uint64_t	lsn);	/*!< in: lsn of a byte within the block */
+/******************************************************//**
 Prints info of the log. */
-
+UNIV_INTERN
 void
 log_print(
 /*======*/
-	FILE*	file);	/* in: file where to print */
-/**********************************************************
-Peeks the current lsn. */
-
+	FILE*	file);	/*!< in: file where to print */
+/******************************************************//**
+Peeks the current lsn.
+@return	TRUE if success, FALSE if could not get the log system mutex */
+UNIV_INTERN
 ibool
 log_peek_lsn(
 /*=========*/
-			/* out: TRUE if success, FALSE if could not get the
-			log system mutex */
-       dulint*	lsn);	/* out: if returns TRUE, current lsn is here */
-/**************************************************************************
+	ib_uint64_t*	lsn);	/*!< out: if returns TRUE, current lsn is here */
+/**********************************************************************//**
 Refreshes the statistics used to print per-second averages. */
-
+UNIV_INTERN
 void
 log_refresh_stats(void);
 /*===================*/
@@ -515,11 +579,13 @@ extern log_t*	log_sys;
 /* Values used as flags */
 #define LOG_FLUSH	7652559
 #define LOG_CHECKPOINT	78656949
-#define LOG_ARCHIVE	11122331
+#ifdef UNIV_LOG_ARCHIVE
+# define LOG_ARCHIVE	11122331
+#endif /* UNIV_LOG_ARCHIVE */
 #define LOG_RECOVER	98887331
 
 /* The counting of lsn's starts from this value: this must be non-zero */
-#define LOG_START_LSN	ut_dulint_create(0, 16 * OS_FILE_LOG_BLOCK_SIZE)
+#define LOG_START_LSN	((ib_uint64_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
 
 #define LOG_BUFFER_SIZE		(srv_log_buffer_size * UNIV_PAGE_SIZE)
 #define LOG_ARCHIVE_BUF_SIZE	(srv_log_buffer_size * UNIV_PAGE_SIZE / 4)
@@ -571,7 +637,7 @@ extern log_t*	log_sys;
 #define	LOG_CHECKPOINT_ARCHIVED_LSN	24
 #define	LOG_CHECKPOINT_GROUP_ARRAY	32
 
-/* For each value < LOG_MAX_N_GROUPS the following 8 bytes: */
+/* For each value smaller than LOG_MAX_N_GROUPS the following 8 bytes: */
 
 #define LOG_CHECKPOINT_ARCHIVED_FILE_NO	0
 #define LOG_CHECKPOINT_ARCHIVED_OFFSET	4
@@ -639,74 +705,78 @@ extern log_t*	log_sys;
 #define LOG_GROUP_OK		301
 #define LOG_GROUP_CORRUPTED	302
 
-/* Log group consists of a number of log files, each of the same size; a log
+/** Log group consists of a number of log files, each of the same size; a log
 group is implemented as a space in the sense of the module fil0fil. */
-
 struct log_group_struct{
 	/* The following fields are protected by log_sys->mutex */
-	ulint		id;		/* log group id */
-	ulint		n_files;	/* number of files in the group */
-	ulint		file_size;	/* individual log file size in bytes,
+	ulint		id;		/*!< log group id */
+	ulint		n_files;	/*!< number of files in the group */
+	ulint		file_size;	/*!< individual log file size in bytes,
 					including the log file header */
-	ulint		space_id;	/* file space which implements the log
+	ulint		space_id;	/*!< file space which implements the log
 					group */
-	ulint		state;		/* LOG_GROUP_OK or
+	ulint		state;		/*!< LOG_GROUP_OK or
 					LOG_GROUP_CORRUPTED */
-	dulint		lsn;		/* lsn used to fix coordinates within
+	ib_uint64_t	lsn;		/*!< lsn used to fix coordinates within
 					the log group */
-	ulint		lsn_offset;	/* the offset of the above lsn */
-	ulint		n_pending_writes;/* number of currently pending flush
+	ulint		lsn_offset;	/*!< the offset of the above lsn */
+	ulint		n_pending_writes;/*!< number of currently pending flush
 					writes for this log group */
-	byte**		file_header_bufs;/* buffers for each file header in the
-					group */
-	/*-----------------------------*/
-	byte**		archive_file_header_bufs;/* buffers for each file
+	byte**		file_header_bufs;/*!< buffers for each file
 					header in the group */
-	ulint		archive_space_id;/* file space which implements the log
-					group archive */
-	ulint		archived_file_no;/* file number corresponding to
+	/*-----------------------------*/
+	byte**		archive_file_header_bufs;/*!< buffers for each file
+					header in the group */
+	ulint		archive_space_id;/*!< file space which
+					implements the log group
+					archive */
+	ulint		archived_file_no;/*!< file number corresponding to
 					log_sys->archived_lsn */
-	ulint		archived_offset;/* file offset corresponding to
+	ulint		archived_offset;/*!< file offset corresponding to
 					log_sys->archived_lsn, 0 if we have
 					not yet written to the archive file
 					number archived_file_no */
-	ulint		next_archived_file_no;/* during an archive write,
+	ulint		next_archived_file_no;/*!< during an archive write,
 					until the write is completed, we
 					store the next value for
 					archived_file_no here: the write
 					completion function then sets the new
 					value to ..._file_no */
-	ulint		next_archived_offset; /* like the preceding field */
+	ulint		next_archived_offset; /*!< like the preceding field */
 	/*-----------------------------*/
-	dulint		scanned_lsn;	/* used only in recovery: recovery scan
+	ib_uint64_t	scanned_lsn;	/*!< used only in recovery: recovery scan
 					succeeded up to this lsn in this log
 					group */
-	byte*		checkpoint_buf;	/* checkpoint header is written from
+	byte*		checkpoint_buf;	/*!< checkpoint header is written from
 					this buffer to the group */
 	UT_LIST_NODE_T(log_group_t)
-			log_groups;	/* list of log groups */
+			log_groups;	/*!< list of log groups */
 };
 
+/** Redo log buffer */
 struct log_struct{
-	byte		pad[64];	/* padding to prevent other memory
+	byte		pad[64];	/*!< padding to prevent other memory
 					update hotspots from residing on the
 					same memory cache line */
-	dulint		lsn;		/* log sequence number */
-	ulint		buf_free;	/* first free offset within the log
+	ib_uint64_t	lsn;		/*!< log sequence number */
+	ulint		buf_free;	/*!< first free offset within the log
 					buffer */
-	mutex_t		mutex;		/* mutex protecting the log */
-	byte*		buf;		/* log buffer */
-	ulint		buf_size;	/* log buffer size in bytes */
-	ulint		max_buf_free;	/* recommended maximum value of
+#ifndef UNIV_HOTBACKUP
+	mutex_t		mutex;		/*!< mutex protecting the log */
+#endif /* !UNIV_HOTBACKUP */
+	byte*		buf;		/*!< log buffer */
+	ulint		buf_size;	/*!< log buffer size in bytes */
+	ulint		max_buf_free;	/*!< recommended maximum value of
 					buf_free, after which the buffer is
 					flushed */
-	ulint		old_buf_free;	/* value of buf free when log was
+	ulint		old_buf_free;	/*!< value of buf free when log was
 					last time opened; only in the debug
 					version */
-	dulint		old_lsn;	/* value of lsn when log was last time
-					opened; only in the debug version */
+	ib_uint64_t	old_lsn;	/*!< value of lsn when log was
+					last time opened; only in the
+					debug version */
 	ibool		check_flush_or_checkpoint;
-					/* this is set to TRUE when there may
+					/*!< this is set to TRUE when there may
 					be need to flush the log buffer, or
 					preflush buffer pool pages, or make
 					a checkpoint; this MUST be TRUE when
@@ -715,56 +785,59 @@ struct log_struct{
 					peeked at by log_free_check(), which
 					does not reserve the log mutex */
 	UT_LIST_BASE_NODE_T(log_group_t)
-			log_groups;	/* log groups */
+			log_groups;	/*!< log groups */
 
-	/* The fields involved in the log buffer flush */
+#ifndef UNIV_HOTBACKUP
+	/** The fields involved in the log buffer flush @{ */
 
-	ulint		buf_next_to_write;/* first offset in the log buffer
+	ulint		buf_next_to_write;/*!< first offset in the log buffer
 					where the byte content may not exist
 					written to file, e.g., the start
 					offset of a log record catenated
 					later; this is advanced when a flush
 					operation is completed to all the log
 					groups */
-	dulint		written_to_some_lsn;
-					/* first log sequence number not yet
+	ib_uint64_t	written_to_some_lsn;
+					/*!< first log sequence number not yet
 					written to any log group; for this to
 					be advanced, it is enough that the
 					write i/o has been completed for any
 					one log group */
-	dulint		written_to_all_lsn;
-					/* first log sequence number not yet
+	ib_uint64_t	written_to_all_lsn;
+					/*!< first log sequence number not yet
 					written to some log group; for this to
 					be advanced, it is enough that the
 					write i/o has been completed for all
 					log groups */
-	dulint		write_lsn;	/* end lsn for the current running
+	ib_uint64_t	write_lsn;	/*!< end lsn for the current running
 					write */
-	ulint		write_end_offset;/* the data in buffer has been written
-					up to this offset when the current
-					write ends: this field will then
-					be copied to buf_next_to_write */
-	dulint		current_flush_lsn;/* end lsn for the current running
+	ulint		write_end_offset;/*!< the data in buffer has
+					been written up to this offset
+					when the current write ends:
+					this field will then be copied
+					to buf_next_to_write */
+	ib_uint64_t	current_flush_lsn;/*!< end lsn for the current running
 					write + flush operation */
-	dulint		flushed_to_disk_lsn;
-					/* how far we have written the log
+	ib_uint64_t	flushed_to_disk_lsn;
+					/*!< how far we have written the log
 					AND flushed to disk */
-	ulint		n_pending_writes;/* number of currently pending flushes
-					or writes */
+	ulint		n_pending_writes;/*!< number of currently
+					pending flushes or writes */
 	/* NOTE on the 'flush' in names of the fields below: starting from
 	4.0.14, we separate the write of the log file and the actual fsync()
 	or other method to flush it to disk. The names below shhould really
 	be 'flush_or_write'! */
-	os_event_t	no_flush_event;	/* this event is in the reset state
+	os_event_t	no_flush_event;	/*!< this event is in the reset state
 					when a flush or a write is running;
 					a thread should wait for this without
 					owning the log mutex, but NOTE that
 					to set or reset this event, the
 					thread MUST own the log mutex! */
-	ibool		one_flushed;	/* during a flush, this is first FALSE
-					and becomes TRUE when one log group
-					has been written or flushed */
-	os_event_t	one_flushed_event;/* this event is reset when the
+	ibool		one_flushed;	/*!< during a flush, this is
+					first FALSE and becomes TRUE
+					when one log group has been
+					written or flushed */
+	os_event_t	one_flushed_event;/*!< this event is reset when the
 					flush or write has not yet completed
 					for any log group; e.g., this means
 					that a transaction has been committed
@@ -773,97 +846,110 @@ struct log_struct{
 					but NOTE that to set or reset this
 					event, the thread MUST own the log
 					mutex! */
-	ulint		n_log_ios;	/* number of log i/os initiated thus
+	ulint		n_log_ios;	/*!< number of log i/os initiated thus
 					far */
-	ulint		n_log_ios_old;	/* number of log i/o's at the
+	ulint		n_log_ios_old;	/*!< number of log i/o's at the
 					previous printout */
-	time_t		last_printout_time;/* when log_print was last time
+	time_t		last_printout_time;/*!< when log_print was last time
 					called */
+	/* @} */
 
-	/* Fields involved in checkpoints */
-	ulint		log_group_capacity; /* capacity of the log group; if
+	/** Fields involved in checkpoints @{ */
+	ulint		log_group_capacity; /*!< capacity of the log group; if
 					the checkpoint age exceeds this, it is
 					a serious error because it is possible
 					we will then overwrite log and spoil
 					crash recovery */
 	ulint		max_modified_age_async;
-					/* when this recommended value for lsn
-					- buf_pool_get_oldest_modification()
-					is exceeded, we start an asynchronous
-					preflush of pool pages */
+					/*!< when this recommended
+					value for lsn -
+					buf_pool_get_oldest_modification()
+					is exceeded, we start an
+					asynchronous preflush of pool pages */
 	ulint		max_modified_age_sync;
-					/* when this recommended value for lsn
-					- buf_pool_get_oldest_modification()
-					is exceeded, we start a synchronous
-					preflush of pool pages */
+					/*!< when this recommended
+					value for lsn -
+					buf_pool_get_oldest_modification()
+					is exceeded, we start a
+					synchronous preflush of pool pages */
 	ulint		adm_checkpoint_interval;
-					/* administrator-specified checkpoint
+					/*!< administrator-specified checkpoint
 					interval in terms of log growth in
 					bytes; the interval actually used by
 					the database can be smaller */
 	ulint		max_checkpoint_age_async;
-					/* when this checkpoint age is exceeded
-					we start an asynchronous writing of a
-					new checkpoint */
+					/*!< when this checkpoint age
+					is exceeded we start an
+					asynchronous writing of a new
+					checkpoint */
 	ulint		max_checkpoint_age;
-					/* this is the maximum allowed value
+					/*!< this is the maximum allowed value
 					for lsn - last_checkpoint_lsn when a
 					new query step is started */
-	dulint		next_checkpoint_no;
-					/* next checkpoint number */
-	dulint		last_checkpoint_lsn;
-					/* latest checkpoint lsn */
-	dulint		next_checkpoint_lsn;
-					/* next checkpoint lsn */
+	ib_uint64_t	next_checkpoint_no;
+					/*!< next checkpoint number */
+	ib_uint64_t	last_checkpoint_lsn;
+					/*!< latest checkpoint lsn */
+	ib_uint64_t	next_checkpoint_lsn;
+					/*!< next checkpoint lsn */
 	ulint		n_pending_checkpoint_writes;
-					/* number of currently pending
+					/*!< number of currently pending
 					checkpoint writes */
-	rw_lock_t	checkpoint_lock;/* this latch is x-locked when a
+	rw_lock_t	checkpoint_lock;/*!< this latch is x-locked when a
 					checkpoint write is running; a thread
 					should wait for this without owning
 					the log mutex */
-	byte*		checkpoint_buf;	/* checkpoint header is read to this
+#endif /* !UNIV_HOTBACKUP */
+	byte*		checkpoint_buf;	/*!< checkpoint header is read to this
 					buffer */
-	/* Fields involved in archiving */
-	ulint		archiving_state;/* LOG_ARCH_ON, LOG_ARCH_STOPPING
+	/* @} */
+#ifdef UNIV_LOG_ARCHIVE
+	/** Fields involved in archiving @{ */
+	ulint		archiving_state;/*!< LOG_ARCH_ON, LOG_ARCH_STOPPING
 					LOG_ARCH_STOPPED, LOG_ARCH_OFF */
-	dulint		archived_lsn;	/* archiving has advanced to this
+	ib_uint64_t	archived_lsn;	/*!< archiving has advanced to this
 					lsn */
 	ulint		max_archived_lsn_age_async;
-					/* recommended maximum age of
+					/*!< recommended maximum age of
 					archived_lsn, before we start
 					asynchronous copying to the archive */
 	ulint		max_archived_lsn_age;
-					/* maximum allowed age for
+					/*!< maximum allowed age for
 					archived_lsn */
-	dulint		next_archived_lsn;/* during an archive write,
+	ib_uint64_t	next_archived_lsn;/*!< during an archive write,
 					until the write is completed, we
 					store the next value for
 					archived_lsn here: the write
 					completion function then sets the new
 					value to archived_lsn */
-	ulint		archiving_phase;/* LOG_ARCHIVE_READ or
+	ulint		archiving_phase;/*!< LOG_ARCHIVE_READ or
 					LOG_ARCHIVE_WRITE */
 	ulint		n_pending_archive_ios;
-					/* number of currently pending reads
+					/*!< number of currently pending reads
 					or writes in archiving */
-	rw_lock_t	archive_lock;	/* this latch is x-locked when an
+	rw_lock_t	archive_lock;	/*!< this latch is x-locked when an
 					archive write is running; a thread
 					should wait for this without owning
 					the log mutex */
-	ulint		archive_buf_size;/* size of archive_buf */
-	byte*		archive_buf;	/* log segment is written to the
+	ulint		archive_buf_size;/*!< size of archive_buf */
+	byte*		archive_buf;	/*!< log segment is written to the
 					archive from this buffer */
-	os_event_t	archiving_on;	/* if archiving has been stopped,
+	os_event_t	archiving_on;	/*!< if archiving has been stopped,
 					a thread can wait for this event to
 					become signaled */
+	/* @} */
+#endif /* UNIV_LOG_ARCHIVE */
 };
 
+#ifdef UNIV_LOG_ARCHIVE
+/** Archiving state @{ */
 #define LOG_ARCH_ON		71
 #define LOG_ARCH_STOPPING	72
 #define LOG_ARCH_STOPPING2	73
 #define LOG_ARCH_STOPPED	74
 #define LOG_ARCH_OFF		75
+/* @} */
+#endif /* UNIV_LOG_ARCHIVE */
 
 #ifndef UNIV_NONINL
 #include "log0log.ic"
diff --git a/storage/innobase/include/log0log.ic b/storage/innodb_plugin/include/log0log.ic
similarity index 51%
rename from storage/innobase/include/log0log.ic
rename to storage/innodb_plugin/include/log0log.ic
index df0a8baf2d5..d071985982a 100644
--- a/storage/innobase/include/log0log.ic
+++ b/storage/innodb_plugin/include/log0log.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Database log
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/log0log.ic
+Database log
 
 Created 12/9/1995 Heikki Tuuri
 *******************************************************/
@@ -10,27 +27,27 @@ Created 12/9/1995 Heikki Tuuri
 #include "mach0data.h"
 #include "mtr0mtr.h"
 
-/**********************************************************
+/******************************************************//**
 Checks by parsing that the catenated log segment for a single mtr is
 consistent. */
-
+UNIV_INTERN
 ibool
 log_check_log_recs(
 /*===============*/
-	byte*	buf,		/* in: pointer to the start of the log segment
-				in the log_sys->buf log buffer */
-	ulint	len,		/* in: segment length in bytes */
-	dulint	buf_start_lsn);	/* in: buffer start lsn */
+	byte*		buf,		/*!< in: pointer to the start of
+					the log segment in the
+					log_sys->buf log buffer */
+	ulint		len,		/*!< in: segment length in bytes */
+	ib_uint64_t	buf_start_lsn);	/*!< in: buffer start lsn */
 
-/****************************************************************
-Gets a log block flush bit. */
+/************************************************************//**
+Gets a log block flush bit.
+@return	TRUE if this block was the first to be written in a log flush */
 UNIV_INLINE
 ibool
 log_block_get_flush_bit(
 /*====================*/
-				/* out: TRUE if this block was the first
-				to be written in a log flush */
-	byte*	log_block)	/* in: log block */
+	const byte*	log_block)	/*!< in: log block */
 {
 	if (LOG_BLOCK_FLUSH_BIT_MASK
 	    & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO)) {
@@ -41,14 +58,14 @@ log_block_get_flush_bit(
 	return(FALSE);
 }
 
-/****************************************************************
+/************************************************************//**
 Sets the log block flush bit. */
 UNIV_INLINE
 void
 log_block_set_flush_bit(
 /*====================*/
-	byte*	log_block,	/* in: log block */
-	ibool	val)		/* in: value to set */
+	byte*	log_block,	/*!< in/out: log block */
+	ibool	val)		/*!< in: value to set */
 {
 	ulint	field;
 
@@ -63,29 +80,28 @@ log_block_set_flush_bit(
 	mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, field);
 }
 
-/****************************************************************
-Gets a log block number stored in the header. */
+/************************************************************//**
+Gets a log block number stored in the header.
+@return	log block number stored in the block header */
 UNIV_INLINE
 ulint
 log_block_get_hdr_no(
 /*=================*/
-				/* out: log block number stored in the block
-				header */
-	byte*	log_block)	/* in: log block */
+	const byte*	log_block)	/*!< in: log block */
 {
 	return(~LOG_BLOCK_FLUSH_BIT_MASK
 	       & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO));
 }
 
-/****************************************************************
+/************************************************************//**
 Sets the log block number stored in the header; NOTE that this must be set
 before the flush bit! */
 UNIV_INLINE
 void
 log_block_set_hdr_no(
 /*=================*/
-	byte*	log_block,	/* in: log block */
-	ulint	n)		/* in: log block number: must be > 0 and
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	n)		/*!< in: log block number: must be > 0 and
 				< LOG_BLOCK_FLUSH_BIT_MASK */
 {
 	ut_ad(n > 0);
@@ -94,109 +110,99 @@ log_block_set_hdr_no(
 	mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, n);
 }
 
-/****************************************************************
-Gets a log block data length. */
+/************************************************************//**
+Gets a log block data length.
+@return	log block data length measured as a byte offset from the block start */
 UNIV_INLINE
 ulint
 log_block_get_data_len(
 /*===================*/
-				/* out: log block data length measured as a
-				byte offset from the block start */
-	byte*	log_block)	/* in: log block */
+	const byte*	log_block)	/*!< in: log block */
 {
 	return(mach_read_from_2(log_block + LOG_BLOCK_HDR_DATA_LEN));
 }
 
-/****************************************************************
+/************************************************************//**
 Sets the log block data length. */
 UNIV_INLINE
 void
 log_block_set_data_len(
 /*===================*/
-	byte*	log_block,	/* in: log block */
-	ulint	len)		/* in: data length */
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	len)		/*!< in: data length */
 {
 	mach_write_to_2(log_block + LOG_BLOCK_HDR_DATA_LEN, len);
 }
 
-/****************************************************************
-Gets a log block first mtr log record group offset. */
+/************************************************************//**
+Gets a log block first mtr log record group offset.
+@return first mtr log record group byte offset from the block start, 0
+if none */
 UNIV_INLINE
 ulint
 log_block_get_first_rec_group(
 /*==========================*/
-				/* out: first mtr log record group byte offset
-				from the block start, 0 if none */
-	byte*	log_block)	/* in: log block */
+	const byte*	log_block)	/*!< in: log block */
 {
 	return(mach_read_from_2(log_block + LOG_BLOCK_FIRST_REC_GROUP));
 }
 
-/****************************************************************
+/************************************************************//**
 Sets the log block first mtr log record group offset. */
 UNIV_INLINE
 void
 log_block_set_first_rec_group(
 /*==========================*/
-	byte*	log_block,	/* in: log block */
-	ulint	offset)		/* in: offset, 0 if none */
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	offset)		/*!< in: offset, 0 if none */
 {
 	mach_write_to_2(log_block + LOG_BLOCK_FIRST_REC_GROUP, offset);
 }
 
-/****************************************************************
-Gets a log block checkpoint number field (4 lowest bytes). */
+/************************************************************//**
+Gets a log block checkpoint number field (4 lowest bytes).
+@return	checkpoint no (4 lowest bytes) */
 UNIV_INLINE
 ulint
 log_block_get_checkpoint_no(
 /*========================*/
-				/* out: checkpoint no (4 lowest bytes) */
-	byte*	log_block)	/* in: log block */
+	const byte*	log_block)	/*!< in: log block */
 {
 	return(mach_read_from_4(log_block + LOG_BLOCK_CHECKPOINT_NO));
 }
 
-/****************************************************************
+/************************************************************//**
 Sets a log block checkpoint number field (4 lowest bytes). */
 UNIV_INLINE
 void
 log_block_set_checkpoint_no(
 /*========================*/
-	byte*	log_block,	/* in: log block */
-	dulint	no)		/* in: checkpoint no */
+	byte*		log_block,	/*!< in/out: log block */
+	ib_uint64_t	no)		/*!< in: checkpoint no */
 {
-	mach_write_to_4(log_block + LOG_BLOCK_CHECKPOINT_NO,
-			ut_dulint_get_low(no));
+	mach_write_to_4(log_block + LOG_BLOCK_CHECKPOINT_NO, (ulint) no);
 }
 
-/****************************************************************
-Converts a lsn to a log block number. */
+/************************************************************//**
+Converts a lsn to a log block number.
+@return	log block number, it is > 0 and <= 1G */
 UNIV_INLINE
 ulint
 log_block_convert_lsn_to_no(
 /*========================*/
-			/* out: log block number, it is > 0 and <= 1G */
-	dulint	lsn)	/* in: lsn of a byte within the block */
+	ib_uint64_t	lsn)	/*!< in: lsn of a byte within the block */
 {
-	ulint	no;
-
-	no = ut_dulint_get_low(lsn) / OS_FILE_LOG_BLOCK_SIZE;
-	no += (ut_dulint_get_high(lsn) % OS_FILE_LOG_BLOCK_SIZE)
-		* 2 * (0x80000000UL / OS_FILE_LOG_BLOCK_SIZE);
-
-	no = no & 0x3FFFFFFFUL;
-
-	return(no + 1);
+	return(((ulint) (lsn / OS_FILE_LOG_BLOCK_SIZE) & 0x3FFFFFFFUL) + 1);
 }
 
-/****************************************************************
-Calculates the checksum for a log block. */
+/************************************************************//**
+Calculates the checksum for a log block.
+@return	checksum */
 UNIV_INLINE
 ulint
 log_block_calc_checksum(
 /*====================*/
-			/* out: checksum */
-	byte*	block)	/* in: log block */
+	const byte*	block)	/*!< in: log block */
 {
 	ulint	sum;
 	ulint	sh;
@@ -206,8 +212,10 @@ log_block_calc_checksum(
 	sh = 0;
 
 	for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; i++) {
-		sum = sum & 0x7FFFFFFFUL;
-		sum += (((ulint)(*(block + i))) << sh) + (ulint)(*(block + i));
+		ulint	b = (ulint) block[i];
+		sum &= 0x7FFFFFFFUL;
+		sum += b;
+		sum += b << sh;
 		sh++;
 		if (sh > 24) {
 			sh = 0;
@@ -217,41 +225,41 @@ log_block_calc_checksum(
 	return(sum);
 }
 
-/****************************************************************
-Gets a log block checksum field value. */
+/************************************************************//**
+Gets a log block checksum field value.
+@return	checksum */
 UNIV_INLINE
 ulint
 log_block_get_checksum(
 /*===================*/
-				/* out: checksum */
-	byte*	log_block)	/* in: log block */
+	const byte*	log_block)	/*!< in: log block */
 {
 	return(mach_read_from_4(log_block + OS_FILE_LOG_BLOCK_SIZE
 				- LOG_BLOCK_CHECKSUM));
 }
 
-/****************************************************************
+/************************************************************//**
 Sets a log block checksum field value. */
 UNIV_INLINE
 void
 log_block_set_checksum(
 /*===================*/
-	byte*	log_block,	/* in: log block */
-	ulint	checksum)	/* in: checksum */
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	checksum)	/*!< in: checksum */
 {
 	mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE
 			- LOG_BLOCK_CHECKSUM,
 			checksum);
 }
 
-/****************************************************************
+/************************************************************//**
 Initializes a log block in the log buffer. */
 UNIV_INLINE
 void
 log_block_init(
 /*===========*/
-	byte*	log_block,	/* in: pointer to the log buffer */
-	dulint	lsn)		/* in: lsn within the log block */
+	byte*		log_block,	/*!< in: pointer to the log buffer */
+	ib_uint64_t	lsn)		/*!< in: lsn within the log block */
 {
 	ulint	no;
 
@@ -265,15 +273,15 @@ log_block_init(
 	log_block_set_first_rec_group(log_block, 0);
 }
 
-/****************************************************************
+/************************************************************//**
 Initializes a log block in the log buffer in the old format, where there
 was no checksum yet. */
 UNIV_INLINE
 void
 log_block_init_in_old_format(
 /*=========================*/
-	byte*	log_block,	/* in: pointer to the log buffer */
-	dulint	lsn)		/* in: lsn within the log block */
+	byte*		log_block,	/*!< in: pointer to the log buffer */
+	ib_uint64_t	lsn)		/*!< in: lsn within the log block */
 {
 	ulint	no;
 
@@ -288,23 +296,23 @@ log_block_init_in_old_format(
 	log_block_set_first_rec_group(log_block, 0);
 }
 
-/****************************************************************
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
 Writes to the log the string given. The log must be released with
-log_release. */
+log_release.
+@return	end lsn of the log record, zero if did not succeed */
 UNIV_INLINE
-dulint
+ib_uint64_t
 log_reserve_and_write_fast(
 /*=======================*/
-			/* out: end lsn of the log record, ut_dulint_zero if
-			did not succeed */
-	byte*	str,	/* in: string */
-	ulint	len,	/* in: string length */
-	dulint*	start_lsn,/* out: start lsn of the log record */
-	ibool*	success)/* out: TRUE if success */
+	byte*		str,	/*!< in: string */
+	ulint		len,	/*!< in: string length */
+	ib_uint64_t*	start_lsn,/*!< out: start lsn of the log record */
+	ibool*		success)/*!< out: TRUE if success */
 {
-	log_t*	log	= log_sys;
-	ulint	data_len;
-	dulint	lsn;
+	log_t*		log	= log_sys;
+	ulint		data_len;
+	ib_uint64_t	lsn;
 
 	*success = TRUE;
 
@@ -321,15 +329,15 @@ log_reserve_and_write_fast(
 
 		mutex_exit(&(log->mutex));
 
-		return(ut_dulint_zero);
+		return(0);
 	}
 
 	*start_lsn = log->lsn;
 
 	ut_memcpy(log->buf + log->buf_free, str, len);
 
-	log_block_set_data_len(ut_align_down(log->buf + log->buf_free,
-					     OS_FILE_LOG_BLOCK_SIZE),
+	log_block_set_data_len((byte*) ut_align_down(log->buf + log->buf_free,
+						     OS_FILE_LOG_BLOCK_SIZE),
 			       data_len);
 #ifdef UNIV_LOG_DEBUG
 	log->old_buf_free = log->buf_free;
@@ -339,9 +347,7 @@ log_reserve_and_write_fast(
 
 	ut_ad(log->buf_free <= log->buf_size);
 
-	lsn = ut_dulint_add(log->lsn, len);
-
-	log->lsn = lsn;
+	lsn = log->lsn += len;
 
 #ifdef UNIV_LOG_DEBUG
 	log_check_log_recs(log->buf + log->old_buf_free,
@@ -350,7 +356,7 @@ log_reserve_and_write_fast(
 	return(lsn);
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Releases the log mutex. */
 UNIV_INLINE
 void
@@ -360,15 +366,15 @@ log_release(void)
 	mutex_exit(&(log_sys->mutex));
 }
 
-/****************************************************************
-Gets the current lsn. */
+/************************************************************//**
+Gets the current lsn.
+@return	current lsn */
 UNIV_INLINE
-dulint
+ib_uint64_t
 log_get_lsn(void)
 /*=============*/
-			/* out: current lsn */
 {
-	dulint	lsn;
+	ib_uint64_t	lsn;
 
 	mutex_enter(&(log_sys->mutex));
 
@@ -379,7 +385,19 @@ log_get_lsn(void)
 	return(lsn);
 }
 
-/***************************************************************************
+/****************************************************************
+Gets the log group capacity. It is OK to read the value without
+holding log_sys->mutex because it is constant.
+@return	log group capacity */
+UNIV_INLINE
+ulint
+log_get_capacity(void)
+/*==================*/
+{
+	return(log_sys->log_group_capacity);
+}
+
+/***********************************************************************//**
 Checks if there is need for a log buffer flush or a new checkpoint, and does
 this if yes. Any database operation should call this when it has modified
 more than about 4 pages. NOTE that this function may only be called when the
@@ -396,3 +414,4 @@ log_free_check(void)
 		log_check_margins();
 	}
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/log0recv.h b/storage/innodb_plugin/include/log0recv.h
new file mode 100644
index 00000000000..8468c213bdb
--- /dev/null
+++ b/storage/innodb_plugin/include/log0recv.h
@@ -0,0 +1,466 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/log0recv.h
+Recovery
+
+Created 9/20/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef log0recv_h
+#define log0recv_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "buf0types.h"
+#include "hash0hash.h"
+#include "log0log.h"
+
+#ifdef UNIV_HOTBACKUP
+extern ibool	recv_replay_file_ops;
+
+/*******************************************************************//**
+Reads the checkpoint info needed in hot backup.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+recv_read_cp_info_for_backup(
+/*=========================*/
+	const byte*	hdr,	/*!< in: buffer containing the log group
+				header */
+	ib_uint64_t*	lsn,	/*!< out: checkpoint lsn */
+	ulint*		offset,	/*!< out: checkpoint offset in the log group */
+	ulint*		fsp_limit,/*!< out: fsp limit of space 0,
+				1000000000 if the database is running
+				with < version 3.23.50 of InnoDB */
+	ib_uint64_t*	cp_no,	/*!< out: checkpoint number */
+	ib_uint64_t*	first_header_lsn);
+				/*!< out: lsn of of the start of the
+				first log file */
+/*******************************************************************//**
+Scans the log segment and n_bytes_scanned is set to the length of valid
+log scanned. */
+UNIV_INTERN
+void
+recv_scan_log_seg_for_backup(
+/*=========================*/
+	byte*		buf,		/*!< in: buffer containing log data */
+	ulint		buf_len,	/*!< in: data length in that buffer */
+	ib_uint64_t*	scanned_lsn,	/*!< in/out: lsn of buffer start,
+					we return scanned lsn */
+	ulint*		scanned_checkpoint_no,
+					/*!< in/out: 4 lowest bytes of the
+					highest scanned checkpoint number so
+					far */
+	ulint*		n_bytes_scanned);/*!< out: how much we were able to
+					scan, smaller than buf_len if log
+					data ended here */
+#endif /* UNIV_HOTBACKUP */
+/*******************************************************************//**
+Returns TRUE if recovery is currently running.
+@return	recv_recovery_on */
+UNIV_INLINE
+ibool
+recv_recovery_is_on(void);
+/*=====================*/
+#ifdef UNIV_LOG_ARCHIVE
+/*******************************************************************//**
+Returns TRUE if recovery from backup is currently running.
+@return	recv_recovery_from_backup_on */
+UNIV_INLINE
+ibool
+recv_recovery_from_backup_is_on(void);
+/*=================================*/
+#endif /* UNIV_LOG_ARCHIVE */
+/************************************************************************//**
+Applies the hashed log records to the page, if the page lsn is less than the
+lsn of a log record. This can be called when a buffer page has just been
+read in, or also for a page already in the buffer pool. */
+UNIV_INTERN
+void
+recv_recover_page_func(
+/*===================*/
+#ifndef UNIV_HOTBACKUP
+	ibool		just_read_in,
+				/*!< in: TRUE if the i/o handler calls
+				this for a freshly read page */
+#endif /* !UNIV_HOTBACKUP */
+	buf_block_t*	block);	/*!< in/out: buffer block */
+#ifndef UNIV_HOTBACKUP
+/** Wrapper for recv_recover_page_func().
+Applies the hashed log records to the page, if the page lsn is less than the
+lsn of a log record. This can be called when a buffer page has just been
+read in, or also for a page already in the buffer pool.
+@param jri	in: TRUE if just read in (the i/o handler calls this for
+a freshly read page)
+@param block	in/out: the buffer block
+*/
+# define recv_recover_page(jri, block)	recv_recover_page_func(jri, block)
+#else /* !UNIV_HOTBACKUP */
+/** Wrapper for recv_recover_page_func().
+Applies the hashed log records to the page, if the page lsn is less than the
+lsn of a log record. This can be called when a buffer page has just been
+read in, or also for a page already in the buffer pool.
+@param jri	in: TRUE if just read in (the i/o handler calls this for
+a freshly read page)
+@param block	in/out: the buffer block
+*/
+# define recv_recover_page(jri, block)	recv_recover_page_func(block)
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************//**
+Recovers from a checkpoint. When this function returns, the database is able
+to start processing of new user transactions, but the function
+recv_recovery_from_checkpoint_finish should be called later to complete
+the recovery and free the resources used in it.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+ulint
+recv_recovery_from_checkpoint_start_func(
+/*=====================================*/
+#ifdef UNIV_LOG_ARCHIVE
+	ulint		type,		/*!< in: LOG_CHECKPOINT or
+					LOG_ARCHIVE */
+	ib_uint64_t	limit_lsn,	/*!< in: recover up to this lsn
+					if possible */
+#endif /* UNIV_LOG_ARCHIVE */
+	ib_uint64_t	min_flushed_lsn,/*!< in: min flushed lsn from
+					data files */
+	ib_uint64_t	max_flushed_lsn);/*!< in: max flushed lsn from
+					 data files */
+#ifdef UNIV_LOG_ARCHIVE
+/** Wrapper for recv_recovery_from_checkpoint_start_func().
+Recovers from a checkpoint. When this function returns, the database is able
+to start processing of new user transactions, but the function
+recv_recovery_from_checkpoint_finish should be called later to complete
+the recovery and free the resources used in it.
+@param type	in: LOG_CHECKPOINT or LOG_ARCHIVE
+@param lim	in: recover up to this log sequence number if possible
+@param min	in: minimum flushed log sequence number from data files
+@param max	in: maximum flushed log sequence number from data files
+@return	error code or DB_SUCCESS */
+# define recv_recovery_from_checkpoint_start(type,lim,min,max)		\
+	recv_recovery_from_checkpoint_start_func(type,lim,min,max)
+#else /* UNIV_LOG_ARCHIVE */
+/** Wrapper for recv_recovery_from_checkpoint_start_func().
+Recovers from a checkpoint. When this function returns, the database is able
+to start processing of new user transactions, but the function
+recv_recovery_from_checkpoint_finish should be called later to complete
+the recovery and free the resources used in it.
+@param type	ignored: LOG_CHECKPOINT or LOG_ARCHIVE
+@param lim	ignored: recover up to this log sequence number if possible
+@param min	in: minimum flushed log sequence number from data files
+@param max	in: maximum flushed log sequence number from data files
+@return	error code or DB_SUCCESS */
+# define recv_recovery_from_checkpoint_start(type,lim,min,max)		\
+	recv_recovery_from_checkpoint_start_func(min,max)
+#endif /* UNIV_LOG_ARCHIVE */
+/********************************************************//**
+Completes recovery from a checkpoint. */
+UNIV_INTERN
+void
+recv_recovery_from_checkpoint_finish(void);
+/*======================================*/
+/*******************************************************//**
+Scans log from a buffer and stores new log data to the parsing buffer.
+Parses and hashes the log records if new data found.  Unless
+UNIV_HOTBACKUP is defined, this function will apply log records
+automatically when the hash table becomes full.
+@return TRUE if limit_lsn has been reached, or not able to scan any
+more in this log group */
+UNIV_INTERN
+ibool
+recv_scan_log_recs(
+/*===============*/
+	ulint		available_memory,/*!< in: we let the hash table of recs
+					to grow to this size, at the maximum */
+	ibool		store_to_hash,	/*!< in: TRUE if the records should be
+					stored to the hash table; this is set
+					to FALSE if just debug checking is
+					needed */
+	const byte*	buf,		/*!< in: buffer containing a log
+					segment or garbage */
+	ulint		len,		/*!< in: buffer length */
+	ib_uint64_t	start_lsn,	/*!< in: buffer start lsn */
+	ib_uint64_t*	contiguous_lsn,	/*!< in/out: it is known that all log
+					groups contain contiguous log data up
+					to this lsn */
+	ib_uint64_t*	group_scanned_lsn);/*!< out: scanning succeeded up to
+					this lsn */
+/******************************************************//**
+Resets the logs. The contents of log files will be lost! */
+UNIV_INTERN
+void
+recv_reset_logs(
+/*============*/
+	ib_uint64_t	lsn,		/*!< in: reset to this lsn
+					rounded up to be divisible by
+					OS_FILE_LOG_BLOCK_SIZE, after
+					which we add
+					LOG_BLOCK_HDR_SIZE */
+#ifdef UNIV_LOG_ARCHIVE
+	ulint		arch_log_no,	/*!< in: next archived log file number */
+#endif /* UNIV_LOG_ARCHIVE */
+	ibool		new_logs_created);/*!< in: TRUE if resetting logs
+					is done at the log creation;
+					FALSE if it is done after
+					archive recovery */
+#ifdef UNIV_HOTBACKUP
+/******************************************************//**
+Creates new log files after a backup has been restored. */
+UNIV_INTERN
+void
+recv_reset_log_files_for_backup(
+/*============================*/
+	const char*	log_dir,	/*!< in: log file directory path */
+	ulint		n_log_files,	/*!< in: number of log files */
+	ulint		log_file_size,	/*!< in: log file size */
+	ib_uint64_t	lsn);		/*!< in: new start lsn, must be
+					divisible by OS_FILE_LOG_BLOCK_SIZE */
+#endif /* UNIV_HOTBACKUP */
+/********************************************************//**
+Creates the recovery system. */
+UNIV_INTERN
+void
+recv_sys_create(void);
+/*=================*/
+/********************************************************//**
+Inits the recovery system for a recovery operation. */
+UNIV_INTERN
+void
+recv_sys_init(
+/*==========*/
+	ulint	available_memory);	/*!< in: available memory in bytes */
+/*******************************************************************//**
+Empties the hash table of stored log records, applying them to appropriate
+pages. */
+UNIV_INTERN
+void
+recv_apply_hashed_log_recs(
+/*=======================*/
+	ibool	allow_ibuf);	/*!< in: if TRUE, also ibuf operations are
+				allowed during the application; if FALSE,
+				no ibuf operations are allowed, and after
+				the application all file pages are flushed to
+				disk and invalidated in buffer pool: this
+				alternative means that no new log records
+				can be generated during the application */
+#ifdef UNIV_HOTBACKUP
+/*******************************************************************//**
+Applies log records in the hash table to a backup. */
+UNIV_INTERN
+void
+recv_apply_log_recs_for_backup(void);
+/*================================*/
+#endif
+#ifdef UNIV_LOG_ARCHIVE
+/********************************************************//**
+Recovers from archived log files, and also from log files, if they exist.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+ulint
+recv_recovery_from_archive_start(
+/*=============================*/
+	ib_uint64_t	min_flushed_lsn,/*!< in: min flushed lsn field from the
+					data files */
+	ib_uint64_t	limit_lsn,	/*!< in: recover up to this lsn if
+					possible */
+	ulint		first_log_no);	/*!< in: number of the first archived
+					log file to use in the recovery; the
+					file will be searched from
+					INNOBASE_LOG_ARCH_DIR specified in
+					server config file */
+/********************************************************//**
+Completes recovery from archive. */
+UNIV_INTERN
+void
+recv_recovery_from_archive_finish(void);
+/*===================================*/
+#endif /* UNIV_LOG_ARCHIVE */
+
+/** Block of log record data */
+typedef struct recv_data_struct	recv_data_t;
+/** Block of log record data */
+struct recv_data_struct{
+	recv_data_t*	next;	/*!< pointer to the next block or NULL */
+				/*!< the log record data is stored physically
+				immediately after this struct, max amount
+				RECV_DATA_BLOCK_SIZE bytes of it */
+};
+
+/** Stored log record struct */
+typedef struct recv_struct	recv_t;
+/** Stored log record struct */
+struct recv_struct{
+	byte		type;	/*!< log record type */
+	ulint		len;	/*!< log record body length in bytes */
+	recv_data_t*	data;	/*!< chain of blocks containing the log record
+				body */
+	ib_uint64_t	start_lsn;/*!< start lsn of the log segment written by
+				the mtr which generated this log record: NOTE
+				that this is not necessarily the start lsn of
+				this log record */
+	ib_uint64_t	end_lsn;/*!< end lsn of the log segment written by
+				the mtr which generated this log record: NOTE
+				that this is not necessarily the end lsn of
+				this log record */
+	UT_LIST_NODE_T(recv_t)
+			rec_list;/*!< list of log records for this page */
+};
+
+/** States of recv_addr_struct */
+enum recv_addr_state {
+	/** not yet processed */
+	RECV_NOT_PROCESSED,
+	/** page is being read */
+	RECV_BEING_READ,
+	/** log records are being applied on the page */
+	RECV_BEING_PROCESSED,
+	/** log records have been applied on the page, or they have
+	been discarded because the tablespace does not exist */
+	RECV_PROCESSED
+};
+
+/** Hashed page file address struct */
+typedef struct recv_addr_struct	recv_addr_t;
+/** Hashed page file address struct */
+struct recv_addr_struct{
+	enum recv_addr_state state;
+				/*!< recovery state of the page */
+	ulint		space;	/*!< space id */
+	ulint		page_no;/*!< page number */
+	UT_LIST_BASE_NODE_T(recv_t)
+			rec_list;/*!< list of log records for this page */
+	hash_node_t	addr_hash;/*!< hash node in the hash bucket chain */
+};
+
+/** Recovery system data structure */
+typedef struct recv_sys_struct	recv_sys_t;
+/** Recovery system data structure */
+struct recv_sys_struct{
+#ifndef UNIV_HOTBACKUP
+	mutex_t		mutex;	/*!< mutex protecting the fields apply_log_recs,
+				n_addrs, and the state field in each recv_addr
+				struct */
+#endif /* !UNIV_HOTBACKUP */
+	ibool		apply_log_recs;
+				/*!< this is TRUE when log rec application to
+				pages is allowed; this flag tells the
+				i/o-handler if it should do log record
+				application */
+	ibool		apply_batch_on;
+				/*!< this is TRUE when a log rec application
+				batch is running */
+	ib_uint64_t	lsn;	/*!< log sequence number */
+	ulint		last_log_buf_size;
+				/*!< size of the log buffer when the database
+				last time wrote to the log */
+	byte*		last_block;
+				/*!< possible incomplete last recovered log
+				block */
+	byte*		last_block_buf_start;
+				/*!< the nonaligned start address of the
+				preceding buffer */
+	byte*		buf;	/*!< buffer for parsing log records */
+	ulint		len;	/*!< amount of data in buf */
+	ib_uint64_t	parse_start_lsn;
+				/*!< this is the lsn from which we were able to
+				start parsing log records and adding them to
+				the hash table; zero if a suitable
+				start point not found yet */
+	ib_uint64_t	scanned_lsn;
+				/*!< the log data has been scanned up to this
+				lsn */
+	ulint		scanned_checkpoint_no;
+				/*!< the log data has been scanned up to this
+				checkpoint number (lowest 4 bytes) */
+	ulint		recovered_offset;
+				/*!< start offset of non-parsed log records in
+				buf */
+	ib_uint64_t	recovered_lsn;
+				/*!< the log records have been parsed up to
+				this lsn */
+	ib_uint64_t	limit_lsn;/*!< recovery should be made at most
+				up to this lsn */
+	ibool		found_corrupt_log;
+				/*!< this is set to TRUE if we during log
+				scan find a corrupt log block, or a corrupt
+				log record, or there is a log parsing
+				buffer overflow */
+#ifdef UNIV_LOG_ARCHIVE
+	log_group_t*	archive_group;
+				/*!< in archive recovery: the log group whose
+				archive is read */
+#endif /* !UNIV_LOG_ARCHIVE */
+	mem_heap_t*	heap;	/*!< memory heap of log records and file
+				addresses*/
+	hash_table_t*	addr_hash;/*!< hash table of file addresses of pages */
+	ulint		n_addrs;/*!< number of not processed hashed file
+				addresses in the hash table */
+};
+
+/** The recovery system */
+extern recv_sys_t*	recv_sys;
+
+/** TRUE when applying redo log records during crash recovery; FALSE
+otherwise.  Note that this is FALSE while a background thread is
+rolling back incomplete transactions. */
+extern ibool		recv_recovery_on;
+/** If the following is TRUE, the buffer pool file pages must be invalidated
+after recovery and no ibuf operations are allowed; this becomes TRUE if
+the log record hash table becomes too full, and log records must be merged
+to file pages already before the recovery is finished: in this case no
+ibuf operations are allowed, as they could modify the pages read in the
+buffer pool before the pages have been recovered to the up-to-date state.
+
+TRUE means that recovery is running and no operations on the log files
+are allowed yet: the variable name is misleading. */
+extern ibool		recv_no_ibuf_operations;
+/** TRUE when recv_init_crash_recovery() has been called. */
+extern ibool		recv_needed_recovery;
+
+/** TRUE if buf_page_is_corrupted() should check if the log sequence
+number (FIL_PAGE_LSN) is in the future.  Initially FALSE, and set by
+recv_recovery_from_checkpoint_start_func(). */
+extern ibool		recv_lsn_checks_on;
+#ifdef UNIV_HOTBACKUP
+/** TRUE when the redo log is being backed up */
+extern ibool		recv_is_making_a_backup;
+#endif /* UNIV_HOTBACKUP */
+/** Maximum page number encountered in the redo log */
+extern ulint		recv_max_parsed_page_no;
+
+/** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
+times! */
+#define RECV_PARSING_BUF_SIZE	(2 * 1024 * 1024)
+
+/** Size of block reads when the log groups are scanned forward to do a
+roll-forward */
+#define RECV_SCAN_SIZE		(4 * UNIV_PAGE_SIZE)
+
+/** This many frames must be left free in the buffer pool when we scan
+the log and store the scanned log records in the buffer pool: we will
+use these free frames to read in pages when we start applying the
+log records to the database. */
+extern ulint	recv_n_pool_free_frames;
+
+#ifndef UNIV_NONINL
+#include "log0recv.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/log0recv.ic b/storage/innodb_plugin/include/log0recv.ic
new file mode 100644
index 00000000000..0a8e55b96fa
--- /dev/null
+++ b/storage/innodb_plugin/include/log0recv.ic
@@ -0,0 +1,53 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/log0recv.ic
+Recovery
+
+Created 9/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "univ.i"
+
+/*******************************************************************//**
+Returns TRUE if recovery is currently running.
+@return	recv_recovery_on */
+UNIV_INLINE
+ibool
+recv_recovery_is_on(void)
+/*=====================*/
+{
+	return(UNIV_UNLIKELY(recv_recovery_on));
+}
+
+#ifdef UNIV_LOG_ARCHIVE
+/** TRUE when applying redo log records from an archived log file */
+extern ibool	recv_recovery_from_backup_on;
+
+/*******************************************************************//**
+Returns TRUE if recovery from backup is currently running.
+@return	recv_recovery_from_backup_on */
+UNIV_INLINE
+ibool
+recv_recovery_from_backup_is_on(void)
+/*=================================*/
+{
+	return(recv_recovery_from_backup_on);
+}
+#endif /* UNIV_LOG_ARCHIVE */
diff --git a/storage/innodb_plugin/include/mach0data.h b/storage/innodb_plugin/include/mach0data.h
new file mode 100644
index 00000000000..44ee3df22ce
--- /dev/null
+++ b/storage/innodb_plugin/include/mach0data.h
@@ -0,0 +1,400 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/mach0data.h
+Utilities for converting data from the database file
+to the machine format.
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef mach0data_h
+#define mach0data_h
+
+#include "univ.i"
+#include "ut0byte.h"
+
+/* The data and all fields are always stored in a database file
+in the same format: ascii, big-endian, ... .
+All data in the files MUST be accessed using the functions in this
+module. */
+
+/*******************************************************//**
+The following function is used to store data in one byte. */
+UNIV_INLINE
+void
+mach_write_to_1(
+/*============*/
+	byte*	b,	/*!< in: pointer to byte where to store */
+	ulint	n);	 /*!< in: ulint integer to be stored, >= 0, < 256 */
+/********************************************************//**
+The following function is used to fetch data from one byte.
+@return	ulint integer, >= 0, < 256 */
+UNIV_INLINE
+ulint
+mach_read_from_1(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to byte */
+	__attribute__((nonnull, pure));
+/*******************************************************//**
+The following function is used to store data in two consecutive
+bytes. We store the most significant byte to the lower address. */
+UNIV_INLINE
+void
+mach_write_to_2(
+/*============*/
+	byte*	b,	/*!< in: pointer to two bytes where to store */
+	ulint	n);	 /*!< in: ulint integer to be stored, >= 0, < 64k */
+/********************************************************//**
+The following function is used to fetch data from two consecutive
+bytes. The most significant byte is at the lowest address.
+@return	ulint integer, >= 0, < 64k */
+UNIV_INLINE
+ulint
+mach_read_from_2(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to two bytes */
+	__attribute__((nonnull, pure));
+
+/********************************************************//**
+The following function is used to convert a 16-bit data item
+to the canonical format, for fast bytewise equality test
+against memory.
+@return	16-bit integer in canonical format */
+UNIV_INLINE
+uint16
+mach_encode_2(
+/*==========*/
+	ulint	n)	/*!< in: integer in machine-dependent format */
+	__attribute__((const));
+/********************************************************//**
+The following function is used to convert a 16-bit data item
+from the canonical format, for fast bytewise equality test
+against memory.
+@return	integer in machine-dependent format */
+UNIV_INLINE
+ulint
+mach_decode_2(
+/*==========*/
+	uint16	n)	/*!< in: 16-bit integer in canonical format */
+	__attribute__((const));
+/*******************************************************//**
+The following function is used to store data in 3 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_3(
+/*============*/
+	byte*	b,	/*!< in: pointer to 3 bytes where to store */
+	ulint	n);	 /*!< in: ulint integer to be stored */
+/********************************************************//**
+The following function is used to fetch data from 3 consecutive
+bytes. The most significant byte is at the lowest address.
+@return	ulint integer */
+UNIV_INLINE
+ulint
+mach_read_from_3(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to 3 bytes */
+	__attribute__((nonnull, pure));
+/*******************************************************//**
+The following function is used to store data in four consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_4(
+/*============*/
+	byte*	b,	/*!< in: pointer to four bytes where to store */
+	ulint	n);	 /*!< in: ulint integer to be stored */
+/********************************************************//**
+The following function is used to fetch data from 4 consecutive
+bytes. The most significant byte is at the lowest address.
+@return	ulint integer */
+UNIV_INLINE
+ulint
+mach_read_from_4(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to four bytes */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a ulint in a compressed form (1..5 bytes).
+@return	stored size in bytes */
+UNIV_INLINE
+ulint
+mach_write_compressed(
+/*==================*/
+	byte*	b,	/*!< in: pointer to memory where to store */
+	ulint	n);	/*!< in: ulint integer to be stored */
+/*********************************************************//**
+Returns the size of an ulint when written in the compressed form.
+@return	compressed size in bytes */
+UNIV_INLINE
+ulint
+mach_get_compressed_size(
+/*=====================*/
+	ulint	n)	/*!< in: ulint integer to be stored */
+	__attribute__((const));
+/*********************************************************//**
+Reads a ulint in a compressed form.
+@return	read integer */
+UNIV_INLINE
+ulint
+mach_read_compressed(
+/*=================*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+	__attribute__((nonnull, pure));
+/*******************************************************//**
+The following function is used to store data in 6 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_6(
+/*============*/
+	byte*	b,	/*!< in: pointer to 6 bytes where to store */
+	dulint	n);	 /*!< in: dulint integer to be stored */
+/********************************************************//**
+The following function is used to fetch data from 6 consecutive
+bytes. The most significant byte is at the lowest address.
+@return	dulint integer */
+UNIV_INLINE
+dulint
+mach_read_from_6(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to 6 bytes */
+	__attribute__((nonnull, pure));
+/*******************************************************//**
+The following function is used to store data in 7 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_7(
+/*============*/
+	byte*	b,	/*!< in: pointer to 7 bytes where to store */
+	dulint	n);	 /*!< in: dulint integer to be stored */
+/********************************************************//**
+The following function is used to fetch data from 7 consecutive
+bytes. The most significant byte is at the lowest address.
+@return	dulint integer */
+UNIV_INLINE
+dulint
+mach_read_from_7(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to 7 bytes */
+	__attribute__((nonnull, pure));
+/*******************************************************//**
+The following function is used to store data in 8 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_8(
+/*============*/
+	byte*	b,	/*!< in: pointer to 8 bytes where to store */
+	dulint	n);	/*!< in: dulint integer to be stored */
+/*******************************************************//**
+The following function is used to store data in 8 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_ull(
+/*===========*/
+	byte*		b,	/*!< in: pointer to 8 bytes where to store */
+	ib_uint64_t	n);	/*!< in: 64-bit integer to be stored */
+/********************************************************//**
+The following function is used to fetch data from 8 consecutive
+bytes. The most significant byte is at the lowest address.
+@return	dulint integer */
+UNIV_INLINE
+dulint
+mach_read_from_8(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to 8 bytes */
+	__attribute__((nonnull, pure));
+/********************************************************//**
+The following function is used to fetch data from 8 consecutive
+bytes. The most significant byte is at the lowest address.
+@return	64-bit integer */
+UNIV_INLINE
+ib_uint64_t
+mach_read_ull(
+/*==========*/
+	const byte*	b)	/*!< in: pointer to 8 bytes */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a dulint in a compressed form (5..9 bytes).
+@return	size in bytes */
+UNIV_INLINE
+ulint
+mach_dulint_write_compressed(
+/*=========================*/
+	byte*	b,	/*!< in: pointer to memory where to store */
+	dulint	n);	/*!< in: dulint integer to be stored */
+/*********************************************************//**
+Returns the size of a dulint when written in the compressed form.
+@return	compressed size in bytes */
+UNIV_INLINE
+ulint
+mach_dulint_get_compressed_size(
+/*============================*/
+	dulint	 n);	/*!< in: dulint integer to be stored */
+/*********************************************************//**
+Reads a dulint in a compressed form.
+@return	read dulint */
+UNIV_INLINE
+dulint
+mach_dulint_read_compressed(
+/*========================*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a dulint in a compressed form (1..11 bytes).
+@return	size in bytes */
+UNIV_INLINE
+ulint
+mach_dulint_write_much_compressed(
+/*==============================*/
+	byte*	b,	/*!< in: pointer to memory where to store */
+	dulint	n);	/*!< in: dulint integer to be stored */
+/*********************************************************//**
+Returns the size of a dulint when written in the compressed form.
+@return	compressed size in bytes */
+UNIV_INLINE
+ulint
+mach_dulint_get_much_compressed_size(
+/*=================================*/
+	dulint	 n)	 /*!< in: dulint integer to be stored */
+	__attribute__((const));
+/*********************************************************//**
+Reads a dulint in a compressed form.
+@return	read dulint */
+UNIV_INLINE
+dulint
+mach_dulint_read_much_compressed(
+/*=============================*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Reads a ulint in a compressed form if the log record fully contains it.
+@return	pointer to end of the stored field, NULL if not complete */
+UNIV_INTERN
+byte*
+mach_parse_compressed(
+/*==================*/
+	byte*	ptr,	/*!< in: pointer to buffer from where to read */
+	byte*	end_ptr,/*!< in: pointer to end of the buffer */
+	ulint*	val);	/*!< out: read value */
+/*********************************************************//**
+Reads a dulint in a compressed form if the log record fully contains it.
+@return	pointer to end of the stored field, NULL if not complete */
+UNIV_INTERN
+byte*
+mach_dulint_parse_compressed(
+/*=========================*/
+	byte*	ptr,	/*!< in: pointer to buffer from where to read */
+	byte*	end_ptr,/*!< in: pointer to end of the buffer */
+	dulint*	val);	/*!< out: read value */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************//**
+Reads a double. It is stored in a little-endian format.
+@return	double read */
+UNIV_INLINE
+double
+mach_double_read(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a double. It is stored in a little-endian format. */
+UNIV_INLINE
+void
+mach_double_write(
+/*==============*/
+	byte*	b,	/*!< in: pointer to memory where to write */
+	double	d);	/*!< in: double */
+/*********************************************************//**
+Reads a float. It is stored in a little-endian format.
+@return	float read */
+UNIV_INLINE
+float
+mach_float_read(
+/*============*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a float. It is stored in a little-endian format. */
+UNIV_INLINE
+void
+mach_float_write(
+/*=============*/
+	byte*	b,	/*!< in: pointer to memory where to write */
+	float	d);	/*!< in: float */
+/*********************************************************//**
+Reads a ulint stored in the little-endian format.
+@return	unsigned long int */
+UNIV_INLINE
+ulint
+mach_read_from_n_little_endian(
+/*===========================*/
+	const byte*	buf,		/*!< in: from where to read */
+	ulint		buf_size)	/*!< in: from how many bytes to read */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a ulint in the little-endian format. */
+UNIV_INLINE
+void
+mach_write_to_n_little_endian(
+/*==========================*/
+	byte*	dest,		/*!< in: where to write */
+	ulint	dest_size,	/*!< in: into how many bytes to write */
+	ulint	n);		/*!< in: unsigned long int to write */
+/*********************************************************//**
+Reads a ulint stored in the little-endian format.
+@return	unsigned long int */
+UNIV_INLINE
+ulint
+mach_read_from_2_little_endian(
+/*===========================*/
+	const byte*	buf)		/*!< in: from where to read */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a ulint in the little-endian format. */
+UNIV_INLINE
+void
+mach_write_to_2_little_endian(
+/*==========================*/
+	byte*	dest,		/*!< in: where to write */
+	ulint	n);		/*!< in: unsigned long int to write */
+
+/*********************************************************//**
+Convert integral type from storage byte order (big endian) to
+host byte order.
+@return	integer value */
+UNIV_INLINE
+ullint
+mach_read_int_type(
+/*===============*/
+	const byte*	src,		/*!< in: where to read from */
+	ulint		len,		/*!< in: length of src */
+	ibool		unsigned_type);	/*!< in: signed or unsigned flag */
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "mach0data.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/mach0data.ic b/storage/innodb_plugin/include/mach0data.ic
similarity index 54%
rename from storage/innobase/include/mach0data.ic
rename to storage/innodb_plugin/include/mach0data.ic
index ec15c10c661..ef20356bd31 100644
--- a/storage/innobase/include/mach0data.ic
+++ b/storage/innodb_plugin/include/mach0data.ic
@@ -1,22 +1,39 @@
-/**********************************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/mach0data.ic
 Utilities for converting data from the database file
 to the machine format.
 
-(c) 1995 Innobase Oy
-
 Created 11/28/1995 Heikki Tuuri
 ***********************************************************************/
 
 #include "ut0mem.h"
 
-/***********************************************************
+/*******************************************************//**
 The following function is used to store data in one byte. */
 UNIV_INLINE
 void
 mach_write_to_1(
 /*============*/
-	byte*	b,	/* in: pointer to byte where to store */
-	ulint	n)	/* in: ulint integer to be stored, >= 0, < 256 */
+	byte*	b,	/*!< in: pointer to byte where to store */
+	ulint	n)	/*!< in: ulint integer to be stored, >= 0, < 256 */
 {
 	ut_ad(b);
 	ut_ad(n <= 0xFFUL);
@@ -24,28 +41,28 @@ mach_write_to_1(
 	b[0] = (byte)n;
 }
 
-/************************************************************
-The following function is used to fetch data from one byte. */
+/********************************************************//**
+The following function is used to fetch data from one byte.
+@return	ulint integer, >= 0, < 256 */
 UNIV_INLINE
 ulint
 mach_read_from_1(
 /*=============*/
-			/* out: ulint integer, >= 0, < 256 */
-	byte*	b)	/* in: pointer to byte */
+	const byte*	b)	/*!< in: pointer to byte */
 {
 	ut_ad(b);
 	return((ulint)(b[0]));
 }
 
-/***********************************************************
+/*******************************************************//**
 The following function is used to store data in two consecutive
 bytes. We store the most significant byte to the lowest address. */
 UNIV_INLINE
 void
 mach_write_to_2(
 /*============*/
-	byte*	b,	/* in: pointer to two bytes where to store */
-	ulint	n)	/* in: ulint integer to be stored */
+	byte*	b,	/*!< in: pointer to two bytes where to store */
+	ulint	n)	/*!< in: ulint integer to be stored */
 {
 	ut_ad(b);
 	ut_ad(n <= 0xFFFFUL);
@@ -54,15 +71,15 @@ mach_write_to_2(
 	b[1] = (byte)(n);
 }
 
-/************************************************************
+/********************************************************//**
 The following function is used to fetch data from 2 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return	ulint integer */
 UNIV_INLINE
 ulint
 mach_read_from_2(
 /*=============*/
-			/* out: ulint integer */
-	byte*	b)	/* in: pointer to 2 bytes */
+	const byte*	b)	/*!< in: pointer to 2 bytes */
 {
 	ut_ad(b);
 	return( ((ulint)(b[0]) << 8)
@@ -70,46 +87,46 @@ mach_read_from_2(
 		);
 }
 
-/************************************************************
+/********************************************************//**
 The following function is used to convert a 16-bit data item
 to the canonical format, for fast bytewise equality test
-against memory. */
+against memory.
+@return	16-bit integer in canonical format */
 UNIV_INLINE
 uint16
 mach_encode_2(
 /*==========*/
-			/* out: 16-bit integer in canonical format */
-	ulint	n)	/* in: integer in machine-dependent format */
+	ulint	n)	/*!< in: integer in machine-dependent format */
 {
 	uint16	ret;
 	ut_ad(2 == sizeof ret);
 	mach_write_to_2((byte*) &ret, n);
 	return(ret);
 }
-/************************************************************
+/********************************************************//**
 The following function is used to convert a 16-bit data item
 from the canonical format, for fast bytewise equality test
-against memory. */
+against memory.
+@return	integer in machine-dependent format */
 UNIV_INLINE
 ulint
 mach_decode_2(
 /*==========*/
-			/* out: integer in machine-dependent format */
-	uint16	n)	/* in: 16-bit integer in canonical format */
+	uint16	n)	/*!< in: 16-bit integer in canonical format */
 {
 	ut_ad(2 == sizeof n);
-	return(mach_read_from_2((byte*) &n));
+	return(mach_read_from_2((const byte*) &n));
 }
 
-/***********************************************************
+/*******************************************************//**
 The following function is used to store data in 3 consecutive
 bytes. We store the most significant byte to the lowest address. */
 UNIV_INLINE
 void
 mach_write_to_3(
 /*============*/
-	byte*	b,	/* in: pointer to 3 bytes where to store */
-	ulint	n)	/* in: ulint integer to be stored */
+	byte*	b,	/*!< in: pointer to 3 bytes where to store */
+	ulint	n)	/*!< in: ulint integer to be stored */
 {
 	ut_ad(b);
 	ut_ad(n <= 0xFFFFFFUL);
@@ -119,15 +136,15 @@ mach_write_to_3(
 	b[2] = (byte)(n);
 }
 
-/************************************************************
+/********************************************************//**
 The following function is used to fetch data from 3 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return	ulint integer */
 UNIV_INLINE
 ulint
 mach_read_from_3(
 /*=============*/
-			/* out: ulint integer */
-	byte*	b)	/* in: pointer to 3 bytes */
+	const byte*	b)	/*!< in: pointer to 3 bytes */
 {
 	ut_ad(b);
 	return( ((ulint)(b[0]) << 16)
@@ -136,15 +153,15 @@ mach_read_from_3(
 		);
 }
 
-/***********************************************************
+/*******************************************************//**
 The following function is used to store data in four consecutive
 bytes. We store the most significant byte to the lowest address. */
 UNIV_INLINE
 void
 mach_write_to_4(
 /*============*/
-	byte*	b,	/* in: pointer to four bytes where to store */
-	ulint	n)	/* in: ulint integer to be stored */
+	byte*	b,	/*!< in: pointer to four bytes where to store */
+	ulint	n)	/*!< in: ulint integer to be stored */
 {
 	ut_ad(b);
 
@@ -154,15 +171,15 @@ mach_write_to_4(
 	b[3] = (byte)n;
 }
 
-/************************************************************
+/********************************************************//**
 The following function is used to fetch data from 4 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return	ulint integer */
 UNIV_INLINE
 ulint
 mach_read_from_4(
 /*=============*/
-			/* out: ulint integer */
-	byte*	b)	/* in: pointer to four bytes */
+	const byte*	b)	/*!< in: pointer to four bytes */
 {
 	ut_ad(b);
 	return( ((ulint)(b[0]) << 24)
@@ -172,20 +189,20 @@ mach_read_from_4(
 		);
 }
 
-/*************************************************************
+/*********************************************************//**
 Writes a ulint in a compressed form where the first byte codes the
 length of the stored ulint. We look at the most significant bits of
 the byte. If the most significant bit is zero, it means 1-byte storage,
 else if the 2nd bit is 0, it means 2-byte storage, else if 3rd is 0,
 it means 3-byte storage, else if 4th is 0, it means 4-byte storage,
-else the storage is 5-byte. */
+else the storage is 5-byte.
+@return	compressed size in bytes */
 UNIV_INLINE
 ulint
 mach_write_compressed(
 /*==================*/
-			/* out: compressed size in bytes */
-	byte*	b,	/* in: pointer to memory where to store */
-	ulint	n)	/* in: ulint integer (< 2^32) to be stored */
+	byte*	b,	/*!< in: pointer to memory where to store */
+	ulint	n)	/*!< in: ulint integer (< 2^32) to be stored */
 {
 	ut_ad(b);
 
@@ -208,14 +225,14 @@ mach_write_compressed(
 	}
 }
 
-/*************************************************************
-Returns the size of a ulint when written in the compressed form. */
+/*********************************************************//**
+Returns the size of a ulint when written in the compressed form.
+@return	compressed size in bytes */
 UNIV_INLINE
 ulint
 mach_get_compressed_size(
 /*=====================*/
-			/* out: compressed size in bytes */
-	ulint	n)	/* in: ulint integer (< 2^32) to be stored */
+	ulint	n)	/*!< in: ulint integer (< 2^32) to be stored */
 {
 	if (n < 0x80UL) {
 		return(1);
@@ -230,14 +247,14 @@ mach_get_compressed_size(
 	}
 }
 
-/*************************************************************
-Reads a ulint in a compressed form. */
+/*********************************************************//**
+Reads a ulint in a compressed form.
+@return	read integer (< 2^32) */
 UNIV_INLINE
 ulint
 mach_read_compressed(
 /*=================*/
-			/* out: read integer (< 2^32) */
-	byte*	b)	/* in: pointer to memory from where to read */
+	const byte*	b)	/*!< in: pointer to memory from where to read */
 {
 	ulint	flag;
 
@@ -259,15 +276,15 @@ mach_read_compressed(
 	}
 }
 
-/***********************************************************
+/*******************************************************//**
 The following function is used to store data in 8 consecutive
 bytes. We store the most significant byte to the lowest address. */
 UNIV_INLINE
 void
 mach_write_to_8(
 /*============*/
-	byte*	b,	/* in: pointer to 8 bytes where to store */
-	dulint	n)	/* in: dulint integer to be stored */
+	byte*	b,	/*!< in: pointer to 8 bytes where to store */
+	dulint	n)	/*!< in: dulint integer to be stored */
 {
 	ut_ad(b);
 
@@ -275,15 +292,31 @@ mach_write_to_8(
 	mach_write_to_4(b + 4, ut_dulint_get_low(n));
 }
 
-/************************************************************
+/*******************************************************//**
+The following function is used to store data in 8 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_ull(
+/*===========*/
+	byte*		b,	/*!< in: pointer to 8 bytes where to store */
+	ib_uint64_t	n)	/*!< in: 64-bit integer to be stored */
+{
+	ut_ad(b);
+
+	mach_write_to_4(b, (ulint) (n >> 32));
+	mach_write_to_4(b + 4, (ulint) n);
+}
+
+/********************************************************//**
 The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return	dulint integer */
 UNIV_INLINE
 dulint
 mach_read_from_8(
 /*=============*/
-			/* out: dulint integer */
-	byte*	b)	/* in: pointer to 8 bytes */
+	const byte*	b)	/*!< in: pointer to 8 bytes */
 {
 	ulint	high;
 	ulint	low;
@@ -296,15 +329,33 @@ mach_read_from_8(
 	return(ut_dulint_create(high, low));
 }
 
-/***********************************************************
+/********************************************************//**
+The following function is used to fetch data from 8 consecutive
+bytes. The most significant byte is at the lowest address.
+@return	64-bit integer */
+UNIV_INLINE
+ib_uint64_t
+mach_read_ull(
+/*==========*/
+	const byte*	b)	/*!< in: pointer to 8 bytes */
+{
+	ib_uint64_t	ull;
+
+	ull = ((ib_uint64_t) mach_read_from_4(b)) << 32;
+	ull |= (ib_uint64_t) mach_read_from_4(b + 4);
+
+	return(ull);
+}
+
+/*******************************************************//**
 The following function is used to store data in 7 consecutive
 bytes. We store the most significant byte to the lowest address. */
 UNIV_INLINE
 void
 mach_write_to_7(
 /*============*/
-	byte*	b,	/* in: pointer to 7 bytes where to store */
-	dulint	n)	/* in: dulint integer to be stored */
+	byte*	b,	/*!< in: pointer to 7 bytes where to store */
+	dulint	n)	/*!< in: dulint integer to be stored */
 {
 	ut_ad(b);
 
@@ -312,15 +363,15 @@ mach_write_to_7(
 	mach_write_to_4(b + 3, ut_dulint_get_low(n));
 }
 
-/************************************************************
+/********************************************************//**
 The following function is used to fetch data from 7 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return	dulint integer */
 UNIV_INLINE
 dulint
 mach_read_from_7(
 /*=============*/
-			/* out: dulint integer */
-	byte*	b)	/* in: pointer to 7 bytes */
+	const byte*	b)	/*!< in: pointer to 7 bytes */
 {
 	ulint	high;
 	ulint	low;
@@ -333,15 +384,15 @@ mach_read_from_7(
 	return(ut_dulint_create(high, low));
 }
 
-/***********************************************************
+/*******************************************************//**
 The following function is used to store data in 6 consecutive
 bytes. We store the most significant byte to the lowest address. */
 UNIV_INLINE
 void
 mach_write_to_6(
 /*============*/
-	byte*	b,	/* in: pointer to 6 bytes where to store */
-	dulint	n)	/* in: dulint integer to be stored */
+	byte*	b,	/*!< in: pointer to 6 bytes where to store */
+	dulint	n)	/*!< in: dulint integer to be stored */
 {
 	ut_ad(b);
 
@@ -349,15 +400,15 @@ mach_write_to_6(
 	mach_write_to_4(b + 2, ut_dulint_get_low(n));
 }
 
-/************************************************************
+/********************************************************//**
 The following function is used to fetch data from 6 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return	dulint integer */
 UNIV_INLINE
 dulint
 mach_read_from_6(
 /*=============*/
-			/* out: dulint integer */
-	byte*	b)	/* in: pointer to 7 bytes */
+	const byte*	b)	/*!< in: pointer to 6 bytes */
 {
 	ulint	high;
 	ulint	low;
@@ -370,15 +421,15 @@ mach_read_from_6(
 	return(ut_dulint_create(high, low));
 }
 
-/*************************************************************
-Writes a dulint in a compressed form (5..9 bytes). */
+/*********************************************************//**
+Writes a dulint in a compressed form (5..9 bytes).
+@return	size in bytes */
 UNIV_INLINE
 ulint
 mach_dulint_write_compressed(
 /*=========================*/
-			/* out: size in bytes */
-	byte*	b,	/* in: pointer to memory where to store */
-	dulint	n)	/* in: dulint integer to be stored */
+	byte*	b,	/*!< in: pointer to memory where to store */
+	dulint	n)	/*!< in: dulint integer to be stored */
 {
 	ulint	size;
 
@@ -390,26 +441,26 @@ mach_dulint_write_compressed(
 	return(size + 4);
 }
 
-/*************************************************************
-Returns the size of a dulint when written in the compressed form. */
+/*********************************************************//**
+Returns the size of a dulint when written in the compressed form.
+@return	compressed size in bytes */
 UNIV_INLINE
 ulint
 mach_dulint_get_compressed_size(
 /*============================*/
-			/* out: compressed size in bytes */
-	dulint	 n)	/* in: dulint integer to be stored */
+	dulint	 n)	/*!< in: dulint integer to be stored */
 {
 	return(4 + mach_get_compressed_size(ut_dulint_get_high(n)));
 }
 
-/*************************************************************
-Reads a dulint in a compressed form. */
+/*********************************************************//**
+Reads a dulint in a compressed form.
+@return	read dulint */
 UNIV_INLINE
 dulint
 mach_dulint_read_compressed(
 /*========================*/
-			/* out: read dulint */
-	byte*	b)	/* in: pointer to memory from where to read */
+	const byte*	b)	/*!< in: pointer to memory from where to read */
 {
 	ulint	high;
 	ulint	low;
@@ -426,15 +477,15 @@ mach_dulint_read_compressed(
 	return(ut_dulint_create(high, low));
 }
 
-/*************************************************************
-Writes a dulint in a compressed form (1..11 bytes). */
+/*********************************************************//**
+Writes a dulint in a compressed form (1..11 bytes).
+@return	size in bytes */
 UNIV_INLINE
 ulint
 mach_dulint_write_much_compressed(
 /*==============================*/
-			/* out: size in bytes */
-	byte*	b,	/* in: pointer to memory where to store */
-	dulint	n)	/* in: dulint integer to be stored */
+	byte*	b,	/*!< in: pointer to memory where to store */
+	dulint	n)	/*!< in: dulint integer to be stored */
 {
 	ulint	size;
 
@@ -452,14 +503,14 @@ mach_dulint_write_much_compressed(
 	return(size);
 }
 
-/*************************************************************
-Returns the size of a dulint when written in the compressed form. */
+/*********************************************************//**
+Returns the size of a dulint when written in the compressed form.
+@return	compressed size in bytes */
 UNIV_INLINE
 ulint
 mach_dulint_get_much_compressed_size(
 /*=================================*/
-			/* out: compressed size in bytes */
-	dulint	 n)	/* in: dulint integer to be stored */
+	dulint	 n)	/*!< in: dulint integer to be stored */
 {
 	if (0 == ut_dulint_get_high(n)) {
 		return(mach_get_compressed_size(ut_dulint_get_low(n)));
@@ -469,14 +520,14 @@ mach_dulint_get_much_compressed_size(
 	       + mach_get_compressed_size(ut_dulint_get_low(n)));
 }
 
-/*************************************************************
-Reads a dulint in a compressed form. */
+/*********************************************************//**
+Reads a dulint in a compressed form.
+@return	read dulint */
 UNIV_INLINE
 dulint
 mach_dulint_read_much_compressed(
 /*=============================*/
-			/* out: read dulint */
-	byte*	b)	/* in: pointer to memory from where to read */
+	const byte*	b)	/*!< in: pointer to memory from where to read */
 {
 	ulint	high;
 	ulint	low;
@@ -497,15 +548,15 @@ mach_dulint_read_much_compressed(
 
 	return(ut_dulint_create(high, low));
 }
-
-/*************************************************************
-Reads a double. It is stored in a little-endian format. */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************//**
+Reads a double. It is stored in a little-endian format.
+@return	double read */
 UNIV_INLINE
 double
 mach_double_read(
 /*=============*/
-			/* out: double read */
-	byte*	b)	/* in: pointer to memory from where to read */
+	const byte*	b)	/*!< in: pointer to memory from where to read */
 {
 	double	d;
 	ulint	i;
@@ -524,14 +575,14 @@ mach_double_read(
 	return(d);
 }
 
-/*************************************************************
+/*********************************************************//**
 Writes a double. It is stored in a little-endian format. */
 UNIV_INLINE
 void
 mach_double_write(
 /*==============*/
-	byte*	b,	/* in: pointer to memory where to write */
-	double	d)	/* in: double */
+	byte*	b,	/*!< in: pointer to memory where to write */
+	double	d)	/*!< in: double */
 {
 	ulint	i;
 	byte*	ptr;
@@ -547,14 +598,14 @@ mach_double_write(
 	}
 }
 
-/*************************************************************
-Reads a float. It is stored in a little-endian format. */
+/*********************************************************//**
+Reads a float. It is stored in a little-endian format.
+@return	float read */
 UNIV_INLINE
 float
 mach_float_read(
 /*============*/
-			/* out: float read */
-	byte*	b)	/* in: pointer to memory from where to read */
+	const byte*	b)	/*!< in: pointer to memory from where to read */
 {
 	float	d;
 	ulint	i;
@@ -573,14 +624,14 @@ mach_float_read(
 	return(d);
 }
 
-/*************************************************************
+/*********************************************************//**
 Writes a float. It is stored in a little-endian format. */
 UNIV_INLINE
 void
 mach_float_write(
 /*=============*/
-	byte*	b,	/* in: pointer to memory where to write */
-	float	d)	/* in: float */
+	byte*	b,	/*!< in: pointer to memory where to write */
+	float	d)	/*!< in: float */
 {
 	ulint	i;
 	byte*	ptr;
@@ -596,18 +647,18 @@ mach_float_write(
 	}
 }
 
-/*************************************************************
-Reads a ulint stored in the little-endian format. */
+/*********************************************************//**
+Reads a ulint stored in the little-endian format.
+@return	unsigned long int */
 UNIV_INLINE
 ulint
 mach_read_from_n_little_endian(
 /*===========================*/
-				/* out: unsigned long int */
-	byte*	buf,		/* in: from where to read */
-	ulint	buf_size)	/* in: from how many bytes to read */
+	const byte*	buf,		/*!< in: from where to read */
+	ulint		buf_size)	/*!< in: from how many bytes to read */
 {
 	ulint	n	= 0;
-	byte*	ptr;
+	const byte*	ptr;
 
 	ut_ad(buf_size <= sizeof(ulint));
 	ut_ad(buf_size > 0);
@@ -629,15 +680,15 @@ mach_read_from_n_little_endian(
 	return(n);
 }
 
-/*************************************************************
+/*********************************************************//**
 Writes a ulint in the little-endian format. */
 UNIV_INLINE
 void
 mach_write_to_n_little_endian(
 /*==========================*/
-	byte*	dest,		/* in: where to write */
-	ulint	dest_size,	/* in: into how many bytes to write */
-	ulint	n)		/* in: unsigned long int to write */
+	byte*	dest,		/*!< in: where to write */
+	ulint	dest_size,	/*!< in: into how many bytes to write */
+	ulint	n)		/*!< in: unsigned long int to write */
 {
 	byte*	end;
 
@@ -661,26 +712,26 @@ mach_write_to_n_little_endian(
 	ut_ad(n == 0);
 }
 
-/*************************************************************
-Reads a ulint stored in the little-endian format. */
+/*********************************************************//**
+Reads a ulint stored in the little-endian format.
+@return	unsigned long int */
 UNIV_INLINE
 ulint
 mach_read_from_2_little_endian(
 /*===========================*/
-				/* out: unsigned long int */
-	byte*	buf)		/* in: from where to read */
+	const byte*	buf)		/*!< in: from where to read */
 {
 	return((ulint)(*buf) + ((ulint)(*(buf + 1))) * 256);
 }
 
-/*************************************************************
+/*********************************************************//**
 Writes a ulint in the little-endian format. */
 UNIV_INLINE
 void
 mach_write_to_2_little_endian(
 /*==========================*/
-	byte*	dest,		/* in: where to write */
-	ulint	n)		/* in: unsigned long int to write */
+	byte*	dest,		/*!< in: where to write */
+	ulint	n)		/*!< in: unsigned long int to write */
 {
 	ut_ad(n < 256 * 256);
 
@@ -692,17 +743,17 @@ mach_write_to_2_little_endian(
 	*dest = (byte)(n & 0xFFUL);
 }
 
-/*************************************************************
+/*********************************************************//**
 Convert integral type from storage byte order (big endian) to
-host byte order. */
+host byte order.
+@return	integer value */
 UNIV_INLINE
 ullint
 mach_read_int_type(
 /*===============*/
-					/* out: integer value */
-	const byte*	src,		/* in: where to read from */
-	ulint		len,		/* in: length of src */
-	ibool		unsigned_type)	/* in: signed or unsigned flag */
+	const byte*	src,		/*!< in: where to read from */
+	ulint		len,		/*!< in: length of src */
+	ibool		unsigned_type)	/*!< in: signed or unsigned flag */
 {
 	/* XXX this can be optimized on big-endian machines */
 
@@ -732,3 +783,4 @@ mach_read_int_type(
 
 	return(ret);
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/mem0dbg.h b/storage/innodb_plugin/include/mem0dbg.h
similarity index 59%
rename from storage/innobase/include/mem0dbg.h
rename to storage/innodb_plugin/include/mem0dbg.h
index 2393e4edb54..a064af5c678 100644
--- a/storage/innobase/include/mem0dbg.h
+++ b/storage/innodb_plugin/include/mem0dbg.h
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mem0dbg.h
 The memory management: the debug code. This is not a compilation module,
 but is included in mem0mem.* !
 
-(c) 1994, 1995 Innobase Oy
-
 Created 6/9/1994 Heikki Tuuri
 *******************************************************/
 
@@ -31,96 +48,96 @@ check fields at the both ends of the field. */
 #endif
 
 #if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/*******************************************************************
+/***************************************************************//**
 Checks a memory heap for consistency and prints the contents if requested.
 Outputs the sum of sizes of buffers given to the user (only in
 the debug version), the physical size of the heap and the number of
 blocks in the heap. In case of error returns 0 as sizes and number
 of blocks. */
-
+UNIV_INTERN
 void
 mem_heap_validate_or_print(
 /*=======================*/
-	mem_heap_t*	heap,	/* in: memory heap */
-	byte*		top,	/* in: calculate and validate only until
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	byte*		top,	/*!< in: calculate and validate only until
 				this top pointer in the heap is reached,
 				if this pointer is NULL, ignored */
-	ibool		 print,	 /* in: if TRUE, prints the contents
+	ibool		 print,	 /*!< in: if TRUE, prints the contents
 				of the heap; works only in
 				the debug version */
-	ibool*		 error,	 /* out: TRUE if error */
-	ulint*		us_size,/* out: allocated memory
+	ibool*		 error,	 /*!< out: TRUE if error */
+	ulint*		us_size,/*!< out: allocated memory
 				(for the user) in the heap,
 				if a NULL pointer is passed as this
 				argument, it is ignored; in the
 				non-debug version this is always -1 */
-	ulint*		ph_size,/* out: physical size of the heap,
+	ulint*		ph_size,/*!< out: physical size of the heap,
 				if a NULL pointer is passed as this
 				argument, it is ignored */
-	ulint*		n_blocks); /* out: number of blocks in the heap,
+	ulint*		n_blocks); /*!< out: number of blocks in the heap,
 				if a NULL pointer is passed as this
 				argument, it is ignored */
-/******************************************************************
-Validates the contents of a memory heap. */
-
+/**************************************************************//**
+Validates the contents of a memory heap.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 mem_heap_validate(
 /*==============*/
-				/* out: TRUE if ok */
-	mem_heap_t*   heap);	/* in: memory heap */
+	mem_heap_t*   heap);	/*!< in: memory heap */
 #endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
 #ifdef UNIV_DEBUG
-/******************************************************************
-Checks that an object is a memory heap (or a block of it) */
-
+/**************************************************************//**
+Checks that an object is a memory heap (or a block of it)
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 mem_heap_check(
 /*===========*/
-				/* out: TRUE if ok */
-	mem_heap_t*   heap);	/* in: memory heap */
+	mem_heap_t*   heap);	/*!< in: memory heap */
 #endif /* UNIV_DEBUG */
 #ifdef UNIV_MEM_DEBUG
-/*********************************************************************
-TRUE if no memory is currently allocated. */
-
+/*****************************************************************//**
+TRUE if no memory is currently allocated.
+@return	TRUE if no heaps exist */
+UNIV_INTERN
 ibool
 mem_all_freed(void);
 /*===============*/
-			/* out: TRUE if no heaps exist */
-/*********************************************************************
-Validates the dynamic memory */
-
+/*****************************************************************//**
+Validates the dynamic memory
+@return	TRUE if error */
+UNIV_INTERN
 ibool
 mem_validate_no_assert(void);
 /*=========================*/
-			/* out: TRUE if error */
-/****************************************************************
-Validates the dynamic memory */
-
+/************************************************************//**
+Validates the dynamic memory
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 mem_validate(void);
 /*===============*/
-			/* out: TRUE if ok */
 #endif /* UNIV_MEM_DEBUG */
-/****************************************************************
+/************************************************************//**
 Tries to find neigboring memory allocation blocks and dumps to stderr
 the neighborhood of a given pointer. */
-
+UNIV_INTERN
 void
 mem_analyze_corruption(
 /*===================*/
-	void*	ptr);	/* in: pointer to place of possible corruption */
-/*********************************************************************
+	void*	ptr);	/*!< in: pointer to place of possible corruption */
+/*****************************************************************//**
 Prints information of dynamic memory usage and currently allocated memory
 heaps or buffers. Can only be used in the debug version. */
-
+UNIV_INTERN
 void
 mem_print_info(void);
 /*================*/
-/*********************************************************************
+/*****************************************************************//**
 Prints information of dynamic memory usage and currently allocated memory
 heaps or buffers since the last ..._print_info or..._print_new_info. */
-
+UNIV_INTERN
 void
 mem_print_new_info(void);
 /*====================*/
diff --git a/storage/innobase/include/mem0dbg.ic b/storage/innodb_plugin/include/mem0dbg.ic
similarity index 55%
rename from storage/innobase/include/mem0dbg.ic
rename to storage/innodb_plugin/include/mem0dbg.ic
index e8a34adb3fa..cb9245411dc 100644
--- a/storage/innobase/include/mem0dbg.ic
+++ b/storage/innodb_plugin/include/mem0dbg.ic
@@ -1,62 +1,81 @@
-/************************************************************************
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/mem0dbg.ic
 The memory management: the debug code. This is not an independent
 compilation module but is included in mem0mem.*.
 
-(c) 1994, 1995 Innobase Oy
-
 Created 6/8/1994 Heikki Tuuri
 *************************************************************************/
 
 #ifdef UNIV_MEM_DEBUG
+# ifndef UNIV_HOTBACKUP
 extern mutex_t	mem_hash_mutex;
+# endif /* !UNIV_HOTBACKUP */
 extern ulint	mem_current_allocated_memory;
 
-/**********************************************************************
+/******************************************************************//**
 Initializes an allocated memory field in the debug version. */
-
+UNIV_INTERN
 void
 mem_field_init(
 /*===========*/
-	byte*	buf,	/* in: memory field */
-	ulint	n);	/* in: how many bytes the user requested */
-/**********************************************************************
+	byte*	buf,	/*!< in: memory field */
+	ulint	n);	/*!< in: how many bytes the user requested */
+/******************************************************************//**
 Erases an allocated memory field in the debug version. */
-
+UNIV_INTERN
 void
 mem_field_erase(
 /*============*/
-	byte*	buf,	/* in: memory field */
-	ulint	n);	/* in: how many bytes the user requested */
-/*******************************************************************
+	byte*	buf,	/*!< in: memory field */
+	ulint	n);	/*!< in: how many bytes the user requested */
+/***************************************************************//**
 Initializes a buffer to a random combination of hex BA and BE.
 Used to initialize allocated memory. */
-
+UNIV_INTERN
 void
 mem_init_buf(
 /*=========*/
-	byte*	buf,	/* in: pointer to buffer */
-	ulint	 n);	 /* in: length of buffer */
-/*******************************************************************
+	byte*	buf,	/*!< in: pointer to buffer */
+	ulint	 n);	 /*!< in: length of buffer */
+/***************************************************************//**
 Initializes a buffer to a random combination of hex DE and AD.
-Used to erase freed memory.*/
-
+Used to erase freed memory. */
+UNIV_INTERN
 void
 mem_erase_buf(
 /*==========*/
-	byte*	buf,	/* in: pointer to buffer */
-	ulint	 n);	 /* in: length of buffer */
-/*******************************************************************
+	byte*	buf,	/*!< in: pointer to buffer */
+	ulint	n);	/*!< in: length of buffer */
+/***************************************************************//**
 Inserts a created memory heap to the hash table of
 current allocated memory heaps.
 Initializes the hash table when first called. */
-
+UNIV_INTERN
 void
 mem_hash_insert(
 /*============*/
-	mem_heap_t*	heap,	   /* in: the created heap */
-	const char*	file_name, /* in: file name of creation */
-	ulint		line);	   /* in: line where created */
-/*******************************************************************
+	mem_heap_t*	heap,	   /*!< in: the created heap */
+	const char*	file_name, /*!< in: file name of creation */
+	ulint		line);	   /*!< in: line where created */
+/***************************************************************//**
 Removes a memory heap (which is going to be freed by the caller)
 from the list of live memory heaps. Returns the size of the heap
 in terms of how much memory in bytes was allocated for the user of
@@ -64,13 +83,13 @@ the heap (not the total space occupied by the heap).
 Also validates the heap.
 NOTE: This function does not free the storage occupied by the
 heap itself, only the node in the list of heaps. */
-
+UNIV_INTERN
 void
 mem_hash_remove(
 /*============*/
-	mem_heap_t*	heap,	   /* in: the heap to be freed */
-	const char*	file_name, /* in: file name of freeing */
-	ulint		line);	   /* in: line where freed */
+	mem_heap_t*	heap,	   /*!< in: the heap to be freed */
+	const char*	file_name, /*!< in: file name of freeing */
+	ulint		line);	   /*!< in: line where freed */
 
 
 void
diff --git a/storage/innobase/include/mem0mem.h b/storage/innodb_plugin/include/mem0mem.h
similarity index 56%
rename from storage/innobase/include/mem0mem.h
rename to storage/innodb_plugin/include/mem0mem.h
index 2d5fd1db6c3..a092b024219 100644
--- a/storage/innobase/include/mem0mem.h
+++ b/storage/innodb_plugin/include/mem0mem.h
@@ -1,7 +1,24 @@
-/******************************************************
-The memory management
+/*****************************************************************************
 
-(c) 1994, 1995 Innobase Oy
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mem0mem.h
+The memory management
 
 Created 6/9/1994 Heikki Tuuri
 *******************************************************/
@@ -12,9 +29,10 @@ Created 6/9/1994 Heikki Tuuri
 #include "univ.i"
 #include "ut0mem.h"
 #include "ut0byte.h"
-#include "ut0ut.h"
 #include "ut0rnd.h"
-#include "sync0sync.h"
+#ifndef UNIV_HOTBACKUP
+# include "sync0sync.h"
+#endif /* UNIV_HOTBACKUP */
 #include "ut0lst.h"
 #include "mach0data.h"
 
@@ -50,84 +68,64 @@ create. The standard size is the maximum (payload) size of the blocks used for
 allocations of small buffers. */
 
 #define MEM_BLOCK_START_SIZE		64
-#define MEM_BLOCK_STANDARD_SIZE		8000
+#define MEM_BLOCK_STANDARD_SIZE		\
+	(UNIV_PAGE_SIZE >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF)
 
 /* If a memory heap is allowed to grow into the buffer pool, the following
 is the maximum size for a single allocated buffer: */
 #define MEM_MAX_ALLOC_IN_BUF		(UNIV_PAGE_SIZE - 200)
 
-/**********************************************************************
+/******************************************************************//**
 Initializes the memory system. */
-
+UNIV_INTERN
 void
 mem_init(
 /*=====*/
-	ulint	size);	/* in: common pool size in bytes */
-/******************************************************************
+	ulint	size);	/*!< in: common pool size in bytes */
+/**************************************************************//**
 Use this macro instead of the corresponding function! Macro for memory
 heap creation. */
 
 #define mem_heap_create(N)	mem_heap_create_func(\
-		(N), NULL, MEM_HEAP_DYNAMIC, __FILE__, __LINE__)
-/******************************************************************
+		(N), MEM_HEAP_DYNAMIC, __FILE__, __LINE__)
+/**************************************************************//**
 Use this macro instead of the corresponding function! Macro for memory
 heap creation. */
 
 #define mem_heap_create_in_buffer(N)	mem_heap_create_func(\
-		(N), NULL, MEM_HEAP_BUFFER, __FILE__, __LINE__)
-/******************************************************************
+		(N), MEM_HEAP_BUFFER, __FILE__, __LINE__)
+/**************************************************************//**
 Use this macro instead of the corresponding function! Macro for memory
 heap creation. */
 
 #define mem_heap_create_in_btr_search(N)	mem_heap_create_func(\
-		(N), NULL, MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER,\
+		(N), MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER,\
 		__FILE__, __LINE__)
-/******************************************************************
-Use this macro instead of the corresponding function! Macro for fast
-memory heap creation. An initial block of memory B is given by the
-caller, N is its size, and this memory block is not freed by
-mem_heap_free. See the parameter comment in mem_heap_create_func below. */
 
-#define mem_heap_fast_create(N, B)	mem_heap_create_func(\
-		(N), (B), MEM_HEAP_DYNAMIC, __FILE__, __LINE__)
-
-/******************************************************************
+/**************************************************************//**
 Use this macro instead of the corresponding function! Macro for memory
 heap freeing. */
 
 #define mem_heap_free(heap) mem_heap_free_func(\
 					  (heap), __FILE__, __LINE__)
-/*********************************************************************
+/*****************************************************************//**
 NOTE: Use the corresponding macros instead of this function. Creates a
 memory heap. For debugging purposes, takes also the file name and line as
-arguments. */
+arguments.
+@return own: memory heap, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
 UNIV_INLINE
 mem_heap_t*
 mem_heap_create_func(
 /*=================*/
-					/* out, own: memory heap, NULL if
-					did not succeed (only possible for
-					MEM_HEAP_BTR_SEARCH type heaps)*/
-	ulint		n,		/* in: desired start block size,
+	ulint		n,		/*!< in: desired start block size,
 					this means that a single user buffer
 					of size n will fit in the block,
-					0 creates a default size block;
-					if init_block is not NULL, n tells
-					its size in bytes */
-	void*		init_block,	/* in: if very fast creation is
-					wanted, the caller can reserve some
-					memory from its stack, for example,
-					and pass it as the the initial block
-					to the heap: then no OS call of malloc
-					is needed at the creation. CAUTION:
-					the caller must make sure the initial
-					block is not unintentionally erased
-					(if allocated in the stack), before
-					the memory heap is explicitly freed. */
-	ulint		type,		/* in: heap type */
-	const char*	file_name,	/* in: file name where created */
-	ulint		line);		/* in: line where created */
-/*********************************************************************
+					0 creates a default size block */
+	ulint		type,		/*!< in: heap type */
+	const char*	file_name,	/*!< in: file name where created */
+	ulint		line);		/*!< in: line where created */
+/*****************************************************************//**
 NOTE: Use the corresponding macro instead of this function. Frees the space
 occupied by a memory heap. In the debug version erases the heap memory
 blocks. */
@@ -135,31 +133,41 @@ UNIV_INLINE
 void
 mem_heap_free_func(
 /*===============*/
-	mem_heap_t*	heap,		/* in, own: heap to be freed */
-	const char*	file_name,	/* in: file name where freed */
-	ulint		line);		/* in: line where freed */
-/*******************************************************************
-Allocates n bytes of memory from a memory heap. */
+	mem_heap_t*	heap,		/*!< in, own: heap to be freed */
+	const char*	file_name,	/*!< in: file name where freed */
+	ulint		line);		/*!< in: line where freed */
+/***************************************************************//**
+Allocates and zero-fills n bytes of memory from a memory heap.
+@return	allocated, zero-filled storage */
+UNIV_INLINE
+void*
+mem_heap_zalloc(
+/*============*/
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n);	/*!< in: number of bytes; if the heap is allowed
+				to grow into the buffer pool, this must be
+				<= MEM_MAX_ALLOC_IN_BUF */
+/***************************************************************//**
+Allocates n bytes of memory from a memory heap.
+@return allocated storage, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
 UNIV_INLINE
 void*
 mem_heap_alloc(
 /*===========*/
-				/* out: allocated storage, NULL if did not
-				succeed (only possible for
-				MEM_HEAP_BTR_SEARCH type heaps) */
-	mem_heap_t*	heap,	/* in: memory heap */
-	ulint		n);	/* in: number of bytes; if the heap is allowed
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n);	/*!< in: number of bytes; if the heap is allowed
 				to grow into the buffer pool, this must be
 				<= MEM_MAX_ALLOC_IN_BUF */
-/*********************************************************************
-Returns a pointer to the heap top. */
+/*****************************************************************//**
+Returns a pointer to the heap top.
+@return	pointer to the heap top */
 UNIV_INLINE
 byte*
 mem_heap_get_heap_top(
 /*==================*/
-				/* out: pointer to the heap top */
-	mem_heap_t*	heap);	/* in: memory heap */
-/*********************************************************************
+	mem_heap_t*	heap);	/*!< in: memory heap */
+/*****************************************************************//**
 Frees the space in a memory heap exceeding the pointer given. The
 pointer must have been acquired from mem_heap_get_heap_top. The first
 memory block of the heap is not freed. */
@@ -167,86 +175,71 @@ UNIV_INLINE
 void
 mem_heap_free_heap_top(
 /*===================*/
-	mem_heap_t*	heap,	/* in: heap from which to free */
-	byte*		old_top);/* in: pointer to old top of heap */
-/*********************************************************************
+	mem_heap_t*	heap,	/*!< in: heap from which to free */
+	byte*		old_top);/*!< in: pointer to old top of heap */
+/*****************************************************************//**
 Empties a memory heap. The first memory block of the heap is not freed. */
 UNIV_INLINE
 void
 mem_heap_empty(
 /*===========*/
-	mem_heap_t*	heap);	/* in: heap to empty */
-/*********************************************************************
+	mem_heap_t*	heap);	/*!< in: heap to empty */
+/*****************************************************************//**
 Returns a pointer to the topmost element in a memory heap.
-The size of the element must be given. */
+The size of the element must be given.
+@return	pointer to the topmost element */
 UNIV_INLINE
 void*
 mem_heap_get_top(
 /*=============*/
-				/* out: pointer to the topmost element */
-	mem_heap_t*	heap,	/* in: memory heap */
-	ulint		n);	/* in: size of the topmost element */
-/*********************************************************************
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n);	/*!< in: size of the topmost element */
+/*****************************************************************//**
 Frees the topmost element in a memory heap.
 The size of the element must be given. */
 UNIV_INLINE
 void
 mem_heap_free_top(
 /*==============*/
-	mem_heap_t*	heap,	/* in: memory heap */
-	ulint		n);	/* in: size of the topmost element */
-/*********************************************************************
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n);	/*!< in: size of the topmost element */
+/*****************************************************************//**
 Returns the space in bytes occupied by a memory heap. */
 UNIV_INLINE
 ulint
 mem_heap_get_size(
 /*==============*/
-	mem_heap_t*	heap);		/* in: heap */
-/******************************************************************
+	mem_heap_t*	heap);		/*!< in: heap */
+/**************************************************************//**
 Use this macro instead of the corresponding function!
 Macro for memory buffer allocation */
 
-#define mem_alloc(N)	mem_alloc_func((N), __FILE__, __LINE__)
-/******************************************************************
-Use this macro instead of the corresponding function!
-Macro for memory buffer allocation */
+#define mem_zalloc(N)	memset(mem_alloc(N), 0, (N));
 
-#define mem_alloc_noninline(N)	  mem_alloc_func_noninline(\
-					  (N), __FILE__, __LINE__)
-/*******************************************************************
+#define mem_alloc(N)	mem_alloc_func((N), NULL, __FILE__, __LINE__)
+#define mem_alloc2(N,S)	mem_alloc_func((N), (S), __FILE__, __LINE__)
+/***************************************************************//**
 NOTE: Use the corresponding macro instead of this function.
 Allocates a single buffer of memory from the dynamic memory of
 the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free. */
+with mem_free.
+@return	own: free storage */
 UNIV_INLINE
 void*
 mem_alloc_func(
 /*===========*/
-					/* out, own: free storage */
-	ulint		n,		/* in: desired number of bytes */
-	const char*	file_name,	/* in: file name where created */
-	ulint		line		/* in: line where created */
-);
-/*******************************************************************
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free. */
+	ulint		n,		/*!< in: requested size in bytes */
+	ulint*		size,		/*!< out: allocated size in bytes,
+					or NULL */
+	const char*	file_name,	/*!< in: file name where created */
+	ulint		line);		/*!< in: line where created */
 
-void*
-mem_alloc_func_noninline(
-/*=====================*/
-					/* out, own: free storage */
-	ulint		n,		/* in: desired number of bytes */
-	const char*	file_name,	/* in: file name where created */
-	ulint		line		/* in: line where created */
-	);
-/******************************************************************
+/**************************************************************//**
 Use this macro instead of the corresponding function!
 Macro for memory buffer freeing */
 
 #define mem_free(PTR)	mem_free_func((PTR), __FILE__, __LINE__)
-/*******************************************************************
+/***************************************************************//**
 NOTE: Use the corresponding macro instead of this function.
 Frees a single buffer of storage from
 the dynamic memory of C compiler. Similar to free of C. */
@@ -254,106 +247,90 @@ UNIV_INLINE
 void
 mem_free_func(
 /*==========*/
-	void*		ptr,		/* in, own: buffer to be freed */
-	const char*	file_name,	/* in: file name where created */
-	ulint		line		/* in: line where created */
-);
+	void*		ptr,		/*!< in, own: buffer to be freed */
+	const char*	file_name,	/*!< in: file name where created */
+	ulint		line);		/*!< in: line where created */
 
-/**************************************************************************
-Duplicates a NUL-terminated string. */
+/**********************************************************************//**
+Duplicates a NUL-terminated string.
+@return	own: a copy of the string, must be deallocated with mem_free */
 UNIV_INLINE
 char*
 mem_strdup(
 /*=======*/
-				/* out, own: a copy of the string,
-				must be deallocated with mem_free */
-	const char*	str);	/* in: string to be copied */
-/**************************************************************************
-Makes a NUL-terminated copy of a nonterminated string. */
+	const char*	str);	/*!< in: string to be copied */
+/**********************************************************************//**
+Makes a NUL-terminated copy of a nonterminated string.
+@return	own: a copy of the string, must be deallocated with mem_free */
 UNIV_INLINE
 char*
 mem_strdupl(
 /*========*/
-				/* out, own: a copy of the string,
-				must be deallocated with mem_free */
-	const char*	str,	/* in: string to be copied */
-	ulint		len);	/* in: length of str, in bytes */
-
-/**************************************************************************
-Duplicates a NUL-terminated string, allocated from a memory heap. */
+	const char*	str,	/*!< in: string to be copied */
+	ulint		len);	/*!< in: length of str, in bytes */
 
+/**********************************************************************//**
+Duplicates a NUL-terminated string, allocated from a memory heap.
+@return	own: a copy of the string */
+UNIV_INTERN
 char*
 mem_heap_strdup(
 /*============*/
-				/* out, own: a copy of the string */
-	mem_heap_t*	heap,	/* in: memory heap where string is allocated */
-	const char*	str);	/* in: string to be copied */
-/**************************************************************************
+	mem_heap_t*	heap,	/*!< in: memory heap where string is allocated */
+	const char*	str);	/*!< in: string to be copied */
+/**********************************************************************//**
 Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap. */
+allocated from a memory heap.
+@return	own: a copy of the string */
 UNIV_INLINE
 char*
 mem_heap_strdupl(
 /*=============*/
-				/* out, own: a copy of the string */
-	mem_heap_t*	heap,	/* in: memory heap where string is allocated */
-	const char*	str,	/* in: string to be copied */
-	ulint		len);	/* in: length of str, in bytes */
-
-/**************************************************************************
-Concatenate two strings and return the result, using a memory heap. */
+	mem_heap_t*	heap,	/*!< in: memory heap where string is allocated */
+	const char*	str,	/*!< in: string to be copied */
+	ulint		len);	/*!< in: length of str, in bytes */
 
+/**********************************************************************//**
+Concatenate two strings and return the result, using a memory heap.
+@return	own: the result */
+UNIV_INTERN
 char*
 mem_heap_strcat(
 /*============*/
-				/* out, own: the result */
-	mem_heap_t*	heap,	/* in: memory heap where string is allocated */
-	const char*	s1,	/* in: string 1 */
-	const char*	s2);	/* in: string 2 */
-
-/**************************************************************************
-Duplicate a block of data, allocated from a memory heap. */
+	mem_heap_t*	heap,	/*!< in: memory heap where string is allocated */
+	const char*	s1,	/*!< in: string 1 */
+	const char*	s2);	/*!< in: string 2 */
 
+/**********************************************************************//**
+Duplicate a block of data, allocated from a memory heap.
+@return	own: a copy of the data */
+UNIV_INTERN
 void*
 mem_heap_dup(
 /*=========*/
-				/* out, own: a copy of the data */
-	mem_heap_t*	heap,	/* in: memory heap where copy is allocated */
-	const void*	data,	/* in: data to be copied */
-	ulint		len);	/* in: length of data, in bytes */
+	mem_heap_t*	heap,	/*!< in: memory heap where copy is allocated */
+	const void*	data,	/*!< in: data to be copied */
+	ulint		len);	/*!< in: length of data, in bytes */
 
-/**************************************************************************
-Concatenate two memory blocks and return the result, using a memory heap. */
-
-void*
-mem_heap_cat(
-/*=========*/
-				/* out, own: the result */
-	mem_heap_t*	heap,	/* in: memory heap where result is allocated */
-	const void*	b1,	/* in: block 1 */
-	ulint		len1,	/* in: length of b1, in bytes */
-	const void*	b2,	/* in: block 2 */
-	ulint		len2);	/* in: length of b2, in bytes */
-
-/********************************************************************
+/****************************************************************//**
 A simple (s)printf replacement that dynamically allocates the space for the
 formatted string from the given heap. This supports a very limited set of
 the printf syntax: types 's' and 'u' and length modifier 'l' (which is
-required for the 'u' type). */
-
+required for the 'u' type).
+@return	heap-allocated formatted string */
+UNIV_INTERN
 char*
 mem_heap_printf(
 /*============*/
-				/* out: heap-allocated formatted string */
-	mem_heap_t*	heap,	/* in: memory heap */
-	const char*	format,	/* in: format string */
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	const char*	format,	/*!< in: format string */
 	...) __attribute__ ((format (printf, 2, 3)));
 
 #ifdef MEM_PERIODIC_CHECK
-/**********************************************************************
+/******************************************************************//**
 Goes through the list of all allocated mem blocks, checks their magic
 numbers, and reports possible corruption. */
-
+UNIV_INTERN
 void
 mem_validate_all_blocks(void);
 /*=========================*/
@@ -366,7 +343,7 @@ mem_validate_all_blocks(void);
 struct mem_block_info_struct {
 	ulint	magic_n;/* magic number for debugging */
 	char	file_name[8];/* file name where the mem heap was created */
-	ulint	line;	/* line number where the mem heap was created */
+	ulint	line;	/*!< line number where the mem heap was created */
 	UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block in the
 			the list this is the base node of the list of blocks;
 			in subsequent blocks this is undefined */
@@ -374,22 +351,25 @@ struct mem_block_info_struct {
 			and prev in the list. The first block allocated
 			to the heap is also the first block in this list,
 			though it also contains the base node of the list. */
-	ulint	len;	/* physical length of this block in bytes */
-	ulint	type;	/* type of heap: MEM_HEAP_DYNAMIC, or
+	ulint	len;	/*!< physical length of this block in bytes */
+	ulint	type;	/*!< type of heap: MEM_HEAP_DYNAMIC, or
 			MEM_HEAP_BUF possibly ORed to MEM_HEAP_BTR_SEARCH */
-	ibool	init_block; /* TRUE if this is the first block used in fast
-			creation of a heap: the memory will be freed
-			by the creator, not by mem_heap_free */
-	ulint	free;	/* offset in bytes of the first free position for
+	ulint	free;	/*!< offset in bytes of the first free position for
 			user data in the block */
-	ulint	start;	/* the value of the struct field 'free' at the
+	ulint	start;	/*!< the value of the struct field 'free' at the
 			creation of the block */
-	byte*	free_block;
+#ifndef UNIV_HOTBACKUP
+	void*	free_block;
 			/* if the MEM_HEAP_BTR_SEARCH bit is set in type,
 			and this is the heap root, this can contain an
 			allocated buffer frame, which can be appended as a
 			free block to the heap, if we need more space;
 			otherwise, this is NULL */
+	void*	buf_block;
+			/* if this block has been allocated from the buffer
+			pool, this contains the buf_block_t handle;
+			otherwise, this is NULL */
+#endif /* !UNIV_HOTBACKUP */
 #ifdef MEM_PERIODIC_CHECK
 	UT_LIST_NODE_T(mem_block_t) mem_block_list;
 			/* List of all mem blocks allocated; protected
diff --git a/storage/innobase/include/mem0mem.ic b/storage/innodb_plugin/include/mem0mem.ic
similarity index 66%
rename from storage/innobase/include/mem0mem.ic
rename to storage/innodb_plugin/include/mem0mem.ic
index 6227a27f277..e7080d8c508 100644
--- a/storage/innobase/include/mem0mem.ic
+++ b/storage/innodb_plugin/include/mem0mem.ic
@@ -1,60 +1,75 @@
-/************************************************************************
-The memory management
+/*****************************************************************************
 
-(c) 1994, 1995 Innobase Oy
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/mem0mem.ic
+The memory management
 
 Created 6/8/1994 Heikki Tuuri
 *************************************************************************/
 
 #include "mem0dbg.ic"
+#ifndef UNIV_HOTBACKUP
+# include "mem0pool.h"
+#endif /* !UNIV_HOTBACKUP */
 
-#include "mem0pool.h"
-
-/*******************************************************************
-Creates a memory heap block where data can be allocated. */
-
+/***************************************************************//**
+Creates a memory heap block where data can be allocated.
+@return own: memory heap block, NULL if did not succeed (only possible
+for MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INTERN
 mem_block_t*
 mem_heap_create_block(
 /*==================*/
-				/* out, own: memory heap block, NULL if
-				did not succeed (only possible for
-				MEM_HEAP_BTR_SEARCH type heaps) */
-	mem_heap_t*	heap,	/* in: memory heap or NULL if first block
+	mem_heap_t*	heap,	/*!< in: memory heap or NULL if first block
 				should be created */
-	ulint		n,	/* in: number of bytes needed for user data, or
-				if init_block is not NULL, its size in bytes */
-	void*		init_block, /* in: init block in fast create,
-				type must be MEM_HEAP_DYNAMIC */
-	ulint		type,	/* in: type of heap: MEM_HEAP_DYNAMIC or
+	ulint		n,	/*!< in: number of bytes needed for user data */
+	ulint		type,	/*!< in: type of heap: MEM_HEAP_DYNAMIC or
 				MEM_HEAP_BUFFER */
-	const char*	file_name,/* in: file name where created */
-	ulint		line);	/* in: line where created */
-/**********************************************************************
+	const char*	file_name,/*!< in: file name where created */
+	ulint		line);	/*!< in: line where created */
+/******************************************************************//**
 Frees a block from a memory heap. */
-
+UNIV_INTERN
 void
 mem_heap_block_free(
 /*================*/
-	mem_heap_t*	heap,	/* in: heap */
-	mem_block_t*	block);	/* in: block to free */
-/**********************************************************************
+	mem_heap_t*	heap,	/*!< in: heap */
+	mem_block_t*	block);	/*!< in: block to free */
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
 Frees the free_block field from a memory heap. */
-
+UNIV_INTERN
 void
 mem_heap_free_block_free(
 /*=====================*/
-	mem_heap_t*	heap);	/* in: heap */
-/*******************************************************************
-Adds a new block to a memory heap. */
-
+	mem_heap_t*	heap);	/*!< in: heap */
+#endif /* !UNIV_HOTBACKUP */
+/***************************************************************//**
+Adds a new block to a memory heap.
+@return created block, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INTERN
 mem_block_t*
 mem_heap_add_block(
 /*===============*/
-				/* out: created block, NULL if did not
-				succeed (only possible for
-				MEM_HEAP_BTR_SEARCH type heaps)*/
-	mem_heap_t*	heap,	/* in: memory heap */
-	ulint		n);	/* in: number of bytes user needs */
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n);	/*!< in: number of bytes user needs */
 
 UNIV_INLINE
 void
@@ -122,17 +137,33 @@ mem_block_get_start(mem_block_t* block)
 	return(block->start);
 }
 
-/*******************************************************************
-Allocates n bytes of memory from a memory heap. */
+/***************************************************************//**
+Allocates and zero-fills n bytes of memory from a memory heap.
+@return	allocated, zero-filled storage */
+UNIV_INLINE
+void*
+mem_heap_zalloc(
+/*============*/
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n)	/*!< in: number of bytes; if the heap is allowed
+				to grow into the buffer pool, this must be
+				<= MEM_MAX_ALLOC_IN_BUF */
+{
+	ut_ad(heap);
+	ut_ad(!(heap->type & MEM_HEAP_BTR_SEARCH));
+	return(memset(mem_heap_alloc(heap, n), 0, n));
+}
+
+/***************************************************************//**
+Allocates n bytes of memory from a memory heap.
+@return allocated storage, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
 UNIV_INLINE
 void*
 mem_heap_alloc(
 /*===========*/
-				/* out: allocated storage, NULL if did not
-				succeed (only possible for
-				MEM_HEAP_BTR_SEARCH type heaps) */
-	mem_heap_t*	heap,	/* in: memory heap */
-	ulint		n)	/* in: number of bytes; if the heap is allowed
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n)	/*!< in: number of bytes; if the heap is allowed
 				to grow into the buffer pool, this must be
 				<= MEM_MAX_ALLOC_IN_BUF */
 {
@@ -186,14 +217,14 @@ mem_heap_alloc(
 	return(buf);
 }
 
-/*********************************************************************
-Returns a pointer to the heap top. */
+/*****************************************************************//**
+Returns a pointer to the heap top.
+@return	pointer to the heap top */
 UNIV_INLINE
 byte*
 mem_heap_get_heap_top(
 /*==================*/
-				/* out: pointer to the heap top */
-	mem_heap_t*	heap)	/* in: memory heap */
+	mem_heap_t*	heap)	/*!< in: memory heap */
 {
 	mem_block_t*	block;
 	byte*		buf;
@@ -207,7 +238,7 @@ mem_heap_get_heap_top(
 	return(buf);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Frees the space in a memory heap exceeding the pointer given. The
 pointer must have been acquired from mem_heap_get_heap_top. The first
 memory block of the heap is not freed. */
@@ -215,8 +246,8 @@ UNIV_INLINE
 void
 mem_heap_free_heap_top(
 /*===================*/
-	mem_heap_t*	heap,	/* in: heap from which to free */
-	byte*		old_top)/* in: pointer to old top of heap */
+	mem_heap_t*	heap,	/*!< in: heap from which to free */
+	byte*		old_top)/*!< in: pointer to old top of heap */
 {
 	mem_block_t*	block;
 	mem_block_t*	prev_block;
@@ -291,31 +322,32 @@ mem_heap_free_heap_top(
 	}
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Empties a memory heap. The first memory block of the heap is not freed. */
 UNIV_INLINE
 void
 mem_heap_empty(
 /*===========*/
-	mem_heap_t*	heap)	/* in: heap to empty */
+	mem_heap_t*	heap)	/*!< in: heap to empty */
 {
 	mem_heap_free_heap_top(heap, (byte*)heap + mem_block_get_start(heap));
-
+#ifndef UNIV_HOTBACKUP
 	if (heap->free_block) {
 		mem_heap_free_block_free(heap);
 	}
+#endif /* !UNIV_HOTBACKUP */
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Returns a pointer to the topmost element in a memory heap. The size of the
-element must be given. */
+element must be given.
+@return	pointer to the topmost element */
 UNIV_INLINE
 void*
 mem_heap_get_top(
 /*=============*/
-				/* out: pointer to the topmost element */
-	mem_heap_t*	heap,	/* in: memory heap */
-	ulint		n)	/* in: size of the topmost element */
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n)	/*!< in: size of the topmost element */
 {
 	mem_block_t*	block;
 	void*		buf;
@@ -341,15 +373,15 @@ mem_heap_get_top(
 	return(buf);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Frees the topmost element in a memory heap. The size of the element must be
 given. */
 UNIV_INLINE
 void
 mem_heap_free_top(
 /*==============*/
-	mem_heap_t*	heap,	/* in: memory heap */
-	ulint		n)	/* in: size of the topmost element */
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n)	/*!< in: size of the topmost element */
 {
 	mem_block_t*	block;
 
@@ -384,48 +416,32 @@ mem_heap_free_top(
 	}
 }
 
-/*********************************************************************
+/*****************************************************************//**
 NOTE: Use the corresponding macros instead of this function. Creates a
 memory heap. For debugging purposes, takes also the file name and line as
-argument. */
+argument.
+@return own: memory heap, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
 UNIV_INLINE
 mem_heap_t*
 mem_heap_create_func(
 /*=================*/
-					/* out, own: memory heap, NULL if
-					did not succeed (only possible for
-					MEM_HEAP_BTR_SEARCH type heaps)*/
-	ulint		n,		/* in: desired start block size,
+	ulint		n,		/*!< in: desired start block size,
 					this means that a single user buffer
 					of size n will fit in the block,
-					0 creates a default size block;
-					if init_block is not NULL, n tells
-					its size in bytes */
-	void*		init_block,	/* in: if very fast creation is
-					wanted, the caller can reserve some
-					memory from its stack, for example,
-					and pass it as the the initial block
-					to the heap: then no OS call of malloc
-					is needed at the creation. CAUTION:
-					the caller must make sure the initial
-					block is not unintentionally erased
-					(if allocated in the stack), before
-					the memory heap is explicitly freed. */
-	ulint		type,		/* in: heap type */
-	const char*	file_name,	/* in: file name where created */
-	ulint		line)		/* in: line where created */
+					0 creates a default size block */
+	ulint		type,		/*!< in: heap type */
+	const char*	file_name,	/*!< in: file name where created */
+	ulint		line)		/*!< in: line where created */
 {
 	mem_block_t*   block;
 
-	if (n > 0) {
-		block = mem_heap_create_block(NULL, n, init_block, type,
-					      file_name, line);
-	} else {
-		block = mem_heap_create_block(NULL, MEM_BLOCK_START_SIZE,
-					      init_block, type,
-					      file_name, line);
+	if (!n) {
+		n = MEM_BLOCK_START_SIZE;
 	}
 
+	block = mem_heap_create_block(NULL, n, type, file_name, line);
+
 	if (block == NULL) {
 
 		return(NULL);
@@ -445,7 +461,7 @@ mem_heap_create_func(
 	return(block);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 NOTE: Use the corresponding macro instead of this function. Frees the space
 occupied by a memory heap. In the debug version erases the heap memory
 blocks. */
@@ -453,9 +469,9 @@ UNIV_INLINE
 void
 mem_heap_free_func(
 /*===============*/
-	mem_heap_t*	heap,		/* in, own: heap to be freed */
+	mem_heap_t*	heap,		/*!< in, own: heap to be freed */
 	const char*	file_name __attribute__((unused)),
-					/* in: file name where freed */
+					/*!< in: file name where freed */
 	ulint		line  __attribute__((unused)))
 {
 	mem_block_t*	block;
@@ -473,10 +489,11 @@ mem_heap_free_func(
 	mem_hash_remove(heap, file_name, line);
 
 #endif
-
+#ifndef UNIV_HOTBACKUP
 	if (heap->free_block) {
 		mem_heap_free_block_free(heap);
 	}
+#endif /* !UNIV_HOTBACKUP */
 
 	while (block != NULL) {
 		/* Store the contents of info before freeing current block
@@ -490,32 +507,44 @@ mem_heap_free_func(
 	}
 }
 
-/*******************************************************************
+/***************************************************************//**
 NOTE: Use the corresponding macro instead of this function.
 Allocates a single buffer of memory from the dynamic memory of
 the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free. */
+with mem_free.
+@return	own: free storage */
 UNIV_INLINE
 void*
 mem_alloc_func(
 /*===========*/
-					/* out, own: free storage */
-	ulint		n,		/* in: desired number of bytes */
-	const char*	file_name,	/* in: file name where created */
-	ulint		line		/* in: line where created */
-	)
+	ulint		n,		/*!< in: desired number of bytes */
+	ulint*		size,		/*!< out: allocated size in bytes,
+					or NULL */
+	const char*	file_name,	/*!< in: file name where created */
+	ulint		line)		/*!< in: line where created */
 {
 	mem_heap_t*	heap;
 	void*		buf;
 
-	heap = mem_heap_create_func(n, NULL, MEM_HEAP_DYNAMIC, file_name,
-				    line);
+	heap = mem_heap_create_func(n, MEM_HEAP_DYNAMIC, file_name, line);
 
 	/* Note that as we created the first block in the heap big enough
 	for the buffer requested by the caller, the buffer will be in the
 	first block and thus we can calculate the pointer to the heap from
 	the pointer to the buffer when we free the memory buffer. */
 
+	if (UNIV_LIKELY_NULL(size)) {
+		/* Adjust the allocation to the actual size of the
+		memory block. */
+		ulint	m = mem_block_get_len(heap)
+			- mem_block_get_free(heap);
+#ifdef UNIV_MEM_DEBUG
+		m -= MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE;
+#endif /* UNIV_MEM_DEBUG */
+		ut_ad(m >= n);
+		*size = n = m;
+	}
+
 	buf = mem_heap_alloc(heap, n);
 
 	ut_a((byte*)heap == (byte*)buf - MEM_BLOCK_HEADER_SIZE
@@ -523,7 +552,7 @@ mem_alloc_func(
 	return(buf);
 }
 
-/*******************************************************************
+/***************************************************************//**
 NOTE: Use the corresponding macro instead of this function. Frees a single
 buffer of storage from the dynamic memory of the C compiler. Similar to the
 free of C. */
@@ -531,10 +560,9 @@ UNIV_INLINE
 void
 mem_free_func(
 /*==========*/
-	void*		ptr,		/* in, own: buffer to be freed */
-	const char*	file_name,	/* in: file name where created */
-	ulint		line		/* in: line where created */
-	)
+	void*		ptr,		/*!< in, own: buffer to be freed */
+	const char*	file_name,	/*!< in: file name where created */
+	ulint		line)		/*!< in: line where created */
 {
 	mem_heap_t*   heap;
 
@@ -543,13 +571,13 @@ mem_free_func(
 	mem_heap_free_func(heap, file_name, line);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Returns the space in bytes occupied by a memory heap. */
 UNIV_INLINE
 ulint
 mem_heap_get_size(
 /*==============*/
-	mem_heap_t*	heap)	/* in: heap */
+	mem_heap_t*	heap)	/*!< in: heap */
 {
 	mem_block_t*	block;
 	ulint		size	= 0;
@@ -563,57 +591,56 @@ mem_heap_get_size(
 		size += mem_block_get_len(block);
 		block = UT_LIST_GET_NEXT(list, block);
 	}
-
+#ifndef UNIV_HOTBACKUP
 	if (heap->free_block) {
 		size += UNIV_PAGE_SIZE;
 	}
+#endif /* !UNIV_HOTBACKUP */
 
 	return(size);
 }
 
-/**************************************************************************
-Duplicates a NUL-terminated string. */
+/**********************************************************************//**
+Duplicates a NUL-terminated string.
+@return	own: a copy of the string, must be deallocated with mem_free */
 UNIV_INLINE
 char*
 mem_strdup(
 /*=======*/
-				/* out, own: a copy of the string,
-				must be deallocated with mem_free */
-	const char*	str)	/* in: string to be copied */
+	const char*	str)	/*!< in: string to be copied */
 {
 	ulint	len = strlen(str) + 1;
-	return(memcpy(mem_alloc(len), str, len));
+	return((char*) memcpy(mem_alloc(len), str, len));
 }
 
-/**************************************************************************
-Makes a NUL-terminated copy of a nonterminated string. */
+/**********************************************************************//**
+Makes a NUL-terminated copy of a nonterminated string.
+@return	own: a copy of the string, must be deallocated with mem_free */
 UNIV_INLINE
 char*
 mem_strdupl(
 /*========*/
-				/* out, own: a copy of the string,
-				must be deallocated with mem_free */
-	const char*	str,	/* in: string to be copied */
-	ulint		len)	/* in: length of str, in bytes */
+	const char*	str,	/*!< in: string to be copied */
+	ulint		len)	/*!< in: length of str, in bytes */
 {
-	char*	s = mem_alloc(len + 1);
+	char*	s = (char*) mem_alloc(len + 1);
 	s[len] = 0;
-	return(memcpy(s, str, len));
+	return((char*) memcpy(s, str, len));
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap. */
+allocated from a memory heap.
+@return	own: a copy of the string */
 UNIV_INLINE
 char*
 mem_heap_strdupl(
 /*=============*/
-				/* out, own: a copy of the string */
-	mem_heap_t*	heap,	/* in: memory heap where string is allocated */
-	const char*	str,	/* in: string to be copied */
-	ulint		len)	/* in: length of str, in bytes */
+	mem_heap_t*	heap,	/*!< in: memory heap where string is allocated */
+	const char*	str,	/*!< in: string to be copied */
+	ulint		len)	/*!< in: length of str, in bytes */
 {
-	char*	s = mem_heap_alloc(heap, len + 1);
+	char*	s = (char*) mem_heap_alloc(heap, len + 1);
 	s[len] = 0;
-	return(memcpy(s, str, len));
+	return((char*) memcpy(s, str, len));
 }
diff --git a/storage/innodb_plugin/include/mem0pool.h b/storage/innodb_plugin/include/mem0pool.h
new file mode 100644
index 00000000000..18f988241d6
--- /dev/null
+++ b/storage/innodb_plugin/include/mem0pool.h
@@ -0,0 +1,129 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mem0pool.h
+The lowest-level memory management
+
+Created 6/9/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef mem0pool_h
+#define mem0pool_h
+
+#include "univ.i"
+#include "os0file.h"
+#include "ut0lst.h"
+
+/** Memory area header */
+typedef struct mem_area_struct	mem_area_t;
+/** Memory pool */
+typedef struct mem_pool_struct	mem_pool_t;
+
+/** The common memory pool */
+extern mem_pool_t*	mem_comm_pool;
+
+/** Memory area header */
+
+struct mem_area_struct{
+	ulint		size_and_free;	/*!< memory area size is obtained by
+					anding with ~MEM_AREA_FREE; area in
+					a free list if ANDing with
+					MEM_AREA_FREE results in nonzero */
+	UT_LIST_NODE_T(mem_area_t)
+			free_list;	/*!< free list node */
+};
+
+/** Each memory area takes this many extra bytes for control information */
+#define MEM_AREA_EXTRA_SIZE	(ut_calc_align(sizeof(struct mem_area_struct),\
+			UNIV_MEM_ALIGNMENT))
+
+/********************************************************************//**
+Creates a memory pool.
+@return	memory pool */
+UNIV_INTERN
+mem_pool_t*
+mem_pool_create(
+/*============*/
+	ulint	size);	/*!< in: pool size in bytes */
+/********************************************************************//**
+Allocates memory from a pool. NOTE: This low-level function should only be
+used in mem0mem.*!
+@return	own: allocated memory buffer */
+UNIV_INTERN
+void*
+mem_area_alloc(
+/*===========*/
+	ulint*		psize,	/*!< in: requested size in bytes; for optimum
+				space usage, the size should be a power of 2
+				minus MEM_AREA_EXTRA_SIZE;
+				out: allocated size in bytes (greater than
+				or equal to the requested size) */
+	mem_pool_t*	pool);	/*!< in: memory pool */
+/********************************************************************//**
+Frees memory to a pool. */
+UNIV_INTERN
+void
+mem_area_free(
+/*==========*/
+	void*		ptr,	/*!< in, own: pointer to allocated memory
+				buffer */
+	mem_pool_t*	pool);	/*!< in: memory pool */
+/********************************************************************//**
+Returns the amount of reserved memory.
+@return	reserved mmeory in bytes */
+UNIV_INTERN
+ulint
+mem_pool_get_reserved(
+/*==================*/
+	mem_pool_t*	pool);	/*!< in: memory pool */
+/********************************************************************//**
+Reserves the mem pool mutex. */
+UNIV_INTERN
+void
+mem_pool_mutex_enter(void);
+/*======================*/
+/********************************************************************//**
+Releases the mem pool mutex. */
+UNIV_INTERN
+void
+mem_pool_mutex_exit(void);
+/*=====================*/
+/********************************************************************//**
+Validates a memory pool.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+mem_pool_validate(
+/*==============*/
+	mem_pool_t*	pool);	/*!< in: memory pool */
+/********************************************************************//**
+Prints info of a memory pool. */
+UNIV_INTERN
+void
+mem_pool_print_info(
+/*================*/
+	FILE*		outfile,/*!< in: output file to write to */
+	mem_pool_t*	pool);	/*!< in: memory pool */
+
+
+#ifndef UNIV_NONINL
+#include "mem0pool.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/mem0pool.ic b/storage/innodb_plugin/include/mem0pool.ic
new file mode 100644
index 00000000000..b891dd6dea0
--- /dev/null
+++ b/storage/innodb_plugin/include/mem0pool.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/mem0pool.ic
+The lowest-level memory management
+
+Created 6/8/1994 Heikki Tuuri
+*************************************************************************/
diff --git a/storage/innodb_plugin/include/mtr0log.h b/storage/innodb_plugin/include/mtr0log.h
new file mode 100644
index 00000000000..6322af2a569
--- /dev/null
+++ b/storage/innodb_plugin/include/mtr0log.h
@@ -0,0 +1,250 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0log.h
+Mini-transaction logging routines
+
+Created 12/7/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef mtr0log_h
+#define mtr0log_h
+
+#include "univ.i"
+#include "mtr0mtr.h"
+#include "dict0types.h"
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Writes 1 - 4 bytes to a file page buffered in the buffer pool.
+Writes the corresponding log record to the mini-transaction log. */
+UNIV_INTERN
+void
+mlog_write_ulint(
+/*=============*/
+	byte*	ptr,	/*!< in: pointer where to write */
+	ulint	val,	/*!< in: value to write */
+	byte	type,	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+	mtr_t*	mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Writes 8 bytes to a file page buffered in the buffer pool.
+Writes the corresponding log record to the mini-transaction log. */
+UNIV_INTERN
+void
+mlog_write_dulint(
+/*==============*/
+	byte*	ptr,	/*!< in: pointer where to write */
+	dulint	val,	/*!< in: value to write */
+	mtr_t*	mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Writes a string to a file page buffered in the buffer pool. Writes the
+corresponding log record to the mini-transaction log. */
+UNIV_INTERN
+void
+mlog_write_string(
+/*==============*/
+	byte*		ptr,	/*!< in: pointer where to write */
+	const byte*	str,	/*!< in: string to write */
+	ulint		len,	/*!< in: string length */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Logs a write of a string to a file page buffered in the buffer pool.
+Writes the corresponding log record to the mini-transaction log. */
+UNIV_INTERN
+void
+mlog_log_string(
+/*============*/
+	byte*	ptr,	/*!< in: pointer written to */
+	ulint	len,	/*!< in: string length */
+	mtr_t*	mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Writes initial part of a log record consisting of one-byte item
+type and four-byte space and page numbers. */
+UNIV_INTERN
+void
+mlog_write_initial_log_record(
+/*==========================*/
+	const byte*	ptr,	/*!< in: pointer to (inside) a buffer
+				frame holding the file page where
+				modification is made */
+	byte		type,	/*!< in: log item type: MLOG_1BYTE, ... */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Writes a log record about an .ibd file create/delete/rename.
+@return	new value of log_ptr */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_for_file_op(
+/*======================================*/
+	ulint	type,	/*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
+			MLOG_FILE_RENAME */
+	ulint	space_id,/*!< in: space id, if applicable */
+	ulint	page_no,/*!< in: page number (not relevant currently) */
+	byte*	log_ptr,/*!< in: pointer to mtr log which has been opened */
+	mtr_t*	mtr);	/*!< in: mtr */
+/********************************************************//**
+Catenates 1 - 4 bytes to the mtr log. */
+UNIV_INLINE
+void
+mlog_catenate_ulint(
+/*================*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	val,	/*!< in: value to write */
+	ulint	type);	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+/********************************************************//**
+Catenates n bytes to the mtr log. */
+UNIV_INTERN
+void
+mlog_catenate_string(
+/*=================*/
+	mtr_t*		mtr,	/*!< in: mtr */
+	const byte*	str,	/*!< in: string to write */
+	ulint		len);	/*!< in: string length */
+/********************************************************//**
+Catenates a compressed ulint to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_ulint_compressed(
+/*===========================*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	val);	/*!< in: value to write */
+/********************************************************//**
+Catenates a compressed dulint to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_dulint_compressed(
+/*============================*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	dulint	val);	/*!< in: value to write */
+/********************************************************//**
+Opens a buffer to mlog. It must be closed with mlog_close.
+@return	buffer, NULL if log mode MTR_LOG_NONE */
+UNIV_INLINE
+byte*
+mlog_open(
+/*======*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	size);	/*!< in: buffer size in bytes; MUST be
+			smaller than DYN_ARRAY_DATA_SIZE! */
+/********************************************************//**
+Closes a buffer opened to mlog. */
+UNIV_INLINE
+void
+mlog_close(
+/*=======*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	byte*	ptr);	/*!< in: buffer space from ptr up was not used */
+/********************************************************//**
+Writes the initial part of a log record (3..11 bytes).
+If the implementation of this function is changed, all
+size parameters to mlog_open() should be adjusted accordingly!
+@return	new value of log_ptr */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_fast(
+/*===============================*/
+	const byte*	ptr,	/*!< in: pointer to (inside) a buffer
+				frame holding the file page where
+				modification is made */
+	byte		type,	/*!< in: log item type: MLOG_1BYTE, ... */
+	byte*		log_ptr,/*!< in: pointer to mtr log which has
+				been opened */
+	mtr_t*		mtr);	/*!< in: mtr */
+#else /* !UNIV_HOTBACKUP */
+# define mlog_write_initial_log_record(ptr,type,mtr) ((void) 0)
+# define mlog_write_initial_log_record_fast(ptr,type,log_ptr,mtr) ((byte *) 0)
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************//**
+Parses an initial log record written by mlog_write_initial_log_record.
+@return	parsed record end, NULL if not a complete record */
+UNIV_INTERN
+byte*
+mlog_parse_initial_log_record(
+/*==========================*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	byte*	type,	/*!< out: log record type: MLOG_1BYTE, ... */
+	ulint*	space,	/*!< out: space id */
+	ulint*	page_no);/*!< out: page number */
+/********************************************************//**
+Parses a log record written by mlog_write_ulint or mlog_write_dulint.
+@return	parsed record end, NULL if not a complete record */
+UNIV_INTERN
+byte*
+mlog_parse_nbytes(
+/*==============*/
+	ulint	type,	/*!< in: log record type: MLOG_1BYTE, ... */
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	byte*	page,	/*!< in: page where to apply the log record, or NULL */
+	void*	page_zip);/*!< in/out: compressed page, or NULL */
+/********************************************************//**
+Parses a log record written by mlog_write_string.
+@return	parsed record end, NULL if not a complete record */
+UNIV_INTERN
+byte*
+mlog_parse_string(
+/*==============*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	byte*	page,	/*!< in: page where to apply the log record, or NULL */
+	void*	page_zip);/*!< in/out: compressed page, or NULL */
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Opens a buffer for mlog, writes the initial log record and,
+if needed, the field lengths of an index.  Reserves space
+for further log entries.  The log entry must be closed with
+mtr_close().
+@return	buffer, NULL if log mode MTR_LOG_NONE */
+UNIV_INTERN
+byte*
+mlog_open_and_write_index(
+/*======================*/
+	mtr_t*		mtr,	/*!< in: mtr */
+	const byte*	rec,	/*!< in: index record or page */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	byte		type,	/*!< in: log item type */
+	ulint		size);	/*!< in: requested buffer size in bytes
+				(if 0, calls mlog_close() and returns NULL) */
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************//**
+Parses a log record written by mlog_open_and_write_index.
+@return	parsed record end, NULL if not a complete record */
+UNIV_INTERN
+byte*
+mlog_parse_index(
+/*=============*/
+	byte*		ptr,	/*!< in: buffer */
+	const byte*	end_ptr,/*!< in: buffer end */
+	ibool		comp,	/*!< in: TRUE=compact record format */
+	dict_index_t**	index);	/*!< out, own: dummy index */
+
+#ifndef UNIV_HOTBACKUP
+/* Insert, update, and maybe other functions may use this value to define an
+extra mlog buffer size for variable size data */
+#define MLOG_BUF_MARGIN	256
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "mtr0log.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/mtr0log.ic b/storage/innodb_plugin/include/mtr0log.ic
new file mode 100644
index 00000000000..5c24c38b337
--- /dev/null
+++ b/storage/innodb_plugin/include/mtr0log.ic
@@ -0,0 +1,274 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0log.ic
+Mini-transaction logging routines
+
+Created 12/7/1995 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+#include "ut0lst.h"
+#include "buf0buf.h"
+#include "fsp0types.h"
+#include "trx0sys.h"
+
+/********************************************************//**
+Opens a buffer to mlog. It must be closed with mlog_close.
+@return	buffer, NULL if log mode MTR_LOG_NONE */
+UNIV_INLINE
+byte*
+mlog_open(
+/*======*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	size)	/*!< in: buffer size in bytes; MUST be
+			smaller than DYN_ARRAY_DATA_SIZE! */
+{
+	dyn_array_t*	mlog;
+
+	mtr->modifications = TRUE;
+
+	if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
+
+		return(NULL);
+	}
+
+	mlog = &(mtr->log);
+
+	return(dyn_array_open(mlog, size));
+}
+
+/********************************************************//**
+Closes a buffer opened to mlog. */
+UNIV_INLINE
+void
+mlog_close(
+/*=======*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	byte*	ptr)	/*!< in: buffer space from ptr up was not used */
+{
+	dyn_array_t*	mlog;
+
+	ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NONE);
+
+	mlog = &(mtr->log);
+
+	dyn_array_close(mlog, ptr);
+}
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */
+UNIV_INLINE
+void
+mlog_catenate_ulint(
+/*================*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	val,	/*!< in: value to write */
+	ulint	type)	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+{
+	dyn_array_t*	mlog;
+	byte*		ptr;
+
+	if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
+
+		return;
+	}
+
+	mlog = &(mtr->log);
+
+#if MLOG_1BYTE != 1
+# error "MLOG_1BYTE != 1"
+#endif
+#if MLOG_2BYTES != 2
+# error "MLOG_2BYTES != 2"
+#endif
+#if MLOG_4BYTES != 4
+# error "MLOG_4BYTES != 4"
+#endif
+#if MLOG_8BYTES != 8
+# error "MLOG_8BYTES != 8"
+#endif
+	ptr = (byte*) dyn_array_push(mlog, type);
+
+	if (type == MLOG_4BYTES) {
+		mach_write_to_4(ptr, val);
+	} else if (type == MLOG_2BYTES) {
+		mach_write_to_2(ptr, val);
+	} else {
+		ut_ad(type == MLOG_1BYTE);
+		mach_write_to_1(ptr, val);
+	}
+}
+
+/********************************************************//**
+Catenates a compressed ulint to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_ulint_compressed(
+/*===========================*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	val)	/*!< in: value to write */
+{
+	byte*	log_ptr;
+
+	log_ptr = mlog_open(mtr, 10);
+
+	/* If no logging is requested, we may return now */
+	if (log_ptr == NULL) {
+
+		return;
+	}
+
+	log_ptr += mach_write_compressed(log_ptr, val);
+
+	mlog_close(mtr, log_ptr);
+}
+
+/********************************************************//**
+Catenates a compressed dulint to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_dulint_compressed(
+/*============================*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	dulint	val)	/*!< in: value to write */
+{
+	byte*	log_ptr;
+
+	log_ptr = mlog_open(mtr, 15);
+
+	/* If no logging is requested, we may return now */
+	if (log_ptr == NULL) {
+
+		return;
+	}
+
+	log_ptr += mach_dulint_write_compressed(log_ptr, val);
+
+	mlog_close(mtr, log_ptr);
+}
+
+/********************************************************//**
+Writes the initial part of a log record (3..11 bytes).
+If the implementation of this function is changed, all
+size parameters to mlog_open() should be adjusted accordingly!
+@return	new value of log_ptr */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_fast(
+/*===============================*/
+	const byte*	ptr,	/*!< in: pointer to (inside) a buffer
+				frame holding the file page where
+				modification is made */
+	byte		type,	/*!< in: log item type: MLOG_1BYTE, ... */
+	byte*		log_ptr,/*!< in: pointer to mtr log which has
+				been opened */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+#ifdef UNIV_DEBUG
+	buf_block_t*	block;
+#endif
+	const byte*	page;
+	ulint		space;
+	ulint		offset;
+
+	ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(type <= MLOG_BIGGEST_TYPE);
+	ut_ad(ptr && log_ptr);
+
+	page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE);
+	space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+	offset = mach_read_from_4(page + FIL_PAGE_OFFSET);
+
+	/* check whether the page is in the doublewrite buffer;
+	the doublewrite buffer is located in pages
+	FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the
+	system tablespace */
+	if (space == TRX_SYS_SPACE
+	    && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
+		if (trx_doublewrite_buf_is_being_created) {
+			/* Do nothing: we only come to this branch in an
+			InnoDB database creation. We do not redo log
+			anything for the doublewrite buffer pages. */
+			return(log_ptr);
+		} else {
+			fprintf(stderr,
+				"Error: trying to redo log a record of type "
+				"%d on page %lu of space %lu in the "
+				"doublewrite buffer, continuing anyway.\n"
+				"Please post a bug report to "
+				"bugs.mysql.com.\n",
+				type, offset, space);
+		}
+	}
+
+	mach_write_to_1(log_ptr, type);
+	log_ptr++;
+	log_ptr += mach_write_compressed(log_ptr, space);
+	log_ptr += mach_write_compressed(log_ptr, offset);
+
+	mtr->n_log_recs++;
+
+#ifdef UNIV_LOG_DEBUG
+	fprintf(stderr,
+		"Adding to mtr log record type %lu space %lu page no %lu\n",
+		(ulong) type, space, offset);
+#endif
+
+#ifdef UNIV_DEBUG
+	/* We now assume that all x-latched pages have been modified! */
+	block = (buf_block_t*) buf_block_align(ptr);
+
+	if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) {
+
+		mtr_memo_push(mtr, block, MTR_MEMO_MODIFY);
+	}
+#endif
+	return(log_ptr);
+}
+
+/********************************************************//**
+Writes a log record about an .ibd file create/delete/rename.
+@return	new value of log_ptr */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_for_file_op(
+/*======================================*/
+	ulint	type,	/*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
+			MLOG_FILE_RENAME */
+	ulint	space_id,/*!< in: space id, if applicable */
+	ulint	page_no,/*!< in: page number (not relevant currently) */
+	byte*	log_ptr,/*!< in: pointer to mtr log which has been opened */
+	mtr_t*	mtr)	/*!< in: mtr */
+{
+	ut_ad(log_ptr);
+
+	mach_write_to_1(log_ptr, type);
+	log_ptr++;
+
+	/* We write dummy space id and page number */
+	log_ptr += mach_write_compressed(log_ptr, space_id);
+	log_ptr += mach_write_compressed(log_ptr, page_no);
+
+	mtr->n_log_recs++;
+
+	return(log_ptr);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/mtr0mtr.h b/storage/innodb_plugin/include/mtr0mtr.h
new file mode 100644
index 00000000000..69a2c03f4cb
--- /dev/null
+++ b/storage/innodb_plugin/include/mtr0mtr.h
@@ -0,0 +1,416 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0mtr.h
+Mini-transaction buffer
+
+Created 11/26/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef mtr0mtr_h
+#define mtr0mtr_h
+
+#include "univ.i"
+#include "mem0mem.h"
+#include "dyn0dyn.h"
+#include "buf0types.h"
+#include "sync0rw.h"
+#include "ut0byte.h"
+#include "mtr0types.h"
+#include "page0types.h"
+
+/* Logging modes for a mini-transaction */
+#define MTR_LOG_ALL		21	/* default mode: log all operations
+					modifying disk-based data */
+#define	MTR_LOG_NONE		22	/* log no operations */
+/*#define	MTR_LOG_SPACE	23 */	/* log only operations modifying
+					file space page allocation data
+					(operations in fsp0fsp.* ) */
+#define	MTR_LOG_SHORT_INSERTS	24	/* inserts are logged in a shorter
+					form */
+
+/* Types for the mlock objects to store in the mtr memo; NOTE that the
+first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
+#define	MTR_MEMO_PAGE_S_FIX	RW_S_LATCH
+#define	MTR_MEMO_PAGE_X_FIX	RW_X_LATCH
+#define	MTR_MEMO_BUF_FIX	RW_NO_LATCH
+#define MTR_MEMO_MODIFY		54
+#define	MTR_MEMO_S_LOCK		55
+#define	MTR_MEMO_X_LOCK		56
+
+/** @name Log item types
+The log items are declared 'byte' so that the compiler can warn if val
+and type parameters are switched in a call to mlog_write_ulint. NOTE!
+For 1 - 8 bytes, the flag value must give the length also! @{ */
+#define	MLOG_SINGLE_REC_FLAG	128		/*!< if the mtr contains only
+						one log record for one page,
+						i.e., write_initial_log_record
+						has been called only once,
+						this flag is ORed to the type
+						of that first log record */
+#define	MLOG_1BYTE		(1)		/*!< one byte is written */
+#define	MLOG_2BYTES		(2)		/*!< 2 bytes ... */
+#define	MLOG_4BYTES		(4)		/*!< 4 bytes ... */
+#define	MLOG_8BYTES		(8)		/*!< 8 bytes ... */
+#define	MLOG_REC_INSERT		((byte)9)	/*!< record insert */
+#define	MLOG_REC_CLUST_DELETE_MARK ((byte)10)	/*!< mark clustered index record
+						deleted */
+#define	MLOG_REC_SEC_DELETE_MARK ((byte)11)	/*!< mark secondary index record
+						deleted */
+#define MLOG_REC_UPDATE_IN_PLACE ((byte)13)	/*!< update of a record,
+						preserves record field sizes */
+#define MLOG_REC_DELETE		((byte)14)	/*!< delete a record from a
+						page */
+#define	MLOG_LIST_END_DELETE	((byte)15)	/*!< delete record list end on
+						index page */
+#define	MLOG_LIST_START_DELETE	((byte)16)	/*!< delete record list start on
+						index page */
+#define	MLOG_LIST_END_COPY_CREATED ((byte)17)	/*!< copy record list end to a
+						new created index page */
+#define	MLOG_PAGE_REORGANIZE	((byte)18)	/*!< reorganize an
+						index page in
+						ROW_FORMAT=REDUNDANT */
+#define MLOG_PAGE_CREATE	((byte)19)	/*!< create an index page */
+#define	MLOG_UNDO_INSERT	((byte)20)	/*!< insert entry in an undo
+						log */
+#define MLOG_UNDO_ERASE_END	((byte)21)	/*!< erase an undo log
+						page end */
+#define	MLOG_UNDO_INIT		((byte)22)	/*!< initialize a page in an
+						undo log */
+#define MLOG_UNDO_HDR_DISCARD	((byte)23)	/*!< discard an update undo log
+						header */
+#define	MLOG_UNDO_HDR_REUSE	((byte)24)	/*!< reuse an insert undo log
+						header */
+#define MLOG_UNDO_HDR_CREATE	((byte)25)	/*!< create an undo
+						log header */
+#define MLOG_REC_MIN_MARK	((byte)26)	/*!< mark an index
+						record as the
+						predefined minimum
+						record */
+#define MLOG_IBUF_BITMAP_INIT	((byte)27)	/*!< initialize an
+						ibuf bitmap page */
+/*#define	MLOG_FULL_PAGE	((byte)28)	full contents of a page */
+#define MLOG_INIT_FILE_PAGE	((byte)29)	/*!< this means that a
+						file page is taken
+						into use and the prior
+						contents of the page
+						should be ignored: in
+						recovery we must not
+						trust the lsn values
+						stored to the file
+						page */
+#define MLOG_WRITE_STRING	((byte)30)	/*!< write a string to
+						a page */
+#define	MLOG_MULTI_REC_END	((byte)31)	/*!< if a single mtr writes
+						log records for several pages,
+						this log record ends the
+						sequence of these records */
+#define MLOG_DUMMY_RECORD	((byte)32)	/*!< dummy log record used to
+						pad a log block full */
+#define MLOG_FILE_CREATE	((byte)33)	/*!< log record about an .ibd
+						file creation */
+#define MLOG_FILE_RENAME	((byte)34)	/*!< log record about an .ibd
+						file rename */
+#define MLOG_FILE_DELETE	((byte)35)	/*!< log record about an .ibd
+						file deletion */
+#define MLOG_COMP_REC_MIN_MARK	((byte)36)	/*!< mark a compact
+						index record as the
+						predefined minimum
+						record */
+#define MLOG_COMP_PAGE_CREATE	((byte)37)	/*!< create a compact
+						index page */
+#define MLOG_COMP_REC_INSERT	((byte)38)	/*!< compact record insert */
+#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39)
+						/*!< mark compact
+						clustered index record
+						deleted */
+#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/*!< mark compact
+						secondary index record
+						deleted; this log
+						record type is
+						redundant, as
+						MLOG_REC_SEC_DELETE_MARK
+						is independent of the
+						record format. */
+#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/*!< update of a
+						compact record,
+						preserves record field
+						sizes */
+#define MLOG_COMP_REC_DELETE	((byte)42)	/*!< delete a compact record
+						from a page */
+#define MLOG_COMP_LIST_END_DELETE ((byte)43)	/*!< delete compact record list
+						end on index page */
+#define MLOG_COMP_LIST_START_DELETE ((byte)44)	/*!< delete compact record list
+						start on index page */
+#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45)
+						/*!< copy compact
+						record list end to a
+						new created index
+						page */
+#define MLOG_COMP_PAGE_REORGANIZE ((byte)46)	/*!< reorganize an index page */
+#define MLOG_FILE_CREATE2	((byte)47)	/*!< log record about creating
+						an .ibd file, with format */
+#define MLOG_ZIP_WRITE_NODE_PTR	((byte)48)	/*!< write the node pointer of
+						a record on a compressed
+						non-leaf B-tree page */
+#define MLOG_ZIP_WRITE_BLOB_PTR	((byte)49)	/*!< write the BLOB pointer
+						of an externally stored column
+						on a compressed page */
+#define MLOG_ZIP_WRITE_HEADER	((byte)50)	/*!< write to compressed page
+						header */
+#define MLOG_ZIP_PAGE_COMPRESS	((byte)51)	/*!< compress an index page */
+#define MLOG_BIGGEST_TYPE	((byte)51)	/*!< biggest value (used in
+						assertions) */
+/* @} */
+
+/** @name Flags for MLOG_FILE operations
+(stored in the page number parameter, called log_flags in the
+functions).  The page number parameter was originally written as 0. @{ */
+#define MLOG_FILE_FLAG_TEMP	1	/*!< identifies TEMPORARY TABLE in
+					MLOG_FILE_CREATE, MLOG_FILE_CREATE2 */
+/* @} */
+
+/***************************************************************//**
+Starts a mini-transaction and creates a mini-transaction handle
+and buffer in the memory buffer given by the caller.
+@return	mtr buffer which also acts as the mtr handle */
+UNIV_INLINE
+mtr_t*
+mtr_start(
+/*======*/
+	mtr_t*	mtr);	/*!< in: memory buffer for the mtr buffer */
+/***************************************************************//**
+Commits a mini-transaction. */
+UNIV_INTERN
+void
+mtr_commit(
+/*=======*/
+	mtr_t*	mtr);	/*!< in: mini-transaction */
+/**********************************************************//**
+Sets and returns a savepoint in mtr.
+@return	savepoint */
+UNIV_INLINE
+ulint
+mtr_set_savepoint(
+/*==============*/
+	mtr_t*	mtr);	/*!< in: mtr */
+/**********************************************************//**
+Releases the latches stored in an mtr memo down to a savepoint.
+NOTE! The mtr must not have made changes to buffer pages after the
+savepoint, as these can be handled only by mtr_commit. */
+UNIV_INTERN
+void
+mtr_rollback_to_savepoint(
+/*======================*/
+	mtr_t*	mtr,		/*!< in: mtr */
+	ulint	savepoint);	/*!< in: savepoint */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Releases the (index tree) s-latch stored in an mtr memo after a
+savepoint. */
+UNIV_INLINE
+void
+mtr_release_s_latch_at_savepoint(
+/*=============================*/
+	mtr_t*		mtr,		/*!< in: mtr */
+	ulint		savepoint,	/*!< in: savepoint */
+	rw_lock_t*	lock);		/*!< in: latch to release */
+#else /* !UNIV_HOTBACKUP */
+# define mtr_release_s_latch_at_savepoint(mtr,savepoint,lock) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+/***************************************************************//**
+Gets the logging mode of a mini-transaction.
+@return	logging mode: MTR_LOG_NONE, ... */
+UNIV_INLINE
+ulint
+mtr_get_log_mode(
+/*=============*/
+	mtr_t*	mtr);	/*!< in: mtr */
+/***************************************************************//**
+Changes the logging mode of a mini-transaction.
+@return	old mode */
+UNIV_INLINE
+ulint
+mtr_set_log_mode(
+/*=============*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	mode);	/*!< in: logging mode: MTR_LOG_NONE, ... */
+/********************************************************//**
+Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+@return	value read */
+UNIV_INTERN
+ulint
+mtr_read_ulint(
+/*===========*/
+	const byte*	ptr,	/*!< in: pointer from where to read */
+	ulint		type,	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Reads 8 bytes from a file page buffered in the buffer pool.
+@return	value read */
+UNIV_INTERN
+dulint
+mtr_read_dulint(
+/*============*/
+	const byte*	ptr,	/*!< in: pointer from where to read */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+This macro locks an rw-lock in s-mode. */
+#define mtr_s_lock(B, MTR)	mtr_s_lock_func((B), __FILE__, __LINE__,\
+						(MTR))
+/*********************************************************************//**
+This macro locks an rw-lock in x-mode. */
+#define mtr_x_lock(B, MTR)	mtr_x_lock_func((B), __FILE__, __LINE__,\
+						(MTR))
+/*********************************************************************//**
+NOTE! Use the macro above!
+Locks a lock in s-mode. */
+UNIV_INLINE
+void
+mtr_s_lock_func(
+/*============*/
+	rw_lock_t*	lock,	/*!< in: rw-lock */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line number */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************************//**
+NOTE! Use the macro above!
+Locks a lock in x-mode. */
+UNIV_INLINE
+void
+mtr_x_lock_func(
+/*============*/
+	rw_lock_t*	lock,	/*!< in: rw-lock */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line number */
+	mtr_t*		mtr);	/*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
+
+/***************************************************//**
+Releases an object in the memo stack. */
+UNIV_INTERN
+void
+mtr_memo_release(
+/*=============*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	void*	object,	/*!< in: object */
+	ulint	type);	/*!< in: object type: MTR_MEMO_S_LOCK, ... */
+#ifdef UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Checks if memo contains the given item.
+@return	TRUE if contains */
+UNIV_INLINE
+ibool
+mtr_memo_contains(
+/*==============*/
+	mtr_t*		mtr,	/*!< in: mtr */
+	const void*	object,	/*!< in: object to search */
+	ulint		type);	/*!< in: type of object */
+
+/**********************************************************//**
+Checks if memo contains the given page.
+@return	TRUE if contains */
+UNIV_INTERN
+ibool
+mtr_memo_contains_page(
+/*===================*/
+	mtr_t*		mtr,	/*!< in: mtr */
+	const byte*	ptr,	/*!< in: pointer to buffer frame */
+	ulint		type);	/*!< in: type of object */
+/*********************************************************//**
+Prints info of an mtr handle. */
+UNIV_INTERN
+void
+mtr_print(
+/*======*/
+	mtr_t*	mtr);	/*!< in: mtr */
+# else /* !UNIV_HOTBACKUP */
+#  define mtr_memo_contains(mtr, object, type)		TRUE
+#  define mtr_memo_contains_page(mtr, ptr, type)	TRUE
+# endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
+/*######################################################################*/
+
+#define	MTR_BUF_MEMO_SIZE	200	/* number of slots in memo */
+
+/***************************************************************//**
+Returns the log object of a mini-transaction buffer.
+@return	log */
+UNIV_INLINE
+dyn_array_t*
+mtr_get_log(
+/*========*/
+	mtr_t*	mtr);	/*!< in: mini-transaction */
+/***************************************************//**
+Pushes an object to an mtr memo stack. */
+UNIV_INLINE
+void
+mtr_memo_push(
+/*==========*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	void*	object,	/*!< in: object */
+	ulint	type);	/*!< in: object type: MTR_MEMO_S_LOCK, ... */
+
+
+/* Type definition of a mini-transaction memo stack slot. */
+typedef	struct mtr_memo_slot_struct	mtr_memo_slot_t;
+struct mtr_memo_slot_struct{
+	ulint	type;	/*!< type of the stored object (MTR_MEMO_S_LOCK, ...) */
+	void*	object;	/*!< pointer to the object */
+};
+
+/* Mini-transaction handle and buffer */
+struct mtr_struct{
+#ifdef UNIV_DEBUG
+	ulint		state;	/*!< MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
+#endif
+	dyn_array_t	memo;	/*!< memo stack for locks etc. */
+	dyn_array_t	log;	/*!< mini-transaction log */
+	ibool		modifications;
+				/* TRUE if the mtr made modifications to
+				buffer pool pages */
+	ulint		n_log_recs;
+				/* count of how many page initial log records
+				have been written to the mtr log */
+	ulint		log_mode; /* specifies which operations should be
+				logged; default value MTR_LOG_ALL */
+	ib_uint64_t	start_lsn;/* start lsn of the possible log entry for
+				this mtr */
+	ib_uint64_t	end_lsn;/* end lsn of the possible log entry for
+				this mtr */
+#ifdef UNIV_DEBUG
+	ulint		magic_n;
+#endif /* UNIV_DEBUG */
+};
+
+#ifdef UNIV_DEBUG
+# define MTR_MAGIC_N		54551
+#endif /* UNIV_DEBUG */
+
+#define MTR_ACTIVE		12231
+#define MTR_COMMITTING		56456
+#define MTR_COMMITTED		34676
+
+#ifndef UNIV_NONINL
+#include "mtr0mtr.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innodb_plugin/include/mtr0mtr.ic
similarity index 52%
rename from storage/innobase/include/mtr0mtr.ic
rename to storage/innodb_plugin/include/mtr0mtr.ic
index 81eec3bfc92..310c7c4117f 100644
--- a/storage/innobase/include/mtr0mtr.ic
+++ b/storage/innodb_plugin/include/mtr0mtr.ic
@@ -1,25 +1,43 @@
-/******************************************************
-Mini-transaction buffer
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0mtr.ic
+Mini-transaction buffer
 
 Created 11/26/1995 Heikki Tuuri
 *******************************************************/
 
-#include "sync0sync.h"
-#include "sync0rw.h"
+#ifndef UNIV_HOTBACKUP
+# include "sync0sync.h"
+# include "sync0rw.h"
+#endif /* !UNIV_HOTBACKUP */
 #include "mach0data.h"
 
-/*******************************************************************
+/***************************************************************//**
 Starts a mini-transaction and creates a mini-transaction handle
-and a buffer in the memory buffer given by the caller. */
+and a buffer in the memory buffer given by the caller.
+@return	mtr buffer which also acts as the mtr handle */
 UNIV_INLINE
 mtr_t*
 mtr_start(
 /*======*/
-			/* out: mtr buffer which also acts as
-			the mtr handle */
-	mtr_t*	mtr)	/* in: memory buffer for the mtr buffer */
+	mtr_t*	mtr)	/*!< in: memory buffer for the mtr buffer */
 {
 	dyn_array_create(&(mtr->memo));
 	dyn_array_create(&(mtr->log));
@@ -28,22 +46,21 @@ mtr_start(
 	mtr->modifications = FALSE;
 	mtr->n_log_recs = 0;
 
-#ifdef UNIV_DEBUG
-	mtr->state = MTR_ACTIVE;
-	mtr->magic_n = MTR_MAGIC_N;
-#endif
+	ut_d(mtr->state = MTR_ACTIVE);
+	ut_d(mtr->magic_n = MTR_MAGIC_N);
+
 	return(mtr);
 }
 
-/*******************************************************
+/***************************************************//**
 Pushes an object to an mtr memo stack. */
 UNIV_INLINE
 void
 mtr_memo_push(
 /*==========*/
-	mtr_t*	mtr,	/* in: mtr */
-	void*	object,	/* in: object */
-	ulint	type)	/* in: object type: MTR_MEMO_S_LOCK, ... */
+	mtr_t*	mtr,	/*!< in: mtr */
+	void*	object,	/*!< in: object */
+	ulint	type)	/*!< in: object type: MTR_MEMO_S_LOCK, ... */
 {
 	dyn_array_t*		memo;
 	mtr_memo_slot_t*	slot;
@@ -56,20 +73,20 @@ mtr_memo_push(
 
 	memo = &(mtr->memo);
 
-	slot = dyn_array_push(memo, sizeof(mtr_memo_slot_t));
+	slot = (mtr_memo_slot_t*) dyn_array_push(memo, sizeof *slot);
 
 	slot->object = object;
 	slot->type = type;
 }
 
-/**************************************************************
-Sets and returns a savepoint in mtr. */
+/**********************************************************//**
+Sets and returns a savepoint in mtr.
+@return	savepoint */
 UNIV_INLINE
 ulint
 mtr_set_savepoint(
 /*==============*/
-			/* out: savepoint */
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	dyn_array_t*	memo;
 
@@ -81,16 +98,17 @@ mtr_set_savepoint(
 	return(dyn_array_get_data_size(memo));
 }
 
-/**************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
 Releases the (index tree) s-latch stored in an mtr memo after a
 savepoint. */
 UNIV_INLINE
 void
 mtr_release_s_latch_at_savepoint(
 /*=============================*/
-	mtr_t*		mtr,		/* in: mtr */
-	ulint		savepoint,	/* in: savepoint */
-	rw_lock_t*	lock)		/* in: latch to release */
+	mtr_t*		mtr,		/*!< in: mtr */
+	ulint		savepoint,	/*!< in: savepoint */
+	rw_lock_t*	lock)		/*!< in: latch to release */
 {
 	mtr_memo_slot_t* slot;
 	dyn_array_t*	memo;
@@ -103,7 +121,7 @@ mtr_release_s_latch_at_savepoint(
 
 	ut_ad(dyn_array_get_data_size(memo) > savepoint);
 
-	slot = dyn_array_get_element(memo, savepoint);
+	slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint);
 
 	ut_ad(slot->object == lock);
 	ut_ad(slot->type == MTR_MEMO_S_LOCK);
@@ -113,17 +131,17 @@ mtr_release_s_latch_at_savepoint(
 	slot->object = NULL;
 }
 
-#ifdef UNIV_DEBUG
-/**************************************************************
-Checks if memo contains the given item. */
+# ifdef UNIV_DEBUG
+/**********************************************************//**
+Checks if memo contains the given item.
+@return	TRUE if contains */
 UNIV_INLINE
 ibool
 mtr_memo_contains(
 /*==============*/
-			/* out: TRUE if contains */
-	mtr_t*	mtr,	/* in: mtr */
-	void*	object,	/* in: object to search */
-	ulint	type)	/* in: type of object */
+	mtr_t*		mtr,	/*!< in: mtr */
+	const void*	object,	/*!< in: object to search */
+	ulint		type)	/*!< in: type of object */
 {
 	mtr_memo_slot_t* slot;
 	dyn_array_t*	memo;
@@ -149,16 +167,17 @@ mtr_memo_contains(
 
 	return(FALSE);
 }
-#endif /* UNIV_DEBUG */
+# endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
 
-/*******************************************************************
-Returns the log object of a mini-transaction buffer. */
+/***************************************************************//**
+Returns the log object of a mini-transaction buffer.
+@return	log */
 UNIV_INLINE
 dyn_array_t*
 mtr_get_log(
 /*========*/
-			/* out: log */
-	mtr_t*	mtr)	/* in: mini-transaction */
+	mtr_t*	mtr)	/*!< in: mini-transaction */
 {
 	ut_ad(mtr);
 	ut_ad(mtr->magic_n == MTR_MAGIC_N);
@@ -166,14 +185,14 @@ mtr_get_log(
 	return(&(mtr->log));
 }
 
-/*******************************************************************
-Gets the logging mode of a mini-transaction. */
+/***************************************************************//**
+Gets the logging mode of a mini-transaction.
+@return	logging mode: MTR_LOG_NONE, ... */
 UNIV_INLINE
 ulint
 mtr_get_log_mode(
 /*=============*/
-			/* out: logging mode: MTR_LOG_NONE, ... */
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	ut_ad(mtr);
 	ut_ad(mtr->log_mode >= MTR_LOG_ALL);
@@ -182,15 +201,15 @@ mtr_get_log_mode(
 	return(mtr->log_mode);
 }
 
-/*******************************************************************
-Changes the logging mode of a mini-transaction. */
+/***************************************************************//**
+Changes the logging mode of a mini-transaction.
+@return	old mode */
 UNIV_INLINE
 ulint
 mtr_set_log_mode(
 /*=============*/
-			/* out: old mode */
-	mtr_t*	mtr,	/* in: mtr */
-	ulint	mode)	/* in: logging mode: MTR_LOG_NONE, ... */
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	mode)	/*!< in: logging mode: MTR_LOG_NONE, ... */
 {
 	ulint	old_mode;
 
@@ -212,16 +231,17 @@ mtr_set_log_mode(
 	return(old_mode);
 }
 
-/*************************************************************************
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
 Locks a lock in s-mode. */
 UNIV_INLINE
 void
 mtr_s_lock_func(
 /*============*/
-	rw_lock_t*	lock,	/* in: rw-lock */
-	const char*	file,	/* in: file name */
-	ulint		line,	/* in: line number */
-	mtr_t*		mtr)	/* in: mtr */
+	rw_lock_t*	lock,	/*!< in: rw-lock */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line number */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ut_ad(mtr);
 	ut_ad(lock);
@@ -231,16 +251,16 @@ mtr_s_lock_func(
 	mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Locks a lock in x-mode. */
 UNIV_INLINE
 void
 mtr_x_lock_func(
 /*============*/
-	rw_lock_t*	lock,	/* in: rw-lock */
-	const char*	file,	/* in: file name */
-	ulint		line,	/* in: line number */
-	mtr_t*		mtr)	/* in: mtr */
+	rw_lock_t*	lock,	/*!< in: rw-lock */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line number */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ut_ad(mtr);
 	ut_ad(lock);
@@ -249,3 +269,4 @@ mtr_x_lock_func(
 
 	mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK);
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/mtr0types.h b/storage/innodb_plugin/include/mtr0types.h
new file mode 100644
index 00000000000..83a7aaf3839
--- /dev/null
+++ b/storage/innodb_plugin/include/mtr0types.h
@@ -0,0 +1,31 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0types.h
+Mini-transaction buffer global types
+
+Created 11/26/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef mtr0types_h
+#define mtr0types_h
+
+typedef struct mtr_struct	mtr_t;
+
+#endif
diff --git a/storage/innodb_plugin/include/mysql_addons.h b/storage/innodb_plugin/include/mysql_addons.h
new file mode 100644
index 00000000000..17660c18710
--- /dev/null
+++ b/storage/innodb_plugin/include/mysql_addons.h
@@ -0,0 +1,33 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mysql_addons.h
+This file contains functions that need to be added to
+MySQL code but have not been added yet.
+
+Whenever you add a function here submit a MySQL bug
+report (feature request) with the implementation. Then
+write the bug number in the comment before the
+function in this file.
+
+When MySQL commits the function it can be deleted from
+here. In a perfect world this file exists but is empty.
+
+Created November 07, 2007 Vasil Dimov
+*******************************************************/
diff --git a/storage/innobase/include/os0file.h b/storage/innodb_plugin/include/os0file.h
similarity index 52%
rename from storage/innobase/include/os0file.h
rename to storage/innodb_plugin/include/os0file.h
index 70c07ea6d1a..d8d2f0e5d9e 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innodb_plugin/include/os0file.h
@@ -1,7 +1,50 @@
-/******************************************************
-The interface to the operating system file io
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/***********************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+***********************************************************************/
+
+/**************************************************//**
+@file include/os0file.h
+The interface to the operating system file io
 
 Created 10/21/1995 Heikki Tuuri
 *******************************************************/
@@ -17,47 +60,63 @@ Created 10/21/1995 Heikki Tuuri
 #include <time.h>
 #endif
 
+/** File node of a tablespace or the log data space */
 typedef	struct fil_node_struct	fil_node_t;
 
 #ifdef UNIV_DO_FLUSH
 extern ibool	os_do_not_call_flush_at_each_write;
 #endif /* UNIV_DO_FLUSH */
 extern ibool	os_has_said_disk_full;
+/** Flag: enable debug printout for asynchronous i/o */
 extern ibool	os_aio_print_debug;
 
+/** Number of pending os_file_pread() operations */
 extern ulint	os_file_n_pending_preads;
+/** Number of pending os_file_pwrite() operations */
 extern ulint	os_file_n_pending_pwrites;
 
+/** Number of pending read operations */
 extern ulint	os_n_pending_reads;
+/** Number of pending write operations */
 extern ulint	os_n_pending_writes;
 
 #ifdef __WIN__
 
-/* We define always WIN_ASYNC_IO, and check at run-time whether
+/** We define always WIN_ASYNC_IO, and check at run-time whether
    the OS actually supports it: Win 95 does not, NT does. */
 #define WIN_ASYNC_IO
 
+/** Use unbuffered I/O */
 #define UNIV_NON_BUFFERED_IO
 
 #endif
 
 #ifdef __WIN__
+/** File handle */
 #define os_file_t	HANDLE
+/** Convert a C file descriptor to a native file handle
+@param fd	file descriptor
+@return		native file handle */
+#define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
 #else
+/** File handle */
 typedef int	os_file_t;
+/** Convert a C file descriptor to a native file handle
+@param fd	file descriptor
+@return		native file handle */
+#define OS_FILE_FROM_FD(fd) fd
 #endif
 
+/** Umask for creating files */
 extern ulint	os_innodb_umask;
 
-/* If this flag is TRUE, then we will use the native aio of the
+/** If this flag is TRUE, then we will use the native aio of the
 OS (provided we compiled Innobase with it in), otherwise we will
 use simulated aio we build below with threads */
 
 extern ibool	os_aio_use_native_aio;
 
-#define OS_FILE_SECTOR_SIZE		512
-
-/* The next value should be smaller or equal to the smallest sector size used
+/** The next value should be smaller or equal to the smallest sector size used
 on any disk. A log block is required to be a portion of disk which is written
 so that if the start and the end of a block get written to disk, then the
 whole block gets written. This should be true even in most cases of a crash:
@@ -66,7 +125,7 @@ log. */
 
 #define OS_FILE_LOG_BLOCK_SIZE		512
 
-/* Options for file_create */
+/** Options for file_create @{ */
 #define	OS_FILE_OPEN			51
 #define	OS_FILE_CREATE			52
 #define OS_FILE_OVERWRITE		53
@@ -82,12 +141,14 @@ log. */
 /* Options for file_create */
 #define	OS_FILE_AIO			61
 #define	OS_FILE_NORMAL			62
+/* @} */
 
-/* Types for file create */
+/** Types for file create @{ */
 #define	OS_DATA_FILE			100
 #define OS_LOG_FILE			101
+/* @} */
 
-/* Error codes from os_file_get_last_error */
+/** Error codes from os_file_get_last_error @{ */
 #define	OS_FILE_NOT_FOUND		71
 #define	OS_FILE_DISK_FULL		72
 #define	OS_FILE_ALREADY_EXISTS		73
@@ -96,23 +157,25 @@ log. */
 						to become available again */
 #define	OS_FILE_SHARING_VIOLATION	76
 #define	OS_FILE_ERROR_NOT_SPECIFIED	77
+/* @} */
 
-/* Types for aio operations */
+/** Types for aio operations @{ */
 #define OS_FILE_READ	10
 #define OS_FILE_WRITE	11
 
 #define OS_FILE_LOG	256	/* This can be ORed to type */
+/* @} */
 
-#define OS_AIO_N_PENDING_IOS_PER_THREAD 32	/* Win NT does not allow more
+#define OS_AIO_N_PENDING_IOS_PER_THREAD 32	/*!< Win NT does not allow more
 						than 64 */
 
-/* Modes for aio operations */
-#define OS_AIO_NORMAL	21	/* Normal asynchronous i/o not for ibuf
+/** Modes for aio operations @{ */
+#define OS_AIO_NORMAL	21	/*!< Normal asynchronous i/o not for ibuf
 				pages or ibuf bitmap pages */
-#define OS_AIO_IBUF	22	/* Asynchronous i/o for ibuf pages or ibuf
+#define OS_AIO_IBUF	22	/*!< Asynchronous i/o for ibuf pages or ibuf
 				bitmap pages */
-#define OS_AIO_LOG	23	/* Asynchronous i/o for the log */
-#define OS_AIO_SYNC	24	/* Asynchronous i/o where the calling thread
+#define OS_AIO_LOG	23	/*!< Asynchronous i/o for the log */
+#define OS_AIO_SYNC	24	/*!< Asynchronous i/o where the calling thread
 				will itself wait for the i/o to complete,
 				doing also the job of the i/o-handler thread;
 				can be used for any pages, ibuf or non-ibuf.
@@ -122,16 +185,18 @@ log. */
 				the file seek and read or write, causing a
 				bottleneck for parallelism. */
 
-#define OS_AIO_SIMULATED_WAKE_LATER	512 /* This can be ORed to mode
+#define OS_AIO_SIMULATED_WAKE_LATER	512 /*!< This can be ORed to mode
 				in the call of os_aio(...),
 				if the caller wants to post several i/o
 				requests in a batch, and only after that
 				wake the i/o-handler thread; this has
 				effect only in simulated aio */
-#define OS_WIN31	1
-#define OS_WIN95	2
-#define OS_WINNT	3
-#define OS_WIN2000	4
+/* @} */
+
+#define OS_WIN31	1	/*!< Microsoft Windows 3.x */
+#define OS_WIN95	2	/*!< Microsoft Windows 95 */
+#define OS_WINNT	3	/*!< Microsoft Windows NT 3.x */
+#define OS_WIN2000	4	/*!< Microsoft Windows 2000 */
 
 extern ulint	os_n_file_reads;
 extern ulint	os_n_file_writes;
@@ -155,150 +220,157 @@ bigger than 4000 bytes */
 
 /* Struct used in fetching information of a file in a directory */
 struct os_file_stat_struct{
-	char		name[OS_FILE_MAX_PATH];	/* path to a file */
-	os_file_type_t	type;			/* file type */
-	ib_longlong	size;			/* file size */
-	time_t		ctime;			/* creation time */
-	time_t		mtime;			/* modification time */
-	time_t		atime;			/* access time */
+	char		name[OS_FILE_MAX_PATH];	/*!< path to a file */
+	os_file_type_t	type;			/*!< file type */
+	ib_int64_t	size;			/*!< file size */
+	time_t		ctime;			/*!< creation time */
+	time_t		mtime;			/*!< modification time */
+	time_t		atime;			/*!< access time */
 };
 typedef struct os_file_stat_struct	os_file_stat_t;
 
 #ifdef __WIN__
-typedef HANDLE	os_file_dir_t;	/* directory stream */
+typedef HANDLE	os_file_dir_t;	/*!< directory stream */
 #else
-typedef DIR*	os_file_dir_t;	/* directory stream */
+typedef DIR*	os_file_dir_t;	/*!< directory stream */
 #endif
 
-/***************************************************************************
-Gets the operating system version. Currently works only on Windows. */
-
+/***********************************************************************//**
+Gets the operating system version. Currently works only on Windows.
+@return	OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */
+UNIV_INTERN
 ulint
 os_get_os_version(void);
 /*===================*/
-		  /* out: OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */
-/********************************************************************
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
 Creates the seek mutexes used in positioned reads and writes. */
-
+UNIV_INTERN
 void
 os_io_init_simple(void);
 /*===================*/
-/***************************************************************************
+/***********************************************************************//**
 Creates a temporary file.  This function is like tmpfile(3), but
 the temporary file is created in the MySQL temporary directory.
 On Netware, this function is like tmpfile(3), because the C run-time
-library of Netware does not expose the delete-on-close flag. */
+library of Netware does not expose the delete-on-close flag.
+@return	temporary file handle, or NULL on error */
 
 FILE*
 os_file_create_tmpfile(void);
 /*========================*/
-			/* out: temporary file handle, or NULL on error */
-/***************************************************************************
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************************//**
 The os_file_opendir() function opens a directory stream corresponding to the
 directory named by the dirname argument. The directory stream is positioned
 at the first entry. In both Unix and Windows we automatically skip the '.'
-and '..' items at the start of the directory listing. */
-
+and '..' items at the start of the directory listing.
+@return	directory stream, NULL if error */
+UNIV_INTERN
 os_file_dir_t
 os_file_opendir(
 /*============*/
-					/* out: directory stream, NULL if
-					error */
-	const char*	dirname,	/* in: directory name; it must not
+	const char*	dirname,	/*!< in: directory name; it must not
 					contain a trailing '\' or '/' */
-	ibool		error_is_fatal);/* in: TRUE if we should treat an
+	ibool		error_is_fatal);/*!< in: TRUE if we should treat an
 					error as a fatal error; if we try to
 					open symlinks then we do not wish a
 					fatal error if it happens not to be
 					a directory */
-/***************************************************************************
-Closes a directory stream. */
-
+/***********************************************************************//**
+Closes a directory stream.
+@return	0 if success, -1 if failure */
+UNIV_INTERN
 int
 os_file_closedir(
 /*=============*/
-				/* out: 0 if success, -1 if failure */
-	os_file_dir_t	dir);	/* in: directory stream */
-/***************************************************************************
+	os_file_dir_t	dir);	/*!< in: directory stream */
+/***********************************************************************//**
 This function returns information of the next file in the directory. We jump
-over the '.' and '..' entries in the directory. */
-
+over the '.' and '..' entries in the directory.
+@return	0 if ok, -1 if error, 1 if at the end of the directory */
+UNIV_INTERN
 int
 os_file_readdir_next_file(
 /*======================*/
-				/* out: 0 if ok, -1 if error, 1 if at the end
-				of the directory */
-	const char*	dirname,/* in: directory name or path */
-	os_file_dir_t	dir,	/* in: directory stream */
-	os_file_stat_t*	info);	/* in/out: buffer where the info is returned */
-/*********************************************************************
+	const char*	dirname,/*!< in: directory name or path */
+	os_file_dir_t	dir,	/*!< in: directory stream */
+	os_file_stat_t*	info);	/*!< in/out: buffer where the info is returned */
+/*****************************************************************//**
 This function attempts to create a directory named pathname. The new directory
 gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
 directory exists already, nothing is done and the call succeeds, unless the
-fail_if_exists arguments is true. */
-
+fail_if_exists arguments is true.
+@return	TRUE if call succeeds, FALSE on error */
+UNIV_INTERN
 ibool
 os_file_create_directory(
 /*=====================*/
-					/* out: TRUE if call succeeds,
-					FALSE on error */
-	const char*	pathname,	/* in: directory name as
+	const char*	pathname,	/*!< in: directory name as
 					null-terminated string */
-	ibool		fail_if_exists);/* in: if TRUE, pre-existing directory
+	ibool		fail_if_exists);/*!< in: if TRUE, pre-existing directory
 					is treated as an error. */
-/********************************************************************
-A simple function to open or create a file. */
-
+/****************************************************************//**
+A simple function to open or create a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
 os_file_t
 os_file_create_simple(
 /*==================*/
-				/* out, own: handle to the file, not defined
-				if error, error number can be retrieved with
-				os_file_get_last_error */
-	const char*	name,	/* in: name of the file or path as a
+	const char*	name,	/*!< in: name of the file or path as a
 				null-terminated string */
-	ulint		create_mode,/* in: OS_FILE_OPEN if an existing file is
+	ulint		create_mode,/*!< in: OS_FILE_OPEN if an existing file is
 				opened (if does not exist, error), or
 				OS_FILE_CREATE if a new file is created
 				(if exists, error), or
 				OS_FILE_CREATE_PATH if new file
 				(if exists, error) and subdirectories along
 				its path are created (if needed)*/
-	ulint		access_type,/* in: OS_FILE_READ_ONLY or
+	ulint		access_type,/*!< in: OS_FILE_READ_ONLY or
 				OS_FILE_READ_WRITE */
-	ibool*		success);/* out: TRUE if succeed, FALSE if error */
-/********************************************************************
-A simple function to open or create a file. */
-
+	ibool*		success);/*!< out: TRUE if succeed, FALSE if error */
+/****************************************************************//**
+A simple function to open or create a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
 os_file_t
 os_file_create_simple_no_error_handling(
 /*====================================*/
-				/* out, own: handle to the file, not defined
-				if error, error number can be retrieved with
-				os_file_get_last_error */
-	const char*	name,	/* in: name of the file or path as a
+	const char*	name,	/*!< in: name of the file or path as a
 				null-terminated string */
-	ulint		create_mode,/* in: OS_FILE_OPEN if an existing file
+	ulint		create_mode,/*!< in: OS_FILE_OPEN if an existing file
 				is opened (if does not exist, error), or
 				OS_FILE_CREATE if a new file is created
 				(if exists, error) */
-	ulint		access_type,/* in: OS_FILE_READ_ONLY,
+	ulint		access_type,/*!< in: OS_FILE_READ_ONLY,
 				OS_FILE_READ_WRITE, or
 				OS_FILE_READ_ALLOW_DELETE; the last option is
 				used by a backup program reading the file */
-	ibool*		success);/* out: TRUE if succeed, FALSE if error */
-/********************************************************************
-Opens an existing file or creates a new. */
-
+	ibool*		success);/*!< out: TRUE if succeed, FALSE if error */
+/****************************************************************//**
+Tries to disable OS caching on an opened file descriptor. */
+UNIV_INTERN
+void
+os_file_set_nocache(
+/*================*/
+	int		fd,		/*!< in: file descriptor to alter */
+	const char*	file_name,	/*!< in: file name, used in the
+					diagnostic message */
+	const char*	operation_name);/*!< in: "open" or "create"; used in the
+					diagnostic message */
+/****************************************************************//**
+Opens an existing file or creates a new.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
 os_file_t
 os_file_create(
 /*===========*/
-				/* out, own: handle to the file, not defined
-				if error, error number can be retrieved with
-				os_file_get_last_error */
-	const char*	name,	/* in: name of the file or path as a
+	const char*	name,	/*!< in: name of the file or path as a
 				null-terminated string */
-	ulint		create_mode,/* in: OS_FILE_OPEN if an existing file
+	ulint		create_mode,/*!< in: OS_FILE_OPEN if an existing file
 				is opened (if does not exist, error), or
 				OS_FILE_CREATE if a new file is created
 				(if exists, error),
@@ -306,192 +378,190 @@ os_file_create(
 				or an old overwritten;
 				OS_FILE_OPEN_RAW, if a raw device or disk
 				partition should be opened */
-	ulint		purpose,/* in: OS_FILE_AIO, if asynchronous,
+	ulint		purpose,/*!< in: OS_FILE_AIO, if asynchronous,
 				non-buffered i/o is desired,
 				OS_FILE_NORMAL, if any normal file;
 				NOTE that it also depends on type, os_aio_..
 				and srv_.. variables whether we really use
 				async i/o or unbuffered i/o: look in the
 				function source code for the exact rules */
-	ulint		type,	/* in: OS_DATA_FILE or OS_LOG_FILE */
-	ibool*		success);/* out: TRUE if succeed, FALSE if error */
-/***************************************************************************
-Deletes a file. The file has to be closed before calling this. */
-
+	ulint		type,	/*!< in: OS_DATA_FILE or OS_LOG_FILE */
+	ibool*		success);/*!< out: TRUE if succeed, FALSE if error */
+/***********************************************************************//**
+Deletes a file. The file has to be closed before calling this.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_delete(
 /*===========*/
-				/* out: TRUE if success */
-	const char*	name);	/* in: file path as a null-terminated string */
-
-/***************************************************************************
-Deletes a file if it exists. The file has to be closed before calling this. */
+	const char*	name);	/*!< in: file path as a null-terminated string */
 
+/***********************************************************************//**
+Deletes a file if it exists. The file has to be closed before calling this.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_delete_if_exists(
 /*=====================*/
-				/* out: TRUE if success */
-	const char*	name);	/* in: file path as a null-terminated string */
-/***************************************************************************
+	const char*	name);	/*!< in: file path as a null-terminated string */
+/***********************************************************************//**
 Renames a file (can also move it to another directory). It is safest that the
-file is closed before calling this function. */
-
+file is closed before calling this function.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_rename(
 /*===========*/
-					/* out: TRUE if success */
-	const char*	oldpath,	/* in: old file path as a
+	const char*	oldpath,	/*!< in: old file path as a
 					null-terminated string */
-	const char*	newpath);	/* in: new file path */
-/***************************************************************************
+	const char*	newpath);	/*!< in: new file path */
+/***********************************************************************//**
 Closes a file handle. In case of error, error number can be retrieved with
-os_file_get_last_error. */
-
+os_file_get_last_error.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_close(
 /*==========*/
-				/* out: TRUE if success */
-	os_file_t	file);	/* in, own: handle to a file */
-/***************************************************************************
-Closes a file handle. */
-
+	os_file_t	file);	/*!< in, own: handle to a file */
+#ifdef UNIV_HOTBACKUP
+/***********************************************************************//**
+Closes a file handle.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_close_no_error_handling(
 /*============================*/
-				/* out: TRUE if success */
-	os_file_t	file);	/* in, own: handle to a file */
-/***************************************************************************
-Gets a file size. */
-
+	os_file_t	file);	/*!< in, own: handle to a file */
+#endif /* UNIV_HOTBACKUP */
+/***********************************************************************//**
+Gets a file size.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_get_size(
 /*=============*/
-				/* out: TRUE if success */
-	os_file_t	file,	/* in: handle to a file */
-	ulint*		size,	/* out: least significant 32 bits of file
+	os_file_t	file,	/*!< in: handle to a file */
+	ulint*		size,	/*!< out: least significant 32 bits of file
 				size */
-	ulint*		size_high);/* out: most significant 32 bits of size */
-/***************************************************************************
-Gets file size as a 64-bit integer ib_longlong. */
-
-ib_longlong
+	ulint*		size_high);/*!< out: most significant 32 bits of size */
+/***********************************************************************//**
+Gets file size as a 64-bit integer ib_int64_t.
+@return	size in bytes, -1 if error */
+UNIV_INTERN
+ib_int64_t
 os_file_get_size_as_iblonglong(
 /*===========================*/
-				/* out: size in bytes, -1 if error */
-	os_file_t	file);	/* in: handle to a file */
-/***************************************************************************
-Write the specified number of zeros to a newly created file. */
-
+	os_file_t	file);	/*!< in: handle to a file */
+/***********************************************************************//**
+Write the specified number of zeros to a newly created file.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_set_size(
 /*=============*/
-				/* out: TRUE if success */
-	const char*	name,	/* in: name of the file or path as a
+	const char*	name,	/*!< in: name of the file or path as a
 				null-terminated string */
-	os_file_t	file,	/* in: handle to a file */
-	ulint		size,	/* in: least significant 32 bits of file
+	os_file_t	file,	/*!< in: handle to a file */
+	ulint		size,	/*!< in: least significant 32 bits of file
 				size */
-	ulint		size_high);/* in: most significant 32 bits of size */
-/***************************************************************************
-Truncates a file at its current position. */
-
+	ulint		size_high);/*!< in: most significant 32 bits of size */
+/***********************************************************************//**
+Truncates a file at its current position.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_set_eof(
 /*============*/
-				/* out: TRUE if success */
-	FILE*		file);	/* in: file to be truncated */
-/***************************************************************************
-Flushes the write buffers of a given file to the disk. */
-
+	FILE*		file);	/*!< in: file to be truncated */
+/***********************************************************************//**
+Flushes the write buffers of a given file to the disk.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_flush(
 /*==========*/
-				/* out: TRUE if success */
-	os_file_t	file);	/* in, own: handle to a file */
-/***************************************************************************
+	os_file_t	file);	/*!< in, own: handle to a file */
+/***********************************************************************//**
 Retrieves the last error number if an error occurs in a file io function.
 The number should be retrieved before any other OS calls (because they may
 overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned. */
-
+the OS error number + 100 is returned.
+@return	error number, or OS error number + 100 */
+UNIV_INTERN
 ulint
 os_file_get_last_error(
 /*===================*/
-					/* out: error number, or OS error
-					number + 100 */
-	ibool	report_all_errors);	/* in: TRUE if we want an error message
+	ibool	report_all_errors);	/*!< in: TRUE if we want an error message
 					printed of all errors */
-/***********************************************************************
-Requests a synchronous read operation. */
-
+/*******************************************************************//**
+Requests a synchronous read operation.
+@return	TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
 ibool
 os_file_read(
 /*=========*/
-				/* out: TRUE if request was
-				successful, FALSE if fail */
-	os_file_t	file,	/* in: handle to a file */
-	void*		buf,	/* in: buffer where to read */
-	ulint		offset,	/* in: least significant 32 bits of file
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< in: buffer where to read */
+	ulint		offset,	/*!< in: least significant 32 bits of file
 				offset where to read */
-	ulint		offset_high,/* in: most significant 32 bits of
+	ulint		offset_high,/*!< in: most significant 32 bits of
 				offset */
-	ulint		n);	/* in: number of bytes to read */
-/***********************************************************************
+	ulint		n);	/*!< in: number of bytes to read */
+/*******************************************************************//**
 Rewind file to its start, read at most size - 1 bytes from it to str, and
 NUL-terminate str. All errors are silently ignored. This function is
 mostly meant to be used with temporary files. */
-
+UNIV_INTERN
 void
 os_file_read_string(
 /*================*/
-	FILE*	file,	/* in: file to read from */
-	char*	str,	/* in: buffer where to read */
-	ulint	size);	/* in: size of buffer */
-/***********************************************************************
+	FILE*	file,	/*!< in: file to read from */
+	char*	str,	/*!< in: buffer where to read */
+	ulint	size);	/*!< in: size of buffer */
+/*******************************************************************//**
 Requests a synchronous positioned read operation. This function does not do
-any error handling. In case of error it returns FALSE. */
-
+any error handling. In case of error it returns FALSE.
+@return	TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
 ibool
 os_file_read_no_error_handling(
 /*===========================*/
-				/* out: TRUE if request was
-				successful, FALSE if fail */
-	os_file_t	file,	/* in: handle to a file */
-	void*		buf,	/* in: buffer where to read */
-	ulint		offset,	/* in: least significant 32 bits of file
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< in: buffer where to read */
+	ulint		offset,	/*!< in: least significant 32 bits of file
 				offset where to read */
-	ulint		offset_high,/* in: most significant 32 bits of
+	ulint		offset_high,/*!< in: most significant 32 bits of
 				offset */
-	ulint		n);	/* in: number of bytes to read */
-
-/***********************************************************************
-Requests a synchronous write operation. */
+	ulint		n);	/*!< in: number of bytes to read */
 
+/*******************************************************************//**
+Requests a synchronous write operation.
+@return	TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
 ibool
 os_file_write(
 /*==========*/
-				/* out: TRUE if request was
-				successful, FALSE if fail */
-	const char*	name,	/* in: name of the file or path as a
+	const char*	name,	/*!< in: name of the file or path as a
 				null-terminated string */
-	os_file_t	file,	/* in: handle to a file */
-	const void*	buf,	/* in: buffer from which to write */
-	ulint		offset,	/* in: least significant 32 bits of file
+	os_file_t	file,	/*!< in: handle to a file */
+	const void*	buf,	/*!< in: buffer from which to write */
+	ulint		offset,	/*!< in: least significant 32 bits of file
 				offset where to write */
-	ulint		offset_high,/* in: most significant 32 bits of
+	ulint		offset_high,/*!< in: most significant 32 bits of
 				offset */
-	ulint		n);	/* in: number of bytes to write */
-/***********************************************************************
-Check the existence and type of the given file. */
-
+	ulint		n);	/*!< in: number of bytes to write */
+/*******************************************************************//**
+Check the existence and type of the given file.
+@return	TRUE if call succeeded */
+UNIV_INTERN
 ibool
 os_file_status(
 /*===========*/
-				/* out: TRUE if call succeeded */
-	const char*	path,	/* in:	pathname of the file */
-	ibool*		exists,	/* out: TRUE if file exists */
-	os_file_type_t* type);	/* out: type of the file (if it exists) */
-/********************************************************************
+	const char*	path,	/*!< in:	pathname of the file */
+	ibool*		exists,	/*!< out: TRUE if file exists */
+	os_file_type_t* type);	/*!< out: type of the file (if it exists) */
+/****************************************************************//**
 The function os_file_dirname returns a directory component of a
 null-terminated pathname string.  In the usual case, dirname returns
 the string up to, but not including, the final '/', and basename
@@ -517,48 +587,47 @@ returned by dirname and basename for different paths:
        "/"	      "/"	     "/"
        "."	      "."	     "."
        ".."	      "."	     ".."
-*/
 
+@return	own: directory component of the pathname */
+UNIV_INTERN
 char*
 os_file_dirname(
 /*============*/
-				/* out, own: directory component of the
-				pathname */
-	const char*	path);	/* in: pathname */
-/********************************************************************
-Creates all missing subdirectories along the given path. */
-
+	const char*	path);	/*!< in: pathname */
+/****************************************************************//**
+Creates all missing subdirectories along the given path.
+@return	TRUE if call succeeded FALSE otherwise */
+UNIV_INTERN
 ibool
 os_file_create_subdirs_if_needed(
 /*=============================*/
-				/* out: TRUE if call succeeded
-				   FALSE otherwise */
-	const char*	path);	/* in: path name */
-/****************************************************************************
-Initializes the asynchronous io system. Creates n_read_threads segments for
-read, n_write_threads segments for writes, one segment for the ibuf i/o, and
-one segment for log IO. Returns the number of segments created. When async
-IO is not used, and 4 threads should be created to process requests put
-in the segments. */
-
-ulint
+	const char*	path);	/*!< in: path name */
+/***********************************************************************
+Initializes the asynchronous io system. Creates one array each for ibuf
+and log i/o. Also creates one array each for read and write where each
+array is divided logically into n_read_segs and n_write_segs
+respectively. The caller must create an i/o handler thread for each
+segment in these arrays. This function also creates the sync array.
+No i/o handler thread needs to be created for that */
+UNIV_INTERN
+void
 os_aio_init(
 /*========*/
-	ulint	ios_per_array,	/* in: maximum number of pending aio operations
-                                allowed per array */
-	ulint	n_read_threads, /* in: number of read threads */
-	ulint	n_write_threads, /* in: number of write threads */
-	ulint	n_slots_sync);	/* in: number of slots in the sync aio array */
-/***********************************************************************
-Requests an asynchronous i/o operation. */
-
+	ulint	n_per_seg,	/*<! in: maximum number of pending aio
+				operations allowed per segment */
+	ulint	n_read_segs,	/*<! in: number of reader threads */
+	ulint	n_write_segs,	/*<! in: number of writer threads */
+	ulint	n_slots_sync);	/*<! in: number of slots in the sync aio
+				array */
+/*******************************************************************//**
+Requests an asynchronous i/o operation.
+@return	TRUE if request was queued successfully, FALSE if fail */
+UNIV_INTERN
 ibool
 os_aio(
 /*===*/
-				/* out: TRUE if request was queued
-				successfully, FALSE if fail */
-	ulint		type,	/* in: OS_FILE_READ or OS_FILE_WRITE */
-	ulint		mode,	/* in: OS_AIO_NORMAL, ..., possibly ORed
+	ulint		type,	/*!< in: OS_FILE_READ or OS_FILE_WRITE */
+	ulint		mode,	/*!< in: OS_AIO_NORMAL, ..., possibly ORed
 				to OS_AIO_SIMULATED_WAKE_LATER: the
 				last flag advises this function not to wake
 				i/o-handler threads, but the caller will
@@ -571,65 +640,68 @@ os_aio(
 				because i/os are not actually handled until
 				all have been posted: use with great
 				caution! */
-	const char*	name,	/* in: name of the file or path as a
+	const char*	name,	/*!< in: name of the file or path as a
 				null-terminated string */
-	os_file_t	file,	/* in: handle to a file */
-	void*		buf,	/* in: buffer where to read or from which
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< in: buffer where to read or from which
 				to write */
-	ulint		offset,	/* in: least significant 32 bits of file
+	ulint		offset,	/*!< in: least significant 32 bits of file
 				offset where to read or write */
-	ulint		offset_high, /* in: most significant 32 bits of
+	ulint		offset_high, /*!< in: most significant 32 bits of
 				offset */
-	ulint		n,	/* in: number of bytes to read or write */
-	fil_node_t*	message1,/* in: messages for the aio handler (these
-				can be used to identify a completed aio
-				operation); if mode is OS_AIO_SYNC, these
-				are ignored */
-	void*		message2);
-/****************************************************************************
+	ulint		n,	/*!< in: number of bytes to read or write */
+	fil_node_t*	message1,/*!< in: message for the aio handler
+				(can be used to identify a completed
+				aio operation); ignored if mode is
+				OS_AIO_SYNC */
+	void*		message2);/*!< in: message for the aio handler
+				(can be used to identify a completed
+				aio operation); ignored if mode is
+				OS_AIO_SYNC */
+/************************************************************************//**
 Wakes up all async i/o threads so that they know to exit themselves in
 shutdown. */
-
+UNIV_INTERN
 void
 os_aio_wake_all_threads_at_shutdown(void);
 /*=====================================*/
-/****************************************************************************
+/************************************************************************//**
 Waits until there are no pending writes in os_aio_write_array. There can
 be other, synchronous, pending writes. */
-
+UNIV_INTERN
 void
 os_aio_wait_until_no_pending_writes(void);
 /*=====================================*/
-/**************************************************************************
+/**********************************************************************//**
 Wakes up simulated aio i/o-handler threads if they have something to do. */
-
+UNIV_INTERN
 void
 os_aio_simulated_wake_handler_threads(void);
 /*=======================================*/
-/**************************************************************************
+/**********************************************************************//**
 This function can be called if one wants to post a batch of reads and
 prefers an i/o-handler thread to handle them all at once later. You must
 call os_aio_simulated_wake_handler_threads later to ensure the threads
 are not left sleeping! */
-
+UNIV_INTERN
 void
 os_aio_simulated_put_read_threads_to_sleep(void);
 /*============================================*/
 
 #ifdef WIN_ASYNC_IO
-/**************************************************************************
+/**********************************************************************//**
 This function is only used in Windows asynchronous i/o.
 Waits for an aio operation to complete. This function is used to wait the
 for completed requests. The aio array of pending requests is divided
 into segments. The thread specifies which segment or slot it wants to wait
 for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing! */
-
+therefore no other thread is allowed to do the freeing!
+@return	TRUE if the aio operation succeeded */
+UNIV_INTERN
 ibool
 os_aio_windows_handle(
 /*==================*/
-				/* out: TRUE if the aio operation succeeded */
-	ulint	segment,	/* in: the number of the segment in the aio
+	ulint	segment,	/*!< in: the number of the segment in the aio
 				arrays to wait for; segment 0 is the ibuf
 				i/o thread, segment 1 the log i/o thread,
 				then follow the non-ibuf read threads, and as
@@ -637,95 +709,88 @@ os_aio_windows_handle(
 				this is ULINT_UNDEFINED, then it means that
 				sync aio is used, and this parameter is
 				ignored */
-	ulint	pos,		/* this parameter is used only in sync aio:
+	ulint	pos,		/*!< this parameter is used only in sync aio:
 				wait for the aio slot at this position */
-	fil_node_t**message1,	/* out: the messages passed with the aio
+	fil_node_t**message1,	/*!< out: the messages passed with the aio
 				request; note that also in the case where
 				the aio operation failed, these output
 				parameters are valid and can be used to
 				restart the operation, for example */
 	void**	message2,
-	ulint*	type);		/* out: OS_FILE_WRITE or ..._READ */
+	ulint*	type);		/*!< out: OS_FILE_WRITE or ..._READ */
 #endif
 
-/* Currently we do not use Posix async i/o */
-#ifdef POSIX_ASYNC_IO
-/**************************************************************************
-This function is only used in Posix asynchronous i/o. Waits for an aio
-operation to complete. */
-
-ibool
-os_aio_posix_handle(
-/*================*/
-				/* out: TRUE if the aio operation succeeded */
-	ulint	array_no,	/* in: array number 0 - 3 */
-	fil_node_t**message1,	/* out: the messages passed with the aio
-				request; note that also in the case where
-				the aio operation failed, these output
-				parameters are valid and can be used to
-				restart the operation, for example */
-	void**	message2);
-#endif
-/**************************************************************************
+/**********************************************************************//**
 Does simulated aio. This function should be called by an i/o-handler
-thread. */
-
+thread.
+@return	TRUE if the aio operation succeeded */
+UNIV_INTERN
 ibool
 os_aio_simulated_handle(
 /*====================*/
-				/* out: TRUE if the aio operation succeeded */
-	ulint	segment,	/* in: the number of the segment in the aio
+	ulint	segment,	/*!< in: the number of the segment in the aio
 				arrays to wait for; segment 0 is the ibuf
 				i/o thread, segment 1 the log i/o thread,
 				then follow the non-ibuf read threads, and as
 				the last are the non-ibuf write threads */
-	fil_node_t**message1,	/* out: the messages passed with the aio
+	fil_node_t**message1,	/*!< out: the messages passed with the aio
 				request; note that also in the case where
 				the aio operation failed, these output
 				parameters are valid and can be used to
 				restart the operation, for example */
 	void**	message2,
-	ulint*	type);		/* out: OS_FILE_WRITE or ..._READ */
-/**************************************************************************
-Validates the consistency of the aio system. */
-
+	ulint*	type);		/*!< out: OS_FILE_WRITE or ..._READ */
+/**********************************************************************//**
+Validates the consistency of the aio system.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 os_aio_validate(void);
 /*=================*/
-				/* out: TRUE if ok */
-/**************************************************************************
+/**********************************************************************//**
 Prints info of the aio arrays. */
-
+UNIV_INTERN
 void
 os_aio_print(
 /*=========*/
-	FILE*	file);	/* in: file where to print */
-/**************************************************************************
+	FILE*	file);	/*!< in: file where to print */
+/**********************************************************************//**
 Refreshes the statistics used to print per-second averages. */
-
+UNIV_INTERN
 void
 os_aio_refresh_stats(void);
 /*======================*/
 
 #ifdef UNIV_DEBUG
-/**************************************************************************
+/**********************************************************************//**
 Checks that all slots in the system have been freed, that is, there are
 no pending io operations. */
-
+UNIV_INTERN
 ibool
 os_aio_all_slots_free(void);
 /*=======================*/
 #endif /* UNIV_DEBUG */
 
-/***********************************************************************
-This function returns information about the specified file */
+/*******************************************************************//**
+This function returns information about the specified file
+@return	TRUE if stat information found */
+UNIV_INTERN
 ibool
 os_file_get_status(
 /*===============*/
-					/* out: TRUE if stat
-					information found */
-	const char*	path,		/* in:	pathname of the file */
-	os_file_stat_t* stat_info);	/* information of a file in a
+	const char*	path,		/*!< in:	pathname of the file */
+	os_file_stat_t* stat_info);	/*!< information of a file in a
 					directory */
 
+#if !defined(UNIV_HOTBACKUP) && !defined(__NETWARE__)
+/*********************************************************************//**
+Creates a temporary file that will be deleted on close.
+This function is defined in ha_innodb.cc.
+@return	temporary file descriptor, or < 0 on error */
+UNIV_INTERN
+int
+innobase_mysql_tmpfile(void);
+/*========================*/
+#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */
+
 #endif
diff --git a/storage/innodb_plugin/include/os0proc.h b/storage/innodb_plugin/include/os0proc.h
new file mode 100644
index 00000000000..fd46bd7db87
--- /dev/null
+++ b/storage/innodb_plugin/include/os0proc.h
@@ -0,0 +1,77 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0proc.h
+The interface to the operating system
+process control primitives
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0proc_h
+#define os0proc_h
+
+#include "univ.i"
+
+#ifdef UNIV_LINUX
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#endif
+
+typedef void*			os_process_t;
+typedef unsigned long int	os_process_id_t;
+
+extern ibool os_use_large_pages;
+/* Large page size. This may be a boot-time option on some platforms */
+extern ulint os_large_page_size;
+
+/****************************************************************//**
+Converts the current process id to a number. It is not guaranteed that the
+number is unique. In Linux returns the 'process number' of the current
+thread. That number is the same as one sees in 'top', for example. In Linux
+the thread id is not the same as one sees in 'top'.
+@return	process id as a number */
+UNIV_INTERN
+ulint
+os_proc_get_number(void);
+/*====================*/
+/****************************************************************//**
+Allocates large pages memory.
+@return	allocated memory */
+UNIV_INTERN
+void*
+os_mem_alloc_large(
+/*===============*/
+	ulint*	n);			/*!< in/out: number of bytes */
+/****************************************************************//**
+Frees large pages memory. */
+UNIV_INTERN
+void
+os_mem_free_large(
+/*==============*/
+	void	*ptr,			/*!< in: pointer returned by
+					os_mem_alloc_large() */
+	ulint	size);			/*!< in: size returned by
+					os_mem_alloc_large() */
+
+#ifndef UNIV_NONINL
+#include "os0proc.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/os0proc.ic b/storage/innodb_plugin/include/os0proc.ic
new file mode 100644
index 00000000000..c9641644525
--- /dev/null
+++ b/storage/innodb_plugin/include/os0proc.ic
@@ -0,0 +1,27 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0proc.ic
+The interface to the operating system
+process control primitives
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+
diff --git a/storage/innodb_plugin/include/os0sync.h b/storage/innodb_plugin/include/os0sync.h
new file mode 100644
index 00000000000..0e0b32e7036
--- /dev/null
+++ b/storage/innodb_plugin/include/os0sync.h
@@ -0,0 +1,386 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0sync.h
+The interface to the operating system
+synchronization primitives.
+
+Created 9/6/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0sync_h
+#define os0sync_h
+
+#include "univ.i"
+#include "ut0lst.h"
+
+#ifdef __WIN__
+
+/** Native mutex */
+#define os_fast_mutex_t CRITICAL_SECTION
+
+/** Native event */
+typedef HANDLE		os_native_event_t;
+
+/** Operating system event */
+typedef struct os_event_struct	os_event_struct_t;
+/** Operating system event handle */
+typedef os_event_struct_t*	os_event_t;
+
+/** An asynchronous signal sent between threads */
+struct os_event_struct {
+	os_native_event_t		  handle;
+					/*!< Windows event */
+	UT_LIST_NODE_T(os_event_struct_t) os_event_list;
+					/*!< list of all created events */
+};
+#else
+/** Native mutex */
+typedef pthread_mutex_t	os_fast_mutex_t;
+
+/** Operating system event */
+typedef struct os_event_struct	os_event_struct_t;
+/** Operating system event handle */
+typedef os_event_struct_t*	os_event_t;
+
+/** An asynchronous signal sent between threads */
+struct os_event_struct {
+	os_fast_mutex_t	os_mutex;	/*!< this mutex protects the next
+					fields */
+	ibool		is_set;		/*!< this is TRUE when the event is
+					in the signaled state, i.e., a thread
+					does not stop if it tries to wait for
+					this event */
+	ib_int64_t	signal_count;	/*!< this is incremented each time
+					the event becomes signaled */
+	pthread_cond_t	cond_var;	/*!< condition variable is used in
+					waiting for the event */
+	UT_LIST_NODE_T(os_event_struct_t) os_event_list;
+					/*!< list of all created events */
+};
+#endif
+
+/** Operating system mutex */
+typedef struct os_mutex_struct	os_mutex_str_t;
+/** Operating system mutex handle */
+typedef os_mutex_str_t*		os_mutex_t;
+
+/** Denotes an infinite delay for os_event_wait_time() */
+#define OS_SYNC_INFINITE_TIME	((ulint)(-1))
+
+/** Return value of os_event_wait_time() when the time is exceeded */
+#define OS_SYNC_TIME_EXCEEDED	1
+
+/** Mutex protecting counts and the event and OS 'slow' mutex lists */
+extern os_mutex_t	os_sync_mutex;
+
+/** This is incremented by 1 in os_thread_create and decremented by 1 in
+os_thread_exit */
+extern ulint		os_thread_count;
+
+extern ulint		os_event_count;
+extern ulint		os_mutex_count;
+extern ulint		os_fast_mutex_count;
+
+/*********************************************************//**
+Initializes global event and OS 'slow' mutex lists. */
+UNIV_INTERN
+void
+os_sync_init(void);
+/*==============*/
+/*********************************************************//**
+Frees created events and OS 'slow' mutexes. */
+UNIV_INTERN
+void
+os_sync_free(void);
+/*==============*/
+/*********************************************************//**
+Creates an event semaphore, i.e., a semaphore which may just have two states:
+signaled and nonsignaled. The created event is manual reset: it must be reset
+explicitly by calling sync_os_reset_event.
+@return	the event handle */
+UNIV_INTERN
+os_event_t
+os_event_create(
+/*============*/
+	const char*	name);	/*!< in: the name of the event, if NULL
+				the event is created without a name */
+/**********************************************************//**
+Sets an event semaphore to the signaled state: lets waiting threads
+proceed. */
+UNIV_INTERN
+void
+os_event_set(
+/*=========*/
+	os_event_t	event);	/*!< in: event to set */
+/**********************************************************//**
+Resets an event semaphore to the nonsignaled state. Waiting threads will
+stop to wait for the event.
+The return value should be passed to os_even_wait_low() if it is desired
+that this thread should not wait in case of an intervening call to
+os_event_set() between this os_event_reset() and the
+os_event_wait_low() call. See comments for os_event_wait_low(). */
+UNIV_INTERN
+ib_int64_t
+os_event_reset(
+/*===========*/
+	os_event_t	event);	/*!< in: event to reset */
+/**********************************************************//**
+Frees an event object. */
+UNIV_INTERN
+void
+os_event_free(
+/*==========*/
+	os_event_t	event);	/*!< in: event to free */
+
+/**********************************************************//**
+Waits for an event object until it is in the signaled state. If
+srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
+waiting thread when the event becomes signaled (or immediately if the
+event is already in the signaled state).
+
+Typically, if the event has been signalled after the os_event_reset()
+we'll return immediately because event->is_set == TRUE.
+There are, however, situations (e.g.: sync_array code) where we may
+lose this information. For example:
+
+thread A calls os_event_reset()
+thread B calls os_event_set()   [event->is_set == TRUE]
+thread C calls os_event_reset() [event->is_set == FALSE]
+thread A calls os_event_wait()  [infinite wait!]
+thread C calls os_event_wait()  [infinite wait!]
+
+Where such a scenario is possible, to avoid infinite wait, the
+value returned by os_event_reset() should be passed in as
+reset_sig_count. */
+UNIV_INTERN
+void
+os_event_wait_low(
+/*==============*/
+	os_event_t	event,		/*!< in: event to wait */
+	ib_int64_t	reset_sig_count);/*!< in: zero or the value
+					returned by previous call of
+					os_event_reset(). */
+
+#define os_event_wait(event) os_event_wait_low(event, 0)
+
+/**********************************************************//**
+Waits for an event object until it is in the signaled state or
+a timeout is exceeded. In Unix the timeout is always infinite.
+@return	0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
+UNIV_INTERN
+ulint
+os_event_wait_time(
+/*===============*/
+	os_event_t	event,	/*!< in: event to wait */
+	ulint		time);	/*!< in: timeout in microseconds, or
+				OS_SYNC_INFINITE_TIME */
+#ifdef __WIN__
+/**********************************************************//**
+Waits for any event in an OS native event array. Returns if even a single
+one is signaled or becomes signaled.
+@return	index of the event which was signaled */
+UNIV_INTERN
+ulint
+os_event_wait_multiple(
+/*===================*/
+	ulint			n,	/*!< in: number of events in the
+					array */
+	os_native_event_t*	native_event_array);
+					/*!< in: pointer to an array of event
+					handles */
+#endif
+/*********************************************************//**
+Creates an operating system mutex semaphore. Because these are slow, the
+mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
+@return	the mutex handle */
+UNIV_INTERN
+os_mutex_t
+os_mutex_create(
+/*============*/
+	const char*	name);	/*!< in: the name of the mutex, if NULL
+				the mutex is created without a name */
+/**********************************************************//**
+Acquires ownership of a mutex semaphore. */
+UNIV_INTERN
+void
+os_mutex_enter(
+/*===========*/
+	os_mutex_t	mutex);	/*!< in: mutex to acquire */
+/**********************************************************//**
+Releases ownership of a mutex. */
+UNIV_INTERN
+void
+os_mutex_exit(
+/*==========*/
+	os_mutex_t	mutex);	/*!< in: mutex to release */
+/**********************************************************//**
+Frees an mutex object. */
+UNIV_INTERN
+void
+os_mutex_free(
+/*==========*/
+	os_mutex_t	mutex);	/*!< in: mutex to free */
+/**********************************************************//**
+Acquires ownership of a fast mutex. Currently in Windows this is the same
+as os_fast_mutex_lock!
+@return	0 if success, != 0 if was reserved by another thread */
+UNIV_INLINE
+ulint
+os_fast_mutex_trylock(
+/*==================*/
+	os_fast_mutex_t*	fast_mutex);	/*!< in: mutex to acquire */
+/**********************************************************//**
+Releases ownership of a fast mutex. */
+UNIV_INTERN
+void
+os_fast_mutex_unlock(
+/*=================*/
+	os_fast_mutex_t*	fast_mutex);	/*!< in: mutex to release */
+/*********************************************************//**
+Initializes an operating system fast mutex semaphore. */
+UNIV_INTERN
+void
+os_fast_mutex_init(
+/*===============*/
+	os_fast_mutex_t*	fast_mutex);	/*!< in: fast mutex */
+/**********************************************************//**
+Acquires ownership of a fast mutex. */
+UNIV_INTERN
+void
+os_fast_mutex_lock(
+/*===============*/
+	os_fast_mutex_t*	fast_mutex);	/*!< in: mutex to acquire */
+/**********************************************************//**
+Frees an mutex object. */
+UNIV_INTERN
+void
+os_fast_mutex_free(
+/*===============*/
+	os_fast_mutex_t*	fast_mutex);	/*!< in: mutex to free */
+
+/**********************************************************//**
+Atomic compare-and-swap and increment for InnoDB. */
+
+#ifdef HAVE_GCC_ATOMIC_BUILTINS
+/**********************************************************//**
+Returns true if swapped, ptr is pointer to target, old_val is value to
+compare to, new_val is the value to swap in. */
+# define os_compare_and_swap(ptr, old_val, new_val) \
+	__sync_bool_compare_and_swap(ptr, old_val, new_val)
+# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
+	os_compare_and_swap(ptr, old_val, new_val)
+# define os_compare_and_swap_lint(ptr, old_val, new_val) \
+	os_compare_and_swap(ptr, old_val, new_val)
+# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
+	os_compare_and_swap(ptr, old_val, new_val)
+/**********************************************************//**
+Returns the resulting value, ptr is pointer to target, amount is the
+amount of increment. */
+# define os_atomic_increment(ptr, amount) \
+	__sync_add_and_fetch(ptr, amount)
+# define os_atomic_increment_lint(ptr, amount) \
+	os_atomic_increment(ptr, amount)
+# define os_atomic_increment_ulint(ptr, amount) \
+	os_atomic_increment(ptr, amount)
+/**********************************************************//**
+Returns the old value of *ptr, atomically sets *ptr to new_val */
+# define os_atomic_test_and_set_byte(ptr, new_val) \
+	__sync_lock_test_and_set(ptr, new_val)
+/* If not compiling with GCC or GCC doesn't support the atomic
+intrinsics and running on Solaris >= 10 use Solaris atomics */
+#elif defined(HAVE_SOLARIS_ATOMICS)
+#include <atomic.h>
+/**********************************************************//**
+Returns true if swapped, ptr is pointer to target, old_val is value to
+compare to, new_val is the value to swap in. */
+# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
+	(atomic_cas_ulong(ptr, old_val, new_val) == old_val)
+# define os_compare_and_swap_lint(ptr, old_val, new_val) \
+	((lint)atomic_cas_ulong((ulong_t*) ptr, old_val, new_val) == old_val)
+# ifdef INNODB_RW_LOCKS_USE_ATOMICS
+#  if   SIZEOF_PTHREAD_T == 4
+#   define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
+	((pthread_t)atomic_cas_32(ptr, old_val, new_val) == old_val)
+#  elif SIZEOF_PTHREAD_T == 8
+#   define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
+	((pthread_t)atomic_cas_64(ptr, old_val, new_val) == old_val)
+#  else
+#   error "SIZEOF_PTHREAD_T != 4 or 8"
+#  endif /* SIZEOF_PTHREAD_T CHECK */
+# endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+
+/**********************************************************//**
+Returns the resulting value, ptr is pointer to target, amount is the
+amount of increment. */
+# define os_atomic_increment_lint(ptr, amount) \
+	atomic_add_long_nv((ulong_t*) ptr, amount)
+# define os_atomic_increment_ulint(ptr, amount) \
+	atomic_add_long_nv(ptr, amount)
+/**********************************************************//**
+Returns the old value of *ptr, atomically sets *ptr to new_val */
+# define os_atomic_test_and_set_byte(ptr, new_val) \
+	atomic_swap_uchar(ptr, new_val)
+/* On Windows, use Windows atomics / interlocked */
+#elif defined(HAVE_WINDOWS_ATOMICS)
+# ifdef _WIN64
+#  define win_cmp_and_xchg InterlockedCompareExchange64
+#  define win_xchg_and_add InterlockedExchangeAdd64
+# else /* _WIN64 */
+#  define win_cmp_and_xchg InterlockedCompareExchange
+#  define win_xchg_and_add InterlockedExchangeAdd
+# endif
+/**********************************************************//**
+Returns true if swapped, ptr is pointer to target, old_val is value to
+compare to, new_val is the value to swap in. */
+# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
+	(win_cmp_and_xchg(ptr, new_val, old_val) == old_val)
+# define os_compare_and_swap_lint(ptr, old_val, new_val) \
+	(win_cmp_and_xchg(ptr, new_val, old_val) == old_val)
+# ifdef INNODB_RW_LOCKS_USE_ATOMICS
+#  define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
+	(InterlockedCompareExchange(ptr, new_val, old_val) == old_val)
+# endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+/**********************************************************//**
+Returns the resulting value, ptr is pointer to target, amount is the
+amount of increment. */
+# define os_atomic_increment_lint(ptr, amount) \
+	(win_xchg_and_add(ptr, amount) + amount)
+# define os_atomic_increment_ulint(ptr, amount) \
+	((ulint) (win_xchg_and_add(ptr, amount) + amount))
+/**********************************************************//**
+Returns the old value of *ptr, atomically sets *ptr to new_val.
+InterlockedExchange() operates on LONG, and the LONG will be
+clobbered */
+# define os_atomic_test_and_set_byte(ptr, new_val) \
+	((byte) InterlockedExchange(ptr, new_val))
+#endif /* HAVE_GCC_ATOMIC_BUILTINS */
+
+#ifndef UNIV_NONINL
+#include "os0sync.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/os0sync.ic b/storage/innodb_plugin/include/os0sync.ic
new file mode 100644
index 00000000000..1f3ce38fa65
--- /dev/null
+++ b/storage/innodb_plugin/include/os0sync.ic
@@ -0,0 +1,53 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0sync.ic
+The interface to the operating system synchronization primitives.
+
+Created 9/6/1995 Heikki Tuuri
+*******************************************************/
+
+#ifdef __WIN__
+#include <winbase.h>
+#endif
+
+/**********************************************************//**
+Acquires ownership of a fast mutex. Currently in Windows this is the same
+as os_fast_mutex_lock!
+@return	0 if success, != 0 if was reserved by another thread */
+UNIV_INLINE
+ulint
+os_fast_mutex_trylock(
+/*==================*/
+	os_fast_mutex_t*	fast_mutex)	/*!< in: mutex to acquire */
+{
+#ifdef __WIN__
+	EnterCriticalSection(fast_mutex);
+
+	return(0);
+#else
+	/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
+	so that it returns 0 on success. In the operating system
+	libraries, HP-UX-10.20 follows the old Posix 1003.4a Draft 4 and
+	returns 1 on success (but MySQL remaps that to 0), while Linux,
+	FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. */
+
+	return((ulint) pthread_mutex_trylock(fast_mutex));
+#endif
+}
diff --git a/storage/innobase/include/os0thread.h b/storage/innodb_plugin/include/os0thread.h
similarity index 56%
rename from storage/innobase/include/os0thread.h
rename to storage/innodb_plugin/include/os0thread.h
index 3cf05feb3a9..6583de0005f 100644
--- a/storage/innobase/include/os0thread.h
+++ b/storage/innodb_plugin/include/os0thread.h
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0thread.h
 The interface to the operating system
 process and thread control primitives
 
-(c) 1995 Innobase Oy
-
 Created 9/8/1995 Heikki Tuuri
 *******************************************************/
 
@@ -27,11 +44,11 @@ can wait inside InnoDB */
 
 #ifdef __WIN__
 typedef void*			os_thread_t;
-typedef ulint			os_thread_id_t;	/* In Windows the thread id
+typedef unsigned long		os_thread_id_t;	/*!< In Windows the thread id
 						is an unsigned long int */
 #else
 typedef pthread_t		os_thread_t;
-typedef os_thread_t		os_thread_id_t;	/* In Unix we use the thread
+typedef os_thread_t		os_thread_id_t;	/*!< In Unix we use the thread
 						handle itself as the id of
 						the thread */
 #endif
@@ -39,101 +56,101 @@ typedef os_thread_t		os_thread_id_t;	/* In Unix we use the thread
 /* Define a function pointer type to use in a typecast */
 typedef void* (*os_posix_f_t) (void*);
 
-/*******************************************************************
-Compares two thread ids for equality. */
-
+/***************************************************************//**
+Compares two thread ids for equality.
+@return	TRUE if equal */
+UNIV_INTERN
 ibool
 os_thread_eq(
 /*=========*/
-				/* out: TRUE if equal */
-	os_thread_id_t	a,	/* in: OS thread or thread id */
-	os_thread_id_t	b);	/* in: OS thread or thread id */
-/********************************************************************
+	os_thread_id_t	a,	/*!< in: OS thread or thread id */
+	os_thread_id_t	b);	/*!< in: OS thread or thread id */
+/****************************************************************//**
 Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
-unique for the thread though! */
-
+unique for the thread though!
+@return	thread identifier as a number */
+UNIV_INTERN
 ulint
 os_thread_pf(
 /*=========*/
-				/* out: unsigned long int */
-	os_thread_id_t	a);	/* in: thread or thread id */
-/********************************************************************
+	os_thread_id_t	a);	/*!< in: OS thread identifier */
+/****************************************************************//**
 Creates a new thread of execution. The execution starts from
 the function given. The start function takes a void* parameter
 and returns a ulint.
 NOTE: We count the number of threads in os_thread_exit(). A created
-thread should always use that to exit and not use return() to exit. */
-
+thread should always use that to exit and not use return() to exit.
+@return	handle to the thread */
+UNIV_INTERN
 os_thread_t
 os_thread_create(
 /*=============*/
-						/* out: handle to the thread */
 #ifndef __WIN__
 		 os_posix_f_t		 start_f,
 #else
-	ulint (*start_f)(void*),		/* in: pointer to function
+	ulint (*start_f)(void*),		/*!< in: pointer to function
 						from which to start */
 #endif
-	void*			arg,		/* in: argument to start
+	void*			arg,		/*!< in: argument to start
 						function */
-	os_thread_id_t*		thread_id);	/* out: id of the created
+	os_thread_id_t*		thread_id);	/*!< out: id of the created
 						thread, or NULL */
-int
-os_thread_join(
-/*===========*/
-  os_thread_id_t  thread_id);	/* in: id of the thread to join */
-/*********************************************************************
-Exits the current thread. */
 
+/*****************************************************************//**
+Exits the current thread. */
+UNIV_INTERN
 void
 os_thread_exit(
 /*===========*/
-	void*	exit_value);	/* in: exit value; in Windows this void*
+	void*	exit_value);	/*!< in: exit value; in Windows this void*
 				is cast as a DWORD */
-/*********************************************************************
-Returns the thread identifier of current thread. */
-
+/*****************************************************************//**
+Returns the thread identifier of current thread.
+@return	current thread identifier */
+UNIV_INTERN
 os_thread_id_t
 os_thread_get_curr_id(void);
 /*========================*/
-/*********************************************************************
-Returns handle to the current thread. */
-
+/*****************************************************************//**
+Returns handle to the current thread.
+@return	current thread handle */
+UNIV_INTERN
 os_thread_t
 os_thread_get_curr(void);
 /*====================*/
-/*********************************************************************
+/*****************************************************************//**
 Advises the os to give up remainder of the thread's time slice. */
-
+UNIV_INTERN
 void
 os_thread_yield(void);
 /*=================*/
-/*********************************************************************
+/*****************************************************************//**
 The thread sleeps at least the time given in microseconds. */
-
+UNIV_INTERN
 void
 os_thread_sleep(
 /*============*/
-	ulint	tm);	/* in: time in microseconds */
-/**********************************************************************
-Gets a thread priority. */
-
+	ulint	tm);	/*!< in: time in microseconds */
+/******************************************************************//**
+Gets a thread priority.
+@return	priority */
+UNIV_INTERN
 ulint
 os_thread_get_priority(
 /*===================*/
-				/* out: priority */
-	os_thread_t	handle);/* in: OS handle to the thread */
-/**********************************************************************
+	os_thread_t	handle);/*!< in: OS handle to the thread */
+/******************************************************************//**
 Sets a thread priority. */
-
+UNIV_INTERN
 void
 os_thread_set_priority(
 /*===================*/
-	os_thread_t	handle,	/* in: OS handle to the thread */
-	ulint		pri);	/* in: priority: one of OS_PRIORITY_... */
-/**********************************************************************
-Gets the last operating system error code for the calling thread. */
-
+	os_thread_t	handle,	/*!< in: OS handle to the thread */
+	ulint		pri);	/*!< in: priority: one of OS_PRIORITY_... */
+/******************************************************************//**
+Gets the last operating system error code for the calling thread.
+@return	last error on Windows, 0 otherwise */
+UNIV_INTERN
 ulint
 os_thread_get_last_error(void);
 /*==========================*/
diff --git a/storage/innodb_plugin/include/os0thread.ic b/storage/innodb_plugin/include/os0thread.ic
new file mode 100644
index 00000000000..f89bc40b4fa
--- /dev/null
+++ b/storage/innodb_plugin/include/os0thread.ic
@@ -0,0 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0thread.ic
+The interface to the operating system
+process and thread control primitives
+
+Created 9/8/1995 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innodb_plugin/include/page0cur.h b/storage/innodb_plugin/include/page0cur.h
new file mode 100644
index 00000000000..1544b0abe1c
--- /dev/null
+++ b/storage/innodb_plugin/include/page0cur.h
@@ -0,0 +1,346 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/page0cur.h
+The page cursor
+
+Created 10/4/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef page0cur_h
+#define page0cur_h
+
+#include "univ.i"
+
+#include "buf0types.h"
+#include "page0page.h"
+#include "rem0rec.h"
+#include "data0data.h"
+#include "mtr0mtr.h"
+
+
+#define PAGE_CUR_ADAPT
+
+/* Page cursor search modes; the values must be in this order! */
+
+#define	PAGE_CUR_UNSUPP	0
+#define	PAGE_CUR_G	1
+#define	PAGE_CUR_GE	2
+#define	PAGE_CUR_L	3
+#define	PAGE_CUR_LE	4
+/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in
+				 "column LIKE 'abc%' ORDER BY column DESC";
+				 we have to find strings which are <= 'abc' or
+				 which extend it */
+#ifdef UNIV_SEARCH_DEBUG
+# define PAGE_CUR_DBG	6	/* As PAGE_CUR_LE, but skips search shortcut */
+#endif /* UNIV_SEARCH_DEBUG */
+
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Gets pointer to the page frame where the cursor is positioned.
+@return	page */
+UNIV_INLINE
+page_t*
+page_cur_get_page(
+/*==============*/
+	page_cur_t*	cur);	/*!< in: page cursor */
+/*********************************************************//**
+Gets pointer to the buffer block where the cursor is positioned.
+@return	page */
+UNIV_INLINE
+buf_block_t*
+page_cur_get_block(
+/*===============*/
+	page_cur_t*	cur);	/*!< in: page cursor */
+/*********************************************************//**
+Gets pointer to the page frame where the cursor is positioned.
+@return	page */
+UNIV_INLINE
+page_zip_des_t*
+page_cur_get_page_zip(
+/*==================*/
+	page_cur_t*	cur);	/*!< in: page cursor */
+/*********************************************************//**
+Gets the record where the cursor is positioned.
+@return	record */
+UNIV_INLINE
+rec_t*
+page_cur_get_rec(
+/*=============*/
+	page_cur_t*	cur);	/*!< in: page cursor */
+#else /* UNIV_DEBUG */
+# define page_cur_get_page(cur)		page_align((cur)->rec)
+# define page_cur_get_block(cur)	(cur)->block
+# define page_cur_get_page_zip(cur)	buf_block_get_page_zip((cur)->block)
+# define page_cur_get_rec(cur)		(cur)->rec
+#endif /* UNIV_DEBUG */
+/*********************************************************//**
+Sets the cursor object to point before the first user record
+on the page. */
+UNIV_INLINE
+void
+page_cur_set_before_first(
+/*======================*/
+	const buf_block_t*	block,	/*!< in: index page */
+	page_cur_t*		cur);	/*!< in: cursor */
+/*********************************************************//**
+Sets the cursor object to point after the last user record on
+the page. */
+UNIV_INLINE
+void
+page_cur_set_after_last(
+/*====================*/
+	const buf_block_t*	block,	/*!< in: index page */
+	page_cur_t*		cur);	/*!< in: cursor */
+/*********************************************************//**
+Returns TRUE if the cursor is before first user record on page.
+@return	TRUE if at start */
+UNIV_INLINE
+ibool
+page_cur_is_before_first(
+/*=====================*/
+	const page_cur_t*	cur);	/*!< in: cursor */
+/*********************************************************//**
+Returns TRUE if the cursor is after last user record.
+@return	TRUE if at end */
+UNIV_INLINE
+ibool
+page_cur_is_after_last(
+/*===================*/
+	const page_cur_t*	cur);	/*!< in: cursor */
+/**********************************************************//**
+Positions the cursor on the given record. */
+UNIV_INLINE
+void
+page_cur_position(
+/*==============*/
+	const rec_t*		rec,	/*!< in: record on a page */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	page_cur_t*		cur);	/*!< out: page cursor */
+/**********************************************************//**
+Invalidates a page cursor by setting the record pointer NULL. */
+UNIV_INLINE
+void
+page_cur_invalidate(
+/*================*/
+	page_cur_t*	cur);	/*!< out: page cursor */
+/**********************************************************//**
+Moves the cursor to the next record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_next(
+/*==================*/
+	page_cur_t*	cur);	/*!< in/out: cursor; must not be after last */
+/**********************************************************//**
+Moves the cursor to the previous record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_prev(
+/*==================*/
+	page_cur_t*	cur);	/*!< in/out: cursor; not before first */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************//**
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The cursor stays at
+the same logical position, but the physical position may change if it is
+pointing to a compressed page that was reorganized.
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INLINE
+rec_t*
+page_cur_tuple_insert(
+/*==================*/
+	page_cur_t*	cursor,	/*!< in/out: a page cursor */
+	const dtuple_t*	tuple,	/*!< in: pointer to a data tuple */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle, or NULL */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The cursor stays at
+the same logical position, but the physical position may change if it is
+pointing to a compressed page that was reorganized.
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INLINE
+rec_t*
+page_cur_rec_insert(
+/*================*/
+	page_cur_t*	cursor,	/*!< in/out: a page cursor */
+	const rec_t*	rec,	/*!< in: record to insert */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle, or NULL */
+/***********************************************************//**
+Inserts a record next to page cursor on an uncompressed page.
+Returns pointer to inserted record if succeed, i.e., enough
+space available, NULL otherwise. The cursor stays at the same position.
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INTERN
+rec_t*
+page_cur_insert_rec_low(
+/*====================*/
+	rec_t*		current_rec,/*!< in: pointer to current record after
+				which the new record is inserted */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	const rec_t*	rec,	/*!< in: pointer to a physical record */
+	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle, or NULL */
+/***********************************************************//**
+Inserts a record next to page cursor on a compressed and uncompressed
+page. Returns pointer to inserted record if succeed, i.e.,
+enough space available, NULL otherwise.
+The cursor stays at the same position.
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INTERN
+rec_t*
+page_cur_insert_rec_zip(
+/*====================*/
+	rec_t**		current_rec,/*!< in/out: pointer to current record after
+				which the new record is inserted */
+	buf_block_t*	block,	/*!< in: buffer block of *current_rec */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	const rec_t*	rec,	/*!< in: pointer to a physical record */
+	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle, or NULL */
+/*************************************************************//**
+Copies records from page to a newly created page, from a given record onward,
+including that record. Infimum and supremum records are not copied. */
+UNIV_INTERN
+void
+page_copy_rec_list_end_to_created_page(
+/*===================================*/
+	page_t*		new_page,	/*!< in/out: index page to copy to */
+	rec_t*		rec,		/*!< in: first record to copy */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr);		/*!< in: mtr */
+/***********************************************************//**
+Deletes a record at the page cursor. The cursor is moved to the
+next record after the deleted one. */
+UNIV_INTERN
+void
+page_cur_delete_rec(
+/*================*/
+	page_cur_t*	cursor,	/*!< in/out: a page cursor */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	const ulint*	offsets,/*!< in: rec_get_offsets(cursor->rec, index) */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Searches the right position for a page cursor.
+@return	number of matched fields on the left */
+UNIV_INLINE
+ulint
+page_cur_search(
+/*============*/
+	const buf_block_t*	block,	/*!< in: buffer block */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*		tuple,	/*!< in: data tuple */
+	ulint			mode,	/*!< in: PAGE_CUR_L,
+					PAGE_CUR_LE, PAGE_CUR_G, or
+					PAGE_CUR_GE */
+	page_cur_t*		cursor);/*!< out: page cursor */
+/****************************************************************//**
+Searches the right position for a page cursor. */
+UNIV_INTERN
+void
+page_cur_search_with_match(
+/*=======================*/
+	const buf_block_t*	block,	/*!< in: buffer block */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*		tuple,	/*!< in: data tuple */
+	ulint			mode,	/*!< in: PAGE_CUR_L,
+					PAGE_CUR_LE, PAGE_CUR_G, or
+					PAGE_CUR_GE */
+	ulint*			iup_matched_fields,
+					/*!< in/out: already matched
+					fields in upper limit record */
+	ulint*			iup_matched_bytes,
+					/*!< in/out: already matched
+					bytes in a field not yet
+					completely matched */
+	ulint*			ilow_matched_fields,
+					/*!< in/out: already matched
+					fields in lower limit record */
+	ulint*			ilow_matched_bytes,
+					/*!< in/out: already matched
+					bytes in a field not yet
+					completely matched */
+	page_cur_t*		cursor);/*!< out: page cursor */
+/***********************************************************//**
+Positions a page cursor on a randomly chosen user record on a page. If there
+are no user records, sets the cursor on the infimum record. */
+UNIV_INTERN
+void
+page_cur_open_on_rnd_user_rec(
+/*==========================*/
+	buf_block_t*	block,	/*!< in: page */
+	page_cur_t*	cursor);/*!< out: page cursor */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Parses a log record of a record insert on a page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_cur_parse_insert_rec(
+/*======================*/
+	ibool		is_short,/*!< in: TRUE if short inserts */
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in: page or NULL */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+/**********************************************************//**
+Parses a log record of copying a record list end to a new created page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_parse_copy_rec_list_to_created_page(
+/*=====================================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in: page or NULL */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+/***********************************************************//**
+Parses log record of a record delete on a page.
+@return	pointer to record end or NULL */
+UNIV_INTERN
+byte*
+page_cur_parse_delete_rec(
+/*======================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in: page or NULL */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+
+/** Index page cursor */
+
+struct page_cur_struct{
+	byte*		rec;	/*!< pointer to a record on page */
+	buf_block_t*	block;	/*!< pointer to the block containing rec */
+};
+
+#ifndef UNIV_NONINL
+#include "page0cur.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/page0cur.ic b/storage/innodb_plugin/include/page0cur.ic
new file mode 100644
index 00000000000..3520677dfb3
--- /dev/null
+++ b/storage/innodb_plugin/include/page0cur.ic
@@ -0,0 +1,299 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/page0cur.ic
+The page cursor
+
+Created 10/4/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "page0page.h"
+#include "buf0types.h"
+
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Gets pointer to the page frame where the cursor is positioned.
+@return	page */
+UNIV_INLINE
+page_t*
+page_cur_get_page(
+/*==============*/
+	page_cur_t*	cur)	/*!< in: page cursor */
+{
+	ut_ad(cur);
+	ut_ad(page_align(cur->rec) == cur->block->frame);
+
+	return(page_align(cur->rec));
+}
+
+/*********************************************************//**
+Gets pointer to the buffer block where the cursor is positioned.
+@return	page */
+UNIV_INLINE
+buf_block_t*
+page_cur_get_block(
+/*===============*/
+	page_cur_t*	cur)	/*!< in: page cursor */
+{
+	ut_ad(cur);
+	ut_ad(page_align(cur->rec) == cur->block->frame);
+	return(cur->block);
+}
+
+/*********************************************************//**
+Gets pointer to the page frame where the cursor is positioned.
+@return	page */
+UNIV_INLINE
+page_zip_des_t*
+page_cur_get_page_zip(
+/*==================*/
+	page_cur_t*	cur)	/*!< in: page cursor */
+{
+	return(buf_block_get_page_zip(page_cur_get_block(cur)));
+}
+
+/*********************************************************//**
+Gets the record where the cursor is positioned.
+@return	record */
+UNIV_INLINE
+rec_t*
+page_cur_get_rec(
+/*=============*/
+	page_cur_t*	cur)	/*!< in: page cursor */
+{
+	ut_ad(cur);
+	ut_ad(page_align(cur->rec) == cur->block->frame);
+
+	return(cur->rec);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************//**
+Sets the cursor object to point before the first user record
+on the page. */
+UNIV_INLINE
+void
+page_cur_set_before_first(
+/*======================*/
+	const buf_block_t*	block,	/*!< in: index page */
+	page_cur_t*		cur)	/*!< in: cursor */
+{
+	cur->block = (buf_block_t*) block;
+	cur->rec = page_get_infimum_rec(buf_block_get_frame(cur->block));
+}
+
+/*********************************************************//**
+Sets the cursor object to point after the last user record on
+the page. */
+UNIV_INLINE
+void
+page_cur_set_after_last(
+/*====================*/
+	const buf_block_t*	block,	/*!< in: index page */
+	page_cur_t*		cur)	/*!< in: cursor */
+{
+	cur->block = (buf_block_t*) block;
+	cur->rec = page_get_supremum_rec(buf_block_get_frame(cur->block));
+}
+
+/*********************************************************//**
+Returns TRUE if the cursor is before first user record on page.
+@return	TRUE if at start */
+UNIV_INLINE
+ibool
+page_cur_is_before_first(
+/*=====================*/
+	const page_cur_t*	cur)	/*!< in: cursor */
+{
+	ut_ad(cur);
+	ut_ad(page_align(cur->rec) == cur->block->frame);
+	return(page_rec_is_infimum(cur->rec));
+}
+
+/*********************************************************//**
+Returns TRUE if the cursor is after last user record.
+@return	TRUE if at end */
+UNIV_INLINE
+ibool
+page_cur_is_after_last(
+/*===================*/
+	const page_cur_t*	cur)	/*!< in: cursor */
+{
+	ut_ad(cur);
+	ut_ad(page_align(cur->rec) == cur->block->frame);
+	return(page_rec_is_supremum(cur->rec));
+}
+
+/**********************************************************//**
+Positions the cursor on the given record. */
+UNIV_INLINE
+void
+page_cur_position(
+/*==============*/
+	const rec_t*		rec,	/*!< in: record on a page */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	page_cur_t*		cur)	/*!< out: page cursor */
+{
+	ut_ad(rec && block && cur);
+	ut_ad(page_align(rec) == block->frame);
+
+	cur->rec = (rec_t*) rec;
+	cur->block = (buf_block_t*) block;
+}
+
+/**********************************************************//**
+Invalidates a page cursor by setting the record pointer NULL. */
+UNIV_INLINE
+void
+page_cur_invalidate(
+/*================*/
+	page_cur_t*	cur)	/*!< out: page cursor */
+{
+	ut_ad(cur);
+
+	cur->rec = NULL;
+	cur->block = NULL;
+}
+
+/**********************************************************//**
+Moves the cursor to the next record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_next(
+/*==================*/
+	page_cur_t*	cur)	/*!< in/out: cursor; must not be after last */
+{
+	ut_ad(!page_cur_is_after_last(cur));
+
+	cur->rec = page_rec_get_next(cur->rec);
+}
+
+/**********************************************************//**
+Moves the cursor to the previous record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_prev(
+/*==================*/
+	page_cur_t*	cur)	/*!< in/out: page cursor, not before first */
+{
+	ut_ad(!page_cur_is_before_first(cur));
+
+	cur->rec = page_rec_get_prev(cur->rec);
+}
+
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Searches the right position for a page cursor.
+@return	number of matched fields on the left */
+UNIV_INLINE
+ulint
+page_cur_search(
+/*============*/
+	const buf_block_t*	block,	/*!< in: buffer block */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*		tuple,	/*!< in: data tuple */
+	ulint			mode,	/*!< in: PAGE_CUR_L,
+					PAGE_CUR_LE, PAGE_CUR_G, or
+					PAGE_CUR_GE */
+	page_cur_t*		cursor)	/*!< out: page cursor */
+{
+	ulint		low_matched_fields = 0;
+	ulint		low_matched_bytes = 0;
+	ulint		up_matched_fields = 0;
+	ulint		up_matched_bytes = 0;
+
+	ut_ad(dtuple_check_typed(tuple));
+
+	page_cur_search_with_match(block, index, tuple, mode,
+				   &up_matched_fields,
+				   &up_matched_bytes,
+				   &low_matched_fields,
+				   &low_matched_bytes,
+				   cursor);
+	return(low_matched_fields);
+}
+
+/***********************************************************//**
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The cursor stays at
+the same logical position, but the physical position may change if it is
+pointing to a compressed page that was reorganized.
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INLINE
+rec_t*
+page_cur_tuple_insert(
+/*==================*/
+	page_cur_t*	cursor,	/*!< in/out: a page cursor */
+	const dtuple_t*	tuple,	/*!< in: pointer to a data tuple */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
+{
+	mem_heap_t*	heap;
+	ulint*		offsets;
+	ulint		size
+		= rec_get_converted_size(index, tuple, n_ext);
+	rec_t*		rec;
+
+	heap = mem_heap_create(size
+			       + (4 + REC_OFFS_HEADER_SIZE
+				  + dtuple_get_n_fields(tuple))
+			       * sizeof *offsets);
+	rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(heap, size),
+					index, tuple, n_ext);
+	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+
+	if (buf_block_get_page_zip(cursor->block)) {
+		rec = page_cur_insert_rec_zip(&cursor->rec, cursor->block,
+					      index, rec, offsets, mtr);
+	} else {
+		rec = page_cur_insert_rec_low(cursor->rec,
+					      index, rec, offsets, mtr);
+	}
+
+	mem_heap_free(heap);
+	return(rec);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The cursor stays at
+the same logical position, but the physical position may change if it is
+pointing to a compressed page that was reorganized.
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INLINE
+rec_t*
+page_cur_rec_insert(
+/*================*/
+	page_cur_t*	cursor,	/*!< in/out: a page cursor */
+	const rec_t*	rec,	/*!< in: record to insert */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
+{
+	if (buf_block_get_page_zip(cursor->block)) {
+		return(page_cur_insert_rec_zip(&cursor->rec, cursor->block,
+					       index, rec, offsets, mtr));
+	} else {
+		return(page_cur_insert_rec_low(cursor->rec,
+					       index, rec, offsets, mtr));
+	}
+}
diff --git a/storage/innodb_plugin/include/page0page.h b/storage/innodb_plugin/include/page0page.h
new file mode 100644
index 00000000000..a4fe069d022
--- /dev/null
+++ b/storage/innodb_plugin/include/page0page.h
@@ -0,0 +1,1012 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/page0page.h
+Index page routines
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef page0page_h
+#define page0page_h
+
+#include "univ.i"
+
+#include "page0types.h"
+#include "fil0fil.h"
+#include "buf0buf.h"
+#include "data0data.h"
+#include "dict0dict.h"
+#include "rem0rec.h"
+#include "fsp0fsp.h"
+#include "mtr0mtr.h"
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE
+#endif
+
+/*			PAGE HEADER
+			===========
+
+Index page header starts at the first offset left free by the FIL-module */
+
+typedef	byte		page_header_t;
+
+#define	PAGE_HEADER	FSEG_PAGE_DATA	/* index page header starts at this
+				offset */
+/*-----------------------------*/
+#define PAGE_N_DIR_SLOTS 0	/* number of slots in page directory */
+#define	PAGE_HEAP_TOP	 2	/* pointer to record heap top */
+#define	PAGE_N_HEAP	 4	/* number of records in the heap,
+				bit 15=flag: new-style compact page format */
+#define	PAGE_FREE	 6	/* pointer to start of page free record list */
+#define	PAGE_GARBAGE	 8	/* number of bytes in deleted records */
+#define	PAGE_LAST_INSERT 10	/* pointer to the last inserted record, or
+				NULL if this info has been reset by a delete,
+				for example */
+#define	PAGE_DIRECTION	 12	/* last insert direction: PAGE_LEFT, ... */
+#define	PAGE_N_DIRECTION 14	/* number of consecutive inserts to the same
+				direction */
+#define	PAGE_N_RECS	 16	/* number of user records on the page */
+#define PAGE_MAX_TRX_ID	 18	/* highest id of a trx which may have modified
+				a record on the page; a dulint; defined only
+				in secondary indexes and in the insert buffer
+				tree; NOTE: this may be modified only
+				when the thread has an x-latch to the page,
+				and ALSO an x-latch to btr_search_latch
+				if there is a hash index to the page! */
+#define PAGE_HEADER_PRIV_END 26	/* end of private data structure of the page
+				header which are set in a page create */
+/*----*/
+#define	PAGE_LEVEL	 26	/* level of the node in an index tree; the
+				leaf level is the level 0 */
+#define	PAGE_INDEX_ID	 28	/* index id where the page belongs */
+#define PAGE_BTR_SEG_LEAF 36	/* file segment header for the leaf pages in
+				a B-tree: defined only on the root page of a
+				B-tree, but not in the root of an ibuf tree */
+#define PAGE_BTR_IBUF_FREE_LIST	PAGE_BTR_SEG_LEAF
+#define PAGE_BTR_IBUF_FREE_LIST_NODE PAGE_BTR_SEG_LEAF
+				/* in the place of PAGE_BTR_SEG_LEAF and _TOP
+				there is a free list base node if the page is
+				the root page of an ibuf tree, and at the same
+				place is the free list node if the page is in
+				a free list */
+#define PAGE_BTR_SEG_TOP (36 + FSEG_HEADER_SIZE)
+				/* file segment header for the non-leaf pages
+				in a B-tree: defined only on the root page of
+				a B-tree, but not in the root of an ibuf
+				tree */
+/*----*/
+#define PAGE_DATA	(PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE)
+				/* start of data on the page */
+
+#define PAGE_OLD_INFIMUM	(PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES)
+				/* offset of the page infimum record on an
+				old-style page */
+#define PAGE_OLD_SUPREMUM	(PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8)
+				/* offset of the page supremum record on an
+				old-style page */
+#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9)
+				/* offset of the page supremum record end on
+				an old-style page */
+#define PAGE_NEW_INFIMUM	(PAGE_DATA + REC_N_NEW_EXTRA_BYTES)
+				/* offset of the page infimum record on a
+				new-style compact page */
+#define PAGE_NEW_SUPREMUM	(PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8)
+				/* offset of the page supremum record on a
+				new-style compact page */
+#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8)
+				/* offset of the page supremum record end on
+				a new-style compact page */
+/*-----------------------------*/
+
+/* Heap numbers */
+#define PAGE_HEAP_NO_INFIMUM	0	/* page infimum */
+#define PAGE_HEAP_NO_SUPREMUM	1	/* page supremum */
+#define PAGE_HEAP_NO_USER_LOW	2	/* first user record in
+					creation (insertion) order,
+					not necessarily collation order;
+					this record may have been deleted */
+
+/* Directions of cursor movement */
+#define	PAGE_LEFT		1
+#define	PAGE_RIGHT		2
+#define	PAGE_SAME_REC		3
+#define	PAGE_SAME_PAGE		4
+#define	PAGE_NO_DIRECTION	5
+
+/*			PAGE DIRECTORY
+			==============
+*/
+
+typedef	byte			page_dir_slot_t;
+typedef page_dir_slot_t		page_dir_t;
+
+/* Offset of the directory start down from the page end. We call the
+slot with the highest file address directory start, as it points to
+the first record in the list of records. */
+#define	PAGE_DIR		FIL_PAGE_DATA_END
+
+/* We define a slot in the page directory as two bytes */
+#define	PAGE_DIR_SLOT_SIZE	2
+
+/* The offset of the physically lower end of the directory, counted from
+page end, when the page is empty */
+#define PAGE_EMPTY_DIR_START	(PAGE_DIR + 2 * PAGE_DIR_SLOT_SIZE)
+
+/* The maximum and minimum number of records owned by a directory slot. The
+number may drop below the minimum in the first and the last slot in the
+directory. */
+#define PAGE_DIR_SLOT_MAX_N_OWNED	8
+#define	PAGE_DIR_SLOT_MIN_N_OWNED	4
+
+/************************************************************//**
+Gets the start of a page.
+@return	start of the page */
+UNIV_INLINE
+page_t*
+page_align(
+/*=======*/
+	const void*	ptr)	/*!< in: pointer to page frame */
+		__attribute__((const));
+/************************************************************//**
+Gets the offset within a page.
+@return	offset from the start of the page */
+UNIV_INLINE
+ulint
+page_offset(
+/*========*/
+	const void*	ptr)	/*!< in: pointer to page frame */
+		__attribute__((const));
+/*************************************************************//**
+Returns the max trx id field value. */
+UNIV_INLINE
+trx_id_t
+page_get_max_trx_id(
+/*================*/
+	const page_t*	page);	/*!< in: page */
+/*************************************************************//**
+Sets the max trx id field value. */
+UNIV_INTERN
+void
+page_set_max_trx_id(
+/*================*/
+	buf_block_t*	block,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	mtr_t*		mtr);	/*!< in/out: mini-transaction, or NULL */
+/*************************************************************//**
+Sets the max trx id field value if trx_id is bigger than the previous
+value. */
+UNIV_INLINE
+void
+page_update_max_trx_id(
+/*===================*/
+	buf_block_t*	block,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	mtr_t*		mtr);	/*!< in/out: mini-transaction */
+/*************************************************************//**
+Reads the given header field. */
+UNIV_INLINE
+ulint
+page_header_get_field(
+/*==================*/
+	const page_t*	page,	/*!< in: page */
+	ulint		field);	/*!< in: PAGE_N_DIR_SLOTS, ... */
+/*************************************************************//**
+Sets the given header field. */
+UNIV_INLINE
+void
+page_header_set_field(
+/*==================*/
+	page_t*		page,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	ulint		field,	/*!< in: PAGE_N_DIR_SLOTS, ... */
+	ulint		val);	/*!< in: value */
+/*************************************************************//**
+Returns the offset stored in the given header field.
+@return	offset from the start of the page, or 0 */
+UNIV_INLINE
+ulint
+page_header_get_offs(
+/*=================*/
+	const page_t*	page,	/*!< in: page */
+	ulint		field)	/*!< in: PAGE_FREE, ... */
+	__attribute__((nonnull, pure));
+
+/*************************************************************//**
+Returns the pointer stored in the given header field, or NULL. */
+#define page_header_get_ptr(page, field)			\
+	(page_header_get_offs(page, field)			\
+	 ? page + page_header_get_offs(page, field) : NULL)
+/*************************************************************//**
+Sets the pointer stored in the given header field. */
+UNIV_INLINE
+void
+page_header_set_ptr(
+/*================*/
+	page_t*		page,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	ulint		field,	/*!< in/out: PAGE_FREE, ... */
+	const byte*	ptr);	/*!< in: pointer or NULL*/
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Resets the last insert info field in the page header. Writes to mlog
+about this operation. */
+UNIV_INLINE
+void
+page_header_reset_last_insert(
+/*==========================*/
+	page_t*		page,	/*!< in: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	mtr_t*		mtr);	/*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
+/************************************************************//**
+Gets the offset of the first record on the page.
+@return	offset of the first record in record list, relative from page */
+UNIV_INLINE
+ulint
+page_get_infimum_offset(
+/*====================*/
+	const page_t*	page);	/*!< in: page which must have record(s) */
+/************************************************************//**
+Gets the offset of the last record on the page.
+@return	offset of the last record in record list, relative from page */
+UNIV_INLINE
+ulint
+page_get_supremum_offset(
+/*=====================*/
+	const page_t*	page);	/*!< in: page which must have record(s) */
+#define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page))
+#define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page))
+/************************************************************//**
+Returns the middle record of record list. If there are an even number
+of records in the list, returns the first record of upper half-list.
+@return	middle record */
+UNIV_INTERN
+rec_t*
+page_get_middle_rec(
+/*================*/
+	page_t*	page);	/*!< in: page */
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Compares a data tuple to a physical record. Differs from the function
+cmp_dtuple_rec_with_match in the way that the record must reside on an
+index page, and also page infimum and supremum records can be given in
+the parameter rec. These are considered as the negative infinity and
+the positive infinity in the alphabetical order.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared */
+UNIV_INLINE
+int
+page_cmp_dtuple_rec_with_match(
+/*===========================*/
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record on a page; may also
+				be page infimum or supremum, in which case
+				matched-parameter values below are not
+				affected */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint*		matched_fields, /*!< in/out: number of already completely
+				matched fields; when function returns
+				contains the value for current comparison */
+	ulint*		matched_bytes); /*!< in/out: number of already matched
+				bytes within the first field not completely
+				matched; when function returns contains the
+				value for current comparison */
+#endif /* !UNIV_HOTBACKUP */
+/*************************************************************//**
+Gets the page number.
+@return	page number */
+UNIV_INLINE
+ulint
+page_get_page_no(
+/*=============*/
+	const page_t*	page);	/*!< in: page */
+/*************************************************************//**
+Gets the tablespace identifier.
+@return	space id */
+UNIV_INLINE
+ulint
+page_get_space_id(
+/*==============*/
+	const page_t*	page);	/*!< in: page */
+/*************************************************************//**
+Gets the number of user records on page (the infimum and supremum records
+are not user records).
+@return	number of user records */
+UNIV_INLINE
+ulint
+page_get_n_recs(
+/*============*/
+	const page_t*	page);	/*!< in: index page */
+/***************************************************************//**
+Returns the number of records before the given record in chain.
+The number includes infimum and supremum records.
+@return	number of records */
+UNIV_INTERN
+ulint
+page_rec_get_n_recs_before(
+/*=======================*/
+	const rec_t*	rec);	/*!< in: the physical record */
+/*************************************************************//**
+Gets the number of records in the heap.
+@return	number of user records */
+UNIV_INLINE
+ulint
+page_dir_get_n_heap(
+/*================*/
+	const page_t*	page);	/*!< in: index page */
+/*************************************************************//**
+Sets the number of records in the heap. */
+UNIV_INLINE
+void
+page_dir_set_n_heap(
+/*================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL.
+				Note that the size of the dense page directory
+				in the compressed page trailer is
+				n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
+	ulint		n_heap);/*!< in: number of records */
+/*************************************************************//**
+Gets the number of dir slots in directory.
+@return	number of slots */
+UNIV_INLINE
+ulint
+page_dir_get_n_slots(
+/*=================*/
+	const page_t*	page);	/*!< in: index page */
+/*************************************************************//**
+Sets the number of dir slots in directory. */
+UNIV_INLINE
+void
+page_dir_set_n_slots(
+/*=================*/
+	page_t*		page,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	ulint		n_slots);/*!< in: number of slots */
+#ifdef UNIV_DEBUG
+/*************************************************************//**
+Gets pointer to nth directory slot.
+@return	pointer to dir slot */
+UNIV_INLINE
+page_dir_slot_t*
+page_dir_get_nth_slot(
+/*==================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		n);	/*!< in: position */
+#else /* UNIV_DEBUG */
+# define page_dir_get_nth_slot(page, n)		\
+	((page) + UNIV_PAGE_SIZE - PAGE_DIR	\
+	 - (n + 1) * PAGE_DIR_SLOT_SIZE)
+#endif /* UNIV_DEBUG */
+/**************************************************************//**
+Used to check the consistency of a record on a page.
+@return	TRUE if succeed */
+UNIV_INLINE
+ibool
+page_rec_check(
+/*===========*/
+	const rec_t*	rec);	/*!< in: record */
+/***************************************************************//**
+Gets the record pointed to by a directory slot.
+@return	pointer to record */
+UNIV_INLINE
+const rec_t*
+page_dir_slot_get_rec(
+/*==================*/
+	const page_dir_slot_t*	slot);	/*!< in: directory slot */
+/***************************************************************//**
+This is used to set the record offset in a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_rec(
+/*==================*/
+	page_dir_slot_t* slot,	/*!< in: directory slot */
+	rec_t*		 rec);	/*!< in: record on the page */
+/***************************************************************//**
+Gets the number of records owned by a directory slot.
+@return	number of records */
+UNIV_INLINE
+ulint
+page_dir_slot_get_n_owned(
+/*======================*/
+	const page_dir_slot_t*	slot);	/*!< in: page directory slot */
+/***************************************************************//**
+This is used to set the owned records field of a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_n_owned(
+/*======================*/
+	page_dir_slot_t*slot,	/*!< in/out: directory slot */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		n);	/*!< in: number of records owned by the slot */
+/************************************************************//**
+Calculates the space reserved for directory slots of a given
+number of records. The exact value is a fraction number
+n * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, and it is
+rounded upwards to an integer. */
+UNIV_INLINE
+ulint
+page_dir_calc_reserved_space(
+/*=========================*/
+	ulint	n_recs);	/*!< in: number of records */
+/***************************************************************//**
+Looks for the directory slot which owns the given record.
+@return	the directory slot number */
+UNIV_INTERN
+ulint
+page_dir_find_owner_slot(
+/*=====================*/
+	const rec_t*	rec);	/*!< in: the physical record */
+/************************************************************//**
+Determine whether the page is in new-style compact format.
+@return nonzero if the page is in compact format, zero if it is in
+old-style format */
+UNIV_INLINE
+ulint
+page_is_comp(
+/*=========*/
+	const page_t*	page);	/*!< in: index page */
+/************************************************************//**
+TRUE if the record is on a page in compact format.
+@return	nonzero if in compact format */
+UNIV_INLINE
+ulint
+page_rec_is_comp(
+/*=============*/
+	const rec_t*	rec);	/*!< in: record */
+/***************************************************************//**
+Returns the heap number of a record.
+@return	heap number */
+UNIV_INLINE
+ulint
+page_rec_get_heap_no(
+/*=================*/
+	const rec_t*	rec);	/*!< in: the physical record */
+/************************************************************//**
+Determine whether the page is a B-tree leaf.
+@return	TRUE if the page is a B-tree leaf */
+UNIV_INLINE
+ibool
+page_is_leaf(
+/*=========*/
+	const page_t*	page)	/*!< in: page */
+	__attribute__((nonnull, pure));
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return	pointer to next record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_low(
+/*==================*/
+	const rec_t*	rec,	/*!< in: pointer to record */
+	ulint		comp);	/*!< in: nonzero=compact page layout */
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return	pointer to next record */
+UNIV_INLINE
+rec_t*
+page_rec_get_next(
+/*==============*/
+	rec_t*	rec);	/*!< in: pointer to record */
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return	pointer to next record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_const(
+/*====================*/
+	const rec_t*	rec);	/*!< in: pointer to record */
+/************************************************************//**
+Sets the pointer to the next record on the page. */
+UNIV_INLINE
+void
+page_rec_set_next(
+/*==============*/
+	rec_t*	rec,	/*!< in: pointer to record,
+			must not be page supremum */
+	rec_t*	next);	/*!< in: pointer to next record,
+			must not be page infimum */
+/************************************************************//**
+Gets the pointer to the previous record.
+@return	pointer to previous record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_prev_const(
+/*====================*/
+	const rec_t*	rec);	/*!< in: pointer to record, must not be page
+				infimum */
+/************************************************************//**
+Gets the pointer to the previous record.
+@return	pointer to previous record */
+UNIV_INLINE
+rec_t*
+page_rec_get_prev(
+/*==============*/
+	rec_t*		rec);	/*!< in: pointer to record,
+				must not be page infimum */
+/************************************************************//**
+TRUE if the record is a user record on the page.
+@return	TRUE if a user record */
+UNIV_INLINE
+ibool
+page_rec_is_user_rec_low(
+/*=====================*/
+	ulint	offset)	/*!< in: record offset on page */
+	__attribute__((const));
+/************************************************************//**
+TRUE if the record is the supremum record on a page.
+@return	TRUE if the supremum record */
+UNIV_INLINE
+ibool
+page_rec_is_supremum_low(
+/*=====================*/
+	ulint	offset)	/*!< in: record offset on page */
+	__attribute__((const));
+/************************************************************//**
+TRUE if the record is the infimum record on a page.
+@return	TRUE if the infimum record */
+UNIV_INLINE
+ibool
+page_rec_is_infimum_low(
+/*====================*/
+	ulint	offset)	/*!< in: record offset on page */
+	__attribute__((const));
+
+/************************************************************//**
+TRUE if the record is a user record on the page.
+@return	TRUE if a user record */
+UNIV_INLINE
+ibool
+page_rec_is_user_rec(
+/*=================*/
+	const rec_t*	rec)	/*!< in: record */
+	__attribute__((const));
+/************************************************************//**
+TRUE if the record is the supremum record on a page.
+@return	TRUE if the supremum record */
+UNIV_INLINE
+ibool
+page_rec_is_supremum(
+/*=================*/
+	const rec_t*	rec)	/*!< in: record */
+	__attribute__((const));
+
+/************************************************************//**
+TRUE if the record is the infimum record on a page.
+@return	TRUE if the infimum record */
+UNIV_INLINE
+ibool
+page_rec_is_infimum(
+/*================*/
+	const rec_t*	rec)	/*!< in: record */
+	__attribute__((const));
+/***************************************************************//**
+Looks for the record which owns the given record.
+@return	the owner record */
+UNIV_INLINE
+rec_t*
+page_rec_find_owner_rec(
+/*====================*/
+	rec_t*	rec);	/*!< in: the physical record */
+/***********************************************************************//**
+This is a low-level operation which is used in a database index creation
+to update the page number of a created B-tree to a data dictionary
+record. */
+UNIV_INTERN
+void
+page_rec_write_index_page_no(
+/*=========================*/
+	rec_t*	rec,	/*!< in: record to update */
+	ulint	i,	/*!< in: index of the field to update */
+	ulint	page_no,/*!< in: value to write */
+	mtr_t*	mtr);	/*!< in: mtr */
+/************************************************************//**
+Returns the maximum combined size of records which can be inserted on top
+of record heap.
+@return	maximum combined size for inserted records */
+UNIV_INLINE
+ulint
+page_get_max_insert_size(
+/*=====================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		n_recs);/*!< in: number of records */
+/************************************************************//**
+Returns the maximum combined size of records which can be inserted on top
+of record heap if page is first reorganized.
+@return	maximum combined size for inserted records */
+UNIV_INLINE
+ulint
+page_get_max_insert_size_after_reorganize(
+/*======================================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		n_recs);/*!< in: number of records */
+/*************************************************************//**
+Calculates free space if a page is emptied.
+@return	free space */
+UNIV_INLINE
+ulint
+page_get_free_space_of_empty(
+/*=========================*/
+	ulint	comp)	/*!< in: nonzero=compact page format */
+		__attribute__((const));
+/**********************************************************//**
+Returns the base extra size of a physical record.  This is the
+size of the fixed header, independent of the record size.
+@return	REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
+UNIV_INLINE
+ulint
+page_rec_get_base_extra_size(
+/*=========================*/
+	const rec_t*	rec);	/*!< in: physical record */
+/************************************************************//**
+Returns the sum of the sizes of the records in the record list
+excluding the infimum and supremum records.
+@return	data in bytes */
+UNIV_INLINE
+ulint
+page_get_data_size(
+/*===============*/
+	const page_t*	page);	/*!< in: index page */
+/************************************************************//**
+Allocates a block of memory from the head of the free list
+of an index page. */
+UNIV_INLINE
+void
+page_mem_alloc_free(
+/*================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page with enough
+				space available for inserting the record,
+				or NULL */
+	rec_t*		next_rec,/*!< in: pointer to the new head of the
+				free record list */
+	ulint		need);	/*!< in: number of bytes allocated */
+/************************************************************//**
+Allocates a block of memory from the heap of an index page.
+@return	pointer to start of allocated buffer, or NULL if allocation fails */
+UNIV_INTERN
+byte*
+page_mem_alloc_heap(
+/*================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page with enough
+				space available for inserting the record,
+				or NULL */
+	ulint		need,	/*!< in: total number of bytes needed */
+	ulint*		heap_no);/*!< out: this contains the heap number
+				of the allocated record
+				if allocation succeeds */
+/************************************************************//**
+Puts a record to free list. */
+UNIV_INLINE
+void
+page_mem_free(
+/*==========*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	rec_t*		rec,	/*!< in: pointer to the (origin of) record */
+	dict_index_t*	index,	/*!< in: index of rec */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/**********************************************************//**
+Create an uncompressed B-tree index page.
+@return	pointer to the page */
+UNIV_INTERN
+page_t*
+page_create(
+/*========*/
+	buf_block_t*	block,		/*!< in: a buffer block where the
+					page is created */
+	mtr_t*		mtr,		/*!< in: mini-transaction handle */
+	ulint		comp);		/*!< in: nonzero=compact page format */
+/**********************************************************//**
+Create a compressed B-tree index page.
+@return	pointer to the page */
+UNIV_INTERN
+page_t*
+page_create_zip(
+/*============*/
+	buf_block_t*	block,		/*!< in/out: a buffer frame where the
+					page is created */
+	dict_index_t*	index,		/*!< in: the index of the page */
+	ulint		level,		/*!< in: the B-tree level of the page */
+	mtr_t*		mtr);		/*!< in: mini-transaction handle */
+
+/*************************************************************//**
+Differs from page_copy_rec_list_end, because this function does not
+touch the lock table and max trx id on page or compress the page. */
+UNIV_INTERN
+void
+page_copy_rec_list_end_no_locks(
+/*============================*/
+	buf_block_t*	new_block,	/*!< in: index page to copy to */
+	buf_block_t*	block,		/*!< in: index page of rec */
+	rec_t*		rec,		/*!< in: record on page */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr);		/*!< in: mtr */
+/*************************************************************//**
+Copies records from page to new_page, from the given record onward,
+including that record. Infimum and supremum records are not copied.
+The records are copied to the start of the record list on new_page.
+@return pointer to the original successor of the infimum record on
+new_page, or NULL on zip overflow (new_block will be decompressed) */
+UNIV_INTERN
+rec_t*
+page_copy_rec_list_end(
+/*===================*/
+	buf_block_t*	new_block,	/*!< in/out: index page to copy to */
+	buf_block_t*	block,		/*!< in: index page containing rec */
+	rec_t*		rec,		/*!< in: record on page */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+	__attribute__((nonnull));
+/*************************************************************//**
+Copies records from page to new_page, up to the given record, NOT
+including that record. Infimum and supremum records are not copied.
+The records are copied to the end of the record list on new_page.
+@return pointer to the original predecessor of the supremum record on
+new_page, or NULL on zip overflow (new_block will be decompressed) */
+UNIV_INTERN
+rec_t*
+page_copy_rec_list_start(
+/*=====================*/
+	buf_block_t*	new_block,	/*!< in/out: index page to copy to */
+	buf_block_t*	block,		/*!< in: index page containing rec */
+	rec_t*		rec,		/*!< in: record on page */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+	__attribute__((nonnull));
+/*************************************************************//**
+Deletes records from a page from a given record onward, including that record.
+The infimum and supremum records are not deleted. */
+UNIV_INTERN
+void
+page_delete_rec_list_end(
+/*=====================*/
+	rec_t*		rec,	/*!< in: pointer to record on page */
+	buf_block_t*	block,	/*!< in: buffer block of the page */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	ulint		n_recs,	/*!< in: number of records to delete,
+				or ULINT_UNDEFINED if not known */
+	ulint		size,	/*!< in: the sum of the sizes of the
+				records in the end of the chain to
+				delete, or ULINT_UNDEFINED if not known */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull));
+/*************************************************************//**
+Deletes records from page, up to the given record, NOT including
+that record. Infimum and supremum records are not deleted. */
+UNIV_INTERN
+void
+page_delete_rec_list_start(
+/*=======================*/
+	rec_t*		rec,	/*!< in: record on page */
+	buf_block_t*	block,	/*!< in: buffer block of the page */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull));
+/*************************************************************//**
+Moves record list end to another page. Moved records include
+split_rec.
+@return TRUE on success; FALSE on compression failure (new_block will
+be decompressed) */
+UNIV_INTERN
+ibool
+page_move_rec_list_end(
+/*===================*/
+	buf_block_t*	new_block,	/*!< in/out: index page where to move */
+	buf_block_t*	block,		/*!< in: index page from where to move */
+	rec_t*		split_rec,	/*!< in: first record to move */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+	__attribute__((nonnull(1, 2, 4, 5)));
+/*************************************************************//**
+Moves record list start to another page. Moved records do not include
+split_rec.
+@return	TRUE on success; FALSE on compression failure */
+UNIV_INTERN
+ibool
+page_move_rec_list_start(
+/*=====================*/
+	buf_block_t*	new_block,	/*!< in/out: index page where to move */
+	buf_block_t*	block,		/*!< in/out: page containing split_rec */
+	rec_t*		split_rec,	/*!< in: first record not to move */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+	__attribute__((nonnull(1, 2, 4, 5)));
+/****************************************************************//**
+Splits a directory slot which owns too many records. */
+UNIV_INTERN
+void
+page_dir_split_slot(
+/*================*/
+	page_t*		page,	/*!< in: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be written, or NULL */
+	ulint		slot_no)/*!< in: the directory slot */
+	__attribute__((nonnull(1)));
+/*************************************************************//**
+Tries to balance the given directory slot with too few records
+with the upper neighbor, so that there are at least the minimum number
+of records owned by the slot; this may result in the merging of
+two slots. */
+UNIV_INTERN
+void
+page_dir_balance_slot(
+/*==================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		slot_no)/*!< in: the directory slot */
+	__attribute__((nonnull(1)));
+/**********************************************************//**
+Parses a log record of a record list end or start deletion.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_parse_delete_rec_list(
+/*=======================*/
+	byte		type,	/*!< in: MLOG_LIST_END_DELETE,
+				MLOG_LIST_START_DELETE,
+				MLOG_COMP_LIST_END_DELETE or
+				MLOG_COMP_LIST_START_DELETE */
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in/out: buffer block or NULL */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+/***********************************************************//**
+Parses a redo log record of creating a page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_parse_create(
+/*==============*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	ulint		comp,	/*!< in: nonzero=compact page format */
+	buf_block_t*	block,	/*!< in: block or NULL */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+/************************************************************//**
+Prints record contents including the data relevant only in
+the index page context. */
+UNIV_INTERN
+void
+page_rec_print(
+/*===========*/
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets);/*!< in: record descriptor */
+/***************************************************************//**
+This is used to print the contents of the directory for
+debugging purposes. */
+UNIV_INTERN
+void
+page_dir_print(
+/*===========*/
+	page_t*	page,	/*!< in: index page */
+	ulint	pr_n);	/*!< in: print n first and n last entries */
+/***************************************************************//**
+This is used to print the contents of the page record list for
+debugging purposes. */
+UNIV_INTERN
+void
+page_print_list(
+/*============*/
+	buf_block_t*	block,	/*!< in: index page */
+	dict_index_t*	index,	/*!< in: dictionary index of the page */
+	ulint		pr_n);	/*!< in: print n first and n last entries */
+/***************************************************************//**
+Prints the info in a page header. */
+UNIV_INTERN
+void
+page_header_print(
+/*==============*/
+	const page_t*	page);	/*!< in: index page */
+/***************************************************************//**
+This is used to print the contents of the page for
+debugging purposes. */
+UNIV_INTERN
+void
+page_print(
+/*=======*/
+	buf_block_t*	block,	/*!< in: index page */
+	dict_index_t*	index,	/*!< in: dictionary index of the page */
+	ulint		dn,	/*!< in: print dn first and last entries
+				in directory */
+	ulint		rn);	/*!< in: print rn first and last records
+				in directory */
+/***************************************************************//**
+The following is used to validate a record on a page. This function
+differs from rec_validate as it can also check the n_owned field and
+the heap_no field.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+page_rec_validate(
+/*==============*/
+	rec_t*		rec,	/*!< in: physical record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
+Checks that the first directory slot points to the infimum record and
+the last to the supremum. This function is intended to track if the
+bug fixed in 4.0.14 has caused corruption to users' databases. */
+UNIV_INTERN
+void
+page_check_dir(
+/*===========*/
+	const page_t*	page);	/*!< in: index page */
+/***************************************************************//**
+This function checks the consistency of an index page when we do not
+know the index. This is also resilient so that this should never crash
+even if the page is total garbage.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+page_simple_validate_old(
+/*=====================*/
+	page_t*	page);	/*!< in: old-style index page */
+/***************************************************************//**
+This function checks the consistency of an index page when we do not
+know the index. This is also resilient so that this should never crash
+even if the page is total garbage.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+page_simple_validate_new(
+/*=====================*/
+	page_t*	block);	/*!< in: new-style index page */
+/***************************************************************//**
+This function checks the consistency of an index page.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+page_validate(
+/*==========*/
+	page_t*		page,	/*!< in: index page */
+	dict_index_t*	index);	/*!< in: data dictionary index containing
+				the page record type definition */
+/***************************************************************//**
+Looks in the page record list for a record with the given heap number.
+@return	record, NULL if not found */
+
+const rec_t*
+page_find_rec_with_heap_no(
+/*=======================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		heap_no);/*!< in: heap number */
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE  UNIV_INLINE_ORIGINAL
+#endif
+
+#ifndef UNIV_NONINL
+#include "page0page.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/page0page.ic b/storage/innodb_plugin/include/page0page.ic
new file mode 100644
index 00000000000..318ec1cc1f2
--- /dev/null
+++ b/storage/innodb_plugin/include/page0page.ic
@@ -0,0 +1,1073 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/page0page.ic
+Index page routines
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+#ifdef UNIV_DEBUG
+# include "log0recv.h"
+#endif /* !UNIV_DEBUG */
+#ifndef UNIV_HOTBACKUP
+# include "rem0cmp.h"
+#endif /* !UNIV_HOTBACKUP */
+#include "mtr0log.h"
+#include "page0zip.h"
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE
+#endif
+
+/************************************************************//**
+Gets the start of a page.
+@return	start of the page */
+UNIV_INLINE
+page_t*
+page_align(
+/*=======*/
+	const void*	ptr)	/*!< in: pointer to page frame */
+{
+	return((page_t*) ut_align_down(ptr, UNIV_PAGE_SIZE));
+}
+/************************************************************//**
+Gets the offset within a page.
+@return	offset from the start of the page */
+UNIV_INLINE
+ulint
+page_offset(
+/*========*/
+	const void*	ptr)	/*!< in: pointer to page frame */
+{
+	return(ut_align_offset(ptr, UNIV_PAGE_SIZE));
+}
+/*************************************************************//**
+Returns the max trx id field value. */
+UNIV_INLINE
+trx_id_t
+page_get_max_trx_id(
+/*================*/
+	const page_t*	page)	/*!< in: page */
+{
+	ut_ad(page);
+
+	return(mach_read_from_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID));
+}
+
+/*************************************************************//**
+Sets the max trx id field value if trx_id is bigger than the previous
+value. */
+UNIV_INLINE
+void
+page_update_max_trx_id(
+/*===================*/
+	buf_block_t*	block,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+{
+	ut_ad(block);
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	/* During crash recovery, this function may be called on
+	something else than a leaf page of a secondary index or the
+	insert buffer index tree (dict_index_is_sec_or_ibuf() returns
+	TRUE for the dummy indexes constructed during redo log
+	application).  In that case, PAGE_MAX_TRX_ID is unused,
+	and trx_id is usually zero. */
+	ut_ad(!ut_dulint_is_zero(trx_id) || recv_recovery_is_on());
+	ut_ad(page_is_leaf(buf_block_get_frame(block)));
+
+	if (ut_dulint_cmp(page_get_max_trx_id(buf_block_get_frame(block)),
+			  trx_id) < 0) {
+
+		page_set_max_trx_id(block, page_zip, trx_id, mtr);
+	}
+}
+
+/*************************************************************//**
+Reads the given header field. */
+UNIV_INLINE
+ulint
+page_header_get_field(
+/*==================*/
+	const page_t*	page,	/*!< in: page */
+	ulint		field)	/*!< in: PAGE_LEVEL, ... */
+{
+	ut_ad(page);
+	ut_ad(field <= PAGE_INDEX_ID);
+
+	return(mach_read_from_2(page + PAGE_HEADER + field));
+}
+
+/*************************************************************//**
+Sets the given header field. */
+UNIV_INLINE
+void
+page_header_set_field(
+/*==================*/
+	page_t*		page,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	ulint		field,	/*!< in: PAGE_N_DIR_SLOTS, ... */
+	ulint		val)	/*!< in: value */
+{
+	ut_ad(page);
+	ut_ad(field <= PAGE_N_RECS);
+	ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE);
+	ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
+
+	mach_write_to_2(page + PAGE_HEADER + field, val);
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		page_zip_write_header(page_zip,
+				      page + PAGE_HEADER + field, 2, NULL);
+	}
+}
+
+/*************************************************************//**
+Returns the offset stored in the given header field.
+@return	offset from the start of the page, or 0 */
+UNIV_INLINE
+ulint
+page_header_get_offs(
+/*=================*/
+	const page_t*	page,	/*!< in: page */
+	ulint		field)	/*!< in: PAGE_FREE, ... */
+{
+	ulint	offs;
+
+	ut_ad(page);
+	ut_ad((field == PAGE_FREE)
+	      || (field == PAGE_LAST_INSERT)
+	      || (field == PAGE_HEAP_TOP));
+
+	offs = page_header_get_field(page, field);
+
+	ut_ad((field != PAGE_HEAP_TOP) || offs);
+
+	return(offs);
+}
+
+/*************************************************************//**
+Sets the pointer stored in the given header field. */
+UNIV_INLINE
+void
+page_header_set_ptr(
+/*================*/
+	page_t*		page,	/*!< in: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	ulint		field,	/*!< in: PAGE_FREE, ... */
+	const byte*	ptr)	/*!< in: pointer or NULL*/
+{
+	ulint	offs;
+
+	ut_ad(page);
+	ut_ad((field == PAGE_FREE)
+	      || (field == PAGE_LAST_INSERT)
+	      || (field == PAGE_HEAP_TOP));
+
+	if (ptr == NULL) {
+		offs = 0;
+	} else {
+		offs = ptr - page;
+	}
+
+	ut_ad((field != PAGE_HEAP_TOP) || offs);
+
+	page_header_set_field(page, page_zip, field, offs);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Resets the last insert info field in the page header. Writes to mlog
+about this operation. */
+UNIV_INLINE
+void
+page_header_reset_last_insert(
+/*==========================*/
+	page_t*		page,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ut_ad(page && mtr);
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		mach_write_to_2(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0);
+		page_zip_write_header(page_zip,
+				      page + (PAGE_HEADER + PAGE_LAST_INSERT),
+				      2, mtr);
+	} else {
+		mlog_write_ulint(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0,
+				 MLOG_2BYTES, mtr);
+	}
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/************************************************************//**
+Determine whether the page is in new-style compact format.
+@return nonzero if the page is in compact format, zero if it is in
+old-style format */
+UNIV_INLINE
+ulint
+page_is_comp(
+/*=========*/
+	const page_t*	page)	/*!< in: index page */
+{
+	return(UNIV_EXPECT(page_header_get_field(page, PAGE_N_HEAP) & 0x8000,
+			   0x8000));
+}
+
+/************************************************************//**
+TRUE if the record is on a page in compact format.
+@return	nonzero if in compact format */
+UNIV_INLINE
+ulint
+page_rec_is_comp(
+/*=============*/
+	const rec_t*	rec)	/*!< in: record */
+{
+	return(page_is_comp(page_align(rec)));
+}
+
+/***************************************************************//**
+Returns the heap number of a record.
+@return	heap number */
+UNIV_INLINE
+ulint
+page_rec_get_heap_no(
+/*=================*/
+	const rec_t*	rec)	/*!< in: the physical record */
+{
+	if (page_rec_is_comp(rec)) {
+		return(rec_get_heap_no_new(rec));
+	} else {
+		return(rec_get_heap_no_old(rec));
+	}
+}
+
+/************************************************************//**
+Determine whether the page is a B-tree leaf.
+@return	TRUE if the page is a B-tree leaf */
+UNIV_INLINE
+ibool
+page_is_leaf(
+/*=========*/
+	const page_t*	page)	/*!< in: page */
+{
+	return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL)));
+}
+
+/************************************************************//**
+Gets the offset of the first record on the page.
+@return	offset of the first record in record list, relative from page */
+UNIV_INLINE
+ulint
+page_get_infimum_offset(
+/*====================*/
+	const page_t*	page)	/*!< in: page which must have record(s) */
+{
+	ut_ad(page);
+	ut_ad(!page_offset(page));
+
+	if (page_is_comp(page)) {
+		return(PAGE_NEW_INFIMUM);
+	} else {
+		return(PAGE_OLD_INFIMUM);
+	}
+}
+
+/************************************************************//**
+Gets the offset of the last record on the page.
+@return	offset of the last record in record list, relative from page */
+UNIV_INLINE
+ulint
+page_get_supremum_offset(
+/*=====================*/
+	const page_t*	page)	/*!< in: page which must have record(s) */
+{
+	ut_ad(page);
+	ut_ad(!page_offset(page));
+
+	if (page_is_comp(page)) {
+		return(PAGE_NEW_SUPREMUM);
+	} else {
+		return(PAGE_OLD_SUPREMUM);
+	}
+}
+
+/************************************************************//**
+TRUE if the record is a user record on the page.
+@return	TRUE if a user record */
+UNIV_INLINE
+ibool
+page_rec_is_user_rec_low(
+/*=====================*/
+	ulint	offset)	/*!< in: record offset on page */
+{
+	ut_ad(offset >= PAGE_NEW_INFIMUM);
+#if PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM
+# error "PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM"
+#endif
+#if PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM
+# error "PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM"
+#endif
+#if PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM
+# error "PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM"
+#endif
+#if PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM
+# error "PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM"
+#endif
+#if PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END
+# error "PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END"
+#endif
+#if PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END
+# error "PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END"
+#endif
+	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+
+	return(UNIV_LIKELY(offset != PAGE_NEW_SUPREMUM)
+	       && UNIV_LIKELY(offset != PAGE_NEW_INFIMUM)
+	       && UNIV_LIKELY(offset != PAGE_OLD_INFIMUM)
+	       && UNIV_LIKELY(offset != PAGE_OLD_SUPREMUM));
+}
+
+/************************************************************//**
+TRUE if the record is the supremum record on a page.
+@return	TRUE if the supremum record */
+UNIV_INLINE
+ibool
+page_rec_is_supremum_low(
+/*=====================*/
+	ulint	offset)	/*!< in: record offset on page */
+{
+	ut_ad(offset >= PAGE_NEW_INFIMUM);
+	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+
+	return(UNIV_UNLIKELY(offset == PAGE_NEW_SUPREMUM)
+	       || UNIV_UNLIKELY(offset == PAGE_OLD_SUPREMUM));
+}
+
+/************************************************************//**
+TRUE if the record is the infimum record on a page.
+@return	TRUE if the infimum record */
+UNIV_INLINE
+ibool
+page_rec_is_infimum_low(
+/*====================*/
+	ulint	offset)	/*!< in: record offset on page */
+{
+	ut_ad(offset >= PAGE_NEW_INFIMUM);
+	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+
+	return(UNIV_UNLIKELY(offset == PAGE_NEW_INFIMUM)
+	       || UNIV_UNLIKELY(offset == PAGE_OLD_INFIMUM));
+}
+
+/************************************************************//**
+TRUE if the record is a user record on the page.
+@return	TRUE if a user record */
+UNIV_INLINE
+ibool
+page_rec_is_user_rec(
+/*=================*/
+	const rec_t*	rec)	/*!< in: record */
+{
+	return(page_rec_is_user_rec_low(page_offset(rec)));
+}
+
+/************************************************************//**
+TRUE if the record is the supremum record on a page.
+@return	TRUE if the supremum record */
+UNIV_INLINE
+ibool
+page_rec_is_supremum(
+/*=================*/
+	const rec_t*	rec)	/*!< in: record */
+{
+	return(page_rec_is_supremum_low(page_offset(rec)));
+}
+
+/************************************************************//**
+TRUE if the record is the infimum record on a page.
+@return	TRUE if the infimum record */
+UNIV_INLINE
+ibool
+page_rec_is_infimum(
+/*================*/
+	const rec_t*	rec)	/*!< in: record */
+{
+	return(page_rec_is_infimum_low(page_offset(rec)));
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Compares a data tuple to a physical record. Differs from the function
+cmp_dtuple_rec_with_match in the way that the record must reside on an
+index page, and also page infimum and supremum records can be given in
+the parameter rec. These are considered as the negative infinity and
+the positive infinity in the alphabetical order.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared */
+UNIV_INLINE
+int
+page_cmp_dtuple_rec_with_match(
+/*===========================*/
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record on a page; may also
+				be page infimum or supremum, in which case
+				matched-parameter values below are not
+				affected */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint*		matched_fields, /*!< in/out: number of already completely
+				matched fields; when function returns
+				contains the value for current comparison */
+	ulint*		matched_bytes) /*!< in/out: number of already matched
+				bytes within the first field not completely
+				matched; when function returns contains the
+				value for current comparison */
+{
+	ulint	rec_offset;
+
+	ut_ad(dtuple_check_typed(dtuple));
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
+
+	rec_offset = page_offset(rec);
+
+	if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_INFIMUM)
+	    || UNIV_UNLIKELY(rec_offset == PAGE_OLD_INFIMUM)) {
+		return(1);
+	}
+	if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_SUPREMUM)
+	    || UNIV_UNLIKELY(rec_offset == PAGE_OLD_SUPREMUM)) {
+		return(-1);
+	}
+
+	return(cmp_dtuple_rec_with_match(dtuple, rec, offsets,
+					 matched_fields,
+					 matched_bytes));
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*************************************************************//**
+Gets the page number.
+@return	page number */
+UNIV_INLINE
+ulint
+page_get_page_no(
+/*=============*/
+	const page_t*	page)	/*!< in: page */
+{
+	ut_ad(page == page_align((page_t*) page));
+	return(mach_read_from_4(page + FIL_PAGE_OFFSET));
+}
+
+/*************************************************************//**
+Gets the tablespace identifier.
+@return	space id */
+UNIV_INLINE
+ulint
+page_get_space_id(
+/*==============*/
+	const page_t*	page)	/*!< in: page */
+{
+	ut_ad(page == page_align((page_t*) page));
+	return(mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+}
+
+/*************************************************************//**
+Gets the number of user records on page (infimum and supremum records
+are not user records).
+@return	number of user records */
+UNIV_INLINE
+ulint
+page_get_n_recs(
+/*============*/
+	const page_t*	page)	/*!< in: index page */
+{
+	return(page_header_get_field(page, PAGE_N_RECS));
+}
+
+/*************************************************************//**
+Gets the number of dir slots in directory.
+@return	number of slots */
+UNIV_INLINE
+ulint
+page_dir_get_n_slots(
+/*=================*/
+	const page_t*	page)	/*!< in: index page */
+{
+	return(page_header_get_field(page, PAGE_N_DIR_SLOTS));
+}
+/*************************************************************//**
+Sets the number of dir slots in directory. */
+UNIV_INLINE
+void
+page_dir_set_n_slots(
+/*=================*/
+	page_t*		page,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	ulint		n_slots)/*!< in: number of slots */
+{
+	page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots);
+}
+
+/*************************************************************//**
+Gets the number of records in the heap.
+@return	number of user records */
+UNIV_INLINE
+ulint
+page_dir_get_n_heap(
+/*================*/
+	const page_t*	page)	/*!< in: index page */
+{
+	return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff);
+}
+
+/*************************************************************//**
+Sets the number of records in the heap. */
+UNIV_INLINE
+void
+page_dir_set_n_heap(
+/*================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL.
+				Note that the size of the dense page directory
+				in the compressed page trailer is
+				n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
+	ulint		n_heap)	/*!< in: number of records */
+{
+	ut_ad(n_heap < 0x8000);
+	ut_ad(!page_zip || n_heap
+	      == (page_header_get_field(page, PAGE_N_HEAP) & 0x7fff) + 1);
+
+	page_header_set_field(page, page_zip, PAGE_N_HEAP, n_heap
+			      | (0x8000
+				 & page_header_get_field(page, PAGE_N_HEAP)));
+}
+
+#ifdef UNIV_DEBUG
+/*************************************************************//**
+Gets pointer to nth directory slot.
+@return	pointer to dir slot */
+UNIV_INLINE
+page_dir_slot_t*
+page_dir_get_nth_slot(
+/*==================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		n)	/*!< in: position */
+{
+	ut_ad(page_dir_get_n_slots(page) > n);
+
+	return((page_dir_slot_t*)
+	       page + UNIV_PAGE_SIZE - PAGE_DIR
+	       - (n + 1) * PAGE_DIR_SLOT_SIZE);
+}
+#endif /* UNIV_DEBUG */
+
+/**************************************************************//**
+Used to check the consistency of a record on a page.
+@return	TRUE if succeed */
+UNIV_INLINE
+ibool
+page_rec_check(
+/*===========*/
+	const rec_t*	rec)	/*!< in: record */
+{
+	const page_t*	page = page_align(rec);
+
+	ut_a(rec);
+
+	ut_a(page_offset(rec) <= page_header_get_field(page, PAGE_HEAP_TOP));
+	ut_a(page_offset(rec) >= PAGE_DATA);
+
+	return(TRUE);
+}
+
+/***************************************************************//**
+Gets the record pointed to by a directory slot.
+@return	pointer to record */
+UNIV_INLINE
+const rec_t*
+page_dir_slot_get_rec(
+/*==================*/
+	const page_dir_slot_t*	slot)	/*!< in: directory slot */
+{
+	return(page_align(slot) + mach_read_from_2(slot));
+}
+
+/***************************************************************//**
+This is used to set the record offset in a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_rec(
+/*==================*/
+	page_dir_slot_t* slot,	/*!< in: directory slot */
+	rec_t*		 rec)	/*!< in: record on the page */
+{
+	ut_ad(page_rec_check(rec));
+
+	mach_write_to_2(slot, page_offset(rec));
+}
+
+/***************************************************************//**
+Gets the number of records owned by a directory slot.
+@return	number of records */
+UNIV_INLINE
+ulint
+page_dir_slot_get_n_owned(
+/*======================*/
+	const page_dir_slot_t*	slot)	/*!< in: page directory slot */
+{
+	const rec_t*	rec	= page_dir_slot_get_rec(slot);
+	if (page_rec_is_comp(slot)) {
+		return(rec_get_n_owned_new(rec));
+	} else {
+		return(rec_get_n_owned_old(rec));
+	}
+}
+
+/***************************************************************//**
+This is used to set the owned records field of a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_n_owned(
+/*======================*/
+	page_dir_slot_t*slot,	/*!< in/out: directory slot */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		n)	/*!< in: number of records owned by the slot */
+{
+	rec_t*	rec	= (rec_t*) page_dir_slot_get_rec(slot);
+	if (page_rec_is_comp(slot)) {
+		rec_set_n_owned_new(rec, page_zip, n);
+	} else {
+		ut_ad(!page_zip);
+		rec_set_n_owned_old(rec, n);
+	}
+}
+
+/************************************************************//**
+Calculates the space reserved for directory slots of a given number of
+records. The exact value is a fraction number n * PAGE_DIR_SLOT_SIZE /
+PAGE_DIR_SLOT_MIN_N_OWNED, and it is rounded upwards to an integer. */
+UNIV_INLINE
+ulint
+page_dir_calc_reserved_space(
+/*=========================*/
+	ulint	n_recs)		/*!< in: number of records */
+{
+	return((PAGE_DIR_SLOT_SIZE * n_recs + PAGE_DIR_SLOT_MIN_N_OWNED - 1)
+	       / PAGE_DIR_SLOT_MIN_N_OWNED);
+}
+
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return	pointer to next record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_low(
+/*==================*/
+	const rec_t*	rec,	/*!< in: pointer to record */
+	ulint		comp)	/*!< in: nonzero=compact page layout */
+{
+	ulint		offs;
+	const page_t*	page;
+
+	ut_ad(page_rec_check(rec));
+
+	page = page_align(rec);
+
+	offs = rec_get_next_offs(rec, comp);
+
+	if (UNIV_UNLIKELY(offs >= UNIV_PAGE_SIZE)) {
+		fprintf(stderr,
+			"InnoDB: Next record offset is nonsensical %lu"
+			" in record at offset %lu\n"
+			"InnoDB: rec address %p, space id %lu, page %lu\n",
+			(ulong)offs, (ulong) page_offset(rec),
+			(void*) rec,
+			(ulong) page_get_space_id(page),
+			(ulong) page_get_page_no(page));
+		buf_page_print(page, 0);
+
+		ut_error;
+	}
+
+	if (UNIV_UNLIKELY(offs == 0)) {
+
+		return(NULL);
+	}
+
+	return(page + offs);
+}
+
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return	pointer to next record */
+UNIV_INLINE
+rec_t*
+page_rec_get_next(
+/*==============*/
+	rec_t*	rec)	/*!< in: pointer to record */
+{
+	return((rec_t*) page_rec_get_next_low(rec, page_rec_is_comp(rec)));
+}
+
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return	pointer to next record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_const(
+/*====================*/
+	const rec_t*	rec)	/*!< in: pointer to record */
+{
+	return(page_rec_get_next_low(rec, page_rec_is_comp(rec)));
+}
+
+/************************************************************//**
+Sets the pointer to the next record on the page. */
+UNIV_INLINE
+void
+page_rec_set_next(
+/*==============*/
+	rec_t*	rec,		/*!< in: pointer to record,
+				must not be page supremum */
+	rec_t*	next)		/*!< in: pointer to next record,
+				must not be page infimum */
+{
+	ulint	offs;
+
+	ut_ad(page_rec_check(rec));
+	ut_ad(!page_rec_is_supremum(rec));
+	ut_ad(rec != next);
+
+	ut_ad(!next || !page_rec_is_infimum(next));
+	ut_ad(!next || page_align(rec) == page_align(next));
+
+	if (UNIV_LIKELY(next != NULL)) {
+		offs = page_offset(next);
+	} else {
+		offs = 0;
+	}
+
+	if (page_rec_is_comp(rec)) {
+		rec_set_next_offs_new(rec, offs);
+	} else {
+		rec_set_next_offs_old(rec, offs);
+	}
+}
+
+/************************************************************//**
+Gets the pointer to the previous record.
+@return	pointer to previous record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_prev_const(
+/*====================*/
+	const rec_t*	rec)	/*!< in: pointer to record, must not be page
+				infimum */
+{
+	const page_dir_slot_t*	slot;
+	ulint			slot_no;
+	const rec_t*		rec2;
+	const rec_t*		prev_rec = NULL;
+	const page_t*		page;
+
+	ut_ad(page_rec_check(rec));
+
+	page = page_align(rec);
+
+	ut_ad(!page_rec_is_infimum(rec));
+
+	slot_no = page_dir_find_owner_slot(rec);
+
+	ut_a(slot_no != 0);
+
+	slot = page_dir_get_nth_slot(page, slot_no - 1);
+
+	rec2 = page_dir_slot_get_rec(slot);
+
+	if (page_is_comp(page)) {
+		while (rec != rec2) {
+			prev_rec = rec2;
+			rec2 = page_rec_get_next_low(rec2, TRUE);
+		}
+	} else {
+		while (rec != rec2) {
+			prev_rec = rec2;
+			rec2 = page_rec_get_next_low(rec2, FALSE);
+		}
+	}
+
+	ut_a(prev_rec);
+
+	return(prev_rec);
+}
+
+/************************************************************//**
+Gets the pointer to the previous record.
+@return	pointer to previous record */
+UNIV_INLINE
+rec_t*
+page_rec_get_prev(
+/*==============*/
+	rec_t*	rec)	/*!< in: pointer to record, must not be page
+			infimum */
+{
+	return((rec_t*) page_rec_get_prev_const(rec));
+}
+
+/***************************************************************//**
+Looks for the record which owns the given record.
+@return	the owner record */
+UNIV_INLINE
+rec_t*
+page_rec_find_owner_rec(
+/*====================*/
+	rec_t*	rec)	/*!< in: the physical record */
+{
+	ut_ad(page_rec_check(rec));
+
+	if (page_rec_is_comp(rec)) {
+		while (rec_get_n_owned_new(rec) == 0) {
+			rec = page_rec_get_next(rec);
+		}
+	} else {
+		while (rec_get_n_owned_old(rec) == 0) {
+			rec = page_rec_get_next(rec);
+		}
+	}
+
+	return(rec);
+}
+
+/**********************************************************//**
+Returns the base extra size of a physical record.  This is the
+size of the fixed header, independent of the record size.
+@return	REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
+UNIV_INLINE
+ulint
+page_rec_get_base_extra_size(
+/*=========================*/
+	const rec_t*	rec)	/*!< in: physical record */
+{
+#if REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES
+# error "REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES"
+#endif
+	return(REC_N_NEW_EXTRA_BYTES + (ulint) !page_rec_is_comp(rec));
+}
+
+/************************************************************//**
+Returns the sum of the sizes of the records in the record list, excluding
+the infimum and supremum records.
+@return	data in bytes */
+UNIV_INLINE
+ulint
+page_get_data_size(
+/*===============*/
+	const page_t*	page)	/*!< in: index page */
+{
+	ulint	ret;
+
+	ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP)
+		      - (page_is_comp(page)
+			 ? PAGE_NEW_SUPREMUM_END
+			 : PAGE_OLD_SUPREMUM_END)
+		      - page_header_get_field(page, PAGE_GARBAGE));
+
+	ut_ad(ret < UNIV_PAGE_SIZE);
+
+	return(ret);
+}
+
+
+/************************************************************//**
+Allocates a block of memory from the free list of an index page. */
+UNIV_INTERN
+void
+page_mem_alloc_free(
+/*================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page with enough
+				space available for inserting the record,
+				or NULL */
+	rec_t*		next_rec,/*!< in: pointer to the new head of the
+				free record list */
+	ulint		need)	/*!< in: number of bytes allocated */
+{
+	ulint		garbage;
+
+#ifdef UNIV_DEBUG
+	const rec_t*	old_rec	= page_header_get_ptr(page, PAGE_FREE);
+	ulint		next_offs;
+
+	ut_ad(old_rec);
+	next_offs = rec_get_next_offs(old_rec, page_is_comp(page));
+	ut_ad(next_rec == (next_offs ? page + next_offs : NULL));
+#endif
+
+	page_header_set_ptr(page, page_zip, PAGE_FREE, next_rec);
+
+	garbage = page_header_get_field(page, PAGE_GARBAGE);
+	ut_ad(garbage >= need);
+
+	page_header_set_field(page, page_zip, PAGE_GARBAGE, garbage - need);
+}
+
+/*************************************************************//**
+Calculates free space if a page is emptied.
+@return	free space */
+UNIV_INLINE
+ulint
+page_get_free_space_of_empty(
+/*=========================*/
+	ulint	comp)		/*!< in: nonzero=compact page layout */
+{
+	if (UNIV_LIKELY(comp)) {
+		return((ulint)(UNIV_PAGE_SIZE
+			       - PAGE_NEW_SUPREMUM_END
+			       - PAGE_DIR
+			       - 2 * PAGE_DIR_SLOT_SIZE));
+	}
+
+	return((ulint)(UNIV_PAGE_SIZE
+		       - PAGE_OLD_SUPREMUM_END
+		       - PAGE_DIR
+		       - 2 * PAGE_DIR_SLOT_SIZE));
+}
+
+/************************************************************//**
+Each user record on a page, and also the deleted user records in the heap
+takes its size plus the fraction of the dir cell size /
+PAGE_DIR_SLOT_MIN_N_OWNED bytes for it. If the sum of these exceeds the
+value of page_get_free_space_of_empty, the insert is impossible, otherwise
+it is allowed. This function returns the maximum combined size of records
+which can be inserted on top of the record heap.
+@return	maximum combined size for inserted records */
+UNIV_INLINE
+ulint
+page_get_max_insert_size(
+/*=====================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		n_recs)	/*!< in: number of records */
+{
+	ulint	occupied;
+	ulint	free_space;
+
+	if (page_is_comp(page)) {
+		occupied = page_header_get_field(page, PAGE_HEAP_TOP)
+			- PAGE_NEW_SUPREMUM_END
+			+ page_dir_calc_reserved_space(
+				n_recs + page_dir_get_n_heap(page) - 2);
+
+		free_space = page_get_free_space_of_empty(TRUE);
+	} else {
+		occupied = page_header_get_field(page, PAGE_HEAP_TOP)
+			- PAGE_OLD_SUPREMUM_END
+			+ page_dir_calc_reserved_space(
+				n_recs + page_dir_get_n_heap(page) - 2);
+
+		free_space = page_get_free_space_of_empty(FALSE);
+	}
+
+	/* Above the 'n_recs +' part reserves directory space for the new
+	inserted records; the '- 2' excludes page infimum and supremum
+	records */
+
+	if (occupied > free_space) {
+
+		return(0);
+	}
+
+	return(free_space - occupied);
+}
+
+/************************************************************//**
+Returns the maximum combined size of records which can be inserted on top
+of the record heap if a page is first reorganized.
+@return	maximum combined size for inserted records */
+UNIV_INLINE
+ulint
+page_get_max_insert_size_after_reorganize(
+/*======================================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		n_recs)	/*!< in: number of records */
+{
+	ulint	occupied;
+	ulint	free_space;
+
+	occupied = page_get_data_size(page)
+		+ page_dir_calc_reserved_space(n_recs + page_get_n_recs(page));
+
+	free_space = page_get_free_space_of_empty(page_is_comp(page));
+
+	if (occupied > free_space) {
+
+		return(0);
+	}
+
+	return(free_space - occupied);
+}
+
+/************************************************************//**
+Puts a record to free list. */
+UNIV_INLINE
+void
+page_mem_free(
+/*==========*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	rec_t*		rec,	/*!< in: pointer to the (origin of) record */
+	dict_index_t*	index,	/*!< in: index of rec */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	rec_t*		free;
+	ulint		garbage;
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+	free = page_header_get_ptr(page, PAGE_FREE);
+
+	page_rec_set_next(rec, free);
+	page_header_set_ptr(page, page_zip, PAGE_FREE, rec);
+
+	garbage = page_header_get_field(page, PAGE_GARBAGE);
+
+	page_header_set_field(page, page_zip, PAGE_GARBAGE,
+			      garbage + rec_offs_size(offsets));
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		page_zip_dir_delete(page_zip, rec, index, offsets, free);
+	} else {
+		page_header_set_field(page, page_zip, PAGE_N_RECS,
+				      page_get_n_recs(page) - 1);
+	}
+}
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE	UNIV_INLINE_ORIGINAL
+#endif
diff --git a/storage/innodb_plugin/include/page0types.h b/storage/innodb_plugin/include/page0types.h
new file mode 100644
index 00000000000..d9a277bf208
--- /dev/null
+++ b/storage/innodb_plugin/include/page0types.h
@@ -0,0 +1,150 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/page0types.h
+Index page routines
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef page0types_h
+#define page0types_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "mtr0types.h"
+
+/** Eliminates a name collision on HP-UX */
+#define page_t	   ib_page_t
+/** Type of the index page */
+typedef	byte		page_t;
+/** Index page cursor */
+typedef struct page_cur_struct	page_cur_t;
+
+/** Compressed index page */
+typedef byte				page_zip_t;
+/** Compressed page descriptor */
+typedef struct page_zip_des_struct	page_zip_des_t;
+
+/* The following definitions would better belong to page0zip.h,
+but we cannot include page0zip.h from rem0rec.ic, because
+page0*.h includes rem0rec.h and may include rem0rec.ic. */
+
+/** Number of bits needed for representing different compressed page sizes */
+#define PAGE_ZIP_SSIZE_BITS 3
+
+/** log2 of smallest compressed page size */
+#define PAGE_ZIP_MIN_SIZE_SHIFT	10
+/** Smallest compressed page size */
+#define PAGE_ZIP_MIN_SIZE	(1 << PAGE_ZIP_MIN_SIZE_SHIFT)
+
+/** Number of supported compressed page sizes */
+#define PAGE_ZIP_NUM_SSIZE (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 2)
+#if PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS)
+# error "PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS)"
+#endif
+
+/** Compressed page descriptor */
+struct page_zip_des_struct
+{
+	page_zip_t*	data;		/*!< compressed page data */
+
+#ifdef UNIV_DEBUG
+	unsigned	m_start:16;	/*!< start offset of modification log */
+#endif /* UNIV_DEBUG */
+	unsigned	m_end:16;	/*!< end offset of modification log */
+	unsigned	m_nonempty:1;	/*!< TRUE if the modification log
+					is not empty */
+	unsigned	n_blobs:12;	/*!< number of externally stored
+					columns on the page; the maximum
+					is 744 on a 16 KiB page */
+	unsigned	ssize:PAGE_ZIP_SSIZE_BITS;
+					/*!< 0 or compressed page size;
+					the size in bytes is
+					PAGE_ZIP_MIN_SIZE << (ssize - 1). */
+};
+
+/** Compression statistics for a given page size */
+struct page_zip_stat_struct {
+	/** Number of page compressions */
+	ulint		compressed;
+	/** Number of successful page compressions */
+	ulint		compressed_ok;
+	/** Number of page decompressions */
+	ulint		decompressed;
+	/** Duration of page compressions in microseconds */
+	ib_uint64_t	compressed_usec;
+	/** Duration of page decompressions in microseconds */
+	ib_uint64_t	decompressed_usec;
+};
+
+/** Compression statistics */
+typedef struct page_zip_stat_struct page_zip_stat_t;
+
+/** Statistics on compression, indexed by page_zip_des_struct::ssize - 1 */
+extern page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
+
+/**********************************************************************//**
+Write the "deleted" flag of a record on a compressed page.  The flag must
+already have been written on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_rec_set_deleted(
+/*=====================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record on the uncompressed page */
+	ulint		flag)	/*!< in: the deleted flag (nonzero=TRUE) */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Write the "owned" flag of a record on a compressed page.  The n_owned field
+must already have been written on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_rec_set_owned(
+/*===================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record on the uncompressed page */
+	ulint		flag)	/*!< in: the owned flag (nonzero=TRUE) */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Shift the dense page directory when a record is deleted. */
+UNIV_INTERN
+void
+page_zip_dir_delete(
+/*================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	byte*		rec,	/*!< in: deleted record */
+	dict_index_t*	index,	/*!< in: index of rec */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec) */
+	const byte*	free)	/*!< in: previous start of the free list */
+	__attribute__((nonnull(1,2,3,4)));
+
+/**********************************************************************//**
+Add a slot to the dense page directory. */
+UNIV_INTERN
+void
+page_zip_dir_add_slot(
+/*==================*/
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
+	ulint		is_clustered)	/*!< in: nonzero for clustered index,
+					zero for others */
+	__attribute__((nonnull));
+#endif
diff --git a/storage/innodb_plugin/include/page0zip.h b/storage/innodb_plugin/include/page0zip.h
new file mode 100644
index 00000000000..9aaa066306b
--- /dev/null
+++ b/storage/innodb_plugin/include/page0zip.h
@@ -0,0 +1,471 @@
+/*****************************************************************************
+
+Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/page0zip.h
+Compressed page interface
+
+Created June 2005 by Marko Makela
+*******************************************************/
+
+#ifndef page0zip_h
+#define page0zip_h
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE
+#endif
+
+#include "mtr0types.h"
+#include "page0types.h"
+#include "buf0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "mem0mem.h"
+
+/**********************************************************************//**
+Determine the size of a compressed page in bytes.
+@return	size in bytes */
+UNIV_INLINE
+ulint
+page_zip_get_size(
+/*==============*/
+	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
+	__attribute__((nonnull, pure));
+/**********************************************************************//**
+Set the size of a compressed page in bytes. */
+UNIV_INLINE
+void
+page_zip_set_size(
+/*==============*/
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
+	ulint		size);		/*!< in: size in bytes */
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Determine if a record is so big that it needs to be stored externally.
+@return	FALSE if the entire record can be stored locally on the page */
+UNIV_INLINE
+ibool
+page_zip_rec_needs_ext(
+/*===================*/
+	ulint	rec_size,	/*!< in: length of the record in bytes */
+	ulint	comp,		/*!< in: nonzero=compact format */
+	ulint	n_fields,	/*!< in: number of fields in the record;
+				ignored if zip_size == 0 */
+	ulint	zip_size)	/*!< in: compressed page size in bytes, or 0 */
+	__attribute__((const));
+
+/**********************************************************************//**
+Determine the guaranteed free space on an empty page.
+@return	minimum payload size on the page */
+UNIV_INTERN
+ulint
+page_zip_empty_size(
+/*================*/
+	ulint	n_fields,	/*!< in: number of columns in the index */
+	ulint	zip_size)	/*!< in: compressed page size in bytes */
+	__attribute__((const));
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Initialize a compressed page descriptor. */
+UNIV_INLINE
+void
+page_zip_des_init(
+/*==============*/
+	page_zip_des_t*	page_zip);	/*!< in/out: compressed page
+					descriptor */
+
+/**********************************************************************//**
+Configure the zlib allocator to use the given memory heap. */
+UNIV_INTERN
+void
+page_zip_set_alloc(
+/*===============*/
+	void*		stream,		/*!< in/out: zlib stream */
+	mem_heap_t*	heap);		/*!< in: memory heap to use */
+
+/**********************************************************************//**
+Compress a page.
+@return TRUE on success, FALSE on failure; page_zip will be left
+intact on failure. */
+UNIV_INTERN
+ibool
+page_zip_compress(
+/*==============*/
+	page_zip_des_t*	page_zip,/*!< in: size; out: data, n_blobs,
+				m_start, m_end, m_nonempty */
+	const page_t*	page,	/*!< in: uncompressed page */
+	dict_index_t*	index,	/*!< in: index of the B-tree node */
+	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
+	__attribute__((nonnull(1,2,3)));
+
+/**********************************************************************//**
+Decompress a page.  This function should tolerate errors on the compressed
+page.  Instead of letting assertions fail, it will return FALSE if an
+inconsistency is detected.
+@return	TRUE on success, FALSE on failure */
+UNIV_INTERN
+ibool
+page_zip_decompress(
+/*================*/
+	page_zip_des_t*	page_zip,/*!< in: data, ssize;
+				out: m_start, m_end, m_nonempty, n_blobs */
+	page_t*		page)	/*!< out: uncompressed page, may be trashed */
+	__attribute__((nonnull));
+
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Validate a compressed page descriptor.
+@return	TRUE if ok */
+UNIV_INLINE
+ibool
+page_zip_simple_validate(
+/*=====================*/
+	const page_zip_des_t*	page_zip);	/*!< in: compressed page
+						descriptor */
+#endif /* UNIV_DEBUG */
+
+#ifdef UNIV_ZIP_DEBUG
+/**********************************************************************//**
+Check that the compressed and decompressed pages match.
+@return	TRUE if valid, FALSE if not */
+UNIV_INTERN
+ibool
+page_zip_validate_low(
+/*==================*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	const page_t*		page,	/*!< in: uncompressed page */
+	ibool			sloppy)	/*!< in: FALSE=strict,
+					TRUE=ignore the MIN_REC_FLAG */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Check that the compressed and decompressed pages match. */
+UNIV_INTERN
+ibool
+page_zip_validate(
+/*==============*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	const page_t*		page)	/*!< in: uncompressed page */
+	__attribute__((nonnull));
+#endif /* UNIV_ZIP_DEBUG */
+
+/**********************************************************************//**
+Determine how big record can be inserted without recompressing the page.
+@return a positive number indicating the maximum size of a record
+whose insertion is guaranteed to succeed, or zero or negative */
+UNIV_INLINE
+lint
+page_zip_max_ins_size(
+/*==================*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	ibool			is_clust)/*!< in: TRUE if clustered index */
+	__attribute__((nonnull, pure));
+
+/**********************************************************************//**
+Determine if enough space is available in the modification log.
+@return	TRUE if page_zip_write_rec() will succeed */
+UNIV_INLINE
+ibool
+page_zip_available(
+/*===============*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	ibool			is_clust,/*!< in: TRUE if clustered index */
+	ulint			length,	/*!< in: combined size of the record */
+	ulint			create)	/*!< in: nonzero=add the record to
+					the heap */
+	__attribute__((nonnull, pure));
+
+/**********************************************************************//**
+Write data to the uncompressed header portion of a page.  The data must
+already have been written to the uncompressed page. */
+UNIV_INLINE
+void
+page_zip_write_header(
+/*==================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	str,	/*!< in: address on the uncompressed page */
+	ulint		length,	/*!< in: length of the data */
+	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
+	__attribute__((nonnull(1,2)));
+
+/**********************************************************************//**
+Write an entire record on the compressed page.  The data must already
+have been written to the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_write_rec(
+/*===============*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record being written */
+	dict_index_t*	index,	/*!< in: the index the record belongs to */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint		create)	/*!< in: nonzero=insert, zero=update */
+	__attribute__((nonnull));
+
+/***********************************************************//**
+Parses a log record of writing a BLOB pointer of a record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_write_blob_ptr(
+/*==========================*/
+	byte*		ptr,	/*!< in: redo log buffer */
+	byte*		end_ptr,/*!< in: redo log buffer end */
+	page_t*		page,	/*!< in/out: uncompressed page */
+	page_zip_des_t*	page_zip);/*!< in/out: compressed page */
+
+/**********************************************************************//**
+Write a BLOB pointer of a record on the leaf page of a clustered index.
+The information must already have been updated on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_write_blob_ptr(
+/*====================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in/out: record whose data is being
+				written */
+	dict_index_t*	index,	/*!< in: index of the page */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint		n,	/*!< in: column index */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle,
+				or NULL if no logging is needed */
+	__attribute__((nonnull(1,2,3,4)));
+
+/***********************************************************//**
+Parses a log record of writing the node pointer of a record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_write_node_ptr(
+/*==========================*/
+	byte*		ptr,	/*!< in: redo log buffer */
+	byte*		end_ptr,/*!< in: redo log buffer end */
+	page_t*		page,	/*!< in/out: uncompressed page */
+	page_zip_des_t*	page_zip);/*!< in/out: compressed page */
+
+/**********************************************************************//**
+Write the node pointer of a record on a non-leaf compressed page. */
+UNIV_INTERN
+void
+page_zip_write_node_ptr(
+/*====================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	byte*		rec,	/*!< in/out: record */
+	ulint		size,	/*!< in: data size of rec */
+	ulint		ptr,	/*!< in: node pointer */
+	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
+	__attribute__((nonnull(1,2)));
+
+/**********************************************************************//**
+Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
+UNIV_INTERN
+void
+page_zip_write_trx_id_and_roll_ptr(
+/*===============================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	byte*		rec,	/*!< in/out: record */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint		trx_id_col,/*!< in: column number of TRX_ID in rec */
+	trx_id_t	trx_id,	/*!< in: transaction identifier */
+	roll_ptr_t	roll_ptr)/*!< in: roll_ptr */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Write the "deleted" flag of a record on a compressed page.  The flag must
+already have been written on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_rec_set_deleted(
+/*=====================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record on the uncompressed page */
+	ulint		flag)	/*!< in: the deleted flag (nonzero=TRUE) */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Write the "owned" flag of a record on a compressed page.  The n_owned field
+must already have been written on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_rec_set_owned(
+/*===================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record on the uncompressed page */
+	ulint		flag)	/*!< in: the owned flag (nonzero=TRUE) */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Insert a record to the dense page directory. */
+UNIV_INTERN
+void
+page_zip_dir_insert(
+/*================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	prev_rec,/*!< in: record after which to insert */
+	const byte*	free_rec,/*!< in: record from which rec was
+				allocated, or NULL */
+	byte*		rec);	/*!< in: record to insert */
+
+/**********************************************************************//**
+Shift the dense page directory and the array of BLOB pointers
+when a record is deleted. */
+UNIV_INTERN
+void
+page_zip_dir_delete(
+/*================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	byte*		rec,	/*!< in: deleted record */
+	dict_index_t*	index,	/*!< in: index of rec */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec) */
+	const byte*	free)	/*!< in: previous start of the free list */
+	__attribute__((nonnull(1,2,3,4)));
+
+/**********************************************************************//**
+Add a slot to the dense page directory. */
+UNIV_INTERN
+void
+page_zip_dir_add_slot(
+/*==================*/
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
+	ulint		is_clustered)	/*!< in: nonzero for clustered index,
+					zero for others */
+	__attribute__((nonnull));
+
+/***********************************************************//**
+Parses a log record of writing to the header of a page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_write_header(
+/*========================*/
+	byte*		ptr,	/*!< in: redo log buffer */
+	byte*		end_ptr,/*!< in: redo log buffer end */
+	page_t*		page,	/*!< in/out: uncompressed page */
+	page_zip_des_t*	page_zip);/*!< in/out: compressed page */
+
+/**********************************************************************//**
+Write data to the uncompressed header portion of a page.  The data must
+already have been written to the uncompressed page.
+However, the data portion of the uncompressed page may differ from
+the compressed page when a record is being inserted in
+page_cur_insert_rec_low(). */
+UNIV_INLINE
+void
+page_zip_write_header(
+/*==================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	str,	/*!< in: address on the uncompressed page */
+	ulint		length,	/*!< in: length of the data */
+	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
+	__attribute__((nonnull(1,2)));
+
+/**********************************************************************//**
+Reorganize and compress a page.  This is a low-level operation for
+compressed pages, to be used when page_zip_compress() fails.
+On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
+The function btr_page_reorganize() should be preferred whenever possible.
+IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
+non-clustered index, the caller must update the insert buffer free
+bits in the same mini-transaction in such a way that the modification
+will be redo-logged.
+@return TRUE on success, FALSE on failure; page and page_zip will be
+left intact on failure. */
+UNIV_INTERN
+ibool
+page_zip_reorganize(
+/*================*/
+	buf_block_t*	block,	/*!< in/out: page with compressed page;
+				on the compressed page, in: size;
+				out: data, n_blobs,
+				m_start, m_end, m_nonempty */
+	dict_index_t*	index,	/*!< in: index of the B-tree node */
+	mtr_t*		mtr)	/*!< in: mini-transaction */
+	__attribute__((nonnull));
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Copy the records of a page byte for byte.  Do not copy the page header
+or trailer, except those B-tree header fields that are directly
+related to the storage of records.  Also copy PAGE_MAX_TRX_ID.
+NOTE: The caller must update the lock table and the adaptive hash index. */
+UNIV_INTERN
+void
+page_zip_copy_recs(
+/*===============*/
+	page_zip_des_t*		page_zip,	/*!< out: copy of src_zip
+						(n_blobs, m_start, m_end,
+						m_nonempty, data[0..size-1]) */
+	page_t*			page,		/*!< out: copy of src */
+	const page_zip_des_t*	src_zip,	/*!< in: compressed page */
+	const page_t*		src,		/*!< in: page */
+	dict_index_t*		index,		/*!< in: index of the B-tree */
+	mtr_t*			mtr)		/*!< in: mini-transaction */
+	__attribute__((nonnull(1,2,3,4)));
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Parses a log record of compressing an index page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_compress(
+/*====================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	page_t*		page,	/*!< out: uncompressed page */
+	page_zip_des_t*	page_zip)/*!< out: compressed page */
+	__attribute__((nonnull(1,2)));
+
+/**********************************************************************//**
+Calculate the compressed page checksum.
+@return	page checksum */
+UNIV_INTERN
+ulint
+page_zip_calc_checksum(
+/*===================*/
+        const void*     data,   /*!< in: compressed page */
+        ulint           size)   /*!< in: size of compressed page */
+	__attribute__((nonnull));
+
+#ifndef UNIV_HOTBACKUP
+/** Check if a pointer to an uncompressed page matches a compressed page.
+@param ptr	pointer to an uncompressed page frame
+@param page_zip	compressed page descriptor
+@return		TRUE if ptr and page_zip refer to the same block */
+# define PAGE_ZIP_MATCH(ptr, page_zip)			\
+	(buf_frame_get_page_zip(ptr) == (page_zip))
+#else /* !UNIV_HOTBACKUP */
+/** Check if a pointer to an uncompressed page matches a compressed page.
+@param ptr	pointer to an uncompressed page frame
+@param page_zip	compressed page descriptor
+@return		TRUE if ptr and page_zip refer to the same block */
+# define PAGE_ZIP_MATCH(ptr, page_zip)				\
+	(page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data)
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE	UNIV_INLINE_ORIGINAL
+#endif
+
+#ifndef UNIV_NONINL
+# include "page0zip.ic"
+#endif
+
+#endif /* page0zip_h */
diff --git a/storage/innodb_plugin/include/page0zip.ic b/storage/innodb_plugin/include/page0zip.ic
new file mode 100644
index 00000000000..75cc7a9fcc4
--- /dev/null
+++ b/storage/innodb_plugin/include/page0zip.ic
@@ -0,0 +1,397 @@
+/*****************************************************************************
+
+Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/page0zip.ic
+Compressed page interface
+
+Created June 2005 by Marko Makela
+*******************************************************/
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE
+#endif
+
+#include "page0zip.h"
+#include "page0page.h"
+
+/* The format of compressed pages is as follows.
+
+The header and trailer of the uncompressed pages, excluding the page
+directory in the trailer, are copied as is to the header and trailer
+of the compressed page.
+
+At the end of the compressed page, there is a dense page directory
+pointing to every user record contained on the page, including deleted
+records on the free list.  The dense directory is indexed in the
+collation order, i.e., in the order in which the record list is
+linked on the uncompressed page.  The infimum and supremum records are
+excluded.  The two most significant bits of the entries are allocated
+for the delete-mark and an n_owned flag indicating the last record in
+a chain of records pointed to from the sparse page directory on the
+uncompressed page.
+
+The data between PAGE_ZIP_START and the last page directory entry will
+be written in compressed format, starting at offset PAGE_DATA.
+Infimum and supremum records are not stored.  We exclude the
+REC_N_NEW_EXTRA_BYTES in every record header.  These can be recovered
+from the dense page directory stored at the end of the compressed
+page.
+
+The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and
+roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of
+externally stored columns are stored separately, in ascending order of
+heap_no and column index, starting backwards from the dense page
+directory.
+
+The compressed data stream may be followed by a modification log
+covering the compressed portion of the page, as follows.
+
+MODIFICATION LOG ENTRY FORMAT
+- write record:
+  - (heap_no - 1) << 1 (1..2 bytes)
+  - extra bytes backwards
+  - data bytes
+- clear record:
+  - (heap_no - 1) << 1 | 1 (1..2 bytes)
+
+The integer values are stored in a variable-length format:
+- 0xxxxxxx: 0..127
+- 1xxxxxxx xxxxxxxx: 0..32767
+
+The end of the modification log is marked by a 0 byte.
+
+In summary, the compressed page looks like this:
+
+(1) Uncompressed page header (PAGE_DATA bytes)
+(2) Compressed index information
+(3) Compressed page data
+(4) Page modification log (page_zip->m_start..page_zip->m_end)
+(5) Empty zero-filled space
+(6) BLOB pointers (on leaf pages)
+  - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column
+  - in descending collation order
+(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes,
+  - indexed by heap_no
+  - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes
+  - REC_NODE_PTR_SIZE for non-leaf pages
+  - 0 otherwise
+(8) dense page directory, stored backwards
+  - n_dense = n_heap - 2
+  - existing records in ascending collation order
+  - deleted records (free list) in link order
+*/
+
+/** Start offset of the area that will be compressed */
+#define PAGE_ZIP_START		PAGE_NEW_SUPREMUM_END
+/** Size of an compressed page directory entry */
+#define PAGE_ZIP_DIR_SLOT_SIZE	2
+/** Mask of record offsets */
+#define PAGE_ZIP_DIR_SLOT_MASK	0x3fff
+/** 'owned' flag */
+#define PAGE_ZIP_DIR_SLOT_OWNED	0x4000
+/** 'deleted' flag */
+#define PAGE_ZIP_DIR_SLOT_DEL	0x8000
+
+/**********************************************************************//**
+Determine the size of a compressed page in bytes.
+@return	size in bytes */
+UNIV_INLINE
+ulint
+page_zip_get_size(
+/*==============*/
+	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
+{
+	ulint	size;
+
+	if (UNIV_UNLIKELY(!page_zip->ssize)) {
+		return(0);
+	}
+
+	size = (PAGE_ZIP_MIN_SIZE >> 1) << page_zip->ssize;
+
+	ut_ad(size >= PAGE_ZIP_MIN_SIZE);
+	ut_ad(size <= UNIV_PAGE_SIZE);
+
+	return(size);
+}
+/**********************************************************************//**
+Set the size of a compressed page in bytes. */
+UNIV_INLINE
+void
+page_zip_set_size(
+/*==============*/
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
+	ulint		size)		/*!< in: size in bytes */
+{
+	if (size) {
+		int	ssize;
+
+		ut_ad(ut_is_2pow(size));
+
+		for (ssize = 1; size > (ulint) (512 << ssize); ssize++) {
+		}
+
+		page_zip->ssize = ssize;
+	} else {
+		page_zip->ssize = 0;
+	}
+
+	ut_ad(page_zip_get_size(page_zip) == size);
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Determine if a record is so big that it needs to be stored externally.
+@return	FALSE if the entire record can be stored locally on the page */
+UNIV_INLINE
+ibool
+page_zip_rec_needs_ext(
+/*===================*/
+	ulint	rec_size,	/*!< in: length of the record in bytes */
+	ulint	comp,		/*!< in: nonzero=compact format */
+	ulint	n_fields,	/*!< in: number of fields in the record;
+				ignored if zip_size == 0 */
+	ulint	zip_size)	/*!< in: compressed page size in bytes, or 0 */
+{
+	ut_ad(rec_size > comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES);
+	ut_ad(ut_is_2pow(zip_size));
+	ut_ad(comp || !zip_size);
+
+#if UNIV_PAGE_SIZE > REC_MAX_DATA_SIZE
+	if (UNIV_UNLIKELY(rec_size >= REC_MAX_DATA_SIZE)) {
+		return(TRUE);
+	}
+#endif
+
+	if (UNIV_UNLIKELY(zip_size)) {
+		ut_ad(comp);
+		/* On a compressed page, there is a two-byte entry in
+		the dense page directory for every record.  But there
+		is no record header.  There should be enough room for
+		one record on an empty leaf page.  Subtract 1 byte for
+		the encoded heap number.  Check also the available space
+		on the uncompressed page. */
+		return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2)
+		       >= (page_zip_empty_size(n_fields, zip_size) - 1)
+		       || rec_size >= page_get_free_space_of_empty(TRUE) / 2);
+	}
+
+	return(rec_size >= page_get_free_space_of_empty(comp) / 2);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Validate a compressed page descriptor.
+@return	TRUE if ok */
+UNIV_INLINE
+ibool
+page_zip_simple_validate(
+/*=====================*/
+	const page_zip_des_t*	page_zip)/*!< in: compressed page descriptor */
+{
+	ut_ad(page_zip);
+	ut_ad(page_zip->data);
+	ut_ad(page_zip->ssize < PAGE_ZIP_NUM_SSIZE);
+	ut_ad(page_zip_get_size(page_zip)
+	      > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);
+	ut_ad(page_zip->m_start <= page_zip->m_end);
+	ut_ad(page_zip->m_end < page_zip_get_size(page_zip));
+	ut_ad(page_zip->n_blobs
+	      < page_zip_get_size(page_zip) / BTR_EXTERN_FIELD_REF_SIZE);
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
+/**********************************************************************//**
+Determine if the length of the page trailer.
+@return length of the page trailer, in bytes, not including the
+terminating zero byte of the modification log */
+UNIV_INLINE
+ibool
+page_zip_get_trailer_len(
+/*=====================*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	ibool			is_clust,/*!< in: TRUE if clustered index */
+	ulint*			entry_size)/*!< out: size of the uncompressed
+					portion of a user record */
+{
+	ulint	uncompressed_size;
+
+	ut_ad(page_zip_simple_validate(page_zip));
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+	if (UNIV_UNLIKELY(!page_is_leaf(page_zip->data))) {
+		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
+			+ REC_NODE_PTR_SIZE;
+		ut_ad(!page_zip->n_blobs);
+	} else if (UNIV_UNLIKELY(is_clust)) {
+		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
+			+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+	} else {
+		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE;
+		ut_ad(!page_zip->n_blobs);
+	}
+
+	if (entry_size) {
+		*entry_size = uncompressed_size;
+	}
+
+	return((page_dir_get_n_heap(page_zip->data) - 2)
+	       * uncompressed_size
+	       + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
+}
+
+/**********************************************************************//**
+Determine how big record can be inserted without recompressing the page.
+@return a positive number indicating the maximum size of a record
+whose insertion is guaranteed to succeed, or zero or negative */
+UNIV_INLINE
+lint
+page_zip_max_ins_size(
+/*==================*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	ibool			is_clust)/*!< in: TRUE if clustered index */
+{
+	ulint	uncompressed_size;
+	ulint	trailer_len;
+
+	trailer_len = page_zip_get_trailer_len(page_zip, is_clust,
+					       &uncompressed_size);
+
+	/* When a record is created, a pointer may be added to
+	the dense directory.
+	Likewise, space for the columns that will not be
+	compressed will be allocated from the page trailer.
+	Also the BLOB pointers will be allocated from there, but
+	we may as well count them in the length of the record. */
+
+	trailer_len += uncompressed_size;
+
+	return((lint) page_zip_get_size(page_zip)
+	       - trailer_len - page_zip->m_end
+	       - (REC_N_NEW_EXTRA_BYTES - 2));
+}
+
+/**********************************************************************//**
+Determine if enough space is available in the modification log.
+@return	TRUE if enough space is available */
+UNIV_INLINE
+ibool
+page_zip_available(
+/*===============*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	ibool			is_clust,/*!< in: TRUE if clustered index */
+	ulint			length,	/*!< in: combined size of the record */
+	ulint			create)	/*!< in: nonzero=add the record to
+					the heap */
+{
+	ulint	uncompressed_size;
+	ulint	trailer_len;
+
+	ut_ad(length > REC_N_NEW_EXTRA_BYTES);
+
+	trailer_len = page_zip_get_trailer_len(page_zip, is_clust,
+					       &uncompressed_size);
+
+	/* Subtract the fixed extra bytes and add the maximum
+	space needed for identifying the record (encoded heap_no). */
+	length -= REC_N_NEW_EXTRA_BYTES - 2;
+
+	if (UNIV_UNLIKELY(create)) {
+		/* When a record is created, a pointer may be added to
+		the dense directory.
+		Likewise, space for the columns that will not be
+		compressed will be allocated from the page trailer.
+		Also the BLOB pointers will be allocated from there, but
+		we may as well count them in the length of the record. */
+
+		trailer_len += uncompressed_size;
+	}
+
+	return(UNIV_LIKELY(length
+			   + trailer_len
+			   + page_zip->m_end
+			   < page_zip_get_size(page_zip)));
+}
+
+/**********************************************************************//**
+Initialize a compressed page descriptor. */
+UNIV_INLINE
+void
+page_zip_des_init(
+/*==============*/
+	page_zip_des_t*	page_zip)	/*!< in/out: compressed page
+					descriptor */
+{
+	memset(page_zip, 0, sizeof *page_zip);
+}
+
+/**********************************************************************//**
+Write a log record of writing to the uncompressed header portion of a page. */
+UNIV_INTERN
+void
+page_zip_write_header_log(
+/*======================*/
+	const byte*	data,/*!< in: data on the uncompressed page */
+	ulint		length,	/*!< in: length of the data */
+	mtr_t*		mtr);	/*!< in: mini-transaction */
+
+/**********************************************************************//**
+Write data to the uncompressed header portion of a page.  The data must
+already have been written to the uncompressed page.
+However, the data portion of the uncompressed page may differ from
+the compressed page when a record is being inserted in
+page_cur_insert_rec_zip(). */
+UNIV_INLINE
+void
+page_zip_write_header(
+/*==================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	str,	/*!< in: address on the uncompressed page */
+	ulint		length,	/*!< in: length of the data */
+	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
+{
+	ulint	pos;
+
+	ut_ad(PAGE_ZIP_MATCH(str, page_zip));
+	ut_ad(page_zip_simple_validate(page_zip));
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+	pos = page_offset(str);
+
+	ut_ad(pos < PAGE_DATA);
+
+	memcpy(page_zip->data + pos, str, length);
+
+	/* The following would fail in page_cur_insert_rec_zip(). */
+	/* ut_ad(page_zip_validate(page_zip, str - pos)); */
+
+	if (UNIV_LIKELY_NULL(mtr)) {
+#ifndef UNIV_HOTBACKUP
+		page_zip_write_header_log(str, length, mtr);
+#endif /* !UNIV_HOTBACKUP */
+	}
+}
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE	UNIV_INLINE_ORIGINAL
+#endif
diff --git a/storage/innobase/include/pars0grm.h b/storage/innodb_plugin/include/pars0grm.h
similarity index 83%
rename from storage/innobase/include/pars0grm.h
rename to storage/innodb_plugin/include/pars0grm.h
index 0062b8314ee..3de233eed3a 100644
--- a/storage/innobase/include/pars0grm.h
+++ b/storage/innodb_plugin/include/pars0grm.h
@@ -1,28 +1,30 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
+
+As a special exception, when this file is copied by Bison into a
+Bison output file, you may use that output file without restriction.
+This special exception was added by the Free Software Foundation
+in version 1.24 of Bison.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
 /* A Bison parser, made by GNU Bison 1.875d.  */
 
-/* Skeleton parser for Yacc-like parsing with Bison,
-   Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
-
-/* As a special exception, when this file is copied by Bison into a
-   Bison output file, you may use that output file without restriction.
-   This special exception was added by the Free Software Foundation
-   in version 1.24 of Bison.  */
-
 /* Tokens.  */
 #ifndef YYTOKENTYPE
 # define YYTOKENTYPE
diff --git a/storage/innodb_plugin/include/pars0opt.h b/storage/innodb_plugin/include/pars0opt.h
new file mode 100644
index 00000000000..42d956068f8
--- /dev/null
+++ b/storage/innodb_plugin/include/pars0opt.h
@@ -0,0 +1,75 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0opt.h
+Simple SQL optimizer
+
+Created 12/21/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0opt_h
+#define pars0opt_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "usr0types.h"
+#include "pars0sym.h"
+#include "dict0types.h"
+#include "row0sel.h"
+
+/*******************************************************************//**
+Optimizes a select. Decides which indexes to tables to use. The tables
+are accessed in the order that they were written to the FROM part in the
+select statement. */
+UNIV_INTERN
+void
+opt_search_plan(
+/*============*/
+	sel_node_t*	sel_node);	/*!< in: parsed select node */
+/*******************************************************************//**
+Looks for occurrences of the columns of the table in the query subgraph and
+adds them to the list of columns if an occurrence of the same column does not
+already exist in the list. If the column is already in the list, puts a value
+indirection to point to the occurrence in the column list, except if the
+column occurrence we are looking at is in the column list, in which case
+nothing is done. */
+UNIV_INTERN
+void
+opt_find_all_cols(
+/*==============*/
+	ibool		copy_val,	/*!< in: if TRUE, new found columns are
+					added as columns to copy */
+	dict_index_t*	index,		/*!< in: index to use */
+	sym_node_list_t* col_list,	/*!< in: base node of a list where
+					to add new found columns */
+	plan_t*		plan,		/*!< in: plan or NULL */
+	que_node_t*	exp);		/*!< in: expression or condition */
+/********************************************************************//**
+Prints info of a query plan. */
+UNIV_INTERN
+void
+opt_print_query_plan(
+/*=================*/
+	sel_node_t*	sel_node);	/*!< in: select node */
+
+#ifndef UNIV_NONINL
+#include "pars0opt.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/pars0opt.ic b/storage/innodb_plugin/include/pars0opt.ic
new file mode 100644
index 00000000000..e0bb6bf1af2
--- /dev/null
+++ b/storage/innodb_plugin/include/pars0opt.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0opt.ic
+Simple SQL optimizer
+
+Created 12/21/1997 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innodb_plugin/include/pars0pars.h b/storage/innodb_plugin/include/pars0pars.h
new file mode 100644
index 00000000000..a7de7f2292e
--- /dev/null
+++ b/storage/innodb_plugin/include/pars0pars.h
@@ -0,0 +1,742 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0pars.h
+SQL parser
+
+Created 11/19/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0pars_h
+#define pars0pars_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "usr0types.h"
+#include "pars0types.h"
+#include "row0types.h"
+#include "trx0types.h"
+#include "ut0vec.h"
+
+/** Type of the user functions. The first argument is always InnoDB-supplied
+and varies in type, while 'user_arg' is a user-supplied argument. The
+meaning of the return type also varies. See the individual use cases, e.g.
+the FETCH statement, for details on them. */
+typedef void* (*pars_user_func_cb_t)(void* arg, void* user_arg);
+
+/** If the following is set TRUE, the parser will emit debugging
+information */
+extern int	yydebug;
+
+#ifdef UNIV_SQL_DEBUG
+/** If the following is set TRUE, the lexer will print the SQL string
+as it tokenizes it */
+extern ibool	pars_print_lexed;
+#endif /* UNIV_SQL_DEBUG */
+
+/* Global variable used while parsing a single procedure or query : the code is
+NOT re-entrant */
+extern sym_tab_t*	pars_sym_tab_global;
+
+extern pars_res_word_t	pars_to_char_token;
+extern pars_res_word_t	pars_to_number_token;
+extern pars_res_word_t	pars_to_binary_token;
+extern pars_res_word_t	pars_binary_to_number_token;
+extern pars_res_word_t	pars_substr_token;
+extern pars_res_word_t	pars_replstr_token;
+extern pars_res_word_t	pars_concat_token;
+extern pars_res_word_t	pars_length_token;
+extern pars_res_word_t	pars_instr_token;
+extern pars_res_word_t	pars_sysdate_token;
+extern pars_res_word_t	pars_printf_token;
+extern pars_res_word_t	pars_assert_token;
+extern pars_res_word_t	pars_rnd_token;
+extern pars_res_word_t	pars_rnd_str_token;
+extern pars_res_word_t	pars_count_token;
+extern pars_res_word_t	pars_sum_token;
+extern pars_res_word_t	pars_distinct_token;
+extern pars_res_word_t	pars_binary_token;
+extern pars_res_word_t	pars_blob_token;
+extern pars_res_word_t	pars_int_token;
+extern pars_res_word_t	pars_char_token;
+extern pars_res_word_t	pars_float_token;
+extern pars_res_word_t	pars_update_token;
+extern pars_res_word_t	pars_asc_token;
+extern pars_res_word_t	pars_desc_token;
+extern pars_res_word_t	pars_open_token;
+extern pars_res_word_t	pars_close_token;
+extern pars_res_word_t	pars_share_token;
+extern pars_res_word_t	pars_unique_token;
+extern pars_res_word_t	pars_clustered_token;
+
+extern ulint		pars_star_denoter;
+
+/* Procedure parameter types */
+#define PARS_INPUT	0
+#define PARS_OUTPUT	1
+#define PARS_NOT_PARAM	2
+
+int
+yyparse(void);
+
+/*************************************************************//**
+Parses an SQL string returning the query graph.
+@return	own: the query graph */
+UNIV_INTERN
+que_t*
+pars_sql(
+/*=====*/
+	pars_info_t*	info,	/*!< in: extra information, or NULL */
+	const char*	str);	/*!< in: SQL string */
+/*************************************************************//**
+Retrieves characters to the lexical analyzer. */
+UNIV_INTERN
+void
+pars_get_lex_chars(
+/*===============*/
+	char*	buf,		/*!< in/out: buffer where to copy */
+	int*	result,		/*!< out: number of characters copied or EOF */
+	int	max_size);	/*!< in: maximum number of characters which fit
+				in the buffer */
+/*************************************************************//**
+Called by yyparse on error. */
+UNIV_INTERN
+void
+yyerror(
+/*====*/
+	const char*	s);	/*!< in: error message string */
+/*********************************************************************//**
+Parses a variable declaration.
+@return	own: symbol table node of type SYM_VAR */
+UNIV_INTERN
+sym_node_t*
+pars_variable_declaration(
+/*======================*/
+	sym_node_t*	node,	/*!< in: symbol table node allocated for the
+				id of the variable */
+	pars_res_word_t* type);	/*!< in: pointer to a type token */
+/*********************************************************************//**
+Parses a function expression.
+@return	own: function node in a query tree */
+UNIV_INTERN
+func_node_t*
+pars_func(
+/*======*/
+	que_node_t*	res_word,/*!< in: function name reserved word */
+	que_node_t*	arg);	/*!< in: first argument in the argument list */
+/*********************************************************************//**
+Parses an operator expression.
+@return	own: function node in a query tree */
+UNIV_INTERN
+func_node_t*
+pars_op(
+/*====*/
+	int		func,	/*!< in: operator token code */
+	que_node_t*	arg1,	/*!< in: first argument */
+	que_node_t*	arg2);	/*!< in: second argument or NULL for an unary
+				operator */
+/*********************************************************************//**
+Parses an ORDER BY clause. Order by a single column only is supported.
+@return	own: order-by node in a query tree */
+UNIV_INTERN
+order_node_t*
+pars_order_by(
+/*==========*/
+	sym_node_t*	column,	/*!< in: column name */
+	pars_res_word_t* asc);	/*!< in: &pars_asc_token or pars_desc_token */
+/*********************************************************************//**
+Parses a select list; creates a query graph node for the whole SELECT
+statement.
+@return	own: select node in a query tree */
+UNIV_INTERN
+sel_node_t*
+pars_select_list(
+/*=============*/
+	que_node_t*	select_list,	/*!< in: select list */
+	sym_node_t*	into_list);	/*!< in: variables list or NULL */
+/*********************************************************************//**
+Parses a cursor declaration.
+@return	sym_node */
+UNIV_INTERN
+que_node_t*
+pars_cursor_declaration(
+/*====================*/
+	sym_node_t*	sym_node,	/*!< in: cursor id node in the symbol
+					table */
+	sel_node_t*	select_node);	/*!< in: select node */
+/*********************************************************************//**
+Parses a function declaration.
+@return	sym_node */
+UNIV_INTERN
+que_node_t*
+pars_function_declaration(
+/*======================*/
+	sym_node_t*	sym_node);	/*!< in: function id node in the symbol
+					table */
+/*********************************************************************//**
+Parses a select statement.
+@return	own: select node in a query tree */
+UNIV_INTERN
+sel_node_t*
+pars_select_statement(
+/*==================*/
+	sel_node_t*	select_node,	/*!< in: select node already containing
+					the select list */
+	sym_node_t*	table_list,	/*!< in: table list */
+	que_node_t*	search_cond,	/*!< in: search condition or NULL */
+	pars_res_word_t* for_update,	/*!< in: NULL or &pars_update_token */
+	pars_res_word_t* consistent_read,/*!< in: NULL or
+						&pars_consistent_token */
+	order_node_t*	order_by);	/*!< in: NULL or an order-by node */
+/*********************************************************************//**
+Parses a column assignment in an update.
+@return	column assignment node */
+UNIV_INTERN
+col_assign_node_t*
+pars_column_assignment(
+/*===================*/
+	sym_node_t*	column,	/*!< in: column to assign */
+	que_node_t*	exp);	/*!< in: value to assign */
+/*********************************************************************//**
+Parses a delete or update statement start.
+@return	own: update node in a query tree */
+UNIV_INTERN
+upd_node_t*
+pars_update_statement_start(
+/*========================*/
+	ibool		is_delete,	/*!< in: TRUE if delete */
+	sym_node_t*	table_sym,	/*!< in: table name node */
+	col_assign_node_t* col_assign_list);/*!< in: column assignment list, NULL
+					if delete */
+/*********************************************************************//**
+Parses an update or delete statement.
+@return	own: update node in a query tree */
+UNIV_INTERN
+upd_node_t*
+pars_update_statement(
+/*==================*/
+	upd_node_t*	node,		/*!< in: update node */
+	sym_node_t*	cursor_sym,	/*!< in: pointer to a cursor entry in
+					the symbol table or NULL */
+	que_node_t*	search_cond);	/*!< in: search condition or NULL */
+/*********************************************************************//**
+Parses an insert statement.
+@return	own: update node in a query tree */
+UNIV_INTERN
+ins_node_t*
+pars_insert_statement(
+/*==================*/
+	sym_node_t*	table_sym,	/*!< in: table name node */
+	que_node_t*	values_list,	/*!< in: value expression list or NULL */
+	sel_node_t*	select);	/*!< in: select condition or NULL */
+/*********************************************************************//**
+Parses a procedure parameter declaration.
+@return	own: symbol table node of type SYM_VAR */
+UNIV_INTERN
+sym_node_t*
+pars_parameter_declaration(
+/*=======================*/
+	sym_node_t*	node,	/*!< in: symbol table node allocated for the
+				id of the parameter */
+	ulint		param_type,
+				/*!< in: PARS_INPUT or PARS_OUTPUT */
+	pars_res_word_t* type);	/*!< in: pointer to a type token */
+/*********************************************************************//**
+Parses an elsif element.
+@return	elsif node */
+UNIV_INTERN
+elsif_node_t*
+pars_elsif_element(
+/*===============*/
+	que_node_t*	cond,		/*!< in: if-condition */
+	que_node_t*	stat_list);	/*!< in: statement list */
+/*********************************************************************//**
+Parses an if-statement.
+@return	if-statement node */
+UNIV_INTERN
+if_node_t*
+pars_if_statement(
+/*==============*/
+	que_node_t*	cond,		/*!< in: if-condition */
+	que_node_t*	stat_list,	/*!< in: statement list */
+	que_node_t*	else_part);	/*!< in: else-part statement list */
+/*********************************************************************//**
+Parses a for-loop-statement.
+@return	for-statement node */
+UNIV_INTERN
+for_node_t*
+pars_for_statement(
+/*===============*/
+	sym_node_t*	loop_var,	/*!< in: loop variable */
+	que_node_t*	loop_start_limit,/*!< in: loop start expression */
+	que_node_t*	loop_end_limit,	/*!< in: loop end expression */
+	que_node_t*	stat_list);	/*!< in: statement list */
+/*********************************************************************//**
+Parses a while-statement.
+@return	while-statement node */
+UNIV_INTERN
+while_node_t*
+pars_while_statement(
+/*=================*/
+	que_node_t*	cond,		/*!< in: while-condition */
+	que_node_t*	stat_list);	/*!< in: statement list */
+/*********************************************************************//**
+Parses an exit statement.
+@return	exit statement node */
+UNIV_INTERN
+exit_node_t*
+pars_exit_statement(void);
+/*=====================*/
+/*********************************************************************//**
+Parses a return-statement.
+@return	return-statement node */
+UNIV_INTERN
+return_node_t*
+pars_return_statement(void);
+/*=======================*/
+/*********************************************************************//**
+Parses a procedure call.
+@return	function node */
+UNIV_INTERN
+func_node_t*
+pars_procedure_call(
+/*================*/
+	que_node_t*	res_word,/*!< in: procedure name reserved word */
+	que_node_t*	args);	/*!< in: argument list */
+/*********************************************************************//**
+Parses an assignment statement.
+@return	assignment statement node */
+UNIV_INTERN
+assign_node_t*
+pars_assignment_statement(
+/*======================*/
+	sym_node_t*	var,	/*!< in: variable to assign */
+	que_node_t*	val);	/*!< in: value to assign */
+/*********************************************************************//**
+Parses a fetch statement. into_list or user_func (but not both) must be
+non-NULL.
+@return	fetch statement node */
+UNIV_INTERN
+fetch_node_t*
+pars_fetch_statement(
+/*=================*/
+	sym_node_t*	cursor,		/*!< in: cursor node */
+	sym_node_t*	into_list,	/*!< in: variables to set, or NULL */
+	sym_node_t*	user_func);	/*!< in: user function name, or NULL */
+/*********************************************************************//**
+Parses an open or close cursor statement.
+@return	fetch statement node */
+UNIV_INTERN
+open_node_t*
+pars_open_statement(
+/*================*/
+	ulint		type,	/*!< in: ROW_SEL_OPEN_CURSOR
+				or ROW_SEL_CLOSE_CURSOR */
+	sym_node_t*	cursor);	/*!< in: cursor node */
+/*********************************************************************//**
+Parses a row_printf-statement.
+@return	row_printf-statement node */
+UNIV_INTERN
+row_printf_node_t*
+pars_row_printf_statement(
+/*======================*/
+	sel_node_t*	sel_node);	/*!< in: select node */
+/*********************************************************************//**
+Parses a commit statement.
+@return	own: commit node struct */
+UNIV_INTERN
+commit_node_t*
+pars_commit_statement(void);
+/*=======================*/
+/*********************************************************************//**
+Parses a rollback statement.
+@return	own: rollback node struct */
+UNIV_INTERN
+roll_node_t*
+pars_rollback_statement(void);
+/*=========================*/
+/*********************************************************************//**
+Parses a column definition at a table creation.
+@return	column sym table node */
+UNIV_INTERN
+sym_node_t*
+pars_column_def(
+/*============*/
+	sym_node_t*		sym_node,	/*!< in: column node in the
+						symbol table */
+	pars_res_word_t*	type,		/*!< in: data type */
+	sym_node_t*		len,		/*!< in: length of column, or
+						NULL */
+	void*			is_unsigned,	/*!< in: if not NULL, column
+						is of type UNSIGNED. */
+	void*			is_not_null);	/*!< in: if not NULL, column
+						is of type NOT NULL. */
+/*********************************************************************//**
+Parses a table creation operation.
+@return	table create subgraph */
+UNIV_INTERN
+tab_node_t*
+pars_create_table(
+/*==============*/
+	sym_node_t*	table_sym,	/*!< in: table name node in the symbol
+					table */
+	sym_node_t*	column_defs,	/*!< in: list of column names */
+	void*		not_fit_in_memory);/*!< in: a non-NULL pointer means that
+					this is a table which in simulations
+					should be simulated as not fitting
+					in memory; thread is put to sleep
+					to simulate disk accesses; NOTE that
+					this flag is not stored to the data
+					dictionary on disk, and the database
+					will forget about non-NULL value if
+					it has to reload the table definition
+					from disk */
+/*********************************************************************//**
+Parses an index creation operation.
+@return	index create subgraph */
+UNIV_INTERN
+ind_node_t*
+pars_create_index(
+/*==============*/
+	pars_res_word_t* unique_def,	/*!< in: not NULL if a unique index */
+	pars_res_word_t* clustered_def,	/*!< in: not NULL if a clustered index */
+	sym_node_t*	index_sym,	/*!< in: index name node in the symbol
+					table */
+	sym_node_t*	table_sym,	/*!< in: table name node in the symbol
+					table */
+	sym_node_t*	column_list);	/*!< in: list of column names */
+/*********************************************************************//**
+Parses a procedure definition.
+@return	query fork node */
+UNIV_INTERN
+que_fork_t*
+pars_procedure_definition(
+/*======================*/
+	sym_node_t*	sym_node,	/*!< in: procedure id node in the symbol
+					table */
+	sym_node_t*	param_list,	/*!< in: parameter declaration list */
+	que_node_t*	stat_list);	/*!< in: statement list */
+
+/*************************************************************//**
+Parses a stored procedure call, when this is not within another stored
+procedure, that is, the client issues a procedure call directly.
+In MySQL/InnoDB, stored InnoDB procedures are invoked via the
+parsed procedure tree, not via InnoDB SQL, so this function is not used.
+@return	query graph */
+UNIV_INTERN
+que_fork_t*
+pars_stored_procedure_call(
+/*=======================*/
+	sym_node_t*	sym_node);	/*!< in: stored procedure name */
+/******************************************************************//**
+Completes a query graph by adding query thread and fork nodes
+above it and prepares the graph for running. The fork created is of
+type QUE_FORK_MYSQL_INTERFACE.
+@return	query thread node to run */
+UNIV_INTERN
+que_thr_t*
+pars_complete_graph_for_exec(
+/*=========================*/
+	que_node_t*	node,	/*!< in: root node for an incomplete
+				query graph */
+	trx_t*		trx,	/*!< in: transaction handle */
+	mem_heap_t*	heap);	/*!< in: memory heap from which allocated */
+
+/****************************************************************//**
+Create parser info struct.
+@return	own: info struct */
+UNIV_INTERN
+pars_info_t*
+pars_info_create(void);
+/*==================*/
+
+/****************************************************************//**
+Free info struct and everything it contains. */
+UNIV_INTERN
+void
+pars_info_free(
+/*===========*/
+	pars_info_t*	info);	/*!< in, own: info struct */
+
+/****************************************************************//**
+Add bound literal. */
+UNIV_INTERN
+void
+pars_info_add_literal(
+/*==================*/
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	const void*	address,	/*!< in: address */
+	ulint		length,		/*!< in: length of data */
+	ulint		type,		/*!< in: type, e.g. DATA_FIXBINARY */
+	ulint		prtype);	/*!< in: precise type, e.g.
+					DATA_UNSIGNED */
+
+/****************************************************************//**
+Equivalent to pars_info_add_literal(info, name, str, strlen(str),
+DATA_VARCHAR, DATA_ENGLISH). */
+UNIV_INTERN
+void
+pars_info_add_str_literal(
+/*======================*/
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	const char*	str);		/*!< in: string */
+
+/****************************************************************//**
+Equivalent to:
+
+char buf[4];
+mach_write_to_4(buf, val);
+pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
+
+except that the buffer is dynamically allocated from the info struct's
+heap. */
+UNIV_INTERN
+void
+pars_info_add_int4_literal(
+/*=======================*/
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	lint		val);		/*!< in: value */
+
+/****************************************************************//**
+Equivalent to:
+
+char buf[8];
+mach_write_to_8(buf, val);
+pars_info_add_literal(info, name, buf, 8, DATA_BINARY, 0);
+
+except that the buffer is dynamically allocated from the info struct's
+heap. */
+UNIV_INTERN
+void
+pars_info_add_dulint_literal(
+/*=========================*/
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	dulint		val);		/*!< in: value */
+/****************************************************************//**
+Add user function. */
+UNIV_INTERN
+void
+pars_info_add_function(
+/*===================*/
+	pars_info_t*		info,	/*!< in: info struct */
+	const char*		name,	/*!< in: function name */
+	pars_user_func_cb_t	func,	/*!< in: function address */
+	void*			arg);	/*!< in: user-supplied argument */
+
+/****************************************************************//**
+Add bound id. */
+UNIV_INTERN
+void
+pars_info_add_id(
+/*=============*/
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	const char*	id);		/*!< in: id */
+
+/****************************************************************//**
+Get user function with the given name.
+@return	user func, or NULL if not found */
+UNIV_INTERN
+pars_user_func_t*
+pars_info_get_user_func(
+/*====================*/
+	pars_info_t*		info,	/*!< in: info struct */
+	const char*		name);	/*!< in: function name to find*/
+
+/****************************************************************//**
+Get bound literal with the given name.
+@return	bound literal, or NULL if not found */
+UNIV_INTERN
+pars_bound_lit_t*
+pars_info_get_bound_lit(
+/*====================*/
+	pars_info_t*		info,	/*!< in: info struct */
+	const char*		name);	/*!< in: bound literal name to find */
+
+/****************************************************************//**
+Get bound id with the given name.
+@return	bound id, or NULL if not found */
+UNIV_INTERN
+pars_bound_id_t*
+pars_info_get_bound_id(
+/*===================*/
+	pars_info_t*		info,	/*!< in: info struct */
+	const char*		name);	/*!< in: bound id name to find */
+
+
+/** Extra information supplied for pars_sql(). */
+struct pars_info_struct {
+	mem_heap_t*	heap;		/*!< our own memory heap */
+
+	ib_vector_t*	funcs;		/*!< user functions, or NUll
+					(pars_user_func_t*) */
+	ib_vector_t*	bound_lits;	/*!< bound literals, or NULL
+					(pars_bound_lit_t*) */
+	ib_vector_t*	bound_ids;	/*!< bound ids, or NULL
+					(pars_bound_id_t*) */
+
+	ibool		graph_owns_us;	/*!< if TRUE (which is the default),
+					que_graph_free() will free us */
+};
+
+/** User-supplied function and argument. */
+struct pars_user_func_struct {
+	const char*		name;	/*!< function name */
+	pars_user_func_cb_t	func;	/*!< function address */
+	void*			arg;	/*!< user-supplied argument */
+};
+
+/** Bound literal. */
+struct pars_bound_lit_struct {
+	const char*	name;		/*!< name */
+	const void*	address;	/*!< address */
+	ulint		length;		/*!< length of data */
+	ulint		type;		/*!< type, e.g. DATA_FIXBINARY */
+	ulint		prtype;		/*!< precise type, e.g. DATA_UNSIGNED */
+};
+
+/** Bound identifier. */
+struct pars_bound_id_struct {
+	const char*	name;		/*!< name */
+	const char*	id;		/*!< identifier */
+};
+
+/** Struct used to denote a reserved word in a parsing tree */
+struct pars_res_word_struct{
+	int	code;	/*!< the token code for the reserved word from
+			pars0grm.h */
+};
+
+/** A predefined function or operator node in a parsing tree; this construct
+is also used for some non-functions like the assignment ':=' */
+struct func_node_struct{
+	que_common_t	common;	/*!< type: QUE_NODE_FUNC */
+	int		func;	/*!< token code of the function name */
+	ulint		class;	/*!< class of the function */
+	que_node_t*	args;	/*!< argument(s) of the function */
+	UT_LIST_NODE_T(func_node_t) cond_list;
+				/*!< list of comparison conditions; defined
+				only for comparison operator nodes except,
+				presently, for OPT_SCROLL_TYPE ones */
+	UT_LIST_NODE_T(func_node_t) func_node_list;
+				/*!< list of function nodes in a parsed
+				query graph */
+};
+
+/** An order-by node in a select */
+struct order_node_struct{
+	que_common_t	common;	/*!< type: QUE_NODE_ORDER */
+	sym_node_t*	column;	/*!< order-by column */
+	ibool		asc;	/*!< TRUE if ascending, FALSE if descending */
+};
+
+/** Procedure definition node */
+struct proc_node_struct{
+	que_common_t	common;		/*!< type: QUE_NODE_PROC */
+	sym_node_t*	proc_id;	/*!< procedure name symbol in the symbol
+					table of this same procedure */
+	sym_node_t*	param_list;	/*!< input and output parameters */
+	que_node_t*	stat_list;	/*!< statement list */
+	sym_tab_t*	sym_tab;	/*!< symbol table of this procedure */
+};
+
+/** elsif-element node */
+struct elsif_node_struct{
+	que_common_t	common;		/*!< type: QUE_NODE_ELSIF */
+	que_node_t*	cond;		/*!< if condition */
+	que_node_t*	stat_list;	/*!< statement list */
+};
+
+/** if-statement node */
+struct if_node_struct{
+	que_common_t	common;		/*!< type: QUE_NODE_IF */
+	que_node_t*	cond;		/*!< if condition */
+	que_node_t*	stat_list;	/*!< statement list */
+	que_node_t*	else_part;	/*!< else-part statement list */
+	elsif_node_t*	elsif_list;	/*!< elsif element list */
+};
+
+/** while-statement node */
+struct while_node_struct{
+	que_common_t	common;		/*!< type: QUE_NODE_WHILE */
+	que_node_t*	cond;		/*!< while condition */
+	que_node_t*	stat_list;	/*!< statement list */
+};
+
+/** for-loop-statement node */
+struct for_node_struct{
+	que_common_t	common;		/*!< type: QUE_NODE_FOR */
+	sym_node_t*	loop_var;	/*!< loop variable: this is the
+					dereferenced symbol from the
+					variable declarations, not the
+					symbol occurrence in the for loop
+					definition */
+	que_node_t*	loop_start_limit;/*!< initial value of loop variable */
+	que_node_t*	loop_end_limit;	/*!< end value of loop variable */
+	lint		loop_end_value;	/*!< evaluated value for the end value:
+					it is calculated only when the loop
+					is entered, and will not change within
+					the loop */
+	que_node_t*	stat_list;	/*!< statement list */
+};
+
+/** exit statement node */
+struct exit_node_struct{
+	que_common_t	common;		/*!< type: QUE_NODE_EXIT */
+};
+
+/** return-statement node */
+struct return_node_struct{
+	que_common_t	common;		/*!< type: QUE_NODE_RETURN */
+};
+
+/** Assignment statement node */
+struct assign_node_struct{
+	que_common_t	common;		/*!< type: QUE_NODE_ASSIGNMENT */
+	sym_node_t*	var;		/*!< variable to set */
+	que_node_t*	val;		/*!< value to assign */
+};
+
+/** Column assignment node */
+struct col_assign_node_struct{
+	que_common_t	common;		/*!< type: QUE_NODE_COL_ASSIGN */
+	sym_node_t*	col;		/*!< column to set */
+	que_node_t*	val;		/*!< value to assign */
+};
+
+/** Classes of functions */
+/* @{ */
+#define PARS_FUNC_ARITH		1	/*!< +, -, *, / */
+#define	PARS_FUNC_LOGICAL	2	/*!< AND, OR, NOT */
+#define PARS_FUNC_CMP		3	/*!< comparison operators */
+#define	PARS_FUNC_PREDEFINED	4	/*!< TO_NUMBER, SUBSTR, ... */
+#define	PARS_FUNC_AGGREGATE	5	/*!< COUNT, DISTINCT, SUM */
+#define	PARS_FUNC_OTHER		6	/*!< these are not real functions,
+					e.g., := */
+/* @} */
+
+#ifndef UNIV_NONINL
+#include "pars0pars.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/pars0pars.ic b/storage/innodb_plugin/include/pars0pars.ic
new file mode 100644
index 00000000000..ae6c13cd671
--- /dev/null
+++ b/storage/innodb_plugin/include/pars0pars.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0pars.ic
+SQL parser
+
+Created 11/19/1996 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innodb_plugin/include/pars0sym.h b/storage/innodb_plugin/include/pars0sym.h
new file mode 100644
index 00000000000..6d1a4b82414
--- /dev/null
+++ b/storage/innodb_plugin/include/pars0sym.h
@@ -0,0 +1,244 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0sym.h
+SQL parser symbol table
+
+Created 12/15/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0sym_h
+#define pars0sym_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "usr0types.h"
+#include "dict0types.h"
+#include "pars0types.h"
+#include "row0types.h"
+
+/******************************************************************//**
+Creates a symbol table for a single stored procedure or query.
+@return	own: symbol table */
+UNIV_INTERN
+sym_tab_t*
+sym_tab_create(
+/*===========*/
+	mem_heap_t*	heap);	/*!< in: memory heap where to create */
+/******************************************************************//**
+Frees the memory allocated dynamically AFTER parsing phase for variables
+etc. in the symbol table. Does not free the mem heap where the table was
+originally created. Frees also SQL explicit cursor definitions. */
+UNIV_INTERN
+void
+sym_tab_free_private(
+/*=================*/
+	sym_tab_t*	sym_tab);	/*!< in, own: symbol table */
+/******************************************************************//**
+Adds an integer literal to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
+sym_node_t*
+sym_tab_add_int_lit(
+/*================*/
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	ulint		val);		/*!< in: integer value */
+/******************************************************************//**
+Adds an string literal to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
+sym_node_t*
+sym_tab_add_str_lit(
+/*================*/
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	byte*		str,		/*!< in: string with no quotes around
+					it */
+	ulint		len);		/*!< in: string length */
+/******************************************************************//**
+Add a bound literal to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
+sym_node_t*
+sym_tab_add_bound_lit(
+/*==================*/
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	const char*	name,		/*!< in: name of bound literal */
+	ulint*		lit_type);	/*!< out: type of literal (PARS_*_LIT) */
+/******************************************************************//**
+Adds an SQL null literal to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
+sym_node_t*
+sym_tab_add_null_lit(
+/*=================*/
+	sym_tab_t*	sym_tab);	/*!< in: symbol table */
+/******************************************************************//**
+Adds an identifier to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
+sym_node_t*
+sym_tab_add_id(
+/*===========*/
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	byte*		name,		/*!< in: identifier name */
+	ulint		len);		/*!< in: identifier length */
+
+/******************************************************************//**
+Add a bound identifier to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
+sym_node_t*
+sym_tab_add_bound_id(
+/*===========*/
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	const char*	name);		/*!< in: name of bound id */
+
+/** Index of sym_node_struct::field_nos corresponding to the clustered index */
+#define	SYM_CLUST_FIELD_NO	0
+/** Index of sym_node_struct::field_nos corresponding to a secondary index */
+#define	SYM_SEC_FIELD_NO	1
+
+/** Types of a symbol table node */
+enum sym_tab_entry {
+	SYM_VAR = 91,		/*!< declared parameter or local
+				variable of a procedure */
+	SYM_IMPLICIT_VAR,	/*!< storage for a intermediate result
+				of a calculation */
+	SYM_LIT,		/*!< literal */
+	SYM_TABLE,		/*!< database table name */
+	SYM_COLUMN,		/*!< database table name */
+	SYM_CURSOR,		/*!< named cursor */
+	SYM_PROCEDURE_NAME,	/*!< stored procedure name */
+	SYM_INDEX,		/*!< database index name */
+	SYM_FUNCTION		/*!< user function name */
+};
+
+/** Symbol table node */
+struct sym_node_struct{
+	que_common_t			common;		/*!< node type:
+							QUE_NODE_SYMBOL */
+	/* NOTE: if the data field in 'common.val' is not NULL and the symbol
+	table node is not for a temporary column, the memory for the value has
+	been allocated from dynamic memory and it should be freed when the
+	symbol table is discarded */
+
+	/* 'alias' and 'indirection' are almost the same, but not quite.
+	'alias' always points to the primary instance of the variable, while
+	'indirection' does the same only if we should use the primary
+	instance's values for the node's data. This is usually the case, but
+	when initializing a cursor (e.g., "DECLARE CURSOR c IS SELECT * FROM
+	t WHERE id = x;"), we copy the values from the primary instance to
+	the cursor's instance so that they are fixed for the duration of the
+	cursor, and set 'indirection' to NULL. If we did not, the value of
+	'x' could change between fetches and things would break horribly.
+
+	TODO: It would be cleaner to make 'indirection' a boolean field and
+	always use 'alias' to refer to the primary node. */
+
+	sym_node_t*			indirection;	/*!< pointer to
+							another symbol table
+							node which contains
+							the value for this
+							node, NULL otherwise */
+	sym_node_t*			alias;		/*!< pointer to
+							another symbol table
+							node for which this
+							node is an alias,
+							NULL otherwise */
+	UT_LIST_NODE_T(sym_node_t)	col_var_list;	/*!< list of table
+							columns or a list of
+							input variables for an
+							explicit cursor */
+	ibool				copy_val;	/*!< TRUE if a column
+							and its value should
+							be copied to dynamic
+							memory when fetched */
+	ulint				field_nos[2];	/*!< if a column, in
+							the position
+							SYM_CLUST_FIELD_NO is
+							the field number in the
+							clustered index; in
+							the position
+							SYM_SEC_FIELD_NO
+							the field number in the
+							non-clustered index to
+							use first; if not found
+							from the index, then
+							ULINT_UNDEFINED */
+	ibool				resolved;	/*!< TRUE if the
+							meaning of a variable
+							or a column has been
+							resolved; for literals
+							this is always TRUE */
+	enum sym_tab_entry		token_type;	/*!< type of the
+							parsed token */
+	const char*			name;		/*!< name of an id */
+	ulint				name_len;	/*!< id name length */
+	dict_table_t*			table;		/*!< table definition
+							if a table id or a
+							column id */
+	ulint				col_no;		/*!< column number if a
+							column */
+	sel_buf_t*			prefetch_buf;	/*!< NULL, or a buffer
+							for cached column
+							values for prefetched
+							rows */
+	sel_node_t*			cursor_def;	/*!< cursor definition
+							select node if a
+							named cursor */
+	ulint				param_type;	/*!< PARS_INPUT,
+							PARS_OUTPUT, or
+							PARS_NOT_PARAM if not a
+							procedure parameter */
+	sym_tab_t*			sym_table;	/*!< back pointer to
+							the symbol table */
+	UT_LIST_NODE_T(sym_node_t)	sym_list;	/*!< list of symbol
+							nodes */
+};
+
+/** Symbol table */
+struct sym_tab_struct{
+	que_t*			query_graph;
+					/*!< query graph generated by the
+					parser */
+	const char*		sql_string;
+					/*!< SQL string to parse */
+	size_t			string_len;
+					/*!< SQL string length */
+	int			next_char_pos;
+					/*!< position of the next character in
+					sql_string to give to the lexical
+					analyzer */
+	pars_info_t*		info;	/*!< extra information, or NULL */
+	sym_node_list_t		sym_list;
+					/*!< list of symbol nodes in the symbol
+					table */
+	UT_LIST_BASE_NODE_T(func_node_t)
+				func_node_list;
+					/*!< list of function nodes in the
+					parsed query graph */
+	mem_heap_t*		heap;	/*!< memory heap from which we can
+					allocate space */
+};
+
+#ifndef UNIV_NONINL
+#include "pars0sym.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/pars0sym.ic b/storage/innodb_plugin/include/pars0sym.ic
new file mode 100644
index 00000000000..9eb09db3a47
--- /dev/null
+++ b/storage/innodb_plugin/include/pars0sym.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0sym.ic
+SQL parser symbol table
+
+Created 12/15/1997 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innobase/include/pars0types.h b/storage/innodb_plugin/include/pars0types.h
similarity index 53%
rename from storage/innobase/include/pars0types.h
rename to storage/innodb_plugin/include/pars0types.h
index bf7df89a883..e0a8a86bf07 100644
--- a/storage/innobase/include/pars0types.h
+++ b/storage/innodb_plugin/include/pars0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-SQL parser global types
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0types.h
+SQL parser global types
 
 Created 1/11/1998 Heikki Tuuri
 *******************************************************/
diff --git a/storage/innobase/include/que0que.h b/storage/innodb_plugin/include/que0que.h
similarity index 64%
rename from storage/innobase/include/que0que.h
rename to storage/innodb_plugin/include/que0que.h
index 8fbf5330c89..420f34550e2 100644
--- a/storage/innobase/include/que0que.h
+++ b/storage/innodb_plugin/include/que0que.h
@@ -1,7 +1,24 @@
-/******************************************************
-Query graph
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/que0que.h
+Query graph
 
 Created 5/27/1996 Heikki Tuuri
 *******************************************************/
@@ -23,349 +40,335 @@ Created 5/27/1996 Heikki Tuuri
 of SQL execution in the UNIV_SQL_DEBUG version */
 extern ibool	que_trace_on;
 
-/***************************************************************************
+/***********************************************************************//**
 Adds a query graph to the session's list of graphs. */
-
+UNIV_INTERN
 void
 que_graph_publish(
 /*==============*/
-	que_t*	graph,	/* in: graph */
-	sess_t*	sess);	/* in: session */
-/***************************************************************************
-Creates a query graph fork node. */
-
+	que_t*	graph,	/*!< in: graph */
+	sess_t*	sess);	/*!< in: session */
+/***********************************************************************//**
+Creates a query graph fork node.
+@return	own: fork node */
+UNIV_INTERN
 que_fork_t*
 que_fork_create(
 /*============*/
-					/* out, own: fork node */
-	que_t*		graph,		/* in: graph, if NULL then this
+	que_t*		graph,		/*!< in: graph, if NULL then this
 					fork node is assumed to be the
 					graph root */
-	que_node_t*	parent,		/* in: parent node */
-	ulint		fork_type,	/* in: fork type */
-	mem_heap_t*	heap);		/* in: memory heap where created */
-/***************************************************************************
+	que_node_t*	parent,		/*!< in: parent node */
+	ulint		fork_type,	/*!< in: fork type */
+	mem_heap_t*	heap);		/*!< in: memory heap where created */
+/***********************************************************************//**
 Gets the first thr in a fork. */
 UNIV_INLINE
 que_thr_t*
 que_fork_get_first_thr(
 /*===================*/
-	que_fork_t*	fork);	/* in: query fork */
-/***************************************************************************
+	que_fork_t*	fork);	/*!< in: query fork */
+/***********************************************************************//**
 Gets the child node of the first thr in a fork. */
 UNIV_INLINE
 que_node_t*
 que_fork_get_child(
 /*===============*/
-	que_fork_t*	fork);	/* in: query fork */
-/***************************************************************************
+	que_fork_t*	fork);	/*!< in: query fork */
+/***********************************************************************//**
 Sets the parent of a graph node. */
 UNIV_INLINE
 void
 que_node_set_parent(
 /*================*/
-	que_node_t*	node,	/* in: graph node */
-	que_node_t*	parent);/* in: parent */
-/***************************************************************************
-Creates a query graph thread node. */
-
+	que_node_t*	node,	/*!< in: graph node */
+	que_node_t*	parent);/*!< in: parent */
+/***********************************************************************//**
+Creates a query graph thread node.
+@return	own: query thread node */
+UNIV_INTERN
 que_thr_t*
 que_thr_create(
 /*===========*/
-				/* out, own: query thread node */
-	que_fork_t*	parent,	/* in: parent node, i.e., a fork node */
-	mem_heap_t*	heap);	/* in: memory heap where created */
-/**************************************************************************
-Checks if the query graph is in a state where it should be freed, and
-frees it in that case. If the session is in a state where it should be
-closed, also this is done. */
-
-ibool
-que_graph_try_free(
-/*===============*/
-			/* out: TRUE if freed */
-	que_t*	graph);	/* in: query graph */
-/**************************************************************************
+	que_fork_t*	parent,	/*!< in: parent node, i.e., a fork node */
+	mem_heap_t*	heap);	/*!< in: memory heap where created */
+/**********************************************************************//**
 Frees a query graph, but not the heap where it was created. Does not free
 explicit cursor declarations, they are freed in que_graph_free. */
-
+UNIV_INTERN
 void
 que_graph_free_recursive(
 /*=====================*/
-	que_node_t*	node);	/* in: query graph node */
-/**************************************************************************
+	que_node_t*	node);	/*!< in: query graph node */
+/**********************************************************************//**
 Frees a query graph. */
-
+UNIV_INTERN
 void
 que_graph_free(
 /*===========*/
-	que_t*	graph);	/* in: query graph; we assume that the memory
+	que_t*	graph);	/*!< in: query graph; we assume that the memory
 			heap where this graph was created is private
 			to this graph: if not, then use
 			que_graph_free_recursive and free the heap
 			afterwards! */
-/**************************************************************************
+/**********************************************************************//**
 Stops a query thread if graph or trx is in a state requiring it. The
 conditions are tested in the order (1) graph, (2) trx. The kernel mutex has
-to be reserved. */
-
+to be reserved.
+@return	TRUE if stopped */
+UNIV_INTERN
 ibool
 que_thr_stop(
 /*=========*/
-				/* out: TRUE if stopped */
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
 Moves a thread from another state to the QUE_THR_RUNNING state. Increments
 the n_active_thrs counters of the query graph and transaction. */
-
+UNIV_INTERN
 void
 que_thr_move_to_run_state_for_mysql(
 /*================================*/
-	que_thr_t*	thr,	/* in: an query thread */
-	trx_t*		trx);	/* in: transaction */
-/**************************************************************************
+	que_thr_t*	thr,	/*!< in: an query thread */
+	trx_t*		trx);	/*!< in: transaction */
+/**********************************************************************//**
 A patch for MySQL used to 'stop' a dummy query thread used in MySQL
 select, when there is no error or lock wait. */
-
+UNIV_INTERN
 void
 que_thr_stop_for_mysql_no_error(
 /*============================*/
-	que_thr_t*	thr,	/* in: query thread */
-	trx_t*		trx);	/* in: transaction */
-/**************************************************************************
+	que_thr_t*	thr,	/*!< in: query thread */
+	trx_t*		trx);	/*!< in: transaction */
+/**********************************************************************//**
 A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
 query thread is stopped and made inactive, except in the case where
 it was put to the lock wait state in lock0lock.c, but the lock has already
 been granted or the transaction chosen as a victim in deadlock resolution. */
-
+UNIV_INTERN
 void
 que_thr_stop_for_mysql(
 /*===================*/
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
 Run a query thread. Handles lock waits. */
-
+UNIV_INTERN
 void
 que_run_threads(
 /*============*/
-	que_thr_t*	thr);	/* in: query thread */
-/**************************************************************************
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
 After signal handling is finished, returns control to a query graph error
 handling routine. (Currently, just returns the control to the root of the
 graph so that the graph can communicate an error message to the client.) */
-
+UNIV_INTERN
 void
 que_fork_error_handle(
 /*==================*/
-	trx_t*	trx,	/* in: trx */
-	que_t*	fork);	/* in: query graph which was run before signal
+	trx_t*	trx,	/*!< in: trx */
+	que_t*	fork);	/*!< in: query graph which was run before signal
 			handling started, NULL not allowed */
-/**************************************************************************
+/**********************************************************************//**
 Moves a suspended query thread to the QUE_THR_RUNNING state and releases
 a single worker thread to execute it. This function should be used to end
 the wait state of a query thread waiting for a lock or a stored procedure
 completion. */
-
+UNIV_INTERN
 void
 que_thr_end_wait(
 /*=============*/
-	que_thr_t*	thr,		/* in: query thread in the
+	que_thr_t*	thr,		/*!< in: query thread in the
 					QUE_THR_LOCK_WAIT,
 					or QUE_THR_PROCEDURE_WAIT, or
 					QUE_THR_SIG_REPLY_WAIT state */
-	que_thr_t**	next_thr);	/* in/out: next query thread to run;
+	que_thr_t**	next_thr);	/*!< in/out: next query thread to run;
 					if the value which is passed in is
 					a pointer to a NULL pointer, then the
 					calling function can start running
 					a new query thread */
-/**************************************************************************
+/**********************************************************************//**
 Same as que_thr_end_wait, but no parameter next_thr available. */
-
+UNIV_INTERN
 void
 que_thr_end_wait_no_next_thr(
 /*=========================*/
-	que_thr_t*	thr);		/* in: query thread in the
+	que_thr_t*	thr);		/*!< in: query thread in the
 					QUE_THR_LOCK_WAIT,
 					or QUE_THR_PROCEDURE_WAIT, or
 					QUE_THR_SIG_REPLY_WAIT state */
-/**************************************************************************
+/**********************************************************************//**
 Starts execution of a command in a query fork. Picks a query thread which
 is not in the QUE_THR_RUNNING state and moves it to that state. If none
 can be chosen, a situation which may arise in parallelized fetches, NULL
-is returned. */
-
+is returned.
+@return a query thread of the graph moved to QUE_THR_RUNNING state, or
+NULL; the query thread should be executed by que_run_threads by the
+caller */
+UNIV_INTERN
 que_thr_t*
 que_fork_start_command(
 /*===================*/
-				/* out: a query thread of the graph moved to
-				QUE_THR_RUNNING state, or NULL; the query
-				thread should be executed by que_run_threads
-				by the caller */
-	que_fork_t*	fork);	/* in: a query fork */
-/***************************************************************************
+	que_fork_t*	fork);	/*!< in: a query fork */
+/***********************************************************************//**
 Gets the trx of a query thread. */
 UNIV_INLINE
 trx_t*
 thr_get_trx(
 /*========*/
-	que_thr_t*	thr);	/* in: query thread */
-/***************************************************************************
+	que_thr_t*	thr);	/*!< in: query thread */
+/***********************************************************************//**
 Gets the type of a graph node. */
 UNIV_INLINE
 ulint
 que_node_get_type(
 /*==============*/
-	que_node_t*	node);	/* in: graph node */
-/***************************************************************************
+	que_node_t*	node);	/*!< in: graph node */
+/***********************************************************************//**
 Gets pointer to the value data type field of a graph node. */
 UNIV_INLINE
 dtype_t*
 que_node_get_data_type(
 /*===================*/
-	que_node_t*	node);	/* in: graph node */
-/***************************************************************************
+	que_node_t*	node);	/*!< in: graph node */
+/***********************************************************************//**
 Gets pointer to the value dfield of a graph node. */
 UNIV_INLINE
 dfield_t*
 que_node_get_val(
 /*=============*/
-	que_node_t*	node);	/* in: graph node */
-/***************************************************************************
-Gets the value buffer size of a graph node. */
+	que_node_t*	node);	/*!< in: graph node */
+/***********************************************************************//**
+Gets the value buffer size of a graph node.
+@return	val buffer size, not defined if val.data == NULL in node */
 UNIV_INLINE
 ulint
 que_node_get_val_buf_size(
 /*======================*/
-				/* out: val buffer size, not defined if
-				val.data == NULL in node */
-	que_node_t*	node);	/* in: graph node */
-/***************************************************************************
+	que_node_t*	node);	/*!< in: graph node */
+/***********************************************************************//**
 Sets the value buffer size of a graph node. */
 UNIV_INLINE
 void
 que_node_set_val_buf_size(
 /*======================*/
-	que_node_t*	node,	/* in: graph node */
-	ulint		size);	/* in: size */
-/*************************************************************************
+	que_node_t*	node,	/*!< in: graph node */
+	ulint		size);	/*!< in: size */
+/*********************************************************************//**
 Gets the next list node in a list of query graph nodes. */
 UNIV_INLINE
 que_node_t*
 que_node_get_next(
 /*==============*/
-	que_node_t*	node);	/* in: node in a list */
-/*************************************************************************
-Gets the parent node of a query graph node. */
+	que_node_t*	node);	/*!< in: node in a list */
+/*********************************************************************//**
+Gets the parent node of a query graph node.
+@return	parent node or NULL */
 UNIV_INLINE
 que_node_t*
 que_node_get_parent(
 /*================*/
-				/* out: parent node or NULL */
-	que_node_t*	node);	/* in: node */
-/********************************************************************
+	que_node_t*	node);	/*!< in: node */
+/****************************************************************//**
 Get the first containing loop node (e.g. while_node_t or for_node_t) for the
-given node, or NULL if the node is not within a loop. */
-
+given node, or NULL if the node is not within a loop.
+@return	containing loop node, or NULL. */
+UNIV_INTERN
 que_node_t*
 que_node_get_containing_loop_node(
 /*==============================*/
-				/* out: containing loop node, or NULL. */
-	que_node_t*	node);	/* in: node */
-/*************************************************************************
-Catenates a query graph node to a list of them, possible empty list. */
+	que_node_t*	node);	/*!< in: node */
+/*********************************************************************//**
+Catenates a query graph node to a list of them, possible empty list.
+@return	one-way list of nodes */
 UNIV_INLINE
 que_node_t*
 que_node_list_add_last(
 /*===================*/
-					/* out: one-way list of nodes */
-	que_node_t*	node_list,	/* in: node list, or NULL */
-	que_node_t*	node);		/* in: node */
-/*************************************************************************
-Gets a query graph node list length. */
+	que_node_t*	node_list,	/*!< in: node list, or NULL */
+	que_node_t*	node);		/*!< in: node */
+/*********************************************************************//**
+Gets a query graph node list length.
+@return	length, for NULL list 0 */
 UNIV_INLINE
 ulint
 que_node_list_get_len(
 /*==================*/
-					/* out: length, for NULL list 0 */
-	que_node_t*	node_list);	/* in: node list, or NULL */
-/**************************************************************************
+	que_node_t*	node_list);	/*!< in: node list, or NULL */
+/**********************************************************************//**
 Checks if graph, trx, or session is in a state where the query thread should
-be stopped. */
+be stopped.
+@return TRUE if should be stopped; NOTE that if the peek is made
+without reserving the kernel mutex, then another peek with the mutex
+reserved is necessary before deciding the actual stopping */
 UNIV_INLINE
 ibool
 que_thr_peek_stop(
 /*==============*/
-				/* out: TRUE if should be stopped; NOTE that
-				if the peek is made without reserving the
-				kernel mutex, then another peek with the
-				mutex reserved is necessary before deciding
-				the actual stopping */
-	que_thr_t*	thr);	/* in: query thread */
-/***************************************************************************
-Returns TRUE if the query graph is for a SELECT statement. */
+	que_thr_t*	thr);	/*!< in: query thread */
+/***********************************************************************//**
+Returns TRUE if the query graph is for a SELECT statement.
+@return	TRUE if a select */
 UNIV_INLINE
 ibool
 que_graph_is_select(
 /*================*/
-					/* out: TRUE if a select */
-	que_t*		graph);		/* in: graph */
-/**************************************************************************
+	que_t*		graph);		/*!< in: graph */
+/**********************************************************************//**
 Prints info of an SQL query graph node. */
-
+UNIV_INTERN
 void
 que_node_print_info(
 /*================*/
-	que_node_t*	node);	/* in: query graph node */
-/*************************************************************************
-Evaluate the given SQL */
-
+	que_node_t*	node);	/*!< in: query graph node */
+/*********************************************************************//**
+Evaluate the given SQL
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 ulint
 que_eval_sql(
 /*=========*/
-				/* out: error code or DB_SUCCESS */
-	pars_info_t*	info,	/* in: info struct, or NULL */
-	const char*	sql,	/* in: SQL string */
+	pars_info_t*	info,	/*!< in: info struct, or NULL */
+	const char*	sql,	/*!< in: SQL string */
 	ibool		reserve_dict_mutex,
-				/* in: if TRUE, acquire/release
+				/*!< in: if TRUE, acquire/release
 				dict_sys->mutex around call to pars_sql. */
-	trx_t*		trx);	/* in: trx */
+	trx_t*		trx);	/*!< in: trx */
 
 /* Query graph query thread node: the fields are protected by the kernel
 mutex with the exceptions named below */
 
 struct que_thr_struct{
-	que_common_t	common;		/* type: QUE_NODE_THR */
-	ulint		magic_n;	/* magic number to catch memory
+	que_common_t	common;		/*!< type: QUE_NODE_THR */
+	ulint		magic_n;	/*!< magic number to catch memory
 					corruption */
-	que_node_t*	child;		/* graph child node */
-	que_t*		graph;		/* graph where this node belongs */
-	ibool		is_active;	/* TRUE if the thread has been set
+	que_node_t*	child;		/*!< graph child node */
+	que_t*		graph;		/*!< graph where this node belongs */
+	ibool		is_active;	/*!< TRUE if the thread has been set
 					to the run state in
 					que_thr_move_to_run_state, but not
 					deactivated in
 					que_thr_dec_reference_count */
-	ulint		state;		/* state of the query thread */
+	ulint		state;		/*!< state of the query thread */
 	UT_LIST_NODE_T(que_thr_t)
-			thrs;		/* list of thread nodes of the fork
+			thrs;		/*!< list of thread nodes of the fork
 					node */
 	UT_LIST_NODE_T(que_thr_t)
-			trx_thrs;	/* lists of threads in wait list of
+			trx_thrs;	/*!< lists of threads in wait list of
 					the trx */
 	UT_LIST_NODE_T(que_thr_t)
-			queue;		/* list of runnable thread nodes in
+			queue;		/*!< list of runnable thread nodes in
 					the server task queue */
 	/*------------------------------*/
 	/* The following fields are private to the OS thread executing the
 	query thread, and are not protected by the kernel mutex: */
 
-	que_node_t*	run_node;	/* pointer to the node where the
+	que_node_t*	run_node;	/*!< pointer to the node where the
 					subgraph down from this node is
 					currently executed */
-	que_node_t*	prev_node;	/* pointer to the node from which
+	que_node_t*	prev_node;	/*!< pointer to the node from which
 					the control came */
-	ulint		resource;	/* resource usage of the query thread
+	ulint		resource;	/*!< resource usage of the query thread
 					thus far */
-	ulint		lock_state;	/* lock state of thread (table or
+	ulint		lock_state;	/*!< lock state of thread (table or
 					row) */
 };
 
@@ -374,49 +377,49 @@ struct que_thr_struct{
 
 /* Query graph fork node: its fields are protected by the kernel mutex */
 struct que_fork_struct{
-	que_common_t	common;		/* type: QUE_NODE_FORK */
-	que_t*		graph;		/* query graph of this node */
-	ulint		fork_type;	/* fork type */
-	ulint		n_active_thrs;	/* if this is the root of a graph, the
+	que_common_t	common;		/*!< type: QUE_NODE_FORK */
+	que_t*		graph;		/*!< query graph of this node */
+	ulint		fork_type;	/*!< fork type */
+	ulint		n_active_thrs;	/*!< if this is the root of a graph, the
 					number query threads that have been
 					started in que_thr_move_to_run_state
 					but for which que_thr_dec_refer_count
 					has not yet been called */
-	trx_t*		trx;		/* transaction: this is set only in
+	trx_t*		trx;		/*!< transaction: this is set only in
 					the root node */
-	ulint		state;		/* state of the fork node */
-	que_thr_t*	caller;		/* pointer to a possible calling query
+	ulint		state;		/*!< state of the fork node */
+	que_thr_t*	caller;		/*!< pointer to a possible calling query
 					thread */
 	UT_LIST_BASE_NODE_T(que_thr_t)
-			thrs;		/* list of query threads */
+			thrs;		/*!< list of query threads */
 	/*------------------------------*/
 	/* The fields in this section are defined only in the root node */
-	sym_tab_t*	sym_tab;	/* symbol table of the query,
+	sym_tab_t*	sym_tab;	/*!< symbol table of the query,
 					generated by the parser, or NULL
 					if the graph was created 'by hand' */
-	pars_info_t*	info;		/* in: info struct, or NULL */
+	pars_info_t*	info;		/*!< info struct, or NULL */
 	/* The following cur_... fields are relevant only in a select graph */
 
-	ulint		cur_end;	/* QUE_CUR_NOT_DEFINED, QUE_CUR_START,
+	ulint		cur_end;	/*!< QUE_CUR_NOT_DEFINED, QUE_CUR_START,
 					QUE_CUR_END */
-	ulint		cur_pos;	/* if there are n rows in the result
+	ulint		cur_pos;	/*!< if there are n rows in the result
 					set, values 0 and n + 1 mean before
 					first row, or after last row, depending
 					on cur_end; values 1...n mean a row
 					index */
-	ibool		cur_on_row;	/* TRUE if cursor is on a row, i.e.,
+	ibool		cur_on_row;	/*!< TRUE if cursor is on a row, i.e.,
 					it is not before the first row or
 					after the last row */
-	dulint		n_inserts;	/* number of rows inserted */
-	dulint		n_updates;	/* number of rows updated */
-	dulint		n_deletes;	/* number of rows deleted */
-	sel_node_t*	last_sel_node;	/* last executed select node, or NULL
+	dulint		n_inserts;	/*!< number of rows inserted */
+	dulint		n_updates;	/*!< number of rows updated */
+	dulint		n_deletes;	/*!< number of rows deleted */
+	sel_node_t*	last_sel_node;	/*!< last executed select node, or NULL
 					if none */
 	UT_LIST_NODE_T(que_fork_t)
-			graphs;		/* list of query graphs of a session
+			graphs;		/*!< list of query graphs of a session
 					or a stored procedure */
 	/*------------------------------*/
-	mem_heap_t*	heap;		/* memory heap where the fork was
+	mem_heap_t*	heap;		/*!< memory heap where the fork was
 					created */
 
 };
diff --git a/storage/innobase/include/que0que.ic b/storage/innodb_plugin/include/que0que.ic
similarity index 57%
rename from storage/innobase/include/que0que.ic
rename to storage/innodb_plugin/include/que0que.ic
index a20108a7820..a1c0dc1e77a 100644
--- a/storage/innobase/include/que0que.ic
+++ b/storage/innodb_plugin/include/que0que.ic
@@ -1,44 +1,61 @@
-/******************************************************
-Query graph
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/que0que.ic
+Query graph
 
 Created 5/27/1996 Heikki Tuuri
 *******************************************************/
 
 #include "usr0sess.h"
 
-/***************************************************************************
+/***********************************************************************//**
 Gets the trx of a query thread. */
 UNIV_INLINE
 trx_t*
 thr_get_trx(
 /*========*/
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	ut_ad(thr);
 
 	return(thr->graph->trx);
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Gets the first thr in a fork. */
 UNIV_INLINE
 que_thr_t*
 que_fork_get_first_thr(
 /*===================*/
-	que_fork_t*	fork)	/* in: query fork */
+	que_fork_t*	fork)	/*!< in: query fork */
 {
 	return(UT_LIST_GET_FIRST(fork->thrs));
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Gets the child node of the first thr in a fork. */
 UNIV_INLINE
 que_node_t*
 que_fork_get_child(
 /*===============*/
-	que_fork_t*	fork)	/* in: query fork */
+	que_fork_t*	fork)	/*!< in: query fork */
 {
 	que_thr_t*	thr;
 
@@ -47,102 +64,101 @@ que_fork_get_child(
 	return(thr->child);
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Gets the type of a graph node. */
 UNIV_INLINE
 ulint
 que_node_get_type(
 /*==============*/
-	que_node_t*	node)	/* in: graph node */
+	que_node_t*	node)	/*!< in: graph node */
 {
 	ut_ad(node);
 
 	return(((que_common_t*)node)->type);
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Gets pointer to the value dfield of a graph node. */
 UNIV_INLINE
 dfield_t*
 que_node_get_val(
 /*=============*/
-	que_node_t*	node)	/* in: graph node */
+	que_node_t*	node)	/*!< in: graph node */
 {
 	ut_ad(node);
 
 	return(&(((que_common_t*)node)->val));
 }
 
-/***************************************************************************
-Gets the value buffer size of a graph node. */
+/***********************************************************************//**
+Gets the value buffer size of a graph node.
+@return	val buffer size, not defined if val.data == NULL in node */
 UNIV_INLINE
 ulint
 que_node_get_val_buf_size(
 /*======================*/
-				/* out: val buffer size, not defined if
-				val.data == NULL in node */
-	que_node_t*	node)	/* in: graph node */
+	que_node_t*	node)	/*!< in: graph node */
 {
 	ut_ad(node);
 
 	return(((que_common_t*)node)->val_buf_size);
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Sets the value buffer size of a graph node. */
 UNIV_INLINE
 void
 que_node_set_val_buf_size(
 /*======================*/
-	que_node_t*	node,	/* in: graph node */
-	ulint		size)	/* in: size */
+	que_node_t*	node,	/*!< in: graph node */
+	ulint		size)	/*!< in: size */
 {
 	ut_ad(node);
 
 	((que_common_t*)node)->val_buf_size = size;
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Sets the parent of a graph node. */
 UNIV_INLINE
 void
 que_node_set_parent(
 /*================*/
-	que_node_t*	node,	/* in: graph node */
-	que_node_t*	parent)	/* in: parent */
+	que_node_t*	node,	/*!< in: graph node */
+	que_node_t*	parent)	/*!< in: parent */
 {
 	ut_ad(node);
 
 	((que_common_t*)node)->parent = parent;
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Gets pointer to the value data type field of a graph node. */
 UNIV_INLINE
 dtype_t*
 que_node_get_data_type(
 /*===================*/
-	que_node_t*	node)	/* in: graph node */
+	que_node_t*	node)	/*!< in: graph node */
 {
 	ut_ad(node);
 
-	return(&(((que_common_t*)node)->val.type));
+	return(dfield_get_type(&((que_common_t*) node)->val));
 }
 
-/*************************************************************************
-Catenates a query graph node to a list of them, possible empty list. */
+/*********************************************************************//**
+Catenates a query graph node to a list of them, possible empty list.
+@return	one-way list of nodes */
 UNIV_INLINE
 que_node_t*
 que_node_list_add_last(
 /*===================*/
-					/* out: one-way list of nodes */
-	que_node_t*	node_list,	/* in: node list, or NULL */
-	que_node_t*	node)		/* in: node */
+	que_node_t*	node_list,	/*!< in: node list, or NULL */
+	que_node_t*	node)		/*!< in: node */
 {
 	que_common_t*	cnode;
 	que_common_t*	cnode2;
 
-	cnode = node;
+	cnode = (que_common_t*) node;
 
 	cnode->brother = NULL;
 
@@ -151,10 +167,10 @@ que_node_list_add_last(
 		return(node);
 	}
 
-	cnode2 = node_list;
+	cnode2 = (que_common_t*) node_list;
 
 	while (cnode2->brother != NULL) {
-		cnode2 = cnode2->brother;
+		cnode2 = (que_common_t*) cnode2->brother;
 	}
 
 	cnode2->brother = node;
@@ -162,66 +178,64 @@ que_node_list_add_last(
 	return(node_list);
 }
 
-/*************************************************************************
-Gets the next list node in a list of query graph nodes. */
+/*********************************************************************//**
+Gets the next list node in a list of query graph nodes.
+@return	next node in a list of nodes */
 UNIV_INLINE
 que_node_t*
 que_node_get_next(
 /*==============*/
-				/* out: next node in a list of nodes */
-	que_node_t*	node)	/* in: node in a list */
+	que_node_t*	node)	/*!< in: node in a list */
 {
 	return(((que_common_t*)node)->brother);
 }
 
-/*************************************************************************
-Gets a query graph node list length. */
+/*********************************************************************//**
+Gets a query graph node list length.
+@return	length, for NULL list 0 */
 UNIV_INLINE
 ulint
 que_node_list_get_len(
 /*==================*/
-					/* out: length, for NULL list 0 */
-	que_node_t*	node_list)	/* in: node list, or NULL */
+	que_node_t*	node_list)	/*!< in: node list, or NULL */
 {
-	que_common_t*	cnode;
-	ulint		len;
+	const que_common_t*	cnode;
+	ulint			len;
 
-	cnode = node_list;
+	cnode = (const que_common_t*) node_list;
 	len = 0;
 
 	while (cnode != NULL) {
 		len++;
-		cnode = cnode->brother;
+		cnode = (const que_common_t*) cnode->brother;
 	}
 
 	return(len);
 }
 
-/*************************************************************************
-Gets the parent node of a query graph node. */
+/*********************************************************************//**
+Gets the parent node of a query graph node.
+@return	parent node or NULL */
 UNIV_INLINE
 que_node_t*
 que_node_get_parent(
 /*================*/
-				/* out: parent node or NULL */
-	que_node_t*	node)	/* in: node */
+	que_node_t*	node)	/*!< in: node */
 {
 	return(((que_common_t*)node)->parent);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Checks if graph, trx, or session is in a state where the query thread should
-be stopped. */
+be stopped.
+@return TRUE if should be stopped; NOTE that if the peek is made
+without reserving the kernel mutex, then another peek with the mutex
+reserved is necessary before deciding the actual stopping */
 UNIV_INLINE
 ibool
 que_thr_peek_stop(
 /*==============*/
-				/* out: TRUE if should be stopped; NOTE that
-				if the peek is made without reserving the
-				kernel mutex, then another peek with the
-				mutex reserved is necessary before deciding
-				the actual stopping */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	trx_t*	trx;
 	que_t*	graph;
@@ -240,14 +254,14 @@ que_thr_peek_stop(
 	return(FALSE);
 }
 
-/***************************************************************************
-Returns TRUE if the query graph is for a SELECT statement. */
+/***********************************************************************//**
+Returns TRUE if the query graph is for a SELECT statement.
+@return	TRUE if a select */
 UNIV_INLINE
 ibool
 que_graph_is_select(
 /*================*/
-					/* out: TRUE if a select */
-	que_t*		graph)		/* in: graph */
+	que_t*		graph)		/*!< in: graph */
 {
 	if (graph->fork_type == QUE_FORK_SELECT_SCROLL
 	    || graph->fork_type == QUE_FORK_SELECT_NON_SCROLL) {
diff --git a/storage/innodb_plugin/include/que0types.h b/storage/innodb_plugin/include/que0types.h
new file mode 100644
index 00000000000..ea976074768
--- /dev/null
+++ b/storage/innodb_plugin/include/que0types.h
@@ -0,0 +1,60 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/que0types.h
+Query graph global types
+
+Created 5/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef que0types_h
+#define que0types_h
+
+#include "data0data.h"
+#include "dict0types.h"
+
+/* Pseudotype for all graph nodes */
+typedef void	que_node_t;
+
+typedef struct que_fork_struct	que_fork_t;
+
+/* Query graph root is a fork node */
+typedef	que_fork_t	que_t;
+
+typedef struct que_thr_struct		que_thr_t;
+typedef struct que_common_struct	que_common_t;
+
+/* Common struct at the beginning of each query graph node; the name of this
+substruct must be 'common' */
+
+struct que_common_struct{
+	ulint		type;	/*!< query node type */
+	que_node_t*	parent;	/*!< back pointer to parent node, or NULL */
+	que_node_t*	brother;/* pointer to a possible brother node */
+	dfield_t	val;	/*!< evaluated value for an expression */
+	ulint		val_buf_size;
+				/* buffer size for the evaluated value data,
+				if the buffer has been allocated dynamically:
+				if this field is != 0, and the node is a
+				symbol node or a function node, then we
+				have to free the data field in val
+				explicitly */
+};
+
+#endif
diff --git a/storage/innodb_plugin/include/read0read.h b/storage/innodb_plugin/include/read0read.h
new file mode 100644
index 00000000000..4d9a9fade36
--- /dev/null
+++ b/storage/innodb_plugin/include/read0read.h
@@ -0,0 +1,194 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/read0read.h
+Cursor read
+
+Created 2/16/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef read0read_h
+#define read0read_h
+
+#include "univ.i"
+
+
+#include "ut0byte.h"
+#include "ut0lst.h"
+#include "trx0trx.h"
+#include "read0types.h"
+
+/*********************************************************************//**
+Opens a read view where exactly the transactions serialized before this
+point in time are seen in the view.
+@return	own: read view struct */
+UNIV_INTERN
+read_view_t*
+read_view_open_now(
+/*===============*/
+	trx_id_t	cr_trx_id,	/*!< in: trx_id of creating
+					transaction, or ut_dulint_zero
+					used in purge */
+	mem_heap_t*	heap);		/*!< in: memory heap from which
+					allocated */
+/*********************************************************************//**
+Makes a copy of the oldest existing read view, or opens a new. The view
+must be closed with ..._close.
+@return	own: read view struct */
+UNIV_INTERN
+read_view_t*
+read_view_oldest_copy_or_open_new(
+/*==============================*/
+	trx_id_t	cr_trx_id,	/*!< in: trx_id of creating
+					transaction, or ut_dulint_zero
+					used in purge */
+	mem_heap_t*	heap);		/*!< in: memory heap from which
+					allocated */
+/*********************************************************************//**
+Closes a read view. */
+UNIV_INTERN
+void
+read_view_close(
+/*============*/
+	read_view_t*	view);	/*!< in: read view */
+/*********************************************************************//**
+Closes a consistent read view for MySQL. This function is called at an SQL
+statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
+UNIV_INTERN
+void
+read_view_close_for_mysql(
+/*======================*/
+	trx_t*	trx);	/*!< in: trx which has a read view */
+/*********************************************************************//**
+Checks if a read view sees the specified transaction.
+@return	TRUE if sees */
+UNIV_INLINE
+ibool
+read_view_sees_trx_id(
+/*==================*/
+	const read_view_t*	view,	/*!< in: read view */
+	trx_id_t		trx_id);/*!< in: trx id */
+/*********************************************************************//**
+Prints a read view to stderr. */
+UNIV_INTERN
+void
+read_view_print(
+/*============*/
+	const read_view_t*	view);	/*!< in: read view */
+/*********************************************************************//**
+Create a consistent cursor view for mysql to be used in cursors. In this
+consistent read view modifications done by the creating transaction or future
+transactions are not visible. */
+UNIV_INTERN
+cursor_view_t*
+read_cursor_view_create_for_mysql(
+/*==============================*/
+	trx_t*		cr_trx);/*!< in: trx where cursor view is created */
+/*********************************************************************//**
+Close a given consistent cursor view for mysql and restore global read view
+back to a transaction read view. */
+UNIV_INTERN
+void
+read_cursor_view_close_for_mysql(
+/*=============================*/
+	trx_t*		trx,		/*!< in: trx */
+	cursor_view_t*	curview);	/*!< in: cursor view to be closed */
+/*********************************************************************//**
+This function sets a given consistent cursor view to a transaction
+read view if given consistent cursor view is not NULL. Otherwise, function
+restores a global read view to a transaction read view. */
+UNIV_INTERN
+void
+read_cursor_set_for_mysql(
+/*======================*/
+	trx_t*		trx,	/*!< in: transaction where cursor is set */
+	cursor_view_t*	curview);/*!< in: consistent cursor view to be set */
+
+/** Read view lists the trx ids of those transactions for which a consistent
+read should not see the modifications to the database. */
+
+struct read_view_struct{
+	ulint		type;	/*!< VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
+	undo_no_t	undo_no;/*!< ut_dulint_zero or if type is
+				VIEW_HIGH_GRANULARITY
+				transaction undo_no when this high-granularity
+				consistent read view was created */
+	trx_id_t	low_limit_no;
+				/*!< The view does not need to see the undo
+				logs for transactions whose transaction number
+				is strictly smaller (<) than this value: they
+				can be removed in purge if not needed by other
+				views */
+	trx_id_t	low_limit_id;
+				/*!< The read should not see any transaction
+				with trx id >= this value. In other words,
+				this is the "high water mark". */
+	trx_id_t	up_limit_id;
+				/*!< The read should see all trx ids which
+				are strictly smaller (<) than this value.
+				In other words,
+				this is the "low water mark". */
+	ulint		n_trx_ids;
+				/*!< Number of cells in the trx_ids array */
+	trx_id_t*	trx_ids;/*!< Additional trx ids which the read should
+				not see: typically, these are the active
+				transactions at the time when the read is
+				serialized, except the reading transaction
+				itself; the trx ids in this array are in a
+				descending order. These trx_ids should be
+				between the "low" and "high" water marks,
+				that is, up_limit_id and low_limit_id. */
+	trx_id_t	creator_trx_id;
+				/*!< trx id of creating transaction, or
+				ut_dulint_zero used in purge */
+	UT_LIST_NODE_T(read_view_t) view_list;
+				/*!< List of read views in trx_sys */
+};
+
+/** Read view types @{ */
+#define VIEW_NORMAL		1	/*!< Normal consistent read view
+					where transaction does not see changes
+					made by active transactions except
+					creating transaction. */
+#define VIEW_HIGH_GRANULARITY	2	/*!< High-granularity read view where
+					transaction does not see changes
+					made by active transactions and own
+					changes after a point in time when this
+					read view was created. */
+/* @} */
+
+/** Implement InnoDB framework to support consistent read views in
+cursors. This struct holds both heap where consistent read view
+is allocated and pointer to a read view. */
+
+struct cursor_view_struct{
+	mem_heap_t*	heap;
+				/*!< Memory heap for the cursor view */
+	read_view_t*	read_view;
+				/*!< Consistent read view of the cursor*/
+	ulint		n_mysql_tables_in_use;
+				/*!< number of Innobase tables used in the
+				processing of this cursor */
+};
+
+#ifndef UNIV_NONINL
+#include "read0read.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/read0read.ic b/storage/innodb_plugin/include/read0read.ic
new file mode 100644
index 00000000000..9924967cc2d
--- /dev/null
+++ b/storage/innodb_plugin/include/read0read.ic
@@ -0,0 +1,98 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/read0read.ic
+Cursor read
+
+Created 2/16/1997 Heikki Tuuri
+*******************************************************/
+
+/*********************************************************************//**
+Gets the nth trx id in a read view.
+@return	trx id */
+UNIV_INLINE
+trx_id_t
+read_view_get_nth_trx_id(
+/*=====================*/
+	const read_view_t*	view,	/*!< in: read view */
+	ulint			n)	/*!< in: position */
+{
+	ut_ad(n < view->n_trx_ids);
+
+	return(*(view->trx_ids + n));
+}
+
+/*********************************************************************//**
+Sets the nth trx id in a read view. */
+UNIV_INLINE
+void
+read_view_set_nth_trx_id(
+/*=====================*/
+	read_view_t*	view,	/*!< in: read view */
+	ulint		n,	/*!< in: position */
+	trx_id_t	trx_id)	/*!< in: trx id to set */
+{
+	ut_ad(n < view->n_trx_ids);
+
+	*(view->trx_ids + n) = trx_id;
+}
+
+/*********************************************************************//**
+Checks if a read view sees the specified transaction.
+@return	TRUE if sees */
+UNIV_INLINE
+ibool
+read_view_sees_trx_id(
+/*==================*/
+	const read_view_t*	view,	/*!< in: read view */
+	trx_id_t		trx_id)	/*!< in: trx id */
+{
+	ulint	n_ids;
+	int	cmp;
+	ulint	i;
+
+	if (ut_dulint_cmp(trx_id, view->up_limit_id) < 0) {
+
+		return(TRUE);
+	}
+
+	if (ut_dulint_cmp(trx_id, view->low_limit_id) >= 0) {
+
+		return(FALSE);
+	}
+
+	/* We go through the trx ids in the array smallest first: this order
+	may save CPU time, because if there was a very long running
+	transaction in the trx id array, its trx id is looked at first, and
+	the first two comparisons may well decide the visibility of trx_id. */
+
+	n_ids = view->n_trx_ids;
+
+	for (i = 0; i < n_ids; i++) {
+
+		cmp = ut_dulint_cmp(
+			trx_id,
+			read_view_get_nth_trx_id(view, n_ids - i - 1));
+		if (cmp <= 0) {
+			return(cmp < 0);
+		}
+	}
+
+	return(TRUE);
+}
diff --git a/storage/innodb_plugin/include/read0types.h b/storage/innodb_plugin/include/read0types.h
new file mode 100644
index 00000000000..caf69e3fb51
--- /dev/null
+++ b/storage/innodb_plugin/include/read0types.h
@@ -0,0 +1,32 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/read0types.h
+Cursor read
+
+Created 2/16/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef read0types_h
+#define read0types_h
+
+typedef struct read_view_struct	read_view_t;
+typedef struct cursor_view_struct	cursor_view_t;
+
+#endif
diff --git a/storage/innodb_plugin/include/rem0cmp.h b/storage/innodb_plugin/include/rem0cmp.h
new file mode 100644
index 00000000000..d30d9f86abe
--- /dev/null
+++ b/storage/innodb_plugin/include/rem0cmp.h
@@ -0,0 +1,194 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/rem0cmp.h
+Comparison services for records
+
+Created 7/1/1994 Heikki Tuuri
+************************************************************************/
+
+#ifndef rem0cmp_h
+#define rem0cmp_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "rem0rec.h"
+
+/*************************************************************//**
+Returns TRUE if two columns are equal for comparison purposes.
+@return	TRUE if the columns are considered equal in comparisons */
+UNIV_INTERN
+ibool
+cmp_cols_are_equal(
+/*===============*/
+	const dict_col_t*	col1,	/*!< in: column 1 */
+	const dict_col_t*	col2,	/*!< in: column 2 */
+	ibool			check_charsets);
+					/*!< in: whether to check charsets */
+/*************************************************************//**
+This function is used to compare two data fields for which we know the
+data type.
+@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INLINE
+int
+cmp_data_data(
+/*==========*/
+	ulint		mtype,	/*!< in: main type */
+	ulint		prtype,	/*!< in: precise type */
+	const byte*	data1,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len1,	/*!< in: data field length or UNIV_SQL_NULL */
+	const byte*	data2,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len2);	/*!< in: data field length or UNIV_SQL_NULL */
+/*************************************************************//**
+This function is used to compare two data fields for which we know the
+data type.
+@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INTERN
+int
+cmp_data_data_slow(
+/*===============*/
+	ulint		mtype,	/*!< in: main type */
+	ulint		prtype,	/*!< in: precise type */
+	const byte*	data1,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len1,	/*!< in: data field length or UNIV_SQL_NULL */
+	const byte*	data2,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len2);	/*!< in: data field length or UNIV_SQL_NULL */
+/*************************************************************//**
+This function is used to compare two dfields where at least the first
+has its data type field set.
+@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
+respectively */
+UNIV_INLINE
+int
+cmp_dfield_dfield(
+/*==============*/
+	const dfield_t*	dfield1,/*!< in: data field; must have type field set */
+	const dfield_t*	dfield2);/*!< in: data field */
+/*************************************************************//**
+This function is used to compare a data tuple to a physical record.
+Only dtuple->n_fields_cmp first fields are taken into account for
+the the data tuple! If we denote by n = n_fields_cmp, then rec must
+have either m >= n fields, or it must differ from dtuple in some of
+the m fields rec has. If rec has an externally stored field we do not
+compare it but return with value 0 if such a comparison should be
+made.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared, or until
+the first externally stored field in rec */
+UNIV_INTERN
+int
+cmp_dtuple_rec_with_match(
+/*======================*/
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record which differs from
+				dtuple in some of the common fields, or which
+				has an equal number or more fields than
+				dtuple */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint*		matched_fields, /*!< in/out: number of already completely
+				matched fields; when function returns,
+				contains the value for current comparison */
+	ulint*		matched_bytes); /*!< in/out: number of already matched
+				bytes within the first field not completely
+				matched; when function returns, contains the
+				value for current comparison */
+/**************************************************************//**
+Compares a data tuple to a physical record.
+@see cmp_dtuple_rec_with_match
+@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */
+UNIV_INTERN
+int
+cmp_dtuple_rec(
+/*===========*/
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/**************************************************************//**
+Checks if a dtuple is a prefix of a record. The last field in dtuple
+is allowed to be a prefix of the corresponding field in the record.
+@return	TRUE if prefix */
+UNIV_INTERN
+ibool
+cmp_dtuple_is_prefix_of_rec(
+/*========================*/
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/*************************************************************//**
+Compare two physical records that contain the same number of columns,
+none of which are stored externally.
+@return	1, 0, -1 if rec1 is greater, equal, less, respectively, than rec2 */
+UNIV_INTERN
+int
+cmp_rec_rec_simple(
+/*===============*/
+	const rec_t*		rec1,	/*!< in: physical record */
+	const rec_t*		rec2,	/*!< in: physical record */
+	const ulint*		offsets1,/*!< in: rec_get_offsets(rec1, ...) */
+	const ulint*		offsets2,/*!< in: rec_get_offsets(rec2, ...) */
+	const dict_index_t*	index);	/*!< in: data dictionary index */
+/*************************************************************//**
+This function is used to compare two physical records. Only the common
+first fields are compared, and if an externally stored field is
+encountered, then 0 is returned.
+@return 1, 0, -1 if rec1 is greater, equal, less, respectively */
+UNIV_INTERN
+int
+cmp_rec_rec_with_match(
+/*===================*/
+	const rec_t*	rec1,	/*!< in: physical record */
+	const rec_t*	rec2,	/*!< in: physical record */
+	const ulint*	offsets1,/*!< in: rec_get_offsets(rec1, index) */
+	const ulint*	offsets2,/*!< in: rec_get_offsets(rec2, index) */
+	dict_index_t*	index,	/*!< in: data dictionary index */
+	ulint*		matched_fields, /*!< in/out: number of already completely
+				matched fields; when the function returns,
+				contains the value the for current
+				comparison */
+	ulint*		matched_bytes);/*!< in/out: number of already matched
+				bytes within the first field not completely
+				matched; when the function returns, contains
+				the value for the current comparison */
+/*************************************************************//**
+This function is used to compare two physical records. Only the common
+first fields are compared.
+@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than
+rec2; only the common first fields are compared */
+UNIV_INLINE
+int
+cmp_rec_rec(
+/*========*/
+	const rec_t*	rec1,	/*!< in: physical record */
+	const rec_t*	rec2,	/*!< in: physical record */
+	const ulint*	offsets1,/*!< in: rec_get_offsets(rec1, index) */
+	const ulint*	offsets2,/*!< in: rec_get_offsets(rec2, index) */
+	dict_index_t*	index);	/*!< in: data dictionary index */
+
+
+#ifndef UNIV_NONINL
+#include "rem0cmp.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/rem0cmp.ic b/storage/innodb_plugin/include/rem0cmp.ic
new file mode 100644
index 00000000000..39ef5f4fba3
--- /dev/null
+++ b/storage/innodb_plugin/include/rem0cmp.ic
@@ -0,0 +1,91 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/rem0cmp.ic
+Comparison services for records
+
+Created 7/1/1994 Heikki Tuuri
+************************************************************************/
+
+/*************************************************************//**
+This function is used to compare two data fields for which we know the
+data type.
+@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INLINE
+int
+cmp_data_data(
+/*==========*/
+	ulint		mtype,	/*!< in: main type */
+	ulint		prtype,	/*!< in: precise type */
+	const byte*	data1,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len1,	/*!< in: data field length or UNIV_SQL_NULL */
+	const byte*	data2,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len2)	/*!< in: data field length or UNIV_SQL_NULL */
+{
+	return(cmp_data_data_slow(mtype, prtype, data1, len1, data2, len2));
+}
+
+/*************************************************************//**
+This function is used to compare two dfields where at least the first
+has its data type field set.
+@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
+respectively */
+UNIV_INLINE
+int
+cmp_dfield_dfield(
+/*==============*/
+	const dfield_t*	dfield1,/*!< in: data field; must have type field set */
+	const dfield_t*	dfield2)/*!< in: data field */
+{
+	const dtype_t*	type;
+
+	ut_ad(dfield_check_typed(dfield1));
+
+	type = dfield_get_type(dfield1);
+
+	return(cmp_data_data(type->mtype, type->prtype,
+			     (const byte*) dfield_get_data(dfield1),
+			     dfield_get_len(dfield1),
+			     (const byte*) dfield_get_data(dfield2),
+			     dfield_get_len(dfield2)));
+}
+
+/*************************************************************//**
+This function is used to compare two physical records. Only the common
+first fields are compared.
+@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than
+rec2; only the common first fields are compared */
+UNIV_INLINE
+int
+cmp_rec_rec(
+/*========*/
+	const rec_t*	rec1,	/*!< in: physical record */
+	const rec_t*	rec2,	/*!< in: physical record */
+	const ulint*	offsets1,/*!< in: rec_get_offsets(rec1, index) */
+	const ulint*	offsets2,/*!< in: rec_get_offsets(rec2, index) */
+	dict_index_t*	index)	/*!< in: data dictionary index */
+{
+	ulint	match_f		= 0;
+	ulint	match_b		= 0;
+
+	return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
+				      &match_f, &match_b));
+}
diff --git a/storage/innodb_plugin/include/rem0rec.h b/storage/innodb_plugin/include/rem0rec.h
new file mode 100644
index 00000000000..17d08afabb9
--- /dev/null
+++ b/storage/innodb_plugin/include/rem0rec.h
@@ -0,0 +1,824 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/rem0rec.h
+Record manager
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef rem0rec_h
+#define rem0rec_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "rem0types.h"
+#include "mtr0types.h"
+#include "page0types.h"
+
+/* Info bit denoting the predefined minimum record: this bit is set
+if and only if the record is the first user record on a non-leaf
+B-tree page that is the leftmost page on its level
+(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */
+#define REC_INFO_MIN_REC_FLAG	0x10UL
+/* The deleted flag in info bits */
+#define REC_INFO_DELETED_FLAG	0x20UL	/* when bit is set to 1, it means the
+					record has been delete marked */
+
+/* Number of extra bytes in an old-style record,
+in addition to the data and the offsets */
+#define REC_N_OLD_EXTRA_BYTES	6
+/* Number of extra bytes in a new-style record,
+in addition to the data and the offsets */
+#define REC_N_NEW_EXTRA_BYTES	5
+
+/* Record status values */
+#define REC_STATUS_ORDINARY	0
+#define REC_STATUS_NODE_PTR	1
+#define REC_STATUS_INFIMUM	2
+#define REC_STATUS_SUPREMUM	3
+
+/* The following four constants are needed in page0zip.c in order to
+efficiently compress and decompress pages. */
+
+/* The offset of heap_no in a compact record */
+#define REC_NEW_HEAP_NO		4
+/* The shift of heap_no in a compact record.
+The status is stored in the low-order bits. */
+#define	REC_HEAP_NO_SHIFT	3
+
+/* Length of a B-tree node pointer, in bytes */
+#define REC_NODE_PTR_SIZE	4
+
+#ifdef UNIV_DEBUG
+/* Length of the rec_get_offsets() header */
+# define REC_OFFS_HEADER_SIZE	4
+#else /* UNIV_DEBUG */
+/* Length of the rec_get_offsets() header */
+# define REC_OFFS_HEADER_SIZE	2
+#endif /* UNIV_DEBUG */
+
+/* Number of elements that should be initially allocated for the
+offsets[] array, first passed to rec_get_offsets() */
+#define REC_OFFS_NORMAL_SIZE	100
+#define REC_OFFS_SMALL_SIZE	10
+
+/******************************************************//**
+The following function is used to get the pointer of the next chained record
+on the same page.
+@return	pointer to the next chained record, or NULL if none */
+UNIV_INLINE
+const rec_t*
+rec_get_next_ptr_const(
+/*===================*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp);	/*!< in: nonzero=compact page format */
+/******************************************************//**
+The following function is used to get the pointer of the next chained record
+on the same page.
+@return	pointer to the next chained record, or NULL if none */
+UNIV_INLINE
+rec_t*
+rec_get_next_ptr(
+/*=============*/
+	rec_t*	rec,	/*!< in: physical record */
+	ulint	comp);	/*!< in: nonzero=compact page format */
+/******************************************************//**
+The following function is used to get the offset of the
+next chained record on the same page.
+@return	the page offset of the next chained record, or 0 if none */
+UNIV_INLINE
+ulint
+rec_get_next_offs(
+/*==============*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp);	/*!< in: nonzero=compact page format */
+/******************************************************//**
+The following function is used to set the next record offset field
+of an old-style record. */
+UNIV_INLINE
+void
+rec_set_next_offs_old(
+/*==================*/
+	rec_t*	rec,	/*!< in: old-style physical record */
+	ulint	next);	/*!< in: offset of the next record */
+/******************************************************//**
+The following function is used to set the next record offset field
+of a new-style record. */
+UNIV_INLINE
+void
+rec_set_next_offs_new(
+/*==================*/
+	rec_t*	rec,	/*!< in/out: new-style physical record */
+	ulint	next);	/*!< in: offset of the next record */
+/******************************************************//**
+The following function is used to get the number of fields
+in an old-style record.
+@return	number of data fields */
+UNIV_INLINE
+ulint
+rec_get_n_fields_old(
+/*=================*/
+	const rec_t*	rec);	/*!< in: physical record */
+/******************************************************//**
+The following function is used to get the number of fields
+in a record.
+@return	number of data fields */
+UNIV_INLINE
+ulint
+rec_get_n_fields(
+/*=============*/
+	const rec_t*		rec,	/*!< in: physical record */
+	const dict_index_t*	index);	/*!< in: record descriptor */
+/******************************************************//**
+The following function is used to get the number of records owned by the
+previous directory record.
+@return	number of owned records */
+UNIV_INLINE
+ulint
+rec_get_n_owned_old(
+/*================*/
+	const rec_t*	rec);	/*!< in: old-style physical record */
+/******************************************************//**
+The following function is used to set the number of owned records. */
+UNIV_INLINE
+void
+rec_set_n_owned_old(
+/*================*/
+	rec_t*	rec,		/*!< in: old-style physical record */
+	ulint	n_owned);	/*!< in: the number of owned */
+/******************************************************//**
+The following function is used to get the number of records owned by the
+previous directory record.
+@return	number of owned records */
+UNIV_INLINE
+ulint
+rec_get_n_owned_new(
+/*================*/
+	const rec_t*	rec);	/*!< in: new-style physical record */
+/******************************************************//**
+The following function is used to set the number of owned records. */
+UNIV_INLINE
+void
+rec_set_n_owned_new(
+/*================*/
+	rec_t*		rec,	/*!< in/out: new-style physical record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		n_owned);/*!< in: the number of owned */
+/******************************************************//**
+The following function is used to retrieve the info bits of
+a record.
+@return	info bits */
+UNIV_INLINE
+ulint
+rec_get_info_bits(
+/*==============*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp);	/*!< in: nonzero=compact page format */
+/******************************************************//**
+The following function is used to set the info bits of a record. */
+UNIV_INLINE
+void
+rec_set_info_bits_old(
+/*==================*/
+	rec_t*	rec,	/*!< in: old-style physical record */
+	ulint	bits);	/*!< in: info bits */
+/******************************************************//**
+The following function is used to set the info bits of a record. */
+UNIV_INLINE
+void
+rec_set_info_bits_new(
+/*==================*/
+	rec_t*	rec,	/*!< in/out: new-style physical record */
+	ulint	bits);	/*!< in: info bits */
+/******************************************************//**
+The following function retrieves the status bits of a new-style record.
+@return	status bits */
+UNIV_INLINE
+ulint
+rec_get_status(
+/*===========*/
+	const rec_t*	rec);	/*!< in: physical record */
+
+/******************************************************//**
+The following function is used to set the status bits of a new-style record. */
+UNIV_INLINE
+void
+rec_set_status(
+/*===========*/
+	rec_t*	rec,	/*!< in/out: physical record */
+	ulint	bits);	/*!< in: info bits */
+
+/******************************************************//**
+The following function is used to retrieve the info and status
+bits of a record.  (Only compact records have status bits.)
+@return	info bits */
+UNIV_INLINE
+ulint
+rec_get_info_and_status_bits(
+/*=========================*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp);	/*!< in: nonzero=compact page format */
+/******************************************************//**
+The following function is used to set the info and status
+bits of a record.  (Only compact records have status bits.) */
+UNIV_INLINE
+void
+rec_set_info_and_status_bits(
+/*=========================*/
+	rec_t*	rec,	/*!< in/out: compact physical record */
+	ulint	bits);	/*!< in: info bits */
+
+/******************************************************//**
+The following function tells if record is delete marked.
+@return	nonzero if delete marked */
+UNIV_INLINE
+ulint
+rec_get_deleted_flag(
+/*=================*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp);	/*!< in: nonzero=compact page format */
+/******************************************************//**
+The following function is used to set the deleted bit. */
+UNIV_INLINE
+void
+rec_set_deleted_flag_old(
+/*=====================*/
+	rec_t*	rec,	/*!< in: old-style physical record */
+	ulint	flag);	/*!< in: nonzero if delete marked */
+/******************************************************//**
+The following function is used to set the deleted bit. */
+UNIV_INLINE
+void
+rec_set_deleted_flag_new(
+/*=====================*/
+	rec_t*		rec,	/*!< in/out: new-style physical record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		flag);	/*!< in: nonzero if delete marked */
+/******************************************************//**
+The following function tells if a new-style record is a node pointer.
+@return	TRUE if node pointer */
+UNIV_INLINE
+ibool
+rec_get_node_ptr_flag(
+/*==================*/
+	const rec_t*	rec);	/*!< in: physical record */
+/******************************************************//**
+The following function is used to get the order number
+of an old-style record in the heap of the index page.
+@return	heap order number */
+UNIV_INLINE
+ulint
+rec_get_heap_no_old(
+/*================*/
+	const rec_t*	rec);	/*!< in: physical record */
+/******************************************************//**
+The following function is used to set the heap number
+field in an old-style record. */
+UNIV_INLINE
+void
+rec_set_heap_no_old(
+/*================*/
+	rec_t*	rec,	/*!< in: physical record */
+	ulint	heap_no);/*!< in: the heap number */
+/******************************************************//**
+The following function is used to get the order number
+of a new-style record in the heap of the index page.
+@return	heap order number */
+UNIV_INLINE
+ulint
+rec_get_heap_no_new(
+/*================*/
+	const rec_t*	rec);	/*!< in: physical record */
+/******************************************************//**
+The following function is used to set the heap number
+field in a new-style record. */
+UNIV_INLINE
+void
+rec_set_heap_no_new(
+/*================*/
+	rec_t*	rec,	/*!< in/out: physical record */
+	ulint	heap_no);/*!< in: the heap number */
+/******************************************************//**
+The following function is used to test whether the data offsets
+in the record are stored in one-byte or two-byte format.
+@return	TRUE if 1-byte form */
+UNIV_INLINE
+ibool
+rec_get_1byte_offs_flag(
+/*====================*/
+	const rec_t*	rec);	/*!< in: physical record */
+
+/******************************************************//**
+Determine how many of the first n columns in a compact
+physical record are stored externally.
+@return	number of externally stored columns */
+UNIV_INTERN
+ulint
+rec_get_n_extern_new(
+/*=================*/
+	const rec_t*	rec,	/*!< in: compact physical record */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	ulint		n);	/*!< in: number of columns to scan */
+
+/******************************************************//**
+The following function determines the offsets to each field
+in the record.	It can reuse a previously allocated array.
+@return	the new offsets */
+UNIV_INTERN
+ulint*
+rec_get_offsets_func(
+/*=================*/
+	const rec_t*		rec,	/*!< in: physical record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*			offsets,/*!< in/out: array consisting of
+					offsets[0] allocated elements,
+					or an array from rec_get_offsets(),
+					or NULL */
+	ulint			n_fields,/*!< in: maximum number of
+					initialized fields
+					 (ULINT_UNDEFINED if all fields) */
+	mem_heap_t**		heap,	/*!< in/out: memory heap */
+	const char*		file,	/*!< in: file name where called */
+	ulint			line);	/*!< in: line number where called */
+
+#define rec_get_offsets(rec,index,offsets,n,heap)	\
+	rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
+
+/******************************************************//**
+Determine the offset to each field in a leaf-page record
+in ROW_FORMAT=COMPACT.  This is a special case of
+rec_init_offsets() and rec_get_offsets_func(). */
+UNIV_INTERN
+void
+rec_init_offsets_comp_ordinary(
+/*===========================*/
+	const rec_t*		rec,	/*!< in: physical record in
+					ROW_FORMAT=COMPACT */
+	ulint			extra,	/*!< in: number of bytes to reserve
+					between the record header and
+					the data payload
+					(usually REC_N_NEW_EXTRA_BYTES) */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*			offsets);/*!< in/out: array of offsets;
+					in: n=rec_offs_n_fields(offsets) */
+
+/******************************************************//**
+The following function determines the offsets to each field
+in the record.  It can reuse a previously allocated array. */
+UNIV_INTERN
+void
+rec_get_offsets_reverse(
+/*====================*/
+	const byte*		extra,	/*!< in: the extra bytes of a
+					compact record in reverse order,
+					excluding the fixed-size
+					REC_N_NEW_EXTRA_BYTES */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint			node_ptr,/*!< in: nonzero=node pointer,
+					0=leaf node */
+	ulint*			offsets);/*!< in/out: array consisting of
+					offsets[0] allocated elements */
+
+/************************************************************//**
+Validates offsets returned by rec_get_offsets().
+@return	TRUE if valid */
+UNIV_INLINE
+ibool
+rec_offs_validate(
+/*==============*/
+	const rec_t*		rec,	/*!< in: record or NULL */
+	const dict_index_t*	index,	/*!< in: record descriptor or NULL */
+	const ulint*		offsets);/*!< in: array returned by
+					rec_get_offsets() */
+#ifdef UNIV_DEBUG
+/************************************************************//**
+Updates debug data in offsets, in order to avoid bogus
+rec_offs_validate() failures. */
+UNIV_INLINE
+void
+rec_offs_make_valid(
+/*================*/
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*			offsets);/*!< in: array returned by
+					rec_get_offsets() */
+#else
+# define rec_offs_make_valid(rec, index, offsets) ((void) 0)
+#endif /* UNIV_DEBUG */
+
+/************************************************************//**
+The following function is used to get the offset to the nth
+data field in an old-style record.
+@return	offset to the field */
+UNIV_INTERN
+ulint
+rec_get_nth_field_offs_old(
+/*=======================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n,	/*!< in: index of the field */
+	ulint*		len);	/*!< out: length of the field; UNIV_SQL_NULL
+				if SQL null */
+#define rec_get_nth_field_old(rec, n, len) \
+((rec) + rec_get_nth_field_offs_old(rec, n, len))
+/************************************************************//**
+Gets the physical size of an old-style field.
+Also an SQL null may have a field of size > 0,
+if the data type is of a fixed size.
+@return	field size in bytes */
+UNIV_INLINE
+ulint
+rec_get_nth_field_size(
+/*===================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n);	/*!< in: index of the field */
+/************************************************************//**
+The following function is used to get an offset to the nth
+data field in a record.
+@return	offset from the origin of rec */
+UNIV_INLINE
+ulint
+rec_get_nth_field_offs(
+/*===================*/
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n,	/*!< in: index of the field */
+	ulint*		len);	/*!< out: length of the field; UNIV_SQL_NULL
+				if SQL null */
+#define rec_get_nth_field(rec, offsets, n, len) \
+((rec) + rec_get_nth_field_offs(offsets, n, len))
+/******************************************************//**
+Determine if the offsets are for a record in the new
+compact format.
+@return	nonzero if compact format */
+UNIV_INLINE
+ulint
+rec_offs_comp(
+/*==========*/
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/******************************************************//**
+Determine if the offsets are for a record containing
+externally stored columns.
+@return	nonzero if externally stored */
+UNIV_INLINE
+ulint
+rec_offs_any_extern(
+/*================*/
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/******************************************************//**
+Returns nonzero if the extern bit is set in nth field of rec.
+@return	nonzero if externally stored */
+UNIV_INLINE
+ulint
+rec_offs_nth_extern(
+/*================*/
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n);	/*!< in: nth field */
+/******************************************************//**
+Returns nonzero if the SQL NULL bit is set in nth field of rec.
+@return	nonzero if SQL NULL */
+UNIV_INLINE
+ulint
+rec_offs_nth_sql_null(
+/*==================*/
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n);	/*!< in: nth field */
+/******************************************************//**
+Gets the physical size of a field.
+@return	length of field */
+UNIV_INLINE
+ulint
+rec_offs_nth_size(
+/*==============*/
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n);	/*!< in: nth field */
+
+/******************************************************//**
+Returns the number of extern bits set in a record.
+@return	number of externally stored fields */
+UNIV_INLINE
+ulint
+rec_offs_n_extern(
+/*==============*/
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/***********************************************************//**
+This is used to modify the value of an already existing field in a record.
+The previous value must have exactly the same size as the new value. If len
+is UNIV_SQL_NULL then the field is treated as an SQL null.
+For records in ROW_FORMAT=COMPACT (new-style records), len must not be
+UNIV_SQL_NULL unless the field already is SQL null. */
+UNIV_INLINE
+void
+rec_set_nth_field(
+/*==============*/
+	rec_t*		rec,	/*!< in: record */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n,	/*!< in: index number of the field */
+	const void*	data,	/*!< in: pointer to the data if not SQL null */
+	ulint		len);	/*!< in: length of the data or UNIV_SQL_NULL */
+/**********************************************************//**
+The following function returns the data size of an old-style physical
+record, that is the sum of field lengths. SQL null fields
+are counted as length 0 fields. The value returned by the function
+is the distance from record origin to record end in bytes.
+@return	size */
+UNIV_INLINE
+ulint
+rec_get_data_size_old(
+/*==================*/
+	const rec_t*	rec);	/*!< in: physical record */
+/**********************************************************//**
+The following function returns the number of allocated elements
+for an array of offsets.
+@return	number of elements */
+UNIV_INLINE
+ulint
+rec_offs_get_n_alloc(
+/*=================*/
+	const ulint*	offsets);/*!< in: array for rec_get_offsets() */
+/**********************************************************//**
+The following function sets the number of allocated elements
+for an array of offsets. */
+UNIV_INLINE
+void
+rec_offs_set_n_alloc(
+/*=================*/
+	ulint*	offsets,	/*!< out: array for rec_get_offsets(),
+				must be allocated */
+	ulint	n_alloc);	/*!< in: number of elements */
+#define rec_offs_init(offsets) \
+	rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets)
+/**********************************************************//**
+The following function returns the number of fields in a record.
+@return	number of fields */
+UNIV_INLINE
+ulint
+rec_offs_n_fields(
+/*==============*/
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/**********************************************************//**
+The following function returns the data size of a physical
+record, that is the sum of field lengths. SQL null fields
+are counted as length 0 fields. The value returned by the function
+is the distance from record origin to record end in bytes.
+@return	size */
+UNIV_INLINE
+ulint
+rec_offs_data_size(
+/*===============*/
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/**********************************************************//**
+Returns the total size of record minus data size of record.
+The value returned by the function is the distance from record
+start to record origin in bytes.
+@return	size */
+UNIV_INLINE
+ulint
+rec_offs_extra_size(
+/*================*/
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/**********************************************************//**
+Returns the total size of a physical record.
+@return	size */
+UNIV_INLINE
+ulint
+rec_offs_size(
+/*==========*/
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/**********************************************************//**
+Returns a pointer to the start of the record.
+@return	pointer to start */
+UNIV_INLINE
+byte*
+rec_get_start(
+/*==========*/
+	rec_t*		rec,	/*!< in: pointer to record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/**********************************************************//**
+Returns a pointer to the end of the record.
+@return	pointer to end */
+UNIV_INLINE
+byte*
+rec_get_end(
+/*========*/
+	rec_t*		rec,	/*!< in: pointer to record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
+Copies a physical record to a buffer.
+@return	pointer to the origin of the copy */
+UNIV_INLINE
+rec_t*
+rec_copy(
+/*=====*/
+	void*		buf,	/*!< in: buffer */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+#ifndef UNIV_HOTBACKUP
+/**************************************************************//**
+Copies the first n fields of a physical record to a new physical record in
+a buffer.
+@return	own: copied record */
+UNIV_INTERN
+rec_t*
+rec_copy_prefix_to_buf(
+/*===================*/
+	const rec_t*		rec,		/*!< in: physical record */
+	const dict_index_t*	index,		/*!< in: record descriptor */
+	ulint			n_fields,	/*!< in: number of fields
+						to copy */
+	byte**			buf,		/*!< in/out: memory buffer
+						for the copied prefix,
+						or NULL */
+	ulint*			buf_size);	/*!< in/out: buffer size */
+/************************************************************//**
+Folds a prefix of a physical record to a ulint.
+@return	the folded value */
+UNIV_INLINE
+ulint
+rec_fold(
+/*=====*/
+	const rec_t*	rec,		/*!< in: the physical record */
+	const ulint*	offsets,	/*!< in: array returned by
+					rec_get_offsets() */
+	ulint		n_fields,	/*!< in: number of complete
+					fields to fold */
+	ulint		n_bytes,	/*!< in: number of bytes to fold
+					in an incomplete last field */
+	dulint		tree_id)	/*!< in: index tree id */
+	__attribute__((pure));
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************//**
+Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
+UNIV_INTERN
+void
+rec_convert_dtuple_to_rec_comp(
+/*===========================*/
+	rec_t*			rec,	/*!< in: origin of record */
+	ulint			extra,	/*!< in: number of bytes to
+					reserve between the record
+					header and the data payload
+					(normally REC_N_NEW_EXTRA_BYTES) */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint			status,	/*!< in: status bits of the record */
+	const dfield_t*		fields,	/*!< in: array of data fields */
+	ulint			n_fields);/*!< in: number of data fields */
+/*********************************************************//**
+Builds a physical record out of a data tuple and
+stores it into the given buffer.
+@return	pointer to the origin of physical record */
+UNIV_INTERN
+rec_t*
+rec_convert_dtuple_to_rec(
+/*======================*/
+	byte*			buf,	/*!< in: start address of the
+					physical record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*		dtuple,	/*!< in: data tuple */
+	ulint			n_ext);	/*!< in: number of
+					externally stored columns */
+/**********************************************************//**
+Returns the extra size of an old-style physical record if we know its
+data size and number of fields.
+@return	extra size */
+UNIV_INLINE
+ulint
+rec_get_converted_extra_size(
+/*=========================*/
+	ulint	data_size,	/*!< in: data size */
+	ulint	n_fields,	/*!< in: number of fields */
+	ulint	n_ext)		/*!< in: number of externally stored columns */
+		__attribute__((const));
+/**********************************************************//**
+Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
+@return	total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_comp_prefix(
+/*===============================*/
+	const dict_index_t*	index,	/*!< in: record descriptor;
+					dict_table_is_comp() is
+					assumed to hold, even if
+					it does not */
+	const dfield_t*		fields,	/*!< in: array of data fields */
+	ulint			n_fields,/*!< in: number of data fields */
+	ulint*			extra);	/*!< out: extra size */
+/**********************************************************//**
+Determines the size of a data tuple in ROW_FORMAT=COMPACT.
+@return	total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_comp(
+/*========================*/
+	const dict_index_t*	index,	/*!< in: record descriptor;
+					dict_table_is_comp() is
+					assumed to hold, even if
+					it does not */
+	ulint			status,	/*!< in: status bits of the record */
+	const dfield_t*		fields,	/*!< in: array of data fields */
+	ulint			n_fields,/*!< in: number of data fields */
+	ulint*			extra);	/*!< out: extra size */
+/**********************************************************//**
+The following function returns the size of a data tuple when converted to
+a physical record.
+@return	size */
+UNIV_INLINE
+ulint
+rec_get_converted_size(
+/*===================*/
+	dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	ulint		n_ext);	/*!< in: number of externally stored columns */
+#ifndef UNIV_HOTBACKUP
+/**************************************************************//**
+Copies the first n fields of a physical record to a data tuple.
+The fields are copied to the memory heap. */
+UNIV_INTERN
+void
+rec_copy_prefix_to_dtuple(
+/*======================*/
+	dtuple_t*		tuple,		/*!< out: data tuple */
+	const rec_t*		rec,		/*!< in: physical record */
+	const dict_index_t*	index,		/*!< in: record descriptor */
+	ulint			n_fields,	/*!< in: number of fields
+						to copy */
+	mem_heap_t*		heap);		/*!< in: memory heap */
+#endif /* !UNIV_HOTBACKUP */
+/***************************************************************//**
+Validates the consistency of a physical record.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+rec_validate(
+/*=========*/
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
+Prints an old-style physical record. */
+UNIV_INTERN
+void
+rec_print_old(
+/*==========*/
+	FILE*		file,	/*!< in: file where to print */
+	const rec_t*	rec);	/*!< in: physical record */
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Prints a physical record in ROW_FORMAT=COMPACT.  Ignores the
+record header. */
+UNIV_INTERN
+void
+rec_print_comp(
+/*===========*/
+	FILE*		file,	/*!< in: file where to print */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
+Prints a physical record. */
+UNIV_INTERN
+void
+rec_print_new(
+/*==========*/
+	FILE*		file,	/*!< in: file where to print */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
+Prints a physical record. */
+UNIV_INTERN
+void
+rec_print(
+/*======*/
+	FILE*		file,	/*!< in: file where to print */
+	const rec_t*	rec,	/*!< in: physical record */
+	dict_index_t*	index);	/*!< in: record descriptor */
+#endif /* UNIV_HOTBACKUP */
+
+#define REC_INFO_BITS		6	/* This is single byte bit-field */
+
+/* Maximum lengths for the data in a physical record if the offsets
+are given in one byte (resp. two byte) format. */
+#define REC_1BYTE_OFFS_LIMIT	0x7FUL
+#define REC_2BYTE_OFFS_LIMIT	0x7FFFUL
+
+/* The data size of record must be smaller than this because we reserve
+two upmost bits in a two byte offset for special purposes */
+#define REC_MAX_DATA_SIZE	(16 * 1024)
+
+#ifndef UNIV_NONINL
+#include "rem0rec.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innodb_plugin/include/rem0rec.ic
similarity index 52%
rename from storage/innobase/include/rem0rec.ic
rename to storage/innodb_plugin/include/rem0rec.ic
index d91fb4c4391..9fe736f9b0b 100644
--- a/storage/innobase/include/rem0rec.ic
+++ b/storage/innodb_plugin/include/rem0rec.ic
@@ -1,7 +1,24 @@
-/************************************************************************
-Record manager
+/*****************************************************************************
 
-(c) 1994-1996 Innobase Oy
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/rem0rec.ic
+Record manager
 
 Created 5/30/1994 Heikki Tuuri
 *************************************************************************/
@@ -84,9 +101,11 @@ and the shift needed to obtain each bit-field of the record. */
 #define REC_NEW_STATUS_SHIFT	0
 
 #define REC_OLD_HEAP_NO		5
-#define REC_NEW_HEAP_NO		4
 #define REC_HEAP_NO_MASK	0xFFF8UL
+#if 0 /* defined in rem0rec.h for use of page0zip.c */
+#define REC_NEW_HEAP_NO		4
 #define	REC_HEAP_NO_SHIFT	3
+#endif
 
 #define REC_OLD_N_OWNED		6	/* This is single byte bit-field */
 #define REC_NEW_N_OWNED		5	/* This is single byte bit-field */
@@ -98,9 +117,6 @@ and the shift needed to obtain each bit-field of the record. */
 #define	REC_INFO_BITS_MASK	0xF0UL
 #define REC_INFO_BITS_SHIFT	0
 
-/* The deleted flag in info bits */
-#define REC_INFO_DELETED_FLAG	0x20UL	/* when bit is set to 1, it means the
-					record has been delete marked */
 /* The following masks are used to filter the SQL null bit from
 one-byte and two-byte offsets */
 
@@ -128,78 +144,52 @@ a field stored to another page: */
 # error "sum of new-style masks != 0xFFFFFFUL"
 #endif
 
-/***************************************************************
+/***********************************************************//**
 Sets the value of the ith field SQL null bit of an old-style record. */
-
+UNIV_INTERN
 void
 rec_set_nth_field_null_bit(
 /*=======================*/
-	rec_t*	rec,	/* in: record */
-	ulint	i,	/* in: ith field */
-	ibool	val);	/* in: value to set */
-/***************************************************************
+	rec_t*	rec,	/*!< in: record */
+	ulint	i,	/*!< in: ith field */
+	ibool	val);	/*!< in: value to set */
+/***********************************************************//**
 Sets an old-style record field to SQL null.
 The physical size of the field is not changed. */
-
+UNIV_INTERN
 void
 rec_set_nth_field_sql_null(
 /*=======================*/
-	rec_t*	rec,	/* in: record */
-	ulint	n);	/* in: index of the field */
+	rec_t*	rec,	/*!< in: record */
+	ulint	n);	/*!< in: index of the field */
 
-/***************************************************************
-Sets the value of the ith field extern storage bit of an old-style record. */
-
-void
-rec_set_nth_field_extern_bit_old(
-/*=============================*/
-	rec_t*	rec,	/* in: old-style record */
-	ulint	i,	/* in: ith field */
-	ibool	val,	/* in: value to set */
-	mtr_t*	mtr);	/* in: mtr holding an X-latch to the page where
-			rec is, or NULL; in the NULL case we do not
-			write to log about the change */
-/***************************************************************
-Sets the value of the ith field extern storage bit of a new-style record. */
-
-void
-rec_set_nth_field_extern_bit_new(
-/*=============================*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint		ith,	/* in: ith field */
-	ibool		val,	/* in: value to set */
-	mtr_t*		mtr);	/* in: mtr holding an X-latch to the page
-				where rec is, or NULL; in the NULL case
-				we do not write to log about the change */
-
-/**********************************************************
+/******************************************************//**
 Gets a bit field from within 1 byte. */
 UNIV_INLINE
 ulint
 rec_get_bit_field_1(
 /*================*/
-	rec_t*	rec,	/* in: pointer to record origin */
-	ulint	offs,	/* in: offset from the origin down */
-	ulint	mask,	/* in: mask used to filter bits */
-	ulint	shift)	/* in: shift right applied after masking */
+	const rec_t*	rec,	/*!< in: pointer to record origin */
+	ulint		offs,	/*!< in: offset from the origin down */
+	ulint		mask,	/*!< in: mask used to filter bits */
+	ulint		shift)	/*!< in: shift right applied after masking */
 {
 	ut_ad(rec);
 
 	return((mach_read_from_1(rec - offs) & mask) >> shift);
 }
 
-/**********************************************************
+/******************************************************//**
 Sets a bit field within 1 byte. */
 UNIV_INLINE
 void
 rec_set_bit_field_1(
 /*================*/
-	rec_t*	rec,	/* in: pointer to record origin */
-	ulint	val,	/* in: value to set */
-	ulint	offs,	/* in: offset from the origin down */
-	ulint	mask,	/* in: mask used to filter bits */
-	ulint	shift)	/* in: shift right applied after masking */
+	rec_t*	rec,	/*!< in: pointer to record origin */
+	ulint	val,	/*!< in: value to set */
+	ulint	offs,	/*!< in: offset from the origin down */
+	ulint	mask,	/*!< in: mask used to filter bits */
+	ulint	shift)	/*!< in: shift right applied after masking */
 {
 	ut_ad(rec);
 	ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
@@ -213,33 +203,33 @@ rec_set_bit_field_1(
 			| (val << shift));
 }
 
-/**********************************************************
+/******************************************************//**
 Gets a bit field from within 2 bytes. */
 UNIV_INLINE
 ulint
 rec_get_bit_field_2(
 /*================*/
-	rec_t*	rec,	/* in: pointer to record origin */
-	ulint	offs,	/* in: offset from the origin down */
-	ulint	mask,	/* in: mask used to filter bits */
-	ulint	shift)	/* in: shift right applied after masking */
+	const rec_t*	rec,	/*!< in: pointer to record origin */
+	ulint		offs,	/*!< in: offset from the origin down */
+	ulint		mask,	/*!< in: mask used to filter bits */
+	ulint		shift)	/*!< in: shift right applied after masking */
 {
 	ut_ad(rec);
 
 	return((mach_read_from_2(rec - offs) & mask) >> shift);
 }
 
-/**********************************************************
+/******************************************************//**
 Sets a bit field within 2 bytes. */
 UNIV_INLINE
 void
 rec_set_bit_field_2(
 /*================*/
-	rec_t*	rec,	/* in: pointer to record origin */
-	ulint	val,	/* in: value to set */
-	ulint	offs,	/* in: offset from the origin down */
-	ulint	mask,	/* in: mask used to filter bits */
-	ulint	shift)	/* in: shift right applied after masking */
+	rec_t*	rec,	/*!< in: pointer to record origin */
+	ulint	val,	/*!< in: value to set */
+	ulint	offs,	/*!< in: offset from the origin down */
+	ulint	mask,	/*!< in: mask used to filter bits */
+	ulint	shift)	/*!< in: shift right applied after masking */
 {
 	ut_ad(rec);
 	ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
@@ -255,29 +245,30 @@ rec_set_bit_field_2(
 			| (val << shift));
 }
 
-/**********************************************************
-The following function is used to get the offset of the next chained record
-on the same page. */
+/******************************************************//**
+The following function is used to get the pointer of the next chained record
+on the same page.
+@return	pointer to the next chained record, or NULL if none */
 UNIV_INLINE
-ulint
-rec_get_next_offs(
-/*==============*/
-			/* out: the page offset of the next chained record, or
-			0 if none */
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp)	/* in: nonzero=compact page format */
+const rec_t*
+rec_get_next_ptr_const(
+/*===================*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
 {
 	ulint	field_value;
-#if REC_NEXT_MASK != 0xFFFFUL
-# error "REC_NEXT_MASK != 0xFFFFUL"
-#endif
-#if REC_NEXT_SHIFT
-# error "REC_NEXT_SHIFT != 0"
-#endif
+
+	ut_ad(REC_NEXT_MASK == 0xFFFFUL);
+	ut_ad(REC_NEXT_SHIFT == 0);
 
 	field_value = mach_read_from_2(rec - REC_NEXT);
 
-	if (comp) {
+	if (UNIV_UNLIKELY(field_value == 0)) {
+
+		return(NULL);
+	}
+
+	if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
 #if UNIV_PAGE_SIZE <= 32768
 		/* Note that for 64 KiB pages, field_value can 'wrap around'
 		and the debug assertion is not valid */
@@ -294,11 +285,85 @@ rec_get_next_offs(
 		      + ut_align_offset(rec, UNIV_PAGE_SIZE)
 		      < UNIV_PAGE_SIZE);
 #endif
-		if (field_value == 0) {
+		/* There must be at least REC_N_NEW_EXTRA_BYTES + 1
+		between each record. */
+		ut_ad((field_value > REC_N_NEW_EXTRA_BYTES
+		       && field_value < 32768)
+		      || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
+
+		return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
+		       + ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
+	} else {
+		ut_ad(field_value < UNIV_PAGE_SIZE);
+
+		return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
+		       + field_value);
+	}
+}
+
+/******************************************************//**
+The following function is used to get the pointer of the next chained record
+on the same page.
+@return	pointer to the next chained record, or NULL if none */
+UNIV_INLINE
+rec_t*
+rec_get_next_ptr(
+/*=============*/
+	rec_t*	rec,	/*!< in: physical record */
+	ulint	comp)	/*!< in: nonzero=compact page format */
+{
+	return((rec_t*) rec_get_next_ptr_const(rec, comp));
+}
+
+/******************************************************//**
+The following function is used to get the offset of the next chained record
+on the same page.
+@return	the page offset of the next chained record, or 0 if none */
+UNIV_INLINE
+ulint
+rec_get_next_offs(
+/*==============*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
+{
+	ulint	field_value;
+#if REC_NEXT_MASK != 0xFFFFUL
+# error "REC_NEXT_MASK != 0xFFFFUL"
+#endif
+#if REC_NEXT_SHIFT
+# error "REC_NEXT_SHIFT != 0"
+#endif
+
+	field_value = mach_read_from_2(rec - REC_NEXT);
+
+	if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
+#if UNIV_PAGE_SIZE <= 32768
+		/* Note that for 64 KiB pages, field_value can 'wrap around'
+		and the debug assertion is not valid */
+
+		/* In the following assertion, field_value is interpreted
+		as signed 16-bit integer in 2's complement arithmetics.
+		If all platforms defined int16_t in the standard headers,
+		the expression could be written simpler as
+		(int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
+		*/
+		ut_ad((field_value >= 32768
+		       ? field_value - 65536
+		       : field_value)
+		      + ut_align_offset(rec, UNIV_PAGE_SIZE)
+		      < UNIV_PAGE_SIZE);
+#endif
+		if (UNIV_UNLIKELY(field_value == 0)) {
 
 			return(0);
 		}
 
+		/* There must be at least REC_N_NEW_EXTRA_BYTES + 1
+		between each record. */
+		ut_ad((field_value > REC_N_NEW_EXTRA_BYTES
+		       && field_value < 32768)
+		      || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
+
 		return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
 	} else {
 		ut_ad(field_value < UNIV_PAGE_SIZE);
@@ -307,16 +372,15 @@ rec_get_next_offs(
 	}
 }
 
-/**********************************************************
-The following function is used to set the next record offset field of the
-record. */
+/******************************************************//**
+The following function is used to set the next record offset field
+of an old-style record. */
 UNIV_INLINE
 void
-rec_set_next_offs(
-/*==============*/
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp,	/* in: nonzero=compact page format */
-	ulint	next)	/* in: offset of the next record, or 0 if none */
+rec_set_next_offs_old(
+/*==================*/
+	rec_t*	rec,	/*!< in: old-style physical record */
+	ulint	next)	/*!< in: offset of the next record */
 {
 	ut_ad(rec);
 	ut_ad(UNIV_PAGE_SIZE > next);
@@ -327,37 +391,49 @@ rec_set_next_offs(
 # error "REC_NEXT_SHIFT != 0"
 #endif
 
-	if (comp) {
-		ulint field_value;
-
-		if (next) {
-			/* The following two statements calculate
-			next - offset_of_rec mod 64Ki, where mod is the modulo
-			as a non-negative number */
-
-			field_value = (ulint)((lint)next
-					      - (lint)ut_align_offset(
-						      rec, UNIV_PAGE_SIZE));
-			field_value &= REC_NEXT_MASK;
-		} else {
-			field_value = 0;
-		}
-
-		mach_write_to_2(rec - REC_NEXT, field_value);
-	} else {
-		mach_write_to_2(rec - REC_NEXT, next);
-	}
+	mach_write_to_2(rec - REC_NEXT, next);
 }
 
-/**********************************************************
+/******************************************************//**
+The following function is used to set the next record offset field
+of a new-style record. */
+UNIV_INLINE
+void
+rec_set_next_offs_new(
+/*==================*/
+	rec_t*	rec,	/*!< in/out: new-style physical record */
+	ulint	next)	/*!< in: offset of the next record */
+{
+	ulint	field_value;
+
+	ut_ad(rec);
+	ut_ad(UNIV_PAGE_SIZE > next);
+
+	if (UNIV_UNLIKELY(!next)) {
+		field_value = 0;
+	} else {
+		/* The following two statements calculate
+		next - offset_of_rec mod 64Ki, where mod is the modulo
+		as a non-negative number */
+
+		field_value = (ulint)
+			((lint) next 
+			 - (lint) ut_align_offset(rec, UNIV_PAGE_SIZE));
+		field_value &= REC_NEXT_MASK;
+	}
+
+	mach_write_to_2(rec - REC_NEXT, field_value);
+}
+
+/******************************************************//**
 The following function is used to get the number of fields
-in an old-style record. */
+in an old-style record.
+@return	number of data fields */
 UNIV_INLINE
 ulint
 rec_get_n_fields_old(
 /*=================*/
-			/* out: number of data fields */
-	rec_t*	rec)	/* in: physical record */
+	const rec_t*	rec)	/*!< in: physical record */
 {
 	ulint	ret;
 
@@ -372,15 +448,15 @@ rec_get_n_fields_old(
 	return(ret);
 }
 
-/**********************************************************
+/******************************************************//**
 The following function is used to set the number of fields
 in an old-style record. */
 UNIV_INLINE
 void
 rec_set_n_fields_old(
 /*=================*/
-	rec_t*	rec,		/* in: physical record */
-	ulint	n_fields)	/* in: the number of fields */
+	rec_t*	rec,		/*!< in: physical record */
+	ulint	n_fields)	/*!< in: the number of fields */
 {
 	ut_ad(rec);
 	ut_ad(n_fields <= REC_MAX_N_FIELDS);
@@ -390,14 +466,14 @@ rec_set_n_fields_old(
 			    REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT);
 }
 
-/**********************************************************
-The following function retrieves the status bits of a new-style record. */
+/******************************************************//**
+The following function retrieves the status bits of a new-style record.
+@return	status bits */
 UNIV_INLINE
 ulint
 rec_get_status(
 /*===========*/
-			/* out: status bits */
-	rec_t*	rec)	/* in: physical record */
+	const rec_t*	rec)	/*!< in: physical record */
 {
 	ulint	ret;
 
@@ -410,16 +486,16 @@ rec_get_status(
 	return(ret);
 }
 
-/**********************************************************
+/******************************************************//**
 The following function is used to get the number of fields
-in a record. */
+in a record.
+@return	number of data fields */
 UNIV_INLINE
 ulint
 rec_get_n_fields(
 /*=============*/
-				/* out: number of data fields */
-	rec_t*		rec,	/* in: physical record */
-	dict_index_t*	index)	/* in: record descriptor */
+	const rec_t*		rec,	/*!< in: physical record */
+	const dict_index_t*	index)	/*!< in: record descriptor */
 {
 	ut_ad(rec);
 	ut_ad(index);
@@ -442,113 +518,129 @@ rec_get_n_fields(
 	}
 }
 
-/**********************************************************
+/******************************************************//**
 The following function is used to get the number of records owned by the
-previous directory record. */
+previous directory record.
+@return	number of owned records */
 UNIV_INLINE
 ulint
-rec_get_n_owned(
-/*============*/
-			/* out: number of owned records */
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp)	/* in: nonzero=compact page format */
+rec_get_n_owned_old(
+/*================*/
+	const rec_t*	rec)	/*!< in: old-style physical record */
 {
-	ulint	ret;
-
-	ut_ad(rec);
-
-	ret = rec_get_bit_field_1(rec,
-				  comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
-				  REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
-	ut_ad(ret <= REC_MAX_N_OWNED);
-
-	return(ret);
+	return(rec_get_bit_field_1(rec, REC_OLD_N_OWNED,
+				   REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
 }
 
-/**********************************************************
+/******************************************************//**
 The following function is used to set the number of owned records. */
 UNIV_INLINE
 void
-rec_set_n_owned(
-/*============*/
-	rec_t*	rec,		/* in: physical record */
-	ulint	comp,		/* in: nonzero=compact page format */
-	ulint	n_owned)	/* in: the number of owned */
+rec_set_n_owned_old(
+/*================*/
+	rec_t*	rec,		/*!< in: old-style physical record */
+	ulint	n_owned)	/*!< in: the number of owned */
 {
-	ut_ad(rec);
-	ut_ad(n_owned <= REC_MAX_N_OWNED);
-
-	rec_set_bit_field_1(rec, n_owned,
-			    comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
+	rec_set_bit_field_1(rec, n_owned, REC_OLD_N_OWNED,
 			    REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
 }
 
-/**********************************************************
-The following function is used to retrieve the info bits of a record. */
+/******************************************************//**
+The following function is used to get the number of records owned by the
+previous directory record.
+@return	number of owned records */
+UNIV_INLINE
+ulint
+rec_get_n_owned_new(
+/*================*/
+	const rec_t*	rec)	/*!< in: new-style physical record */
+{
+	return(rec_get_bit_field_1(rec, REC_NEW_N_OWNED,
+				   REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
+}
+
+/******************************************************//**
+The following function is used to set the number of owned records. */
+UNIV_INLINE
+void
+rec_set_n_owned_new(
+/*================*/
+	rec_t*		rec,	/*!< in/out: new-style physical record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		n_owned)/*!< in: the number of owned */
+{
+	rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED,
+			    REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
+	if (UNIV_LIKELY_NULL(page_zip)
+	    && UNIV_LIKELY(rec_get_status(rec)
+			   != REC_STATUS_SUPREMUM)) {
+		page_zip_rec_set_owned(page_zip, rec, n_owned);
+	}
+}
+
+/******************************************************//**
+The following function is used to retrieve the info bits of a record.
+@return	info bits */
 UNIV_INLINE
 ulint
 rec_get_info_bits(
 /*==============*/
-			/* out: info bits */
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp)	/* in: nonzero=compact page format */
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
 {
-	ulint	ret;
-
-	ut_ad(rec);
-
-	ret = rec_get_bit_field_1(rec,
-				  comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
-				  REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
-	ut_ad((ret & ~REC_INFO_BITS_MASK) == 0);
-
-	return(ret);
+	return(rec_get_bit_field_1(
+		       rec, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
+		       REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT));
 }
 
-/**********************************************************
+/******************************************************//**
 The following function is used to set the info bits of a record. */
 UNIV_INLINE
 void
-rec_set_info_bits(
-/*==============*/
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp,	/* in: nonzero=compact page format */
-	ulint	bits)	/* in: info bits */
+rec_set_info_bits_old(
+/*==================*/
+	rec_t*	rec,	/*!< in: old-style physical record */
+	ulint	bits)	/*!< in: info bits */
 {
-	ut_ad(rec);
-	ut_ad((bits & ~REC_INFO_BITS_MASK) == 0);
-
-	rec_set_bit_field_1(rec, bits,
-			    comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
+	rec_set_bit_field_1(rec, bits, REC_OLD_INFO_BITS,
+			    REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
+}
+/******************************************************//**
+The following function is used to set the info bits of a record. */
+UNIV_INLINE
+void
+rec_set_info_bits_new(
+/*==================*/
+	rec_t*	rec,	/*!< in/out: new-style physical record */
+	ulint	bits)	/*!< in: info bits */
+{
+	rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS,
 			    REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
 }
 
-/**********************************************************
+/******************************************************//**
 The following function is used to set the status bits of a new-style record. */
 UNIV_INLINE
 void
 rec_set_status(
 /*===========*/
-	rec_t*	rec,	/* in: physical record */
-	ulint	bits)	/* in: info bits */
+	rec_t*	rec,	/*!< in/out: physical record */
+	ulint	bits)	/*!< in: info bits */
 {
-	ut_ad(rec);
-	ut_ad((bits & ~REC_NEW_STATUS_MASK) == 0);
-
 	rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
 			    REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
 }
 
-/**********************************************************
+/******************************************************//**
 The following function is used to retrieve the info and status
-bits of a record.  (Only compact records have status bits.) */
+bits of a record.  (Only compact records have status bits.)
+@return	info bits */
 UNIV_INLINE
 ulint
 rec_get_info_and_status_bits(
 /*=========================*/
-			/* out: info bits */
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp)	/* in: nonzero=compact page format */
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
 {
 	ulint	bits;
 #if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
@@ -563,38 +655,33 @@ rec_get_info_and_status_bits(
 	}
 	return(bits);
 }
-/**********************************************************
+/******************************************************//**
 The following function is used to set the info and status
 bits of a record.  (Only compact records have status bits.) */
 UNIV_INLINE
 void
 rec_set_info_and_status_bits(
 /*=========================*/
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp,	/* in: nonzero=compact page format */
-	ulint	bits)	/* in: info bits */
+	rec_t*	rec,	/*!< in/out: physical record */
+	ulint	bits)	/*!< in: info bits */
 {
 #if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
 & (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
 # error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
 #endif
-	if (comp) {
-		rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
-	} else {
-		ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
-	}
-	rec_set_info_bits(rec, comp, bits & ~REC_NEW_STATUS_MASK);
+	rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
+	rec_set_info_bits_new(rec, bits & ~REC_NEW_STATUS_MASK);
 }
 
-/**********************************************************
-The following function tells if record is delete marked. */
+/******************************************************//**
+The following function tells if record is delete marked.
+@return	nonzero if delete marked */
 UNIV_INLINE
 ulint
 rec_get_deleted_flag(
 /*=================*/
-			/* out: nonzero if delete marked */
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp)	/* in: nonzero=compact page format */
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
 {
 	if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
 		return(UNIV_UNLIKELY(
@@ -609,19 +696,18 @@ rec_get_deleted_flag(
 	}
 }
 
-/**********************************************************
+/******************************************************//**
 The following function is used to set the deleted bit. */
 UNIV_INLINE
 void
-rec_set_deleted_flag(
-/*=================*/
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp,	/* in: nonzero=compact page format */
-	ulint	flag)	/* in: nonzero if delete marked */
+rec_set_deleted_flag_old(
+/*=====================*/
+	rec_t*	rec,	/*!< in: old-style physical record */
+	ulint	flag)	/*!< in: nonzero if delete marked */
 {
 	ulint	val;
 
-	val = rec_get_info_bits(rec, comp);
+	val = rec_get_info_bits(rec, FALSE);
 
 	if (flag) {
 		val |= REC_INFO_DELETED_FLAG;
@@ -629,70 +715,113 @@ rec_set_deleted_flag(
 		val &= ~REC_INFO_DELETED_FLAG;
 	}
 
-	rec_set_info_bits(rec, comp, val);
+	rec_set_info_bits_old(rec, val);
 }
 
-/**********************************************************
-The following function tells if a new-style record is a node pointer. */
+/******************************************************//**
+The following function is used to set the deleted bit. */
+UNIV_INLINE
+void
+rec_set_deleted_flag_new(
+/*=====================*/
+	rec_t*		rec,	/*!< in/out: new-style physical record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		flag)	/*!< in: nonzero if delete marked */
+{
+	ulint	val;
+
+	val = rec_get_info_bits(rec, TRUE);
+
+	if (flag) {
+		val |= REC_INFO_DELETED_FLAG;
+	} else {
+		val &= ~REC_INFO_DELETED_FLAG;
+	}
+
+	rec_set_info_bits_new(rec, val);
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		page_zip_rec_set_deleted(page_zip, rec, flag);
+	}
+}
+
+/******************************************************//**
+The following function tells if a new-style record is a node pointer.
+@return	TRUE if node pointer */
 UNIV_INLINE
 ibool
 rec_get_node_ptr_flag(
 /*==================*/
-			/* out: TRUE if node pointer */
-	rec_t*	rec)	/* in: physical record */
+	const rec_t*	rec)	/*!< in: physical record */
 {
 	return(REC_STATUS_NODE_PTR == rec_get_status(rec));
 }
 
-/**********************************************************
-The following function is used to get the order number of the record in the
-heap of the index page. */
+/******************************************************//**
+The following function is used to get the order number
+of an old-style record in the heap of the index page.
+@return	heap order number */
 UNIV_INLINE
 ulint
-rec_get_heap_no(
-/*============*/
-			/* out: heap order number */
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp)	/* in: nonzero=compact page format */
+rec_get_heap_no_old(
+/*================*/
+	const rec_t*	rec)	/*!< in: physical record */
 {
-	ulint	ret;
-
-	ut_ad(rec);
-
-	ret = rec_get_bit_field_2(rec,
-				  comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
-				  REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
-	ut_ad(ret <= REC_MAX_HEAP_NO);
-
-	return(ret);
+	return(rec_get_bit_field_2(rec, REC_OLD_HEAP_NO,
+				   REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
 }
 
-/**********************************************************
-The following function is used to set the heap number field in the record. */
+/******************************************************//**
+The following function is used to set the heap number
+field in an old-style record. */
 UNIV_INLINE
 void
-rec_set_heap_no(
-/*============*/
-	rec_t*	rec,	/* in: physical record */
-	ulint	comp,	/* in: nonzero=compact page format */
-	ulint	heap_no)/* in: the heap number */
+rec_set_heap_no_old(
+/*================*/
+	rec_t*	rec,	/*!< in: physical record */
+	ulint	heap_no)/*!< in: the heap number */
 {
-	ut_ad(heap_no <= REC_MAX_HEAP_NO);
-
-	rec_set_bit_field_2(rec, heap_no,
-			    comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
+	rec_set_bit_field_2(rec, heap_no, REC_OLD_HEAP_NO,
 			    REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
 }
 
-/**********************************************************
+/******************************************************//**
+The following function is used to get the order number
+of a new-style record in the heap of the index page.
+@return	heap order number */
+UNIV_INLINE
+ulint
+rec_get_heap_no_new(
+/*================*/
+	const rec_t*	rec)	/*!< in: physical record */
+{
+	return(rec_get_bit_field_2(rec, REC_NEW_HEAP_NO,
+				   REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
+}
+
+/******************************************************//**
+The following function is used to set the heap number
+field in a new-style record. */
+UNIV_INLINE
+void
+rec_set_heap_no_new(
+/*================*/
+	rec_t*	rec,	/*!< in/out: physical record */
+	ulint	heap_no)/*!< in: the heap number */
+{
+	rec_set_bit_field_2(rec, heap_no, REC_NEW_HEAP_NO,
+			    REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
+}
+
+/******************************************************//**
 The following function is used to test whether the data offsets in the record
-are stored in one-byte or two-byte format. */
+are stored in one-byte or two-byte format.
+@return	TRUE if 1-byte form */
 UNIV_INLINE
 ibool
 rec_get_1byte_offs_flag(
 /*====================*/
-			/* out: TRUE if 1-byte form */
-	rec_t*	rec)	/* in: physical record */
+	const rec_t*	rec)	/*!< in: physical record */
 {
 #if TRUE != 1
 #error "TRUE != 1"
@@ -702,14 +831,14 @@ rec_get_1byte_offs_flag(
 				   REC_OLD_SHORT_SHIFT));
 }
 
-/**********************************************************
+/******************************************************//**
 The following function is used to set the 1-byte offsets flag. */
 UNIV_INLINE
 void
 rec_set_1byte_offs_flag(
 /*====================*/
-	rec_t*	rec,	/* in: physical record */
-	ibool	flag)	/* in: TRUE if 1byte form */
+	rec_t*	rec,	/*!< in: physical record */
+	ibool	flag)	/*!< in: TRUE if 1byte form */
 {
 #if TRUE != 1
 #error "TRUE != 1"
@@ -720,18 +849,17 @@ rec_set_1byte_offs_flag(
 			    REC_OLD_SHORT_SHIFT);
 }
 
-/**********************************************************
+/******************************************************//**
 Returns the offset of nth field end if the record is stored in the 1-byte
 offsets form. If the field is SQL null, the flag is ORed in the returned
-value. */
+value.
+@return	offset of the start of the field, SQL null flag ORed */
 UNIV_INLINE
 ulint
 rec_1_get_field_end_info(
 /*=====================*/
-			/* out: offset of the start of the field, SQL null
-			flag ORed */
-	rec_t*	rec,	/* in: record */
-	ulint	n)	/* in: field index */
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
 {
 	ut_ad(rec_get_1byte_offs_flag(rec));
 	ut_ad(n < rec_get_n_fields_old(rec));
@@ -739,18 +867,18 @@ rec_1_get_field_end_info(
 	return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1)));
 }
 
-/**********************************************************
+/******************************************************//**
 Returns the offset of nth field end if the record is stored in the 2-byte
 offsets form. If the field is SQL null, the flag is ORed in the returned
-value. */
+value.
+@return offset of the start of the field, SQL null flag and extern
+storage flag ORed */
 UNIV_INLINE
 ulint
 rec_2_get_field_end_info(
 /*=====================*/
-			/* out: offset of the start of the field, SQL null
-			flag and extern storage flag ORed */
-	rec_t*	rec,	/* in: record */
-	ulint	n)	/* in: field index */
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
 {
 	ut_ad(!rec_get_1byte_offs_flag(rec));
 	ut_ad(n < rec_get_n_fields_old(rec));
@@ -758,61 +886,54 @@ rec_2_get_field_end_info(
 	return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2)));
 }
 
-#ifdef UNIV_DEBUG
-/* Length of the rec_get_offsets() header */
-# define REC_OFFS_HEADER_SIZE	4
-#else /* UNIV_DEBUG */
-/* Length of the rec_get_offsets() header */
-# define REC_OFFS_HEADER_SIZE	2
-#endif /* UNIV_DEBUG */
-
 /* Get the base address of offsets.  The extra_size is stored at
 this position, and following positions hold the end offsets of
 the fields. */
 #define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
 
-/**************************************************************
+/**********************************************************//**
 The following function returns the number of allocated elements
-for an array of offsets. */
+for an array of offsets.
+@return	number of elements */
 UNIV_INLINE
 ulint
 rec_offs_get_n_alloc(
 /*=================*/
-				/* out: number of elements */
-	const ulint*	offsets)/* in: array for rec_get_offsets() */
+	const ulint*	offsets)/*!< in: array for rec_get_offsets() */
 {
 	ulint	n_alloc;
 	ut_ad(offsets);
 	n_alloc = offsets[0];
-	ut_ad(n_alloc > 0);
+	ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
+	UNIV_MEM_ASSERT_W(offsets, n_alloc * sizeof *offsets);
 	return(n_alloc);
 }
 
-/**************************************************************
+/**********************************************************//**
 The following function sets the number of allocated elements
 for an array of offsets. */
 UNIV_INLINE
 void
 rec_offs_set_n_alloc(
 /*=================*/
-	ulint*	offsets,	/* out: array for rec_get_offsets(),
+	ulint*	offsets,	/*!< out: array for rec_get_offsets(),
 				must be allocated */
-	ulint	n_alloc)	/* in: number of elements */
+	ulint	n_alloc)	/*!< in: number of elements */
 {
 	ut_ad(offsets);
-	ut_ad(n_alloc > 0);
+	ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
 	UNIV_MEM_ASSERT_AND_ALLOC(offsets, n_alloc * sizeof *offsets);
 	offsets[0] = n_alloc;
 }
 
-/**************************************************************
-The following function returns the number of fields in a record. */
+/**********************************************************//**
+The following function returns the number of fields in a record.
+@return	number of fields */
 UNIV_INLINE
 ulint
 rec_offs_n_fields(
 /*==============*/
-				/* out: number of fields */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
 	ulint	n_fields;
 	ut_ad(offsets);
@@ -824,16 +945,17 @@ rec_offs_n_fields(
 	return(n_fields);
 }
 
-/****************************************************************
-Validates offsets returned by rec_get_offsets(). */
+/************************************************************//**
+Validates offsets returned by rec_get_offsets().
+@return	TRUE if valid */
 UNIV_INLINE
 ibool
 rec_offs_validate(
 /*==============*/
-				/* out: TRUE if valid */
-	rec_t*		rec,	/* in: record or NULL */
-	dict_index_t*	index,	/* in: record descriptor or NULL */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+	const rec_t*		rec,	/*!< in: record or NULL */
+	const dict_index_t*	index,	/*!< in: record descriptor or NULL */
+	const ulint*		offsets)/*!< in: array returned by
+					rec_get_offsets() */
 {
 	ulint	i	= rec_offs_n_fields(offsets);
 	ulint	last	= ULINT_MAX;
@@ -878,52 +1000,50 @@ rec_offs_validate(
 	}
 	return(TRUE);
 }
-/****************************************************************
+#ifdef UNIV_DEBUG
+/************************************************************//**
 Updates debug data in offsets, in order to avoid bogus
 rec_offs_validate() failures. */
 UNIV_INLINE
 void
 rec_offs_make_valid(
 /*================*/
-	rec_t*		rec __attribute__((unused)),
-				/* in: record */
-	dict_index_t*	index __attribute__((unused)),
-				/* in: record descriptor */
-	ulint*		offsets __attribute__((unused)))
-				/* in: array returned by rec_get_offsets() */
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*			offsets)/*!< in: array returned by
+					rec_get_offsets() */
 {
-#ifdef UNIV_DEBUG
+	ut_ad(rec);
+	ut_ad(index);
+	ut_ad(offsets);
 	ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets));
 	offsets[2] = (ulint) rec;
 	offsets[3] = (ulint) index;
-#endif /* UNIV_DEBUG */
 }
+#endif /* UNIV_DEBUG */
 
-/****************************************************************
-The following function is used to get a pointer to the nth
-data field in a record. */
+/************************************************************//**
+The following function is used to get an offset to the nth
+data field in a record.
+@return	offset from the origin of rec */
 UNIV_INLINE
-byte*
-rec_get_nth_field(
-/*==============*/
-				/* out: pointer to the field */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		n,	/* in: index of the field */
-	ulint*		len)	/* out: length of the field; UNIV_SQL_NULL
+ulint
+rec_get_nth_field_offs(
+/*===================*/
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n,	/*!< in: index of the field */
+	ulint*		len)	/*!< out: length of the field; UNIV_SQL_NULL
 				if SQL null */
 {
-	byte*	field;
+	ulint	offs;
 	ulint	length;
-	ut_ad(rec);
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
 	ut_ad(n < rec_offs_n_fields(offsets));
 	ut_ad(len);
 
 	if (UNIV_UNLIKELY(n == 0)) {
-		field = rec;
+		offs = 0;
 	} else {
-		field = rec + (rec_offs_base(offsets)[n] & REC_OFFS_MASK);
+		offs = rec_offs_base(offsets)[n] & REC_OFFS_MASK;
 	}
 
 	length = rec_offs_base(offsets)[1 + n];
@@ -932,36 +1052,50 @@ rec_get_nth_field(
 		length = UNIV_SQL_NULL;
 	} else {
 		length &= REC_OFFS_MASK;
-		length -= field - rec;
+		length -= offs;
 	}
 
 	*len = length;
-	return(field);
+	return(offs);
 }
 
-/**********************************************************
+/******************************************************//**
 Determine if the offsets are for a record in the new
-compact format. */
+compact format.
+@return	nonzero if compact format */
 UNIV_INLINE
 ulint
 rec_offs_comp(
 /*==========*/
-				/* out: nonzero if compact format */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
 	ut_ad(rec_offs_validate(NULL, NULL, offsets));
 	return(*rec_offs_base(offsets) & REC_OFFS_COMPACT);
 }
 
-/**********************************************************
-Returns nonzero if the extern bit is set in nth field of rec. */
+/******************************************************//**
+Determine if the offsets are for a record containing
+externally stored columns.
+@return	nonzero if externally stored */
+UNIV_INLINE
+ulint
+rec_offs_any_extern(
+/*================*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	ut_ad(rec_offs_validate(NULL, NULL, offsets));
+	return(UNIV_UNLIKELY(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL));
+}
+
+/******************************************************//**
+Returns nonzero if the extern bit is set in nth field of rec.
+@return	nonzero if externally stored */
 UNIV_INLINE
 ulint
 rec_offs_nth_extern(
 /*================*/
-				/* out: nonzero if externally stored */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		n)	/* in: nth field */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n)	/*!< in: nth field */
 {
 	ut_ad(rec_offs_validate(NULL, NULL, offsets));
 	ut_ad(n < rec_offs_n_fields(offsets));
@@ -969,15 +1103,15 @@ rec_offs_nth_extern(
 			     & REC_OFFS_EXTERNAL));
 }
 
-/**********************************************************
-Returns nonzero if the SQL NULL bit is set in nth field of rec. */
+/******************************************************//**
+Returns nonzero if the SQL NULL bit is set in nth field of rec.
+@return	nonzero if SQL NULL */
 UNIV_INLINE
 ulint
 rec_offs_nth_sql_null(
 /*==================*/
-				/* out: nonzero if SQL NULL */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		n)	/* in: nth field */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n)	/*!< in: nth field */
 {
 	ut_ad(rec_offs_validate(NULL, NULL, offsets));
 	ut_ad(n < rec_offs_n_fields(offsets));
@@ -985,15 +1119,15 @@ rec_offs_nth_sql_null(
 			     & REC_OFFS_SQL_NULL));
 }
 
-/**********************************************************
-Gets the physical size of a field. */
+/******************************************************//**
+Gets the physical size of a field.
+@return	length of field */
 UNIV_INLINE
 ulint
 rec_offs_nth_size(
 /*==============*/
-				/* out: length of field */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		n)	/* in: nth field */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n)	/*!< in: nth field */
 {
 	ut_ad(rec_offs_validate(NULL, NULL, offsets));
 	ut_ad(n < rec_offs_n_fields(offsets));
@@ -1004,60 +1138,43 @@ rec_offs_nth_size(
 	       & REC_OFFS_MASK);
 }
 
-/**********************************************************
-Returns TRUE if the extern bit is set in any of the fields
-of an old-style record. */
+/******************************************************//**
+Returns the number of extern bits set in a record.
+@return	number of externally stored fields */
 UNIV_INLINE
-ibool
-rec_offs_any_extern(
-/*================*/
-				/* out: TRUE if a field is stored externally */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+ulint
+rec_offs_n_extern(
+/*==============*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
-	ulint	i;
-	for (i = rec_offs_n_fields(offsets); i--; ) {
-		if (rec_offs_nth_extern(offsets, i)) {
-			return(TRUE);
+	ulint	n = 0;
+
+	if (rec_offs_any_extern(offsets)) {
+		ulint	i;
+
+		for (i = rec_offs_n_fields(offsets); i--; ) {
+			if (rec_offs_nth_extern(offsets, i)) {
+				n++;
+			}
 		}
 	}
-	return(FALSE);
+
+	return(n);
 }
 
-/***************************************************************
-Sets the value of the ith field extern storage bit. */
-UNIV_INLINE
-void
-rec_set_nth_field_extern_bit(
-/*=========================*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint		i,	/* in: ith field */
-	ibool		val,	/* in: value to set */
-	mtr_t*		mtr)	/* in: mtr holding an X-latch to the page
-				where rec is, or NULL; in the NULL case
-				we do not write to log about the change */
-{
-	if (dict_table_is_comp(index->table)) {
-		rec_set_nth_field_extern_bit_new(rec, index, i, val, mtr);
-	} else {
-		rec_set_nth_field_extern_bit_old(rec, i, val, mtr);
-	}
-}
-
-/**********************************************************
+/******************************************************//**
 Returns the offset of n - 1th field end if the record is stored in the 1-byte
 offsets form. If the field is SQL null, the flag is ORed in the returned
 value. This function and the 2-byte counterpart are defined here because the
 C-compiler was not able to sum negative and positive constant offsets, and
-warned of constant arithmetic overflow within the compiler. */
+warned of constant arithmetic overflow within the compiler.
+@return	offset of the start of the PREVIOUS field, SQL null flag ORed */
 UNIV_INLINE
 ulint
 rec_1_get_prev_field_end_info(
 /*==========================*/
-			/* out: offset of the start of the PREVIOUS field, SQL
-			null flag ORed */
-	rec_t*	rec,	/* in: record */
-	ulint	n)	/* in: field index */
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
 {
 	ut_ad(rec_get_1byte_offs_flag(rec));
 	ut_ad(n <= rec_get_n_fields_old(rec));
@@ -1065,18 +1182,17 @@ rec_1_get_prev_field_end_info(
 	return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n)));
 }
 
-/**********************************************************
+/******************************************************//**
 Returns the offset of n - 1th field end if the record is stored in the 2-byte
 offsets form. If the field is SQL null, the flag is ORed in the returned
-value. */
+value.
+@return	offset of the start of the PREVIOUS field, SQL null flag ORed */
 UNIV_INLINE
 ulint
 rec_2_get_prev_field_end_info(
 /*==========================*/
-			/* out: offset of the start of the PREVIOUS field, SQL
-			null flag ORed */
-	rec_t*	rec,	/* in: record */
-	ulint	n)	/* in: field index */
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
 {
 	ut_ad(!rec_get_1byte_offs_flag(rec));
 	ut_ad(n <= rec_get_n_fields_old(rec));
@@ -1084,16 +1200,16 @@ rec_2_get_prev_field_end_info(
 	return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n)));
 }
 
-/**********************************************************
+/******************************************************//**
 Sets the field end info for the nth field if the record is stored in the
 1-byte format. */
 UNIV_INLINE
 void
 rec_1_set_field_end_info(
 /*=====================*/
-	rec_t*	rec,	/* in: record */
-	ulint	n,	/* in: field index */
-	ulint	info)	/* in: value to set */
+	rec_t*	rec,	/*!< in: record */
+	ulint	n,	/*!< in: field index */
+	ulint	info)	/*!< in: value to set */
 {
 	ut_ad(rec_get_1byte_offs_flag(rec));
 	ut_ad(n < rec_get_n_fields_old(rec));
@@ -1101,16 +1217,16 @@ rec_1_set_field_end_info(
 	mach_write_to_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1), info);
 }
 
-/**********************************************************
+/******************************************************//**
 Sets the field end info for the nth field if the record is stored in the
 2-byte format. */
 UNIV_INLINE
 void
 rec_2_set_field_end_info(
 /*=====================*/
-	rec_t*	rec,	/* in: record */
-	ulint	n,	/* in: field index */
-	ulint	info)	/* in: value to set */
+	rec_t*	rec,	/*!< in: record */
+	ulint	n,	/*!< in: field index */
+	ulint	info)	/*!< in: value to set */
 {
 	ut_ad(!rec_get_1byte_offs_flag(rec));
 	ut_ad(n < rec_get_n_fields_old(rec));
@@ -1118,16 +1234,16 @@ rec_2_set_field_end_info(
 	mach_write_to_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2), info);
 }
 
-/**********************************************************
+/******************************************************//**
 Returns the offset of nth field start if the record is stored in the 1-byte
-offsets form. */
+offsets form.
+@return	offset of the start of the field */
 UNIV_INLINE
 ulint
 rec_1_get_field_start_offs(
 /*=======================*/
-			/* out: offset of the start of the field */
-	rec_t*	rec,	/* in: record */
-	ulint	n)	/* in: field index */
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
 {
 	ut_ad(rec_get_1byte_offs_flag(rec));
 	ut_ad(n <= rec_get_n_fields_old(rec));
@@ -1141,16 +1257,16 @@ rec_1_get_field_start_offs(
 	       & ~REC_1BYTE_SQL_NULL_MASK);
 }
 
-/**********************************************************
+/******************************************************//**
 Returns the offset of nth field start if the record is stored in the 2-byte
-offsets form. */
+offsets form.
+@return	offset of the start of the field */
 UNIV_INLINE
 ulint
 rec_2_get_field_start_offs(
 /*=======================*/
-			/* out: offset of the start of the field */
-	rec_t*	rec,	/* in: record */
-	ulint	n)	/* in: field index */
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
 {
 	ut_ad(!rec_get_1byte_offs_flag(rec));
 	ut_ad(n <= rec_get_n_fields_old(rec));
@@ -1164,18 +1280,18 @@ rec_2_get_field_start_offs(
 	       & ~(REC_2BYTE_SQL_NULL_MASK | REC_2BYTE_EXTERN_MASK));
 }
 
-/**********************************************************
+/******************************************************//**
 The following function is used to read the offset of the start of a data field
 in the record. The start of an SQL null field is the end offset of the
 previous non-null field, or 0, if none exists. If n is the number of the last
-field + 1, then the end offset of the last field is returned. */
+field + 1, then the end offset of the last field is returned.
+@return	offset of the start of the field */
 UNIV_INLINE
 ulint
 rec_get_field_start_offs(
 /*=====================*/
-			/* out: offset of the start of the field */
-	rec_t*	rec,	/* in: record */
-	ulint	n)	/* in: field index */
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
 {
 	ut_ad(rec);
 	ut_ad(n <= rec_get_n_fields_old(rec));
@@ -1193,17 +1309,17 @@ rec_get_field_start_offs(
 	return(rec_2_get_field_start_offs(rec, n));
 }
 
-/****************************************************************
+/************************************************************//**
 Gets the physical size of an old-style field.
 Also an SQL null may have a field of size > 0,
-if the data type is of a fixed size. */
+if the data type is of a fixed size.
+@return	field size in bytes */
 UNIV_INLINE
 ulint
 rec_get_nth_field_size(
 /*===================*/
-			/* out: field size in bytes */
-	rec_t*	rec,	/* in: record */
-	ulint	n)	/* in: index of the field */
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: index of the field */
 {
 	ulint	os;
 	ulint	next_os;
@@ -1216,7 +1332,7 @@ rec_get_nth_field_size(
 	return(next_os - os);
 }
 
-/***************************************************************
+/***********************************************************//**
 This is used to modify the value of an already existing field in a record.
 The previous value must have exactly the same size as the new value. If len
 is UNIV_SQL_NULL then the field is treated as an SQL null.
@@ -1226,12 +1342,12 @@ UNIV_INLINE
 void
 rec_set_nth_field(
 /*==============*/
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		n,	/* in: index number of the field */
-	const void*	data,	/* in: pointer to the data
+	rec_t*		rec,	/*!< in: record */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n,	/*!< in: index number of the field */
+	const void*	data,	/*!< in: pointer to the data
 				if not SQL null */
-	ulint		len)	/* in: length of the data or UNIV_SQL_NULL */
+	ulint		len)	/*!< in: length of the data or UNIV_SQL_NULL */
 {
 	byte*	data2;
 	ulint	len2;
@@ -1260,32 +1376,32 @@ rec_set_nth_field(
 	ut_memcpy(data2, data, len);
 }
 
-/**************************************************************
+/**********************************************************//**
 The following function returns the data size of an old-style physical
 record, that is the sum of field lengths. SQL null fields
 are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes. */
+is the distance from record origin to record end in bytes.
+@return	size */
 UNIV_INLINE
 ulint
 rec_get_data_size_old(
 /*==================*/
-				/* out: size */
-	rec_t*	rec)	/* in: physical record */
+	const rec_t*	rec)	/*!< in: physical record */
 {
 	ut_ad(rec);
 
 	return(rec_get_field_start_offs(rec, rec_get_n_fields_old(rec)));
 }
 
-/**************************************************************
+/**********************************************************//**
 The following function sets the number of fields in offsets. */
 UNIV_INLINE
 void
 rec_offs_set_n_fields(
 /*==================*/
-	ulint*	offsets,	/* in/out: array returned by
+	ulint*	offsets,	/*!< in/out: array returned by
 				rec_get_offsets() */
-	ulint	n_fields)	/* in: number of fields */
+	ulint	n_fields)	/*!< in: number of fields */
 {
 	ut_ad(offsets);
 	ut_ad(n_fields > 0);
@@ -1295,17 +1411,17 @@ rec_offs_set_n_fields(
 	offsets[1] = n_fields;
 }
 
-/**************************************************************
+/**********************************************************//**
 The following function returns the data size of a physical
 record, that is the sum of field lengths. SQL null fields
 are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes. */
+is the distance from record origin to record end in bytes.
+@return	size */
 UNIV_INLINE
 ulint
 rec_offs_data_size(
 /*===============*/
-				/* out: size */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
 	ulint	size;
 
@@ -1316,79 +1432,81 @@ rec_offs_data_size(
 	return(size);
 }
 
-/**************************************************************
+/**********************************************************//**
 Returns the total size of record minus data size of record. The value
 returned by the function is the distance from record start to record origin
-in bytes. */
+in bytes.
+@return	size */
 UNIV_INLINE
 ulint
 rec_offs_extra_size(
 /*================*/
-				/* out: size */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
 	ulint	size;
 	ut_ad(rec_offs_validate(NULL, NULL, offsets));
-	size = *rec_offs_base(offsets) & ~REC_OFFS_COMPACT;
+	size = *rec_offs_base(offsets) & ~(REC_OFFS_COMPACT | REC_OFFS_EXTERNAL);
 	ut_ad(size < UNIV_PAGE_SIZE);
 	return(size);
 }
 
-/**************************************************************
-Returns the total size of a physical record.  */
+/**********************************************************//**
+Returns the total size of a physical record.
+@return	size */
 UNIV_INLINE
 ulint
 rec_offs_size(
 /*==========*/
-				/* out: size */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
 	return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets));
 }
 
-/**************************************************************
-Returns a pointer to the end of the record. */
+/**********************************************************//**
+Returns a pointer to the end of the record.
+@return	pointer to end */
 UNIV_INLINE
 byte*
 rec_get_end(
 /*========*/
-				/* out: pointer to end */
-	rec_t*		rec,	/* in: pointer to record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+	rec_t*		rec,	/*!< in: pointer to record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
 	return(rec + rec_offs_data_size(offsets));
 }
 
-/**************************************************************
-Returns a pointer to the start of the record. */
+/**********************************************************//**
+Returns a pointer to the start of the record.
+@return	pointer to start */
 UNIV_INLINE
 byte*
 rec_get_start(
 /*==========*/
-				/* out: pointer to start */
-	rec_t*		rec,	/* in: pointer to record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+	rec_t*		rec,	/*!< in: pointer to record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
 	return(rec - rec_offs_extra_size(offsets));
 }
 
-/*******************************************************************
-Copies a physical record to a buffer. */
+/***************************************************************//**
+Copies a physical record to a buffer.
+@return	pointer to the origin of the copy */
 UNIV_INLINE
 rec_t*
 rec_copy(
 /*=====*/
-				/* out: pointer to the origin of the copy */
-	void*		buf,	/* in: buffer */
-	const rec_t*	rec,	/* in: physical record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+	void*		buf,	/*!< in: buffer */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
 	ulint	extra_len;
 	ulint	data_len;
 
 	ut_ad(rec && buf);
 	ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets));
-	ut_ad(rec_validate((rec_t*) rec, offsets));
+	ut_ad(rec_validate(rec, offsets));
 
 	extra_len = rec_offs_extra_size(offsets);
 	data_len = rec_offs_data_size(offsets);
@@ -1398,18 +1516,19 @@ rec_copy(
 	return((byte*)buf + extra_len);
 }
 
-/**************************************************************
+/**********************************************************//**
 Returns the extra size of an old-style physical record if we know its
-data size and number of fields. */
+data size and number of fields.
+@return	extra size */
 UNIV_INLINE
 ulint
 rec_get_converted_extra_size(
 /*=========================*/
-				/* out: extra size */
-	ulint	data_size,	/* in: data size */
-	ulint	n_fields)	/* in: number of fields */
+	ulint	data_size,	/*!< in: data size */
+	ulint	n_fields,	/*!< in: number of fields */
+	ulint	n_ext)		/*!< in: number of externally stored columns */
 {
-	if (data_size <= REC_1BYTE_OFFS_LIMIT) {
+	if (!n_ext && data_size <= REC_1BYTE_OFFS_LIMIT) {
 
 		return(REC_N_OLD_EXTRA_BYTES + n_fields);
 	}
@@ -1417,26 +1536,17 @@ rec_get_converted_extra_size(
 	return(REC_N_OLD_EXTRA_BYTES + 2 * n_fields);
 }
 
-/**************************************************************
+/**********************************************************//**
 The following function returns the size of a data tuple when converted to
-a new-style physical record. */
-
-ulint
-rec_get_converted_size_new(
-/*=======================*/
-				/* out: size */
-	dict_index_t*	index,	/* in: record descriptor */
-	dtuple_t*	dtuple);/* in: data tuple */
-/**************************************************************
-The following function returns the size of a data tuple when converted to
-a physical record. */
+a physical record.
+@return	size */
 UNIV_INLINE
 ulint
 rec_get_converted_size(
 /*===================*/
-				/* out: size */
-	dict_index_t*	index,	/* in: record descriptor */
-	dtuple_t*	dtuple)	/* in: data tuple */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	ulint		n_ext)	/*!< in: number of externally stored columns */
 {
 	ulint	data_size;
 	ulint	extra_size;
@@ -1453,42 +1563,47 @@ rec_get_converted_size(
 		  : dict_index_get_n_fields(index)));
 
 	if (dict_table_is_comp(index->table)) {
-		return(rec_get_converted_size_new(index, dtuple));
+		return(rec_get_converted_size_comp(index,
+						   dtuple_get_info_bits(dtuple)
+						   & REC_NEW_STATUS_MASK,
+						   dtuple->fields,
+						   dtuple->n_fields, NULL));
 	}
 
-	data_size = dtuple_get_data_size(dtuple);
+	data_size = dtuple_get_data_size(dtuple, 0);
 
 	extra_size = rec_get_converted_extra_size(
-		data_size, dtuple_get_n_fields(dtuple));
+		data_size, dtuple_get_n_fields(dtuple), n_ext);
 
 	return(data_size + extra_size);
 }
 
-/****************************************************************
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
 Folds a prefix of a physical record to a ulint. Folds only existing fields,
-that is, checks that we do not run out of the record. */
+that is, checks that we do not run out of the record.
+@return	the folded value */
 UNIV_INLINE
 ulint
 rec_fold(
 /*=====*/
-					/* out: the folded value */
-	rec_t*		rec,		/* in: the physical record */
-	const ulint*	offsets,	/* in: array returned by
+	const rec_t*	rec,		/*!< in: the physical record */
+	const ulint*	offsets,	/*!< in: array returned by
 					rec_get_offsets() */
-	ulint		n_fields,	/* in: number of complete
+	ulint		n_fields,	/*!< in: number of complete
 					fields to fold */
-	ulint		n_bytes,	/* in: number of bytes to fold
+	ulint		n_bytes,	/*!< in: number of bytes to fold
 					in an incomplete last field */
-	dulint		tree_id)	/* in: index tree id */
+	dulint		tree_id)	/*!< in: index tree id */
 {
-	ulint	i;
-	byte*	data;
-	ulint	len;
-	ulint	fold;
-	ulint	n_fields_rec;
+	ulint		i;
+	const byte*	data;
+	ulint		len;
+	ulint		fold;
+	ulint		n_fields_rec;
 
 	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_ad(rec_validate((rec_t*) rec, offsets));
+	ut_ad(rec_validate(rec, offsets));
 	ut_ad(n_fields + n_bytes > 0);
 
 	n_fields_rec = rec_offs_n_fields(offsets);
@@ -1529,3 +1644,4 @@ rec_fold(
 
 	return(fold);
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/rem0types.h b/storage/innodb_plugin/include/rem0types.h
new file mode 100644
index 00000000000..8b84d4af233
--- /dev/null
+++ b/storage/innodb_plugin/include/rem0types.h
@@ -0,0 +1,46 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/rem0types.h
+Record manager global types
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef rem0types_h
+#define rem0types_h
+
+/* We define the physical record simply as an array of bytes */
+typedef byte	rec_t;
+
+/* Maximum values for various fields (for non-blob tuples) */
+#define REC_MAX_N_FIELDS	(1024 - 1)
+#define REC_MAX_HEAP_NO		(2 * 8192 - 1)
+#define REC_MAX_N_OWNED		(16 - 1)
+
+/* REC_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
+indexed column length (or indexed prefix length). It is set to 3*256,
+so that one can create a column prefix index on 256 characters of a
+TEXT or VARCHAR column also in the UTF-8 charset. In that charset,
+a character may take at most 3 bytes.
+This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
+files would be at risk! */
+#define REC_MAX_INDEX_COL_LEN	768
+
+#endif
diff --git a/storage/innodb_plugin/include/row0ext.h b/storage/innodb_plugin/include/row0ext.h
new file mode 100644
index 00000000000..43d82d644e6
--- /dev/null
+++ b/storage/innodb_plugin/include/row0ext.h
@@ -0,0 +1,95 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0ext.h
+Caching of externally stored column prefixes
+
+Created September 2006 Marko Makela
+*******************************************************/
+
+#ifndef row0ext_h
+#define row0ext_h
+
+#include "univ.i"
+#include "row0types.h"
+#include "data0types.h"
+#include "mem0mem.h"
+
+/********************************************************************//**
+Creates a cache of column prefixes of externally stored columns.
+@return	own: column prefix cache */
+UNIV_INTERN
+row_ext_t*
+row_ext_create(
+/*===========*/
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	const ulint*	ext,	/*!< in: col_no's of externally stored columns
+				in the InnoDB table object, as reported by
+				dict_col_get_no(); NOT relative to the records
+				in the clustered index */
+	const dtuple_t*	tuple,	/*!< in: data tuple containing the field
+				references of the externally stored
+				columns; must be indexed by col_no;
+				the clustered index record must be
+				covered by a lock or a page latch
+				to prevent deletion (rollback or purge). */
+	ulint		zip_size,/*!< compressed page size in bytes, or 0 */
+	mem_heap_t*	heap);	/*!< in: heap where created */
+
+/********************************************************************//**
+Looks up a column prefix of an externally stored column.
+@return column prefix, or NULL if the column is not stored externally,
+or pointer to field_ref_zero if the BLOB pointer is unset */
+UNIV_INLINE
+const byte*
+row_ext_lookup_ith(
+/*===============*/
+	const row_ext_t*	ext,	/*!< in/out: column prefix cache */
+	ulint			i,	/*!< in: index of ext->ext[] */
+	ulint*			len);	/*!< out: length of prefix, in bytes,
+					at most REC_MAX_INDEX_COL_LEN */
+/********************************************************************//**
+Looks up a column prefix of an externally stored column.
+@return column prefix, or NULL if the column is not stored externally,
+or pointer to field_ref_zero if the BLOB pointer is unset */
+UNIV_INLINE
+const byte*
+row_ext_lookup(
+/*===========*/
+	const row_ext_t*	ext,	/*!< in: column prefix cache */
+	ulint			col,	/*!< in: column number in the InnoDB
+					table object, as reported by
+					dict_col_get_no(); NOT relative to the
+					records in the clustered index */
+	ulint*			len);	/*!< out: length of prefix, in bytes,
+					at most REC_MAX_INDEX_COL_LEN */
+
+/** Prefixes of externally stored columns */
+struct row_ext_struct{
+	ulint		n_ext;	/*!< number of externally stored columns */
+	const ulint*	ext;	/*!< col_no's of externally stored columns */
+	byte*		buf;	/*!< backing store of the column prefix cache */
+	ulint		len[1];	/*!< prefix lengths; 0 if not cached */
+};
+
+#ifndef UNIV_NONINL
+#include "row0ext.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/row0ext.ic b/storage/innodb_plugin/include/row0ext.ic
new file mode 100644
index 00000000000..82771a9312a
--- /dev/null
+++ b/storage/innodb_plugin/include/row0ext.ic
@@ -0,0 +1,84 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0ext.ic
+Caching of externally stored column prefixes
+
+Created September 2006 Marko Makela
+*******************************************************/
+
+#include "rem0types.h"
+#include "btr0types.h"
+
+/********************************************************************//**
+Looks up a column prefix of an externally stored column.
+@return column prefix, or NULL if the column is not stored externally,
+or pointer to field_ref_zero if the BLOB pointer is unset */
+UNIV_INLINE
+const byte*
+row_ext_lookup_ith(
+/*===============*/
+	const row_ext_t*	ext,	/*!< in/out: column prefix cache */
+	ulint			i,	/*!< in: index of ext->ext[] */
+	ulint*			len)	/*!< out: length of prefix, in bytes,
+					at most REC_MAX_INDEX_COL_LEN */
+{
+	ut_ad(ext);
+	ut_ad(len);
+	ut_ad(i < ext->n_ext);
+
+	*len = ext->len[i];
+
+	if (UNIV_UNLIKELY(*len == 0)) {
+		/* The BLOB could not be fetched to the cache. */
+		return(field_ref_zero);
+	} else {
+		return(ext->buf + i * REC_MAX_INDEX_COL_LEN);
+	}
+}
+
+/********************************************************************//**
+Looks up a column prefix of an externally stored column.
+@return column prefix, or NULL if the column is not stored externally,
+or pointer to field_ref_zero if the BLOB pointer is unset */
+UNIV_INLINE
+const byte*
+row_ext_lookup(
+/*===========*/
+	const row_ext_t*	ext,	/*!< in: column prefix cache */
+	ulint			col,	/*!< in: column number in the InnoDB
+					table object, as reported by
+					dict_col_get_no(); NOT relative to the
+					records in the clustered index */
+	ulint*			len)	/*!< out: length of prefix, in bytes,
+					at most REC_MAX_INDEX_COL_LEN */
+{
+	ulint	i;
+
+	ut_ad(ext);
+	ut_ad(len);
+
+	for (i = 0; i < ext->n_ext; i++) {
+		if (col == ext->ext[i]) {
+			return(row_ext_lookup_ith(ext, i, len));
+		}
+	}
+
+	return(NULL);
+}
diff --git a/storage/innodb_plugin/include/row0ins.h b/storage/innodb_plugin/include/row0ins.h
new file mode 100644
index 00000000000..530622e6225
--- /dev/null
+++ b/storage/innodb_plugin/include/row0ins.h
@@ -0,0 +1,156 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0ins.h
+Insert into a table
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0ins_h
+#define row0ins_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "que0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "row0types.h"
+
+/***************************************************************//**
+Checks if foreign key constraint fails for an index entry. Sets shared locks
+which lock either the success or the failure of the constraint. NOTE that
+the caller must have a shared latch on dict_foreign_key_check_lock.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_NO_REFERENCED_ROW, or
+DB_ROW_IS_REFERENCED */
+UNIV_INTERN
+ulint
+row_ins_check_foreign_constraint(
+/*=============================*/
+	ibool		check_ref,/*!< in: TRUE If we want to check that
+				the referenced table is ok, FALSE if we
+				want to to check the foreign key table */
+	dict_foreign_t*	foreign,/*!< in: foreign constraint; NOTE that the
+				tables mentioned in it must be in the
+				dictionary cache if they exist at all */
+	dict_table_t*	table,	/*!< in: if check_ref is TRUE, then the foreign
+				table, else the referenced table */
+	dtuple_t*	entry,	/*!< in: index entry for index */
+	que_thr_t*	thr);	/*!< in: query thread */
+/*********************************************************************//**
+Creates an insert node struct.
+@return	own: insert node struct */
+UNIV_INTERN
+ins_node_t*
+ins_node_create(
+/*============*/
+	ulint		ins_type,	/*!< in: INS_VALUES, ... */
+	dict_table_t*	table,		/*!< in: table where to insert */
+	mem_heap_t*	heap);		/*!< in: mem heap where created */
+/*********************************************************************//**
+Sets a new row to insert for an INS_DIRECT node. This function is only used
+if we have constructed the row separately, which is a rare case; this
+function is quite slow. */
+UNIV_INTERN
+void
+ins_node_set_new_row(
+/*=================*/
+	ins_node_t*	node,	/*!< in: insert node */
+	dtuple_t*	row);	/*!< in: new row (or first row) for the node */
+/***************************************************************//**
+Inserts an index entry to index. Tries first optimistic, then pessimistic
+descent down the tree. If the entry matches enough to a delete marked record,
+performs the insert by updating or delete unmarking the delete marked
+record.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
+ulint
+row_ins_index_entry(
+/*================*/
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry,	/*!< in: index entry to insert */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	ibool		foreign,/*!< in: TRUE=check foreign key constraints */
+	que_thr_t*	thr);	/*!< in: query thread */
+/***********************************************************//**
+Inserts a row to a table. This is a high-level function used in
+SQL execution graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+row_ins_step(
+/*=========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/***********************************************************//**
+Creates an entry template for each index of a table. */
+UNIV_INTERN
+void
+ins_node_create_entry_list(
+/*=======================*/
+	ins_node_t*	node);	/*!< in: row insert node */
+
+/* Insert node structure */
+
+struct ins_node_struct{
+	que_common_t	common;	/*!< node type: QUE_NODE_INSERT */
+	ulint		ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */
+	dtuple_t*	row;	/*!< row to insert */
+	dict_table_t*	table;	/*!< table where to insert */
+	sel_node_t*	select;	/*!< select in searched insert */
+	que_node_t*	values_list;/* list of expressions to evaluate and
+				insert in an INS_VALUES insert */
+	ulint		state;	/*!< node execution state */
+	dict_index_t*	index;	/*!< NULL, or the next index where the index
+				entry should be inserted */
+	dtuple_t*	entry;	/*!< NULL, or entry to insert in the index;
+				after a successful insert of the entry,
+				this should be reset to NULL */
+	UT_LIST_BASE_NODE_T(dtuple_t)
+			entry_list;/* list of entries, one for each index */
+	byte*		row_id_buf;/* buffer for the row id sys field in row */
+	trx_id_t	trx_id;	/*!< trx id or the last trx which executed the
+				node */
+	byte*		trx_id_buf;/* buffer for the trx id sys field in row */
+	mem_heap_t*	entry_sys_heap;
+				/* memory heap used as auxiliary storage;
+				entry_list and sys fields are stored here;
+				if this is NULL, entry list should be created
+				and buffers for sys fields in row allocated */
+	ulint		magic_n;
+};
+
+#define	INS_NODE_MAGIC_N	15849075
+
+/* Insert node types */
+#define INS_SEARCHED	0	/* INSERT INTO ... SELECT ... */
+#define INS_VALUES	1	/* INSERT INTO ... VALUES ... */
+#define INS_DIRECT	2	/* this is for internal use in dict0crea:
+				insert the row directly */
+
+/* Node execution states */
+#define	INS_NODE_SET_IX_LOCK	1	/* we should set an IX lock on table */
+#define INS_NODE_ALLOC_ROW_ID	2	/* row id should be allocated */
+#define	INS_NODE_INSERT_ENTRIES 3	/* index entries should be built and
+					inserted */
+
+#ifndef UNIV_NONINL
+#include "row0ins.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/row0ins.ic b/storage/innodb_plugin/include/row0ins.ic
new file mode 100644
index 00000000000..84f6da255bf
--- /dev/null
+++ b/storage/innodb_plugin/include/row0ins.ic
@@ -0,0 +1,26 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0ins.ic
+Insert into a table
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+
diff --git a/storage/innodb_plugin/include/row0merge.h b/storage/innodb_plugin/include/row0merge.h
new file mode 100644
index 00000000000..62a5efd11f7
--- /dev/null
+++ b/storage/innodb_plugin/include/row0merge.h
@@ -0,0 +1,197 @@
+/*****************************************************************************
+
+Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0merge.h
+Index build routines using a merge sort
+
+Created 13/06/2005 Jan Lindstrom
+*******************************************************/
+
+#ifndef row0merge_h
+#define row0merge_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "mtr0mtr.h"
+#include "rem0types.h"
+#include "rem0rec.h"
+#include "read0types.h"
+#include "btr0types.h"
+#include "row0mysql.h"
+#include "lock0types.h"
+
+/** Index field definition */
+struct merge_index_field_struct {
+	ulint		prefix_len;	/*!< column prefix length, or 0
+					if indexing the whole column */
+	const char*	field_name;	/*!< field name */
+};
+
+/** Index field definition */
+typedef struct merge_index_field_struct merge_index_field_t;
+
+/** Definition of an index being created */
+struct merge_index_def_struct {
+	const char*		name;		/*!< index name */
+	ulint			ind_type;	/*!< 0, DICT_UNIQUE,
+						or DICT_CLUSTERED */
+	ulint			n_fields;	/*!< number of fields
+						in index */
+	merge_index_field_t*	fields;		/*!< field definitions */
+};
+
+/** Definition of an index being created */
+typedef struct merge_index_def_struct merge_index_def_t;
+
+/*********************************************************************//**
+Sets an exclusive lock on a table, for the duration of creating indexes.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+ulint
+row_merge_lock_table(
+/*=================*/
+	trx_t*		trx,		/*!< in/out: transaction */
+	dict_table_t*	table,		/*!< in: table to lock */
+	enum lock_mode	mode);		/*!< in: LOCK_X or LOCK_S */
+/*********************************************************************//**
+Drop an index from the InnoDB system tables.  The data dictionary must
+have been locked exclusively by the caller, because the transaction
+will not be committed. */
+UNIV_INTERN
+void
+row_merge_drop_index(
+/*=================*/
+	dict_index_t*	index,	/*!< in: index to be removed */
+	dict_table_t*	table,	/*!< in: table */
+	trx_t*		trx);	/*!< in: transaction handle */
+/*********************************************************************//**
+Drop those indexes which were created before an error occurred when
+building an index.  The data dictionary must have been locked
+exclusively by the caller, because the transaction will not be
+committed. */
+UNIV_INTERN
+void
+row_merge_drop_indexes(
+/*===================*/
+	trx_t*		trx,		/*!< in: transaction */
+	dict_table_t*	table,		/*!< in: table containing the indexes */
+	dict_index_t**	index,		/*!< in: indexes to drop */
+	ulint		num_created);	/*!< in: number of elements in index[] */
+/*********************************************************************//**
+Drop all partially created indexes during crash recovery. */
+UNIV_INTERN
+void
+row_merge_drop_temp_indexes(void);
+/*=============================*/
+/*********************************************************************//**
+Rename the tables in the data dictionary.  The data dictionary must
+have been locked exclusively by the caller, because the transaction
+will not be committed.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+ulint
+row_merge_rename_tables(
+/*====================*/
+	dict_table_t*	old_table,	/*!< in/out: old table, renamed to
+					tmp_name */
+	dict_table_t*	new_table,	/*!< in/out: new table, renamed to
+					old_table->name */
+	const char*	tmp_name,	/*!< in: new name for old_table */
+	trx_t*		trx);		/*!< in: transaction handle */
+
+/*********************************************************************//**
+Create a temporary table for creating a primary key, using the definition
+of an existing table.
+@return	table, or NULL on error */
+UNIV_INTERN
+dict_table_t*
+row_merge_create_temporary_table(
+/*=============================*/
+	const char*		table_name,	/*!< in: new table name */
+	const merge_index_def_t*index_def,	/*!< in: the index definition
+						of the primary key */
+	const dict_table_t*	table,		/*!< in: old table definition */
+	trx_t*			trx);		/*!< in/out: transaction
+						(sets error_state) */
+/*********************************************************************//**
+Rename the temporary indexes in the dictionary to permanent ones.  The
+data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed.
+@return	DB_SUCCESS if all OK */
+UNIV_INTERN
+ulint
+row_merge_rename_indexes(
+/*=====================*/
+	trx_t*		trx,		/*!< in/out: transaction */
+	dict_table_t*	table);		/*!< in/out: table with new indexes */
+/*********************************************************************//**
+Create the index and load in to the dictionary.
+@return	index, or NULL on error */
+UNIV_INTERN
+dict_index_t*
+row_merge_create_index(
+/*===================*/
+	trx_t*			trx,	/*!< in/out: trx (sets error_state) */
+	dict_table_t*		table,	/*!< in: the index is on this table */
+	const merge_index_def_t*index_def);
+					/*!< in: the index definition */
+/*********************************************************************//**
+Check if a transaction can use an index.
+@return	TRUE if index can be used by the transaction else FALSE */
+UNIV_INTERN
+ibool
+row_merge_is_index_usable(
+/*======================*/
+	const trx_t*		trx,	/*!< in: transaction */
+	const dict_index_t*	index);	/*!< in: index to check */
+/*********************************************************************//**
+If there are views that refer to the old table name then we "attach" to
+the new instance of the table else we drop it immediately.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+row_merge_drop_table(
+/*=================*/
+	trx_t*		trx,		/*!< in: transaction */
+	dict_table_t*	table);		/*!< in: table instance to drop */
+
+/*********************************************************************//**
+Build indexes on a table by reading a clustered index,
+creating a temporary file containing index entries, merge sorting
+these index entries and inserting sorted index entries to indexes.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+row_merge_build_indexes(
+/*====================*/
+	trx_t*		trx,		/*!< in: transaction */
+	dict_table_t*	old_table,	/*!< in: table where rows are
+					read from */
+	dict_table_t*	new_table,	/*!< in: table where indexes are
+					created; identical to old_table
+					unless creating a PRIMARY KEY */
+	dict_index_t**	indexes,	/*!< in: indexes to be created */
+	ulint		n_indexes,	/*!< in: size of indexes[] */
+	TABLE*		table);		/*!< in/out: MySQL table, for
+					reporting erroneous key value
+					if applicable */
+#endif /* row0merge.h */
diff --git a/storage/innobase/include/row0mysql.h b/storage/innodb_plugin/include/row0mysql.h
similarity index 56%
rename from storage/innobase/include/row0mysql.h
rename to storage/innodb_plugin/include/row0mysql.h
index 5430190fa51..97028622505 100644
--- a/storage/innobase/include/row0mysql.h
+++ b/storage/innodb_plugin/include/row0mysql.h
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0mysql.h
 Interface between Innobase row operations and MySQL.
 Contains also create table and other data dictionary operations.
 
-(c) 2000 Innobase Oy
-
 Created 9/17/2000 Heikki Tuuri
 *******************************************************/
 
@@ -23,229 +40,237 @@ extern ibool row_rollback_on_timeout;
 
 typedef struct row_prebuilt_struct row_prebuilt_t;
 
-/***********************************************************************
+/*******************************************************************//**
 Frees the blob heap in prebuilt when no longer needed. */
-
+UNIV_INTERN
 void
 row_mysql_prebuilt_free_blob_heap(
 /*==============================*/
-	row_prebuilt_t*	prebuilt);	/* in: prebuilt struct of a
+	row_prebuilt_t*	prebuilt);	/*!< in: prebuilt struct of a
 					ha_innobase:: table handle */
-/***********************************************************************
+/*******************************************************************//**
 Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
-format. */
-
+format.
+@return pointer to the data, we skip the 1 or 2 bytes at the start
+that are used to store the len */
+UNIV_INTERN
 byte*
 row_mysql_store_true_var_len(
 /*=========================*/
-			/* out: pointer to the data, we skip the 1 or 2 bytes
-			at the start that are used to store the len */
-	byte*	dest,	/* in: where to store */
-	ulint	len,	/* in: length, must fit in two bytes */
-	ulint	lenlen);/* in: storage length of len: either 1 or 2 bytes */
-/***********************************************************************
+	byte*	dest,	/*!< in: where to store */
+	ulint	len,	/*!< in: length, must fit in two bytes */
+	ulint	lenlen);/*!< in: storage length of len: either 1 or 2 bytes */
+/*******************************************************************//**
 Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
-returns a pointer to the data. */
-
-byte*
+returns a pointer to the data.
+@return pointer to the data, we skip the 1 or 2 bytes at the start
+that are used to store the len */
+UNIV_INTERN
+const byte*
 row_mysql_read_true_varchar(
 /*========================*/
-			/* out: pointer to the data, we skip the 1 or 2 bytes
-			at the start that are used to store the len */
-	ulint*	len,	/* out: variable-length field length */
-	byte*	field,	/* in: field in the MySQL format */
-	ulint	lenlen);/* in: storage length of len: either 1 or 2 bytes */
-/***********************************************************************
+	ulint*		len,	/*!< out: variable-length field length */
+	const byte*	field,	/*!< in: field in the MySQL format */
+	ulint		lenlen);/*!< in: storage length of len: either 1
+				or 2 bytes */
+/*******************************************************************//**
 Stores a reference to a BLOB in the MySQL format. */
-
+UNIV_INTERN
 void
 row_mysql_store_blob_ref(
 /*=====================*/
-	byte*	dest,		/* in: where to store */
-	ulint	col_len,	/* in: dest buffer size: determines into
+	byte*		dest,	/*!< in: where to store */
+	ulint		col_len,/*!< in: dest buffer size: determines into
 				how many bytes the BLOB length is stored,
-				this may vary from 1 to 4 bytes */
-	byte*	data,		/* in: BLOB data */
-	ulint	len);		/* in: BLOB length */
-/***********************************************************************
-Reads a reference to a BLOB in the MySQL format. */
-
-byte*
+				the space for the length may vary from 1
+				to 4 bytes */
+	const void*	data,	/*!< in: BLOB data; if the value to store
+				is SQL NULL this should be NULL pointer */
+	ulint		len);	/*!< in: BLOB length; if the value to store
+				is SQL NULL this should be 0; remember
+				also to set the NULL bit in the MySQL record
+				header! */
+/*******************************************************************//**
+Reads a reference to a BLOB in the MySQL format.
+@return	pointer to BLOB data */
+UNIV_INTERN
+const byte*
 row_mysql_read_blob_ref(
 /*====================*/
-				/* out: pointer to BLOB data */
-	ulint*	len,		/* out: BLOB length */
-	byte*	ref,		/* in: BLOB reference in the MySQL format */
-	ulint	col_len);	/* in: BLOB reference length (not BLOB
-				length) */
-/******************************************************************
+	ulint*		len,		/*!< out: BLOB length */
+	const byte*	ref,		/*!< in: BLOB reference in the
+					MySQL format */
+	ulint		col_len);	/*!< in: BLOB reference length
+					(not BLOB length) */
+/**************************************************************//**
 Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
 The counterpart of this function is row_sel_field_store_in_mysql_format() in
-row0sel.c. */
-
+row0sel.c.
+@return	up to which byte we used buf in the conversion */
+UNIV_INTERN
 byte*
 row_mysql_store_col_in_innobase_format(
 /*===================================*/
-					/* out: up to which byte we used
-					buf in the conversion */
-	dfield_t*	dfield,		/* in/out: dfield where dtype
+	dfield_t*	dfield,		/*!< in/out: dfield where dtype
 					information must be already set when
 					this function is called! */
-	byte*		buf,		/* in/out: buffer for a converted
+	byte*		buf,		/*!< in/out: buffer for a converted
 					integer value; this must be at least
 					col_len long then! */
-	ibool		row_format_col,	/* TRUE if the mysql_data is from
+	ibool		row_format_col,	/*!< TRUE if the mysql_data is from
 					a MySQL row, FALSE if from a MySQL
 					key value;
 					in MySQL, a true VARCHAR storage
 					format differs in a row and in a
 					key value: in a key value the length
 					is always stored in 2 bytes! */
-	byte*		mysql_data,	/* in: MySQL column value, not
+	const byte*	mysql_data,	/*!< in: MySQL column value, not
 					SQL NULL; NOTE that dfield may also
 					get a pointer to mysql_data,
 					therefore do not discard this as long
 					as dfield is used! */
-	ulint		col_len,	/* in: MySQL column length; NOTE that
+	ulint		col_len,	/*!< in: MySQL column length; NOTE that
 					this is the storage length of the
 					column in the MySQL format row, not
 					necessarily the length of the actual
 					payload data; if the column is a true
 					VARCHAR then this is irrelevant */
-	ulint		comp);		/* in: nonzero=compact format */
-/********************************************************************
-Handles user errors and lock waits detected by the database engine. */
-
+	ulint		comp);		/*!< in: nonzero=compact format */
+/****************************************************************//**
+Handles user errors and lock waits detected by the database engine.
+@return TRUE if it was a lock wait and we should continue running the
+query thread */
+UNIV_INTERN
 ibool
 row_mysql_handle_errors(
 /*====================*/
-				/* out: TRUE if it was a lock wait and
-				we should continue running the query thread */
-	ulint*		new_err,/* out: possible new error encountered in
+	ulint*		new_err,/*!< out: possible new error encountered in
 				rollback, or the old error which was
 				during the function entry */
-	trx_t*		trx,	/* in: transaction */
-	que_thr_t*	thr,	/* in: query thread */
-	trx_savept_t*	savept);/* in: savepoint */
-/************************************************************************
-Create a prebuilt struct for a MySQL table handle. */
-
+	trx_t*		trx,	/*!< in: transaction */
+	que_thr_t*	thr,	/*!< in: query thread */
+	trx_savept_t*	savept);/*!< in: savepoint */
+/********************************************************************//**
+Create a prebuilt struct for a MySQL table handle.
+@return	own: a prebuilt struct */
+UNIV_INTERN
 row_prebuilt_t*
 row_create_prebuilt(
 /*================*/
-				/* out, own: a prebuilt struct */
-	dict_table_t*	table);	/* in: Innobase table handle */
-/************************************************************************
+	dict_table_t*	table);	/*!< in: Innobase table handle */
+/********************************************************************//**
 Free a prebuilt struct for a MySQL table handle. */
-
+UNIV_INTERN
 void
 row_prebuilt_free(
 /*==============*/
-	row_prebuilt_t*	prebuilt);	/* in, own: prebuilt struct */
-/*************************************************************************
+	row_prebuilt_t*	prebuilt,	/*!< in, own: prebuilt struct */
+	ibool		dict_locked);	/*!< in: TRUE=data dictionary locked */
+/*********************************************************************//**
 Updates the transaction pointers in query graphs stored in the prebuilt
 struct. */
-
+UNIV_INTERN
 void
 row_update_prebuilt_trx(
 /*====================*/
-					/* out: prebuilt dtuple */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct in MySQL
-					handle */
-	trx_t*		trx);		/* in: transaction handle */
-/*************************************************************************
-Unlocks an AUTO_INC type lock possibly reserved by trx. */
-
+	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt struct
+					in MySQL handle */
+	trx_t*		trx);		/*!< in: transaction handle */
+/*********************************************************************//**
+Unlocks AUTO_INC type locks that were possibly reserved by a trx. */
+UNIV_INTERN
 void
 row_unlock_table_autoinc_for_mysql(
 /*===============================*/
-	trx_t*	trx);	/* in: transaction */
-/*************************************************************************
+	trx_t*	trx);			/*!< in/out: transaction */
+/*********************************************************************//**
 Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
 AUTO_INC lock gives exclusive access to the auto-inc counter of the
 table. The lock is reserved only for the duration of an SQL statement.
 It is not compatible with another AUTO_INC or exclusive lock on the
-table. */
-
+table.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_lock_table_autoinc_for_mysql(
 /*=============================*/
-					/* out: error code or DB_SUCCESS */
-	row_prebuilt_t*	prebuilt);	/* in: prebuilt struct in the MySQL
+	row_prebuilt_t*	prebuilt);	/*!< in: prebuilt struct in the MySQL
 					table handle */
-/*************************************************************************
-Sets a table lock on the table mentioned in prebuilt. */
-
+/*********************************************************************//**
+Sets a table lock on the table mentioned in prebuilt.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_lock_table_for_mysql(
 /*=====================*/
-					/* out: error code or DB_SUCCESS */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct in the MySQL
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct in the MySQL
 					table handle */
-	dict_table_t*	table,		/* in: table to lock, or NULL
+	dict_table_t*	table,		/*!< in: table to lock, or NULL
 					if prebuilt->table should be
 					locked as
 					prebuilt->select_lock_type */
-	ulint		mode);		/* in: lock mode of table
+	ulint		mode);		/*!< in: lock mode of table
 					(ignored if table==NULL) */
 
-/*************************************************************************
-Does an insert for MySQL. */
-
+/*********************************************************************//**
+Does an insert for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_insert_for_mysql(
 /*=================*/
-					/* out: error code or DB_SUCCESS */
-	byte*		mysql_rec,	/* in: row in the MySQL format */
-	row_prebuilt_t*	prebuilt);	/* in: prebuilt struct in MySQL
+	byte*		mysql_rec,	/*!< in: row in the MySQL format */
+	row_prebuilt_t*	prebuilt);	/*!< in: prebuilt struct in MySQL
 					handle */
-/*************************************************************************
+/*********************************************************************//**
 Builds a dummy query graph used in selects. */
-
+UNIV_INTERN
 void
 row_prebuild_sel_graph(
 /*===================*/
-	row_prebuilt_t*	prebuilt);	/* in: prebuilt struct in MySQL
+	row_prebuilt_t*	prebuilt);	/*!< in: prebuilt struct in MySQL
 					handle */
-/*************************************************************************
+/*********************************************************************//**
 Gets pointer to a prebuilt update vector used in updates. If the update
 graph has not yet been built in the prebuilt struct, then this function
-first builds it. */
-
+first builds it.
+@return	prebuilt update vector */
+UNIV_INTERN
 upd_t*
 row_get_prebuilt_update_vector(
 /*===========================*/
-					/* out: prebuilt update vector */
-	row_prebuilt_t*	prebuilt);	/* in: prebuilt struct in MySQL
+	row_prebuilt_t*	prebuilt);	/*!< in: prebuilt struct in MySQL
 					handle */
-/*************************************************************************
+/*********************************************************************//**
 Checks if a table is such that we automatically created a clustered
-index on it (on row id). */
-
+index on it (on row id).
+@return	TRUE if the clustered index was generated automatically */
+UNIV_INTERN
 ibool
 row_table_got_default_clust_index(
 /*==============================*/
-	dict_table_t*	table);
-/*************************************************************************
+	const dict_table_t*	table);	/*!< in: table */
+/*********************************************************************//**
 Calculates the key number used inside MySQL for an Innobase index. We have
-to take into account if we generated a default clustered index for the table */
-
+to take into account if we generated a default clustered index for the table
+@return	the key number used inside MySQL */
+UNIV_INTERN
 ulint
 row_get_mysql_key_number_for_index(
 /*===============================*/
-	dict_index_t*	index);
-/*************************************************************************
-Does an update or delete of a row for MySQL. */
-
+	const dict_index_t*	index);	/*!< in: index */
+/*********************************************************************//**
+Does an update or delete of a row for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_update_for_mysql(
 /*=================*/
-					/* out: error code or DB_SUCCESS */
-	byte*		mysql_rec,	/* in: the row to be updated, in
+	byte*		mysql_rec,	/*!< in: the row to be updated, in
 					the MySQL format */
-	row_prebuilt_t*	prebuilt);	/* in: prebuilt struct in MySQL
+	row_prebuilt_t*	prebuilt);	/*!< in: prebuilt struct in MySQL
 					handle */
-/*************************************************************************
+/*********************************************************************//**
 This can only be used when srv_locks_unsafe_for_binlog is TRUE or
 session is using a READ COMMITTED isolation level. Before
 calling this function we must use trx_reset_new_rec_lock_info() and
@@ -255,223 +280,235 @@ and also under prebuilt->clust_pcur. Currently, this is only used and tested
 in the case of an UPDATE or a DELETE statement, where the row lock is of the
 LOCK_X type.
 Thus, this implements a 'mini-rollback' that releases the latest record
-locks we set. */
-
+locks we set.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_unlock_for_mysql(
 /*=================*/
-					/* out: error code or DB_SUCCESS */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct in MySQL
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct in MySQL
 					handle */
-	ibool		has_latches_on_recs);/* TRUE if called so that we have
+	ibool		has_latches_on_recs);/*!< TRUE if called so that we have
 					the latches on the records under pcur
 					and clust_pcur, and we do not need to
 					reposition the cursors. */
-/*************************************************************************
+/*********************************************************************//**
 Creates an query graph node of 'update' type to be used in the MySQL
-interface. */
-
+interface.
+@return	own: update node */
+UNIV_INTERN
 upd_node_t*
 row_create_update_node_for_mysql(
 /*=============================*/
-				/* out, own: update node */
-	dict_table_t*	table,	/* in: table to update */
-	mem_heap_t*	heap);	/* in: mem heap from which allocated */
-/**************************************************************************
-Does a cascaded delete or set null in a foreign key operation. */
-
+	dict_table_t*	table,	/*!< in: table to update */
+	mem_heap_t*	heap);	/*!< in: mem heap from which allocated */
+/**********************************************************************//**
+Does a cascaded delete or set null in a foreign key operation.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 ulint
 row_update_cascade_for_mysql(
 /*=========================*/
-				/* out: error code or DB_SUCCESS */
-	que_thr_t*	thr,	/* in: query thread */
-	upd_node_t*	node,	/* in: update node used in the cascade
+	que_thr_t*	thr,	/*!< in: query thread */
+	upd_node_t*	node,	/*!< in: update node used in the cascade
 				or set null operation */
-	dict_table_t*	table);	/* in: table where we do the operation */
-/*************************************************************************
+	dict_table_t*	table);	/*!< in: table where we do the operation */
+/*********************************************************************//**
 Locks the data dictionary exclusively for performing a table create or other
 data dictionary modification operation. */
-
+UNIV_INTERN
 void
-row_mysql_lock_data_dictionary(
-/*===========================*/
-	trx_t*	trx);	/* in: transaction */
-/*************************************************************************
+row_mysql_lock_data_dictionary_func(
+/*================================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	const char*	file,	/*!< in: file name */
+	ulint		line);	/*!< in: line number */
+#define row_mysql_lock_data_dictionary(trx)				\
+	row_mysql_lock_data_dictionary_func(trx, __FILE__, __LINE__)
+/*********************************************************************//**
 Unlocks the data dictionary exclusive lock. */
-
+UNIV_INTERN
 void
 row_mysql_unlock_data_dictionary(
 /*=============================*/
-	trx_t*	trx);	/* in: transaction */
-/*************************************************************************
+	trx_t*	trx);	/*!< in/out: transaction */
+/*********************************************************************//**
 Locks the data dictionary in shared mode from modifications, for performing
 foreign key check, rollback, or other operation invisible to MySQL. */
-
+UNIV_INTERN
 void
-row_mysql_freeze_data_dictionary(
-/*=============================*/
-	trx_t*	trx);	/* in: transaction */
-/*************************************************************************
+row_mysql_freeze_data_dictionary_func(
+/*==================================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	const char*	file,	/*!< in: file name */
+	ulint		line);	/*!< in: line number */
+#define row_mysql_freeze_data_dictionary(trx)				\
+	row_mysql_freeze_data_dictionary_func(trx, __FILE__, __LINE__)
+/*********************************************************************//**
 Unlocks the data dictionary shared lock. */
-
+UNIV_INTERN
 void
 row_mysql_unfreeze_data_dictionary(
 /*===============================*/
-	trx_t*	trx);	/* in: transaction */
-/*************************************************************************
+	trx_t*	trx);	/*!< in/out: transaction */
+/*********************************************************************//**
 Creates a table for MySQL. If the name of the table ends in
 one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
 "innodb_table_monitor", then this will also start the printing of monitor
 output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate(). */
-
+InnoDB will try to invoke mem_validate().
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_create_table_for_mysql(
 /*=======================*/
-					/* out: error code or DB_SUCCESS */
-	dict_table_t*	table,		/* in: table definition */
-	trx_t*		trx);		/* in: transaction handle */
-/*************************************************************************
+	dict_table_t*	table,		/*!< in, own: table definition
+					(will be freed) */
+	trx_t*		trx);		/*!< in: transaction handle */
+/*********************************************************************//**
 Does an index creation operation for MySQL. TODO: currently failure
 to create an index results in dropping the whole table! This is no problem
-currently as all indexes must be created at the same time as the table. */
-
+currently as all indexes must be created at the same time as the table.
+@return	error number or DB_SUCCESS */
+UNIV_INTERN
 int
 row_create_index_for_mysql(
 /*=======================*/
-					/* out: error number or DB_SUCCESS */
-	dict_index_t*	index,		/* in: index definition */
-	trx_t*		trx,		/* in: transaction handle */
-	const ulint*	field_lengths); /* in: if not NULL, must contain
+	dict_index_t*	index,		/*!< in, own: index definition
+					(will be freed) */
+	trx_t*		trx,		/*!< in: transaction handle */
+	const ulint*	field_lengths); /*!< in: if not NULL, must contain
 					dict_index_get_n_fields(index)
 					actual field lengths for the
 					index columns, which are
 					then checked for not being too
 					large. */
-/*************************************************************************
+/*********************************************************************//**
 Scans a table create SQL string and adds to the data dictionary
 the foreign key constraints declared in the string. This function
 should be called after the indexes for a table have been created.
 Each foreign key constraint must be accompanied with indexes in
 bot participating tables. The indexes are allowed to contain more
-fields than mentioned in the constraint. */
-
+fields than mentioned in the constraint.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_table_add_foreign_constraints(
 /*==============================*/
-					/* out: error code or DB_SUCCESS */
-	trx_t*		trx,		/* in: transaction */
-	const char*	sql_string,	/* in: table create statement where
+	trx_t*		trx,		/*!< in: transaction */
+	const char*	sql_string,	/*!< in: table create statement where
 					foreign keys are declared like:
 				FOREIGN KEY (a, b) REFERENCES table2(c, d),
 					table2 can be written also with the
 					database name before it: test.table2 */
-	const char*	name,		/* in: table full name in the
+	const char*	name,		/*!< in: table full name in the
 					normalized form
 					database_name/table_name */
-	ibool		reject_fks);	/* in: if TRUE, fail with error
+	ibool		reject_fks);	/*!< in: if TRUE, fail with error
 					code DB_CANNOT_ADD_CONSTRAINT if
 					any foreign keys are found. */
 
-/*************************************************************************
+/*********************************************************************//**
 The master thread in srv0srv.c calls this regularly to drop tables which
 we must drop in background after queries to them have ended. Such lazy
-dropping of tables is needed in ALTER TABLE on Unix. */
-
+dropping of tables is needed in ALTER TABLE on Unix.
+@return	how many tables dropped + remaining tables in list */
+UNIV_INTERN
 ulint
 row_drop_tables_for_mysql_in_background(void);
 /*=========================================*/
-					/* out: how many tables dropped
-					+ remaining tables in list */
-/*************************************************************************
+/*********************************************************************//**
 Get the background drop list length. NOTE: the caller must own the kernel
-mutex! */
-
+mutex!
+@return	how many tables in list */
+UNIV_INTERN
 ulint
 row_get_background_drop_list_len_low(void);
 /*======================================*/
-					/* out: how many tables in list */
-/*************************************************************************
-Truncates a table for MySQL. */
-
+/*********************************************************************//**
+Truncates a table for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_truncate_table_for_mysql(
 /*=========================*/
-				/* out: error code or DB_SUCCESS */
-	dict_table_t*	table,	/* in: table handle */
-	trx_t*		trx);	/* in: transaction handle */
-/*************************************************************************
-Drops a table for MySQL. If the name of the dropped table ends in
+	dict_table_t*	table,	/*!< in: table handle */
+	trx_t*		trx);	/*!< in: transaction handle */
+/*********************************************************************//**
+Drops a table for MySQL.  If the name of the dropped table ends in
 one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
 "innodb_table_monitor", then this will also stop the printing of monitor
-output by the master thread. */
-
+output by the master thread.  If the data dictionary was not already locked
+by the transaction, the transaction will be committed.  Otherwise, the
+data dictionary will remain locked.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_drop_table_for_mysql(
 /*=====================*/
-				/* out: error code or DB_SUCCESS */
-	const char*	name,	/* in: table name */
-	trx_t*		trx,	/* in: transaction handle */
-	ibool		drop_db);/* in: TRUE=dropping whole database */
+	const char*	name,	/*!< in: table name */
+	trx_t*		trx,	/*!< in: transaction handle */
+	ibool		drop_db);/*!< in: TRUE=dropping whole database */
 
-/*************************************************************************
+/*********************************************************************//**
 Discards the tablespace of a table which stored in an .ibd file. Discarding
 means that this function deletes the .ibd file and assigns a new table id for
-the table. Also the flag table->ibd_file_missing is set TRUE. */
-
+the table. Also the flag table->ibd_file_missing is set TRUE.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_discard_tablespace_for_mysql(
 /*=============================*/
-				/* out: error code or DB_SUCCESS */
-	const char*	name,	/* in: table name */
-	trx_t*		trx);	/* in: transaction handle */
-/*********************************************************************
+	const char*	name,	/*!< in: table name */
+	trx_t*		trx);	/*!< in: transaction handle */
+/*****************************************************************//**
 Imports a tablespace. The space id in the .ibd file must match the space id
-of the table in the data dictionary. */
-
+of the table in the data dictionary.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_import_tablespace_for_mysql(
 /*============================*/
-				/* out: error code or DB_SUCCESS */
-	const char*	name,	/* in: table name */
-	trx_t*		trx);	/* in: transaction handle */
-/*************************************************************************
-Drops a database for MySQL. */
-
+	const char*	name,	/*!< in: table name */
+	trx_t*		trx);	/*!< in: transaction handle */
+/*********************************************************************//**
+Drops a database for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_drop_database_for_mysql(
 /*========================*/
-				/* out: error code or DB_SUCCESS */
-	const char*	name,	/* in: database name which ends to '/' */
-	trx_t*		trx);	/* in: transaction handle */
-/*************************************************************************
-Renames a table for MySQL. */
-
-int
+	const char*	name,	/*!< in: database name which ends to '/' */
+	trx_t*		trx);	/*!< in: transaction handle */
+/*********************************************************************//**
+Renames a table for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+ulint
 row_rename_table_for_mysql(
 /*=======================*/
-					/* out: error code or DB_SUCCESS */
-	const char*	old_name,	/* in: old table name */
-	const char*	new_name,	/* in: new table name */
-	trx_t*		trx);		/* in: transaction handle */
-/*************************************************************************
-Checks a table for corruption. */
-
+	const char*	old_name,	/*!< in: old table name */
+	const char*	new_name,	/*!< in: new table name */
+	trx_t*		trx,		/*!< in: transaction handle */
+	ibool		commit);	/*!< in: if TRUE then commit trx */
+/*********************************************************************//**
+Checks a table for corruption.
+@return	DB_ERROR or DB_SUCCESS */
+UNIV_INTERN
 ulint
 row_check_table_for_mysql(
 /*======================*/
-					/* out: DB_ERROR or DB_SUCCESS */
-	row_prebuilt_t*	prebuilt);	/* in: prebuilt struct in MySQL
+	row_prebuilt_t*	prebuilt);	/*!< in: prebuilt struct in MySQL
 					handle */
 
-/*************************************************************************
-Determines if a table is a magic monitor table. */
-
+/*********************************************************************//**
+Determines if a table is a magic monitor table.
+@return	TRUE if monitor table */
+UNIV_INTERN
 ibool
 row_is_magic_monitor_table(
 /*=======================*/
-					/* out: TRUE if monitor table */
-	const char*	table_name);	/* in: name of the table, in the
+	const char*	table_name);	/*!< in: name of the table, in the
 					form database/table_name */
 
 /* A struct describing a place for an individual column in the MySQL
@@ -481,24 +518,24 @@ Innobase and MySQL. */
 
 typedef struct mysql_row_templ_struct mysql_row_templ_t;
 struct mysql_row_templ_struct {
-	ulint	col_no;			/* column number of the column */
-	ulint	rec_field_no;		/* field number of the column in an
+	ulint	col_no;			/*!< column number of the column */
+	ulint	rec_field_no;		/*!< field number of the column in an
 					Innobase record in the current index;
 					not defined if template_type is
 					ROW_MYSQL_WHOLE_ROW */
-	ulint	mysql_col_offset;	/* offset of the column in the MySQL
+	ulint	mysql_col_offset;	/*!< offset of the column in the MySQL
 					row format */
-	ulint	mysql_col_len;		/* length of the column in the MySQL
+	ulint	mysql_col_len;		/*!< length of the column in the MySQL
 					row format */
-	ulint	mysql_null_byte_offset;	/* MySQL NULL bit byte offset in a
+	ulint	mysql_null_byte_offset;	/*!< MySQL NULL bit byte offset in a
 					MySQL record */
-	ulint	mysql_null_bit_mask;	/* bit mask to get the NULL bit,
+	ulint	mysql_null_bit_mask;	/*!< bit mask to get the NULL bit,
 					zero if column cannot be NULL */
-	ulint	type;			/* column type in Innobase mtype
+	ulint	type;			/*!< column type in Innobase mtype
 					numbers DATA_CHAR... */
-	ulint	mysql_type;		/* MySQL type code; this is always
+	ulint	mysql_type;		/*!< MySQL type code; this is always
 					< 256 */
-	ulint	mysql_length_bytes;	/* if mysql_type
+	ulint	mysql_length_bytes;	/*!< if mysql_type
 					== DATA_MYSQL_TRUE_VARCHAR, this tells
 					whether we should use 1 or 2 bytes to
 					store the MySQL true VARCHAR data
@@ -506,13 +543,13 @@ struct mysql_row_templ_struct {
 					format (NOTE that the MySQL key value
 					format always uses 2 bytes for the data
 					len) */
-	ulint	charset;		/* MySQL charset-collation code
+	ulint	charset;		/*!< MySQL charset-collation code
 					of the column, or zero */
-	ulint	mbminlen;		/* minimum length of a char, in bytes,
+	ulint	mbminlen;		/*!< minimum length of a char, in bytes,
 					or zero if not a char type */
-	ulint	mbmaxlen;		/* maximum length of a char, in bytes,
+	ulint	mbmaxlen;		/*!< maximum length of a char, in bytes,
 					or zero if not a char type */
-	ulint	is_unsigned;		/* if a column type is an integer
+	ulint	is_unsigned;		/*!< if a column type is an integer
 					type and this field is != 0, then
 					it is an unsigned integer type */
 };
@@ -524,80 +561,81 @@ struct mysql_row_templ_struct {
 #define ROW_PREBUILT_ALLOCATED	78540783
 #define ROW_PREBUILT_FREED	26423527
 
-/* A struct for (sometimes lazily) prebuilt structures in an Innobase table
+/** A struct for (sometimes lazily) prebuilt structures in an Innobase table
 handle used within MySQL; these are used to save CPU time. */
 
 struct row_prebuilt_struct {
-	ulint		magic_n;	/* this magic number is set to
-					ROW_PREBUILT_ALLOCATED when created
-					and to ROW_PREBUILT_FREED when the
-					struct has been freed; used in
-					debugging */
-	dict_table_t*	table;		/* Innobase table handle */
-	trx_t*		trx;		/* current transaction handle */
-	ibool		sql_stat_start;	/* TRUE when we start processing of
+	ulint		magic_n;	/*!< this magic number is set to
+					ROW_PREBUILT_ALLOCATED when created,
+					or ROW_PREBUILT_FREED when the
+					struct has been freed */
+	dict_table_t*	table;		/*!< Innobase table handle */
+	dict_index_t*	index;		/*!< current index for a search, if
+					any */
+	trx_t*		trx;		/*!< current transaction handle */
+	unsigned	sql_stat_start:1;/*!< TRUE when we start processing of
 					an SQL statement: we may have to set
 					an intention lock on the table,
 					create a consistent read view etc. */
-	ibool		mysql_has_locked; /* this is set TRUE when MySQL
+	unsigned	mysql_has_locked:1;/*!< this is set TRUE when MySQL
 					calls external_lock on this handle
 					with a lock flag, and set FALSE when
 					with the F_UNLOCK flag */
-	ibool		clust_index_was_generated;
-					/* if the user did not define a
+	unsigned	clust_index_was_generated:1;
+					/*!< if the user did not define a
 					primary key in MySQL, then Innobase
 					automatically generated a clustered
 					index where the ordering column is
 					the row id: in this case this flag
 					is set to TRUE */
-	dict_index_t*	index;		/* current index for a search, if
-					any */
-	ulint		read_just_key;	/* set to 1 when MySQL calls
+	unsigned	index_usable:1;	/*!< caches the value of
+					row_merge_is_index_usable(trx,index) */
+	unsigned	read_just_key:1;/*!< set to 1 when MySQL calls
 					ha_innobase::extra with the
 					argument HA_EXTRA_KEYREAD; it is enough
 					to read just columns defined in
 					the index (i.e., no read of the
 					clustered index record necessary) */
-	ibool		used_in_HANDLER;/* TRUE if we have been using this
+	unsigned	used_in_HANDLER:1;/*!< TRUE if we have been using this
 					handle in a MySQL HANDLER low level
 					index cursor command: then we must
 					store the pcur position even in a
 					unique search from a clustered index,
 					because HANDLER allows NEXT and PREV
 					in such a situation */
-	ulint		template_type;	/* ROW_MYSQL_WHOLE_ROW,
+	unsigned	template_type:2;/*!< ROW_MYSQL_WHOLE_ROW,
 					ROW_MYSQL_REC_FIELDS,
 					ROW_MYSQL_DUMMY_TEMPLATE, or
 					ROW_MYSQL_NO_TEMPLATE */
-	ulint		n_template;	/* number of elements in the
+	unsigned	n_template:10;	/*!< number of elements in the
 					template */
-	ulint		null_bitmap_len;/* number of bytes in the SQL NULL
+	unsigned	null_bitmap_len:10;/*!< number of bytes in the SQL NULL
 					bitmap at the start of a row in the
 					MySQL format */
-	ibool		need_to_access_clustered; /* if we are fetching
+	unsigned	need_to_access_clustered:1; /*!< if we are fetching
 					columns through a secondary index
 					and at least one column is not in
 					the secondary index, then this is
 					set to TRUE */
-	ibool		templ_contains_blob;/* TRUE if the template contains
+	unsigned	templ_contains_blob:1;/*!< TRUE if the template contains
 					BLOB column(s) */
-	mysql_row_templ_t* mysql_template;/* template used to transform
+	mysql_row_templ_t* mysql_template;/*!< template used to transform
 					rows fast between MySQL and Innobase
 					formats; memory for this template
 					is not allocated from 'heap' */
-	mem_heap_t*	heap;		/* memory heap from which
+	mem_heap_t*	heap;		/*!< memory heap from which
 					these auxiliary structures are
 					allocated when needed */
-	ins_node_t*	ins_node;	/* Innobase SQL insert node
+	ins_node_t*	ins_node;	/*!< Innobase SQL insert node
 					used to perform inserts
 					to the table */
-	byte*		ins_upd_rec_buff;/* buffer for storing data converted
+	byte*		ins_upd_rec_buff;/*!< buffer for storing data converted
 					to the Innobase format from the MySQL
 					format */
-	const byte*	default_rec;	/* the default values of all columns
+	const byte*	default_rec;	/*!< the default values of all columns
 					(a "default row") in MySQL format */
 	ulint		hint_need_to_fetch_extra_cols;
-					/* normally this is set to 0; if this
+					/*!< normally this is set to 0; if this
 					is set to ROW_RETRIEVE_PRIMARY_KEY,
 					then we should at least retrieve all
 					columns in the primary key; if this
@@ -605,33 +643,33 @@ struct row_prebuilt_struct {
 					we must retrieve all columns in the
 					key (if read_just_key == 1), or all
 					columns in the table */
-	upd_node_t*	upd_node;	/* Innobase SQL update node used
+	upd_node_t*	upd_node;	/*!< Innobase SQL update node used
 					to perform updates and deletes */
-	que_fork_t*	ins_graph;	/* Innobase SQL query graph used
+	que_fork_t*	ins_graph;	/*!< Innobase SQL query graph used
 					in inserts */
-	que_fork_t*	upd_graph;	/* Innobase SQL query graph used
+	que_fork_t*	upd_graph;	/*!< Innobase SQL query graph used
 					in updates or deletes */
-	btr_pcur_t*	pcur;		/* persistent cursor used in selects
+	btr_pcur_t*	pcur;		/*!< persistent cursor used in selects
 					and updates */
-	btr_pcur_t*	clust_pcur;	/* persistent cursor used in
+	btr_pcur_t*	clust_pcur;	/*!< persistent cursor used in
 					some selects and updates */
-	que_fork_t*	sel_graph;	/* dummy query graph used in
+	que_fork_t*	sel_graph;	/*!< dummy query graph used in
 					selects */
-	dtuple_t*	search_tuple;	/* prebuilt dtuple used in selects */
+	dtuple_t*	search_tuple;	/*!< prebuilt dtuple used in selects */
 	byte		row_id[DATA_ROW_ID_LEN];
-					/* if the clustered index was
+					/*!< if the clustered index was
 					generated, the row id of the
 					last row fetched is stored
 					here */
-	dtuple_t*	clust_ref;	/* prebuilt dtuple used in
+	dtuple_t*	clust_ref;	/*!< prebuilt dtuple used in
 					sel/upd/del */
-	ulint		select_lock_type;/* LOCK_NONE, LOCK_S, or LOCK_X */
-	ulint		stored_select_lock_type;/* this field is used to
+	ulint		select_lock_type;/*!< LOCK_NONE, LOCK_S, or LOCK_X */
+	ulint		stored_select_lock_type;/*!< this field is used to
 					remember the original select_lock_type
 					that was decided in ha_innodb.cc,
 					::store_lock(), ::external_lock(),
 					etc. */
-	ulint		row_read_type;	/* ROW_READ_WITH_LOCKS if row locks
+	ulint		row_read_type;	/*!< ROW_READ_WITH_LOCKS if row locks
 					should be the obtained for records
 					under an UPDATE or DELETE cursor.
 					If innodb_locks_unsafe_for_binlog
@@ -656,7 +694,7 @@ struct row_prebuilt_struct {
 					This eliminates lock waits in some
 					cases; note that this breaks
 					serializability. */
-	ulint		new_rec_locks;	/* normally 0; if
+	ulint		new_rec_locks;	/*!< normally 0; if
 					srv_locks_unsafe_for_binlog is
 					TRUE or session is using READ
 					COMMITTED isolation level, in a
@@ -671,15 +709,15 @@ struct row_prebuilt_struct {
 					these can be used to implement a
 					'mini-rollback' that releases
 					the latest record locks */
-	ulint		mysql_prefix_len;/* byte offset of the end of
+	ulint		mysql_prefix_len;/*!< byte offset of the end of
 					the last requested column */
-	ulint		mysql_row_len;	/* length in bytes of a row in the
+	ulint		mysql_row_len;	/*!< length in bytes of a row in the
 					MySQL format */
-	ulint		n_rows_fetched;	/* number of rows fetched after
+	ulint		n_rows_fetched;	/*!< number of rows fetched after
 					positioning the current cursor */
-	ulint		fetch_direction;/* ROW_SEL_NEXT or ROW_SEL_PREV */
+	ulint		fetch_direction;/*!< ROW_SEL_NEXT or ROW_SEL_PREV */
 	byte*		fetch_cache[MYSQL_FETCH_CACHE_SIZE];
-					/* a cache for fetched rows if we
+					/*!< a cache for fetched rows if we
 					fetch many rows from the same cursor:
 					it saves CPU time to fetch them in a
 					batch; we reserve mysql_row_len
@@ -688,34 +726,37 @@ struct row_prebuilt_struct {
 					allocated mem buf start, because
 					there is a 4 byte magic number at the
 					start and at the end */
-	ibool		keep_other_fields_on_keyread; /* when using fetch
+	ibool		keep_other_fields_on_keyread; /*!< when using fetch
 					cache with HA_EXTRA_KEYREAD, don't
 					overwrite other fields in mysql row
 					row buffer.*/
-	ulint		fetch_cache_first;/* position of the first not yet
+	ulint		fetch_cache_first;/*!< position of the first not yet
 					fetched row in fetch_cache */
-	ulint		n_fetch_cached;	/* number of not yet fetched rows
+	ulint		n_fetch_cached;	/*!< number of not yet fetched rows
 					in fetch_cache */
-	mem_heap_t*	blob_heap;	/* in SELECTS BLOB fields are copied
+	mem_heap_t*	blob_heap;	/*!< in SELECTS BLOB fields are copied
 					to this heap */
-	mem_heap_t*	old_vers_heap;	/* memory heap where a previous
+	mem_heap_t*	old_vers_heap;	/*!< memory heap where a previous
 					version is built in consistent read */
 	/*----------------------*/
-	ulonglong	autoinc_last_value;/* last value of AUTO-INC interval */
-	ulonglong	autoinc_increment;/* The increment step of the auto 
+	ulonglong	autoinc_last_value;
+					/*!< last value of AUTO-INC interval */
+	ulonglong	autoinc_increment;/*!< The increment step of the auto
 					increment column. Value must be
 					greater than or equal to 1. Required to
 					calculate the next value */
-	ulonglong	autoinc_offset; /* The offset passed to
+	ulonglong	autoinc_offset; /*!< The offset passed to
 					get_auto_increment() by MySQL. Required
 					to calculate the next value */
-	ulint		autoinc_error;	/* The actual error code encountered
+	ulint		autoinc_error;	/*!< The actual error code encountered
 					while trying to init or read the
 					autoinc value from the table. We
 					store it here so that we can return
 					it to MySQL */
 	/*----------------------*/
-	ulint		magic_n2;	/* this should be the same as
+	UT_LIST_NODE_T(row_prebuilt_t)	prebuilts;
+					/*!< list node of table->prebuilts */
+	ulint		magic_n2;	/*!< this should be the same as
 					magic_n */
 };
 
diff --git a/storage/innodb_plugin/include/row0mysql.ic b/storage/innodb_plugin/include/row0mysql.ic
new file mode 100644
index 00000000000..35033aa2ad1
--- /dev/null
+++ b/storage/innodb_plugin/include/row0mysql.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 2001, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0mysql.ic
+MySQL interface for Innobase
+
+Created 1/23/2001 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innodb_plugin/include/row0purge.h b/storage/innodb_plugin/include/row0purge.h
new file mode 100644
index 00000000000..89ec54fb54a
--- /dev/null
+++ b/storage/innodb_plugin/include/row0purge.h
@@ -0,0 +1,96 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0purge.h
+Purge obsolete records
+
+Created 3/14/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0purge_h
+#define row0purge_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "btr0types.h"
+#include "btr0pcur.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+
+/********************************************************************//**
+Creates a purge node to a query graph.
+@return	own: purge node */
+UNIV_INTERN
+purge_node_t*
+row_purge_node_create(
+/*==================*/
+	que_thr_t*	parent,	/*!< in: parent node, i.e., a thr node */
+	mem_heap_t*	heap);	/*!< in: memory heap where created */
+/***********************************************************//**
+Does the purge operation for a single undo log record. This is a high-level
+function used in an SQL execution graph.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+row_purge_step(
+/*===========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+
+/* Purge node structure */
+
+struct purge_node_struct{
+	que_common_t	common;	/*!< node type: QUE_NODE_PURGE */
+	/*----------------------*/
+	/* Local storage for this graph node */
+	roll_ptr_t	roll_ptr;/* roll pointer to undo log record */
+	trx_undo_rec_t*	undo_rec;/* undo log record */
+	trx_undo_inf_t*	reservation;/* reservation for the undo log record in
+				the purge array */
+	undo_no_t	undo_no;/* undo number of the record */
+	ulint		rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
+				... */
+	btr_pcur_t	pcur;	/*!< persistent cursor used in searching the
+				clustered index record */
+	ibool		found_clust;/* TRUE if the clustered index record
+				determined by ref was found in the clustered
+				index, and we were able to position pcur on
+				it */
+	dict_table_t*	table;	/*!< table where purge is done */
+	ulint		cmpl_info;/* compiler analysis info of an update */
+	upd_t*		update;	/*!< update vector for a clustered index
+				record */
+	dtuple_t*	ref;	/*!< NULL, or row reference to the next row to
+				handle */
+	dtuple_t*	row;	/*!< NULL, or a copy (also fields copied to
+				heap) of the indexed fields of the row to
+				handle */
+	dict_index_t*	index;	/*!< NULL, or the next index whose record should
+				be handled */
+	mem_heap_t*	heap;	/*!< memory heap used as auxiliary storage for
+				row; this must be emptied after a successful
+				purge of a row */
+};
+
+#ifndef UNIV_NONINL
+#include "row0purge.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/row0purge.ic b/storage/innodb_plugin/include/row0purge.ic
new file mode 100644
index 00000000000..23d7d3845a4
--- /dev/null
+++ b/storage/innodb_plugin/include/row0purge.ic
@@ -0,0 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+
+/**************************************************//**
+@file include/row0purge.ic
+Purge obsolete records
+
+Created 3/14/1997 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innodb_plugin/include/row0row.h b/storage/innodb_plugin/include/row0row.h
new file mode 100644
index 00000000000..723b7b53395
--- /dev/null
+++ b/storage/innodb_plugin/include/row0row.h
@@ -0,0 +1,310 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0row.h
+General row routines
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0row_h
+#define row0row_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "mtr0mtr.h"
+#include "rem0types.h"
+#include "read0types.h"
+#include "row0types.h"
+#include "btr0types.h"
+
+/*********************************************************************//**
+Gets the offset of the trx id field, in bytes relative to the origin of
+a clustered index record.
+@return	offset of DATA_TRX_ID */
+UNIV_INTERN
+ulint
+row_get_trx_id_offset(
+/*==================*/
+	const rec_t*	rec,	/*!< in: record */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets);/*!< in: rec_get_offsets(rec, index) */
+/*********************************************************************//**
+Reads the trx id field from a clustered index record.
+@return	value of the field */
+UNIV_INLINE
+trx_id_t
+row_get_rec_trx_id(
+/*===============*/
+	const rec_t*	rec,	/*!< in: record */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets);/*!< in: rec_get_offsets(rec, index) */
+/*********************************************************************//**
+Reads the roll pointer field from a clustered index record.
+@return	value of the field */
+UNIV_INLINE
+roll_ptr_t
+row_get_rec_roll_ptr(
+/*=================*/
+	const rec_t*	rec,	/*!< in: record */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets);/*!< in: rec_get_offsets(rec, index) */
+/*****************************************************************//**
+When an insert or purge to a table is performed, this function builds
+the entry to be inserted into or purged from an index on the table.
+@return index entry which should be inserted or purged, or NULL if the
+externally stored columns in the clustered index record are
+unavailable and ext != NULL */
+UNIV_INTERN
+dtuple_t*
+row_build_index_entry(
+/*==================*/
+	const dtuple_t*	row,	/*!< in: row which should be
+				inserted or purged */
+	row_ext_t*	ext,	/*!< in: externally stored column prefixes,
+				or NULL */
+	dict_index_t*	index,	/*!< in: index on the table */
+	mem_heap_t*	heap);	/*!< in: memory heap from which the memory for
+				the index entry is allocated */
+/*******************************************************************//**
+An inverse function to row_build_index_entry. Builds a row from a
+record in a clustered index.
+@return	own: row built; see the NOTE below! */
+UNIV_INTERN
+dtuple_t*
+row_build(
+/*======*/
+	ulint			type,	/*!< in: ROW_COPY_POINTERS or
+					ROW_COPY_DATA; the latter
+					copies also the data fields to
+					heap while the first only
+					places pointers to data fields
+					on the index page, and thus is
+					more efficient */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const rec_t*		rec,	/*!< in: record in the clustered
+					index; NOTE: in the case
+					ROW_COPY_POINTERS the data
+					fields in the row will point
+					directly into this record,
+					therefore, the buffer page of
+					this record must be at least
+					s-latched and the latch held
+					as long as the row dtuple is used! */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec,index)
+					or NULL, in which case this function
+					will invoke rec_get_offsets() */
+	const dict_table_t*	col_table,
+					/*!< in: table, to check which
+					externally stored columns
+					occur in the ordering columns
+					of an index, or NULL if
+					index->table should be
+					consulted instead; the user
+					columns in this table should be
+					the same columns as in index->table */
+	row_ext_t**		ext,	/*!< out, own: cache of
+					externally stored column
+					prefixes, or NULL */
+	mem_heap_t*		heap);	/*!< in: memory heap from which
+					the memory needed is allocated */
+/*******************************************************************//**
+Converts an index record to a typed data tuple.
+@return index entry built; does not set info_bits, and the data fields
+in the entry will point directly to rec */
+UNIV_INTERN
+dtuple_t*
+row_rec_to_index_entry_low(
+/*=======================*/
+	const rec_t*		rec,	/*!< in: record in the index */
+	const dict_index_t*	index,	/*!< in: index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint*			n_ext,	/*!< out: number of externally
+					stored columns */
+	mem_heap_t*		heap);	/*!< in: memory heap from which
+					the memory needed is allocated */
+/*******************************************************************//**
+Converts an index record to a typed data tuple. NOTE that externally
+stored (often big) fields are NOT copied to heap.
+@return	own: index entry built; see the NOTE below! */
+UNIV_INTERN
+dtuple_t*
+row_rec_to_index_entry(
+/*===================*/
+	ulint			type,	/*!< in: ROW_COPY_DATA, or
+					ROW_COPY_POINTERS: the former
+					copies also the data fields to
+					heap as the latter only places
+					pointers to data fields on the
+					index page */
+	const rec_t*		rec,	/*!< in: record in the index;
+					NOTE: in the case
+					ROW_COPY_POINTERS the data
+					fields in the row will point
+					directly into this record,
+					therefore, the buffer page of
+					this record must be at least
+					s-latched and the latch held
+					as long as the dtuple is used! */
+	const dict_index_t*	index,	/*!< in: index */
+	ulint*			offsets,/*!< in/out: rec_get_offsets(rec) */
+	ulint*			n_ext,	/*!< out: number of externally
+					stored columns */
+	mem_heap_t*		heap);	/*!< in: memory heap from which
+					the memory needed is allocated */
+/*******************************************************************//**
+Builds from a secondary index record a row reference with which we can
+search the clustered index record.
+@return	own: row reference built; see the NOTE below! */
+UNIV_INTERN
+dtuple_t*
+row_build_row_ref(
+/*==============*/
+	ulint		type,	/*!< in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+				the former copies also the data fields to
+				heap, whereas the latter only places pointers
+				to data fields on the index page */
+	dict_index_t*	index,	/*!< in: secondary index */
+	const rec_t*	rec,	/*!< in: record in the index;
+				NOTE: in the case ROW_COPY_POINTERS
+				the data fields in the row will point
+				directly into this record, therefore,
+				the buffer page of this record must be
+				at least s-latched and the latch held
+				as long as the row reference is used! */
+	mem_heap_t*	heap);	/*!< in: memory heap from which the memory
+				needed is allocated */
+/*******************************************************************//**
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+UNIV_INTERN
+void
+row_build_row_ref_in_tuple(
+/*=======================*/
+	dtuple_t*		ref,	/*!< in/out: row reference built;
+					see the NOTE below! */
+	const rec_t*		rec,	/*!< in: record in the index;
+					NOTE: the data fields in ref
+					will point directly into this
+					record, therefore, the buffer
+					page of this record must be at
+					least s-latched and the latch
+					held as long as the row
+					reference is used! */
+	const dict_index_t*	index,	/*!< in: secondary index */
+	ulint*			offsets,/*!< in: rec_get_offsets(rec, index)
+					or NULL */
+	trx_t*			trx);	/*!< in: transaction */
+/*******************************************************************//**
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+UNIV_INLINE
+void
+row_build_row_ref_fast(
+/*===================*/
+	dtuple_t*	ref,	/*!< in/out: typed data tuple where the
+				reference is built */
+	const ulint*	map,	/*!< in: array of field numbers in rec
+				telling how ref should be built from
+				the fields of rec */
+	const rec_t*	rec,	/*!< in: record in the index; must be
+				preserved while ref is used, as we do
+				not copy field values to heap */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
+Searches the clustered index record for a row, if we have the row
+reference.
+@return	TRUE if found */
+UNIV_INTERN
+ibool
+row_search_on_row_ref(
+/*==================*/
+	btr_pcur_t*		pcur,	/*!< out: persistent cursor, which must
+					be closed by the caller */
+	ulint			mode,	/*!< in: BTR_MODIFY_LEAF, ... */
+	const dict_table_t*	table,	/*!< in: table */
+	const dtuple_t*		ref,	/*!< in: row reference */
+	mtr_t*			mtr);	/*!< in/out: mtr */
+/*********************************************************************//**
+Fetches the clustered index record for a secondary index record. The latches
+on the secondary index record are preserved.
+@return	record or NULL, if no record found */
+UNIV_INTERN
+rec_t*
+row_get_clust_rec(
+/*==============*/
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF, ... */
+	const rec_t*	rec,	/*!< in: record in a secondary index */
+	dict_index_t*	index,	/*!< in: secondary index */
+	dict_index_t**	clust_index,/*!< out: clustered index */
+	mtr_t*		mtr);	/*!< in: mtr */
+/***************************************************************//**
+Searches an index record.
+@return	TRUE if found */
+UNIV_INTERN
+ibool
+row_search_index_entry(
+/*===================*/
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	entry,	/*!< in: index entry */
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF, ... */
+	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor, which must
+				be closed by the caller */
+	mtr_t*		mtr);	/*!< in: mtr */
+
+
+#define ROW_COPY_DATA		1
+#define ROW_COPY_POINTERS	2
+
+/* The allowed latching order of index records is the following:
+(1) a secondary index record ->
+(2) the clustered index record ->
+(3) rollback segment data for the clustered index record.
+
+No new latches may be obtained while the kernel mutex is reserved.
+However, the kernel mutex can be reserved while latches are owned. */
+
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) using
+"dict_field" and writes the result to "buf".
+Not more than "buf_size" bytes are written to "buf".
+The result is always NUL-terminated (provided buf_size is positive) and the
+number of bytes that were written to "buf" is returned (including the
+terminating NUL).
+@return	number of bytes that were written */
+UNIV_INTERN
+ulint
+row_raw_format(
+/*===========*/
+	const char*		data,		/*!< in: raw data */
+	ulint			data_len,	/*!< in: raw data length
+						in bytes */
+	const dict_field_t*	dict_field,	/*!< in: index field */
+	char*			buf,		/*!< out: output buffer */
+	ulint			buf_size);	/*!< in: output buffer size
+						in bytes */
+
+#ifndef UNIV_NONINL
+#include "row0row.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/row0row.ic b/storage/innodb_plugin/include/row0row.ic
new file mode 100644
index 00000000000..05c007641af
--- /dev/null
+++ b/storage/innodb_plugin/include/row0row.ic
@@ -0,0 +1,120 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0row.ic
+General row routines
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dict0dict.h"
+#include "rem0rec.h"
+#include "trx0undo.h"
+
+/*********************************************************************//**
+Reads the trx id field from a clustered index record.
+@return	value of the field */
+UNIV_INLINE
+trx_id_t
+row_get_rec_trx_id(
+/*===============*/
+	const rec_t*	rec,	/*!< in: record */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+	ulint	offset;
+
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	offset = index->trx_id_offset;
+
+	if (!offset) {
+		offset = row_get_trx_id_offset(rec, index, offsets);
+	}
+
+	return(trx_read_trx_id(rec + offset));
+}
+
+/*********************************************************************//**
+Reads the roll pointer field from a clustered index record.
+@return	value of the field */
+UNIV_INLINE
+roll_ptr_t
+row_get_rec_roll_ptr(
+/*=================*/
+	const rec_t*	rec,	/*!< in: record */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+	ulint	offset;
+
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	offset = index->trx_id_offset;
+
+	if (!offset) {
+		offset = row_get_trx_id_offset(rec, index, offsets);
+	}
+
+	return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
+}
+
+/*******************************************************************//**
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+UNIV_INLINE
+void
+row_build_row_ref_fast(
+/*===================*/
+	dtuple_t*	ref,	/*!< in/out: typed data tuple where the
+				reference is built */
+	const ulint*	map,	/*!< in: array of field numbers in rec
+				telling how ref should be built from
+				the fields of rec */
+	const rec_t*	rec,	/*!< in: record in the index; must be
+				preserved while ref is used, as we do
+				not copy field values to heap */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	dfield_t*	dfield;
+	const byte*	field;
+	ulint		len;
+	ulint		ref_len;
+	ulint		field_no;
+	ulint		i;
+
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(!rec_offs_any_extern(offsets));
+	ref_len = dtuple_get_n_fields(ref);
+
+	for (i = 0; i < ref_len; i++) {
+		dfield = dtuple_get_nth_field(ref, i);
+
+		field_no = *(map + i);
+
+		if (field_no != ULINT_UNDEFINED) {
+
+			field = rec_get_nth_field(rec, offsets,
+						  field_no, &len);
+			dfield_set_data(dfield, field, len);
+		}
+	}
+}
diff --git a/storage/innodb_plugin/include/row0sel.h b/storage/innodb_plugin/include/row0sel.h
new file mode 100644
index 00000000000..01a5afaa23e
--- /dev/null
+++ b/storage/innodb_plugin/include/row0sel.h
@@ -0,0 +1,413 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0sel.h
+Select
+
+Created 12/19/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0sel_h
+#define row0sel_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "que0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "row0types.h"
+#include "que0types.h"
+#include "pars0sym.h"
+#include "btr0pcur.h"
+#include "read0read.h"
+#include "row0mysql.h"
+
+/*********************************************************************//**
+Creates a select node struct.
+@return	own: select node struct */
+UNIV_INTERN
+sel_node_t*
+sel_node_create(
+/*============*/
+	mem_heap_t*	heap);	/*!< in: memory heap where created */
+/*********************************************************************//**
+Frees the memory private to a select node when a query graph is freed,
+does not free the heap where the node was originally created. */
+UNIV_INTERN
+void
+sel_node_free_private(
+/*==================*/
+	sel_node_t*	node);	/*!< in: select node struct */
+/*********************************************************************//**
+Frees a prefetch buffer for a column, including the dynamically allocated
+memory for data stored there. */
+UNIV_INTERN
+void
+sel_col_prefetch_buf_free(
+/*======================*/
+	sel_buf_t*	prefetch_buf);	/*!< in, own: prefetch buffer */
+/*********************************************************************//**
+Gets the plan node for the nth table in a join.
+@return	plan node */
+UNIV_INLINE
+plan_t*
+sel_node_get_nth_plan(
+/*==================*/
+	sel_node_t*	node,	/*!< in: select node */
+	ulint		i);	/*!< in: get ith plan node */
+/**********************************************************************//**
+Performs a select step. This is a high-level function used in SQL execution
+graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+row_sel_step(
+/*=========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of an open or close cursor statement node.
+@return	query thread to run next or NULL */
+UNIV_INLINE
+que_thr_t*
+open_step(
+/*======*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs a fetch for a cursor.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+fetch_step(
+/*=======*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/****************************************************************//**
+Sample callback function for fetch that prints each row.
+@return	always returns non-NULL */
+UNIV_INTERN
+void*
+row_fetch_print(
+/*============*/
+	void*	row,		/*!< in:  sel_node_t* */
+	void*	user_arg);	/*!< in:  not used */
+/****************************************************************//**
+Callback function for fetch that stores an unsigned 4 byte integer to the
+location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length
+= 4.
+@return	always returns NULL */
+UNIV_INTERN
+void*
+row_fetch_store_uint4(
+/*==================*/
+	void*	row,		/*!< in:  sel_node_t* */
+	void*	user_arg);	/*!< in:  data pointer */
+/***********************************************************//**
+Prints a row in a select result.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+row_printf_step(
+/*============*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/****************************************************************//**
+Converts a key value stored in MySQL format to an Innobase dtuple. The last
+field of the key value may be just a prefix of a fixed length field: hence
+the parameter key_len. But currently we do not allow search keys where the
+last field is only a prefix of the full key field len and print a warning if
+such appears. */
+UNIV_INTERN
+void
+row_sel_convert_mysql_key_to_innobase(
+/*==================================*/
+	dtuple_t*	tuple,		/*!< in/out: tuple where to build;
+					NOTE: we assume that the type info
+					in the tuple is already according
+					to index! */
+	byte*		buf,		/*!< in: buffer to use in field
+					conversions */
+	ulint		buf_len,	/*!< in: buffer length */
+	dict_index_t*	index,		/*!< in: index of the key value */
+	const byte*	key_ptr,	/*!< in: MySQL key value */
+	ulint		key_len,	/*!< in: MySQL key value length */
+	trx_t*		trx);		/*!< in: transaction */
+/********************************************************************//**
+Searches for rows in the database. This is used in the interface to
+MySQL. This function opens a cursor, and also implements fetch next
+and fetch prev. NOTE that if we do a search with a full key value
+from a unique index (ROW_SEL_EXACT), then we will not store the cursor
+position and fetch next or fetch prev must not be tried to the cursor!
+@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
+DB_LOCK_TABLE_FULL, or DB_TOO_BIG_RECORD */
+UNIV_INTERN
+ulint
+row_search_for_mysql(
+/*=================*/
+	byte*		buf,		/*!< in/out: buffer for the fetched
+					row in the MySQL format */
+	ulint		mode,		/*!< in: search mode PAGE_CUR_L, ... */
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct for the
+					table handle; this contains the info
+					of search_tuple, index; if search
+					tuple contains 0 fields then we
+					position the cursor at the start or
+					the end of the index, depending on
+					'mode' */
+	ulint		match_mode,	/*!< in: 0 or ROW_SEL_EXACT or
+					ROW_SEL_EXACT_PREFIX */
+	ulint		direction);	/*!< in: 0 or ROW_SEL_NEXT or
+					ROW_SEL_PREV; NOTE: if this is != 0,
+					then prebuilt must have a pcur
+					with stored position! In opening of a
+					cursor 'direction' should be 0. */
+/*******************************************************************//**
+Checks if MySQL at the moment is allowed for this table to retrieve a
+consistent read result, or store it to the query cache.
+@return	TRUE if storing or retrieving from the query cache is permitted */
+UNIV_INTERN
+ibool
+row_search_check_if_query_cache_permitted(
+/*======================================*/
+	trx_t*		trx,		/*!< in: transaction object */
+	const char*	norm_name);	/*!< in: concatenation of database name,
+					'/' char, table name */
+/*******************************************************************//**
+Read the max AUTOINC value from an index.
+@return	DB_SUCCESS if all OK else error code */
+UNIV_INTERN
+ulint
+row_search_max_autoinc(
+/*===================*/
+	dict_index_t*	index,		/*!< in: index to search */
+	const char*	col_name,	/*!< in: autoinc column name */
+	ib_uint64_t*	value);		/*!< out: AUTOINC value read */
+
+/** A structure for caching column values for prefetched rows */
+struct sel_buf_struct{
+	byte*		data;	/*!< data, or NULL; if not NULL, this field
+				has allocated memory which must be explicitly
+				freed; can be != NULL even when len is
+				UNIV_SQL_NULL */
+	ulint		len;	/*!< data length or UNIV_SQL_NULL */
+	ulint		val_buf_size;
+				/*!< size of memory buffer allocated for data:
+				this can be more than len; this is defined
+				when data != NULL */
+};
+
+/** Query plan */
+struct plan_struct{
+	dict_table_t*	table;		/*!< table struct in the dictionary
+					cache */
+	dict_index_t*	index;		/*!< table index used in the search */
+	btr_pcur_t	pcur;		/*!< persistent cursor used to search
+					the index */
+	ibool		asc;		/*!< TRUE if cursor traveling upwards */
+	ibool		pcur_is_open;	/*!< TRUE if pcur has been positioned
+					and we can try to fetch new rows */
+	ibool		cursor_at_end;	/*!< TRUE if the cursor is open but
+					we know that there are no more
+					qualifying rows left to retrieve from
+					the index tree; NOTE though, that
+					there may still be unprocessed rows in
+					the prefetch stack; always FALSE when
+					pcur_is_open is FALSE */
+	ibool		stored_cursor_rec_processed;
+					/*!< TRUE if the pcur position has been
+					stored and the record it is positioned
+					on has already been processed */
+	que_node_t**	tuple_exps;	/*!< array of expressions
+					which are used to calculate
+					the field values in the search
+					tuple: there is one expression
+					for each field in the search
+					tuple */
+	dtuple_t*	tuple;		/*!< search tuple */
+	ulint		mode;		/*!< search mode: PAGE_CUR_G, ... */
+	ulint		n_exact_match;	/*!< number of first fields in
+					the search tuple which must be
+					exactly matched */
+	ibool		unique_search;	/*!< TRUE if we are searching an
+					index record with a unique key */
+	ulint		n_rows_fetched;	/*!< number of rows fetched using pcur
+					after it was opened */
+	ulint		n_rows_prefetched;/*!< number of prefetched rows cached
+					for fetch: fetching several rows in
+					the same mtr saves CPU time */
+	ulint		first_prefetched;/*!< index of the first cached row in
+					select buffer arrays for each column */
+	ibool		no_prefetch;	/*!< no prefetch for this table */
+	sym_node_list_t	columns;	/*!< symbol table nodes for the columns
+					to retrieve from the table */
+	UT_LIST_BASE_NODE_T(func_node_t)
+			end_conds;	/*!< conditions which determine the
+					fetch limit of the index segment we
+					have to look at: when one of these
+					fails, the result set has been
+					exhausted for the cursor in this
+					index; these conditions are normalized
+					so that in a comparison the column
+					for this table is the first argument */
+	UT_LIST_BASE_NODE_T(func_node_t)
+			other_conds;	/*!< the rest of search conditions we can
+					test at this table in a join */
+	ibool		must_get_clust;	/*!< TRUE if index is a non-clustered
+					index and we must also fetch the
+					clustered index record; this is the
+					case if the non-clustered record does
+					not contain all the needed columns, or
+					if this is a single-table explicit
+					cursor, or a searched update or
+					delete */
+	ulint*		clust_map;	/*!< map telling how clust_ref is built
+					from the fields of a non-clustered
+					record */
+	dtuple_t*	clust_ref;	/*!< the reference to the clustered
+					index entry is built here if index is
+					a non-clustered index */
+	btr_pcur_t	clust_pcur;	/*!< if index is non-clustered, we use
+					this pcur to search the clustered
+					index */
+	mem_heap_t*	old_vers_heap;	/*!< memory heap used in building an old
+					version of a row, or NULL */
+};
+
+/** Select node states */
+enum sel_node_state {
+	SEL_NODE_CLOSED,	/*!< it is a declared cursor which is not
+				currently open */
+	SEL_NODE_OPEN,		/*!< intention locks not yet set on tables */
+	SEL_NODE_FETCH,		/*!< intention locks have been set */
+	SEL_NODE_NO_MORE_ROWS	/*!< cursor has reached the result set end */
+};
+
+/** Select statement node */
+struct sel_node_struct{
+	que_common_t	common;		/*!< node type: QUE_NODE_SELECT */
+	enum sel_node_state
+			state;	/*!< node state */
+	que_node_t*	select_list;	/*!< select list */
+	sym_node_t*	into_list;	/*!< variables list or NULL */
+	sym_node_t*	table_list;	/*!< table list */
+	ibool		asc;		/*!< TRUE if the rows should be fetched
+					in an ascending order */
+	ibool		set_x_locks;	/*!< TRUE if the cursor is for update or
+					delete, which means that a row x-lock
+					should be placed on the cursor row */
+	ulint		row_lock_mode;	/*!< LOCK_X or LOCK_S */
+	ulint		n_tables;	/*!< number of tables */
+	ulint		fetch_table;	/*!< number of the next table to access
+					in the join */
+	plan_t*		plans;		/*!< array of n_tables many plan nodes
+					containing the search plan and the
+					search data structures */
+	que_node_t*	search_cond;	/*!< search condition */
+	read_view_t*	read_view;	/*!< if the query is a non-locking
+					consistent read, its read view is
+					placed here, otherwise NULL */
+	ibool		consistent_read;/*!< TRUE if the select is a consistent,
+					non-locking read */
+	order_node_t*	order_by;	/*!< order by column definition, or
+					NULL */
+	ibool		is_aggregate;	/*!< TRUE if the select list consists of
+					aggregate functions */
+	ibool		aggregate_already_fetched;
+					/*!< TRUE if the aggregate row has
+					already been fetched for the current
+					cursor */
+	ibool		can_get_updated;/*!< this is TRUE if the select
+					is in a single-table explicit
+					cursor which can get updated
+					within the stored procedure,
+					or in a searched update or
+					delete; NOTE that to determine
+					of an explicit cursor if it
+					can get updated, the parser
+					checks from a stored procedure
+					if it contains positioned
+					update or delete statements */
+	sym_node_t*	explicit_cursor;/*!< not NULL if an explicit cursor */
+	UT_LIST_BASE_NODE_T(sym_node_t)
+			copy_variables; /*!< variables whose values we have to
+					copy when an explicit cursor is opened,
+					so that they do not change between
+					fetches */
+};
+
+/** Fetch statement node */
+struct fetch_node_struct{
+	que_common_t	common;		/*!< type: QUE_NODE_FETCH */
+	sel_node_t*	cursor_def;	/*!< cursor definition */
+	sym_node_t*	into_list;	/*!< variables to set */
+
+	pars_user_func_t*
+			func;		/*!< User callback function or NULL.
+					The first argument to the function
+					is a sel_node_t*, containing the
+					results of the SELECT operation for
+					one row. If the function returns
+					NULL, it is not interested in
+					further rows and the cursor is
+					modified so (cursor % NOTFOUND) is
+					true. If it returns not-NULL,
+					continue normally. See
+					row_fetch_print() for an example
+					(and a useful debugging tool). */
+};
+
+/** Open or close cursor operation type */
+enum open_node_op {
+	ROW_SEL_OPEN_CURSOR,	/*!< open cursor */
+	ROW_SEL_CLOSE_CURSOR	/*!< close cursor */
+};
+
+/** Open or close cursor statement node */
+struct open_node_struct{
+	que_common_t	common;		/*!< type: QUE_NODE_OPEN */
+	enum open_node_op
+			op_type;	/*!< operation type: open or
+					close cursor */
+	sel_node_t*	cursor_def;	/*!< cursor definition */
+};
+
+/** Row printf statement node */
+struct row_printf_node_struct{
+	que_common_t	common;		/*!< type: QUE_NODE_ROW_PRINTF */
+	sel_node_t*	sel_node;	/*!< select */
+};
+
+/** Search direction for the MySQL interface */
+enum row_sel_direction {
+	ROW_SEL_NEXT = 1,	/*!< ascending direction */
+	ROW_SEL_PREV = 2	/*!< descending direction */
+};
+
+/** Match mode for the MySQL interface */
+enum row_sel_match_mode {
+	ROW_SEL_EXACT = 1,	/*!< search using a complete key value */
+	ROW_SEL_EXACT_PREFIX	/*!< search using a key prefix which
+				must match rows: the prefix may
+				contain an incomplete field (the last
+				field in prefix may be just a prefix
+				of a fixed length column) */
+};
+
+#ifndef UNIV_NONINL
+#include "row0sel.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/row0sel.ic b/storage/innodb_plugin/include/row0sel.ic
similarity index 54%
rename from storage/innobase/include/row0sel.ic
rename to storage/innodb_plugin/include/row0sel.ic
index 1f92b99271e..5907f9913da 100644
--- a/storage/innobase/include/row0sel.ic
+++ b/storage/innodb_plugin/include/row0sel.ic
@@ -1,29 +1,46 @@
-/******************************************************
-Select
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0sel.ic
+Select
 
 Created 12/19/1997 Heikki Tuuri
 *******************************************************/
 
 #include "que0que.h"
 
-/*************************************************************************
-Gets the plan node for the nth table in a join. */
+/*********************************************************************//**
+Gets the plan node for the nth table in a join.
+@return	plan node */
 UNIV_INLINE
 plan_t*
 sel_node_get_nth_plan(
 /*==================*/
-				/* out: plan node */
-	sel_node_t*	node,	/* in: select node */
-	ulint		i)	/* in: get ith plan node */
+	sel_node_t*	node,	/*!< in: select node */
+	ulint		i)	/*!< in: get ith plan node */
 {
 	ut_ad(i < node->n_tables);
 
 	return(node->plans + i);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Resets the cursor defined by sel_node to the SEL_NODE_OPEN state, which means
 that it will start fetching from the start of the result set again, regardless
 of where it was before, and it will set intention locks on the tables. */
@@ -31,19 +48,19 @@ UNIV_INLINE
 void
 sel_node_reset_cursor(
 /*==================*/
-	sel_node_t*	node)	/* in: select node */
+	sel_node_t*	node)	/*!< in: select node */
 {
 	node->state = SEL_NODE_OPEN;
 }
 
-/**************************************************************************
-Performs an execution step of an open or close cursor statement node. */
+/**********************************************************************//**
+Performs an execution step of an open or close cursor statement node.
+@return	query thread to run next or NULL */
 UNIV_INLINE
 que_thr_t*
 open_step(
 /*======*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	sel_node_t*	sel_node;
 	open_node_t*	node;
@@ -51,7 +68,7 @@ open_step(
 
 	ut_ad(thr);
 
-	node = thr->run_node;
+	node = (open_node_t*) thr->run_node;
 	ut_ad(que_node_get_type(node) == QUE_NODE_OPEN);
 
 	sel_node = node->cursor_def;
diff --git a/storage/innodb_plugin/include/row0types.h b/storage/innodb_plugin/include/row0types.h
new file mode 100644
index 00000000000..7920fd75061
--- /dev/null
+++ b/storage/innodb_plugin/include/row0types.h
@@ -0,0 +1,59 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0types.h
+Row operation global types
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0types_h
+#define row0types_h
+
+typedef struct plan_struct plan_t;
+
+typedef	struct upd_struct upd_t;
+
+typedef struct upd_field_struct upd_field_t;
+
+typedef	struct upd_node_struct upd_node_t;
+
+typedef	struct del_node_struct del_node_t;
+
+typedef	struct ins_node_struct ins_node_t;
+
+typedef struct sel_node_struct	sel_node_t;
+
+typedef struct open_node_struct	open_node_t;
+
+typedef struct fetch_node_struct fetch_node_t;
+
+typedef struct row_printf_node_struct	row_printf_node_t;
+typedef struct sel_buf_struct	sel_buf_t;
+
+typedef	struct undo_node_struct undo_node_t;
+
+typedef	struct purge_node_struct purge_node_t;
+
+typedef struct row_ext_struct row_ext_t;
+
+/* MySQL data types */
+typedef struct st_table TABLE;
+
+#endif
diff --git a/storage/innodb_plugin/include/row0uins.h b/storage/innodb_plugin/include/row0uins.h
new file mode 100644
index 00000000000..77b071c3a6b
--- /dev/null
+++ b/storage/innodb_plugin/include/row0uins.h
@@ -0,0 +1,54 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0uins.h
+Fresh insert undo
+
+Created 2/25/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0uins_h
+#define row0uins_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+
+/***********************************************************//**
+Undoes a fresh insert of a row to a table. A fresh insert means that
+the same clustered index unique key did not have any record, even delete
+marked, at the time of the insert.  InnoDB is eager in a rollback:
+if it figures out that an index record will be removed in the purge
+anyway, it will remove it in the rollback.
+@return	DB_SUCCESS */
+UNIV_INTERN
+ulint
+row_undo_ins(
+/*=========*/
+	undo_node_t*	node);	/*!< in: row undo node */
+
+#ifndef UNIV_NONINL
+#include "row0uins.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/row0uins.ic b/storage/innodb_plugin/include/row0uins.ic
new file mode 100644
index 00000000000..27606150d8e
--- /dev/null
+++ b/storage/innodb_plugin/include/row0uins.ic
@@ -0,0 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0uins.ic
+Fresh insert undo
+
+Created 2/25/1997 Heikki Tuuri
+*******************************************************/
+
diff --git a/storage/innodb_plugin/include/row0umod.h b/storage/innodb_plugin/include/row0umod.h
new file mode 100644
index 00000000000..ed44cc8d601
--- /dev/null
+++ b/storage/innodb_plugin/include/row0umod.h
@@ -0,0 +1,52 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0umod.h
+Undo modify of a row
+
+Created 2/27/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0umod_h
+#define row0umod_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+
+/***********************************************************//**
+Undoes a modify operation on a row of a table.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+row_undo_mod(
+/*=========*/
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr);	/*!< in: query thread */
+
+
+#ifndef UNIV_NONINL
+#include "row0umod.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/row0umod.ic b/storage/innodb_plugin/include/row0umod.ic
new file mode 100644
index 00000000000..ea3fd3b43c7
--- /dev/null
+++ b/storage/innodb_plugin/include/row0umod.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0umod.ic
+Undo modify of a row
+
+Created 2/27/1997 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innodb_plugin/include/row0undo.h b/storage/innodb_plugin/include/row0undo.h
new file mode 100644
index 00000000000..6eb4ca448b3
--- /dev/null
+++ b/storage/innodb_plugin/include/row0undo.h
@@ -0,0 +1,142 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0undo.h
+Row undo
+
+Created 1/8/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0undo_h
+#define row0undo_h
+
+#include "univ.i"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+#include "btr0types.h"
+#include "btr0pcur.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+
+/********************************************************************//**
+Creates a row undo node to a query graph.
+@return	own: undo node */
+UNIV_INTERN
+undo_node_t*
+row_undo_node_create(
+/*=================*/
+	trx_t*		trx,	/*!< in: transaction */
+	que_thr_t*	parent,	/*!< in: parent node, i.e., a thr node */
+	mem_heap_t*	heap);	/*!< in: memory heap where created */
+/***********************************************************//**
+Looks for the clustered index record when node has the row reference.
+The pcur in node is used in the search. If found, stores the row to node,
+and stores the position of pcur, and detaches it. The pcur must be closed
+by the caller in any case.
+@return TRUE if found; NOTE the node->pcur must be closed by the
+caller, regardless of the return value */
+UNIV_INTERN
+ibool
+row_undo_search_clust_to_pcur(
+/*==========================*/
+	undo_node_t*	node);	/*!< in: row undo node */
+/***********************************************************//**
+Undoes a row operation in a table. This is a high-level function used
+in SQL execution graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+row_undo_step(
+/*==========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+
+/* A single query thread will try to perform the undo for all successive
+versions of a clustered index record, if the transaction has modified it
+several times during the execution which is rolled back. It may happen
+that the task is transferred to another query thread, if the other thread
+is assigned to handle an undo log record in the chain of different versions
+of the record, and the other thread happens to get the x-latch to the
+clustered index record at the right time.
+	If a query thread notices that the clustered index record it is looking
+for is missing, or the roll ptr field in the record doed not point to the
+undo log record the thread was assigned to handle, then it gives up the undo
+task for that undo log record, and fetches the next. This situation can occur
+just in the case where the transaction modified the same record several times
+and another thread is currently doing the undo for successive versions of
+that index record. */
+
+/** Execution state of an undo node */
+enum undo_exec {
+	UNDO_NODE_FETCH_NEXT = 1,	/*!< we should fetch the next
+					undo log record */
+	UNDO_NODE_PREV_VERS,		/*!< the roll ptr to previous
+					version of a row is stored in
+					node, and undo should be done
+					based on it */
+	UNDO_NODE_INSERT,		/*!< undo a fresh insert of a
+					row to a table */
+	UNDO_NODE_MODIFY		/*!< undo a modify operation
+					(DELETE or UPDATE) on a row
+					of a table */
+};
+
+/** Undo node structure */
+struct undo_node_struct{
+	que_common_t	common;	/*!< node type: QUE_NODE_UNDO */
+	enum undo_exec	state;	/*!< node execution state */
+	trx_t*		trx;	/*!< trx for which undo is done */
+	roll_ptr_t	roll_ptr;/*!< roll pointer to undo log record */
+	trx_undo_rec_t*	undo_rec;/*!< undo log record */
+	undo_no_t	undo_no;/*!< undo number of the record */
+	ulint		rec_type;/*!< undo log record type: TRX_UNDO_INSERT_REC,
+				... */
+	roll_ptr_t	new_roll_ptr;
+				/*!< roll ptr to restore to clustered index
+				record */
+	trx_id_t	new_trx_id; /*!< trx id to restore to clustered index
+				record */
+	btr_pcur_t	pcur;	/*!< persistent cursor used in searching the
+				clustered index record */
+	dict_table_t*	table;	/*!< table where undo is done */
+	ulint		cmpl_info;/*!< compiler analysis of an update */
+	upd_t*		update;	/*!< update vector for a clustered index
+				record */
+	dtuple_t*	ref;	/*!< row reference to the next row to handle */
+	dtuple_t*	row;	/*!< a copy (also fields copied to heap) of the
+				row to handle */
+	row_ext_t*	ext;	/*!< NULL, or prefixes of the externally
+				stored columns of the row */
+	dtuple_t*	undo_row;/*!< NULL, or the row after undo */
+	row_ext_t*	undo_ext;/*!< NULL, or prefixes of the externally
+				stored columns of undo_row */
+	dict_index_t*	index;	/*!< the next index whose record should be
+				handled */
+	mem_heap_t*	heap;	/*!< memory heap used as auxiliary storage for
+				row; this must be emptied after undo is tried
+				on a row */
+};
+
+
+#ifndef UNIV_NONINL
+#include "row0undo.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/row0undo.ic b/storage/innodb_plugin/include/row0undo.ic
new file mode 100644
index 00000000000..dc788debc14
--- /dev/null
+++ b/storage/innodb_plugin/include/row0undo.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0undo.ic
+Row undo
+
+Created 1/8/1997 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innodb_plugin/include/row0upd.h b/storage/innodb_plugin/include/row0upd.h
new file mode 100644
index 00000000000..635d746d5a1
--- /dev/null
+++ b/storage/innodb_plugin/include/row0upd.h
@@ -0,0 +1,483 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0upd.h
+Update of a row
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0upd_h
+#define row0upd_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "row0types.h"
+#include "btr0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+
+#ifndef UNIV_HOTBACKUP
+# include "btr0pcur.h"
+# include "que0types.h"
+# include "pars0types.h"
+#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
+Creates an update vector object.
+@return	own: update vector object */
+UNIV_INLINE
+upd_t*
+upd_create(
+/*=======*/
+	ulint		n,	/*!< in: number of fields */
+	mem_heap_t*	heap);	/*!< in: heap from which memory allocated */
+/*********************************************************************//**
+Returns the number of fields in the update vector == number of columns
+to be updated by an update vector.
+@return	number of fields */
+UNIV_INLINE
+ulint
+upd_get_n_fields(
+/*=============*/
+	const upd_t*	update);	/*!< in: update vector */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Returns the nth field of an update vector.
+@return	update vector field */
+UNIV_INLINE
+upd_field_t*
+upd_get_nth_field(
+/*==============*/
+	const upd_t*	update,	/*!< in: update vector */
+	ulint		n);	/*!< in: field position in update vector */
+#else
+# define upd_get_nth_field(update, n) ((update)->fields + (n))
+#endif
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Sets an index field number to be updated by an update vector field. */
+UNIV_INLINE
+void
+upd_field_set_field_no(
+/*===================*/
+	upd_field_t*	upd_field,	/*!< in: update vector field */
+	ulint		field_no,	/*!< in: field number in a clustered
+					index */
+	dict_index_t*	index,		/*!< in: index */
+	trx_t*		trx);		/*!< in: transaction */
+/*********************************************************************//**
+Returns a field of an update vector by field_no.
+@return	update vector field, or NULL */
+UNIV_INLINE
+const upd_field_t*
+upd_get_field_by_field_no(
+/*======================*/
+	const upd_t*	update,	/*!< in: update vector */
+	ulint		no)	/*!< in: field_no */
+	__attribute__((nonnull, pure));
+/*********************************************************************//**
+Writes into the redo log the values of trx id and roll ptr and enough info
+to determine their positions within a clustered index record.
+@return	new pointer to mlog */
+UNIV_INTERN
+byte*
+row_upd_write_sys_vals_to_log(
+/*==========================*/
+	dict_index_t*	index,	/*!< in: clustered index */
+	trx_t*		trx,	/*!< in: transaction */
+	roll_ptr_t	roll_ptr,/*!< in: roll ptr of the undo log record */
+	byte*		log_ptr,/*!< pointer to a buffer of size > 20 opened
+				in mlog */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************************//**
+Updates the trx id and roll ptr field in a clustered index record when
+a row is updated or marked deleted. */
+UNIV_INLINE
+void
+row_upd_rec_sys_fields(
+/*===================*/
+	rec_t*		rec,	/*!< in/out: record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	trx_t*		trx,	/*!< in: transaction */
+	roll_ptr_t	roll_ptr);/*!< in: roll ptr of the undo log record */
+/*********************************************************************//**
+Sets the trx id or roll ptr field of a clustered index entry. */
+UNIV_INTERN
+void
+row_upd_index_entry_sys_field(
+/*==========================*/
+	const dtuple_t*	entry,	/*!< in: index entry, where the memory buffers
+				for sys fields are already allocated:
+				the function just copies the new values to
+				them */
+	dict_index_t*	index,	/*!< in: clustered index */
+	ulint		type,	/*!< in: DATA_TRX_ID or DATA_ROLL_PTR */
+	dulint		val);	/*!< in: value to write */
+/*********************************************************************//**
+Creates an update node for a query graph.
+@return	own: update node */
+UNIV_INTERN
+upd_node_t*
+upd_node_create(
+/*============*/
+	mem_heap_t*	heap);	/*!< in: mem heap where created */
+/***********************************************************//**
+Writes to the redo log the new values of the fields occurring in the index. */
+UNIV_INTERN
+void
+row_upd_index_write_log(
+/*====================*/
+	const upd_t*	update,	/*!< in: update vector */
+	byte*		log_ptr,/*!< in: pointer to mlog buffer: must
+				contain at least MLOG_BUF_MARGIN bytes
+				of free space; the buffer is closed
+				within this function */
+	mtr_t*		mtr);	/*!< in: mtr into whose log to write */
+/***********************************************************//**
+Returns TRUE if row update changes size of some field in index or if some
+field to be updated is stored externally in rec or update.
+@return TRUE if the update changes the size of some field in index or
+the field is external in rec or update */
+UNIV_INTERN
+ibool
+row_upd_changes_field_size_or_external(
+/*===================================*/
+	dict_index_t*	index,	/*!< in: index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	const upd_t*	update);/*!< in: update vector */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Replaces the new column values stored in the update vector to the record
+given. No field size changes are allowed. */
+UNIV_INTERN
+void
+row_upd_rec_in_place(
+/*=================*/
+	rec_t*		rec,	/*!< in/out: record where replaced */
+	dict_index_t*	index,	/*!< in: the index the record belongs to */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	const upd_t*	update,	/*!< in: update vector */
+	page_zip_des_t*	page_zip);/*!< in: compressed page with enough space
+				available, or NULL */
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Builds an update vector from those fields which in a secondary index entry
+differ from a record that has the equal ordering fields. NOTE: we compare
+the fields as binary strings!
+@return	own: update vector of differing fields */
+UNIV_INTERN
+upd_t*
+row_upd_build_sec_rec_difference_binary(
+/*====================================*/
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	entry,	/*!< in: entry to insert */
+	const rec_t*	rec,	/*!< in: secondary index record */
+	trx_t*		trx,	/*!< in: transaction */
+	mem_heap_t*	heap);	/*!< in: memory heap from which allocated */
+/***************************************************************//**
+Builds an update vector from those fields, excluding the roll ptr and
+trx id fields, which in an index entry differ from a record that has
+the equal ordering fields. NOTE: we compare the fields as binary strings!
+@return own: update vector of differing fields, excluding roll ptr and
+trx id */
+UNIV_INTERN
+upd_t*
+row_upd_build_difference_binary(
+/*============================*/
+	dict_index_t*	index,	/*!< in: clustered index */
+	const dtuple_t*	entry,	/*!< in: entry to insert */
+	const rec_t*	rec,	/*!< in: clustered index record */
+	trx_t*		trx,	/*!< in: transaction */
+	mem_heap_t*	heap);	/*!< in: memory heap from which allocated */
+/***********************************************************//**
+Replaces the new column values stored in the update vector to the index entry
+given. */
+UNIV_INTERN
+void
+row_upd_index_replace_new_col_vals_index_pos(
+/*=========================================*/
+	dtuple_t*	entry,	/*!< in/out: index entry where replaced;
+				the clustered index record must be
+				covered by a lock or a page latch to
+				prevent deletion (rollback or purge) */
+	dict_index_t*	index,	/*!< in: index; NOTE that this may also be a
+				non-clustered index */
+	const upd_t*	update,	/*!< in: an update vector built for the index so
+				that the field number in an upd_field is the
+				index position */
+	ibool		order_only,
+				/*!< in: if TRUE, limit the replacement to
+				ordering fields of index; note that this
+				does not work for non-clustered indexes. */
+	mem_heap_t*	heap)	/*!< in: memory heap for allocating and
+				copying the new values */
+	__attribute__((nonnull));
+/***********************************************************//**
+Replaces the new column values stored in the update vector to the index entry
+given. */
+UNIV_INTERN
+void
+row_upd_index_replace_new_col_vals(
+/*===============================*/
+	dtuple_t*	entry,	/*!< in/out: index entry where replaced;
+				the clustered index record must be
+				covered by a lock or a page latch to
+				prevent deletion (rollback or purge) */
+	dict_index_t*	index,	/*!< in: index; NOTE that this may also be a
+				non-clustered index */
+	const upd_t*	update,	/*!< in: an update vector built for the
+				CLUSTERED index so that the field number in
+				an upd_field is the clustered index position */
+	mem_heap_t*	heap)	/*!< in: memory heap for allocating and
+				copying the new values */
+	__attribute__((nonnull));
+/***********************************************************//**
+Replaces the new column values stored in the update vector. */
+UNIV_INTERN
+void
+row_upd_replace(
+/*============*/
+	dtuple_t*		row,	/*!< in/out: row where replaced,
+					indexed by col_no;
+					the clustered index record must be
+					covered by a lock or a page latch to
+					prevent deletion (rollback or purge) */
+	row_ext_t**		ext,	/*!< out, own: NULL, or externally
+					stored column prefixes */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const upd_t*		update,	/*!< in: an update vector built for the
+					clustered index */
+	mem_heap_t*		heap);	/*!< in: memory heap */
+/***********************************************************//**
+Checks if an update vector changes an ordering field of an index record.
+
+This function is fast if the update vector is short or the number of ordering
+fields in the index is small. Otherwise, this can be quadratic.
+NOTE: we compare the fields as binary strings!
+@return TRUE if update vector changes an ordering field in the index record */
+UNIV_INTERN
+ibool
+row_upd_changes_ord_field_binary(
+/*=============================*/
+	const dtuple_t*	row,	/*!< in: old value of row, or NULL if the
+				row and the data values in update are not
+				known when this function is called, e.g., at
+				compile time */
+	dict_index_t*	index,	/*!< in: index of the record */
+	const upd_t*	update);/*!< in: update vector for the row; NOTE: the
+				field numbers in this MUST be clustered index
+				positions! */
+/***********************************************************//**
+Checks if an update vector changes an ordering field of an index record.
+This function is fast if the update vector is short or the number of ordering
+fields in the index is small. Otherwise, this can be quadratic.
+NOTE: we compare the fields as binary strings!
+@return TRUE if update vector may change an ordering field in an index
+record */
+UNIV_INTERN
+ibool
+row_upd_changes_some_index_ord_field_binary(
+/*========================================*/
+	const dict_table_t*	table,	/*!< in: table */
+	const upd_t*		update);/*!< in: update vector for the row */
+/***********************************************************//**
+Updates a row in a table. This is a high-level function used
+in SQL execution graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+row_upd_step(
+/*=========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Parses the log data of system field values.
+@return	log data end or NULL */
+UNIV_INTERN
+byte*
+row_upd_parse_sys_vals(
+/*===================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	ulint*		pos,	/*!< out: TRX_ID position in record */
+	trx_id_t*	trx_id,	/*!< out: trx id */
+	roll_ptr_t*	roll_ptr);/*!< out: roll ptr */
+/*********************************************************************//**
+Updates the trx id and roll ptr field in a clustered index record in database
+recovery. */
+UNIV_INTERN
+void
+row_upd_rec_sys_fields_in_recovery(
+/*===============================*/
+	rec_t*		rec,	/*!< in/out: record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		pos,	/*!< in: TRX_ID position in rec */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	roll_ptr_t	roll_ptr);/*!< in: roll ptr of the undo log record */
+/*********************************************************************//**
+Parses the log data written by row_upd_index_write_log.
+@return	log data end or NULL */
+UNIV_INTERN
+byte*
+row_upd_index_parse(
+/*================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	mem_heap_t*	heap,	/*!< in: memory heap where update vector is
+				built */
+	upd_t**		update_out);/*!< out: update vector */
+
+
+/* Update vector field */
+struct upd_field_struct{
+	unsigned	field_no:16;	/*!< field number in an index, usually
+					the clustered index, but in updating
+					a secondary index record in btr0cur.c
+					this is the position in the secondary
+					index */
+#ifndef UNIV_HOTBACKUP
+	unsigned	orig_len:16;	/*!< original length of the locally
+					stored part of an externally stored
+					column, or 0 */
+	que_node_t*	exp;		/*!< expression for calculating a new
+					value: it refers to column values and
+					constants in the symbol table of the
+					query graph */
+#endif /* !UNIV_HOTBACKUP */
+	dfield_t	new_val;	/*!< new value for the column */
+};
+
+/* Update vector structure */
+struct upd_struct{
+	ulint		info_bits;	/*!< new value of info bits to record;
+					default is 0 */
+	ulint		n_fields;	/*!< number of update fields */
+	upd_field_t*	fields;		/*!< array of update fields */
+};
+
+#ifndef UNIV_HOTBACKUP
+/* Update node structure which also implements the delete operation
+of a row */
+
+struct upd_node_struct{
+	que_common_t	common;	/*!< node type: QUE_NODE_UPDATE */
+	ibool		is_delete;/* TRUE if delete, FALSE if update */
+	ibool		searched_update;
+				/* TRUE if searched update, FALSE if
+				positioned */
+	ibool		in_mysql_interface;
+				/* TRUE if the update node was created
+				for the MySQL interface */
+	dict_foreign_t*	foreign;/* NULL or pointer to a foreign key
+				constraint if this update node is used in
+				doing an ON DELETE or ON UPDATE operation */
+	upd_node_t*	cascade_node;/* NULL or an update node template which
+				is used to implement ON DELETE/UPDATE CASCADE
+				or ... SET NULL for foreign keys */
+	mem_heap_t*	cascade_heap;/* NULL or a mem heap where the cascade
+				node is created */
+	sel_node_t*	select;	/*!< query graph subtree implementing a base
+				table cursor: the rows returned will be
+				updated */
+	btr_pcur_t*	pcur;	/*!< persistent cursor placed on the clustered
+				index record which should be updated or
+				deleted; the cursor is stored in the graph
+				of 'select' field above, except in the case
+				of the MySQL interface */
+	dict_table_t*	table;	/*!< table where updated */
+	upd_t*		update;	/*!< update vector for the row */
+	ulint		update_n_fields;
+				/* when this struct is used to implement
+				a cascade operation for foreign keys, we store
+				here the size of the buffer allocated for use
+				as the update vector */
+	sym_node_list_t	columns;/* symbol table nodes for the columns
+				to retrieve from the table */
+	ibool		has_clust_rec_x_lock;
+				/* TRUE if the select which retrieves the
+				records to update already sets an x-lock on
+				the clustered record; note that it must always
+				set at least an s-lock */
+	ulint		cmpl_info;/* information extracted during query
+				compilation; speeds up execution:
+				UPD_NODE_NO_ORD_CHANGE and
+				UPD_NODE_NO_SIZE_CHANGE, ORed */
+	/*----------------------*/
+	/* Local storage for this graph node */
+	ulint		state;	/*!< node execution state */
+	dict_index_t*	index;	/*!< NULL, or the next index whose record should
+				be updated */
+	dtuple_t*	row;	/*!< NULL, or a copy (also fields copied to
+				heap) of the row to update; this must be reset
+				to NULL after a successful update */
+	row_ext_t*	ext;	/*!< NULL, or prefixes of the externally
+				stored columns in the old row */
+	dtuple_t*	upd_row;/* NULL, or a copy of the updated row */
+	row_ext_t*	upd_ext;/* NULL, or prefixes of the externally
+				stored columns in upd_row */
+	mem_heap_t*	heap;	/*!< memory heap used as auxiliary storage;
+				this must be emptied after a successful
+				update */
+	/*----------------------*/
+	sym_node_t*	table_sym;/* table node in symbol table */
+	que_node_t*	col_assign_list;
+				/* column assignment list */
+	ulint		magic_n;
+};
+
+#define	UPD_NODE_MAGIC_N	1579975
+
+/* Node execution states */
+#define UPD_NODE_SET_IX_LOCK	   1	/* execution came to the node from
+					a node above and if the field
+					has_clust_rec_x_lock is FALSE, we
+					should set an intention x-lock on
+					the table */
+#define UPD_NODE_UPDATE_CLUSTERED  2	/* clustered index record should be
+					updated */
+#define UPD_NODE_INSERT_CLUSTERED  3	/* clustered index record should be
+					inserted, old record is already delete
+					marked */
+#define UPD_NODE_UPDATE_ALL_SEC	   4	/* an ordering field of the clustered
+					index record was changed, or this is
+					a delete operation: should update
+					all the secondary index records */
+#define	UPD_NODE_UPDATE_SOME_SEC   5	/* secondary index entries should be
+					looked at and updated if an ordering
+					field changed */
+
+/* Compilation info flags: these must fit within 3 bits; see trx0rec.h */
+#define UPD_NODE_NO_ORD_CHANGE	1	/* no secondary index record will be
+					changed in the update and no ordering
+					field of the clustered index */
+#define UPD_NODE_NO_SIZE_CHANGE	2	/* no record field size will be
+					changed in the update */
+
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "row0upd.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/row0upd.ic b/storage/innodb_plugin/include/row0upd.ic
new file mode 100644
index 00000000000..18e22f1eca9
--- /dev/null
+++ b/storage/innodb_plugin/include/row0upd.ic
@@ -0,0 +1,184 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0upd.ic
+Update of a row
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#include "mtr0log.h"
+#ifndef UNIV_HOTBACKUP
+# include "trx0trx.h"
+# include "trx0undo.h"
+# include "row0row.h"
+# include "btr0sea.h"
+#endif /* !UNIV_HOTBACKUP */
+#include "page0zip.h"
+
+/*********************************************************************//**
+Creates an update vector object.
+@return	own: update vector object */
+UNIV_INLINE
+upd_t*
+upd_create(
+/*=======*/
+	ulint		n,	/*!< in: number of fields */
+	mem_heap_t*	heap)	/*!< in: heap from which memory allocated */
+{
+	upd_t*	update;
+
+	update = (upd_t*) mem_heap_alloc(heap, sizeof(upd_t));
+
+	update->info_bits = 0;
+	update->n_fields = n;
+	update->fields = (upd_field_t*)
+		mem_heap_alloc(heap, sizeof(upd_field_t) * n);
+
+	return(update);
+}
+
+/*********************************************************************//**
+Returns the number of fields in the update vector == number of columns
+to be updated by an update vector.
+@return	number of fields */
+UNIV_INLINE
+ulint
+upd_get_n_fields(
+/*=============*/
+	const upd_t*	update)	/*!< in: update vector */
+{
+	ut_ad(update);
+
+	return(update->n_fields);
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Returns the nth field of an update vector.
+@return	update vector field */
+UNIV_INLINE
+upd_field_t*
+upd_get_nth_field(
+/*==============*/
+	const upd_t*	update,	/*!< in: update vector */
+	ulint		n)	/*!< in: field position in update vector */
+{
+	ut_ad(update);
+	ut_ad(n < update->n_fields);
+
+	return((upd_field_t*) update->fields + n);
+}
+#endif /* UNIV_DEBUG */
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Sets an index field number to be updated by an update vector field. */
+UNIV_INLINE
+void
+upd_field_set_field_no(
+/*===================*/
+	upd_field_t*	upd_field,	/*!< in: update vector field */
+	ulint		field_no,	/*!< in: field number in a clustered
+					index */
+	dict_index_t*	index,		/*!< in: index */
+	trx_t*		trx)		/*!< in: transaction */
+{
+	upd_field->field_no = field_no;
+	upd_field->orig_len = 0;
+
+	if (UNIV_UNLIKELY(field_no >= dict_index_get_n_fields(index))) {
+		fprintf(stderr,
+			"InnoDB: Error: trying to access field %lu in ",
+			(ulong) field_no);
+		dict_index_name_print(stderr, trx, index);
+		fprintf(stderr, "\n"
+			"InnoDB: but index only has %lu fields\n",
+			(ulong) dict_index_get_n_fields(index));
+	}
+
+	dict_col_copy_type(dict_index_get_nth_col(index, field_no),
+			   dfield_get_type(&upd_field->new_val));
+}
+
+/*********************************************************************//**
+Returns a field of an update vector by field_no.
+@return	update vector field, or NULL */
+UNIV_INLINE
+const upd_field_t*
+upd_get_field_by_field_no(
+/*======================*/
+	const upd_t*	update,	/*!< in: update vector */
+	ulint		no)	/*!< in: field_no */
+{
+	ulint	i;
+	for (i = 0; i < upd_get_n_fields(update); i++) {
+		const upd_field_t*	uf = upd_get_nth_field(update, i);
+
+		if (uf->field_no == no) {
+
+			return(uf);
+		}
+	}
+
+	return(NULL);
+}
+
+/*********************************************************************//**
+Updates the trx id and roll ptr field in a clustered index record when
+a row is updated or marked deleted. */
+UNIV_INLINE
+void
+row_upd_rec_sys_fields(
+/*===================*/
+	rec_t*		rec,	/*!< in/out: record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	trx_t*		trx,	/*!< in: transaction */
+	roll_ptr_t	roll_ptr)/*!< in: roll ptr of the undo log record */
+{
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(rec_offs_validate(rec, index, offsets));
+#ifdef UNIV_SYNC_DEBUG
+	if (!rw_lock_own(&btr_search_latch, RW_LOCK_EX)) {
+		ut_ad(!buf_block_align(rec)->is_hashed);
+	}
+#endif /* UNIV_SYNC_DEBUG */
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		ulint	pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+		page_zip_write_trx_id_and_roll_ptr(page_zip, rec, offsets,
+						   pos, trx->id, roll_ptr);
+	} else {
+		ulint	offset = index->trx_id_offset;
+
+		if (!offset) {
+			offset = row_get_trx_id_offset(rec, index, offsets);
+		}
+
+#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
+# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
+#endif
+		trx_write_trx_id(rec + offset, trx->id);
+		trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
+	}
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/row0vers.h b/storage/innodb_plugin/include/row0vers.h
new file mode 100644
index 00000000000..5a2e38230d5
--- /dev/null
+++ b/storage/innodb_plugin/include/row0vers.h
@@ -0,0 +1,142 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0vers.h
+Row versions
+
+Created 2/6/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0vers_h
+#define row0vers_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "rem0types.h"
+#include "mtr0mtr.h"
+#include "read0types.h"
+
+/*****************************************************************//**
+Finds out if an active transaction has inserted or modified a secondary
+index record. NOTE: the kernel mutex is temporarily released in this
+function!
+@return NULL if committed, else the active transaction */
+UNIV_INTERN
+trx_t*
+row_vers_impl_x_locked_off_kernel(
+/*==============================*/
+	const rec_t*	rec,	/*!< in: record in a secondary index */
+	dict_index_t*	index,	/*!< in: the secondary index */
+	const ulint*	offsets);/*!< in: rec_get_offsets(rec, index) */
+/*****************************************************************//**
+Finds out if we must preserve a delete marked earlier version of a clustered
+index record, because it is >= the purge view.
+@return	TRUE if earlier version should be preserved */
+UNIV_INTERN
+ibool
+row_vers_must_preserve_del_marked(
+/*==============================*/
+	trx_id_t	trx_id,	/*!< in: transaction id in the version */
+	mtr_t*		mtr);	/*!< in: mtr holding the latch on the
+				clustered index record; it will also
+				hold the latch on purge_view */
+/*****************************************************************//**
+Finds out if a version of the record, where the version >= the current
+purge view, should have ientry as its secondary index entry. We check
+if there is any not delete marked version of the record where the trx
+id >= purge view, and the secondary index entry == ientry; exactly in
+this case we return TRUE.
+@return	TRUE if earlier version should have */
+UNIV_INTERN
+ibool
+row_vers_old_has_index_entry(
+/*=========================*/
+	ibool		also_curr,/*!< in: TRUE if also rec is included in the
+				versions to search; otherwise only versions
+				prior to it are searched */
+	const rec_t*	rec,	/*!< in: record in the clustered index; the
+				caller must have a latch on the page */
+	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec; it will
+				also hold the latch on purge_view */
+	dict_index_t*	index,	/*!< in: the secondary index */
+	const dtuple_t*	ientry);/*!< in: the secondary index entry */
+/*****************************************************************//**
+Constructs the version of a clustered index record which a consistent
+read should see. We assume that the trx id stored in rec is such that
+the consistent read should not see rec in its present version.
+@return	DB_SUCCESS or DB_MISSING_HISTORY */
+UNIV_INTERN
+ulint
+row_vers_build_for_consistent_read(
+/*===============================*/
+	const rec_t*	rec,	/*!< in: record in a clustered index; the
+				caller must have a latch on the page; this
+				latch locks the top of the stack of versions
+				of this records */
+	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec; it will
+				also hold the latch on purge_view */
+	dict_index_t*	index,	/*!< in: the clustered index */
+	ulint**		offsets,/*!< in/out: offsets returned by
+				rec_get_offsets(rec, index) */
+	read_view_t*	view,	/*!< in: the consistent read view */
+	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
+				the offsets are allocated */
+	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
+				*old_vers is allocated; memory for possible
+				intermediate versions is allocated and freed
+				locally within the function */
+	rec_t**		old_vers);/*!< out, own: old version, or NULL if the
+				record does not exist in the view, that is,
+				it was freshly inserted afterwards */
+
+/*****************************************************************//**
+Constructs the last committed version of a clustered index record,
+which should be seen by a semi-consistent read.
+@return	DB_SUCCESS or DB_MISSING_HISTORY */
+UNIV_INTERN
+ulint
+row_vers_build_for_semi_consistent_read(
+/*====================================*/
+	const rec_t*	rec,	/*!< in: record in a clustered index; the
+				caller must have a latch on the page; this
+				latch locks the top of the stack of versions
+				of this records */
+	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
+	dict_index_t*	index,	/*!< in: the clustered index */
+	ulint**		offsets,/*!< in/out: offsets returned by
+				rec_get_offsets(rec, index) */
+	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
+				the offsets are allocated */
+	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
+				*old_vers is allocated; memory for possible
+				intermediate versions is allocated and freed
+				locally within the function */
+	const rec_t**	old_vers);/*!< out: rec, old version, or NULL if the
+				record does not exist in the view, that is,
+				it was freshly inserted afterwards */
+
+
+#ifndef UNIV_NONINL
+#include "row0vers.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/row0vers.ic b/storage/innodb_plugin/include/row0vers.ic
new file mode 100644
index 00000000000..8bb3a5c0cb3
--- /dev/null
+++ b/storage/innodb_plugin/include/row0vers.ic
@@ -0,0 +1,30 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0vers.ic
+Row versions
+
+Created 2/6/1997 Heikki Tuuri
+*******************************************************/
+
+#include "row0row.h"
+#include "dict0dict.h"
+#include "read0read.h"
+#include "page0page.h"
+#include "log0recv.h"
diff --git a/storage/innodb_plugin/include/srv0que.h b/storage/innodb_plugin/include/srv0que.h
new file mode 100644
index 00000000000..82ee7739ef7
--- /dev/null
+++ b/storage/innodb_plugin/include/srv0que.h
@@ -0,0 +1,42 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/srv0que.h
+Server query execution
+
+Created 6/5/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef srv0que_h
+#define srv0que_h
+
+#include "univ.i"
+#include "que0types.h"
+
+/**********************************************************************//**
+Enqueues a task to server task queue and releases a worker thread, if there
+is a suspended one. */
+UNIV_INTERN
+void
+srv_que_task_enqueue_low(
+/*=====================*/
+	que_thr_t*	thr);	/*!< in: query thread */
+
+#endif
+
diff --git a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h
new file mode 100644
index 00000000000..499bccfe2b8
--- /dev/null
+++ b/storage/innodb_plugin/include/srv0srv.h
@@ -0,0 +1,664 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, 2009, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/***********************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+***********************************************************************/
+
+/**************************************************//**
+@file include/srv0srv.h
+The server main program
+
+Created 10/10/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef srv0srv_h
+#define srv0srv_h
+
+#include "univ.i"
+#ifndef UNIV_HOTBACKUP
+#include "sync0sync.h"
+#include "os0sync.h"
+#include "que0types.h"
+#include "trx0types.h"
+
+extern const char*	srv_main_thread_op_info;
+
+/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
+extern const char	srv_mysql50_table_name_prefix[9];
+
+/* When this event is set the lock timeout and InnoDB monitor
+thread starts running */
+extern os_event_t	srv_lock_timeout_thread_event;
+
+/* If the last data file is auto-extended, we add this many pages to it
+at a time */
+#define SRV_AUTO_EXTEND_INCREMENT	\
+	(srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
+
+/* This is set to TRUE if the MySQL user has set it in MySQL */
+extern ibool	srv_lower_case_table_names;
+
+/* Mutex for locking srv_monitor_file */
+extern mutex_t	srv_monitor_file_mutex;
+/* Temporary file for innodb monitor output */
+extern FILE*	srv_monitor_file;
+/* Mutex for locking srv_dict_tmpfile.
+This mutex has a very high rank; threads reserving it should not
+be holding any InnoDB latches. */
+extern mutex_t	srv_dict_tmpfile_mutex;
+/* Temporary file for output from the data dictionary */
+extern FILE*	srv_dict_tmpfile;
+/* Mutex for locking srv_misc_tmpfile.
+This mutex has a very low rank; threads reserving it should not
+acquire any further latches or sleep before releasing this one. */
+extern mutex_t	srv_misc_tmpfile_mutex;
+/* Temporary file for miscellanous diagnostic output */
+extern FILE*	srv_misc_tmpfile;
+
+/* Server parameters which are read from the initfile */
+
+extern char*	srv_data_home;
+#ifdef UNIV_LOG_ARCHIVE
+extern char*	srv_arch_dir;
+#endif /* UNIV_LOG_ARCHIVE */
+
+/** store to its own file each table created by an user; data
+dictionary tables are in the system tablespace 0 */
+#ifndef UNIV_HOTBACKUP
+extern my_bool	srv_file_per_table;
+#else
+extern ibool	srv_file_per_table;
+#endif /* UNIV_HOTBACKUP */
+/** The file format to use on new *.ibd files. */
+extern ulint	srv_file_format;
+/** Whether to check file format during startup.  A value of
+DICT_TF_FORMAT_MAX + 1 means no checking ie. FALSE.  The default is to
+set it to the highest format we support. */
+extern ulint	srv_check_file_format_at_startup;
+/** Place locks to records only i.e. do not use next-key locking except
+on duplicate key checking and foreign key checking */
+extern ibool	srv_locks_unsafe_for_binlog;
+#endif /* !UNIV_HOTBACKUP */
+
+extern ulint	srv_n_data_files;
+extern char**	srv_data_file_names;
+extern ulint*	srv_data_file_sizes;
+extern ulint*	srv_data_file_is_raw_partition;
+
+extern ibool	srv_auto_extend_last_data_file;
+extern ulint	srv_last_file_size_max;
+extern char**	srv_log_group_home_dirs;
+#ifndef UNIV_HOTBACKUP
+extern ulong	srv_auto_extend_increment;
+
+extern ibool	srv_created_new_raw;
+
+extern ulint	srv_n_log_groups;
+extern ulint	srv_n_log_files;
+extern ulint	srv_log_file_size;
+extern ulint	srv_log_buffer_size;
+extern ulong	srv_flush_log_at_trx_commit;
+extern char	srv_adaptive_flushing;
+
+
+/* The sort order table of the MySQL latin1_swedish_ci character set
+collation */
+extern const byte*	srv_latin1_ordering;
+#ifndef UNIV_HOTBACKUP
+extern my_bool	srv_use_sys_malloc;
+#else
+extern ibool	srv_use_sys_malloc;
+#endif /* UNIV_HOTBACKUP */
+extern ulint	srv_buf_pool_size;	/*!< requested size in bytes */
+extern ulint	srv_buf_pool_old_size;	/*!< previously requested size */
+extern ulint	srv_buf_pool_curr_size;	/*!< current size in bytes */
+extern ulint	srv_mem_pool_size;
+extern ulint	srv_lock_table_size;
+
+extern ulint	srv_n_file_io_threads;
+extern ulong	srv_read_ahead_threshold;
+extern ulint	srv_n_read_io_threads;
+extern ulint	srv_n_write_io_threads;
+
+/* Number of IO operations per second the server can do */
+extern ulong    srv_io_capacity;
+/* Returns the number of IO operations that is X percent of the
+capacity. PCT_IO(5) -> returns the number of IO operations that
+is 5% of the max where max is srv_io_capacity.  */
+#define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) p / 100.0)))
+
+#ifdef UNIV_LOG_ARCHIVE
+extern ibool	srv_log_archive_on;
+extern ibool	srv_archive_recovery;
+extern dulint	srv_archive_recovery_limit_lsn;
+#endif /* UNIV_LOG_ARCHIVE */
+
+extern char*	srv_file_flush_method_str;
+extern ulint	srv_unix_file_flush_method;
+extern ulint	srv_win_file_flush_method;
+
+extern ulint	srv_max_n_open_files;
+
+extern ulint	srv_max_dirty_pages_pct;
+
+extern ulint	srv_force_recovery;
+extern ulong	srv_thread_concurrency;
+
+extern ulint	srv_max_n_threads;
+
+extern lint	srv_conc_n_threads;
+
+extern ulint	srv_fast_shutdown;	 /* If this is 1, do not do a
+					 purge and index buffer merge.
+					 If this 2, do not even flush the
+					 buffer pool to data files at the
+					 shutdown: we effectively 'crash'
+					 InnoDB (but lose no committed
+					 transactions). */
+extern ibool	srv_innodb_status;
+
+extern unsigned long long	srv_stats_sample_pages;
+
+extern ibool	srv_use_doublewrite_buf;
+extern ibool	srv_use_checksums;
+
+extern ibool	srv_set_thread_priorities;
+extern int	srv_query_thread_priority;
+
+extern ulong	srv_max_buf_pool_modified_pct;
+extern ulong	srv_max_purge_lag;
+
+extern ulong	srv_replication_delay;
+/*-------------------------------------------*/
+
+extern ulint	srv_n_rows_inserted;
+extern ulint	srv_n_rows_updated;
+extern ulint	srv_n_rows_deleted;
+extern ulint	srv_n_rows_read;
+
+extern ibool	srv_print_innodb_monitor;
+extern ibool	srv_print_innodb_lock_monitor;
+extern ibool	srv_print_innodb_tablespace_monitor;
+extern ibool	srv_print_verbose_log;
+extern ibool	srv_print_innodb_table_monitor;
+
+extern ibool	srv_lock_timeout_and_monitor_active;
+extern ibool	srv_error_monitor_active;
+
+extern ulong	srv_n_spin_wait_rounds;
+extern ulong	srv_n_free_tickets_to_enter;
+extern ulong	srv_thread_sleep_delay;
+extern ulong	srv_spin_wait_delay;
+extern ibool	srv_priority_boost;
+
+extern	ulint	srv_mem_pool_size;
+extern	ulint	srv_lock_table_size;
+
+#ifdef UNIV_DEBUG
+extern	ibool	srv_print_thread_releases;
+extern	ibool	srv_print_lock_waits;
+extern	ibool	srv_print_buf_io;
+extern	ibool	srv_print_log_io;
+extern	ibool	srv_print_latch_waits;
+#else /* UNIV_DEBUG */
+# define srv_print_thread_releases	FALSE
+# define srv_print_lock_waits		FALSE
+# define srv_print_buf_io		FALSE
+# define srv_print_log_io		FALSE
+# define srv_print_latch_waits		FALSE
+#endif /* UNIV_DEBUG */
+
+extern ulint	srv_activity_count;
+extern ulint	srv_fatal_semaphore_wait_threshold;
+extern ulint	srv_dml_needed_delay;
+
+extern mutex_t*	kernel_mutex_temp;/* mutex protecting the server, trx structs,
+				query threads, and lock table: we allocate
+				it from dynamic memory to get it to the
+				same DRAM page as other hotspot semaphores */
+#define kernel_mutex (*kernel_mutex_temp)
+
+#define SRV_MAX_N_IO_THREADS	130
+
+/* Array of English strings describing the current state of an
+i/o handler thread */
+extern const char* srv_io_thread_op_info[];
+extern const char* srv_io_thread_function[];
+
+/* the number of the log write requests done */
+extern ulint srv_log_write_requests;
+
+/* the number of physical writes to the log performed */
+extern ulint srv_log_writes;
+
+/* amount of data written to the log files in bytes */
+extern ulint srv_os_log_written;
+
+/* amount of writes being done to the log files */
+extern ulint srv_os_log_pending_writes;
+
+/* we increase this counter, when there we don't have enough space in the
+log buffer and have to flush it */
+extern ulint srv_log_waits;
+
+/* variable that counts amount of data read in total (in bytes) */
+extern ulint srv_data_read;
+
+/* here we count the amount of data written in total (in bytes) */
+extern ulint srv_data_written;
+
+/* this variable counts the amount of times, when the doublewrite buffer
+was flushed */
+extern ulint srv_dblwr_writes;
+
+/* here we store the number of pages that have been flushed to the
+doublewrite buffer */
+extern ulint srv_dblwr_pages_written;
+
+/* in this variable we store the number of write requests issued */
+extern ulint srv_buf_pool_write_requests;
+
+/* here we store the number of times when we had to wait for a free page
+in the buffer pool. It happens when the buffer pool is full and we need
+to make a flush, in order to be able to read or create a page. */
+extern ulint srv_buf_pool_wait_free;
+
+/* variable to count the number of pages that were written from the
+buffer pool to disk */
+extern ulint srv_buf_pool_flushed;
+
+/** Number of buffer pool reads that led to the
+reading of a disk page */
+extern ulint srv_buf_pool_reads;
+/** Number of sequential read-aheads */
+extern ulint srv_read_ahead_seq;
+/** Number of random read-aheads */
+extern ulint srv_read_ahead_rnd;
+
+/** Status variables to be passed to MySQL */
+typedef struct export_var_struct export_struc;
+
+/** Status variables to be passed to MySQL */
+extern export_struc export_vars;
+
+/** The server system */
+typedef struct srv_sys_struct	srv_sys_t;
+
+/** The server system */
+extern srv_sys_t*	srv_sys;
+#endif /* !UNIV_HOTBACKUP */
+
+/** Types of raw partitions in innodb_data_file_path */
+enum {
+	SRV_NOT_RAW = 0,	/*!< Not a raw partition */
+	SRV_NEW_RAW,		/*!< A 'newraw' partition, only to be
+				initialized */
+	SRV_OLD_RAW		/*!< An initialized raw partition */
+};
+
+/** Alternatives for the file flush option in Unix; see the InnoDB manual
+about what these mean */
+enum {
+	SRV_UNIX_FSYNC = 1,	/*!< fsync, the default */
+	SRV_UNIX_O_DSYNC,	/*!< open log files in O_SYNC mode */
+	SRV_UNIX_LITTLESYNC,	/*!< do not call os_file_flush()
+				when writing data files, but do flush
+				after writing to log files */
+	SRV_UNIX_NOSYNC,	/*!< do not flush after writing */
+	SRV_UNIX_O_DIRECT	/*!< invoke os_file_set_nocache() on
+				data files */
+};
+
+/** Alternatives for file i/o in Windows */
+enum {
+	SRV_WIN_IO_NORMAL = 1,	/*!< buffered I/O */
+	SRV_WIN_IO_UNBUFFERED	/*!< unbuffered I/O; this is the default */
+};
+
+/** Alternatives for srv_force_recovery. Non-zero values are intended
+to help the user get a damaged database up so that he can dump intact
+tables and rows with SELECT INTO OUTFILE. The database must not otherwise
+be used with these options! A bigger number below means that all precautions
+of lower numbers are included. */
+enum {
+	SRV_FORCE_IGNORE_CORRUPT = 1,	/*!< let the server run even if it
+					detects a corrupt page */
+	SRV_FORCE_NO_BACKGROUND	= 2,	/*!< prevent the main thread from
+					running: if a crash would occur
+					in purge, this prevents it */
+	SRV_FORCE_NO_TRX_UNDO = 3,	/*!< do not run trx rollback after
+					recovery */
+	SRV_FORCE_NO_IBUF_MERGE = 4,	/*!< prevent also ibuf operations:
+					if they would cause a crash, better
+					not do them */
+	SRV_FORCE_NO_UNDO_LOG_SCAN = 5,	/*!< do not look at undo logs when
+					starting the database: InnoDB will
+					treat even incomplete transactions
+					as committed */
+	SRV_FORCE_NO_LOG_REDO = 6	/*!< do not do the log roll-forward
+					in connection with recovery */
+};
+
+#ifndef UNIV_HOTBACKUP
+/** Types of threads existing in the system. */
+enum srv_thread_type {
+	SRV_COM = 1,	/**< threads serving communication and queries */
+	SRV_CONSOLE,	/**< thread serving console */
+	SRV_WORKER,	/**< threads serving parallelized queries and
+			queries released from lock wait */
+#if 0
+	/* Utility threads */
+	SRV_BUFFER,	/**< thread flushing dirty buffer blocks */
+	SRV_RECOVERY,	/**< threads finishing a recovery */
+	SRV_INSERT,	/**< thread flushing the insert buffer to disk */
+#endif
+	SRV_MASTER	/**< the master thread, (whose type number must
+			be biggest) */
+};
+
+/*********************************************************************//**
+Boots Innobase server.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+srv_boot(void);
+/*==========*/
+/*********************************************************************//**
+Initializes the server. */
+UNIV_INTERN
+void
+srv_init(void);
+/*==========*/
+/*********************************************************************//**
+Frees the OS fast mutex created in srv_boot(). */
+UNIV_INTERN
+void
+srv_free(void);
+/*==========*/
+/*********************************************************************//**
+Initializes the synchronization primitives, memory system, and the thread
+local storage. */
+UNIV_INTERN
+void
+srv_general_init(void);
+/*==================*/
+/*********************************************************************//**
+Gets the number of threads in the system.
+@return	sum of srv_n_threads[] */
+UNIV_INTERN
+ulint
+srv_get_n_threads(void);
+/*===================*/
+/*********************************************************************//**
+Returns the calling thread type.
+@return	SRV_COM, ... */
+
+enum srv_thread_type
+srv_get_thread_type(void);
+/*=====================*/
+/*********************************************************************//**
+Sets the info describing an i/o thread current state. */
+UNIV_INTERN
+void
+srv_set_io_thread_op_info(
+/*======================*/
+	ulint		i,	/*!< in: the 'segment' of the i/o thread */
+	const char*	str);	/*!< in: constant char string describing the
+				state */
+/*********************************************************************//**
+Releases threads of the type given from suspension in the thread table.
+NOTE! The server mutex has to be reserved by the caller!
+@return number of threads released: this may be less than n if not
+enough threads were suspended at the moment */
+UNIV_INTERN
+ulint
+srv_release_threads(
+/*================*/
+	enum srv_thread_type	type,	/*!< in: thread type */
+	ulint			n);	/*!< in: number of threads to release */
+/*********************************************************************//**
+The master thread controlling the server.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+srv_master_thread(
+/*==============*/
+	void*	arg);	/*!< in: a dummy parameter required by
+			os_thread_create */
+/*******************************************************************//**
+Tells the Innobase server that there has been activity in the database
+and wakes up the master thread if it is suspended (not sleeping). Used
+in the MySQL interface. Note that there is a small chance that the master
+thread stays suspended (we do not protect our operation with the kernel
+mutex, for performace reasons). */
+UNIV_INTERN
+void
+srv_active_wake_master_thread(void);
+/*===============================*/
+/*******************************************************************//**
+Wakes up the master thread if it is suspended or being suspended. */
+UNIV_INTERN
+void
+srv_wake_master_thread(void);
+/*========================*/
+/*********************************************************************//**
+Puts an OS thread to wait if there are too many concurrent threads
+(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
+UNIV_INTERN
+void
+srv_conc_enter_innodb(
+/*==================*/
+	trx_t*	trx);	/*!< in: transaction object associated with the
+			thread */
+/*********************************************************************//**
+This lets a thread enter InnoDB regardless of the number of threads inside
+InnoDB. This must be called when a thread ends a lock wait. */
+UNIV_INTERN
+void
+srv_conc_force_enter_innodb(
+/*========================*/
+	trx_t*	trx);	/*!< in: transaction object associated with the
+			thread */
+/*********************************************************************//**
+This must be called when a thread exits InnoDB in a lock wait or at the
+end of an SQL statement. */
+UNIV_INTERN
+void
+srv_conc_force_exit_innodb(
+/*=======================*/
+	trx_t*	trx);	/*!< in: transaction object associated with the
+			thread */
+/*********************************************************************//**
+This must be called when a thread exits InnoDB. */
+UNIV_INTERN
+void
+srv_conc_exit_innodb(
+/*=================*/
+	trx_t*	trx);	/*!< in: transaction object associated with the
+			thread */
+/***************************************************************//**
+Puts a MySQL OS thread to wait for a lock to be released. If an error
+occurs during the wait trx->error_state associated with thr is
+!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
+are possible errors. DB_DEADLOCK is returned if selective deadlock
+resolution chose this transaction as a victim. */
+UNIV_INTERN
+void
+srv_suspend_mysql_thread(
+/*=====================*/
+	que_thr_t*	thr);	/*!< in: query thread associated with the MySQL
+				OS thread */
+/********************************************************************//**
+Releases a MySQL OS thread waiting for a lock to be released, if the
+thread is already suspended. */
+UNIV_INTERN
+void
+srv_release_mysql_thread_if_suspended(
+/*==================================*/
+	que_thr_t*	thr);	/*!< in: query thread associated with the
+				MySQL OS thread	 */
+/*********************************************************************//**
+A thread which wakes up threads whose lock wait may have lasted too long.
+This also prints the info output by various InnoDB monitors.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+srv_lock_timeout_and_monitor_thread(
+/*================================*/
+	void*	arg);	/*!< in: a dummy parameter required by
+			os_thread_create */
+/*********************************************************************//**
+A thread which prints warnings about semaphore waits which have lasted
+too long. These can be used to track bugs which cause hangs.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+srv_error_monitor_thread(
+/*=====================*/
+	void*	arg);	/*!< in: a dummy parameter required by
+			os_thread_create */
+/******************************************************************//**
+Outputs to a file the output of the InnoDB Monitor. */
+UNIV_INTERN
+void
+srv_printf_innodb_monitor(
+/*======================*/
+	FILE*	file,		/*!< in: output stream */
+	ulint*	trx_start,	/*!< out: file position of the start of
+				the list of active transactions */
+	ulint*	trx_end);	/*!< out: file position of the end of
+				the list of active transactions */
+
+/******************************************************************//**
+Function to pass InnoDB status variables to MySQL */
+UNIV_INTERN
+void
+srv_export_innodb_status(void);
+/*==========================*/
+
+/** Thread slot in the thread table */
+typedef struct srv_slot_struct	srv_slot_t;
+
+/** Thread table is an array of slots */
+typedef srv_slot_t	srv_table_t;
+
+/** Status variables to be passed to MySQL */
+struct export_var_struct{
+	ulint innodb_data_pending_reads;	/*!< Pending reads */
+	ulint innodb_data_pending_writes;	/*!< Pending writes */
+	ulint innodb_data_pending_fsyncs;	/*!< Pending fsyncs */
+	ulint innodb_data_fsyncs;		/*!< Number of fsyncs so far */
+	ulint innodb_data_read;			/*!< Data bytes read */
+	ulint innodb_data_writes;		/*!< I/O write requests */
+	ulint innodb_data_written;		/*!< Data bytes written */
+	ulint innodb_data_reads;		/*!< I/O read requests */
+	ulint innodb_buffer_pool_pages_total;	/*!< Buffer pool size */
+	ulint innodb_buffer_pool_pages_data;	/*!< Data pages */
+	ulint innodb_buffer_pool_pages_dirty;	/*!< Dirty data pages */
+	ulint innodb_buffer_pool_pages_misc;	/*!< Miscellanous pages */
+	ulint innodb_buffer_pool_pages_free;	/*!< Free pages */
+#ifdef UNIV_DEBUG
+	ulint innodb_buffer_pool_pages_latched;	/*!< Latched pages */
+#endif /* UNIV_DEBUG */
+	ulint innodb_buffer_pool_read_requests;	/*!< buf_pool->n_page_gets */
+	ulint innodb_buffer_pool_reads;		/*!< srv_buf_pool_reads */
+	ulint innodb_buffer_pool_wait_free;	/*!< srv_buf_pool_wait_free */
+	ulint innodb_buffer_pool_pages_flushed;	/*!< srv_buf_pool_flushed */
+	ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
+	ulint innodb_buffer_pool_read_ahead_seq;/*!< srv_read_ahead_seq */
+	ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */
+	ulint innodb_dblwr_pages_written;	/*!< srv_dblwr_pages_written */
+	ulint innodb_dblwr_writes;		/*!< srv_dblwr_writes */
+	ibool innodb_have_atomic_builtins;	/*!< HAVE_ATOMIC_BUILTINS */
+	ulint innodb_log_waits;			/*!< srv_log_waits */
+	ulint innodb_log_write_requests;	/*!< srv_log_write_requests */
+	ulint innodb_log_writes;		/*!< srv_log_writes */
+	ulint innodb_os_log_written;		/*!< srv_os_log_written */
+	ulint innodb_os_log_fsyncs;		/*!< fil_n_log_flushes */
+	ulint innodb_os_log_pending_writes;	/*!< srv_os_log_pending_writes */
+	ulint innodb_os_log_pending_fsyncs;	/*!< fil_n_pending_log_flushes */
+	ulint innodb_page_size;			/*!< UNIV_PAGE_SIZE */
+	ulint innodb_pages_created;		/*!< buf_pool->n_pages_created */
+	ulint innodb_pages_read;		/*!< buf_pool->n_pages_read */
+	ulint innodb_pages_written;		/*!< buf_pool->n_pages_written */
+	ulint innodb_row_lock_waits;		/*!< srv_n_lock_wait_count */
+	ulint innodb_row_lock_current_waits;	/*!< srv_n_lock_wait_current_count */
+	ib_int64_t innodb_row_lock_time;	/*!< srv_n_lock_wait_time
+						/ 1000 */
+	ulint innodb_row_lock_time_avg;		/*!< srv_n_lock_wait_time
+						/ 1000
+						/ srv_n_lock_wait_count */
+	ulint innodb_row_lock_time_max;		/*!< srv_n_lock_max_wait_time
+						/ 1000 */
+	ulint innodb_rows_read;			/*!< srv_n_rows_read */
+	ulint innodb_rows_inserted;		/*!< srv_n_rows_inserted */
+	ulint innodb_rows_updated;		/*!< srv_n_rows_updated */
+	ulint innodb_rows_deleted;		/*!< srv_n_rows_deleted */
+};
+
+/** The server system struct */
+struct srv_sys_struct{
+	srv_table_t*	threads;	/*!< server thread table */
+	UT_LIST_BASE_NODE_T(que_thr_t)
+			tasks;		/*!< task queue */
+};
+
+extern ulint	srv_n_threads_active[];
+#else /* !UNIV_HOTBACKUP */
+# define srv_use_checksums			TRUE
+# define srv_use_adaptive_hash_indexes		FALSE
+# define srv_force_recovery			0UL
+# define srv_set_io_thread_op_info(t,info)	((void) 0)
+# define srv_is_being_started			0
+# define srv_win_file_flush_method		SRV_WIN_IO_UNBUFFERED
+# define srv_unix_file_flush_method		SRV_UNIX_O_DSYNC
+# define srv_start_raw_disk_in_use		0
+# define srv_file_per_table			1
+#endif /* !UNIV_HOTBACKUP */
+
+#endif
diff --git a/storage/innodb_plugin/include/srv0srv.ic b/storage/innodb_plugin/include/srv0srv.ic
new file mode 100644
index 00000000000..8a1a678a016
--- /dev/null
+++ b/storage/innodb_plugin/include/srv0srv.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/srv0srv.ic
+Server main program
+
+Created 10/4/1995 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innodb_plugin/include/srv0start.h b/storage/innodb_plugin/include/srv0start.h
new file mode 100644
index 00000000000..8abf15da9c1
--- /dev/null
+++ b/storage/innodb_plugin/include/srv0start.h
@@ -0,0 +1,134 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/srv0start.h
+Starts the Innobase database server
+
+Created 10/10/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef srv0start_h
+#define srv0start_h
+
+#include "univ.i"
+#include "ut0byte.h"
+
+/*********************************************************************//**
+Normalizes a directory path for Windows: converts slashes to backslashes. */
+UNIV_INTERN
+void
+srv_normalize_path_for_win(
+/*=======================*/
+	char*	str);	/*!< in/out: null-terminated character string */
+/*********************************************************************//**
+Reads the data files and their sizes from a character string given in
+the .cnf file.
+@return	TRUE if ok, FALSE on parse error */
+UNIV_INTERN
+ibool
+srv_parse_data_file_paths_and_sizes(
+/*================================*/
+	char*	str);	/*!< in/out: the data file path string */
+/*********************************************************************//**
+Reads log group home directories from a character string given in
+the .cnf file.
+@return	TRUE if ok, FALSE on parse error */
+UNIV_INTERN
+ibool
+srv_parse_log_group_home_dirs(
+/*==========================*/
+	char*	str);	/*!< in/out: character string */
+/*********************************************************************//**
+Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
+and srv_parse_log_group_home_dirs(). */
+UNIV_INTERN
+void
+srv_free_paths_and_sizes(void);
+/*==========================*/
+/*********************************************************************//**
+Adds a slash or a backslash to the end of a string if it is missing
+and the string is not empty.
+@return	string which has the separator if the string is not empty */
+UNIV_INTERN
+char*
+srv_add_path_separator_if_needed(
+/*=============================*/
+	char*	str);	/*!< in: null-terminated character string */
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Starts Innobase and creates a new database if database files
+are not found and the user wants.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+int
+innobase_start_or_create_for_mysql(void);
+/*====================================*/
+/****************************************************************//**
+Shuts down the Innobase database.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+int
+innobase_shutdown_for_mysql(void);
+/*=============================*/
+/** Log sequence number at shutdown */
+extern	ib_uint64_t	srv_shutdown_lsn;
+/** Log sequence number immediately after startup */
+extern	ib_uint64_t	srv_start_lsn;
+
+#ifdef __NETWARE__
+void set_panic_flag_for_netware(void);
+#endif
+
+#ifdef HAVE_DARWIN_THREADS
+/** TRUE if the F_FULLFSYNC option is available */
+extern	ibool	srv_have_fullfsync;
+#endif
+
+/** TRUE if the server is being started */
+extern	ibool	srv_is_being_started;
+/** TRUE if the server was successfully started */
+extern	ibool	srv_was_started;
+/** TRUE if the server is being started, before rolling back any
+incomplete transactions */
+extern	ibool	srv_startup_is_before_trx_rollback_phase;
+
+/** TRUE if a raw partition is in use */
+extern	ibool	srv_start_raw_disk_in_use;
+
+
+/** Shutdown state */
+enum srv_shutdown_state {
+	SRV_SHUTDOWN_NONE = 0,	/*!< Database running normally */
+	SRV_SHUTDOWN_CLEANUP,	/*!< Cleaning up in
+				logs_empty_and_mark_files_at_shutdown() */
+	SRV_SHUTDOWN_LAST_PHASE,/*!< Last phase after ensuring that
+				the buffer pool can be freed: flush
+				all file spaces and close all files */
+	SRV_SHUTDOWN_EXIT_THREADS/*!< Exit all threads */
+};
+
+/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
+SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
+extern	enum srv_shutdown_state	srv_shutdown_state;
+#endif /* !UNIV_HOTBACKUP */
+
+/** Log 'spaces' have id's >= this */
+#define SRV_LOG_SPACE_FIRST_ID		0xFFFFFFF0UL
+
+#endif
diff --git a/storage/innobase/include/sync0arr.h b/storage/innodb_plugin/include/sync0arr.h
similarity index 53%
rename from storage/innobase/include/sync0arr.h
rename to storage/innodb_plugin/include/sync0arr.h
index fae26b7a63e..5f1280f5e28 100644
--- a/storage/innobase/include/sync0arr.h
+++ b/storage/innodb_plugin/include/sync0arr.h
@@ -1,7 +1,24 @@
-/******************************************************
-The wait array used in synchronization primitives
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0arr.h
+The wait array used in synchronization primitives
 
 Created 9/5/1995 Heikki Tuuri
 *******************************************************/
@@ -14,105 +31,108 @@ Created 9/5/1995 Heikki Tuuri
 #include "ut0mem.h"
 #include "os0thread.h"
 
+/** Synchronization wait array cell */
 typedef struct sync_cell_struct		sync_cell_t;
+/** Synchronization wait array */
 typedef struct sync_array_struct	sync_array_t;
 
-#define SYNC_ARRAY_OS_MUTEX	1
-#define SYNC_ARRAY_MUTEX	2
+/** Parameters for sync_array_create() @{ */
+#define SYNC_ARRAY_OS_MUTEX	1	/*!< protected by os_mutex_t */
+#define SYNC_ARRAY_MUTEX	2	/*!< protected by mutex_t */
+/* @} */
 
-/***********************************************************************
+/*******************************************************************//**
 Creates a synchronization wait array. It is protected by a mutex
 which is automatically reserved when the functions operating on it
-are called. */
-
+are called.
+@return	own: created wait array */
+UNIV_INTERN
 sync_array_t*
 sync_array_create(
 /*==============*/
-				/* out, own: created wait array */
-	ulint	n_cells,	/* in: number of cells in the array
+	ulint	n_cells,	/*!< in: number of cells in the array
 				to create */
-	ulint	protection);	/* in: either SYNC_ARRAY_OS_MUTEX or
+	ulint	protection);	/*!< in: either SYNC_ARRAY_OS_MUTEX or
 				SYNC_ARRAY_MUTEX: determines the type
 				of mutex protecting the data structure */
-/**********************************************************************
+/******************************************************************//**
 Frees the resources in a wait array. */
-
+UNIV_INTERN
 void
 sync_array_free(
 /*============*/
-	sync_array_t*	arr);	/* in, own: sync wait array */
-/**********************************************************************
+	sync_array_t*	arr);	/*!< in, own: sync wait array */
+/******************************************************************//**
 Reserves a wait array cell for waiting for an object.
 The event of the cell is reset to nonsignalled state. */
-
+UNIV_INTERN
 void
 sync_array_reserve_cell(
 /*====================*/
-	sync_array_t*	arr,	/* in: wait array */
-	void*		object, /* in: pointer to the object to wait for */
-	ulint		type,	/* in: lock request type */
-	const char*	file,	/* in: file where requested */
-	ulint		line,	/* in: line where requested */
-	ulint*		index); /* out: index of the reserved cell */
-/**********************************************************************
+	sync_array_t*	arr,	/*!< in: wait array */
+	void*		object, /*!< in: pointer to the object to wait for */
+	ulint		type,	/*!< in: lock request type */
+	const char*	file,	/*!< in: file where requested */
+	ulint		line,	/*!< in: line where requested */
+	ulint*		index); /*!< out: index of the reserved cell */
+/******************************************************************//**
 This function should be called when a thread starts to wait on
 a wait array cell. In the debug version this function checks
 if the wait for a semaphore will result in a deadlock, in which
 case prints info and asserts. */
-
+UNIV_INTERN
 void
 sync_array_wait_event(
 /*==================*/
-	sync_array_t*	arr,	/* in: wait array */
-	ulint		index);	 /* in: index of the reserved cell */
-/**********************************************************************
+	sync_array_t*	arr,	/*!< in: wait array */
+	ulint		index);	 /*!< in: index of the reserved cell */
+/******************************************************************//**
 Frees the cell. NOTE! sync_array_wait_event frees the cell
 automatically! */
-
+UNIV_INTERN
 void
 sync_array_free_cell(
 /*=================*/
-	sync_array_t*	arr,	/* in: wait array */
-	ulint		index);	/* in: index of the cell in array */
-/**************************************************************************
+	sync_array_t*	arr,	/*!< in: wait array */
+	ulint		index);	/*!< in: index of the cell in array */
+/**********************************************************************//**
 Note that one of the wait objects was signalled. */
-
+UNIV_INTERN
 void
 sync_array_object_signalled(
 /*========================*/
-	sync_array_t*	arr);	/* in: wait array */
-/**************************************************************************
+	sync_array_t*	arr);	/*!< in: wait array */
+/**********************************************************************//**
 If the wakeup algorithm does not work perfectly at semaphore relases,
 this function will do the waking (see the comment in mutex_exit). This
 function should be called about every 1 second in the server. */
-
+UNIV_INTERN
 void
 sync_arr_wake_threads_if_sema_free(void);
 /*====================================*/
-/**************************************************************************
-Prints warnings of long semaphore waits to stderr. */
-
+/**********************************************************************//**
+Prints warnings of long semaphore waits to stderr.
+@return	TRUE if fatal semaphore wait threshold was exceeded */
+UNIV_INTERN
 ibool
 sync_array_print_long_waits(void);
 /*=============================*/
-			/* out: TRUE if fatal semaphore wait threshold
-			was exceeded */
-/************************************************************************
+/********************************************************************//**
 Validates the integrity of the wait array. Checks
 that the number of reserved cells equals the count variable. */
-
+UNIV_INTERN
 void
 sync_array_validate(
 /*================*/
-	sync_array_t*	arr);	/* in: sync wait array */
-/**************************************************************************
+	sync_array_t*	arr);	/*!< in: sync wait array */
+/**********************************************************************//**
 Prints info of the wait array. */
-
+UNIV_INTERN
 void
 sync_array_print_info(
 /*==================*/
-	FILE*		file,	/* in: file where to print */
-	sync_array_t*	arr);	/* in: wait array */
+	FILE*		file,	/*!< in: file where to print */
+	sync_array_t*	arr);	/*!< in: wait array */
 
 
 #ifndef UNIV_NONINL
diff --git a/storage/innodb_plugin/include/sync0arr.ic b/storage/innodb_plugin/include/sync0arr.ic
new file mode 100644
index 00000000000..bf57f5b2dc2
--- /dev/null
+++ b/storage/innodb_plugin/include/sync0arr.ic
@@ -0,0 +1,27 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0arr.ic
+The wait array for synchronization primitives
+
+Inline code
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
diff --git a/storage/innobase/include/sync0rw.h b/storage/innodb_plugin/include/sync0rw.h
similarity index 55%
rename from storage/innobase/include/sync0rw.h
rename to storage/innodb_plugin/include/sync0rw.h
index 6de26535689..aedfd5f3f86 100644
--- a/storage/innobase/include/sync0rw.h
+++ b/storage/innodb_plugin/include/sync0rw.h
@@ -1,7 +1,31 @@
-/******************************************************
-The read-write lock (for threads, not for database transactions)
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0rw.h
+The read-write lock (for threads, not for database transactions)
 
 Created 9/11/1995 Heikki Tuuri
 *******************************************************/
@@ -10,6 +34,7 @@ Created 9/11/1995 Heikki Tuuri
 #define sync0rw_h
 
 #include "univ.i"
+#ifndef UNIV_HOTBACKUP
 #include "ut0lst.h"
 #include "sync0sync.h"
 #include "os0sync.h"
@@ -17,6 +42,7 @@ Created 9/11/1995 Heikki Tuuri
 /* The following undef is to prevent a name conflict with a macro
 in MySQL: */
 #undef rw_lock_t
+#endif /* !UNIV_HOTBACKUP */
 
 /* Latch types; these are used also in btr0btr.h: keep the numerical values
 smaller than 30 and the order of the numerical values like below! */
@@ -24,6 +50,7 @@ smaller than 30 and the order of the numerical values like below! */
 #define	RW_X_LATCH	2
 #define	RW_NO_LATCH	3
 
+#ifndef UNIV_HOTBACKUP
 /* We decrement lock_word by this amount for each x_lock. It is also the
 start value for the lock_word, meaning that it limits the maximum number
 of concurrent read locks before the rw_lock breaks. The current value of
@@ -46,23 +73,39 @@ To modify the debug info list of an rw-lock, this mutex has to be
 
 acquired in addition to the mutex protecting the lock. */
 extern mutex_t		rw_lock_debug_mutex;
-extern os_event_t	rw_lock_debug_event;	/* If deadlock detection does
+extern os_event_t	rw_lock_debug_event;	/*!< If deadlock detection does
 					not get immediately the mutex it
 					may wait for this event */
-extern ibool		rw_lock_debug_waiters;	/* This is set to TRUE, if
+extern ibool		rw_lock_debug_waiters;	/*!< This is set to TRUE, if
 					there may be waiters for the event */
 #endif /* UNIV_SYNC_DEBUG */
 
-extern	ib_longlong	rw_s_spin_wait_count;
-extern	ib_longlong	rw_s_spin_round_count;
-extern	ib_longlong	rw_s_exit_count;
-extern	ib_longlong	rw_s_os_wait_count;
-extern	ib_longlong	rw_x_spin_wait_count;
-extern	ib_longlong	rw_x_spin_round_count;
-extern	ib_longlong	rw_x_os_wait_count;
-extern	ib_longlong	rw_x_exit_count;
+/** number of spin waits on rw-latches,
+resulted during exclusive (write) locks */
+extern	ib_int64_t	rw_s_spin_wait_count;
+/** number of spin loop rounds on rw-latches,
+resulted during exclusive (write) locks */
+extern	ib_int64_t	rw_s_spin_round_count;
+/** number of unlocks (that unlock shared locks),
+set only when UNIV_SYNC_PERF_STAT is defined */
+extern	ib_int64_t	rw_s_exit_count;
+/** number of OS waits on rw-latches,
+resulted during shared (read) locks */
+extern	ib_int64_t	rw_s_os_wait_count;
+/** number of spin waits on rw-latches,
+resulted during shared (read) locks */
+extern	ib_int64_t	rw_x_spin_wait_count;
+/** number of spin loop rounds on rw-latches,
+resulted during shared (read) locks */
+extern	ib_int64_t	rw_x_spin_round_count;
+/** number of OS waits on rw-latches,
+resulted during exclusive (write) locks */
+extern	ib_int64_t	rw_x_os_wait_count;
+/** number of unlocks (that unlock exclusive locks),
+set only when UNIV_SYNC_PERF_STAT is defined */
+extern	ib_int64_t	rw_x_exit_count;
 
-/**********************************************************************
+/******************************************************************//**
 Creates, or rather, initializes an rw-lock object in a specified memory
 location (which must be appropriately aligned). The rw-lock is initialized
 to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
@@ -80,76 +123,77 @@ is necessary only if the memory block containing it is freed. */
 	rw_lock_create_func((L), __FILE__, __LINE__)
 #endif /* UNIV_DEBUG */
 
-/**********************************************************************
+/******************************************************************//**
 Creates, or rather, initializes an rw-lock object in a specified memory
 location (which must be appropriately aligned). The rw-lock is initialized
 to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
 is necessary only if the memory block containing it is freed. */
-
+UNIV_INTERN
 void
 rw_lock_create_func(
 /*================*/
-	rw_lock_t*	lock,		/* in: pointer to memory */
+	rw_lock_t*	lock,		/*!< in: pointer to memory */
 #ifdef UNIV_DEBUG
 # ifdef UNIV_SYNC_DEBUG
-	ulint		level,		/* in: level */
+	ulint		level,		/*!< in: level */
 # endif /* UNIV_SYNC_DEBUG */
-	const char*	cmutex_name, 	/* in: mutex name */
+	const char*	cmutex_name, 	/*!< in: mutex name */
 #endif /* UNIV_DEBUG */
-	const char*	cfile_name,	/* in: file name where created */
-	ulint 		cline);		/* in: file line where created */
-/**********************************************************************
+	const char*	cfile_name,	/*!< in: file name where created */
+	ulint 		cline);		/*!< in: file line where created */
+/******************************************************************//**
 Calling this function is obligatory only if the memory buffer containing
 the rw-lock is freed. Removes an rw-lock object from the global list. The
 rw-lock is checked to be in the non-locked state. */
-
+UNIV_INTERN
 void
 rw_lock_free(
 /*=========*/
-	rw_lock_t*	lock);	/* in: rw-lock */
+	rw_lock_t*	lock);	/*!< in: rw-lock */
 #ifdef UNIV_DEBUG
-/**********************************************************************
+/******************************************************************//**
 Checks that the rw-lock has been initialized and that there are no
-simultaneous shared and exclusive locks. */
-
+simultaneous shared and exclusive locks.
+@return	TRUE */
+UNIV_INTERN
 ibool
 rw_lock_validate(
 /*=============*/
-	rw_lock_t*	lock);
+	rw_lock_t*	lock);	/*!< in: rw-lock */
 #endif /* UNIV_DEBUG */
-/**********************************************************************
-Low-level function which tries to lock an rw-lock in s-mode. Performs no
-spinning. */
-UNIV_INLINE
-ibool
-rw_lock_s_lock_low(
-/*===============*/
-                                /* out: TRUE if success */
-        rw_lock_t*      lock,   /* in: pointer to rw-lock */
-        ulint           pass,
-                                /* in: pass value; != 0, if the lock will be
-                                passed to another thread to unlock */
-        const char*     file_name, /* in: file name where lock requested */
-        ulint           line);  /* in: line where requested */
-/******************************************************************
+/**************************************************************//**
 NOTE! The following macros should be used in rw s-locking, not the
 corresponding function. */
 
 #define rw_lock_s_lock(M)	rw_lock_s_lock_func(\
 		(M), 0, __FILE__, __LINE__)
-/******************************************************************
+/**************************************************************//**
 NOTE! The following macros should be used in rw s-locking, not the
 corresponding function. */
 
 #define rw_lock_s_lock_gen(M, P)	rw_lock_s_lock_func(\
 		(M), (P), __FILE__, __LINE__)
-/******************************************************************
+/**************************************************************//**
 NOTE! The following macros should be used in rw s-locking, not the
 corresponding function. */
 
 #define rw_lock_s_lock_nowait(M, F, L)    rw_lock_s_lock_low(\
 					  (M), 0, (F), (L))
-/**********************************************************************
+/******************************************************************//**
+Low-level function which tries to lock an rw-lock in s-mode. Performs no
+spinning.
+@return	TRUE if success */
+UNIV_INLINE
+ibool
+rw_lock_s_lock_low(
+/*===============*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass __attribute__((unused)),
+				/*!< in: pass value; != 0, if the lock will be
+				passed to another thread to unlock */
+	const char*	file_name, /*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
 NOTE! Use the corresponding macro, not directly this function, except if
 you supply the file name and line number. Lock an rw-lock in shared mode
 for the current thread. If the rw-lock is locked in exclusive mode, or
@@ -160,70 +204,63 @@ UNIV_INLINE
 void
 rw_lock_s_lock_func(
 /*================*/
-	rw_lock_t*	lock,	/* in: pointer to rw-lock */
-	ulint		pass,	/* in: pass value; != 0, if the lock will
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
 				be passed to another thread to unlock */
-	const char*	file_name,/* in: file name where lock requested */
-	ulint		line);	/* in: line where requested */
-/**********************************************************************
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
 NOTE! Use the corresponding macro, not directly this function! Lock an
 rw-lock in exclusive mode for the current thread if the lock can be
-obtained immediately. */
+obtained immediately.
+@return	TRUE if success */
 UNIV_INLINE
 ibool
 rw_lock_x_lock_func_nowait(
 /*=======================*/
-				/* out: TRUE if success */
-	rw_lock_t*	lock,	/* in: pointer to rw-lock */
-	const char*	file_name,/* in: file name where lock requested */
-	ulint		line);	/* in: line where requested */
-/**********************************************************************
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
 Releases a shared mode lock. */
 UNIV_INLINE
 void
 rw_lock_s_unlock_func(
 /*==================*/
-	rw_lock_t*	lock	/* in: rw-lock */
 #ifdef UNIV_SYNC_DEBUG
-	,ulint		pass	/* in: pass value; != 0, if the lock may have
+	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
 				been passed to another thread to unlock */
 #endif
-	);
-/***********************************************************************
-Releases a shared mode lock. */
+	rw_lock_t*	lock);	/*!< in/out: rw-lock */
 
 #ifdef UNIV_SYNC_DEBUG
-#define rw_lock_s_unlock(L)	rw_lock_s_unlock_func(L, 0)
+# define rw_lock_s_unlock_gen(L, P)	rw_lock_s_unlock_func(P, L)
 #else
-#define rw_lock_s_unlock(L)	rw_lock_s_unlock_func(L)
+# define rw_lock_s_unlock_gen(L, P)	rw_lock_s_unlock_func(L)
 #endif
-/***********************************************************************
+/*******************************************************************//**
 Releases a shared mode lock. */
+#define rw_lock_s_unlock(L)		rw_lock_s_unlock_gen(L, 0)
 
-#ifdef UNIV_SYNC_DEBUG
-#define rw_lock_s_unlock_gen(L, P)	rw_lock_s_unlock_func(L, P)
-#else
-#define rw_lock_s_unlock_gen(L, P)	rw_lock_s_unlock_func(L)
-#endif
-/******************************************************************
+/**************************************************************//**
 NOTE! The following macro should be used in rw x-locking, not the
 corresponding function. */
 
 #define rw_lock_x_lock(M)	rw_lock_x_lock_func(\
 		(M), 0, __FILE__, __LINE__)
-/******************************************************************
+/**************************************************************//**
 NOTE! The following macro should be used in rw x-locking, not the
 corresponding function. */
 
 #define rw_lock_x_lock_gen(M, P)	rw_lock_x_lock_func(\
 		(M), (P), __FILE__, __LINE__)
-/******************************************************************
+/**************************************************************//**
 NOTE! The following macros should be used in rw x-locking, not the
 corresponding function. */
 
 #define rw_lock_x_lock_nowait(M)	rw_lock_x_lock_func_nowait(\
 		(M), __FILE__, __LINE__)
-/**********************************************************************
+/******************************************************************//**
 NOTE! Use the corresponding macro, not directly this function! Lock an
 rw-lock in exclusive mode for the current thread. If the rw-lock is locked
 in shared or exclusive mode, or there is an exclusive lock request waiting,
@@ -232,44 +269,37 @@ for the lock, before suspending the thread. If the same thread has an x-lock
 on the rw-lock, locking succeed, with the following exception: if pass != 0,
 only a single x-lock may be taken on the lock. NOTE: If the same thread has
 an s-lock, locking does not succeed! */
-
+UNIV_INTERN
 void
 rw_lock_x_lock_func(
 /*================*/
-	rw_lock_t*	lock,	/* in: pointer to rw-lock */
-	ulint		pass,	/* in: pass value; != 0, if the lock will
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
 				be passed to another thread to unlock */
-	const char*	file_name,/* in: file name where lock requested */
-	ulint		line);	/* in: line where requested */
-/**********************************************************************
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
 Releases an exclusive mode lock. */
 UNIV_INLINE
 void
 rw_lock_x_unlock_func(
 /*==================*/
-	rw_lock_t*	lock	/* in: rw-lock */
 #ifdef UNIV_SYNC_DEBUG
-	,ulint		pass	/* in: pass value; != 0, if the lock may have
+	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
 				been passed to another thread to unlock */
 #endif
-	);
-/***********************************************************************
-Releases an exclusive mode lock. */
+	rw_lock_t*	lock);	/*!< in/out: rw-lock */
 
 #ifdef UNIV_SYNC_DEBUG
-#define rw_lock_x_unlock(L)	rw_lock_x_unlock_func(L, 0)
+# define rw_lock_x_unlock_gen(L, P)	rw_lock_x_unlock_func(P, L)
 #else
-#define rw_lock_x_unlock(L)	rw_lock_x_unlock_func(L)
+# define rw_lock_x_unlock_gen(L, P)	rw_lock_x_unlock_func(L)
 #endif
-/***********************************************************************
+/*******************************************************************//**
 Releases an exclusive mode lock. */
+#define rw_lock_x_unlock(L)		rw_lock_x_unlock_gen(L, 0)
 
-#ifdef UNIV_SYNC_DEBUG
-#define rw_lock_x_unlock_gen(L, P)	rw_lock_x_unlock_func(L, P)
-#else
-#define rw_lock_x_unlock_gen(L, P)	rw_lock_x_unlock_func(L)
-#endif
-/**********************************************************************
+/******************************************************************//**
 Low-level function which locks an rw-lock in s-mode when we know that it
 is possible and none else is currently accessing the rw-lock structure.
 Then we can do the locking without reserving the mutex. */
@@ -277,11 +307,10 @@ UNIV_INLINE
 void
 rw_lock_s_lock_direct(
 /*==================*/
-	rw_lock_t*	lock,		/* in: pointer to rw-lock */
-	const char*	file_name,	/* in: file name where requested */
-	ulint		line		/* in: line where lock requested */
-);
-/**********************************************************************
+	rw_lock_t*	lock,		/*!< in/out: rw-lock */
+	const char*	file_name,	/*!< in: file name where requested */
+	ulint		line);		/*!< in: line where lock requested */
+/******************************************************************//**
 Low-level function which locks an rw-lock in x-mode when we know that it
 is not locked and none else is currently accessing the rw-lock structure.
 Then we can do the locking without reserving the mutex. */
@@ -289,11 +318,10 @@ UNIV_INLINE
 void
 rw_lock_x_lock_direct(
 /*==================*/
-	rw_lock_t*	lock,		/* in: pointer to rw-lock */
-	const char*	file_name,	/* in: file name where requested */
-	ulint		line		/* in: line where lock requested */
-);
-/**********************************************************************
+	rw_lock_t*	lock,		/*!< in/out: rw-lock */
+	const char*	file_name,	/*!< in: file name where requested */
+	ulint		line);		/*!< in: line where lock requested */
+/******************************************************************//**
 This function is used in the insert buffer to move the ownership of an
 x-latch on a buffer frame to the current thread. The x-latch was set by
 the buffer read operation and it protected the buffer frame while the
@@ -301,211 +329,250 @@ read was done. The ownership is moved because we want that the current
 thread is able to acquire a second x-latch which is stored in an mtr.
 This, in turn, is needed to pass the debug checks of index page
 operations. */
-
+UNIV_INTERN
 void
 rw_lock_x_lock_move_ownership(
 /*==========================*/
-	rw_lock_t*	lock);	/* in: lock which was x-locked in the
+	rw_lock_t*	lock);	/*!< in: lock which was x-locked in the
 				buffer read */
-/**********************************************************************
+/******************************************************************//**
 Releases a shared mode lock when we know there are no waiters and none
 else will access the lock during the time this function is executed. */
 UNIV_INLINE
 void
 rw_lock_s_unlock_direct(
 /*====================*/
-	rw_lock_t*	lock);	/* in: rw-lock */
-/**********************************************************************
+	rw_lock_t*	lock);	/*!< in/out: rw-lock */
+/******************************************************************//**
 Releases an exclusive mode lock when we know there are no waiters, and
 none else will access the lock durint the time this function is executed. */
 UNIV_INLINE
 void
 rw_lock_x_unlock_direct(
 /*====================*/
-	rw_lock_t*	lock);	/* in: rw-lock */
-/**********************************************************************
+	rw_lock_t*	lock);	/*!< in/out: rw-lock */
+/******************************************************************//**
 Returns the value of writer_count for the lock. Does not reserve the lock
-mutex, so the caller must be sure it is not changed during the call. */
+mutex, so the caller must be sure it is not changed during the call.
+@return	value of writer_count */
 UNIV_INLINE
 ulint
 rw_lock_get_x_lock_count(
 /*=====================*/
-				/* out: value of writer_count */
-	rw_lock_t*	lock);	/* in: rw-lock */
-/************************************************************************
-Accessor functions for rw lock. */
+	const rw_lock_t*	lock);	/*!< in: rw-lock */
+/********************************************************************//**
+Check if there are threads waiting for the rw-lock.
+@return	1 if waiters, 0 otherwise */
 UNIV_INLINE
 ulint
 rw_lock_get_waiters(
 /*================*/
-	rw_lock_t*	lock);
+	const rw_lock_t*	lock);	/*!< in: rw-lock */
+/******************************************************************//**
+Returns the write-status of the lock - this function made more sense
+with the old rw_lock implementation.
+@return	RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
 UNIV_INLINE
 ulint
 rw_lock_get_writer(
 /*===============*/
-	rw_lock_t*	lock);
+	const rw_lock_t*	lock);	/*!< in: rw-lock */
+/******************************************************************//**
+Returns the number of readers.
+@return	number of readers */
 UNIV_INLINE
 ulint
 rw_lock_get_reader_count(
 /*=====================*/
-	rw_lock_t*	lock);
-/**********************************************************************
+	const rw_lock_t*	lock);	/*!< in: rw-lock */
+/******************************************************************//**
 Decrements lock_word the specified amount if it is greater than 0.
-This is used by both s_lock and x_lock operations. */
+This is used by both s_lock and x_lock operations.
+@return	TRUE if decr occurs */
 UNIV_INLINE
 ibool
 rw_lock_lock_word_decr(
-					/* out: TRUE if decr occurs */
-	rw_lock_t*	lock,		/* in: rw-lock */
-	ulint		amount);	/* in: amount to decrement */
-/**********************************************************************
-Increments lock_word the specified amount and returns new value. */
+/*===================*/
+	rw_lock_t*	lock,		/*!< in/out: rw-lock */
+	ulint		amount);	/*!< in: amount to decrement */
+/******************************************************************//**
+Increments lock_word the specified amount and returns new value.
+@return	lock->lock_word after increment */
 UNIV_INLINE
 lint
 rw_lock_lock_word_incr(
-					/* out: TRUE if decr occurs */
-	rw_lock_t*	lock,
-	ulint		amount);	/* in: rw-lock */
+/*===================*/
+	rw_lock_t*	lock,		/*!< in/out: rw-lock */
+	ulint		amount);	/*!< in: amount to increment */
+/******************************************************************//**
+This function sets the lock->writer_thread and lock->recursive fields.
+For platforms where we are using atomic builtins instead of lock->mutex
+it sets the lock->writer_thread field using atomics to ensure memory
+ordering. Note that it is assumed that the caller of this function
+effectively owns the lock i.e.: nobody else is allowed to modify
+lock->writer_thread at this point in time.
+The protocol is that lock->writer_thread MUST be updated BEFORE the
+lock->recursive flag is set. */
+UNIV_INLINE
+void
+rw_lock_set_writer_id_and_recursion_flag(
+/*=====================================*/
+	rw_lock_t*	lock,		/*!< in/out: lock to work on */
+	ibool		recursive);	/*!< in: TRUE if recursion
+					allowed */
 #ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
 Checks if the thread has locked the rw-lock in the specified mode, with
 the pass value == 0. */
-
+UNIV_INTERN
 ibool
 rw_lock_own(
 /*========*/
-	rw_lock_t*	lock,		/* in: rw-lock */
-	ulint		lock_type);	/* in: lock type: RW_LOCK_SHARED,
+	rw_lock_t*	lock,		/*!< in: rw-lock */
+	ulint		lock_type);	/*!< in: lock type: RW_LOCK_SHARED,
 					RW_LOCK_EX */
 #endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
+/******************************************************************//**
 Checks if somebody has locked the rw-lock in the specified mode. */
-
+UNIV_INTERN
 ibool
 rw_lock_is_locked(
 /*==============*/
-	rw_lock_t*	lock,		/* in: rw-lock */
-	ulint		lock_type);	/* in: lock type: RW_LOCK_SHARED,
+	rw_lock_t*	lock,		/*!< in: rw-lock */
+	ulint		lock_type);	/*!< in: lock type: RW_LOCK_SHARED,
 					RW_LOCK_EX */
 #ifdef UNIV_SYNC_DEBUG
-/*******************************************************************
+/***************************************************************//**
 Prints debug info of an rw-lock. */
-
+UNIV_INTERN
 void
 rw_lock_print(
 /*==========*/
-	rw_lock_t*	lock);	/* in: rw-lock */
-/*******************************************************************
+	rw_lock_t*	lock);	/*!< in: rw-lock */
+/***************************************************************//**
 Prints debug info of currently locked rw-locks. */
-
+UNIV_INTERN
 void
 rw_lock_list_print_info(
 /*====================*/
-	FILE*	file);		/* in: file where to print */
-/*******************************************************************
+	FILE*	file);		/*!< in: file where to print */
+/***************************************************************//**
 Returns the number of currently locked rw-locks.
-Works only in the debug version. */
-
+Works only in the debug version.
+@return	number of locked rw-locks */
+UNIV_INTERN
 ulint
 rw_lock_n_locked(void);
 /*==================*/
 
 /*#####################################################################*/
 
-/**********************************************************************
+/******************************************************************//**
 Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
 because the debug mutex is also acquired in sync0arr while holding the OS
 mutex protecting the sync array, and the ordinary mutex_enter might
 recursively call routines in sync0arr, leading to a deadlock on the OS
 mutex. */
-
+UNIV_INTERN
 void
 rw_lock_debug_mutex_enter(void);
 /*==========================*/
-/**********************************************************************
+/******************************************************************//**
 Releases the debug mutex. */
-
+UNIV_INTERN
 void
 rw_lock_debug_mutex_exit(void);
 /*==========================*/
-/*************************************************************************
+/*********************************************************************//**
 Prints info of a debug struct. */
-
+UNIV_INTERN
 void
 rw_lock_debug_print(
 /*================*/
-	rw_lock_debug_t*	info);	/* in: debug struct */
+	rw_lock_debug_t*	info);	/*!< in: debug struct */
 #endif /* UNIV_SYNC_DEBUG */
 
 /* NOTE! The structure appears here only for the compiler to know its size.
-Do not use its fields directly! The structure used in the spin lock
-implementation of a read-write lock. Several threads may have a shared lock
-simultaneously in this lock, but only one writer may have an exclusive lock,
-in which case no shared locks are allowed. To prevent starving of a writer
-blocked by readers, a writer may queue for x-lock by decrementing lock_word:
-no new readers will be let in while the thread waits for readers to exit. */
+Do not use its fields directly! */
 
+/** The structure used in the spin lock implementation of a read-write
+lock. Several threads may have a shared lock simultaneously in this
+lock, but only one writer may have an exclusive lock, in which case no
+shared locks are allowed. To prevent starving of a writer blocked by
+readers, a writer may queue for x-lock by decrementing lock_word: no
+new readers will be let in while the thread waits for readers to
+exit. */
 struct rw_lock_struct {
 	volatile lint	lock_word;
-				/* Holds the state of the lock. */
-	volatile ulint	waiters;/* 1: there are waiters */
-	volatile ulint	pass;	/* Default value 0. This is set to some
-				value != 0 given by the caller of an x-lock
-				operation, if the x-lock is to be passed to
-				another thread to unlock (which happens in
-				asynchronous i/o). */
+				/*!< Holds the state of the lock. */
+	volatile ulint	waiters;/*!< 1: there are waiters */
+	volatile ibool	recursive;/*!< Default value FALSE which means the lock
+				is non-recursive. The value is typically set
+				to TRUE making normal rw_locks recursive. In
+				case of asynchronous IO, when a non-zero
+				value of 'pass' is passed then we keep the
+				lock non-recursive.
+				This flag also tells us about the state of
+				writer_thread field. If this flag is set
+				then writer_thread MUST contain the thread
+				id of the current x-holder or wait-x thread.
+				This flag must be reset in x_unlock
+				functions before incrementing the lock_word */
 	volatile os_thread_id_t	writer_thread;
-				/* Thread id of writer thread */
-	os_event_t	event;	/* Used by sync0arr.c for thread queueing */
+				/*!< Thread id of writer thread. Is only
+				guaranteed to have sane and non-stale
+				value iff recursive flag is set. */
+	os_event_t	event;	/*!< Used by sync0arr.c for thread queueing */
 	os_event_t	wait_ex_event;
-				/* Event for next-writer to wait on. A thread
+				/*!< Event for next-writer to wait on. A thread
 				must decrement lock_word before waiting. */
-#ifndef UNIV_SYNC_ATOMIC
-	mutex_t	mutex;		/* The mutex protecting rw_lock_struct */
-#endif /* UNIV_SYNC_ATOMIC */
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
+	mutex_t	mutex;		/*!< The mutex protecting rw_lock_struct */
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
 
 	UT_LIST_NODE_T(rw_lock_t) list;
-				/* All allocated rw locks are put into a
+				/*!< All allocated rw locks are put into a
 				list */
 #ifdef UNIV_SYNC_DEBUG
 	UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
-				/* In the debug version: pointer to the debug
+				/*!< In the debug version: pointer to the debug
 				info list of the lock */
-	ulint	level;		/* Level in the global latching order. */
+	ulint	level;		/*!< Level in the global latching order. */
 #endif /* UNIV_SYNC_DEBUG */
-	ulint count_os_wait;	/* Count of os_waits. May not be accurate */
-	const char*	cfile_name;/* File name where lock created */
+	ulint count_os_wait;	/*!< Count of os_waits. May not be accurate */
+	const char*	cfile_name;/*!< File name where lock created */
         /* last s-lock file/line is not guaranteed to be correct */
-	const char*	last_s_file_name;/* File name where last s-locked */
-	const char*	last_x_file_name;/* File name where last x-locked */
+	const char*	last_s_file_name;/*!< File name where last s-locked */
+	const char*	last_x_file_name;/*!< File name where last x-locked */
 	ibool		writer_is_wait_ex;
-				/* This is TRUE if the writer field is
+				/*!< This is TRUE if the writer field is
 				RW_LOCK_WAIT_EX; this field is located far
 				from the memory update hotspot fields which
 				are at the start of this struct, thus we can
 				peek this field without causing much memory
 				bus traffic */
-	unsigned	cline:14;	/* Line where created */
-	unsigned	last_s_line:14;	/* Line number where last time s-locked */
-	unsigned	last_x_line:14;	/* Line number where last time x-locked */
-	ulint	magic_n;
+	unsigned	cline:14;	/*!< Line where created */
+	unsigned	last_s_line:14;	/*!< Line number where last time s-locked */
+	unsigned	last_x_line:14;	/*!< Line number where last time x-locked */
+	ulint	magic_n;	/*!< RW_LOCK_MAGIC_N */
 };
 
+/** Value of rw_lock_struct::magic_n */
 #define	RW_LOCK_MAGIC_N	22643
 
 #ifdef UNIV_SYNC_DEBUG
-/* The structure for storing debug info of an rw-lock */
+/** The structure for storing debug info of an rw-lock */
 struct	rw_lock_debug_struct {
 
-	os_thread_id_t thread_id;  /* The thread id of the thread which
+	os_thread_id_t thread_id;  /*!< The thread id of the thread which
 				locked the rw-lock */
-	ulint	pass;		/* Pass value given in the lock operation */
-	ulint	lock_type;	/* Type of the lock: RW_LOCK_EX,
+	ulint	pass;		/*!< Pass value given in the lock operation */
+	ulint	lock_type;	/*!< Type of the lock: RW_LOCK_EX,
 				RW_LOCK_SHARED, RW_LOCK_WAIT_EX */
-	const char*	file_name;/* File name where the lock was obtained */
-	ulint	line;		/* Line where the rw-lock was locked */
+	const char*	file_name;/*!< File name where the lock was obtained */
+	ulint	line;		/*!< Line where the rw-lock was locked */
 	UT_LIST_NODE_T(rw_lock_debug_t) list;
-				/* Debug structs are linked in a two-way
+				/*!< Debug structs are linked in a two-way
 				list */
 };
 #endif /* UNIV_SYNC_DEBUG */
@@ -513,5 +580,6 @@ struct	rw_lock_debug_struct {
 #ifndef UNIV_NONINL
 #include "sync0rw.ic"
 #endif
+#endif /* !UNIV_HOTBACKUP */
 
 #endif
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innodb_plugin/include/sync0rw.ic
similarity index 54%
rename from storage/innobase/include/sync0rw.ic
rename to storage/innodb_plugin/include/sync0rw.ic
index e3f1d881cb4..7116f1b7c9b 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innodb_plugin/include/sync0rw.ic
@@ -1,124 +1,162 @@
-/******************************************************
-The read-write lock (for threads)
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0rw.ic
+The read-write lock (for threads)
 
 Created 9/11/1995 Heikki Tuuri
 *******************************************************/
 
-/**********************************************************************
+/******************************************************************//**
 Lock an rw-lock in shared mode for the current thread. If the rw-lock is
 locked in exclusive mode, or there is an exclusive lock request waiting,
 the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
 waiting for the lock before suspending the thread. */
-
+UNIV_INTERN
 void
 rw_lock_s_lock_spin(
 /*================*/
-	rw_lock_t*	lock,	/* in: pointer to rw-lock */
-	ulint		pass,	/* in: pass value; != 0, if the lock will
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
 				be passed to another thread to unlock */
-	const char*	file_name,/* in: file name where lock requested */
-	ulint		line);	/* in: line where requested */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
 #ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
 Inserts the debug information for an rw-lock. */
-
+UNIV_INTERN
 void
 rw_lock_add_debug_info(
 /*===================*/
-	rw_lock_t*	lock,		/* in: rw-lock */
-	ulint		pass,		/* in: pass value */
-	ulint		lock_type,	/* in: lock type */
-	const char*	file_name,	/* in: file where requested */
-	ulint		line);		/* in: line where requested */
-/**********************************************************************
+	rw_lock_t*	lock,		/*!< in: rw-lock */
+	ulint		pass,		/*!< in: pass value */
+	ulint		lock_type,	/*!< in: lock type */
+	const char*	file_name,	/*!< in: file where requested */
+	ulint		line);		/*!< in: line where requested */
+/******************************************************************//**
 Removes a debug information struct for an rw-lock. */
-
+UNIV_INTERN
 void
 rw_lock_remove_debug_info(
 /*======================*/
-	rw_lock_t*	lock,		/* in: rw-lock */
-	ulint		pass,		/* in: pass value */
-	ulint		lock_type);	/* in: lock type */
+	rw_lock_t*	lock,		/*!< in: rw-lock */
+	ulint		pass,		/*!< in: pass value */
+	ulint		lock_type);	/*!< in: lock type */
 #endif /* UNIV_SYNC_DEBUG */
 
-/************************************************************************
-Accessor functions for rw lock. */
+/********************************************************************//**
+Check if there are threads waiting for the rw-lock.
+@return	1 if waiters, 0 otherwise */
 UNIV_INLINE
 ulint
 rw_lock_get_waiters(
 /*================*/
-	rw_lock_t*	lock)
+	const rw_lock_t*	lock)	/*!< in: rw-lock */
 {
 	return(lock->waiters);
 }
+
+/********************************************************************//**
+Sets lock->waiters to 1. It is not an error if lock->waiters is already
+1. On platforms where ATOMIC builtins are used this function enforces a
+memory barrier. */
 UNIV_INLINE
 void
-rw_lock_set_waiters(
-/*================*/
-	rw_lock_t*	lock)
+rw_lock_set_waiter_flag(
+/*====================*/
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
 {
-#ifdef UNIV_SYNC_ATOMIC
-	os_compare_and_swap(&(lock->waiters), 0, 1);
-#else /* UNIV_SYNC_ATOMIC */
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	os_compare_and_swap_ulint(&lock->waiters, 0, 1);
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
 	lock->waiters = 1;
-#endif /* UNIV_SYNC_ATOMIC */
-}
-UNIV_INLINE
-void
-rw_lock_reset_waiters(
-/*================*/
-	rw_lock_t*	lock)
-{
-#ifdef UNIV_SYNC_ATOMIC
-	os_compare_and_swap(&(lock->waiters), 1, 0);
-#else /* UNIV_SYNC_ATOMIC */
-	lock->waiters = 0;
-#endif /* UNIV_SYNC_ATOMIC */
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
 }
 
-/**********************************************************************
+/********************************************************************//**
+Resets lock->waiters to 0. It is not an error if lock->waiters is already
+0. On platforms where ATOMIC builtins are used this function enforces a
+memory barrier. */
+UNIV_INLINE
+void
+rw_lock_reset_waiter_flag(
+/*======================*/
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
+{
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	os_compare_and_swap_ulint(&lock->waiters, 1, 0);
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+	lock->waiters = 0;
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+}
+
+/******************************************************************//**
 Returns the write-status of the lock - this function made more sense
 with the old rw_lock implementation.
- */
+@return	RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
 UNIV_INLINE
 ulint
 rw_lock_get_writer(
 /*===============*/
-	rw_lock_t*	lock)
+	const rw_lock_t*	lock)	/*!< in: rw-lock */
 {
 	lint lock_word = lock->lock_word;
-	if(lock_word > 0) {
+	if (lock_word > 0) {
 		/* return NOT_LOCKED in s-lock state, like the writer
 		member of the old lock implementation. */
-		return RW_LOCK_NOT_LOCKED;
+		return(RW_LOCK_NOT_LOCKED);
 	} else if (((-lock_word) % X_LOCK_DECR) == 0) {
-		return RW_LOCK_EX;
+		return(RW_LOCK_EX);
 	} else {
                 ut_ad(lock_word > -X_LOCK_DECR);
-		return RW_LOCK_WAIT_EX;
+		return(RW_LOCK_WAIT_EX);
 	}
 }
 
+/******************************************************************//**
+Returns the number of readers.
+@return	number of readers */
 UNIV_INLINE
 ulint
 rw_lock_get_reader_count(
 /*=====================*/
-	rw_lock_t*	lock)
+	const rw_lock_t*	lock)	/*!< in: rw-lock */
 {
 	lint lock_word = lock->lock_word;
-	if(lock_word > 0) {
+	if (lock_word > 0) {
 		/* s-locked, no x-waiters */
 		return(X_LOCK_DECR - lock_word);
 	} else if (lock_word < 0 && lock_word > -X_LOCK_DECR) {
 		/* s-locked, with x-waiters */
-		return (ulint)(-lock_word);
+		return((ulint)(-lock_word));
 	}
-	return 0;
+	return(0);
 }
 
-#ifndef UNIV_SYNC_ATOMIC
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
 UNIV_INLINE
 mutex_t*
 rw_lock_get_mutex(
@@ -129,84 +167,74 @@ rw_lock_get_mutex(
 }
 #endif
 
-/**********************************************************************
+/******************************************************************//**
 Returns the value of writer_count for the lock. Does not reserve the lock
-mutex, so the caller must be sure it is not changed during the call. */
+mutex, so the caller must be sure it is not changed during the call.
+@return	value of writer_count */
 UNIV_INLINE
 ulint
 rw_lock_get_x_lock_count(
 /*=====================*/
-				/* out: value of writer_count */
-	rw_lock_t*	lock)	/* in: rw-lock */
+	const rw_lock_t*	lock)	/*!< in: rw-lock */
 {
 	lint lock_copy = lock->lock_word;
 	/* If there is a reader, lock_word is not divisible by X_LOCK_DECR */
-	if(lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
-		return 0;
+	if (lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
+		return(0);
 	}
-	return ((-lock_copy) / X_LOCK_DECR) + 1;
+	return(((-lock_copy) / X_LOCK_DECR) + 1);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Two different implementations for decrementing the lock_word of a rw_lock:
 one for systems supporting atomic operations, one for others. This does
 does not support recusive x-locks: they should be handled by the caller and
 need not be atomic since they are performed by the current lock holder.
-Returns true if the decrement was made, false if not. */
+Returns true if the decrement was made, false if not.
+@return	TRUE if decr occurs */
 UNIV_INLINE
 ibool
 rw_lock_lock_word_decr(
-				/* out: TRUE if decr occurs */
-	rw_lock_t*	lock,	/* in: rw-lock */
-	ulint		amount)	/* in: amount of decrement */
+/*===================*/
+	rw_lock_t*	lock,		/*!< in/out: rw-lock */
+	ulint		amount)		/*!< in: amount to decrement */
 {
-
-#ifdef UNIV_SYNC_ATOMIC
-
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
         lint local_lock_word = lock->lock_word;
 	while (local_lock_word > 0) {
-		if(os_compare_and_swap(&(lock->lock_word),
-                                       local_lock_word,
-                                       local_lock_word - amount)) {
-			return TRUE;
+		if (os_compare_and_swap_lint(&lock->lock_word,
+					     local_lock_word,
+					     local_lock_word - amount)) {
+			return(TRUE);
 		}
 		local_lock_word = lock->lock_word;
 	}
 	return(FALSE);
-
-#else /* UNIV_SYNC_ATOMIC */
-
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
 	ibool success = FALSE;
 	mutex_enter(&(lock->mutex));
-	if(lock->lock_word > 0) {
+	if (lock->lock_word > 0) {
 		lock->lock_word -= amount;
 		success = TRUE;
 	}
 	mutex_exit(&(lock->mutex));
-	return success;
-
-#endif /* UNIV_SYNC_ATOMIC */
-
+	return(success);
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
 }
 
-/**********************************************************************
-Two different implementations for incrementing the lock_word of a rw_lock:
-one for systems supporting atomic operations, one for others.
-Returns the value of lock_word after increment. */
+/******************************************************************//**
+Increments lock_word the specified amount and returns new value.
+@return	lock->lock_word after increment */
 UNIV_INLINE
 lint
 rw_lock_lock_word_incr(
-				/* out: lock->lock_word after increment */
-	rw_lock_t*	lock,	/* in: rw-lock */
-	ulint		amount)	/* in: amount of increment */
+/*===================*/
+	rw_lock_t*	lock,		/*!< in/out: rw-lock */
+	ulint		amount)		/*!< in: amount of increment */
 {
-
-#ifdef UNIV_SYNC_ATOMIC
-
-	return(os_atomic_increment(&(lock->lock_word), amount));
-
-#else /* UNIV_SYNC_ATOMIC */
-
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	return(os_atomic_increment_lint(&lock->lock_word, amount));
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
 	lint local_lock_word;
 
 	mutex_enter(&(lock->mutex));
@@ -216,26 +244,69 @@ rw_lock_lock_word_incr(
 
 	mutex_exit(&(lock->mutex));
 
-        return local_lock_word;
-
-#endif /* UNIV_SYNC_ATOMIC */
-
+        return(local_lock_word);
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
 }
 
-/**********************************************************************
+/******************************************************************//**
+This function sets the lock->writer_thread and lock->recursive fields.
+For platforms where we are using atomic builtins instead of lock->mutex
+it sets the lock->writer_thread field using atomics to ensure memory
+ordering. Note that it is assumed that the caller of this function
+effectively owns the lock i.e.: nobody else is allowed to modify
+lock->writer_thread at this point in time.
+The protocol is that lock->writer_thread MUST be updated BEFORE the
+lock->recursive flag is set. */
+UNIV_INLINE
+void
+rw_lock_set_writer_id_and_recursion_flag(
+/*=====================================*/
+	rw_lock_t*	lock,		/*!< in/out: lock to work on */
+	ibool		recursive)	/*!< in: TRUE if recursion
+					allowed */
+{
+	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
+
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	os_thread_id_t	local_thread;
+	ibool		success;
+
+	/* Prevent Valgrind warnings about writer_thread being
+	uninitialized.  It does not matter if writer_thread is
+	uninitialized, because we are comparing writer_thread against
+	itself, and the operation should always succeed. */
+	UNIV_MEM_VALID(&lock->writer_thread, sizeof lock->writer_thread);
+
+	local_thread = lock->writer_thread;
+	success = os_compare_and_swap_thread_id(
+		&lock->writer_thread, local_thread, curr_thread);
+	ut_a(success);
+	lock->recursive = recursive;
+
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+
+	mutex_enter(&lock->mutex);
+	lock->writer_thread = curr_thread;
+	lock->recursive = recursive;
+	mutex_exit(&lock->mutex);
+
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+}
+
+/******************************************************************//**
 Low-level function which tries to lock an rw-lock in s-mode. Performs no
-spinning. */
+spinning.
+@return	TRUE if success */
 UNIV_INLINE
 ibool
 rw_lock_s_lock_low(
 /*===============*/
-				/* out: TRUE if success */
-	rw_lock_t*	lock,	/* in: pointer to rw-lock */
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
 	ulint		pass __attribute__((unused)),
-				/* in: pass value; != 0, if the lock will be
+				/*!< in: pass value; != 0, if the lock will be
 				passed to another thread to unlock */
-	const char*	file_name, /* in: file name where lock requested */
-	ulint		line)	/* in: line where requested */
+	const char*	file_name, /*!< in: file name where lock requested */
+	ulint		line)	/*!< in: line where requested */
 {
 	/* TODO: study performance of UNIV_LIKELY branch prediction hints. */
 	if (!rw_lock_lock_word_decr(lock, 1)) {
@@ -254,8 +325,7 @@ rw_lock_s_lock_low(
 	return(TRUE);	/* locking succeeded */
 }
 
-/* TODO: The "direct" functions are not used. Remove them? */
-/**********************************************************************
+/******************************************************************//**
 Low-level function which locks an rw-lock in s-mode when we know that it
 is possible and none else is currently accessing the rw-lock structure.
 Then we can do the locking without reserving the mutex. */
@@ -263,9 +333,9 @@ UNIV_INLINE
 void
 rw_lock_s_lock_direct(
 /*==================*/
-	rw_lock_t*	lock,		/* in: pointer to rw-lock */
-	const char*	file_name,	/* in: file name where requested */
-	ulint		line)		/* in: line where lock requested */
+	rw_lock_t*	lock,		/*!< in/out: rw-lock */
+	const char*	file_name,	/*!< in: file name where requested */
+	ulint		line)		/*!< in: line where lock requested */
 {
 	ut_ad(lock->lock_word == X_LOCK_DECR);
 
@@ -280,8 +350,7 @@ rw_lock_s_lock_direct(
 #endif
 }
 
-/* TODO: The "direct" functions are not used. Remove them? */
-/**********************************************************************
+/******************************************************************//**
 Low-level function which locks an rw-lock in x-mode when we know that it
 is not locked and none else is currently accessing the rw-lock structure.
 Then we can do the locking without reserving the mutex. */
@@ -289,16 +358,16 @@ UNIV_INLINE
 void
 rw_lock_x_lock_direct(
 /*==================*/
-	rw_lock_t*	lock,		/* in: pointer to rw-lock */
-	const char*	file_name,	/* in: file name where requested */
-	ulint		line)		/* in: line where lock requested */
+	rw_lock_t*	lock,		/*!< in/out: rw-lock */
+	const char*	file_name,	/*!< in: file name where requested */
+	ulint		line)		/*!< in: line where lock requested */
 {
 	ut_ad(rw_lock_validate(lock));
 	ut_ad(lock->lock_word == X_LOCK_DECR);
 
 	lock->lock_word -= X_LOCK_DECR;
 	lock->writer_thread = os_thread_get_curr_id();
-	lock->pass = 0;
+	lock->recursive = TRUE;
 
 	lock->last_x_file_name = file_name;
 	lock->last_x_line = line;
@@ -308,7 +377,7 @@ rw_lock_x_lock_direct(
 #endif
 }
 
-/**********************************************************************
+/******************************************************************//**
 NOTE! Use the corresponding macro, not directly this function! Lock an
 rw-lock in shared mode for the current thread. If the rw-lock is locked
 in exclusive mode, or there is an exclusive lock request waiting, the
@@ -318,11 +387,11 @@ UNIV_INLINE
 void
 rw_lock_s_lock_func(
 /*================*/
-	rw_lock_t*	lock,	/* in: pointer to rw-lock */
-	ulint		pass,	/* in: pass value; != 0, if the lock will
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
 				be passed to another thread to unlock */
-	const char*	file_name,/* in: file name where lock requested */
-	ulint		line)	/* in: line where requested */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line)	/*!< in: line where requested */
 {
 	/* NOTE: As we do not know the thread ids for threads which have
 	s-locked a latch, and s-lockers will be served only after waiting
@@ -352,47 +421,41 @@ rw_lock_s_lock_func(
 	}
 }
 
-/**********************************************************************
+/******************************************************************//**
 NOTE! Use the corresponding macro, not directly this function! Lock an
 rw-lock in exclusive mode for the current thread if the lock can be
-obtained immediately. */
+obtained immediately.
+@return	TRUE if success */
 UNIV_INLINE
 ibool
 rw_lock_x_lock_func_nowait(
 /*=======================*/
-				/* out: TRUE if success */
-	rw_lock_t*	lock,	/* in: pointer to rw-lock */
-	const char*	file_name,/* in: file name where lock requested */
-	ulint		line)	/* in: line where requested */
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line)	/*!< in: line where requested */
 {
 	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
 
 	ibool success;
 
-#ifdef UNIV_SYNC_ATOMIC
-	success = os_compare_and_swap(&(lock->lock_word), X_LOCK_DECR, 0);
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	success = os_compare_and_swap_lint(&lock->lock_word, X_LOCK_DECR, 0);
 #else
 
 	success = FALSE;
 	mutex_enter(&(lock->mutex));
-	if(lock->lock_word == X_LOCK_DECR) {
+	if (lock->lock_word == X_LOCK_DECR) {
 		lock->lock_word = 0;
 		success = TRUE;
 	}
 	mutex_exit(&(lock->mutex));
 
 #endif
-	if(success) {
-		lock->writer_thread = curr_thread;
-		lock->pass = 0;
-
-	} else if (!(lock->pass) &&
-		   os_thread_eq(lock->writer_thread, curr_thread)) {
-		/* Must verify pass first: otherwise another thread can
-		call move_ownership suddenly allowing recursive locks.
-		and after we have verified our thread_id matches
-		(though move_ownership has since changed it).*/
+	if (success) {
+		rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
 
+	} else if (lock->recursive
+		   && os_thread_eq(lock->writer_thread, curr_thread)) {
 		/* Relock: this lock_word modification is safe since no other
 		threads can modify (lock, unlock, or reserve) lock_word while
 		there is an exclusive writer and this is the writer thread. */
@@ -416,18 +479,17 @@ rw_lock_x_lock_func_nowait(
 	return(TRUE);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Releases a shared mode lock. */
 UNIV_INLINE
 void
 rw_lock_s_unlock_func(
 /*==================*/
-	rw_lock_t*	lock	/* in: rw-lock */
 #ifdef UNIV_SYNC_DEBUG
-	,ulint		pass	/* in: pass value; != 0, if the lock may have
+	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
 				been passed to another thread to unlock */
 #endif
-	)
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
 {
 	ut_ad((lock->lock_word % X_LOCK_DECR) != 0);
 
@@ -436,7 +498,7 @@ rw_lock_s_unlock_func(
 #endif
 
 	/* Increment lock_word to indicate 1 less reader */
-	if(rw_lock_lock_word_incr(lock, 1) == 0) {
+	if (rw_lock_lock_word_incr(lock, 1) == 0) {
 
 		/* wait_ex waiter exists. It may not be asleep, but we signal
                 anyway. We do not wake other waiters, because they can't
@@ -453,15 +515,14 @@ rw_lock_s_unlock_func(
 #endif
 }
 
-/* TODO: The "direct" functions are not used. Remove them? */
-/**********************************************************************
+/******************************************************************//**
 Releases a shared mode lock when we know there are no waiters and none
 else will access the lock during the time this function is executed. */
 UNIV_INLINE
 void
 rw_lock_s_unlock_direct(
 /*====================*/
-	rw_lock_t*	lock)	/* in: rw-lock */
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
 {
 	ut_ad(lock->lock_word < X_LOCK_DECR);
 
@@ -472,54 +533,53 @@ rw_lock_s_unlock_direct(
 	/* Decrease reader count by incrementing lock_word */
 	lock->lock_word++;
 
-	ut_ad(!rw_lock_get_waiters(lock));
+	ut_ad(!lock->waiters);
 	ut_ad(rw_lock_validate(lock));
 #ifdef UNIV_SYNC_PERF_STAT
 	rw_s_exit_count++;
 #endif
 }
 
-/**********************************************************************
+/******************************************************************//**
 Releases an exclusive mode lock. */
 UNIV_INLINE
 void
 rw_lock_x_unlock_func(
 /*==================*/
-	rw_lock_t*	lock	/* in: rw-lock */
 #ifdef UNIV_SYNC_DEBUG
-	,ulint		pass	/* in: pass value; != 0, if the lock may have
+	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
 				been passed to another thread to unlock */
 #endif
-	)
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
 {
-        uint local_pass;
 	ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
 
-	/*
-          Must reset pass while we still have the lock.
-	  If we are not the last unlocker, we correct it later in the function,
-	  which is harmless since we still hold the lock.
-        */
-        local_pass = lock->pass;
-        lock->pass = 1;
+	/* lock->recursive flag also indicates if lock->writer_thread is
+	valid or stale. If we are the last of the recursive callers
+	then we must unset lock->recursive flag to indicate that the
+	lock->writer_thread is now stale.
+	Note that since we still hold the x-lock we can safely read the
+	lock_word. */
+	if (lock->lock_word == 0) {
+		/* Last caller in a possible recursive chain. */
+		lock->recursive = FALSE;
+		UNIV_MEM_INVALID(&lock->writer_thread,
+				 sizeof lock->writer_thread);
+	}
 
 #ifdef UNIV_SYNC_DEBUG
 	rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
 #endif
 
-	if(rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) {
+	if (rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) {
 		/* Lock is now free. May have to signal read/write waiters.
                 We do not need to signal wait_ex waiters, since they cannot
                 exist when there is a writer. */
-		if(rw_lock_get_waiters(lock)) {
-			rw_lock_reset_waiters(lock);
+		if (lock->waiters) {
+			rw_lock_reset_waiter_flag(lock);
 			os_event_set(lock->event);
 			sync_array_object_signalled(sync_primary_wait_array);
 		}
-
-	} else {
-		/* We still hold x-lock, so we correct pass. */
-		lock->pass = local_pass;
 	}
 
 	ut_ad(rw_lock_validate(lock));
@@ -529,15 +589,14 @@ rw_lock_x_unlock_func(
 #endif
 }
 
-/* TODO: The "direct" functions are not used. Remove them? */
-/**********************************************************************
+/******************************************************************//**
 Releases an exclusive mode lock when we know there are no waiters, and
-none else will access the lock durint the time this function is executed. */
+none else will access the lock during the time this function is executed. */
 UNIV_INLINE
 void
 rw_lock_x_unlock_direct(
 /*====================*/
-	rw_lock_t*	lock)	/* in: rw-lock */
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
 {
 	/* Reset the exclusive lock if this thread no longer has an x-mode
 	lock */
@@ -547,10 +606,16 @@ rw_lock_x_unlock_direct(
 #ifdef UNIV_SYNC_DEBUG
 	rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
 #endif
-	lock->pass = 1;
+
+	if (lock->lock_word == 0) {
+		lock->recursive = FALSE;
+		UNIV_MEM_INVALID(&lock->writer_thread,
+				 sizeof lock->writer_thread);
+	}
+
 	lock->lock_word += X_LOCK_DECR;
 
-	ut_ad(!rw_lock_get_waiters(lock));
+	ut_ad(!lock->waiters);
 	ut_ad(rw_lock_validate(lock));
 
 #ifdef UNIV_SYNC_PERF_STAT
diff --git a/storage/innobase/include/sync0sync.h b/storage/innodb_plugin/include/sync0sync.h
similarity index 66%
rename from storage/innobase/include/sync0sync.h
rename to storage/innodb_plugin/include/sync0sync.h
index ae6c72bcd15..df990823cc4 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innodb_plugin/include/sync0sync.h
@@ -1,7 +1,31 @@
-/******************************************************
-Mutex, the basic synchronization primitive
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0sync.h
+Mutex, the basic synchronization primitive
 
 Created 9/5/1995 Heikki Tuuri
 *******************************************************/
@@ -16,27 +40,31 @@ Created 9/5/1995 Heikki Tuuri
 #include "os0thread.h"
 #include "os0sync.h"
 #include "sync0arr.h"
-#ifndef WIN32
-#include "my_atomic.h"
+
+#if  defined(UNIV_DEBUG) && !defined(UNIV_HOTBACKUP)
+extern my_bool	timed_mutexes;
+#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+
+#ifdef HAVE_WINDOWS_ATOMICS
+typedef LONG lock_word_t;	/*!< On Windows, InterlockedExchange operates
+				on LONG variable */
+#else
+typedef byte lock_word_t;
 #endif
 
-#ifndef UNIV_HOTBACKUP
-extern my_bool	timed_mutexes;
-#endif /* UNIV_HOTBACKUP */
-
-/**********************************************************************
+/******************************************************************//**
 Initializes the synchronization data structures. */
-
+UNIV_INTERN
 void
 sync_init(void);
 /*===========*/
-/**********************************************************************
+/******************************************************************//**
 Frees the resources in synchronization data structures. */
-
+UNIV_INTERN
 void
 sync_close(void);
 /*===========*/
-/**********************************************************************
+/******************************************************************//**
 Creates, or rather, initializes a mutex object to a specified memory
 location (which must be appropriately aligned). The mutex is initialized
 in the reset state. Explicit freeing of the mutex with mutex_free is
@@ -55,57 +83,49 @@ necessary only if the memory block containing it is freed. */
 	mutex_create_func((M), __FILE__, __LINE__)
 #endif
 
-/**********************************************************************
+/******************************************************************//**
 Creates, or rather, initializes a mutex object in a specified memory
 location (which must be appropriately aligned). The mutex is initialized
 in the reset state. Explicit freeing of the mutex with mutex_free is
 necessary only if the memory block containing it is freed. */
-
+UNIV_INTERN
 void
 mutex_create_func(
 /*==============*/
-	mutex_t*	mutex,		/* in: pointer to memory */
+	mutex_t*	mutex,		/*!< in: pointer to memory */
 #ifdef UNIV_DEBUG
-	const char*	cmutex_name,	/* in: mutex name */
+	const char*	cmutex_name,	/*!< in: mutex name */
 # ifdef UNIV_SYNC_DEBUG
-	ulint		level,		/* in: level */
+	ulint		level,		/*!< in: level */
 # endif /* UNIV_SYNC_DEBUG */
 #endif /* UNIV_DEBUG */
-	const char*	cfile_name,	/* in: file name where created */
-	ulint		cline);		/* in: file line where created */
-/**********************************************************************
+	const char*	cfile_name,	/*!< in: file name where created */
+	ulint		cline);		/*!< in: file line where created */
+
+#undef mutex_free			/* Fix for MacOS X */
+
+/******************************************************************//**
 Calling this function is obligatory only if the memory buffer containing
 the mutex is freed. Removes a mutex object from the mutex list. The mutex
 is checked to be in the reset state. */
-
-#undef mutex_free			/* Fix for MacOS X */
+UNIV_INTERN
 void
 mutex_free(
 /*=======*/
-	mutex_t*	mutex);	/* in: mutex */
-/******************************************************************
+	mutex_t*	mutex);	/*!< in: mutex */
+/**************************************************************//**
 NOTE! The following macro should be used in mutex locking, not the
 corresponding function. */
 
 #define mutex_enter(M)	  mutex_enter_func((M), __FILE__, __LINE__)
-/**********************************************************************
-A noninlined function that reserves a mutex. In ha_innodb.cc we have disabled
-inlining of InnoDB functions, and no inlined functions should be called from
-there. That is why we need to duplicate the inlined function here. */
-
-void
-mutex_enter_noninline(
-/*==================*/
-	mutex_t*	mutex);	/* in: mutex */
-/******************************************************************
+/**************************************************************//**
 NOTE! The following macro should be used in mutex locking, not the
 corresponding function. */
 
 /* NOTE! currently same as mutex_enter! */
 
 #define mutex_enter_fast(M)	mutex_enter_func((M), __FILE__, __LINE__)
-#define mutex_enter_fast_func	mutex_enter_func;
-/**********************************************************************
+/******************************************************************//**
 NOTE! Use the corresponding macro in the header file, not this function
 directly. Locks a mutex for the current thread. If the mutex is reserved
 the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
@@ -114,161 +134,157 @@ UNIV_INLINE
 void
 mutex_enter_func(
 /*=============*/
-	mutex_t*	mutex,		/* in: pointer to mutex */
-	const char*	file_name,	/* in: file name where locked */
-	ulint		line);		/* in: line where locked */
-/******************************************************************
+	mutex_t*	mutex,		/*!< in: pointer to mutex */
+	const char*	file_name,	/*!< in: file name where locked */
+	ulint		line);		/*!< in: line where locked */
+/**************************************************************//**
 NOTE! The following macro should be used in mutex locking, not the
 corresponding function. */
 
 #define mutex_enter_nowait(M)	\
 	mutex_enter_nowait_func((M), __FILE__, __LINE__)
-/************************************************************************
+/********************************************************************//**
 NOTE! Use the corresponding macro in the header file, not this function
 directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1. */
-
+acquired immediately, returns with return value 1.
+@return	0 if succeed, 1 if not */
+UNIV_INTERN
 ulint
 mutex_enter_nowait_func(
 /*====================*/
-					/* out: 0 if succeed, 1 if not */
-	mutex_t*	mutex,		/* in: pointer to mutex */
-	const char*	file_name,	/* in: file name where mutex
+	mutex_t*	mutex,		/*!< in: pointer to mutex */
+	const char*	file_name,	/*!< in: file name where mutex
 					requested */
-	ulint		line);		/* in: line where requested */
-/**********************************************************************
+	ulint		line);		/*!< in: line where requested */
+/******************************************************************//**
 Unlocks a mutex owned by the current thread. */
 UNIV_INLINE
 void
 mutex_exit(
 /*=======*/
-	mutex_t*	mutex);	/* in: pointer to mutex */
-/**********************************************************************
-Releases a mutex. */
-
-void
-mutex_exit_noninline(
-/*=================*/
-	mutex_t*	mutex);	/* in: mutex */
-/**********************************************************************
+	mutex_t*	mutex);	/*!< in: pointer to mutex */
+#ifdef UNIV_SYNC_DEBUG
+/******************************************************************//**
 Returns TRUE if no mutex or rw-lock is currently locked.
-Works only in the debug version. */
-
+Works only in the debug version.
+@return	TRUE if no mutexes and rw-locks reserved */
+UNIV_INTERN
 ibool
 sync_all_freed(void);
 /*================*/
+#endif /* UNIV_SYNC_DEBUG */
 /*#####################################################################
 FUNCTION PROTOTYPES FOR DEBUGGING */
-/***********************************************************************
+/*******************************************************************//**
 Prints wait info of the sync system. */
-
+UNIV_INTERN
 void
 sync_print_wait_info(
 /*=================*/
-	FILE*	file);		/* in: file where to print */
-/***********************************************************************
+	FILE*	file);		/*!< in: file where to print */
+/*******************************************************************//**
 Prints info of the sync system. */
-
+UNIV_INTERN
 void
 sync_print(
 /*=======*/
-	FILE*	file);		/* in: file where to print */
+	FILE*	file);		/*!< in: file where to print */
 #ifdef UNIV_DEBUG
-/**********************************************************************
-Checks that the mutex has been initialized. */
-
+/******************************************************************//**
+Checks that the mutex has been initialized.
+@return	TRUE */
+UNIV_INTERN
 ibool
 mutex_validate(
 /*===========*/
-	const mutex_t*	mutex);
-/**********************************************************************
+	const mutex_t*	mutex);	/*!< in: mutex */
+/******************************************************************//**
 Checks that the current thread owns the mutex. Works only
-in the debug version. */
-
+in the debug version.
+@return	TRUE if owns */
+UNIV_INTERN
 ibool
 mutex_own(
 /*======*/
-				/* out: TRUE if owns */
-	const mutex_t*	mutex);	/* in: mutex */
+	const mutex_t*	mutex);	/*!< in: mutex */
 #endif /* UNIV_DEBUG */
 #ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
 Adds a latch and its level in the thread level array. Allocates the memory
 for the array if called first time for this OS thread. Makes the checks
 against other latch levels stored in the array for this thread. */
-
+UNIV_INTERN
 void
 sync_thread_add_level(
 /*==================*/
-	void*	latch,	/* in: pointer to a mutex or an rw-lock */
-	ulint	level);	/* in: level in the latching order; if
+	void*	latch,	/*!< in: pointer to a mutex or an rw-lock */
+	ulint	level);	/*!< in: level in the latching order; if
 			SYNC_LEVEL_VARYING, nothing is done */
-/**********************************************************************
-Removes a latch from the thread level array if it is found there. */
-
+/******************************************************************//**
+Removes a latch from the thread level array if it is found there.
+@return TRUE if found in the array; it is no error if the latch is
+not found, as we presently are not able to determine the level for
+every latch reservation the program does */
+UNIV_INTERN
 ibool
 sync_thread_reset_level(
 /*====================*/
-			/* out: TRUE if found from the array; it is no error
-			if the latch is not found, as we presently are not
-			able to determine the level for every latch
-			reservation the program does */
-	void*	latch);	/* in: pointer to a mutex or an rw-lock */
-/**********************************************************************
-Checks that the level array for the current thread is empty. */
-
+	void*	latch);	/*!< in: pointer to a mutex or an rw-lock */
+/******************************************************************//**
+Checks that the level array for the current thread is empty.
+@return	TRUE if empty */
+UNIV_INTERN
 ibool
 sync_thread_levels_empty(void);
 /*==========================*/
-			/* out: TRUE if empty */
-/**********************************************************************
-Checks that the level array for the current thread is empty. */
-
+/******************************************************************//**
+Checks that the level array for the current thread is empty.
+@return	TRUE if empty except the exceptions specified below */
+UNIV_INTERN
 ibool
 sync_thread_levels_empty_gen(
 /*=========================*/
-					/* out: TRUE if empty except the
-					exceptions specified below */
-	ibool	dict_mutex_allowed);	/* in: TRUE if dictionary mutex is
+	ibool	dict_mutex_allowed);	/*!< in: TRUE if dictionary mutex is
 					allowed to be owned by the thread,
 					also purge_is_running mutex is
 					allowed */
-/**********************************************************************
+/******************************************************************//**
 Gets the debug information for a reserved mutex. */
-
+UNIV_INTERN
 void
 mutex_get_debug_info(
 /*=================*/
-	mutex_t*	mutex,		/* in: mutex */
-	const char**	file_name,	/* out: file where requested */
-	ulint*		line,		/* out: line where requested */
-	os_thread_id_t* thread_id);	/* out: id of the thread which owns
+	mutex_t*	mutex,		/*!< in: mutex */
+	const char**	file_name,	/*!< out: file where requested */
+	ulint*		line,		/*!< out: line where requested */
+	os_thread_id_t* thread_id);	/*!< out: id of the thread which owns
 					the mutex */
-/**********************************************************************
-Counts currently reserved mutexes. Works only in the debug version. */
-
+/******************************************************************//**
+Counts currently reserved mutexes. Works only in the debug version.
+@return	number of reserved mutexes */
+UNIV_INTERN
 ulint
 mutex_n_reserved(void);
 /*==================*/
 #endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
+/******************************************************************//**
 NOT to be used outside this module except in debugging! Gets the value
 of the lock word. */
 UNIV_INLINE
-byte
+lock_word_t
 mutex_get_lock_word(
 /*================*/
-	const mutex_t*	mutex);	/* in: mutex */
+	const mutex_t*	mutex);	/*!< in: mutex */
 #ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
 NOT to be used outside this module except in debugging! Gets the waiters
-field in a mutex. */
+field in a mutex.
+@return	value to set */
 UNIV_INLINE
 ulint
 mutex_get_waiters(
 /*==============*/
-				/* out: value to set */
-	const mutex_t*	mutex);	/* in: mutex */
+	const mutex_t*	mutex);	/*!< in: mutex */
 #endif /* UNIV_SYNC_DEBUG */
 
 /*
@@ -403,6 +419,12 @@ or row lock! */
 					their level set after the page is
 					locked; see e.g.
 					ibuf_bitmap_get_map_page(). */
+#define SYNC_TRX_I_S_RWLOCK	1910	/* Used for
+					trx_i_s_cache_t::rw_lock */
+#define SYNC_TRX_I_S_LAST_READ	1900	/* Used for
+					trx_i_s_cache_t::last_read_mutex */
+#define SYNC_FILE_FORMAT_TAG	1200	/* Used to serialize access to the
+					file format tag */
 #define	SYNC_DICT_OPERATION	1001	/* table create, drop, etc. reserve
 					this in X-mode, implicit or backround
 					operations purge, rollback, foreign
@@ -445,7 +467,8 @@ or row lock! */
 #define SYNC_TRX_SYS_HEADER	290
 #define SYNC_LOG		170
 #define SYNC_RECV		168
-#define SYNC_WORK_QUEUE		161
+#define	SYNC_WORK_QUEUE		162
+#define	SYNC_SEARCH_SYS_CONF	161	/* for assigning btr_search_enabled */
 #define	SYNC_SEARCH_SYS		160	/* NOTE that if we have a memory
 					heap that can be extended to the
 					buffer pool, its logical level is
@@ -472,85 +495,79 @@ or row lock! */
 Do not use its fields directly! The structure used in the spin lock
 implementation of a mutual exclusion semaphore. */
 
+/** InnoDB mutex */
 struct mutex_struct {
-	os_event_t	event;	/* Used by sync0arr.c for the wait queue */
+	os_event_t	event;	/*!< Used by sync0arr.c for the wait queue */
+	volatile lock_word_t	lock_word;	/*!< lock_word is the target
+				of the atomic test-and-set instruction when
+				atomic operations are enabled. */
 
- 	byte	lock_word;	/* This byte is the target of the atomic
- 				test-and-set instruction in Win32 and
- 				x86 32/64 with GCC 4.1.0 or later version */
-#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
-#elif defined(MY_ATOMIC_NOLOCK)
-				/* We have my_atomic_* routines that are
-				intrinsically atomic, so no need for the
-				mutex. */
-#else
+#if !defined(HAVE_ATOMIC_BUILTINS)
 	os_fast_mutex_t
-		os_fast_mutex;	/* In other systems we use this OS mutex
-				in place of lock_word */
+		os_fast_mutex;	/*!< We use this OS mutex in place of lock_word
+				when atomic operations are not enabled */
 #endif
-	ulint	waiters;	/* This ulint is set to 1 if there are (or
+	ulint	waiters;	/*!< This ulint is set to 1 if there are (or
 				may be) threads waiting in the global wait
 				array for this mutex to be released.
 				Otherwise, this is 0. */
-	UT_LIST_NODE_T(mutex_t)	list; /* All allocated mutexes are put into
+	UT_LIST_NODE_T(mutex_t)	list; /*!< All allocated mutexes are put into
 				a list.	Pointers to the next and prev. */
 #ifdef UNIV_SYNC_DEBUG
-	const char*	file_name;	/* File where the mutex was locked */
-	ulint	line;		/* Line where the mutex was locked */
-	ulint	level;		/* Level in the global latching order */
+	const char*	file_name;	/*!< File where the mutex was locked */
+	ulint	line;		/*!< Line where the mutex was locked */
+	ulint	level;		/*!< Level in the global latching order */
 #endif /* UNIV_SYNC_DEBUG */
-	const char*	cfile_name;/* File name where mutex created */
-	ulint		cline;	/* Line where created */
+	const char*	cfile_name;/*!< File name where mutex created */
+	ulint		cline;	/*!< Line where created */
 #ifdef UNIV_DEBUG
-	os_thread_id_t thread_id; /* The thread id of the thread
+	os_thread_id_t thread_id; /*!< The thread id of the thread
 				which locked the mutex. */
-	ulint		magic_n;
+	ulint		magic_n;	/*!< MUTEX_MAGIC_N */
+/** Value of mutex_struct::magic_n */
 # define MUTEX_MAGIC_N	(ulint)979585
 #endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
-	ulong		count_os_wait; /* count of os_wait */
-# ifdef UNIV_DEBUG
-	ulong		count_using; /* count of times mutex used */
-	ulong		count_spin_loop; /* count of spin loops */
-	ulong		count_spin_rounds; /* count of spin rounds */
-	ulong		count_os_yield; /* count of os_wait */
-	ulonglong	lspent_time; /* mutex os_wait timer msec */
-	ulonglong	lmax_spent_time; /* mutex os_wait timer msec */
-	const char*	cmutex_name;/* mutex name */
-	ulint		mutex_type;/* 0 - usual mutex 1 - rw_lock mutex	 */
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
+	ulong		count_os_wait;	/*!< count of os_wait */
+#ifdef UNIV_DEBUG
+	ulong		count_using;	/*!< count of times mutex used */
+	ulong		count_spin_loop; /*!< count of spin loops */
+	ulong		count_spin_rounds;/*!< count of spin rounds */
+	ulong		count_os_yield;	/*!< count of os_wait */
+	ulonglong	lspent_time;	/*!< mutex os_wait timer msec */
+	ulonglong	lmax_spent_time;/*!< mutex os_wait timer msec */
+	const char*	cmutex_name;	/*!< mutex name */
+	ulint		mutex_type;	/*!< 0=usual mutex, 1=rw_lock mutex */
+#endif /* UNIV_DEBUG */
 };
 
-/* The global array of wait cells for implementation of the databases own
-mutexes and read-write locks. Appears here for debugging purposes only! */
+/** The global array of wait cells for implementation of the databases own
+mutexes and read-write locks. */
+extern sync_array_t*	sync_primary_wait_array;/* Appears here for
+						debugging purposes only! */
 
-extern sync_array_t*	sync_primary_wait_array;
-
-/* Constant determining how long spin wait is continued before suspending
+/** Constant determining how long spin wait is continued before suspending
 the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond
 to 20 microseconds. */
 
 #define	SYNC_SPIN_ROUNDS	srv_n_spin_wait_rounds
 
-/* The number of system calls made in this module. Intended for performance
-monitoring. */
-
-extern	ib_longlong	mutex_exit_count;
+/** The number of mutex_exit calls. Intended for performance monitoring. */
+extern	ib_int64_t	mutex_exit_count;
 
 #ifdef UNIV_SYNC_DEBUG
-/* Latching order checks start when this is set TRUE */
+/** Latching order checks start when this is set TRUE */
 extern ibool	sync_order_checks_on;
 #endif /* UNIV_SYNC_DEBUG */
 
-/* This variable is set to TRUE when sync_init is called */
+/** This variable is set to TRUE when sync_init is called */
 extern ibool	sync_initialized;
 
-/* Global list of database mutexes (not OS mutexes) created. */
+/** Global list of database mutexes (not OS mutexes) created. */
 typedef UT_LIST_BASE_NODE_T(mutex_t)  ut_list_base_node_t;
+/** Global list of database mutexes (not OS mutexes) created. */
 extern ut_list_base_node_t  mutex_list;
 
-/* Mutex protecting the mutex_list variable */
+/** Mutex protecting the mutex_list variable */
 extern mutex_t mutex_list_mutex;
 
 
diff --git a/storage/innobase/include/sync0sync.ic b/storage/innodb_plugin/include/sync0sync.ic
similarity index 59%
rename from storage/innobase/include/sync0sync.ic
rename to storage/innodb_plugin/include/sync0sync.ic
index f5a85e0e7fb..b05020b5660 100644
--- a/storage/innobase/include/sync0sync.ic
+++ b/storage/innodb_plugin/include/sync0sync.ic
@@ -1,96 +1,86 @@
-/******************************************************
-Mutex, the basic synchronization primitive
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0sync.ic
+Mutex, the basic synchronization primitive
 
 Created 9/5/1995 Heikki Tuuri
 *******************************************************/
 
-/**********************************************************************
+/******************************************************************//**
 Sets the waiters field in a mutex. */
-
+UNIV_INTERN
 void
 mutex_set_waiters(
 /*==============*/
-	mutex_t*	mutex,	/* in: mutex */
-	ulint		n);	/* in: value to set */
-/**********************************************************************
+	mutex_t*	mutex,	/*!< in: mutex */
+	ulint		n);	/*!< in: value to set */
+/******************************************************************//**
 Reserves a mutex for the current thread. If the mutex is reserved, the
 function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
 for the mutex before suspending the thread. */
-
+UNIV_INTERN
 void
 mutex_spin_wait(
 /*============*/
-	mutex_t*	mutex,		/* in: pointer to mutex */
-	const char*	file_name,	/* in: file name where mutex
+	mutex_t*	mutex,		/*!< in: pointer to mutex */
+	const char*	file_name,	/*!< in: file name where mutex
 					requested */
-	ulint		line);		/* in: line where requested */
+	ulint		line);		/*!< in: line where requested */
 #ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
 Sets the debug information for a reserved mutex. */
-
+UNIV_INTERN
 void
 mutex_set_debug_info(
 /*=================*/
-	mutex_t*	mutex,		/* in: mutex */
-	const char*	file_name,	/* in: file where requested */
-	ulint		line);		/* in: line where requested */
+	mutex_t*	mutex,		/*!< in: mutex */
+	const char*	file_name,	/*!< in: file where requested */
+	ulint		line);		/*!< in: line where requested */
 #endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
+/******************************************************************//**
 Releases the threads waiting in the primary wait array for this mutex. */
-
+UNIV_INTERN
 void
 mutex_signal_object(
 /*================*/
-	mutex_t*	mutex);	/* in: mutex */
+	mutex_t*	mutex);	/*!< in: mutex */
 
-/**********************************************************************
+/******************************************************************//**
 Performs an atomic test-and-set instruction to the lock_word field of a
-mutex. */
+mutex.
+@return	the previous value of lock_word: 0 or 1 */
 UNIV_INLINE
 byte
 mutex_test_and_set(
 /*===============*/
-				/* out: the previous value of lock_word: 0 or
-				1 */
-	mutex_t*	mutex)	/* in: mutex */
+	mutex_t*	mutex)	/*!< in: mutex */
 {
-#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
-	byte	res;
-	byte*	lw;		/* assembler code is used to ensure that
-				lock_word is loaded from memory */
-	ut_ad(mutex);
-	ut_ad(sizeof(byte) == 1);
-
-	lw = &(mutex->lock_word);
-
-	__asm	MOV	ECX, lw
-		__asm	MOV	EDX, 1
-		__asm	XCHG	DL, BYTE PTR [ECX]
-		__asm	MOV	res, DL
-
-		/* The fence below would prevent this thread from
-		reading the data structure protected by the mutex
-		before the test-and-set operation is committed, but
-		the fence is apparently not needed:
-
-		In a posting to comp.arch newsgroup (August 10, 1997)
-		Andy Glew said that in P6 a LOCKed instruction like
-		XCHG establishes a fence with respect to memory reads
-		and writes and thus an explicit fence is not
-		needed. In P5 he seemed to agree with a previous
-		newsgroup poster that LOCKed instructions serialize
-		all instruction execution, and, consequently, also
-		memory operations. This is confirmed in Intel Software
-		Dev. Manual, Vol. 3. */
-
-		/* mutex_fence(); */
-
-		return(res);
-#elif defined(MY_ATOMIC_NOLOCK)
-	return ((byte)my_atomic_swap8(
-		(int8 volatile *)&(mutex->lock_word), 1));
+#if defined(HAVE_ATOMIC_BUILTINS)
+	return(os_atomic_test_and_set_byte(&mutex->lock_word, 1));
 #else
 	ibool	ret;
 
@@ -108,30 +98,20 @@ mutex_test_and_set(
 #endif
 }
 
-/**********************************************************************
+/******************************************************************//**
 Performs a reset instruction to the lock_word field of a mutex. This
 instruction also serializes memory operations to the program order. */
 UNIV_INLINE
 void
 mutex_reset_lock_word(
 /*==================*/
-	mutex_t*	mutex)	/* in: mutex */
+	mutex_t*	mutex)	/*!< in: mutex */
 {
-#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
-	byte*	lw;		/* assembler code is used to ensure that
-				lock_word is loaded from memory */
-	ut_ad(mutex);
-
-	lw = &(mutex->lock_word);
-
-	__asm	MOV	EDX, 0
-		__asm	MOV	ECX, lw
-		__asm	XCHG	DL, BYTE PTR [ECX]
-#elif defined(MY_ATOMIC_NOLOCK)
+#if defined(HAVE_ATOMIC_BUILTINS)
 	/* In theory __sync_lock_release should be used to release the lock.
 	Unfortunately, it does not work properly alone. The workaround is
 	that more conservative __sync_lock_test_and_set is used instead. */
-	(void)my_atomic_swap8((int8 volatile *)&(mutex->lock_word), 0);
+	os_atomic_test_and_set_byte(&mutex->lock_word, 0);
 #else
 	mutex->lock_word = 0;
 
@@ -139,33 +119,29 @@ mutex_reset_lock_word(
 #endif
 }
 
-/**********************************************************************
+/******************************************************************//**
 Gets the value of the lock word. */
 UNIV_INLINE
-byte
+lock_word_t
 mutex_get_lock_word(
 /*================*/
-	const mutex_t*	mutex)	/* in: mutex */
+	const mutex_t*	mutex)	/*!< in: mutex */
 {
-	const volatile byte*	ptr;	/* declared volatile to ensure that
-					lock_word is loaded from memory */
 	ut_ad(mutex);
 
-	ptr = &(mutex->lock_word);
-
-	return(*ptr);
+	return(mutex->lock_word);
 }
 
-/**********************************************************************
-Gets the waiters field in a mutex. */
+/******************************************************************//**
+Gets the waiters field in a mutex.
+@return	value to set */
 UNIV_INLINE
 ulint
 mutex_get_waiters(
 /*==============*/
-				/* out: value to set */
-	const mutex_t*	mutex)	/* in: mutex */
+	const mutex_t*	mutex)	/*!< in: mutex */
 {
-	const volatile ulint*	ptr;	/* declared volatile to ensure that
+	const volatile ulint*	ptr;	/*!< declared volatile to ensure that
 					the value is read from memory */
 	ut_ad(mutex);
 
@@ -175,13 +151,13 @@ mutex_get_waiters(
 				word from memory is atomic */
 }
 
-/**********************************************************************
+/******************************************************************//**
 Unlocks a mutex owned by the current thread. */
 UNIV_INLINE
 void
 mutex_exit(
 /*=======*/
-	mutex_t*	mutex)	/* in: pointer to mutex */
+	mutex_t*	mutex)	/*!< in: pointer to mutex */
 {
 	ut_ad(mutex_own(mutex));
 
@@ -214,7 +190,7 @@ mutex_exit(
 #endif
 }
 
-/**********************************************************************
+/******************************************************************//**
 Locks a mutex for the current thread. If the mutex is reserved, the function
 spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for the mutex
 before suspending the thread. */
@@ -222,9 +198,9 @@ UNIV_INLINE
 void
 mutex_enter_func(
 /*=============*/
-	mutex_t*	mutex,		/* in: pointer to mutex */
-	const char*	file_name,	/* in: file name where locked */
-	ulint		line)		/* in: line where locked */
+	mutex_t*	mutex,		/*!< in: pointer to mutex */
+	const char*	file_name,	/*!< in: file name where locked */
+	ulint		line)		/*!< in: line where locked */
 {
 	ut_ad(mutex_validate(mutex));
 	ut_ad(!mutex_own(mutex));
@@ -232,9 +208,7 @@ mutex_enter_func(
 	/* Note that we do not peek at the value of lock_word before trying
 	the atomic test_and_set; we could peek, and possibly save time. */
 
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
-	mutex->count_using++;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+	ut_d(mutex->count_using++);
 
 	if (!mutex_test_and_set(mutex)) {
 		ut_d(mutex->thread_id = os_thread_get_curr_id());
diff --git a/storage/innodb_plugin/include/sync0types.h b/storage/innodb_plugin/include/sync0types.h
new file mode 100644
index 00000000000..1911bbac7fd
--- /dev/null
+++ b/storage/innodb_plugin/include/sync0types.h
@@ -0,0 +1,34 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0types.h
+Global types for sync
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0types_h
+#define sync0types_h
+
+/** Rename mutex_t to avoid name space collision on some systems */
+#define mutex_t ib_mutex_t
+/** InnoDB mutex */
+typedef struct mutex_struct		mutex_t;
+
+#endif
diff --git a/storage/innobase/include/thr0loc.h b/storage/innodb_plugin/include/thr0loc.h
similarity index 51%
rename from storage/innobase/include/thr0loc.h
rename to storage/innodb_plugin/include/thr0loc.h
index 32e2dc3ae93..b4bdc33e615 100644
--- a/storage/innobase/include/thr0loc.h
+++ b/storage/innodb_plugin/include/thr0loc.h
@@ -1,7 +1,24 @@
-/******************************************************
-The thread local storage
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/thr0loc.h
+The thread local storage
 
 Created 10/5/1995 Heikki Tuuri
 *******************************************************/
@@ -16,49 +33,49 @@ OS handle to the current thread, or its priority. */
 #include "univ.i"
 #include "os0thread.h"
 
-/********************************************************************
+/****************************************************************//**
 Initializes the thread local storage module. */
-
+UNIV_INTERN
 void
 thr_local_init(void);
 /*================*/
-/***********************************************************************
+/*******************************************************************//**
 Creates a local storage struct for the calling new thread. */
-
+UNIV_INTERN
 void
 thr_local_create(void);
 /*==================*/
-/***********************************************************************
+/*******************************************************************//**
 Frees the local storage struct for the specified thread. */
-
+UNIV_INTERN
 void
 thr_local_free(
 /*===========*/
-	os_thread_id_t	id);	/* in: thread id */
-/***********************************************************************
-Gets the slot number in the thread table of a thread. */
-
+	os_thread_id_t	id);	/*!< in: thread id */
+/*******************************************************************//**
+Gets the slot number in the thread table of a thread.
+@return	slot number */
+UNIV_INTERN
 ulint
 thr_local_get_slot_no(
 /*==================*/
-				/* out: slot number */
-	os_thread_id_t	id);	/* in: thread id of the thread */
-/***********************************************************************
+	os_thread_id_t	id);	/*!< in: thread id of the thread */
+/*******************************************************************//**
 Sets in the local storage the slot number in the thread table of a thread. */
-
+UNIV_INTERN
 void
 thr_local_set_slot_no(
 /*==================*/
-	os_thread_id_t	id,	/* in: thread id of the thread */
-	ulint		slot_no);/* in: slot number */
-/***********************************************************************
+	os_thread_id_t	id,	/*!< in: thread id of the thread */
+	ulint		slot_no);/*!< in: slot number */
+/*******************************************************************//**
 Returns pointer to the 'in_ibuf' field within the current thread local
-storage. */
-
+storage.
+@return	pointer to the in_ibuf field */
+UNIV_INTERN
 ibool*
 thr_local_get_in_ibuf_field(void);
 /*=============================*/
-			/* out: pointer to the in_ibuf field */
 
 #ifndef UNIV_NONINL
 #include "thr0loc.ic"
diff --git a/storage/innodb_plugin/include/thr0loc.ic b/storage/innodb_plugin/include/thr0loc.ic
new file mode 100644
index 00000000000..ce44e512320
--- /dev/null
+++ b/storage/innodb_plugin/include/thr0loc.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/thr0loc.ic
+Thread local storage
+
+Created 10/4/1995 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innodb_plugin/include/trx0i_s.h b/storage/innodb_plugin/include/trx0i_s.h
new file mode 100644
index 00000000000..9bf032de9f9
--- /dev/null
+++ b/storage/innodb_plugin/include/trx0i_s.h
@@ -0,0 +1,240 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0i_s.h
+INFORMATION SCHEMA innodb_trx, innodb_locks and
+innodb_lock_waits tables cache structures and public
+functions.
+
+Created July 17, 2007 Vasil Dimov
+*******************************************************/
+
+#ifndef trx0i_s_h
+#define trx0i_s_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "ut0ut.h"
+
+/** The maximum amount of memory that can be consumed by innodb_trx,
+innodb_locks and innodb_lock_waits information schema tables. */
+#define TRX_I_S_MEM_LIMIT		16777216 /* 16 MiB */
+
+/** The maximum length of a string that can be stored in
+i_s_locks_row_t::lock_data */
+#define TRX_I_S_LOCK_DATA_MAX_LEN	8192
+
+/** The maximum length of a string that can be stored in
+i_s_trx_row_t::trx_query */
+#define TRX_I_S_TRX_QUERY_MAX_LEN	1024
+
+/** A row of INFORMATION_SCHEMA.innodb_locks */
+typedef struct i_s_locks_row_struct	i_s_locks_row_t;
+/** A row of INFORMATION_SCHEMA.innodb_trx */
+typedef struct i_s_trx_row_struct i_s_trx_row_t;
+/** A row of INFORMATION_SCHEMA.innodb_lock_waits */
+typedef struct i_s_lock_waits_row_struct i_s_lock_waits_row_t;
+
+/** Objects of trx_i_s_cache_t::locks_hash */
+typedef struct i_s_hash_chain_struct	i_s_hash_chain_t;
+
+/** Objects of this type are added to the hash table
+trx_i_s_cache_t::locks_hash */
+struct i_s_hash_chain_struct {
+	i_s_locks_row_t*	value;	/*!< row of
+					INFORMATION_SCHEMA.innodb_locks*/
+	i_s_hash_chain_t*	next;	/*!< next item in the hash chain */
+};
+
+/** This structure represents INFORMATION_SCHEMA.innodb_locks row */
+struct i_s_locks_row_struct {
+	ullint		lock_trx_id;	/*!< transaction identifier */
+	const char*	lock_mode;	/*!< lock mode from
+					lock_get_mode_str() */
+	const char*	lock_type;	/*!< lock type from
+					lock_get_type_str() */
+	const char*	lock_table;	/*!< table name from
+					lock_get_table_name() */
+	const char*	lock_index;	/*!< index name from
+					lock_rec_get_index_name() */
+	/** Information for record locks.  All these are
+	ULINT_UNDEFINED for table locks. */
+	/* @{ */
+	ulint		lock_space;	/*!< tablespace identifier */
+	ulint		lock_page;	/*!< page number within the_space */
+	ulint		lock_rec;	/*!< heap number of the record
+					on the page */
+	const char*	lock_data;	/*!< (some) content of the record */
+	/* @} */
+
+	/** The following are auxiliary and not included in the table */
+	/* @{ */
+	ullint		lock_table_id;
+					/*!< table identifier from
+					lock_get_table_id */
+	i_s_hash_chain_t hash_chain;	/*!< hash table chain node for
+					trx_i_s_cache_t::locks_hash */
+	/* @} */
+};
+
+/** This structure represents INFORMATION_SCHEMA.innodb_trx row */
+struct i_s_trx_row_struct {
+	ullint			trx_id;		/*!< transaction identifier */
+	const char*		trx_state;	/*!< transaction state from
+						trx_get_que_state_str() */
+	ib_time_t		trx_started;	/*!< trx_struct::start_time */
+	const i_s_locks_row_t*	requested_lock_row;
+						/*!< pointer to a row
+						in innodb_locks if trx
+						is waiting, or NULL */
+	ib_time_t		trx_wait_started;
+						/*!< trx_struct::wait_started */
+	ullint			trx_weight;	/*!< TRX_WEIGHT() */
+	ulint			trx_mysql_thread_id;
+						/*!< thd_get_thread_id() */
+	const char*		trx_query;	/*!< MySQL statement being
+						executed in the transaction */
+};
+
+/** This structure represents INFORMATION_SCHEMA.innodb_lock_waits row */
+struct i_s_lock_waits_row_struct {
+	const i_s_locks_row_t*	requested_lock_row;	/*!< requested lock */
+	const i_s_locks_row_t*	blocking_lock_row;	/*!< blocking lock */
+};
+
+/** Cache of INFORMATION_SCHEMA table data */
+typedef struct trx_i_s_cache_struct	trx_i_s_cache_t;
+
+/** Auxiliary enum used by functions that need to select one of the
+INFORMATION_SCHEMA tables */
+enum i_s_table {
+	I_S_INNODB_TRX,		/*!< INFORMATION_SCHEMA.innodb_trx */
+	I_S_INNODB_LOCKS,	/*!< INFORMATION_SCHEMA.innodb_locks */
+	I_S_INNODB_LOCK_WAITS	/*!< INFORMATION_SCHEMA.innodb_lock_waits */
+};
+
+/** This is the intermediate buffer where data needed to fill the
+INFORMATION SCHEMA tables is fetched and later retrieved by the C++
+code in handler/i_s.cc. */
+extern trx_i_s_cache_t*	trx_i_s_cache;
+
+/*******************************************************************//**
+Initialize INFORMATION SCHEMA trx related cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_init(
+/*===============*/
+	trx_i_s_cache_t*	cache);	/*!< out: cache to init */
+
+/*******************************************************************//**
+Issue a shared/read lock on the tables cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_start_read(
+/*=====================*/
+	trx_i_s_cache_t*	cache);	/*!< in: cache */
+
+/*******************************************************************//**
+Release a shared/read lock on the tables cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_end_read(
+/*===================*/
+	trx_i_s_cache_t*	cache);	/*!< in: cache */
+
+/*******************************************************************//**
+Issue an exclusive/write lock on the tables cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_start_write(
+/*======================*/
+	trx_i_s_cache_t*	cache);	/*!< in: cache */
+
+/*******************************************************************//**
+Release an exclusive/write lock on the tables cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_end_write(
+/*====================*/
+	trx_i_s_cache_t*	cache);	/*!< in: cache */
+
+
+/*******************************************************************//**
+Retrieves the number of used rows in the cache for a given
+INFORMATION SCHEMA table.
+@return	number of rows */
+UNIV_INTERN
+ulint
+trx_i_s_cache_get_rows_used(
+/*========================*/
+	trx_i_s_cache_t*	cache,	/*!< in: cache */
+	enum i_s_table		table);	/*!< in: which table */
+
+/*******************************************************************//**
+Retrieves the nth row in the cache for a given INFORMATION SCHEMA
+table.
+@return	row */
+UNIV_INTERN
+void*
+trx_i_s_cache_get_nth_row(
+/*======================*/
+	trx_i_s_cache_t*	cache,	/*!< in: cache */
+	enum i_s_table		table,	/*!< in: which table */
+	ulint			n);	/*!< in: row number */
+
+/*******************************************************************//**
+Update the transactions cache if it has not been read for some time.
+@return	0 - fetched, 1 - not */
+UNIV_INTERN
+int
+trx_i_s_possibly_fetch_data_into_cache(
+/*===================================*/
+	trx_i_s_cache_t*	cache);	/*!< in/out: cache */
+
+/*******************************************************************//**
+Returns TRUE if the data in the cache is truncated due to the memory
+limit posed by TRX_I_S_MEM_LIMIT.
+@return	TRUE if truncated */
+UNIV_INTERN
+ibool
+trx_i_s_cache_is_truncated(
+/*=======================*/
+	trx_i_s_cache_t*	cache);	/*!< in: cache */
+
+/** The maximum length of a resulting lock_id_size in
+trx_i_s_create_lock_id(), not including the terminating NUL.
+":%lu:%lu:%lu" -> 63 chars */
+#define TRX_I_S_LOCK_ID_MAX_LEN	(TRX_ID_MAX_LEN + 63)
+
+/*******************************************************************//**
+Crafts a lock id string from a i_s_locks_row_t object. Returns its
+second argument. This function aborts if there is not enough space in
+lock_id. Be sure to provide at least TRX_I_S_LOCK_ID_MAX_LEN + 1 if you
+want to be 100% sure that it will not abort.
+@return	resulting lock id */
+UNIV_INTERN
+char*
+trx_i_s_create_lock_id(
+/*===================*/
+	const i_s_locks_row_t*	row,	/*!< in: innodb_locks row */
+	char*			lock_id,/*!< out: resulting lock_id */
+	ulint			lock_id_size);/*!< in: size of the lock id
+					buffer */
+
+#endif /* trx0i_s_h */
diff --git a/storage/innobase/include/trx0purge.h b/storage/innodb_plugin/include/trx0purge.h
similarity index 51%
rename from storage/innobase/include/trx0purge.h
rename to storage/innodb_plugin/include/trx0purge.h
index c4aab91a93a..7812ad7eb92 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innodb_plugin/include/trx0purge.h
@@ -1,7 +1,24 @@
-/******************************************************
-Purge old versions
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0purge.h
+Purge old versions
 
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
@@ -18,143 +35,140 @@ Created 3/26/1996 Heikki Tuuri
 #include "usr0sess.h"
 #include "fil0fil.h"
 
-/* The global data structure coordinating a purge */
+/** The global data structure coordinating a purge */
 extern trx_purge_t*	purge_sys;
 
-/* A dummy undo record used as a return value when we have a whole undo log
+/** A dummy undo record used as a return value when we have a whole undo log
 which needs no purge */
 extern trx_undo_rec_t	trx_purge_dummy_rec;
 
-/************************************************************************
+/********************************************************************//**
 Calculates the file address of an undo log header when we have the file
-address of its history list node. */
+address of its history list node.
+@return	file address of the log */
 UNIV_INLINE
 fil_addr_t
 trx_purge_get_log_from_hist(
 /*========================*/
-					/* out: file address of the log */
-	fil_addr_t	node_addr);	/* in: file address of the history
+	fil_addr_t	node_addr);	/*!< in: file address of the history
 					list node of the log */
-/*********************************************************************
+/*****************************************************************//**
 Checks if trx_id is >= purge_view: then it is guaranteed that its update
-undo log still exists in the system. */
-
+undo log still exists in the system.
+@return TRUE if is sure that it is preserved, also if the function
+returns FALSE, it is possible that the undo log still exists in the
+system */
+UNIV_INTERN
 ibool
 trx_purge_update_undo_must_exist(
 /*=============================*/
-			/* out: TRUE if is sure that it is preserved, also
-			if the function returns FALSE, it is possible that
-			the undo log still exists in the system */
-	dulint	trx_id);/* in: transaction id */
-/************************************************************************
+	trx_id_t	trx_id);/*!< in: transaction id */
+/********************************************************************//**
 Creates the global purge system control structure and inits the history
 mutex. */
-
+UNIV_INTERN
 void
 trx_purge_sys_create(void);
 /*======================*/
-/************************************************************************
+/********************************************************************//**
 Adds the update undo log as the first log in the history list. Removes the
 update undo log segment from the rseg slot if it is too big for reuse. */
-
+UNIV_INTERN
 void
 trx_purge_add_update_undo_to_history(
 /*=================================*/
-	trx_t*	trx,		/* in: transaction */
-	page_t*	undo_page,	/* in: update undo log header page,
+	trx_t*	trx,		/*!< in: transaction */
+	page_t*	undo_page,	/*!< in: update undo log header page,
 				x-latched */
-	mtr_t*	mtr);		/* in: mtr */
-/************************************************************************
+	mtr_t*	mtr);		/*!< in: mtr */
+/********************************************************************//**
 Fetches the next undo log record from the history list to purge. It must be
-released with the corresponding release function. */
-
+released with the corresponding release function.
+@return copy of an undo log record or pointer to trx_purge_dummy_rec,
+if the whole undo log can skipped in purge; NULL if none left */
+UNIV_INTERN
 trx_undo_rec_t*
 trx_purge_fetch_next_rec(
 /*=====================*/
-				/* out: copy of an undo log record, or
-				pointer to the dummy undo log record
-				&trx_purge_dummy_rec if the whole undo log
-				can skipped in purge; NULL if none left */
-	dulint*		roll_ptr,/* out: roll pointer to undo record */
-	trx_undo_inf_t** cell,	/* out: storage cell for the record in the
+	roll_ptr_t*	roll_ptr,/*!< out: roll pointer to undo record */
+	trx_undo_inf_t** cell,	/*!< out: storage cell for the record in the
 				purge array */
-	mem_heap_t*	heap);	/* in: memory heap where copied */
-/***********************************************************************
+	mem_heap_t*	heap);	/*!< in: memory heap where copied */
+/*******************************************************************//**
 Releases a reserved purge undo record. */
-
+UNIV_INTERN
 void
 trx_purge_rec_release(
 /*==================*/
-	trx_undo_inf_t*	cell);	/* in: storage cell */
-/***********************************************************************
-This function runs a purge batch. */
-
+	trx_undo_inf_t*	cell);	/*!< in: storage cell */
+/*******************************************************************//**
+This function runs a purge batch.
+@return	number of undo log pages handled in the batch */
+UNIV_INTERN
 ulint
 trx_purge(void);
 /*===========*/
-				/* out: number of undo log pages handled in
-				the batch */
-/**********************************************************************
+/******************************************************************//**
 Prints information of the purge system to stderr. */
-
+UNIV_INTERN
 void
 trx_purge_sys_print(void);
 /*======================*/
 
-/* The control structure used in the purge operation */
+/** The control structure used in the purge operation */
 struct trx_purge_struct{
-	ulint		state;		/* Purge system state */
-	sess_t*		sess;		/* System session running the purge
+	ulint		state;		/*!< Purge system state */
+	sess_t*		sess;		/*!< System session running the purge
 					query */
-	trx_t*		trx;		/* System transaction running the purge
+	trx_t*		trx;		/*!< System transaction running the purge
 					query: this trx is not in the trx list
 					of the trx system and it never ends */
-	que_t*		query;		/* The query graph which will do the
+	que_t*		query;		/*!< The query graph which will do the
 					parallelized purge operation */
-	rw_lock_t	latch;		/* The latch protecting the purge view.
+	rw_lock_t	latch;		/*!< The latch protecting the purge view.
 					A purge operation must acquire an
 					x-latch here for the instant at which
 					it changes the purge view: an undo
 					log operation can prevent this by
 					obtaining an s-latch here. */
-	read_view_t*	view;		/* The purge will not remove undo logs
+	read_view_t*	view;		/*!< The purge will not remove undo logs
 					which are >= this view (purge view) */
-	mutex_t		mutex;		/* Mutex protecting the fields below */
-	ulint		n_pages_handled;/* Approximate number of undo log
+	mutex_t		mutex;		/*!< Mutex protecting the fields below */
+	ulint		n_pages_handled;/*!< Approximate number of undo log
 					pages processed in purge */
-	ulint		handle_limit;	/* Target of how many pages to get
+	ulint		handle_limit;	/*!< Target of how many pages to get
 					processed in the current purge */
 	/*------------------------------*/
 	/* The following two fields form the 'purge pointer' which advances
 	during a purge, and which is used in history list truncation */
 
-	dulint		purge_trx_no;	/* Purge has advanced past all
+	trx_id_t	purge_trx_no;	/*!< Purge has advanced past all
 					transactions whose number is less
 					than this */
-	dulint		purge_undo_no;	/* Purge has advanced past all records
+	undo_no_t	purge_undo_no;	/*!< Purge has advanced past all records
 					whose undo number is less than this */
 	/*-----------------------------*/
-	ibool		next_stored;	/* TRUE if the info of the next record
+	ibool		next_stored;	/*!< TRUE if the info of the next record
 					to purge is stored below: if yes, then
 					the transaction number and the undo
 					number of the record are stored in
 					purge_trx_no and purge_undo_no above */
-	trx_rseg_t*	rseg;		/* Rollback segment for the next undo
+	trx_rseg_t*	rseg;		/*!< Rollback segment for the next undo
 					record to purge */
-	ulint		page_no;	/* Page number for the next undo
+	ulint		page_no;	/*!< Page number for the next undo
 					record to purge, page number of the
 					log header, if dummy record */
-	ulint		offset;		/* Page offset for the next undo
+	ulint		offset;		/*!< Page offset for the next undo
 					record to purge, 0 if the dummy
 					record */
-	ulint		hdr_page_no;	/* Header page of the undo log where
+	ulint		hdr_page_no;	/*!< Header page of the undo log where
 					the next record to purge belongs */
-	ulint		hdr_offset;	/* Header byte offset on the page */
+	ulint		hdr_offset;	/*!< Header byte offset on the page */
 	/*-----------------------------*/
-	trx_undo_arr_t*	arr;		/* Array of transaction numbers and
+	trx_undo_arr_t*	arr;		/*!< Array of transaction numbers and
 					undo numbers of the undo records
 					currently under processing in purge */
-	mem_heap_t*	heap;		/* Temporary storage used during a
+	mem_heap_t*	heap;		/*!< Temporary storage used during a
 					purge: can be emptied after purge
 					completes */
 };
diff --git a/storage/innodb_plugin/include/trx0purge.ic b/storage/innodb_plugin/include/trx0purge.ic
new file mode 100644
index 00000000000..de09e393654
--- /dev/null
+++ b/storage/innodb_plugin/include/trx0purge.ic
@@ -0,0 +1,43 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0purge.ic
+Purge old versions
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0undo.h"
+
+/********************************************************************//**
+Calculates the file address of an undo log header when we have the file
+address of its history list node.
+@return	file address of the log */
+UNIV_INLINE
+fil_addr_t
+trx_purge_get_log_from_hist(
+/*========================*/
+	fil_addr_t	node_addr)	/*!< in: file address of the history
+					list node of the log */
+{
+	node_addr.boffset -= TRX_UNDO_HISTORY_NODE;
+
+	return(node_addr);
+}
+
diff --git a/storage/innodb_plugin/include/trx0rec.h b/storage/innodb_plugin/include/trx0rec.h
new file mode 100644
index 00000000000..0ae82c33afe
--- /dev/null
+++ b/storage/innodb_plugin/include/trx0rec.h
@@ -0,0 +1,338 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0rec.h
+Transaction undo log record
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0rec_h
+#define trx0rec_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+#include "dict0types.h"
+#include "data0data.h"
+#include "rem0types.h"
+
+#ifndef UNIV_HOTBACKUP
+# include "que0types.h"
+
+/***********************************************************************//**
+Copies the undo record to the heap.
+@return	own: copy of undo log record */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_rec_copy(
+/*==============*/
+	trx_undo_rec_t*	undo_rec,	/*!< in: undo log record */
+	mem_heap_t*	heap);		/*!< in: heap where copied */
+/**********************************************************************//**
+Reads the undo log record type.
+@return	record type */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_type(
+/*==================*/
+	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
+/**********************************************************************//**
+Reads from an undo log record the record compiler info.
+@return	compiler info */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_cmpl_info(
+/*=======================*/
+	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
+/**********************************************************************//**
+Returns TRUE if an undo log record contains an extern storage field.
+@return	TRUE if extern */
+UNIV_INLINE
+ibool
+trx_undo_rec_get_extern_storage(
+/*============================*/
+	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
+/**********************************************************************//**
+Reads the undo log record number.
+@return	undo no */
+UNIV_INLINE
+undo_no_t
+trx_undo_rec_get_undo_no(
+/*=====================*/
+	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
+/**********************************************************************//**
+Returns the start of the undo record data area.
+@return	offset to the data area */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_offset(
+/*====================*/
+	undo_no_t	undo_no)	/*!< in: undo no read from node */
+	__attribute__((const));
+
+/**********************************************************************//**
+Returns the start of the undo record data area. */
+#define trx_undo_rec_get_ptr(undo_rec, undo_no)		\
+	((undo_rec) + trx_undo_rec_get_offset(undo_no))
+
+/**********************************************************************//**
+Reads from an undo log record the general parameters.
+@return	remaining part of undo log record after reading these values */
+UNIV_INTERN
+byte*
+trx_undo_rec_get_pars(
+/*==================*/
+	trx_undo_rec_t*	undo_rec,	/*!< in: undo log record */
+	ulint*		type,		/*!< out: undo record type:
+					TRX_UNDO_INSERT_REC, ... */
+	ulint*		cmpl_info,	/*!< out: compiler info, relevant only
+					for update type records */
+	ibool*		updated_extern,	/*!< out: TRUE if we updated an
+					externally stored fild */
+	undo_no_t*	undo_no,	/*!< out: undo log record number */
+	dulint*		table_id);	/*!< out: table id */
+/*******************************************************************//**
+Builds a row reference from an undo log record.
+@return	pointer to remaining part of undo record */
+UNIV_INTERN
+byte*
+trx_undo_rec_get_row_ref(
+/*=====================*/
+	byte*		ptr,	/*!< in: remaining part of a copy of an undo log
+				record, at the start of the row reference;
+				NOTE that this copy of the undo log record must
+				be preserved as long as the row reference is
+				used, as we do NOT copy the data in the
+				record! */
+	dict_index_t*	index,	/*!< in: clustered index */
+	dtuple_t**	ref,	/*!< out, own: row reference */
+	mem_heap_t*	heap);	/*!< in: memory heap from which the memory
+				needed is allocated */
+/*******************************************************************//**
+Skips a row reference from an undo log record.
+@return	pointer to remaining part of undo record */
+UNIV_INTERN
+byte*
+trx_undo_rec_skip_row_ref(
+/*======================*/
+	byte*		ptr,	/*!< in: remaining part in update undo log
+				record, at the start of the row reference */
+	dict_index_t*	index);	/*!< in: clustered index */
+/**********************************************************************//**
+Reads from an undo log update record the system field values of the old
+version.
+@return	remaining part of undo log record after reading these values */
+UNIV_INTERN
+byte*
+trx_undo_update_rec_get_sys_cols(
+/*=============================*/
+	byte*		ptr,		/*!< in: remaining part of undo
+					log record after reading
+					general parameters */
+	trx_id_t*	trx_id,		/*!< out: trx id */
+	roll_ptr_t*	roll_ptr,	/*!< out: roll ptr */
+	ulint*		info_bits);	/*!< out: info bits state */
+/*******************************************************************//**
+Builds an update vector based on a remaining part of an undo log record.
+@return remaining part of the record, NULL if an error detected, which
+means that the record is corrupted */
+UNIV_INTERN
+byte*
+trx_undo_update_rec_get_update(
+/*===========================*/
+	byte*		ptr,	/*!< in: remaining part in update undo log
+				record, after reading the row reference
+				NOTE that this copy of the undo log record must
+				be preserved as long as the update vector is
+				used, as we do NOT copy the data in the
+				record! */
+	dict_index_t*	index,	/*!< in: clustered index */
+	ulint		type,	/*!< in: TRX_UNDO_UPD_EXIST_REC,
+				TRX_UNDO_UPD_DEL_REC, or
+				TRX_UNDO_DEL_MARK_REC; in the last case,
+				only trx id and roll ptr fields are added to
+				the update vector */
+	trx_id_t	trx_id,	/*!< in: transaction id from this undorecord */
+	roll_ptr_t	roll_ptr,/*!< in: roll pointer from this undo record */
+	ulint		info_bits,/*!< in: info bits from this undo record */
+	trx_t*		trx,	/*!< in: transaction */
+	mem_heap_t*	heap,	/*!< in: memory heap from which the memory
+				needed is allocated */
+	upd_t**		upd);	/*!< out, own: update vector */
+/*******************************************************************//**
+Builds a partial row from an update undo log record. It contains the
+columns which occur as ordering in any index of the table.
+@return	pointer to remaining part of undo record */
+UNIV_INTERN
+byte*
+trx_undo_rec_get_partial_row(
+/*=========================*/
+	byte*		ptr,	/*!< in: remaining part in update undo log
+				record of a suitable type, at the start of
+				the stored index columns;
+				NOTE that this copy of the undo log record must
+				be preserved as long as the partial row is
+				used, as we do NOT copy the data in the
+				record! */
+	dict_index_t*	index,	/*!< in: clustered index */
+	dtuple_t**	row,	/*!< out, own: partial row */
+	ibool		ignore_prefix, /*!< in: flag to indicate if we
+				expect blob prefixes in undo. Used
+				only in the assertion. */
+	mem_heap_t*	heap);	/*!< in: memory heap from which the memory
+				needed is allocated */
+/***********************************************************************//**
+Writes information to an undo log about an insert, update, or a delete marking
+of a clustered index record. This information is used in a rollback of the
+transaction and in consistent reads that must look to the history of this
+transaction.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+trx_undo_report_row_operation(
+/*==========================*/
+	ulint		flags,		/*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
+					set, does nothing */
+	ulint		op_type,	/*!< in: TRX_UNDO_INSERT_OP or
+					TRX_UNDO_MODIFY_OP */
+	que_thr_t*	thr,		/*!< in: query thread */
+	dict_index_t*	index,		/*!< in: clustered index */
+	const dtuple_t*	clust_entry,	/*!< in: in the case of an insert,
+					index entry to insert into the
+					clustered index, otherwise NULL */
+	const upd_t*	update,		/*!< in: in the case of an update,
+					the update vector, otherwise NULL */
+	ulint		cmpl_info,	/*!< in: compiler info on secondary
+					index updates */
+	const rec_t*	rec,		/*!< in: case of an update or delete
+					marking, the record in the clustered
+					index, otherwise NULL */
+	roll_ptr_t*	roll_ptr);	/*!< out: rollback pointer to the
+					inserted undo log record,
+					ut_dulint_zero if BTR_NO_UNDO_LOG
+					flag was specified */
+/******************************************************************//**
+Copies an undo record to heap. This function can be called if we know that
+the undo log record exists.
+@return	own: copy of the record */
+UNIV_INTERN
+trx_undo_rec_t*
+trx_undo_get_undo_rec_low(
+/*======================*/
+	roll_ptr_t	roll_ptr,	/*!< in: roll pointer to record */
+	mem_heap_t*	heap);		/*!< in: memory heap where copied */
+/******************************************************************//**
+Copies an undo record to heap.
+
+NOTE: the caller must have latches on the clustered index page and
+purge_view.
+
+@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
+truncated and we cannot fetch the old version */
+UNIV_INTERN
+ulint
+trx_undo_get_undo_rec(
+/*==================*/
+	roll_ptr_t	roll_ptr,	/*!< in: roll pointer to record */
+	trx_id_t	trx_id,		/*!< in: id of the trx that generated
+					the roll pointer: it points to an
+					undo log of this transaction */
+	trx_undo_rec_t** undo_rec,	/*!< out, own: copy of the record */
+	mem_heap_t*	heap);		/*!< in: memory heap where copied */
+/*******************************************************************//**
+Build a previous version of a clustered index record. This function checks
+that the caller has a latch on the index page of the clustered index record
+and an s-latch on the purge_view. This guarantees that the stack of versions
+is locked.
+@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
+earlier than purge_view, which means that it may have been removed,
+DB_ERROR if corrupted record */
+UNIV_INTERN
+ulint
+trx_undo_prev_version_build(
+/*========================*/
+	const rec_t*	index_rec,/*!< in: clustered index record in the
+				index tree */
+	mtr_t*		index_mtr,/*!< in: mtr which contains the latch to
+				index_rec page and purge_view */
+	const rec_t*	rec,	/*!< in: version of a clustered index record */
+	dict_index_t*	index,	/*!< in: clustered index */
+	ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	mem_heap_t*	heap,	/*!< in: memory heap from which the memory
+				needed is allocated */
+	rec_t**		old_vers);/*!< out, own: previous version, or NULL if
+				rec is the first inserted version, or if
+				history data has been deleted */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Parses a redo log record of adding an undo log record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+trx_undo_parse_add_undo_rec(
+/*========================*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	page_t*	page);	/*!< in: page or NULL */
+/***********************************************************//**
+Parses a redo log record of erasing of an undo page end.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+trx_undo_parse_erase_page_end(
+/*==========================*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr);	/*!< in: mtr or NULL */
+
+#ifndef UNIV_HOTBACKUP
+
+/* Types of an undo log record: these have to be smaller than 16, as the
+compilation info multiplied by 16 is ORed to this value in an undo log
+record */
+
+#define	TRX_UNDO_INSERT_REC	11	/* fresh insert into clustered index */
+#define	TRX_UNDO_UPD_EXIST_REC	12	/* update of a non-delete-marked
+					record */
+#define	TRX_UNDO_UPD_DEL_REC	13	/* update of a delete marked record to
+					a not delete marked record; also the
+					fields of the record can change */
+#define	TRX_UNDO_DEL_MARK_REC	14	/* delete marking of a record; fields
+					do not change */
+#define	TRX_UNDO_CMPL_INFO_MULT	16	/* compilation info is multiplied by
+					this and ORed to the type above */
+#define	TRX_UNDO_UPD_EXTERN	128	/* This bit can be ORed to type_cmpl
+					to denote that we updated external
+					storage fields: used by purge to
+					free the external storage */
+
+/* Operation type flags used in trx_undo_report_row_operation */
+#define	TRX_UNDO_INSERT_OP		1
+#define	TRX_UNDO_MODIFY_OP		2
+
+#ifndef UNIV_NONINL
+#include "trx0rec.ic"
+#endif
+
+#endif /* !UNIV_HOTBACKUP */
+
+#endif /* trx0rec_h */
diff --git a/storage/innodb_plugin/include/trx0rec.ic b/storage/innodb_plugin/include/trx0rec.ic
new file mode 100644
index 00000000000..037b5d4f6cf
--- /dev/null
+++ b/storage/innodb_plugin/include/trx0rec.ic
@@ -0,0 +1,112 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0rec.ic
+Transaction undo log record
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Reads from an undo log record the record type.
+@return	record type */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_type(
+/*==================*/
+	const trx_undo_rec_t*	undo_rec)	/*!< in: undo log record */
+{
+	return(mach_read_from_1(undo_rec + 2) & (TRX_UNDO_CMPL_INFO_MULT - 1));
+}
+
+/**********************************************************************//**
+Reads from an undo log record the record compiler info.
+@return	compiler info */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_cmpl_info(
+/*=======================*/
+	const trx_undo_rec_t*	undo_rec)	/*!< in: undo log record */
+{
+	return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
+}
+
+/**********************************************************************//**
+Returns TRUE if an undo log record contains an extern storage field.
+@return	TRUE if extern */
+UNIV_INLINE
+ibool
+trx_undo_rec_get_extern_storage(
+/*============================*/
+	const trx_undo_rec_t*	undo_rec)	/*!< in: undo log record */
+{
+	if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) {
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/**********************************************************************//**
+Reads the undo log record number.
+@return	undo no */
+UNIV_INLINE
+undo_no_t
+trx_undo_rec_get_undo_no(
+/*=====================*/
+	const trx_undo_rec_t*	undo_rec)	/*!< in: undo log record */
+{
+	const byte*	ptr;
+
+	ptr = undo_rec + 3;
+
+	return(mach_dulint_read_much_compressed(ptr));
+}
+
+/**********************************************************************//**
+Returns the start of the undo record data area.
+@return	offset to the data area */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_offset(
+/*====================*/
+	undo_no_t	undo_no)	/*!< in: undo no read from node */
+{
+	return (3 + mach_dulint_get_much_compressed_size(undo_no));
+}
+
+/***********************************************************************//**
+Copies the undo record to the heap.
+@return	own: copy of undo log record */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_rec_copy(
+/*==============*/
+	trx_undo_rec_t*	undo_rec,	/*!< in: undo log record */
+	mem_heap_t*	heap)		/*!< in: heap where copied */
+{
+	ulint		len;
+
+	len = mach_read_from_2(undo_rec)
+		- ut_align_offset(undo_rec, UNIV_PAGE_SIZE);
+	return(mem_heap_dup(heap, undo_rec, len));
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/trx0roll.h b/storage/innodb_plugin/include/trx0roll.h
new file mode 100644
index 00000000000..ddca9e9e4ef
--- /dev/null
+++ b/storage/innodb_plugin/include/trx0roll.h
@@ -0,0 +1,341 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0roll.h
+Transaction rollback
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0roll_h
+#define trx0roll_h
+
+#include "univ.i"
+#include "trx0trx.h"
+#include "trx0types.h"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+
+#define trx_roll_free_all_savepoints(s) trx_roll_savepoints_free((s), NULL)
+
+/*******************************************************************//**
+Determines if this transaction is rolling back an incomplete transaction
+in crash recovery.
+@return TRUE if trx is an incomplete transaction that is being rolled
+back in crash recovery */
+UNIV_INTERN
+ibool
+trx_is_recv(
+/*========*/
+	const trx_t*	trx);	/*!< in: transaction */
+/*******************************************************************//**
+Returns a transaction savepoint taken at this point in time.
+@return	savepoint */
+UNIV_INTERN
+trx_savept_t
+trx_savept_take(
+/*============*/
+	trx_t*	trx);	/*!< in: transaction */
+/*******************************************************************//**
+Creates an undo number array. */
+UNIV_INTERN
+trx_undo_arr_t*
+trx_undo_arr_create(void);
+/*=====================*/
+/*******************************************************************//**
+Frees an undo number array. */
+UNIV_INTERN
+void
+trx_undo_arr_free(
+/*==============*/
+	trx_undo_arr_t*	arr);	/*!< in: undo number array */
+/*******************************************************************//**
+Returns pointer to nth element in an undo number array.
+@return	pointer to the nth element */
+UNIV_INLINE
+trx_undo_inf_t*
+trx_undo_arr_get_nth_info(
+/*======================*/
+	trx_undo_arr_t*	arr,	/*!< in: undo number array */
+	ulint		n);	/*!< in: position */
+/***********************************************************************//**
+Tries truncate the undo logs. */
+UNIV_INTERN
+void
+trx_roll_try_truncate(
+/*==================*/
+	trx_t*	trx);	/*!< in/out: transaction */
+/********************************************************************//**
+Pops the topmost record when the two undo logs of a transaction are seen
+as a single stack of records ordered by their undo numbers. Inserts the
+undo number of the popped undo record to the array of currently processed
+undo numbers in the transaction. When the query thread finishes processing
+of this undo record, it must be released with trx_undo_rec_release.
+@return undo log record copied to heap, NULL if none left, or if the
+undo number of the top record would be less than the limit */
+UNIV_INTERN
+trx_undo_rec_t*
+trx_roll_pop_top_rec_of_trx(
+/*========================*/
+	trx_t*		trx,	/*!< in: transaction */
+	undo_no_t	limit,	/*!< in: least undo number we need */
+	roll_ptr_t*	roll_ptr,/*!< out: roll pointer to undo record */
+	mem_heap_t*	heap);	/*!< in: memory heap where copied */
+/********************************************************************//**
+Reserves an undo log record for a query thread to undo. This should be
+called if the query thread gets the undo log record not using the pop
+function above.
+@return	TRUE if succeeded */
+UNIV_INTERN
+ibool
+trx_undo_rec_reserve(
+/*=================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	undo_no_t	undo_no);/*!< in: undo number of the record */
+/*******************************************************************//**
+Releases a reserved undo record. */
+UNIV_INTERN
+void
+trx_undo_rec_release(
+/*=================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	undo_no_t	undo_no);/*!< in: undo number */
+/*********************************************************************//**
+Starts a rollback operation. */
+UNIV_INTERN
+void
+trx_rollback(
+/*=========*/
+	trx_t*		trx,	/*!< in: transaction */
+	trx_sig_t*	sig,	/*!< in: signal starting the rollback */
+	que_thr_t**	next_thr);/*!< in/out: next query thread to run;
+				if the value which is passed in is
+				a pointer to a NULL pointer, then the
+				calling function can start running
+				a new query thread */
+/*******************************************************************//**
+Rollback or clean up any incomplete transactions which were
+encountered in crash recovery.  If the transaction already was
+committed, then we clean up a possible insert undo log. If the
+transaction was not yet committed, then we roll it back.
+Note: this is done in a background thread.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+trx_rollback_or_clean_all_recovered(
+/*================================*/
+	void*	arg __attribute__((unused)));
+			/*!< in: a dummy parameter required by
+			os_thread_create */
+/****************************************************************//**
+Finishes a transaction rollback. */
+UNIV_INTERN
+void
+trx_finish_rollback_off_kernel(
+/*===========================*/
+	que_t*		graph,	/*!< in: undo graph which can now be freed */
+	trx_t*		trx,	/*!< in: transaction */
+	que_thr_t**	next_thr);/*!< in/out: next query thread to run;
+				if the value which is passed in is
+				a pointer to a NULL pointer, then the
+				calling function can start running
+				a new query thread; if this parameter is
+				NULL, it is ignored */
+/****************************************************************//**
+Builds an undo 'query' graph for a transaction. The actual rollback is
+performed by executing this query graph like a query subprocedure call.
+The reply about the completion of the rollback will be sent by this
+graph.
+@return	own: the query graph */
+UNIV_INTERN
+que_t*
+trx_roll_graph_build(
+/*=================*/
+	trx_t*	trx);	/*!< in: trx handle */
+/*********************************************************************//**
+Creates a rollback command node struct.
+@return	own: rollback node struct */
+UNIV_INTERN
+roll_node_t*
+roll_node_create(
+/*=============*/
+	mem_heap_t*	heap);	/*!< in: mem heap where created */
+/***********************************************************//**
+Performs an execution step for a rollback command node in a query graph.
+@return	query thread to run next, or NULL */
+UNIV_INTERN
+que_thr_t*
+trx_rollback_step(
+/*==============*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/*******************************************************************//**
+Rollback a transaction used in MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+int
+trx_rollback_for_mysql(
+/*===================*/
+	trx_t*	trx);	/*!< in: transaction handle */
+/*******************************************************************//**
+Rollback the latest SQL statement for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+int
+trx_rollback_last_sql_stat_for_mysql(
+/*=================================*/
+	trx_t*	trx);	/*!< in: transaction handle */
+/*******************************************************************//**
+Rollback a transaction used in MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+int
+trx_general_rollback_for_mysql(
+/*===========================*/
+	trx_t*		trx,	/*!< in: transaction handle */
+	ibool		partial,/*!< in: TRUE if partial rollback requested */
+	trx_savept_t*	savept);/*!< in: pointer to savepoint undo number, if
+				partial rollback requested */
+/*******************************************************************//**
+Rolls back a transaction back to a named savepoint. Modifications after the
+savepoint are undone but InnoDB does NOT release the corresponding locks
+which are stored in memory. If a lock is 'implicit', that is, a new inserted
+row holds a lock where the lock information is carried by the trx id stored in
+the row, these locks are naturally released in the rollback. Savepoints which
+were set after this savepoint are deleted.
+@return if no savepoint of the name found then DB_NO_SAVEPOINT,
+otherwise DB_SUCCESS */
+UNIV_INTERN
+ulint
+trx_rollback_to_savepoint_for_mysql(
+/*================================*/
+	trx_t*		trx,			/*!< in: transaction handle */
+	const char*	savepoint_name,		/*!< in: savepoint name */
+	ib_int64_t*	mysql_binlog_cache_pos);/*!< out: the MySQL binlog cache
+						position corresponding to this
+						savepoint; MySQL needs this
+						information to remove the
+						binlog entries of the queries
+						executed after the savepoint */
+/*******************************************************************//**
+Creates a named savepoint. If the transaction is not yet started, starts it.
+If there is already a savepoint of the same name, this call erases that old
+savepoint and replaces it with a new. Savepoints are deleted in a transaction
+commit or rollback.
+@return	always DB_SUCCESS */
+UNIV_INTERN
+ulint
+trx_savepoint_for_mysql(
+/*====================*/
+	trx_t*		trx,			/*!< in: transaction handle */
+	const char*	savepoint_name,		/*!< in: savepoint name */
+	ib_int64_t	binlog_cache_pos);	/*!< in: MySQL binlog cache
+						position corresponding to this
+						connection at the time of the
+						savepoint */
+
+/*******************************************************************//**
+Releases a named savepoint. Savepoints which
+were set after this savepoint are deleted.
+@return if no savepoint of the name found then DB_NO_SAVEPOINT,
+otherwise DB_SUCCESS */
+UNIV_INTERN
+ulint
+trx_release_savepoint_for_mysql(
+/*============================*/
+	trx_t*		trx,			/*!< in: transaction handle */
+	const char*	savepoint_name);	/*!< in: savepoint name */
+
+/*******************************************************************//**
+Frees a single savepoint struct. */
+UNIV_INTERN
+void
+trx_roll_savepoint_free(
+/*=====================*/
+	trx_t*			trx,	/*!< in: transaction handle */
+	trx_named_savept_t*	savep);	/*!< in: savepoint to free */
+
+/*******************************************************************//**
+Frees savepoint structs starting from savep, if savep == NULL then
+free all savepoints. */
+UNIV_INTERN
+void
+trx_roll_savepoints_free(
+/*=====================*/
+	trx_t*			trx,	/*!< in: transaction handle */
+	trx_named_savept_t*	savep);	/*!< in: free all savepoints > this one;
+					if this is NULL, free all savepoints
+					of trx */
+
+/** A cell of trx_undo_arr_struct; used during a rollback and a purge */
+struct	trx_undo_inf_struct{
+	trx_id_t	trx_no;	/*!< transaction number: not defined during
+				a rollback */
+	undo_no_t	undo_no;/*!< undo number of an undo record */
+	ibool		in_use;	/*!< TRUE if the cell is in use */
+};
+
+/** During a rollback and a purge, undo numbers of undo records currently being
+processed are stored in this array */
+
+struct trx_undo_arr_struct{
+	ulint		n_cells;	/*!< number of cells in the array */
+	ulint		n_used;		/*!< number of cells currently in use */
+	trx_undo_inf_t*	infos;		/*!< the array of undo infos */
+	mem_heap_t*	heap;		/*!< memory heap from which allocated */
+};
+
+/** Rollback node states */
+enum roll_node_state {
+	ROLL_NODE_SEND = 1,	/*!< about to send a rollback signal to
+				the transaction */
+	ROLL_NODE_WAIT		/*!< rollback signal sent to the transaction,
+				waiting for completion */
+};
+
+/** Rollback command node in a query graph */
+struct roll_node_struct{
+	que_common_t		common;	/*!< node type: QUE_NODE_ROLLBACK */
+	enum roll_node_state	state;	/*!< node execution state */
+	ibool			partial;/*!< TRUE if we want a partial
+					rollback */
+	trx_savept_t		savept;	/*!< savepoint to which to
+					roll back, in the case of a
+					partial rollback */
+};
+
+/** A savepoint set with SQL's "SAVEPOINT savepoint_id" command */
+struct trx_named_savept_struct{
+	char*		name;		/*!< savepoint name */
+	trx_savept_t	savept;		/*!< the undo number corresponding to
+					the savepoint */
+	ib_int64_t	mysql_binlog_cache_pos;
+					/*!< the MySQL binlog cache position
+					corresponding to this savepoint, not
+					defined if the MySQL binlogging is not
+					enabled */
+	UT_LIST_NODE_T(trx_named_savept_t)
+			trx_savepoints;	/*!< the list of savepoints of a
+					transaction */
+};
+
+#ifndef UNIV_NONINL
+#include "trx0roll.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/trx0roll.ic b/storage/innodb_plugin/include/trx0roll.ic
new file mode 100644
index 00000000000..3460832b18c
--- /dev/null
+++ b/storage/innodb_plugin/include/trx0roll.ic
@@ -0,0 +1,40 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0roll.ic
+Transaction rollback
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+/*******************************************************************//**
+Returns pointer to nth element in an undo number array.
+@return	pointer to the nth element */
+UNIV_INLINE
+trx_undo_inf_t*
+trx_undo_arr_get_nth_info(
+/*======================*/
+	trx_undo_arr_t*	arr,	/*!< in: undo number array */
+	ulint		n)	/*!< in: position */
+{
+	ut_ad(arr);
+	ut_ad(n < arr->n_cells);
+
+	return(arr->infos + n);
+}
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innodb_plugin/include/trx0rseg.h
similarity index 54%
rename from storage/innobase/include/trx0rseg.h
rename to storage/innodb_plugin/include/trx0rseg.h
index 46ba010bd1d..dbc732651ca 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innodb_plugin/include/trx0rseg.h
@@ -1,7 +1,24 @@
-/******************************************************
-Rollback segment
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0rseg.h
+Rollback segment
 
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
@@ -13,104 +30,105 @@ Created 3/26/1996 Heikki Tuuri
 #include "trx0types.h"
 #include "trx0sys.h"
 
-/**********************************************************************
-Gets a rollback segment header. */
+/******************************************************************//**
+Gets a rollback segment header.
+@return	rollback segment header, page x-latched */
 UNIV_INLINE
 trx_rsegf_t*
 trx_rsegf_get(
 /*==========*/
-				/* out: rollback segment header, page
-				x-latched */
-	ulint	space,		/* in: space where placed */
-	ulint	page_no,	/* in: page number of the header */
-	mtr_t*	mtr);		/* in: mtr */
-/**********************************************************************
-Gets a newly created rollback segment header. */
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number of the header */
+	mtr_t*	mtr);		/*!< in: mtr */
+/******************************************************************//**
+Gets a newly created rollback segment header.
+@return	rollback segment header, page x-latched */
 UNIV_INLINE
 trx_rsegf_t*
 trx_rsegf_get_new(
 /*==============*/
-				/* out: rollback segment header, page
-				x-latched */
-	ulint	space,		/* in: space where placed */
-	ulint	page_no,	/* in: page number of the header */
-	mtr_t*	mtr);		/* in: mtr */
-/*******************************************************************
-Gets the file page number of the nth undo log slot. */
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number of the header */
+	mtr_t*	mtr);		/*!< in: mtr */
+/***************************************************************//**
+Gets the file page number of the nth undo log slot.
+@return	page number of the undo log segment */
 UNIV_INLINE
 ulint
 trx_rsegf_get_nth_undo(
 /*===================*/
-				/* out: page number of the undo log segment */
-	trx_rsegf_t*	rsegf,	/* in: rollback segment header */
-	ulint		n,	/* in: index of slot */
-	mtr_t*		mtr);	/* in: mtr */
-/*******************************************************************
+	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
+	ulint		n,	/*!< in: index of slot */
+	mtr_t*		mtr);	/*!< in: mtr */
+/***************************************************************//**
 Sets the file page number of the nth undo log slot. */
 UNIV_INLINE
 void
 trx_rsegf_set_nth_undo(
 /*===================*/
-	trx_rsegf_t*	rsegf,	/* in: rollback segment header */
-	ulint		n,	/* in: index of slot */
-	ulint		page_no,/* in: page number of the undo log segment */
-	mtr_t*		mtr);	/* in: mtr */
-/********************************************************************
-Looks for a free slot for an undo log segment. */
+	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
+	ulint		n,	/*!< in: index of slot */
+	ulint		page_no,/*!< in: page number of the undo log segment */
+	mtr_t*		mtr);	/*!< in: mtr */
+/****************************************************************//**
+Looks for a free slot for an undo log segment.
+@return	slot index or ULINT_UNDEFINED if not found */
 UNIV_INLINE
 ulint
 trx_rsegf_undo_find_free(
 /*=====================*/
-				/* out: slot index or ULINT_UNDEFINED if not
-				found */
-	trx_rsegf_t*	rsegf,	/* in: rollback segment header */
-	mtr_t*		mtr);	/* in: mtr */
-/**********************************************************************
-Looks for a rollback segment, based on the rollback segment id. */
-
+	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
+	mtr_t*		mtr);	/*!< in: mtr */
+/******************************************************************//**
+Looks for a rollback segment, based on the rollback segment id.
+@return	rollback segment */
+UNIV_INTERN
 trx_rseg_t*
 trx_rseg_get_on_id(
 /*===============*/
-			/* out: rollback segment */
-	ulint	id);	/* in: rollback segment id */
-/********************************************************************
+	ulint	id);	/*!< in: rollback segment id */
+/****************************************************************//**
 Creates a rollback segment header. This function is called only when
-a new rollback segment is created in the database. */
-
+a new rollback segment is created in the database.
+@return	page number of the created segment, FIL_NULL if fail */
+UNIV_INTERN
 ulint
 trx_rseg_header_create(
 /*===================*/
-				/* out: page number of the created segment,
-				FIL_NULL if fail */
-	ulint	space,		/* in: space id */
-	ulint	max_size,	/* in: max size in pages */
-	ulint*	slot_no,	/* out: rseg id == slot number in trx sys */
-	mtr_t*	mtr);		/* in: mtr */
-/*************************************************************************
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	max_size,	/*!< in: max size in pages */
+	ulint*	slot_no,	/*!< out: rseg id == slot number in trx sys */
+	mtr_t*	mtr);		/*!< in: mtr */
+/*********************************************************************//**
 Creates the memory copies for rollback segments and initializes the
 rseg list and array in trx_sys at a database startup. */
-
+UNIV_INTERN
 void
 trx_rseg_list_and_array_init(
 /*=========================*/
-	trx_sysf_t*	sys_header,	/* in: trx system header */
-	mtr_t*		mtr);		/* in: mtr */
-/********************************************************************
-Creates a new rollback segment to the database. */
-
+	trx_sysf_t*	sys_header,	/*!< in: trx system header */
+	mtr_t*		mtr);		/*!< in: mtr */
+/****************************************************************//**
+Creates a new rollback segment to the database.
+@return	the created segment object, NULL if fail */
+UNIV_INTERN
 trx_rseg_t*
 trx_rseg_create(
 /*============*/
-				/* out: the created segment object, NULL if
-				fail */
-	ulint	space,		/* in: space id */
-	ulint	max_size,	/* in: max size in pages */
-	ulint*	id,		/* out: rseg id */
-	mtr_t*	mtr);		/* in: mtr */
+	ulint	space,		/*!< in: space id */
+	ulint	max_size,	/*!< in: max size in pages */
+	ulint*	id,		/*!< out: rseg id */
+	mtr_t*	mtr);		/*!< in: mtr */
 
 
 /* Number of undo log slots in a rollback segment file copy */
-#define TRX_RSEG_N_SLOTS	1024
+#define TRX_RSEG_N_SLOTS	(UNIV_PAGE_SIZE / 16)
 
 /* Maximum number of transactions supported by a single rollback segment */
 #define TRX_RSEG_MAX_N_TRXS	(TRX_RSEG_N_SLOTS / 2)
@@ -118,14 +136,16 @@ trx_rseg_create(
 /* The rollback segment memory object */
 struct trx_rseg_struct{
 	/*--------------------------------------------------------*/
-	ulint		id;	/* rollback segment id == the index of
+	ulint		id;	/*!< rollback segment id == the index of
 				its slot in the trx system file copy */
-	mutex_t		mutex;	/* mutex protecting the fields in this
+	mutex_t		mutex;	/*!< mutex protecting the fields in this
 				struct except id; NOTE that the latching
 				order must always be kernel mutex ->
 				rseg mutex */
-	ulint		space;	/* space where the rollback segment is
+	ulint		space;	/*!< space where the rollback segment is
 				header is placed */
+	ulint		zip_size;/* compressed page size of space
+				in bytes, or 0 for uncompressed spaces */
 	ulint		page_no;/* page number of the rollback segment
 				header */
 	ulint		max_size;/* maximum allowed size in pages */
@@ -145,14 +165,14 @@ struct trx_rseg_struct{
 					/* List of insert undo log segments
 					cached for fast reuse */
 	/*--------------------------------------------------------*/
-	ulint		last_page_no;	/* Page number of the last not yet
+	ulint		last_page_no;	/*!< Page number of the last not yet
 					purged log header in the history list;
 					FIL_NULL if all list purged */
-	ulint		last_offset;	/* Byte offset of the last not yet
+	ulint		last_offset;	/*!< Byte offset of the last not yet
 					purged log header */
-	dulint		last_trx_no;	/* Transaction number of the last not
+	trx_id_t	last_trx_no;	/*!< Transaction number of the last not
 					yet purged log */
-	ibool		last_del_marks;	/* TRUE if the last not yet purged log
+	ibool		last_del_marks;	/*!< TRUE if the last not yet purged log
 					needs purging */
 	/*--------------------------------------------------------*/
 	UT_LIST_NODE_T(trx_rseg_t) rseg_list;
diff --git a/storage/innodb_plugin/include/trx0rseg.ic b/storage/innodb_plugin/include/trx0rseg.ic
new file mode 100644
index 00000000000..daffa92fc7d
--- /dev/null
+++ b/storage/innodb_plugin/include/trx0rseg.ic
@@ -0,0 +1,145 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0rseg.ic
+Rollback segment
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "srv0srv.h"
+#include "mtr0log.h"
+
+/******************************************************************//**
+Gets a rollback segment header.
+@return	rollback segment header, page x-latched */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get(
+/*==========*/
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number of the header */
+	mtr_t*	mtr)		/*!< in: mtr */
+{
+	buf_block_t*	block;
+	trx_rsegf_t*	header;
+
+	block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_RSEG_HEADER);
+
+	header = TRX_RSEG + buf_block_get_frame(block);
+
+	return(header);
+}
+
+/******************************************************************//**
+Gets a newly created rollback segment header.
+@return	rollback segment header, page x-latched */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get_new(
+/*==============*/
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number of the header */
+	mtr_t*	mtr)		/*!< in: mtr */
+{
+	buf_block_t*	block;
+	trx_rsegf_t*	header;
+
+	block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW);
+
+	header = TRX_RSEG + buf_block_get_frame(block);
+
+	return(header);
+}
+
+/***************************************************************//**
+Gets the file page number of the nth undo log slot.
+@return	page number of the undo log segment */
+UNIV_INLINE
+ulint
+trx_rsegf_get_nth_undo(
+/*===================*/
+	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
+	ulint		n,	/*!< in: index of slot */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) {
+		fprintf(stderr,
+			"InnoDB: Error: trying to get slot %lu of rseg\n",
+			(ulong) n);
+		ut_error;
+	}
+
+	return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS
+			      + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
+}
+
+/***************************************************************//**
+Sets the file page number of the nth undo log slot. */
+UNIV_INLINE
+void
+trx_rsegf_set_nth_undo(
+/*===================*/
+	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
+	ulint		n,	/*!< in: index of slot */
+	ulint		page_no,/*!< in: page number of the undo log segment */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) {
+		fprintf(stderr,
+			"InnoDB: Error: trying to set slot %lu of rseg\n",
+			(ulong) n);
+		ut_error;
+	}
+
+	mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE,
+			 page_no, MLOG_4BYTES, mtr);
+}
+
+/****************************************************************//**
+Looks for a free slot for an undo log segment.
+@return	slot index or ULINT_UNDEFINED if not found */
+UNIV_INLINE
+ulint
+trx_rsegf_undo_find_free(
+/*=====================*/
+	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ulint		i;
+	ulint		page_no;
+
+	for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+
+		page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr);
+
+		if (page_no == FIL_NULL) {
+
+			return(i);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
diff --git a/storage/innodb_plugin/include/trx0sys.h b/storage/innodb_plugin/include/trx0sys.h
new file mode 100644
index 00000000000..812e8cfa0ba
--- /dev/null
+++ b/storage/innodb_plugin/include/trx0sys.h
@@ -0,0 +1,618 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0sys.h
+Transaction system
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0sys_h
+#define trx0sys_h
+
+#include "univ.i"
+
+#include "trx0types.h"
+#include "fsp0types.h"
+#include "fil0fil.h"
+#include "buf0buf.h"
+#ifndef UNIV_HOTBACKUP
+#include "mtr0mtr.h"
+#include "ut0byte.h"
+#include "mem0mem.h"
+#include "sync0sync.h"
+#include "ut0lst.h"
+#include "read0types.h"
+#include "page0types.h"
+
+/** In a MySQL replication slave, in crash recovery we store the master log
+file name and position here. */
+/* @{ */
+/** Master binlog file name */
+extern char		trx_sys_mysql_master_log_name[];
+/** Master binlog file position.  We have successfully got the updates
+up to this position.  -1 means that no crash recovery was needed, or
+there was no master log position info inside InnoDB.*/
+extern ib_int64_t	trx_sys_mysql_master_log_pos;
+/* @} */
+
+/** If this MySQL server uses binary logging, after InnoDB has been inited
+and if it has done a crash recovery, we store the binlog file name and position
+here. */
+/* @{ */
+/** Binlog file name */
+extern char		trx_sys_mysql_bin_log_name[];
+/** Binlog file position, or -1 if unknown */
+extern ib_int64_t	trx_sys_mysql_bin_log_pos;
+/* @} */
+
+/** The transaction system */
+extern trx_sys_t*	trx_sys;
+
+/** Doublewrite system */
+extern trx_doublewrite_t*	trx_doublewrite;
+/** The following is set to TRUE when we are upgrading from pre-4.1
+format data files to the multiple tablespaces format data files */
+extern ibool			trx_doublewrite_must_reset_space_ids;
+/** Set to TRUE when the doublewrite buffer is being created */
+extern ibool			trx_doublewrite_buf_is_being_created;
+/** The following is TRUE when we are using the database in the
+post-4.1 format, i.e., we have successfully upgraded, or have created
+a new database installation */
+extern ibool			trx_sys_multiple_tablespace_format;
+
+/****************************************************************//**
+Creates the doublewrite buffer to a new InnoDB installation. The header of the
+doublewrite buffer is placed on the trx system header page. */
+UNIV_INTERN
+void
+trx_sys_create_doublewrite_buf(void);
+/*================================*/
+/****************************************************************//**
+At a database startup initializes the doublewrite buffer memory structure if
+we already have a doublewrite buffer created in the data files. If we are
+upgrading to an InnoDB version which supports multiple tablespaces, then this
+function performs the necessary update operations. If we are in a crash
+recovery, this function uses a possible doublewrite buffer to restore
+half-written pages in the data files. */
+UNIV_INTERN
+void
+trx_sys_doublewrite_init_or_restore_pages(
+/*======================================*/
+	ibool	restore_corrupt_pages);	/*!< in: TRUE=restore pages */
+/****************************************************************//**
+Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
+multiple tablespace format. */
+UNIV_INTERN
+void
+trx_sys_mark_upgraded_to_multiple_tablespaces(void);
+/*===============================================*/
+/****************************************************************//**
+Determines if a page number is located inside the doublewrite buffer.
+@return TRUE if the location is inside the two blocks of the
+doublewrite buffer */
+UNIV_INTERN
+ibool
+trx_doublewrite_page_inside(
+/*========================*/
+	ulint	page_no);	/*!< in: page number */
+/***************************************************************//**
+Checks if a page address is the trx sys header page.
+@return	TRUE if trx sys header page */
+UNIV_INLINE
+ibool
+trx_sys_hdr_page(
+/*=============*/
+	ulint	space,	/*!< in: space */
+	ulint	page_no);/*!< in: page number */
+/*****************************************************************//**
+Creates and initializes the central memory structures for the transaction
+system. This is called when the database is started. */
+UNIV_INTERN
+void
+trx_sys_init_at_db_start(void);
+/*==========================*/
+/*****************************************************************//**
+Creates and initializes the transaction system at the database creation. */
+UNIV_INTERN
+void
+trx_sys_create(void);
+/*================*/
+/****************************************************************//**
+Looks for a free slot for a rollback segment in the trx system file copy.
+@return	slot index or ULINT_UNDEFINED if not found */
+UNIV_INTERN
+ulint
+trx_sysf_rseg_find_free(
+/*====================*/
+	mtr_t*		mtr);		/*!< in: mtr */
+/***************************************************************//**
+Gets the pointer in the nth slot of the rseg array.
+@return	pointer to rseg object, NULL if slot not in use */
+UNIV_INLINE
+trx_rseg_t*
+trx_sys_get_nth_rseg(
+/*=================*/
+	trx_sys_t*	sys,	/*!< in: trx system */
+	ulint		n);	/*!< in: index of slot */
+/***************************************************************//**
+Sets the pointer in the nth slot of the rseg array. */
+UNIV_INLINE
+void
+trx_sys_set_nth_rseg(
+/*=================*/
+	trx_sys_t*	sys,	/*!< in: trx system */
+	ulint		n,	/*!< in: index of slot */
+	trx_rseg_t*	rseg);	/*!< in: pointer to rseg object, NULL if slot
+				not in use */
+/**********************************************************************//**
+Gets a pointer to the transaction system file copy and x-locks its page.
+@return	pointer to system file copy, page x-locked */
+UNIV_INLINE
+trx_sysf_t*
+trx_sysf_get(
+/*=========*/
+	mtr_t*	mtr);	/*!< in: mtr */
+/*****************************************************************//**
+Gets the space of the nth rollback segment slot in the trx system
+file copy.
+@return	space id */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_space(
+/*====================*/
+	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
+	ulint		i,		/*!< in: slot index == rseg id */
+	mtr_t*		mtr);		/*!< in: mtr */
+/*****************************************************************//**
+Gets the page number of the nth rollback segment slot in the trx system
+file copy.
+@return	page number, FIL_NULL if slot unused */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_page_no(
+/*======================*/
+	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
+	ulint		i,		/*!< in: slot index == rseg id */
+	mtr_t*		mtr);		/*!< in: mtr */
+/*****************************************************************//**
+Sets the space id of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_space(
+/*====================*/
+	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
+	ulint		i,		/*!< in: slot index == rseg id */
+	ulint		space,		/*!< in: space id */
+	mtr_t*		mtr);		/*!< in: mtr */
+/*****************************************************************//**
+Sets the page number of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_page_no(
+/*======================*/
+	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
+	ulint		i,		/*!< in: slot index == rseg id */
+	ulint		page_no,	/*!< in: page number, FIL_NULL if
+					the slot is reset to unused */
+	mtr_t*		mtr);		/*!< in: mtr */
+/*****************************************************************//**
+Allocates a new transaction id.
+@return	new, allocated trx id */
+UNIV_INLINE
+trx_id_t
+trx_sys_get_new_trx_id(void);
+/*========================*/
+/*****************************************************************//**
+Allocates a new transaction number.
+@return	new, allocated trx number */
+UNIV_INLINE
+trx_id_t
+trx_sys_get_new_trx_no(void);
+/*========================*/
+#endif /* !UNIV_HOTBACKUP */
+/*****************************************************************//**
+Writes a trx id to an index page. In case that the id size changes in
+some future version, this function should be used instead of
+mach_write_... */
+UNIV_INLINE
+void
+trx_write_trx_id(
+/*=============*/
+	byte*		ptr,	/*!< in: pointer to memory where written */
+	trx_id_t	id);	/*!< in: id */
+#ifndef UNIV_HOTBACKUP
+/*****************************************************************//**
+Reads a trx id from an index page. In case that the id size changes in
+some future version, this function should be used instead of
+mach_read_...
+@return	id */
+UNIV_INLINE
+trx_id_t
+trx_read_trx_id(
+/*============*/
+	const byte*	ptr);	/*!< in: pointer to memory from where to read */
+/****************************************************************//**
+Looks for the trx handle with the given id in trx_list.
+@return	the trx handle or NULL if not found */
+UNIV_INLINE
+trx_t*
+trx_get_on_id(
+/*==========*/
+	trx_id_t	trx_id);/*!< in: trx id to search for */
+/****************************************************************//**
+Returns the minumum trx id in trx list. This is the smallest id for which
+the trx can possibly be active. (But, you must look at the trx->conc_state to
+find out if the minimum trx id transaction itself is active, or already
+committed.)
+@return	the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
+UNIV_INLINE
+trx_id_t
+trx_list_get_min_trx_id(void);
+/*=========================*/
+/****************************************************************//**
+Checks if a transaction with the given id is active.
+@return	TRUE if active */
+UNIV_INLINE
+ibool
+trx_is_active(
+/*==========*/
+	trx_id_t	trx_id);/*!< in: trx id of the transaction */
+/****************************************************************//**
+Checks that trx is in the trx list.
+@return	TRUE if is in */
+UNIV_INTERN
+ibool
+trx_in_trx_list(
+/*============*/
+	trx_t*	in_trx);/*!< in: trx */
+/*****************************************************************//**
+Updates the offset information about the end of the MySQL binlog entry
+which corresponds to the transaction just being committed. In a MySQL
+replication slave updates the latest master binlog position up to which
+replication has proceeded. */
+UNIV_INTERN
+void
+trx_sys_update_mysql_binlog_offset(
+/*===============================*/
+	const char*	file_name,/*!< in: MySQL log file name */
+	ib_int64_t	offset,	/*!< in: position in that log file */
+	ulint		field,	/*!< in: offset of the MySQL log info field in
+				the trx sys header */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*****************************************************************//**
+Prints to stderr the MySQL binlog offset info in the trx system header if
+the magic number shows it valid. */
+UNIV_INTERN
+void
+trx_sys_print_mysql_binlog_offset(void);
+/*===================================*/
+/*****************************************************************//**
+Prints to stderr the MySQL master log offset info in the trx system header if
+the magic number shows it valid. */
+UNIV_INTERN
+void
+trx_sys_print_mysql_master_log_pos(void);
+/*====================================*/
+/*****************************************************************//**
+Initializes the tablespace tag system. */
+UNIV_INTERN
+void
+trx_sys_file_format_init(void);
+/*==========================*/
+/*****************************************************************//**
+Closes the tablespace tag system. */
+UNIV_INTERN
+void
+trx_sys_file_format_close(void);
+/*===========================*/
+/********************************************************************//**
+Tags the system table space with minimum format id if it has not been
+tagged yet.
+WARNING: This function is only called during the startup and AFTER the
+redo log application during recovery has finished. */
+UNIV_INTERN
+void
+trx_sys_file_format_tag_init(void);
+/*==============================*/
+/*****************************************************************//**
+Get the name representation of the file format from its id.
+@return	pointer to the name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_id_to_name(
+/*===========================*/
+	const ulint	id);		/*!< in: id of the file format */
+/*****************************************************************//**
+Set the file format id unconditionally except if it's already the
+same value.
+@return	TRUE if value updated */
+UNIV_INTERN
+ibool
+trx_sys_file_format_max_set(
+/*========================*/
+	ulint		format_id,	/*!< in: file format id */
+	const char**	name);		/*!< out: max file format name or
+					NULL if not needed. */
+/*****************************************************************//**
+Get the name representation of the file format from its id.
+@return	pointer to the max format name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_max_get(void);
+/*=============================*/
+/*****************************************************************//**
+Check for the max file format tag stored on disk.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+trx_sys_file_format_max_check(
+/*==========================*/
+	ulint		max_format_id);	/*!< in: the max format id to check */
+/********************************************************************//**
+Update the file format tag in the system tablespace only if the given
+format id is greater than the known max id.
+@return	TRUE if format_id was bigger than the known max id */
+UNIV_INTERN
+ibool
+trx_sys_file_format_max_upgrade(
+/*============================*/
+	const char**	name,		/*!< out: max file format name */
+	ulint		format_id);	/*!< in: file format identifier */
+#else /* !UNIV_HOTBACKUP */
+/*****************************************************************//**
+Prints to stderr the MySQL binlog info in the system header if the
+magic number shows it valid. */
+UNIV_INTERN
+void
+trx_sys_print_mysql_binlog_offset_from_page(
+/*========================================*/
+	const byte*	page);	/*!< in: buffer containing the trx
+				system header page, i.e., page number
+				TRX_SYS_PAGE_NO in the tablespace */
+/*****************************************************************//**
+Reads the file format id from the first system table space file.
+Even if the call succeeds and returns TRUE, the returned format id
+may be ULINT_UNDEFINED signalling that the format id was not present
+in the data file.
+@return TRUE if call succeeds */
+UNIV_INTERN
+ibool
+trx_sys_read_file_format_id(
+/*========================*/
+	const char *pathname,	/*!< in: pathname of the first system
+				table space file */
+	ulint *format_id);	/*!< out: file format of the system table
+				space */
+/*****************************************************************//**
+Reads the file format id from the given per-table data file.
+@return TRUE if call succeeds */
+UNIV_INTERN
+ibool
+trx_sys_read_pertable_file_format_id(
+/*=================================*/
+	const char *pathname,	/*!< in: pathname of a per-table
+				datafile */
+	ulint *format_id);	/*!< out: file format of the per-table
+				data file */
+/*****************************************************************//**
+Get the name representation of the file format from its id.
+@return	pointer to the name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_id_to_name(
+/*===========================*/
+	const ulint	id);	/*!< in: id of the file format */
+
+#endif /* !UNIV_HOTBACKUP */
+/* The automatically created system rollback segment has this id */
+#define TRX_SYS_SYSTEM_RSEG_ID	0
+
+/* Space id and page no where the trx system file copy resides */
+#define	TRX_SYS_SPACE	0	/* the SYSTEM tablespace */
+#include "fsp0fsp.h"
+#define	TRX_SYS_PAGE_NO	FSP_TRX_SYS_PAGE_NO
+
+/* The offset of the transaction system header on the page */
+#define	TRX_SYS		FSEG_PAGE_DATA
+
+/** Transaction system header */
+/*------------------------------------------------------------- @{ */
+#define	TRX_SYS_TRX_ID_STORE	0	/*!< the maximum trx id or trx
+					number modulo
+					TRX_SYS_TRX_ID_UPDATE_MARGIN
+					written to a file page by any
+					transaction; the assignment of
+					transaction ids continues from
+					this number rounded up by
+					TRX_SYS_TRX_ID_UPDATE_MARGIN
+					plus
+					TRX_SYS_TRX_ID_UPDATE_MARGIN
+					when the database is
+					started */
+#define TRX_SYS_FSEG_HEADER	8	/*!< segment header for the
+					tablespace segment the trx
+					system is created into */
+#define	TRX_SYS_RSEGS		(8 + FSEG_HEADER_SIZE)
+					/*!< the start of the array of
+					rollback segment specification
+					slots */
+/*------------------------------------------------------------- @} */
+
+/** Maximum number of rollback segments: the number of segment
+specification slots in the transaction system array; rollback segment
+id must fit in one byte, therefore 256; each slot is currently 8 bytes
+in size */
+#define	TRX_SYS_N_RSEGS		256
+
+/** Maximum length of MySQL binlog file name, in bytes.
+@see trx_sys_mysql_master_log_name
+@see trx_sys_mysql_bin_log_name */
+#define TRX_SYS_MYSQL_LOG_NAME_LEN	512
+/** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
+#define TRX_SYS_MYSQL_LOG_MAGIC_N	873422344
+
+#if UNIV_PAGE_SIZE < 4096
+# error "UNIV_PAGE_SIZE < 4096"
+#endif
+/** The offset of the MySQL replication info in the trx system header;
+this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
+#define TRX_SYS_MYSQL_MASTER_LOG_INFO	(UNIV_PAGE_SIZE - 2000)
+
+/** The offset of the MySQL binlog offset info in the trx system header */
+#define TRX_SYS_MYSQL_LOG_INFO		(UNIV_PAGE_SIZE - 1000)
+#define	TRX_SYS_MYSQL_LOG_MAGIC_N_FLD	0	/*!< magic number which is
+						TRX_SYS_MYSQL_LOG_MAGIC_N
+						if we have valid data in the
+						MySQL binlog info */
+#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH	4	/*!< high 4 bytes of the offset
+						within that file */
+#define TRX_SYS_MYSQL_LOG_OFFSET_LOW	8	/*!< low 4 bytes of the offset
+						within that file */
+#define TRX_SYS_MYSQL_LOG_NAME		12	/*!< MySQL log file name */
+
+#ifndef UNIV_HOTBACKUP
+/** Doublewrite buffer */
+/* @{ */
+/** The offset of the doublewrite buffer header on the trx system header page */
+#define TRX_SYS_DOUBLEWRITE		(UNIV_PAGE_SIZE - 200)
+/*-------------------------------------------------------------*/
+#define TRX_SYS_DOUBLEWRITE_FSEG	0	/*!< fseg header of the fseg
+						containing the doublewrite
+						buffer */
+#define TRX_SYS_DOUBLEWRITE_MAGIC	FSEG_HEADER_SIZE
+						/*!< 4-byte magic number which
+						shows if we already have
+						created the doublewrite
+						buffer */
+#define TRX_SYS_DOUBLEWRITE_BLOCK1	(4 + FSEG_HEADER_SIZE)
+						/*!< page number of the
+						first page in the first
+						sequence of 64
+						(= FSP_EXTENT_SIZE) consecutive
+						pages in the doublewrite
+						buffer */
+#define TRX_SYS_DOUBLEWRITE_BLOCK2	(8 + FSEG_HEADER_SIZE)
+						/*!< page number of the
+						first page in the second
+						sequence of 64 consecutive
+						pages in the doublewrite
+						buffer */
+#define TRX_SYS_DOUBLEWRITE_REPEAT	12	/*!< we repeat
+						TRX_SYS_DOUBLEWRITE_MAGIC,
+						TRX_SYS_DOUBLEWRITE_BLOCK1,
+						TRX_SYS_DOUBLEWRITE_BLOCK2
+						so that if the trx sys
+						header is half-written
+						to disk, we still may
+						be able to recover the
+						information */
+/** If this is not yet set to TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
+we must reset the doublewrite buffer, because starting from 4.1.x the
+space id of a data page is stored into
+FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO. */
+#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE)
+
+/*-------------------------------------------------------------*/
+/** Contents of TRX_SYS_DOUBLEWRITE_MAGIC */
+#define TRX_SYS_DOUBLEWRITE_MAGIC_N	536853855
+/** Contents of TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED */
+#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N 1783657386
+
+/** Size of the doublewrite block in pages */
+#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE	FSP_EXTENT_SIZE
+/* @} */
+
+/** File format tag */
+/* @{ */
+/** The offset of the file format tag on the trx system header page
+(TRX_SYS_PAGE_NO of TRX_SYS_SPACE) */
+#define TRX_SYS_FILE_FORMAT_TAG		(UNIV_PAGE_SIZE - 16)
+
+/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid.  The file format
+identifier is added to this constant. */
+#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW	3645922177UL
+/** Contents of TRX_SYS_FILE_FORMAT_TAG+4 when valid */
+#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH	2745987765UL
+/* @} */
+
+/** Doublewrite control struct */
+struct trx_doublewrite_struct{
+	mutex_t	mutex;		/*!< mutex protecting the first_free field and
+				write_buf */
+	ulint	block1;		/*!< the page number of the first
+				doublewrite block (64 pages) */
+	ulint	block2;		/*!< page number of the second block */
+	ulint	first_free;	/*!< first free position in write_buf measured
+				in units of UNIV_PAGE_SIZE */
+	byte*	write_buf;	/*!< write buffer used in writing to the
+				doublewrite buffer, aligned to an
+				address divisible by UNIV_PAGE_SIZE
+				(which is required by Windows aio) */
+	byte*	write_buf_unaligned;
+				/*!< pointer to write_buf, but unaligned */
+	buf_page_t**
+		buf_block_arr;	/*!< array to store pointers to the buffer
+				blocks which have been cached to write_buf */
+};
+
+/** The transaction system central memory data structure; protected by the
+kernel mutex */
+struct trx_sys_struct{
+	trx_id_t	max_trx_id;	/*!< The smallest number not yet
+					assigned as a transaction id or
+					transaction number */
+	UT_LIST_BASE_NODE_T(trx_t) trx_list;
+					/*!< List of active and committed in
+					memory transactions, sorted on trx id,
+					biggest first */
+	UT_LIST_BASE_NODE_T(trx_t) mysql_trx_list;
+					/*!< List of transactions created
+					for MySQL */
+	UT_LIST_BASE_NODE_T(trx_rseg_t) rseg_list;
+					/*!< List of rollback segment
+					objects */
+	trx_rseg_t*	latest_rseg;	/*!< Latest rollback segment in the
+					round-robin assignment of rollback
+					segments to transactions */
+	trx_rseg_t*	rseg_array[TRX_SYS_N_RSEGS];
+					/*!< Pointer array to rollback
+					segments; NULL if slot not in use */
+	ulint		rseg_history_len;/*!< Length of the TRX_RSEG_HISTORY
+					list (update undo logs for committed
+					transactions), protected by
+					rseg->mutex */
+	UT_LIST_BASE_NODE_T(read_view_t) view_list;
+					/*!< List of read views sorted
+					on trx no, biggest first */
+};
+
+/** When a trx id which is zero modulo this number (which must be a power of
+two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
+page is updated */
+#define TRX_SYS_TRX_ID_WRITE_MARGIN	256
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "trx0sys.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innodb_plugin/include/trx0sys.ic
similarity index 65%
rename from storage/innobase/include/trx0sys.ic
rename to storage/innodb_plugin/include/trx0sys.ic
index 86b71df08d6..1c7c732751b 100644
--- a/storage/innobase/include/trx0sys.ic
+++ b/storage/innodb_plugin/include/trx0sys.ic
@@ -1,14 +1,33 @@
-/******************************************************
-Transaction system
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0sys.ic
+Transaction system
 
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
 
-#include "srv0srv.h"
 #include "trx0trx.h"
 #include "data0type.h"
+#ifndef UNIV_HOTBACKUP
+# include "srv0srv.h"
+# include "mtr0log.h"
 
 /* The typedef for rseg slot in the file copy */
 typedef byte	trx_sysf_rseg_t;
@@ -26,22 +45,22 @@ typedef byte	trx_sysf_rseg_t;
 /* Size of a rollback segment specification slot */
 #define TRX_SYS_RSEG_SLOT_SIZE	8
 
-/*********************************************************************
+/*****************************************************************//**
 Writes the value of max_trx_id to the file based trx system header. */
-
+UNIV_INTERN
 void
 trx_sys_flush_max_trx_id(void);
 /*==========================*/
 
-/*******************************************************************
-Checks if a page address is the trx sys header page. */
+/***************************************************************//**
+Checks if a page address is the trx sys header page.
+@return	TRUE if trx sys header page */
 UNIV_INLINE
 ibool
 trx_sys_hdr_page(
 /*=============*/
-			/* out: TRUE if trx sys header page */
-	ulint	space,	/* in: space */
-	ulint	page_no)/* in: page number */
+	ulint	space,	/*!< in: space */
+	ulint	page_no)/*!< in: page number */
 {
 	if ((space == TRX_SYS_SPACE) && (page_no == TRX_SYS_PAGE_NO)) {
 
@@ -51,16 +70,15 @@ trx_sys_hdr_page(
 	return(FALSE);
 }
 
-/*******************************************************************
-Gets the pointer in the nth slot of the rseg array. */
+/***************************************************************//**
+Gets the pointer in the nth slot of the rseg array.
+@return	pointer to rseg object, NULL if slot not in use */
 UNIV_INLINE
 trx_rseg_t*
 trx_sys_get_nth_rseg(
 /*=================*/
-				/* out: pointer to rseg object, NULL if slot
-				not in use */
-	trx_sys_t*	sys,	/* in: trx system */
-	ulint		n)	/* in: index of slot */
+	trx_sys_t*	sys,	/*!< in: trx system */
+	ulint		n)	/*!< in: index of slot */
 {
 	ut_ad(mutex_own(&(kernel_mutex)));
 	ut_ad(n < TRX_SYS_N_RSEGS);
@@ -68,15 +86,15 @@ trx_sys_get_nth_rseg(
 	return(sys->rseg_array[n]);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Sets the pointer in the nth slot of the rseg array. */
 UNIV_INLINE
 void
 trx_sys_set_nth_rseg(
 /*=================*/
-	trx_sys_t*	sys,	/* in: trx system */
-	ulint		n,	/* in: index of slot */
-	trx_rseg_t*	rseg)	/* in: pointer to rseg object, NULL if slot
+	trx_sys_t*	sys,	/*!< in: trx system */
+	ulint		n,	/*!< in: index of slot */
+	trx_rseg_t*	rseg)	/*!< in: pointer to rseg object, NULL if slot
 				not in use */
 {
 	ut_ad(n < TRX_SYS_N_RSEGS);
@@ -84,40 +102,40 @@ trx_sys_set_nth_rseg(
 	sys->rseg_array[n] = rseg;
 }
 
-/**************************************************************************
-Gets a pointer to the transaction system header and x-latches its page. */
+/**********************************************************************//**
+Gets a pointer to the transaction system header and x-latches its page.
+@return	pointer to system header, page x-latched. */
 UNIV_INLINE
 trx_sysf_t*
 trx_sysf_get(
 /*=========*/
-			/* out: pointer to system header, page x-latched. */
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
+	buf_block_t*	block;
 	trx_sysf_t*	header;
 
 	ut_ad(mtr);
 
-	header = TRX_SYS + buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
-					RW_X_LATCH, mtr);
+	block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
+			     RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
 
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(header, SYNC_TRX_SYS_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
+	header = TRX_SYS + buf_block_get_frame(block);
 
 	return(header);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Gets the space of the nth rollback segment slot in the trx system
-file copy. */
+file copy.
+@return	space id */
 UNIV_INLINE
 ulint
 trx_sysf_rseg_get_space(
 /*====================*/
-					/* out: space id */
-	trx_sysf_t*	sys_header,	/* in: trx sys header */
-	ulint		i,		/* in: slot index == rseg id */
-	mtr_t*		mtr)		/* in: mtr */
+	trx_sysf_t*	sys_header,	/*!< in: trx sys header */
+	ulint		i,		/*!< in: slot index == rseg id */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ut_ad(mutex_own(&(kernel_mutex)));
 	ut_ad(sys_header);
@@ -128,18 +146,17 @@ trx_sysf_rseg_get_space(
 			      + TRX_SYS_RSEG_SPACE, MLOG_4BYTES, mtr));
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Gets the page number of the nth rollback segment slot in the trx system
-header. */
+header.
+@return	page number, FIL_NULL if slot unused */
 UNIV_INLINE
 ulint
 trx_sysf_rseg_get_page_no(
 /*======================*/
-					/* out: page number, FIL_NULL
-					if slot unused */
-	trx_sysf_t*	sys_header,	/* in: trx system header */
-	ulint		i,		/* in: slot index == rseg id */
-	mtr_t*		mtr)		/* in: mtr */
+	trx_sysf_t*	sys_header,	/*!< in: trx system header */
+	ulint		i,		/*!< in: slot index == rseg id */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ut_ad(sys_header);
 	ut_ad(mutex_own(&(kernel_mutex)));
@@ -150,17 +167,17 @@ trx_sysf_rseg_get_page_no(
 			      + TRX_SYS_RSEG_PAGE_NO, MLOG_4BYTES, mtr));
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Sets the space id of the nth rollback segment slot in the trx system
 file copy. */
 UNIV_INLINE
 void
 trx_sysf_rseg_set_space(
 /*====================*/
-	trx_sysf_t*	sys_header,	/* in: trx sys file copy */
-	ulint		i,		/* in: slot index == rseg id */
-	ulint		space,		/* in: space id */
-	mtr_t*		mtr)		/* in: mtr */
+	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
+	ulint		i,		/*!< in: slot index == rseg id */
+	ulint		space,		/*!< in: space id */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ut_ad(mutex_own(&(kernel_mutex)));
 	ut_ad(sys_header);
@@ -173,18 +190,18 @@ trx_sysf_rseg_set_space(
 			 MLOG_4BYTES, mtr);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Sets the page number of the nth rollback segment slot in the trx system
 header. */
 UNIV_INLINE
 void
 trx_sysf_rseg_set_page_no(
 /*======================*/
-	trx_sysf_t*	sys_header,	/* in: trx sys header */
-	ulint		i,		/* in: slot index == rseg id */
-	ulint		page_no,	/* in: page number, FIL_NULL if the
+	trx_sysf_t*	sys_header,	/*!< in: trx sys header */
+	ulint		i,		/*!< in: slot index == rseg id */
+	ulint		page_no,	/*!< in: page number, FIL_NULL if the
 					slot is reset to unused */
-	mtr_t*		mtr)		/* in: mtr */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ut_ad(mutex_own(&(kernel_mutex)));
 	ut_ad(sys_header);
@@ -196,8 +213,9 @@ trx_sysf_rseg_set_page_no(
 			 page_no,
 			 MLOG_4BYTES, mtr);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/*********************************************************************
+/*****************************************************************//**
 Writes a trx id to an index page. In case that the id size changes in
 some future version, this function should be used instead of
 mach_write_... */
@@ -205,38 +223,41 @@ UNIV_INLINE
 void
 trx_write_trx_id(
 /*=============*/
-	byte*	ptr,	/* in: pointer to memory where written */
-	dulint	id)	/* in: id */
+	byte*		ptr,	/*!< in: pointer to memory where written */
+	trx_id_t	id)	/*!< in: id */
 {
-	ut_ad(DATA_TRX_ID_LEN == 6);
-
+#if DATA_TRX_ID_LEN != 6
+# error "DATA_TRX_ID_LEN != 6"
+#endif
 	mach_write_to_6(ptr, id);
 }
 
-/*********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*****************************************************************//**
 Reads a trx id from an index page. In case that the id size changes in
 some future version, this function should be used instead of
-mach_read_... */
+mach_read_...
+@return	id */
 UNIV_INLINE
-dulint
+trx_id_t
 trx_read_trx_id(
 /*============*/
-			/* out: id */
-	byte*	ptr)	/* in: pointer to memory from where to read */
+	const byte*	ptr)	/*!< in: pointer to memory from where to read */
 {
-	ut_ad(DATA_TRX_ID_LEN == 6);
-
+#if DATA_TRX_ID_LEN != 6
+# error "DATA_TRX_ID_LEN != 6"
+#endif
 	return(mach_read_from_6(ptr));
 }
 
-/********************************************************************
-Looks for the trx handle with the given id in trx_list. */
+/****************************************************************//**
+Looks for the trx handle with the given id in trx_list.
+@return	the trx handle or NULL if not found */
 UNIV_INLINE
 trx_t*
 trx_get_on_id(
 /*==========*/
-			/* out: the trx handle or NULL if not found */
-	dulint	trx_id)	/* in: trx id to search for */
+	trx_id_t	trx_id)	/*!< in: trx id to search for */
 {
 	trx_t*	trx;
 
@@ -256,17 +277,16 @@ trx_get_on_id(
 	return(NULL);
 }
 
-/********************************************************************
+/****************************************************************//**
 Returns the minumum trx id in trx list. This is the smallest id for which
 the trx can possibly be active. (But, you must look at the trx->conc_state to
 find out if the minimum trx id transaction itself is active, or already
-committed.) */
+committed.)
+@return	the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
 UNIV_INLINE
-dulint
+trx_id_t
 trx_list_get_min_trx_id(void)
 /*=========================*/
-			/* out: the minimum trx id, or trx_sys->max_trx_id
-			if the trx list is empty */
 {
 	trx_t*	trx;
 
@@ -282,14 +302,14 @@ trx_list_get_min_trx_id(void)
 	return(trx->id);
 }
 
-/********************************************************************
-Checks if a transaction with the given id is active. */
+/****************************************************************//**
+Checks if a transaction with the given id is active.
+@return	TRUE if active */
 UNIV_INLINE
 ibool
 trx_is_active(
 /*==========*/
-			/* out: TRUE if active */
-	dulint	trx_id)	/* in: trx id of the transaction */
+	trx_id_t	trx_id)	/*!< in: trx id of the transaction */
 {
 	trx_t*	trx;
 
@@ -320,15 +340,15 @@ trx_is_active(
 	return(FALSE);
 }
 
-/*********************************************************************
-Allocates a new transaction id. */
+/*****************************************************************//**
+Allocates a new transaction id.
+@return	new, allocated trx id */
 UNIV_INLINE
-dulint
+trx_id_t
 trx_sys_get_new_trx_id(void)
 /*========================*/
-			/* out: new, allocated trx id */
 {
-	dulint	id;
+	trx_id_t	id;
 
 	ut_ad(mutex_own(&kernel_mutex));
 
@@ -352,15 +372,16 @@ trx_sys_get_new_trx_id(void)
 	return(id);
 }
 
-/*********************************************************************
-Allocates a new transaction number. */
+/*****************************************************************//**
+Allocates a new transaction number.
+@return	new, allocated trx number */
 UNIV_INLINE
-dulint
+trx_id_t
 trx_sys_get_new_trx_no(void)
 /*========================*/
-			/* out: new, allocated trx number */
 {
 	ut_ad(mutex_own(&kernel_mutex));
 
 	return(trx_sys_get_new_trx_id());
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/trx0trx.h b/storage/innodb_plugin/include/trx0trx.h
similarity index 56%
rename from storage/innobase/include/trx0trx.h
rename to storage/innodb_plugin/include/trx0trx.h
index f0833bc6f21..681feeaec94 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innodb_plugin/include/trx0trx.h
@@ -1,7 +1,24 @@
-/******************************************************
-The transaction
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0trx.h
+The transaction
 
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
@@ -11,365 +28,432 @@ Created 3/26/1996 Heikki Tuuri
 
 #include "univ.i"
 #include "trx0types.h"
+#include "dict0types.h"
+#ifndef UNIV_HOTBACKUP
 #include "lock0types.h"
 #include "usr0types.h"
 #include "que0types.h"
 #include "mem0mem.h"
 #include "read0types.h"
-#include "dict0types.h"
 #include "trx0xa.h"
+#include "ut0vec.h"
 
+/** Dummy session used currently in MySQL interface */
+extern sess_t*	trx_dummy_sess;
+
+/** Number of transactions currently allocated for MySQL: protected by
+the kernel mutex */
 extern ulint	trx_n_mysql_transactions;
 
-/************************************************************************
+/********************************************************************//**
 Releases the search latch if trx has reserved it. */
-
+UNIV_INTERN
 void
 trx_search_latch_release_if_reserved(
 /*=================================*/
-	trx_t*	   trx); /* in: transaction */
-/**********************************************************************
+	trx_t*	   trx); /*!< in: transaction */
+/******************************************************************//**
 Set detailed error message for the transaction. */
+UNIV_INTERN
 void
 trx_set_detailed_error(
 /*===================*/
-	trx_t*		trx,	/* in: transaction struct */
-	const char*	msg);	/* in: detailed error message */
-/*****************************************************************
+	trx_t*		trx,	/*!< in: transaction struct */
+	const char*	msg);	/*!< in: detailed error message */
+/*************************************************************//**
 Set detailed error message for the transaction from a file. Note that the
 file is rewinded before reading from it. */
-
+UNIV_INTERN
 void
 trx_set_detailed_error_from_file(
 /*=============================*/
-	trx_t*	trx,	/* in: transaction struct */
-	FILE*	file);	/* in: file to read message from */
-/********************************************************************
-Retrieves the error_info field from a trx. */
-
-void*
+	trx_t*	trx,	/*!< in: transaction struct */
+	FILE*	file);	/*!< in: file to read message from */
+/****************************************************************//**
+Retrieves the error_info field from a trx.
+@return	the error info */
+UNIV_INLINE
+const dict_index_t*
 trx_get_error_info(
 /*===============*/
-			/* out: the error info */
-	trx_t*	trx);	/* in: trx object */
-/********************************************************************
-Creates and initializes a transaction object. */
-
+	const trx_t*	trx);	/*!< in: trx object */
+/****************************************************************//**
+Creates and initializes a transaction object.
+@return	own: the transaction */
+UNIV_INTERN
 trx_t*
 trx_create(
 /*=======*/
-			/* out, own: the transaction */
-	sess_t*	sess);	/* in: session or NULL */
-/************************************************************************
-Creates a transaction object for MySQL. */
-
+	sess_t*	sess)	/*!< in: session */
+	__attribute__((nonnull));
+/********************************************************************//**
+Creates a transaction object for MySQL.
+@return	own: transaction object */
+UNIV_INTERN
 trx_t*
 trx_allocate_for_mysql(void);
 /*========================*/
-				/* out, own: transaction object */
-/************************************************************************
-Creates a transaction object for background operations by the master thread. */
-
+/********************************************************************//**
+Creates a transaction object for background operations by the master thread.
+@return	own: transaction object */
+UNIV_INTERN
 trx_t*
 trx_allocate_for_background(void);
 /*=============================*/
-				/* out, own: transaction object */
-/************************************************************************
+/********************************************************************//**
 Frees a transaction object. */
-
+UNIV_INTERN
 void
 trx_free(
 /*=====*/
-	trx_t*	trx);	/* in, own: trx object */
-/************************************************************************
+	trx_t*	trx);	/*!< in, own: trx object */
+/********************************************************************//**
 Frees a transaction object for MySQL. */
-
+UNIV_INTERN
 void
 trx_free_for_mysql(
 /*===============*/
-	trx_t*	trx);	/* in, own: trx object */
-/************************************************************************
+	trx_t*	trx);	/*!< in, own: trx object */
+/********************************************************************//**
 Frees a transaction object of a background operation of the master thread. */
-
+UNIV_INTERN
 void
 trx_free_for_background(
 /*====================*/
-	trx_t*	trx);	/* in, own: trx object */
-/********************************************************************
+	trx_t*	trx);	/*!< in, own: trx object */
+/****************************************************************//**
 Creates trx objects for transactions and initializes the trx list of
 trx_sys at database start. Rollback segment and undo log lists must
 already exist when this function is called, because the lists of
 transactions to be rolled back or cleaned up are built based on the
 undo log lists. */
-
+UNIV_INTERN
 void
 trx_lists_init_at_db_start(void);
 /*============================*/
-/********************************************************************
-Starts a new transaction. */
-
+/****************************************************************//**
+Starts a new transaction.
+@return TRUE if success, FALSE if the rollback segment could not
+support this many transactions */
+UNIV_INTERN
 ibool
 trx_start(
 /*======*/
-			/* out: TRUE if success, FALSE if the rollback
-			segment could not support this many transactions */
-	trx_t*	trx,	/* in: transaction */
-	ulint	rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED
+	trx_t*	trx,	/*!< in: transaction */
+	ulint	rseg_id);/*!< in: rollback segment id; if ULINT_UNDEFINED
 			is passed, the system chooses the rollback segment
 			automatically in a round-robin fashion */
-/********************************************************************
-Starts a new transaction. */
-
+/****************************************************************//**
+Starts a new transaction.
+@return	TRUE */
+UNIV_INTERN
 ibool
 trx_start_low(
 /*==========*/
-			/* out: TRUE */
-	trx_t*	trx,	/* in: transaction */
-	ulint	rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED
+	trx_t*	trx,	/*!< in: transaction */
+	ulint	rseg_id);/*!< in: rollback segment id; if ULINT_UNDEFINED
 			is passed, the system chooses the rollback segment
 			automatically in a round-robin fashion */
-/*****************************************************************
+/*************************************************************//**
 Starts the transaction if it is not yet started. */
 UNIV_INLINE
 void
 trx_start_if_not_started(
 /*=====================*/
-	trx_t*	trx);	/* in: transaction */
-/*****************************************************************
+	trx_t*	trx);	/*!< in: transaction */
+/*************************************************************//**
 Starts the transaction if it is not yet started. Assumes we have reserved
 the kernel mutex! */
 UNIV_INLINE
 void
 trx_start_if_not_started_low(
 /*=========================*/
-	trx_t*	trx);	/* in: transaction */
-/*****************************************************************
-Starts the transaction if it is not yet started. */
-
-void
-trx_start_if_not_started_noninline(
-/*===============================*/
-	trx_t*	trx);	/* in: transaction */
-/********************************************************************
+	trx_t*	trx);	/*!< in: transaction */
+/****************************************************************//**
 Commits a transaction. */
-
+UNIV_INTERN
 void
 trx_commit_off_kernel(
 /*==================*/
-	trx_t*	trx);	/* in: transaction */
-/********************************************************************
+	trx_t*	trx);	/*!< in: transaction */
+/****************************************************************//**
 Cleans up a transaction at database startup. The cleanup is needed if
 the transaction already got to the middle of a commit when the database
 crashed, andf we cannot roll it back. */
-
+UNIV_INTERN
 void
 trx_cleanup_at_db_startup(
 /*======================*/
-	trx_t*	trx);	/* in: transaction */
-/**************************************************************************
-Does the transaction commit for MySQL. */
-
+	trx_t*	trx);	/*!< in: transaction */
+/**********************************************************************//**
+Does the transaction commit for MySQL.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
 ulint
 trx_commit_for_mysql(
 /*=================*/
-			/* out: 0 or error number */
-	trx_t*	trx);	/* in: trx handle */
-/**************************************************************************
-Does the transaction prepare for MySQL. */
-
+	trx_t*	trx);	/*!< in: trx handle */
+/**********************************************************************//**
+Does the transaction prepare for MySQL.
+@return	0 or error number */
+UNIV_INTERN
 ulint
 trx_prepare_for_mysql(
 /*==================*/
-			/* out: 0 or error number */
-	trx_t*	trx);	/* in: trx handle */
-/**************************************************************************
+	trx_t*	trx);	/*!< in: trx handle */
+/**********************************************************************//**
 This function is used to find number of prepared transactions and
-their transaction objects for a recovery. */
-
+their transaction objects for a recovery.
+@return	number of prepared transactions */
+UNIV_INTERN
 int
 trx_recover_for_mysql(
 /*==================*/
-				/* out: number of prepared transactions */
-	XID*	xid_list,	/* in/out: prepared transactions */
-	ulint	len);		/* in: number of slots in xid_list */
-/***********************************************************************
+	XID*	xid_list,	/*!< in/out: prepared transactions */
+	ulint	len);		/*!< in: number of slots in xid_list */
+/*******************************************************************//**
 This function is used to find one X/Open XA distributed transaction
-which is in the prepared state */
+which is in the prepared state
+@return	trx or NULL */
+UNIV_INTERN
 trx_t *
 trx_get_trx_by_xid(
 /*===============*/
-			/* out: trx or NULL */
-	XID*	xid);	/*  in: X/Open XA transaction identification */
-/**************************************************************************
+	XID*	xid);	/*!< in: X/Open XA transaction identification */
+/**********************************************************************//**
 If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE. */
-
+with trx->flush_log_later == TRUE.
+@return	0 or error number */
+UNIV_INTERN
 ulint
 trx_commit_complete_for_mysql(
 /*==========================*/
-			/* out: 0 or error number */
-	trx_t*	trx);	/* in: trx handle */
-/**************************************************************************
+	trx_t*	trx);	/*!< in: trx handle */
+/**********************************************************************//**
 Marks the latest SQL statement ended. */
-
+UNIV_INTERN
 void
 trx_mark_sql_stat_end(
 /*==================*/
-	trx_t*	trx);	/* in: trx handle */
-/************************************************************************
+	trx_t*	trx);	/*!< in: trx handle */
+/********************************************************************//**
 Assigns a read view for a consistent read query. All the consistent reads
 within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction. */
-
+when this function is first called for a new started transaction.
+@return	consistent read view */
+UNIV_INTERN
 read_view_t*
 trx_assign_read_view(
 /*=================*/
-			/* out: consistent read view */
-	trx_t*	trx);	/* in: active transaction */
-/***************************************************************
+	trx_t*	trx);	/*!< in: active transaction */
+/***********************************************************//**
 The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
 the TRX_QUE_RUNNING state and releases query threads which were
 waiting for a lock in the wait_thrs list. */
-
+UNIV_INTERN
 void
 trx_end_lock_wait(
 /*==============*/
-	trx_t*	trx);	/* in: transaction */
-/********************************************************************
+	trx_t*	trx);	/*!< in: transaction */
+/****************************************************************//**
 Sends a signal to a trx object. */
-
+UNIV_INTERN
 void
 trx_sig_send(
 /*=========*/
-	trx_t*		trx,		/* in: trx handle */
-	ulint		type,		/* in: signal type */
-	ulint		sender,		/* in: TRX_SIG_SELF or
+	trx_t*		trx,		/*!< in: trx handle */
+	ulint		type,		/*!< in: signal type */
+	ulint		sender,		/*!< in: TRX_SIG_SELF or
 					TRX_SIG_OTHER_SESS */
-	que_thr_t*	receiver_thr,	/* in: query thread which wants the
+	que_thr_t*	receiver_thr,	/*!< in: query thread which wants the
 					reply, or NULL; if type is
 					TRX_SIG_END_WAIT, this must be NULL */
-	trx_savept_t*	savept,		/* in: possible rollback savepoint, or
+	trx_savept_t*	savept,		/*!< in: possible rollback savepoint, or
 					NULL */
-	que_thr_t**	next_thr);	/* in/out: next query thread to run;
+	que_thr_t**	next_thr);	/*!< in/out: next query thread to run;
 					if the value which is passed in is
 					a pointer to a NULL pointer, then the
 					calling function can start running
 					a new query thread; if the parameter
 					is NULL, it is ignored */
-/********************************************************************
+/****************************************************************//**
 Send the reply message when a signal in the queue of the trx has
 been handled. */
-
+UNIV_INTERN
 void
 trx_sig_reply(
 /*==========*/
-	trx_sig_t*	sig,		/* in: signal */
-	que_thr_t**	next_thr);	/* in/out: next query thread to run;
+	trx_sig_t*	sig,		/*!< in: signal */
+	que_thr_t**	next_thr);	/*!< in/out: next query thread to run;
 					if the value which is passed in is
 					a pointer to a NULL pointer, then the
 					calling function can start running
 					a new query thread */
-/********************************************************************
+/****************************************************************//**
 Removes the signal object from a trx signal queue. */
-
+UNIV_INTERN
 void
 trx_sig_remove(
 /*===========*/
-	trx_t*		trx,	/* in: trx handle */
-	trx_sig_t*	sig);	/* in, own: signal */
-/********************************************************************
+	trx_t*		trx,	/*!< in: trx handle */
+	trx_sig_t*	sig);	/*!< in, own: signal */
+/****************************************************************//**
 Starts handling of a trx signal. */
-
+UNIV_INTERN
 void
 trx_sig_start_handle(
 /*=================*/
-	trx_t*		trx,		/* in: trx handle */
-	que_thr_t**	next_thr);	/* in/out: next query thread to run;
+	trx_t*		trx,		/*!< in: trx handle */
+	que_thr_t**	next_thr);	/*!< in/out: next query thread to run;
 					if the value which is passed in is
 					a pointer to a NULL pointer, then the
 					calling function can start running
 					a new query thread */
-/********************************************************************
+/****************************************************************//**
 Ends signal handling. If the session is in the error state, and
 trx->graph_before_signal_handling != NULL, returns control to the error
 handling routine of the graph (currently only returns the control to the
 graph root which then sends an error message to the client). */
-
+UNIV_INTERN
 void
 trx_end_signal_handling(
 /*====================*/
-	trx_t*	trx);	/* in: trx */
-/*************************************************************************
-Creates a commit command node struct. */
-
+	trx_t*	trx);	/*!< in: trx */
+/*********************************************************************//**
+Creates a commit command node struct.
+@return	own: commit node struct */
+UNIV_INTERN
 commit_node_t*
 commit_node_create(
 /*===============*/
-				/* out, own: commit node struct */
-	mem_heap_t*	heap);	/* in: mem heap where created */
-/***************************************************************
-Performs an execution step for a commit type node in a query graph. */
-
+	mem_heap_t*	heap);	/*!< in: mem heap where created */
+/***********************************************************//**
+Performs an execution step for a commit type node in a query graph.
+@return	query thread to run next, or NULL */
+UNIV_INTERN
 que_thr_t*
 trx_commit_step(
 /*============*/
-				/* out: query thread to run next, or NULL */
-	que_thr_t*	thr);	/* in: query thread */
+	que_thr_t*	thr);	/*!< in: query thread */
 
-/**************************************************************************
+/**********************************************************************//**
 Prints info about a transaction to the given file. The caller must own the
 kernel mutex and must have called
 innobase_mysql_prepare_print_arbitrary_thd(), unless he knows that MySQL
 or InnoDB cannot meanwhile change the info printed here. */
-
+UNIV_INTERN
 void
 trx_print(
 /*======*/
-	FILE*	f,		/* in: output stream */
-	trx_t*	trx,		/* in: transaction */
-	ulint	max_query_len);	/* in: max query length to print, or 0 to
+	FILE*	f,		/*!< in: output stream */
+	trx_t*	trx,		/*!< in: transaction */
+	ulint	max_query_len);	/*!< in: max query length to print, or 0 to
 				   use the default max length */
 
-#ifndef UNIV_HOTBACKUP
-/**************************************************************************
-Determines if the currently running transaction has been interrupted. */
+/** Type of data dictionary operation */
+enum trx_dict_op {
+	/** The transaction is not modifying the data dictionary. */
+	TRX_DICT_OP_NONE = 0,
+	/** The transaction is creating a table or an index, or
+	dropping a table.  The table must be dropped in crash
+	recovery.  This and TRX_DICT_OP_NONE are the only possible
+	operation modes in crash recovery. */
+	TRX_DICT_OP_TABLE = 1,
+	/** The transaction is creating or dropping an index in an
+	existing table.  In crash recovery, the the data dictionary
+	must be locked, but the table must not be dropped. */
+	TRX_DICT_OP_INDEX = 2
+};
 
+/**********************************************************************//**
+Determine if a transaction is a dictionary operation.
+@return	dictionary operation mode */
+UNIV_INLINE
+enum trx_dict_op
+trx_get_dict_operation(
+/*===================*/
+	const trx_t*	trx)	/*!< in: transaction */
+	__attribute__((pure));
+/**********************************************************************//**
+Flag a transaction a dictionary operation. */
+UNIV_INLINE
+void
+trx_set_dict_operation(
+/*===================*/
+	trx_t*			trx,	/*!< in/out: transaction */
+	enum trx_dict_op	op);	/*!< in: operation, not
+					TRX_DICT_OP_NONE */
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Determines if the currently running transaction has been interrupted.
+@return	TRUE if interrupted */
+UNIV_INTERN
 ibool
 trx_is_interrupted(
 /*===============*/
-			/* out: TRUE if interrupted */
-	trx_t*	trx);	/* in: transaction */
+	trx_t*	trx);	/*!< in: transaction */
 #else /* !UNIV_HOTBACKUP */
 #define trx_is_interrupted(trx) FALSE
 #endif /* !UNIV_HOTBACKUP */
 
-/***********************************************************************
-Compares the "weight" (or size) of two transactions. The weight of one
-transaction is estimated as the number of altered rows + the number of
-locked rows. Transactions that have edited non-transactional tables are
-considered heavier than ones that have not. */
+/*******************************************************************//**
+Calculates the "weight" of a transaction. The weight of one transaction
+is estimated as the number of altered rows + the number of locked rows.
+@param t	transaction
+@return		transaction weight */
+#define TRX_WEIGHT(t)	\
+	ut_dulint_add((t)->undo_no, UT_LIST_GET_LEN((t)->trx_locks))
 
+/*******************************************************************//**
+Compares the "weight" (or size) of two transactions. Transactions that
+have edited non-transactional tables are considered heavier than ones
+that have not.
+@return	<0, 0 or >0; similar to strcmp(3) */
+UNIV_INTERN
 int
 trx_weight_cmp(
 /*===========*/
-			/* out: <0, 0 or >0; similar to strcmp(3) */
-	trx_t*	a,	/* in: the first transaction to be compared */
-	trx_t*	b);	/* in: the second transaction to be compared */
+	const trx_t*	a,	/*!< in: the first transaction to be compared */
+	const trx_t*	b);	/*!< in: the second transaction to be compared */
+
+/*******************************************************************//**
+Retrieves transacion's id, represented as unsigned long long.
+@return	transaction's id */
+UNIV_INLINE
+ullint
+trx_get_id(
+/*=======*/
+	const trx_t*	trx);	/*!< in: transaction */
+
+/* Maximum length of a string that can be returned by
+trx_get_que_state_str(). */
+#define TRX_QUE_STATE_STR_MAX_LEN	12 /* "ROLLING BACK" */
+
+/*******************************************************************//**
+Retrieves transaction's que state in a human readable string. The string
+should not be free()'d or modified.
+@return	string in the data segment */
+UNIV_INLINE
+const char*
+trx_get_que_state_str(
+/*==================*/
+	const trx_t*	trx);	/*!< in: transaction */
 
 /* Signal to a transaction */
 struct trx_sig_struct{
-	ulint		type;		/* signal type */
-	ulint		sender;		/* TRX_SIG_SELF or
+	unsigned	type:3;		/*!< signal type */
+	unsigned	sender:1;	/*!< TRX_SIG_SELF or
 					TRX_SIG_OTHER_SESS */
-	que_thr_t*	receiver;	/* non-NULL if the sender of the signal
+	que_thr_t*	receiver;	/*!< non-NULL if the sender of the signal
 					wants reply after the operation induced
 					by the signal is completed */
-	trx_savept_t	savept;		/* possible rollback savepoint */
+	trx_savept_t	savept;		/*!< possible rollback savepoint */
 	UT_LIST_NODE_T(trx_sig_t)
-			signals;	/* queue of pending signals to the
+			signals;	/*!< queue of pending signals to the
 					transaction */
 	UT_LIST_NODE_T(trx_sig_t)
-			reply_signals;	/* list of signals for which the sender
+			reply_signals;	/*!< list of signals for which the sender
 					transaction is waiting a reply */
 };
 
@@ -383,65 +467,82 @@ struct trx_struct{
 	ulint		magic_n;
 	/* All the next fields are protected by the kernel mutex, except the
 	undo logs which are protected by undo_mutex */
-	const char*	op_info;	/* English text describing the
+	const char*	op_info;	/*!< English text describing the
 					current operation, or an empty
 					string */
-	unsigned	is_purge:1;	/* 0=user transaction, 1=purge */
-	ulint		conc_state;	/* state of the trx from the point
+	unsigned	is_purge:1;	/*!< 0=user transaction, 1=purge */
+	unsigned	is_recovered:1;	/*!< 0=normal transaction,
+					1=recovered, must be rolled back */
+	unsigned	conc_state:2;	/*!< state of the trx from the point
 					of view of concurrency control:
 					TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY,
 					... */
-	time_t		start_time;	/* time the trx object was created
-					or the state last time became
-					TRX_ACTIVE */
-	ulint		isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
-	ibool		check_foreigns;	/* normally TRUE, but if the user
+	unsigned	que_state:2;	/*!< valid when conc_state == TRX_ACTIVE:
+					TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT,
+					... */
+	unsigned	isolation_level:2;/* TRX_ISO_REPEATABLE_READ, ... */
+	unsigned	check_foreigns:1;/* normally TRUE, but if the user
 					wants to suppress foreign key checks,
 					(in table imports, for example) we
 					set this FALSE */
-	ibool		check_unique_secondary;
+	unsigned	check_unique_secondary:1;
 					/* normally TRUE, but if the user
 					wants to speed up inserts by
 					suppressing unique key checks
 					for secondary indexes when we decide
 					if we can use the insert buffer for
 					them, we set this FALSE */
-	dulint		id;		/* transaction id */
-	XID		xid;		/* X/Open XA transaction
-					identification to identify a
-					transaction branch */
-	ibool		support_xa;	/* normally we do the XA two-phase
+	unsigned	support_xa:1;	/*!< normally we do the XA two-phase
 					commit steps, but by setting this to
 					FALSE, one can save CPU time and about
 					150 bytes in the undo log size as then
 					we skip XA steps */
-	dulint		no;		/* transaction serialization number ==
-					max trx id when the transaction is
-					moved to COMMITTED_IN_MEMORY state */
-	ibool		flush_log_later;/* when we commit the transaction
-					in MySQL's binlog write, we will
-					flush the log to disk later in
-					a separate call */
-	ibool		must_flush_log_later;/* this flag is set to TRUE in
+	unsigned	flush_log_later:1;/* In 2PC, we hold the
+					prepare_commit mutex across
+					both phases. In that case, we
+					defer flush of the logs to disk
+					until after we release the
+					mutex. */
+	unsigned	must_flush_log_later:1;/* this flag is set to TRUE in
 					trx_commit_off_kernel() if
 					flush_log_later was TRUE, and there
 					were modifications by the transaction;
 					in that case we must flush the log
 					in trx_commit_complete_for_mysql() */
-	dulint		commit_lsn;	/* lsn at the time of the commit */
-	ibool		dict_operation;	/* TRUE if the trx is used to create
-					a table, create an index, or drop a
-					table.	This is a hint that the table
-					may need to be dropped in crash
-					recovery. */
-	dulint		table_id;	/* table id if the preceding field is
-					TRUE */
-	/*------------------------------*/
-	unsigned	duplicates:2;	/* TRX_DUP_IGNORE | TRX_DUP_REPLACE */
-	unsigned	active_trans:2;	/* 1 - if a transaction in MySQL
+	unsigned	dict_operation:2;/**< @see enum trx_dict_op */
+	unsigned	duplicates:2;	/*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
+	unsigned	active_trans:2;	/*!< 1 - if a transaction in MySQL
 					is active. 2 - if prepare_commit_mutex
 					was taken */
-	void*		mysql_thd;	/* MySQL thread handle corresponding
+	unsigned	has_search_latch:1;
+					/* TRUE if this trx has latched the
+					search system latch in S-mode */
+	unsigned	declared_to_be_inside_innodb:1;
+					/* this is TRUE if we have declared
+					this transaction in
+					srv_conc_enter_innodb to be inside the
+					InnoDB engine */
+	unsigned	handling_signals:1;/* this is TRUE as long as the trx
+					is handling signals */
+	unsigned	dict_operation_lock_mode:2;
+					/* 0, RW_S_LATCH, or RW_X_LATCH:
+					the latch mode trx currently holds
+					on dict_operation_lock */
+	time_t		start_time;	/*!< time the trx object was created
+					or the state last time became
+					TRX_ACTIVE */
+	trx_id_t	id;		/*!< transaction id */
+	XID		xid;		/*!< X/Open XA transaction
+					identification to identify a
+					transaction branch */
+	trx_id_t	no;		/*!< transaction serialization number ==
+					max trx id when the transaction is
+					moved to COMMITTED_IN_MEMORY state */
+	ib_uint64_t	commit_lsn;	/*!< lsn at the time of the commit */
+	trx_id_t	table_id;	/*!< Table to drop iff dict_operation
+					is TRUE, or ut_dulint_zero. */
+	/*------------------------------*/
+	void*		mysql_thd;	/*!< MySQL thread handle corresponding
 					to this trx, or NULL */
 	char**		mysql_query_str;/* pointer to the field in mysqld_thd
 					which contains the pointer to the
@@ -451,7 +552,7 @@ struct trx_struct{
 					contains a pointer to the latest file
 					name; this is NULL if binlog is not
 					used */
-	ib_longlong	mysql_log_offset;/* if MySQL binlog is used, this field
+	ib_int64_t	mysql_log_offset;/* if MySQL binlog is used, this field
 					contains the end offset of the binlog
 					entry */
 	os_thread_id_t	mysql_thread_id;/* id of the MySQL thread associated
@@ -467,13 +568,6 @@ struct trx_struct{
 					/* how many tables the current SQL
 					statement uses, except those
 					in consistent read */
-	ibool		dict_operation_lock_mode;
-					/* 0, RW_S_LATCH, or RW_X_LATCH:
-					the latch mode trx currently holds
-					on dict_operation_lock */
-	ibool		has_search_latch;
-					/* TRUE if this trx has latched the
-					search system latch in S-mode */
 	ulint		search_latch_timeout;
 					/* If we notice that someone is
 					waiting for our S-lock on the search
@@ -485,64 +579,55 @@ struct trx_struct{
 					to reduce contention on the search
 					latch */
 	/*------------------------------*/
-	ibool		declared_to_be_inside_innodb;
-					/* this is TRUE if we have declared
-					this transaction in
-					srv_conc_enter_innodb to be inside the
-					InnoDB engine */
 	ulint		n_tickets_to_enter_innodb;
 					/* this can be > 0 only when
 					declared_to_... is TRUE; when we come
 					to srv_conc_innodb_enter, if the value
 					here is > 0, we decrement this by 1 */
 	/*------------------------------*/
-	lock_t*		auto_inc_lock;	/* possible auto-inc lock reserved by
-					the transaction; note that it is also
-					in the lock list trx_locks */
 	UT_LIST_NODE_T(trx_t)
-			trx_list;	/* list of transactions */
+			trx_list;	/*!< list of transactions */
 	UT_LIST_NODE_T(trx_t)
-			mysql_trx_list;	/* list of transactions created for
+			mysql_trx_list;	/*!< list of transactions created for
 					MySQL */
 	/*------------------------------*/
-	ulint		error_state;	/* 0 if no error, otherwise error
+	ulint		error_state;	/*!< 0 if no error, otherwise error
 					number; NOTE That ONLY the thread
 					doing the transaction is allowed to
 					set this field: this is NOT protected
 					by the kernel mutex */
-	void*		error_info;	/* if the error number indicates a
+	const dict_index_t*error_info;	/*!< if the error number indicates a
 					duplicate key error, a pointer to
 					the problematic index is stored here */
-	sess_t*		sess;		/* session of the trx, NULL if none */
-	ulint		que_state;	/* TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT,
-					... */
-	que_t*		graph;		/* query currently run in the session,
+	ulint		error_key_num;	/*!< if the index creation fails to a
+					duplicate key error, a mysql key
+					number of that index is stored here */
+	sess_t*		sess;		/*!< session of the trx, NULL if none */
+	que_t*		graph;		/*!< query currently run in the session,
 					or NULL if none; NOTE that the query
 					belongs to the session, and it can
 					survive over a transaction commit, if
 					it is a stored procedure with a COMMIT
 					WORK statement, for instance */
-	ulint		n_active_thrs;	/* number of active query threads */
-	ibool		handling_signals;/* this is TRUE as long as the trx
-					is handling signals */
+	ulint		n_active_thrs;	/*!< number of active query threads */
 	que_t*		graph_before_signal_handling;
 					/* value of graph when signal handling
 					for this trx started: this is used to
 					return control to the original query
 					graph for error processing */
-	trx_sig_t	sig;		/* one signal object can be allocated
+	trx_sig_t	sig;		/*!< one signal object can be allocated
 					in this space, avoiding mem_alloc */
 	UT_LIST_BASE_NODE_T(trx_sig_t)
-			signals;	/* queue of processed or pending
+			signals;	/*!< queue of processed or pending
 					signals to the trx */
 	UT_LIST_BASE_NODE_T(trx_sig_t)
-			reply_signals;	/* list of signals sent by the query
+			reply_signals;	/*!< list of signals sent by the query
 					threads of this trx for which a thread
 					is waiting for a reply; if this trx is
 					killed, the reply requests in the list
 					must be canceled */
 	/*------------------------------*/
-	lock_t*		wait_lock;	/* if trx execution state is
+	lock_t*		wait_lock;	/*!< if trx execution state is
 					TRX_QUE_LOCK_WAIT, this points to
 					the lock request, otherwise this is
 					NULL */
@@ -552,18 +637,21 @@ struct trx_struct{
 					if another transaction chooses this
 					transaction as a victim in deadlock
 					resolution, it sets this to TRUE */
-	time_t		wait_started;	/* lock wait started at this time */
+	time_t		wait_started;	/*!< lock wait started at this time */
 	UT_LIST_BASE_NODE_T(que_thr_t)
-			wait_thrs;	/* query threads belonging to this
+			wait_thrs;	/*!< query threads belonging to this
 					trx that are in the QUE_THR_LOCK_WAIT
 					state */
-	ulint		deadlock_mark;	/* a mark field used in deadlock
-					checking algorithm */
+	ulint		deadlock_mark;	/*!< a mark field used in deadlock
+					checking algorithm.  This must be
+					in its own machine word, because
+					it can be changed by other
+					threads while holding kernel_mutex. */
 	/*------------------------------*/
-	mem_heap_t*	lock_heap;	/* memory heap for the locks of the
+	mem_heap_t*	lock_heap;	/*!< memory heap for the locks of the
 					transaction */
 	UT_LIST_BASE_NODE_T(lock_t)
-			trx_locks;	/* locks reserved by the transaction */
+			trx_locks;	/*!< locks reserved by the transaction */
 	/*------------------------------*/
 	mem_heap_t*	global_read_view_heap;
 					/* memory heap for the global read
@@ -571,7 +659,7 @@ struct trx_struct{
 	read_view_t*	global_read_view;
 					/* consistent read view associated
 					to a transaction or NULL */
-	read_view_t*	read_view;	/* consistent read view used in the
+	read_view_t*	read_view;	/*!< consistent read view used in the
 					transaction or NULL, this read view
 					if defined can be normal read view
 					associated to a transaction (i.e.
@@ -579,16 +667,16 @@ struct trx_struct{
 					associated to a cursor */
 	/*------------------------------*/
 	UT_LIST_BASE_NODE_T(trx_named_savept_t)
-			trx_savepoints;	/* savepoints set with SAVEPOINT ...,
+			trx_savepoints;	/*!< savepoints set with SAVEPOINT ...,
 					oldest first */
 	/*------------------------------*/
-	mutex_t		undo_mutex;	/* mutex protecting the fields in this
+	mutex_t		undo_mutex;	/*!< mutex protecting the fields in this
 					section (down to undo_no_arr), EXCEPT
 					last_sql_stat_start, which can be
 					accessed only when we know that there
 					cannot be any activity in the undo
 					logs! */
-	dulint		undo_no;	/* next undo log record number to
+	undo_no_t	undo_no;	/*!< next undo log record number to
 					assign; since the undo log is
 					private for a transaction, this
 					is a simple ascending sequence
@@ -600,25 +688,31 @@ struct trx_struct{
 					was started: in case of an error, trx
 					is rolled back down to this undo
 					number; see note at undo_mutex! */
-	trx_rseg_t*	rseg;		/* rollback segment assigned to the
+	trx_rseg_t*	rseg;		/*!< rollback segment assigned to the
 					transaction, or NULL if not assigned
 					yet */
-	trx_undo_t*	insert_undo;	/* pointer to the insert undo log, or
+	trx_undo_t*	insert_undo;	/*!< pointer to the insert undo log, or
 					NULL if no inserts performed yet */
-	trx_undo_t*	update_undo;	/* pointer to the update undo log, or
+	trx_undo_t*	update_undo;	/*!< pointer to the update undo log, or
 					NULL if no update performed yet */
-	dulint		roll_limit;	/* least undo number to undo during
+	undo_no_t	roll_limit;	/*!< least undo number to undo during
 					a rollback */
-	ulint		pages_undone;	/* number of undo log pages undone
+	ulint		pages_undone;	/*!< number of undo log pages undone
 					since the last undo log truncation */
-	trx_undo_arr_t*	undo_no_arr;	/* array of undo numbers of undo log
+	trx_undo_arr_t*	undo_no_arr;	/*!< array of undo numbers of undo log
 					records which are currently processed
 					by a rollback operation */
-	ulint		n_autoinc_rows;	/* no. of AUTO-INC rows required for
+	/*------------------------------*/
+	ulint		n_autoinc_rows;	/*!< no. of AUTO-INC rows required for
 					an SQL statement. This is useful for
 					multi-row INSERTs */
+	ib_vector_t*    autoinc_locks;  /* AUTOINC locks held by this
+					transaction. Note that these are
+					also in the lock list trx_locks. This
+					vector needs to be freed explicitly
+					when the trx_t instance is desrtoyed */
 	/*------------------------------*/
-	char detailed_error[256];	/* detailed error message for last
+	char detailed_error[256];	/*!< detailed error message for last
 					error, or empty. */
 };
 
@@ -628,19 +722,19 @@ struct trx_struct{
 					transaction, e.g., a parallel
 					query */
 /* Transaction concurrency states (trx->conc_state) */
-#define	TRX_NOT_STARTED		1
-#define	TRX_ACTIVE		2
-#define	TRX_COMMITTED_IN_MEMORY	3
-#define	TRX_PREPARED		4	/* Support for 2PC/XA */
+#define	TRX_NOT_STARTED		0
+#define	TRX_ACTIVE		1
+#define	TRX_COMMITTED_IN_MEMORY	2
+#define	TRX_PREPARED		3	/* Support for 2PC/XA */
 
 /* Transaction execution states when trx->conc_state == TRX_ACTIVE */
-#define TRX_QUE_RUNNING		1	/* transaction is running */
-#define TRX_QUE_LOCK_WAIT	2	/* transaction is waiting for a lock */
-#define TRX_QUE_ROLLING_BACK	3	/* transaction is rolling back */
-#define TRX_QUE_COMMITTING	4	/* transaction is committing */
+#define TRX_QUE_RUNNING		0	/* transaction is running */
+#define TRX_QUE_LOCK_WAIT	1	/* transaction is waiting for a lock */
+#define TRX_QUE_ROLLING_BACK	2	/* transaction is rolling back */
+#define TRX_QUE_COMMITTING	3	/* transaction is committing */
 
 /* Transaction isolation levels (trx->isolation_level) */
-#define TRX_ISO_READ_UNCOMMITTED	1	/* dirty read: non-locking
+#define TRX_ISO_READ_UNCOMMITTED	0	/* dirty read: non-locking
 						SELECTs are performed so that
 						we do not look at a possible
 						earlier version of a record;
@@ -649,7 +743,7 @@ struct trx_struct{
 						level; otherwise like level
 						2 */
 
-#define TRX_ISO_READ_COMMITTED		2	/* somewhat Oracle-like
+#define TRX_ISO_READ_COMMITTED		1	/* somewhat Oracle-like
 						isolation, except that in
 						range UPDATE and DELETE we
 						must block phantom rows
@@ -662,7 +756,7 @@ struct trx_struct{
 						each consistent read reads its
 						own snapshot */
 
-#define TRX_ISO_REPEATABLE_READ		3	/* this is the default;
+#define TRX_ISO_REPEATABLE_READ		2	/* this is the default;
 						all consistent reads in the
 						same trx read the same
 						snapshot;
@@ -670,7 +764,7 @@ struct trx_struct{
 						in locking reads to block
 						insertions into gaps */
 
-#define TRX_ISO_SERIALIZABLE		4	/* all plain SELECTs are
+#define TRX_ISO_SERIALIZABLE		3	/* all plain SELECTs are
 						converted to LOCK IN SHARE
 						MODE reads */
 
@@ -681,7 +775,7 @@ Multiple flags can be combined with bitwise OR. */
 
 
 /* Types of a trx signal */
-#define TRX_SIG_NO_SIGNAL		100
+#define TRX_SIG_NO_SIGNAL		0
 #define TRX_SIG_TOTAL_ROLLBACK		1
 #define TRX_SIG_ROLLBACK_TO_SAVEPT	2
 #define TRX_SIG_COMMIT			3
@@ -689,25 +783,32 @@ Multiple flags can be combined with bitwise OR. */
 #define TRX_SIG_BREAK_EXECUTION		5
 
 /* Sender types of a signal */
-#define TRX_SIG_SELF		1	/* sent by the session itself, or
+#define TRX_SIG_SELF		0	/* sent by the session itself, or
 					by an error occurring within this
 					session */
-#define TRX_SIG_OTHER_SESS	2	/* sent by another session (which
+#define TRX_SIG_OTHER_SESS	1	/* sent by another session (which
 					must hold rights to this) */
 
-/* Commit command node in a query graph */
-struct commit_node_struct{
-	que_common_t	common;	/* node type: QUE_NODE_COMMIT */
-	ulint		state;	/* node execution state */
+/** Commit node states */
+enum commit_node_state {
+	COMMIT_NODE_SEND = 1,	/*!< about to send a commit signal to
+				the transaction */
+	COMMIT_NODE_WAIT	/*!< commit signal sent to the transaction,
+				waiting for completion */
+};
+
+/** Commit command node in a query graph */
+struct commit_node_struct{
+	que_common_t	common;	/*!< node type: QUE_NODE_COMMIT */
+	enum commit_node_state
+			state;	/*!< node execution state */
 };
 
-/* Commit node states */
-#define COMMIT_NODE_SEND	1
-#define COMMIT_NODE_WAIT	2
 
 
 #ifndef UNIV_NONINL
 #include "trx0trx.ic"
 #endif
+#endif /* !UNIV_HOTBACKUP */
 
 #endif
diff --git a/storage/innodb_plugin/include/trx0trx.ic b/storage/innodb_plugin/include/trx0trx.ic
new file mode 100644
index 00000000000..7332eeece85
--- /dev/null
+++ b/storage/innodb_plugin/include/trx0trx.ic
@@ -0,0 +1,164 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0trx.ic
+The transaction
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+/*************************************************************//**
+Starts the transaction if it is not yet started. */
+UNIV_INLINE
+void
+trx_start_if_not_started(
+/*=====================*/
+	trx_t*	trx)	/*!< in: transaction */
+{
+	ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY);
+
+	if (trx->conc_state == TRX_NOT_STARTED) {
+
+		trx_start(trx, ULINT_UNDEFINED);
+	}
+}
+
+/*************************************************************//**
+Starts the transaction if it is not yet started. Assumes we have reserved
+the kernel mutex! */
+UNIV_INLINE
+void
+trx_start_if_not_started_low(
+/*=========================*/
+	trx_t*	trx)	/*!< in: transaction */
+{
+	ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY);
+
+	if (trx->conc_state == TRX_NOT_STARTED) {
+
+		trx_start_low(trx, ULINT_UNDEFINED);
+	}
+}
+
+/****************************************************************//**
+Retrieves the error_info field from a trx.
+@return	the error info */
+UNIV_INLINE
+const dict_index_t*
+trx_get_error_info(
+/*===============*/
+	const trx_t*	trx)	/*!< in: trx object */
+{
+	return(trx->error_info);
+}
+
+/*******************************************************************//**
+Retrieves transacion's id, represented as unsigned long long.
+@return	transaction's id */
+UNIV_INLINE
+ullint
+trx_get_id(
+/*=======*/
+	const trx_t*	trx)	/*!< in: transaction */
+{
+	return((ullint)ut_conv_dulint_to_longlong(trx->id));
+}
+
+/*******************************************************************//**
+Retrieves transaction's que state in a human readable string. The string
+should not be free()'d or modified.
+@return	string in the data segment */
+UNIV_INLINE
+const char*
+trx_get_que_state_str(
+/*==================*/
+	const trx_t*	trx)	/*!< in: transaction */
+{
+	/* be sure to adjust TRX_QUE_STATE_STR_MAX_LEN if you change this */
+	switch (trx->que_state) {
+	case TRX_QUE_RUNNING:
+		return("RUNNING");
+	case TRX_QUE_LOCK_WAIT:
+		return("LOCK WAIT");
+	case TRX_QUE_ROLLING_BACK:
+		return("ROLLING BACK");
+	case TRX_QUE_COMMITTING:
+		return("COMMITTING");
+	default:
+		return("UNKNOWN");
+	}
+}
+
+/**********************************************************************//**
+Determine if a transaction is a dictionary operation.
+@return	dictionary operation mode */
+UNIV_INLINE
+enum trx_dict_op
+trx_get_dict_operation(
+/*===================*/
+	const trx_t*	trx)	/*!< in: transaction */
+{
+	enum trx_dict_op op = (enum trx_dict_op) trx->dict_operation;
+
+#ifdef UNIV_DEBUG
+	switch (op) {
+	case TRX_DICT_OP_NONE:
+	case TRX_DICT_OP_TABLE:
+	case TRX_DICT_OP_INDEX:
+		return(op);
+	}
+	ut_error;
+#endif /* UNIV_DEBUG */
+	return((enum trx_dict_op) UNIV_EXPECT(op, TRX_DICT_OP_NONE));
+}
+/**********************************************************************//**
+Flag a transaction a dictionary operation. */
+UNIV_INLINE
+void
+trx_set_dict_operation(
+/*===================*/
+	trx_t*			trx,	/*!< in/out: transaction */
+	enum trx_dict_op	op)	/*!< in: operation, not
+					TRX_DICT_OP_NONE */
+{
+#ifdef UNIV_DEBUG
+	enum trx_dict_op	old_op = trx_get_dict_operation(trx);
+
+	switch (op) {
+	case TRX_DICT_OP_NONE:
+		ut_error;
+		break;
+	case TRX_DICT_OP_TABLE:
+		switch (old_op) {
+		case TRX_DICT_OP_NONE:
+		case TRX_DICT_OP_INDEX:
+		case TRX_DICT_OP_TABLE:
+			goto ok;
+		}
+		ut_error;
+		break;
+	case TRX_DICT_OP_INDEX:
+		ut_ad(old_op == TRX_DICT_OP_NONE);
+		break;
+	}
+ok:
+#endif /* UNIV_DEBUG */
+
+	trx->dict_operation = op;
+}
diff --git a/storage/innodb_plugin/include/trx0types.h b/storage/innodb_plugin/include/trx0types.h
new file mode 100644
index 00000000000..08cc9622d02
--- /dev/null
+++ b/storage/innodb_plugin/include/trx0types.h
@@ -0,0 +1,108 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0types.h
+Transaction system global type definitions
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0types_h
+#define trx0types_h
+
+#include "ut0byte.h"
+
+/** prepare trx_t::id for being printed via printf(3) */
+#define TRX_ID_PREP_PRINTF(id)	(ullint) ut_conv_dulint_to_longlong(id)
+
+/** printf(3) format used for printing TRX_ID_PRINTF_PREP() */
+#define TRX_ID_FMT		"%llX"
+
+/** maximum length that a formatted trx_t::id could take, not including
+the terminating NUL character. */
+#define TRX_ID_MAX_LEN		17
+
+/** Memory objects */
+/* @{ */
+/** Transaction */
+typedef struct trx_struct	trx_t;
+/** Transaction system */
+typedef struct trx_sys_struct	trx_sys_t;
+/** Doublewrite information */
+typedef struct trx_doublewrite_struct	trx_doublewrite_t;
+/** Signal */
+typedef struct trx_sig_struct	trx_sig_t;
+/** Rollback segment */
+typedef struct trx_rseg_struct	trx_rseg_t;
+/** Transaction undo log */
+typedef struct trx_undo_struct	trx_undo_t;
+/** Array of undo numbers of undo records being rolled back or purged */
+typedef struct trx_undo_arr_struct trx_undo_arr_t;
+/** A cell of trx_undo_arr_t */
+typedef struct trx_undo_inf_struct trx_undo_inf_t;
+/** The control structure used in the purge operation */
+typedef struct trx_purge_struct	trx_purge_t;
+/** Rollback command node in a query graph */
+typedef struct roll_node_struct	roll_node_t;
+/** Commit command node in a query graph */
+typedef struct commit_node_struct commit_node_t;
+/** SAVEPOINT command node in a query graph */
+typedef struct trx_named_savept_struct trx_named_savept_t;
+/* @} */
+
+/** Rollback contexts */
+enum trx_rb_ctx {
+	RB_NONE = 0,	/*!< no rollback */
+	RB_NORMAL,	/*!< normal rollback */
+	RB_RECOVERY,	/*!< rolling back an incomplete transaction,
+			in crash recovery */
+};
+
+/** Transaction identifier (DB_TRX_ID, DATA_TRX_ID) */
+typedef dulint	trx_id_t;
+/** Rollback pointer (DB_ROLL_PTR, DATA_ROLL_PTR) */
+typedef dulint	roll_ptr_t;
+/** Undo number */
+typedef dulint	undo_no_t;
+
+/** Transaction savepoint */
+typedef struct trx_savept_struct trx_savept_t;
+/** Transaction savepoint */
+struct trx_savept_struct{
+	undo_no_t	least_undo_no;	/*!< least undo number to undo */
+};
+
+/** File objects */
+/* @{ */
+/** Transaction system header */
+typedef byte	trx_sysf_t;
+/** Rollback segment header */
+typedef byte	trx_rsegf_t;
+/** Undo segment header */
+typedef byte	trx_usegf_t;
+/** Undo log header */
+typedef byte	trx_ulogf_t;
+/** Undo log page header */
+typedef byte	trx_upagef_t;
+
+/** Undo log record */
+typedef	byte	trx_undo_rec_t;
+/* @} */
+
+#endif
diff --git a/storage/innodb_plugin/include/trx0undo.h b/storage/innodb_plugin/include/trx0undo.h
new file mode 100644
index 00000000000..4db10eaa92e
--- /dev/null
+++ b/storage/innodb_plugin/include/trx0undo.h
@@ -0,0 +1,544 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0undo.h
+Transaction undo log
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0undo_h
+#define trx0undo_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+#include "page0types.h"
+#include "trx0xa.h"
+
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Builds a roll pointer.
+@return	roll pointer */
+UNIV_INLINE
+roll_ptr_t
+trx_undo_build_roll_ptr(
+/*====================*/
+	ibool	is_insert,	/*!< in: TRUE if insert undo log */
+	ulint	rseg_id,	/*!< in: rollback segment id */
+	ulint	page_no,	/*!< in: page number */
+	ulint	offset);	/*!< in: offset of the undo entry within page */
+/***********************************************************************//**
+Decodes a roll pointer. */
+UNIV_INLINE
+void
+trx_undo_decode_roll_ptr(
+/*=====================*/
+	roll_ptr_t	roll_ptr,	/*!< in: roll pointer */
+	ibool*		is_insert,	/*!< out: TRUE if insert undo log */
+	ulint*		rseg_id,	/*!< out: rollback segment id */
+	ulint*		page_no,	/*!< out: page number */
+	ulint*		offset);	/*!< out: offset of the undo
+					entry within page */
+/***********************************************************************//**
+Returns TRUE if the roll pointer is of the insert type.
+@return	TRUE if insert undo log */
+UNIV_INLINE
+ibool
+trx_undo_roll_ptr_is_insert(
+/*========================*/
+	roll_ptr_t	roll_ptr);	/*!< in: roll pointer */
+#endif /* !UNIV_HOTBACKUP */
+/*****************************************************************//**
+Writes a roll ptr to an index page. In case that the size changes in
+some future version, this function should be used instead of
+mach_write_... */
+UNIV_INLINE
+void
+trx_write_roll_ptr(
+/*===============*/
+	byte*		ptr,		/*!< in: pointer to memory where
+					written */
+	roll_ptr_t	roll_ptr);	/*!< in: roll ptr */
+/*****************************************************************//**
+Reads a roll ptr from an index page. In case that the roll ptr size
+changes in some future version, this function should be used instead of
+mach_read_...
+@return	roll ptr */
+UNIV_INLINE
+roll_ptr_t
+trx_read_roll_ptr(
+/*==============*/
+	const byte*	ptr);	/*!< in: pointer to memory from where to read */
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
+Gets an undo log page and x-latches it.
+@return	pointer to page x-latched */
+UNIV_INLINE
+page_t*
+trx_undo_page_get(
+/*==============*/
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number */
+	mtr_t*	mtr);		/*!< in: mtr */
+/******************************************************************//**
+Gets an undo log page and s-latches it.
+@return	pointer to page s-latched */
+UNIV_INLINE
+page_t*
+trx_undo_page_get_s_latched(
+/*========================*/
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number */
+	mtr_t*	mtr);		/*!< in: mtr */
+/******************************************************************//**
+Returns the previous undo record on the page in the specified log, or
+NULL if none exists.
+@return	pointer to record, NULL if none */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_prev_rec(
+/*=======================*/
+	trx_undo_rec_t*	rec,	/*!< in: undo log record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset);/*!< in: undo log header offset on page */
+/******************************************************************//**
+Returns the next undo log record on the page in the specified log, or
+NULL if none exists.
+@return	pointer to record, NULL if none */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_next_rec(
+/*=======================*/
+	trx_undo_rec_t*	rec,	/*!< in: undo log record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset);/*!< in: undo log header offset on page */
+/******************************************************************//**
+Returns the last undo record on the page in the specified undo log, or
+NULL if none exists.
+@return	pointer to record, NULL if none */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_last_rec(
+/*=======================*/
+	page_t*	undo_page,/*!< in: undo log page */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset);	/*!< in: undo log header offset on page */
+/******************************************************************//**
+Returns the first undo record on the page in the specified undo log, or
+NULL if none exists.
+@return	pointer to record, NULL if none */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_first_rec(
+/*========================*/
+	page_t*	undo_page,/*!< in: undo log page */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset);/*!< in: undo log header offset on page */
+/***********************************************************************//**
+Gets the previous record in an undo log.
+@return	undo log record, the page s-latched, NULL if none */
+UNIV_INTERN
+trx_undo_rec_t*
+trx_undo_get_prev_rec(
+/*==================*/
+	trx_undo_rec_t*	rec,	/*!< in: undo record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset,	/*!< in: undo log header offset on page */
+	mtr_t*		mtr);	/*!< in: mtr */
+/***********************************************************************//**
+Gets the next record in an undo log.
+@return	undo log record, the page s-latched, NULL if none */
+UNIV_INTERN
+trx_undo_rec_t*
+trx_undo_get_next_rec(
+/*==================*/
+	trx_undo_rec_t*	rec,	/*!< in: undo record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset,	/*!< in: undo log header offset on page */
+	mtr_t*		mtr);	/*!< in: mtr */
+/***********************************************************************//**
+Gets the first record in an undo log.
+@return	undo log record, the page latched, NULL if none */
+UNIV_INTERN
+trx_undo_rec_t*
+trx_undo_get_first_rec(
+/*===================*/
+	ulint	space,	/*!< in: undo log header space */
+	ulint	zip_size,/*!< in: compressed page size in bytes
+			or 0 for uncompressed pages */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset,	/*!< in: undo log header offset on page */
+	ulint	mode,	/*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */
+	mtr_t*	mtr);	/*!< in: mtr */
+/********************************************************************//**
+Tries to add a page to the undo log segment where the undo log is placed.
+@return	page number if success, else FIL_NULL */
+UNIV_INTERN
+ulint
+trx_undo_add_page(
+/*==============*/
+	trx_t*		trx,	/*!< in: transaction */
+	trx_undo_t*	undo,	/*!< in: undo log memory object */
+	mtr_t*		mtr);	/*!< in: mtr which does not have a latch to any
+				undo log page; the caller must have reserved
+				the rollback segment mutex */
+/***********************************************************************//**
+Truncates an undo log from the end. This function is used during a rollback
+to free space from an undo log. */
+UNIV_INTERN
+void
+trx_undo_truncate_end(
+/*==================*/
+	trx_t*		trx,	/*!< in: transaction whose undo log it is */
+	trx_undo_t*	undo,	/*!< in: undo log */
+	undo_no_t	limit);	/*!< in: all undo records with undo number
+				>= this value should be truncated */
+/***********************************************************************//**
+Truncates an undo log from the start. This function is used during a purge
+operation. */
+UNIV_INTERN
+void
+trx_undo_truncate_start(
+/*====================*/
+	trx_rseg_t*	rseg,		/*!< in: rollback segment */
+	ulint		space,		/*!< in: space id of the log */
+	ulint		hdr_page_no,	/*!< in: header page number */
+	ulint		hdr_offset,	/*!< in: header offset on the page */
+	undo_no_t	limit);		/*!< in: all undo pages with
+					undo numbers < this value
+					should be truncated; NOTE that
+					the function only frees whole
+					pages; the header page is not
+					freed, but emptied, if all the
+					records there are < limit */
+/********************************************************************//**
+Initializes the undo log lists for a rollback segment memory copy.
+This function is only called when the database is started or a new
+rollback segment created.
+@return	the combined size of undo log segments in pages */
+UNIV_INTERN
+ulint
+trx_undo_lists_init(
+/*================*/
+	trx_rseg_t*	rseg);	/*!< in: rollback segment memory object */
+/**********************************************************************//**
+Assigns an undo log for a transaction. A new undo log is created or a cached
+undo log reused.
+@return DB_SUCCESS if undo log assign successful, possible error codes
+are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE
+DB_OUT_OF_MEMORY */
+UNIV_INTERN
+ulint
+trx_undo_assign_undo(
+/*=================*/
+	trx_t*		trx,	/*!< in: transaction */
+	ulint		type);	/*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+/******************************************************************//**
+Sets the state of the undo log segment at a transaction finish.
+@return	undo log segment header page, x-latched */
+UNIV_INTERN
+page_t*
+trx_undo_set_state_at_finish(
+/*=========================*/
+	trx_rseg_t*	rseg,	/*!< in: rollback segment memory object */
+	trx_t*		trx,	/*!< in: transaction */
+	trx_undo_t*	undo,	/*!< in: undo log memory copy */
+	mtr_t*		mtr);	/*!< in: mtr */
+/******************************************************************//**
+Sets the state of the undo log segment at a transaction prepare.
+@return	undo log segment header page, x-latched */
+UNIV_INTERN
+page_t*
+trx_undo_set_state_at_prepare(
+/*==========================*/
+	trx_t*		trx,	/*!< in: transaction */
+	trx_undo_t*	undo,	/*!< in: undo log memory copy */
+	mtr_t*		mtr);	/*!< in: mtr */
+
+/**********************************************************************//**
+Adds the update undo log header as the first in the history list, and
+frees the memory object, or puts it to the list of cached update undo log
+segments. */
+UNIV_INTERN
+void
+trx_undo_update_cleanup(
+/*====================*/
+	trx_t*	trx,		/*!< in: trx owning the update undo log */
+	page_t*	undo_page,	/*!< in: update undo log header page,
+				x-latched */
+	mtr_t*	mtr);		/*!< in: mtr */
+/******************************************************************//**
+Frees or caches an insert undo log after a transaction commit or rollback.
+Knowledge of inserts is not needed after a commit or rollback, therefore
+the data can be discarded. */
+UNIV_INTERN
+void
+trx_undo_insert_cleanup(
+/*====================*/
+	trx_t*	trx);	/*!< in: transaction handle */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Parses the redo log entry of an undo log page initialization.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+trx_undo_parse_page_init(
+/*=====================*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr);	/*!< in: mtr or NULL */
+/***********************************************************//**
+Parses the redo log entry of an undo log page header create or reuse.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+trx_undo_parse_page_header(
+/*=======================*/
+	ulint	type,	/*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr);	/*!< in: mtr or NULL */
+/***********************************************************//**
+Parses the redo log entry of an undo log page header discard.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+trx_undo_parse_discard_latest(
+/*==========================*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr);	/*!< in: mtr or NULL */
+
+/* Types of an undo log segment */
+#define	TRX_UNDO_INSERT		1	/* contains undo entries for inserts */
+#define	TRX_UNDO_UPDATE		2	/* contains undo entries for updates
+					and delete markings: in short,
+					modifys (the name 'UPDATE' is a
+					historical relic) */
+/* States of an undo log segment */
+#define TRX_UNDO_ACTIVE		1	/* contains an undo log of an active
+					transaction */
+#define	TRX_UNDO_CACHED		2	/* cached for quick reuse */
+#define	TRX_UNDO_TO_FREE	3	/* insert undo segment can be freed */
+#define	TRX_UNDO_TO_PURGE	4	/* update undo segment will not be
+					reused: it can be freed in purge when
+					all undo data in it is removed */
+#define	TRX_UNDO_PREPARED	5	/* contains an undo log of an
+					prepared transaction */
+
+#ifndef UNIV_HOTBACKUP
+/** Transaction undo log memory object; this is protected by the undo_mutex
+in the corresponding transaction object */
+
+struct trx_undo_struct{
+	/*-----------------------------*/
+	ulint		id;		/*!< undo log slot number within the
+					rollback segment */
+	ulint		type;		/*!< TRX_UNDO_INSERT or
+					TRX_UNDO_UPDATE */
+	ulint		state;		/*!< state of the corresponding undo log
+					segment */
+	ibool		del_marks;	/*!< relevant only in an update undo log:
+					this is TRUE if the transaction may
+					have delete marked records, because of
+					a delete of a row or an update of an
+					indexed field; purge is then
+					necessary; also TRUE if the transaction
+					has updated an externally stored
+					field */
+	trx_id_t	trx_id;		/*!< id of the trx assigned to the undo
+					log */
+	XID		xid;		/*!< X/Open XA transaction
+					identification */
+	ibool		dict_operation;	/*!< TRUE if a dict operation trx */
+	dulint		table_id;	/*!< if a dict operation, then the table
+					id */
+	trx_rseg_t*	rseg;		/*!< rseg where the undo log belongs */
+	/*-----------------------------*/
+	ulint		space;		/*!< space id where the undo log
+					placed */
+	ulint		zip_size;	/*!< compressed page size of space
+					in bytes, or 0 for uncompressed */
+	ulint		hdr_page_no;	/*!< page number of the header page in
+					the undo log */
+	ulint		hdr_offset;	/*!< header offset of the undo log on the
+					page */
+	ulint		last_page_no;	/*!< page number of the last page in the
+					undo log; this may differ from
+					top_page_no during a rollback */
+	ulint		size;		/*!< current size in pages */
+	/*-----------------------------*/
+	ulint		empty;		/*!< TRUE if the stack of undo log
+					records is currently empty */
+	ulint		top_page_no;	/*!< page number where the latest undo
+					log record was catenated; during
+					rollback the page from which the latest
+					undo record was chosen */
+	ulint		top_offset;	/*!< offset of the latest undo record,
+					i.e., the topmost element in the undo
+					log if we think of it as a stack */
+	undo_no_t	top_undo_no;	/*!< undo number of the latest record */
+	buf_block_t*	guess_block;	/*!< guess for the buffer block where
+					the top page might reside */
+	/*-----------------------------*/
+	UT_LIST_NODE_T(trx_undo_t) undo_list;
+					/*!< undo log objects in the rollback
+					segment are chained into lists */
+};
+#endif /* !UNIV_HOTBACKUP */
+
+/** The offset of the undo log page header on pages of the undo log */
+#define	TRX_UNDO_PAGE_HDR	FSEG_PAGE_DATA
+/*-------------------------------------------------------------*/
+/** Transaction undo log page header offsets */
+/* @{ */
+#define	TRX_UNDO_PAGE_TYPE	0	/*!< TRX_UNDO_INSERT or
+					TRX_UNDO_UPDATE */
+#define	TRX_UNDO_PAGE_START	2	/*!< Byte offset where the undo log
+					records for the LATEST transaction
+					start on this page (remember that
+					in an update undo log, the first page
+					can contain several undo logs) */
+#define	TRX_UNDO_PAGE_FREE	4	/*!< On each page of the undo log this
+					field contains the byte offset of the
+					first free byte on the page */
+#define TRX_UNDO_PAGE_NODE	6	/*!< The file list node in the chain
+					of undo log pages */
+/*-------------------------------------------------------------*/
+#define TRX_UNDO_PAGE_HDR_SIZE	(6 + FLST_NODE_SIZE)
+					/*!< Size of the transaction undo
+					log page header, in bytes */
+/* @} */
+
+/** An update undo segment with just one page can be reused if it has
+at most this many bytes used; we must leave space at least for one new undo
+log header on the page */
+
+#define TRX_UNDO_PAGE_REUSE_LIMIT	(3 * UNIV_PAGE_SIZE / 4)
+
+/* An update undo log segment may contain several undo logs on its first page
+if the undo logs took so little space that the segment could be cached and
+reused. All the undo log headers are then on the first page, and the last one
+owns the undo log records on subsequent pages if the segment is bigger than
+one page. If an undo log is stored in a segment, then on the first page it is
+allowed to have zero undo records, but if the segment extends to several
+pages, then all the rest of the pages must contain at least one undo log
+record. */
+
+/** The offset of the undo log segment header on the first page of the undo
+log segment */
+
+#define	TRX_UNDO_SEG_HDR	(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE)
+/** Undo log segment header */
+/* @{ */
+/*-------------------------------------------------------------*/
+#define	TRX_UNDO_STATE		0	/*!< TRX_UNDO_ACTIVE, ... */
+#define	TRX_UNDO_LAST_LOG	2	/*!< Offset of the last undo log header
+					on the segment header page, 0 if
+					none */
+#define	TRX_UNDO_FSEG_HEADER	4	/*!< Header for the file segment which
+					the undo log segment occupies */
+#define	TRX_UNDO_PAGE_LIST	(4 + FSEG_HEADER_SIZE)
+					/*!< Base node for the list of pages in
+					the undo log segment; defined only on
+					the undo log segment's first page */
+/*-------------------------------------------------------------*/
+/** Size of the undo log segment header */
+#define TRX_UNDO_SEG_HDR_SIZE	(4 + FSEG_HEADER_SIZE + FLST_BASE_NODE_SIZE)
+/* @} */
+
+
+/** The undo log header. There can be several undo log headers on the first
+page of an update undo log segment. */
+/* @{ */
+/*-------------------------------------------------------------*/
+#define	TRX_UNDO_TRX_ID		0	/*!< Transaction id */
+#define	TRX_UNDO_TRX_NO		8	/*!< Transaction number of the
+					transaction; defined only if the log
+					is in a history list */
+#define TRX_UNDO_DEL_MARKS	16	/*!< Defined only in an update undo
+					log: TRUE if the transaction may have
+					done delete markings of records, and
+					thus purge is necessary */
+#define	TRX_UNDO_LOG_START	18	/*!< Offset of the first undo log record
+					of this log on the header page; purge
+					may remove undo log record from the
+					log start, and therefore this is not
+					necessarily the same as this log
+					header end offset */
+#define	TRX_UNDO_XID_EXISTS	20	/*!< TRUE if undo log header includes
+					X/Open XA transaction identification
+					XID */
+#define	TRX_UNDO_DICT_TRANS	21	/*!< TRUE if the transaction is a table
+					create, index create, or drop
+					transaction: in recovery
+					the transaction cannot be rolled back
+					in the usual way: a 'rollback' rather
+					means dropping the created or dropped
+					table, if it still exists */
+#define TRX_UNDO_TABLE_ID	22	/*!< Id of the table if the preceding
+					field is TRUE */
+#define	TRX_UNDO_NEXT_LOG	30	/*!< Offset of the next undo log header
+					on this page, 0 if none */
+#define	TRX_UNDO_PREV_LOG	32	/*!< Offset of the previous undo log
+					header on this page, 0 if none */
+#define TRX_UNDO_HISTORY_NODE	34	/*!< If the log is put to the history
+					list, the file list node is here */
+/*-------------------------------------------------------------*/
+/** Size of the undo log header without XID information */
+#define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE)
+
+/* Note: the writing of the undo log old header is coded by a log record
+MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the
+header is logged separately. In this sense, the XID is not really a member
+of the undo log header. TODO: do not append the XID to the log header if XA
+is not needed by the user. The XID wastes about 150 bytes of space in every
+undo log. In the history list we may have millions of undo logs, which means
+quite a large overhead. */
+
+/** X/Open XA Transaction Identification (XID) */
+/* @{ */
+/** xid_t::formatID */
+#define	TRX_UNDO_XA_FORMAT	(TRX_UNDO_LOG_OLD_HDR_SIZE)
+/** xid_t::gtrid_length */
+#define	TRX_UNDO_XA_TRID_LEN	(TRX_UNDO_XA_FORMAT + 4)
+/** xid_t::bqual_length */
+#define	TRX_UNDO_XA_BQUAL_LEN	(TRX_UNDO_XA_TRID_LEN + 4)
+/** Distributed transaction identifier data */
+#define	TRX_UNDO_XA_XID		(TRX_UNDO_XA_BQUAL_LEN + 4)
+/*--------------------------------------------------------------*/
+#define TRX_UNDO_LOG_XA_HDR_SIZE (TRX_UNDO_XA_XID + XIDDATASIZE)
+				/*!< Total size of the undo log header
+				with the XA XID */
+/* @} */
+
+#ifndef UNIV_NONINL
+#include "trx0undo.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/trx0undo.ic b/storage/innodb_plugin/include/trx0undo.ic
similarity index 52%
rename from storage/innobase/include/trx0undo.ic
rename to storage/innodb_plugin/include/trx0undo.ic
index f28f36ade03..2d289b34ef1 100644
--- a/storage/innobase/include/trx0undo.ic
+++ b/storage/innodb_plugin/include/trx0undo.ic
@@ -1,24 +1,43 @@
-/******************************************************
-Transaction undo log
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0undo.ic
+Transaction undo log
 
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
 
 #include "data0type.h"
+#include "page0page.h"
 
-/***************************************************************************
-Builds a roll pointer dulint. */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Builds a roll pointer.
+@return	roll pointer */
 UNIV_INLINE
-dulint
+roll_ptr_t
 trx_undo_build_roll_ptr(
 /*====================*/
-				/* out: roll pointer */
-	ibool	is_insert,	/* in: TRUE if insert undo log */
-	ulint	rseg_id,	/* in: rollback segment id */
-	ulint	page_no,	/* in: page number */
-	ulint	offset)		/* in: offset of the undo entry within page */
+	ibool	is_insert,	/*!< in: TRUE if insert undo log */
+	ulint	rseg_id,	/*!< in: rollback segment id */
+	ulint	page_no,	/*!< in: page number */
+	ulint	offset)		/*!< in: offset of the undo entry within page */
 {
 #if DATA_ROLL_PTR_LEN != 7
 # error "DATA_ROLL_PTR_LEN != 7"
@@ -32,17 +51,18 @@ trx_undo_build_roll_ptr(
 				+ offset));
 }
 
-/***************************************************************************
-Decodes a roll pointer dulint. */
+/***********************************************************************//**
+Decodes a roll pointer. */
 UNIV_INLINE
 void
 trx_undo_decode_roll_ptr(
 /*=====================*/
-	dulint	roll_ptr,	/* in: roll pointer */
-	ibool*	is_insert,	/* out: TRUE if insert undo log */
-	ulint*	rseg_id,	/* out: rollback segment id */
-	ulint*	page_no,	/* out: page number */
-	ulint*	offset)		/* out: offset of the undo entry within page */
+	roll_ptr_t	roll_ptr,	/*!< in: roll pointer */
+	ibool*		is_insert,	/*!< out: TRUE if insert undo log */
+	ulint*		rseg_id,	/*!< out: rollback segment id */
+	ulint*		page_no,	/*!< out: page number */
+	ulint*		offset)		/*!< out: offset of the undo
+					entry within page */
 {
 	ulint	low;
 	ulint	high;
@@ -64,14 +84,14 @@ trx_undo_decode_roll_ptr(
 		+ (low / 256) / 256;
 }
 
-/***************************************************************************
-Returns TRUE if the roll pointer is of the insert type. */
+/***********************************************************************//**
+Returns TRUE if the roll pointer is of the insert type.
+@return	TRUE if insert undo log */
 UNIV_INLINE
 ibool
 trx_undo_roll_ptr_is_insert(
 /*========================*/
-				/* out: TRUE if insert undo log */
-	dulint	roll_ptr)	/* in: roll pointer */
+	roll_ptr_t	roll_ptr)	/*!< in: roll pointer */
 {
 	ulint	high;
 #if DATA_ROLL_PTR_LEN != 7
@@ -84,8 +104,9 @@ trx_undo_roll_ptr_is_insert(
 
 	return(high / (256 * 256 * 128));
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/*********************************************************************
+/*****************************************************************//**
 Writes a roll ptr to an index page. In case that the size changes in
 some future version, this function should be used instead of
 mach_write_... */
@@ -93,24 +114,26 @@ UNIV_INLINE
 void
 trx_write_roll_ptr(
 /*===============*/
-	byte*	ptr,		/* in: pointer to memory where written */
-	dulint	roll_ptr)	/* in: roll ptr */
+	byte*		ptr,		/*!< in: pointer to memory where
+					written */
+	roll_ptr_t	roll_ptr)	/*!< in: roll ptr */
 {
-	ut_ad(DATA_ROLL_PTR_LEN == 7);
-
+#if DATA_ROLL_PTR_LEN != 7
+# error "DATA_ROLL_PTR_LEN != 7"
+#endif
 	mach_write_to_7(ptr, roll_ptr);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Reads a roll ptr from an index page. In case that the roll ptr size
 changes in some future version, this function should be used instead of
-mach_read_... */
+mach_read_...
+@return	roll ptr */
 UNIV_INLINE
-dulint
+roll_ptr_t
 trx_read_roll_ptr(
 /*==============*/
-			/* out: roll ptr */
-	byte*	ptr)	/* in: pointer to memory from where to read */
+	const byte*	ptr)	/*!< in: pointer to memory from where to read */
 {
 #if DATA_ROLL_PTR_LEN != 7
 # error "DATA_ROLL_PTR_LEN != 7"
@@ -118,65 +141,62 @@ trx_read_roll_ptr(
 	return(mach_read_from_7(ptr));
 }
 
-/**********************************************************************
-Gets an undo log page and x-latches it. */
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
+Gets an undo log page and x-latches it.
+@return	pointer to page x-latched */
 UNIV_INLINE
 page_t*
 trx_undo_page_get(
 /*==============*/
-				/* out: pointer to page x-latched */
-	ulint	space,		/* in: space where placed */
-	ulint	page_no,	/* in: page number */
-	mtr_t*	mtr)		/* in: mtr */
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
-	page_t*	page;
+	buf_block_t*	block = buf_page_get(space, zip_size, page_no,
+					     RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
 
-	page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
-	return(page);
+	return(buf_block_get_frame(block));
 }
 
-/**********************************************************************
-Gets an undo log page and s-latches it. */
+/******************************************************************//**
+Gets an undo log page and s-latches it.
+@return	pointer to page s-latched */
 UNIV_INLINE
 page_t*
 trx_undo_page_get_s_latched(
 /*========================*/
-				/* out: pointer to page s-latched */
-	ulint	space,		/* in: space where placed */
-	ulint	page_no,	/* in: page number */
-	mtr_t*	mtr)		/* in: mtr */
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
-	page_t*	page;
+	buf_block_t*	block = buf_page_get(space, zip_size, page_no,
+					     RW_S_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
 
-	page = buf_page_get(space, page_no, RW_S_LATCH, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
-	return(page);
+	return(buf_block_get_frame(block));
 }
 
-/**********************************************************************
+/******************************************************************//**
 Returns the start offset of the undo log records of the specified undo
-log on the page. */
+log on the page.
+@return	start offset */
 UNIV_INLINE
 ulint
 trx_undo_page_get_start(
 /*====================*/
-			/* out: start offset */
-	page_t*	undo_page,/* in: undo log page */
-	ulint	page_no,/* in: undo log header page number */
-	ulint	offset)	/* in: undo log header offset on page */
+	page_t*	undo_page,/*!< in: undo log page */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset)	/*!< in: undo log header offset on page */
 {
 	ulint	start;
 
-	if (page_no == buf_frame_get_page_no(undo_page)) {
+	if (page_no == page_get_page_no(undo_page)) {
 
 		start = mach_read_from_2(offset + undo_page
 					 + TRX_UNDO_LOG_START);
@@ -187,22 +207,22 @@ trx_undo_page_get_start(
 	return(start);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Returns the end offset of the undo log records of the specified undo
-log on the page. */
+log on the page.
+@return	end offset */
 UNIV_INLINE
 ulint
 trx_undo_page_get_end(
 /*==================*/
-			/* out: end offset */
-	page_t*	undo_page,/* in: undo log page */
-	ulint	page_no,/* in: undo log header page number */
-	ulint	offset)	/* in: undo log header offset on page */
+	page_t*	undo_page,/*!< in: undo log page */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset)	/*!< in: undo log header offset on page */
 {
 	trx_ulogf_t*	log_hdr;
 	ulint		end;
 
-	if (page_no == buf_frame_get_page_no(undo_page)) {
+	if (page_no == page_get_page_no(undo_page)) {
 
 		log_hdr = undo_page + offset;
 
@@ -220,22 +240,22 @@ trx_undo_page_get_end(
 	return(end);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Returns the previous undo record on the page in the specified log, or
-NULL if none exists. */
+NULL if none exists.
+@return	pointer to record, NULL if none */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_page_get_prev_rec(
 /*=======================*/
-				/* out: pointer to record, NULL if none */
-	trx_undo_rec_t*	rec,	/* in: undo log record */
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset)	/* in: undo log header offset on page */
+	trx_undo_rec_t*	rec,	/*!< in: undo log record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset)	/*!< in: undo log header offset on page */
 {
 	page_t*	undo_page;
 	ulint	start;
 
-	undo_page = buf_frame_align(rec);
+	undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
 
 	start = trx_undo_page_get_start(undo_page, page_no, offset);
 
@@ -247,23 +267,23 @@ trx_undo_page_get_prev_rec(
 	return(undo_page + mach_read_from_2(rec - 2));
 }
 
-/**********************************************************************
+/******************************************************************//**
 Returns the next undo log record on the page in the specified log, or
-NULL if none exists. */
+NULL if none exists.
+@return	pointer to record, NULL if none */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_page_get_next_rec(
 /*=======================*/
-				/* out: pointer to record, NULL if none */
-	trx_undo_rec_t*	rec,	/* in: undo log record */
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset)	/* in: undo log header offset on page */
+	trx_undo_rec_t*	rec,	/*!< in: undo log record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset)	/*!< in: undo log header offset on page */
 {
 	page_t*	undo_page;
 	ulint	end;
 	ulint	next;
 
-	undo_page = buf_frame_align(rec);
+	undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
 
 	end = trx_undo_page_get_end(undo_page, page_no, offset);
 
@@ -277,17 +297,17 @@ trx_undo_page_get_next_rec(
 	return(undo_page + next);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Returns the last undo record on the page in the specified undo log, or
-NULL if none exists. */
+NULL if none exists.
+@return	pointer to record, NULL if none */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_page_get_last_rec(
 /*=======================*/
-			/* out: pointer to record, NULL if none */
-	page_t*	undo_page,/* in: undo log page */
-	ulint	page_no,/* in: undo log header page number */
-	ulint	offset)	/* in: undo log header offset on page */
+	page_t*	undo_page,/*!< in: undo log page */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset)	/*!< in: undo log header offset on page */
 {
 	ulint	start;
 	ulint	end;
@@ -303,17 +323,17 @@ trx_undo_page_get_last_rec(
 	return(undo_page + mach_read_from_2(undo_page + end - 2));
 }
 
-/**********************************************************************
+/******************************************************************//**
 Returns the first undo record on the page in the specified undo log, or
-NULL if none exists. */
+NULL if none exists.
+@return	pointer to record, NULL if none */
 UNIV_INLINE
 trx_undo_rec_t*
 trx_undo_page_get_first_rec(
 /*========================*/
-			/* out: pointer to record, NULL if none */
-	page_t*	undo_page,/* in: undo log page */
-	ulint	page_no,/* in: undo log header page number */
-	ulint	offset)	/* in: undo log header offset on page */
+	page_t*	undo_page,/*!< in: undo log page */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset)	/*!< in: undo log header offset on page */
 {
 	ulint	start;
 	ulint	end;
@@ -328,3 +348,4 @@ trx_undo_page_get_first_rec(
 
 	return(undo_page + start);
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/trx0xa.h b/storage/innodb_plugin/include/trx0xa.h
new file mode 100644
index 00000000000..e0dd8a1af5b
--- /dev/null
+++ b/storage/innodb_plugin/include/trx0xa.h
@@ -0,0 +1,70 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*
+ * Start of xa.h header
+ *
+ * Define a symbol to prevent multiple inclusions of this header file
+ */
+#ifndef	XA_H
+#define	XA_H
+
+/*
+ * Transaction branch identification: XID and NULLXID:
+ */
+#ifndef XIDDATASIZE
+
+/** Sizes of transaction identifier */
+#define	XIDDATASIZE	128		/*!< maximum size of a transaction
+					identifier, in bytes */
+#define	MAXGTRIDSIZE	 64		/*!< maximum size in bytes of gtrid */
+#define	MAXBQUALSIZE	 64		/*!< maximum size in bytes of bqual */
+
+/** X/Open XA distributed transaction identifier */
+struct xid_t {
+	long formatID;			/*!< format identifier; -1
+					means that the XID is null */
+	long gtrid_length;		/*!< value from 1 through 64 */
+	long bqual_length;		/*!< value from 1 through 64 */
+	char data[XIDDATASIZE];		/*!< distributed transaction
+					identifier */
+};
+/** X/Open XA distributed transaction identifier */
+typedef	struct xid_t XID;
+#endif
+/** X/Open XA distributed transaction status codes */
+/* @{ */
+#define	XA_OK		0		/*!< normal execution */
+#define	XAER_ASYNC	-2		/*!< asynchronous operation already
+					outstanding */
+#define	XAER_RMERR	-3		/*!< a resource manager error
+					occurred in the transaction
+					branch */
+#define	XAER_NOTA	-4		/*!< the XID is not valid */
+#define	XAER_INVAL	-5		/*!< invalid arguments were given */
+#define	XAER_PROTO	-6		/*!< routine invoked in an improper
+					context */
+#define	XAER_RMFAIL	-7		/*!< resource manager unavailable */
+#define	XAER_DUPID	-8		/*!< the XID already exists */
+#define	XAER_OUTSIDE	-9		/*!< resource manager doing
+					work outside transaction */
+/* @} */
+#endif /* ifndef XA_H */
+/*
+ * End of xa.h header
+ */
diff --git a/storage/innobase/include/univ.i b/storage/innodb_plugin/include/univ.i
similarity index 58%
rename from storage/innobase/include/univ.i
rename to storage/innodb_plugin/include/univ.i
index bb44a91a343..6bce6dd765e 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innodb_plugin/include/univ.i
@@ -1,7 +1,38 @@
-/***************************************************************************
-Version control for database, common definitions, and include files
+/*****************************************************************************
 
-(c) 1994 - 2000 Innobase Oy
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+Copyright (c) 2009, Sun Microsystems, Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+Portions of this file contain modifications contributed and copyrighted by
+Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
+are described briefly in the InnoDB documentation. The contributions by
+Sun Microsystems are incorporated with their permission, and subject to the
+conditions contained in the file COPYING.Sun_Microsystems.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/***********************************************************************//**
+@file include/univ.i
+Version control for database, common definitions, and include files
 
 Created 1/20/1994 Heikki Tuuri
 ****************************************************************************/
@@ -9,9 +40,43 @@ Created 1/20/1994 Heikki Tuuri
 #ifndef univ_i
 #define univ_i
 
-#ifdef __SUNPRO_C
-# include <sun_prefetch.h>
-#endif
+#ifdef UNIV_HOTBACKUP
+#include "hb_univ.i"
+#endif /* UNIV_HOTBACKUP */
+
+#define INNODB_VERSION_MAJOR	1
+#define INNODB_VERSION_MINOR	0
+#define INNODB_VERSION_BUGFIX	4
+
+/* The following is the InnoDB version as shown in
+SELECT plugin_version FROM information_schema.plugins;
+calculated in in make_version_string() in sql/sql_show.cc like this:
+"version >> 8" . "version & 0xff"
+because the version is shown with only one dot, we skip the last
+component, i.e. we show M.N.P as M.N */
+#define INNODB_VERSION_SHORT	\
+	(INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
+
+/* auxiliary macros to help creating the version as string */
+#define __INNODB_VERSION(a, b, c)	(#a "." #b "." #c)
+#define _INNODB_VERSION(a, b, c)	__INNODB_VERSION(a, b, c)
+
+#define INNODB_VERSION_STR			\
+	_INNODB_VERSION(INNODB_VERSION_MAJOR,	\
+			INNODB_VERSION_MINOR,	\
+			INNODB_VERSION_BUGFIX)
+
+#define REFMAN "http://dev.mysql.com/doc/refman/5.1/en/"
+
+#ifdef MYSQL_DYNAMIC_PLUGIN
+/* In the dynamic plugin, redefine some externally visible symbols
+in order not to conflict with the symbols of a builtin InnoDB. */
+
+/* Rename all C++ classes that contain virtual functions, because we
+have not figured out how to apply the visibility=hidden attribute to
+the virtual method table (vtable) in GCC 3. */
+# define ha_innobase ha_innodb
+#endif /* MYSQL_DYNAMIC_PLUGIN */
 
 #if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__)
 # undef __WIN__
@@ -19,9 +84,10 @@ Created 1/20/1994 Heikki Tuuri
 
 # include <windows.h>
 
-# if !defined(WIN64) && !defined(_WIN64)
-#  define UNIV_CAN_USE_X86_ASSEMBLER
-# endif
+# if defined(HAVE_WINDOWS_ATOMICS)
+/* If atomics are defined we use them in InnoDB mutex implementation */
+#  define HAVE_ATOMIC_BUILTINS
+# endif /* HAVE_WINDOWS_ATOMICS */
 
 # ifdef _NT_
 #  define __NT__
@@ -34,35 +100,60 @@ Created 1/20/1994 Heikki Tuuri
 in compiling more Posix-compatible. These headers also define __WIN__
 if we are compiling on Windows. */
 
+#ifndef UNIV_HOTBACKUP
 # include <my_global.h>
 # include <my_pthread.h>
+#endif /* UNIV_HOTBACKUP */
 
 /* Include <sys/stat.h> to get S_I... macros defined for os0file.c */
 # include <sys/stat.h>
+# if !defined(__NETWARE__) && !defined(__WIN__) 
+#  include <sys/mman.h> /* mmap() for os0proc.c */
+# endif
 
 # undef PACKAGE
 # undef VERSION
 
 /* Include the header file generated by GNU autoconf */
 # ifndef __WIN__
-#  include "config.h"
+#ifndef UNIV_HOTBACKUP
+# include "config.h"
+#endif /* UNIV_HOTBACKUP */
 # endif
 
 # ifdef HAVE_SCHED_H
 #  include <sched.h>
 # endif
 
-/* When compiling for Itanium IA64, undefine the flag below to prevent use
-of the 32-bit x86 assembler in mutex operations. */
+# if defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_SOLARIS_ATOMICS) \
+     || defined(HAVE_WINDOWS_ATOMICS)
+/* If atomics are defined we use them in InnoDB mutex implementation */
+#  define HAVE_ATOMIC_BUILTINS
+# endif /* (HAVE_GCC_ATOMIC_BUILTINS) || (HAVE_SOLARIS_ATOMICS)
+	|| (HAVE_WINDOWS_ATOMICS) */
 
-# if defined(__WIN__) && !defined(WIN64) && !defined(_WIN64)
-#  define UNIV_CAN_USE_X86_ASSEMBLER
-# endif
+/* For InnoDB rw_locks to work with atomics we need the thread_id
+to be no more than machine word wide. The following enables using
+atomics for InnoDB rw_locks where these conditions are met. */
+#ifdef HAVE_ATOMIC_BUILTINS
+/* if HAVE_ATOMIC_PTHREAD_T is defined at this point that means that
+the code from plug.in has defined it and we do not need to include
+ut0auxconf.h which would either define HAVE_ATOMIC_PTHREAD_T or will
+be empty */
+# ifndef HAVE_ATOMIC_PTHREAD_T
+#  include "ut0auxconf.h"
+# endif /* HAVE_ATOMIC_PTHREAD_T */
+/* now HAVE_ATOMIC_PTHREAD_T is eventually defined either by plug.in or
+from Makefile.in->ut0auxconf.h */
+# ifdef HAVE_ATOMIC_PTHREAD_T
+#  define INNODB_RW_LOCKS_USE_ATOMICS
+# endif /* HAVE_ATOMIC_PTHREAD_T */
+#endif /* HAVE_ATOMIC_BUILTINS */
 
 /* We only try to do explicit inlining of functions with gcc and
- Sun Studio */
+Sun Studio */
 
-# if !defined(__GNUC__) && !defined(__SUNPRO_C)
+# if !defined(__GNUC__) && !(defined(__SUNPRO_C) || defined(__SUNPRO_CC))
 #  undef  UNIV_MUST_NOT_INLINE			/* Remove compiler warning */
 #  define UNIV_MUST_NOT_INLINE
 # endif
@@ -84,15 +175,36 @@ memory is read outside the allocated blocks. */
 #define UNIV_INIT_MEM_TO_ZERO
 */
 
-/* Make a non-inline debug version */
+/* When this macro is defined then additional test functions will be
+compiled. These functions live at the end of each relevant source file
+and have "test_" prefix. These functions are not called from anywhere in
+the code, they can be called from gdb after
+innobase_start_or_create_for_mysql() has executed using the call
+command. Not tested on Windows. */
+/*
+#define UNIV_COMPILE_TEST_FUNCS
+*/
 
 #if 0
 #define UNIV_DEBUG_VALGRIND			/* Enable extra
 						Valgrind instrumentation */
-#define UNIV_DEBUG				/* Enable ut_ad() assertions */
+#define UNIV_DEBUG_PRINT			/* Enable the compilation of
+						some debug print functions */
+#define UNIV_AHI_DEBUG				/* Enable adaptive hash index
+						debugging without UNIV_DEBUG */
+#define UNIV_BUF_DEBUG				/* Enable buffer pool
+						debugging without UNIV_DEBUG */
+#define UNIV_DEBUG				/* Enable ut_ad() assertions
+						and disable UNIV_INLINE */
+#define UNIV_DEBUG_FILE_ACCESSES		/* Debug .ibd file access
+						(field file_page_was_freed
+						in buf_page_t) */
+#define UNIV_LRU_DEBUG				/* debug the buffer pool LRU */
+#define UNIV_HASH_DEBUG				/* debug HASH_ macros */
 #define UNIV_LIST_DEBUG				/* debug UT_LIST_ macros */
 #define UNIV_MEM_DEBUG				/* detect memory leaks etc */
-#define UNIV_IBUF_DEBUG				/* debug the insert buffer;
+#define UNIV_IBUF_DEBUG				/* debug the insert buffer */
+#define UNIV_IBUF_COUNT_DEBUG			/* debug the insert buffer;
 this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES,
 and the insert buffer must be empty when the database is started */
 #define UNIV_SYNC_DEBUG				/* debug mutex and latch
@@ -106,6 +218,10 @@ operations (very slow); also UNIV_DEBUG must be defined */
 						in sync0sync.c */
 #define UNIV_BTR_PRINT				/* enable functions for
 						printing B-trees */
+#define UNIV_ZIP_DEBUG				/* extensive consistency checks
+						for compressed pages */
+#define UNIV_ZIP_COPY				/* call page_zip_copy_recs()
+						more often */
 #endif
 
 #define UNIV_BTR_DEBUG				/* check B-tree links */
@@ -120,24 +236,6 @@ by one. */
 #define UNIV_SET_MEM_TO_ZERO
 #endif
 
-/* Use malloc instead of innodb additional memory pool (great with tcmalloc) */
-#define UNIV_DISABLE_MEM_POOL
-
-#if defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_SOLARIS_ATOMIC)
-/*
- * We have a full set of atomic ops available - we will use them
- */
-#define UNIV_SYNC_ATOMIC
-#endif
-
-#if defined(WIN_ATOMICS32) || defined(WIN_ATOMICS64)
-/*
- * We have a full set of atomic ops available - we will use them
- * This is on Windows
- */
-#define UNIV_SYNC_ATOMIC
-#endif
-
 /*
 #define UNIV_SQL_DEBUG
 #define UNIV_LOG_DEBUG
@@ -151,13 +249,23 @@ by one. */
 			/* the above option enables basic recovery debugging:
 			new allocated file pages are reset */
 
-#if (!defined(UNIV_DEBUG) && !defined(INSIDE_HA_INNOBASE_CC) && !defined(UNIV_MUST_NOT_INLINE))
+/* Linkage specifier for non-static InnoDB symbols (variables and functions)
+that are only referenced from within InnoDB, not from MySQL */
+#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(UNIV_HOTBACKUP)
+# define UNIV_INTERN __attribute__((visibility ("hidden")))
+#else
+# define UNIV_INTERN
+#endif
+
+#if (!defined(UNIV_DEBUG) && !defined(UNIV_MUST_NOT_INLINE))
 /* Definition for inline version */
 
 #ifdef __WIN__
-#define UNIV_INLINE	__inline
+# define UNIV_INLINE	__inline
+#elif defined(__SUNPRO_CC) || defined(__SUNPRO_C)
+# define UNIV_INLINE static inline
 #else
-#define UNIV_INLINE static __inline__
+# define UNIV_INLINE static __inline__
 #endif
 
 #else
@@ -165,7 +273,7 @@ by one. */
 definitions: */
 
 #define UNIV_NONINL
-#define UNIV_INLINE
+#define UNIV_INLINE	UNIV_INTERN
 
 #endif	/* UNIV_DEBUG */
 
@@ -190,11 +298,10 @@ management to ensure correct alignment for doubles etc. */
 			========================
 */
 
-/* The universal page size of the database */
-#define UNIV_PAGE_SIZE          (2 * 8192) /* NOTE! Currently, this has to be a
-					power of 2 */
 /* The 2-logarithm of UNIV_PAGE_SIZE: */
 #define UNIV_PAGE_SIZE_SHIFT	14
+/* The universal page size of the database */
+#define UNIV_PAGE_SIZE		(1 << UNIV_PAGE_SIZE_SHIFT)
 
 /* Maximum number of parallel threads in a parallelized operation */
 #define UNIV_MAX_PARALLELISM	32
@@ -233,15 +340,17 @@ typedef long int		lint;
 #endif
 
 #ifdef __WIN__
-typedef __int64			ib_longlong;
-typedef unsigned __int64	ib_ulonglong;
-#else
+typedef __int64			ib_int64_t;
+typedef unsigned __int64	ib_uint64_t;
+#elif !defined(UNIV_HOTBACKUP)
 /* Note: longlong and ulonglong come from MySQL headers. */
-typedef longlong		ib_longlong;
-typedef ulonglong               ib_ulonglong;
+typedef longlong		ib_int64_t;
+typedef ulonglong		ib_uint64_t;
 #endif
 
+#ifndef UNIV_HOTBACKUP
 typedef unsigned long long int	ullint;
+#endif /* UNIV_HOTBACKUP */
 
 #ifndef __WIN__
 #if SIZEOF_LONG != SIZEOF_VOIDP
@@ -258,6 +367,9 @@ typedef unsigned long long int	ullint;
 /* Maximum value for a ulint */
 #define ULINT_MAX		((ulint)(-2))
 
+/* Maximum value for ib_uint64_t */
+#define IB_ULONGLONG_MAX	((ib_uint64_t) (~0ULL))
+
 /* This 'ibool' type is used within Innobase. Remember that different included
 headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
 #define ibool			ulint
@@ -296,7 +408,13 @@ it is read. */
 /* Minimize cache-miss latency by moving data at addr into a cache before
 it is read or written. */
 # define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3)
-#elif defined(__SUNPRO_C)
+#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+# include <sun_prefetch.h>
+#if __SUNPRO_C >= 0x550
+# undef UNIV_INTERN
+# define UNIV_INTERN __hidden
+#endif /* __SUNPRO_C >= 0x550 */
+/* Use sun_prefetch when compile with Sun Studio */
 # define UNIV_EXPECT(expr,value) (expr)
 # define UNIV_LIKELY_NULL(expr) (expr)
 # define UNIV_PREFETCH_R(addr) sun_prefetch_read_many(addr)
@@ -338,6 +456,8 @@ typedef void* os_thread_ret_t;
 # define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
 # define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size)
 # define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
+# define UNIV_MEM_DESC(addr, size, b) VALGRIND_CREATE_BLOCK(addr, size, b)
+# define UNIV_MEM_UNDESC(b) VALGRIND_DISCARD(b)
 # define UNIV_MEM_ASSERT_RW(addr, size) do {				\
 	const void* _p = (const void*) (ulint)				\
 		VALGRIND_CHECK_MEM_IS_DEFINED(addr, size);		\
@@ -361,6 +481,8 @@ typedef void* os_thread_ret_t;
 # define UNIV_MEM_INVALID(addr, size) do {} while(0)
 # define UNIV_MEM_FREE(addr, size) do {} while(0)
 # define UNIV_MEM_ALLOC(addr, size) do {} while(0)
+# define UNIV_MEM_DESC(addr, size, b) do {} while(0)
+# define UNIV_MEM_UNDESC(b) do {} while(0)
 # define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0)
 # define UNIV_MEM_ASSERT_W(addr, size) do {} while(0)
 #endif
diff --git a/storage/innodb_plugin/include/usr0sess.h b/storage/innodb_plugin/include/usr0sess.h
new file mode 100644
index 00000000000..7638a0c69e2
--- /dev/null
+++ b/storage/innodb_plugin/include/usr0sess.h
@@ -0,0 +1,78 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/usr0sess.h
+Sessions
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef usr0sess_h
+#define usr0sess_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "trx0types.h"
+#include "srv0srv.h"
+#include "trx0types.h"
+#include "usr0types.h"
+#include "que0types.h"
+#include "data0data.h"
+#include "rem0rec.h"
+
+/*********************************************************************//**
+Opens a session.
+@return	own: session object */
+UNIV_INTERN
+sess_t*
+sess_open(void);
+/*============*/
+/*********************************************************************//**
+Closes a session, freeing the memory occupied by it, if it is in a state
+where it should be closed.
+@return	TRUE if closed */
+UNIV_INTERN
+ibool
+sess_try_close(
+/*===========*/
+	sess_t*		sess);	/*!< in, own: session object */
+
+/* The session handle. All fields are protected by the kernel mutex */
+struct sess_struct{
+	ulint		state;		/*!< state of the session */
+	trx_t*		trx;		/*!< transaction object permanently
+					assigned for the session: the
+					transaction instance designated by the
+					trx id changes, but the memory
+					structure is preserved */
+	UT_LIST_BASE_NODE_T(que_t)
+			graphs;		/*!< query graphs belonging to this
+					session */
+};
+
+/* Session states */
+#define SESS_ACTIVE		1
+#define SESS_ERROR		2	/* session contains an error message
+					which has not yet been communicated
+					to the client */
+#ifndef UNIV_NONINL
+#include "usr0sess.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/usr0sess.ic b/storage/innodb_plugin/include/usr0sess.ic
new file mode 100644
index 00000000000..35a75d75acc
--- /dev/null
+++ b/storage/innodb_plugin/include/usr0sess.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/usr0sess.ic
+Sessions
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innodb_plugin/include/usr0types.h b/storage/innodb_plugin/include/usr0types.h
new file mode 100644
index 00000000000..6cc6f015613
--- /dev/null
+++ b/storage/innodb_plugin/include/usr0types.h
@@ -0,0 +1,31 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/usr0types.h
+Users and sessions global types
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef usr0types_h
+#define usr0types_h
+
+typedef struct sess_struct	sess_t;
+
+#endif
diff --git a/storage/innodb_plugin/include/ut0auxconf.h b/storage/innodb_plugin/include/ut0auxconf.h
new file mode 100644
index 00000000000..88fb26f1863
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0auxconf.h
@@ -0,0 +1,14 @@
+/* Do not remove this file even though it is empty.
+This file is included in univ.i and will cause compilation failure
+if not present.
+A custom check has been added in the generated
+storage/innobase/Makefile.in that is shipped with the InnoDB Plugin
+source archive. This check tries to compile a test program and if
+successful then adds "#define HAVE_ATOMIC_PTHREAD_T" to this file.
+This is a hack that has been developed in order to check for pthread_t
+atomicity without the need to regenerate the ./configure script that is
+distributed in the MySQL 5.1 official source archives.
+If by any chance Makefile.in and ./configure are regenerated and thus
+the hack from Makefile.in wiped away then the "real" check from plug.in
+will take over.
+*/
diff --git a/storage/innodb_plugin/include/ut0byte.h b/storage/innodb_plugin/include/ut0byte.h
new file mode 100644
index 00000000000..a2687e62f08
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0byte.h
@@ -0,0 +1,270 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0byte.h
+Utilities for byte operations
+
+Created 1/20/1994 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0byte_h
+#define ut0byte_h
+
+
+#include "univ.i"
+
+/** Pair of ulint integers. */
+typedef	struct dulint_struct	dulint;
+/** Type definition for a 64-bit unsigned integer, which works also
+in 32-bit machines. NOTE! Access the fields only with the accessor
+functions. This definition appears here only for the compiler to
+know the size of a dulint. */
+struct dulint_struct{
+	ulint	high;	/*!< most significant 32 bits */
+	ulint	low;	/*!< least significant 32 bits */
+};
+
+/** Zero value for a dulint */
+extern const dulint	ut_dulint_zero;
+
+/** Maximum value for a dulint */
+extern const dulint	ut_dulint_max;
+
+/*******************************************************//**
+Creates a 64-bit dulint out of two ulints.
+@return	created dulint */
+UNIV_INLINE
+dulint
+ut_dulint_create(
+/*=============*/
+	ulint	high,	/*!< in: high-order 32 bits */
+	ulint	low);	/*!< in: low-order 32 bits */
+/*******************************************************//**
+Gets the high-order 32 bits of a dulint.
+@return	32 bits in ulint */
+UNIV_INLINE
+ulint
+ut_dulint_get_high(
+/*===============*/
+	dulint	d);	/*!< in: dulint */
+/*******************************************************//**
+Gets the low-order 32 bits of a dulint.
+@return	32 bits in ulint */
+UNIV_INLINE
+ulint
+ut_dulint_get_low(
+/*==============*/
+	dulint	d);	/*!< in: dulint */
+/*******************************************************//**
+Converts a dulint (a struct of 2 ulints) to ib_int64_t, which is a 64-bit
+integer type.
+@return	value in ib_int64_t type */
+UNIV_INLINE
+ib_int64_t
+ut_conv_dulint_to_longlong(
+/*=======================*/
+	dulint	d);	/*!< in: dulint */
+/*******************************************************//**
+Tests if a dulint is zero.
+@return	TRUE if zero */
+UNIV_INLINE
+ibool
+ut_dulint_is_zero(
+/*==============*/
+	dulint	a);	/*!< in: dulint */
+/*******************************************************//**
+Compares two dulints.
+@return	-1 if a < b, 0 if a == b, 1 if a > b */
+UNIV_INLINE
+int
+ut_dulint_cmp(
+/*==========*/
+	dulint	a,	/*!< in: dulint */
+	dulint	b);	/*!< in: dulint */
+/*******************************************************//**
+Calculates the max of two dulints.
+@return	max(a, b) */
+UNIV_INLINE
+dulint
+ut_dulint_get_max(
+/*==============*/
+	dulint	a,	/*!< in: dulint */
+	dulint	b);	/*!< in: dulint */
+/*******************************************************//**
+Calculates the min of two dulints.
+@return	min(a, b) */
+UNIV_INLINE
+dulint
+ut_dulint_get_min(
+/*==============*/
+	dulint	a,	/*!< in: dulint */
+	dulint	b);	/*!< in: dulint */
+/*******************************************************//**
+Adds a ulint to a dulint.
+@return	sum a + b */
+UNIV_INLINE
+dulint
+ut_dulint_add(
+/*==========*/
+	dulint	a,	/*!< in: dulint */
+	ulint	b);	/*!< in: ulint */
+/*******************************************************//**
+Subtracts a ulint from a dulint.
+@return	a - b */
+UNIV_INLINE
+dulint
+ut_dulint_subtract(
+/*===============*/
+	dulint	a,	/*!< in: dulint */
+	ulint	b);	/*!< in: ulint, b <= a */
+/*******************************************************//**
+Subtracts a dulint from another. NOTE that the difference must be positive
+and smaller that 4G.
+@return	a - b */
+UNIV_INLINE
+ulint
+ut_dulint_minus(
+/*============*/
+	dulint	a,	/*!< in: dulint; NOTE a must be >= b and at most
+			2 to power 32 - 1 greater */
+	dulint	b);	/*!< in: dulint */
+/********************************************************//**
+Rounds a dulint downward to a multiple of a power of 2.
+@return	rounded value */
+UNIV_INLINE
+dulint
+ut_dulint_align_down(
+/*=================*/
+	dulint	 n,		/*!< in: number to be rounded */
+	ulint	 align_no);	/*!< in: align by this number which must be a
+				power of 2 */
+/********************************************************//**
+Rounds a dulint upward to a multiple of a power of 2.
+@return	rounded value */
+UNIV_INLINE
+dulint
+ut_dulint_align_up(
+/*===============*/
+	dulint	 n,		/*!< in: number to be rounded */
+	ulint	 align_no);	/*!< in: align by this number which must be a
+				power of 2 */
+/********************************************************//**
+Rounds a dulint downward to a multiple of a power of 2.
+@return	rounded value */
+UNIV_INLINE
+ib_uint64_t
+ut_uint64_align_down(
+/*=================*/
+	ib_uint64_t	 n,		/*!< in: number to be rounded */
+	ulint		 align_no);	/*!< in: align by this number
+					which must be a power of 2 */
+/********************************************************//**
+Rounds ib_uint64_t upward to a multiple of a power of 2.
+@return	rounded value */
+UNIV_INLINE
+ib_uint64_t
+ut_uint64_align_up(
+/*===============*/
+	ib_uint64_t	 n,		/*!< in: number to be rounded */
+	ulint		 align_no);	/*!< in: align by this number
+					which must be a power of 2 */
+/*******************************************************//**
+Increments a dulint variable by 1. */
+#define UT_DULINT_INC(D)\
+{\
+	if ((D).low == 0xFFFFFFFFUL) {\
+		(D).high = (D).high + 1;\
+		(D).low = 0;\
+	} else {\
+		(D).low = (D).low + 1;\
+	}\
+}
+/*******************************************************//**
+Tests if two dulints are equal. */
+#define UT_DULINT_EQ(D1, D2)	(((D1).low == (D2).low)\
+						&& ((D1).high == (D2).high))
+#ifdef notdefined
+/************************************************************//**
+Sort function for dulint arrays. */
+UNIV_INTERN
+void
+ut_dulint_sort(
+/*===========*/
+	dulint*	arr,	/*!< in/out: array to be sorted */
+	dulint*	aux_arr,/*!< in/out: auxiliary array (same size as arr) */
+	ulint	low,	/*!< in: low bound of sort interval, inclusive */
+	ulint	high);	/*!< in: high bound of sort interval, noninclusive */
+#endif /* notdefined */
+
+/*********************************************************//**
+The following function rounds up a pointer to the nearest aligned address.
+@return	aligned pointer */
+UNIV_INLINE
+void*
+ut_align(
+/*=====*/
+	void*	ptr,		/*!< in: pointer */
+	ulint	align_no);	/*!< in: align by this number */
+/*********************************************************//**
+The following function rounds down a pointer to the nearest
+aligned address.
+@return	aligned pointer */
+UNIV_INLINE
+void*
+ut_align_down(
+/*==========*/
+	const void*	ptr,		/*!< in: pointer */
+	ulint		align_no)	/*!< in: align by this number */
+		__attribute__((const));
+/*********************************************************//**
+The following function computes the offset of a pointer from the nearest
+aligned address.
+@return	distance from aligned pointer */
+UNIV_INLINE
+ulint
+ut_align_offset(
+/*============*/
+	const void*	ptr,		/*!< in: pointer */
+	ulint		align_no)	/*!< in: align by this number */
+			__attribute__((const));
+/*****************************************************************//**
+Gets the nth bit of a ulint.
+@return	TRUE if nth bit is 1; 0th bit is defined to be the least significant */
+UNIV_INLINE
+ibool
+ut_bit_get_nth(
+/*===========*/
+	ulint	a,	/*!< in: ulint */
+	ulint	n);	/*!< in: nth bit requested */
+/*****************************************************************//**
+Sets the nth bit of a ulint.
+@return	the ulint with the bit set as requested */
+UNIV_INLINE
+ulint
+ut_bit_set_nth(
+/*===========*/
+	ulint	a,	/*!< in: ulint */
+	ulint	n,	/*!< in: nth bit requested */
+	ibool	val);	/*!< in: value for the bit to set */
+
+#ifndef UNIV_NONINL
+#include "ut0byte.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/ut0byte.ic b/storage/innodb_plugin/include/ut0byte.ic
new file mode 100644
index 00000000000..e3beed65138
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0byte.ic
@@ -0,0 +1,411 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************************//**
+@file include/ut0byte.ic
+Utilities for byte operations
+
+Created 5/30/1994 Heikki Tuuri
+*******************************************************************/
+
+/*******************************************************//**
+Creates a 64-bit dulint out of two ulints.
+@return	created dulint */
+UNIV_INLINE
+dulint
+ut_dulint_create(
+/*=============*/
+	ulint	high,	/*!< in: high-order 32 bits */
+	ulint	low)	/*!< in: low-order 32 bits */
+{
+	dulint	res;
+
+	ut_ad(high <= 0xFFFFFFFF);
+	ut_ad(low <= 0xFFFFFFFF);
+
+	res.high = high;
+	res.low	 = low;
+
+	return(res);
+}
+
+/*******************************************************//**
+Gets the high-order 32 bits of a dulint.
+@return	32 bits in ulint */
+UNIV_INLINE
+ulint
+ut_dulint_get_high(
+/*===============*/
+	dulint	d)	/*!< in: dulint */
+{
+	return(d.high);
+}
+
+/*******************************************************//**
+Gets the low-order 32 bits of a dulint.
+@return	32 bits in ulint */
+UNIV_INLINE
+ulint
+ut_dulint_get_low(
+/*==============*/
+	dulint	d)	/*!< in: dulint */
+{
+	return(d.low);
+}
+
+/*******************************************************//**
+Converts a dulint (a struct of 2 ulints) to ib_int64_t, which is a 64-bit
+integer type.
+@return	value in ib_int64_t type */
+UNIV_INLINE
+ib_int64_t
+ut_conv_dulint_to_longlong(
+/*=======================*/
+	dulint	d)	/*!< in: dulint */
+{
+	return((ib_int64_t)d.low
+	       + (((ib_int64_t)d.high) << 32));
+}
+
+/*******************************************************//**
+Tests if a dulint is zero.
+@return	TRUE if zero */
+UNIV_INLINE
+ibool
+ut_dulint_is_zero(
+/*==============*/
+	dulint	a)	/*!< in: dulint */
+{
+	if ((a.low == 0) && (a.high == 0)) {
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*******************************************************//**
+Compares two dulints.
+@return	-1 if a < b, 0 if a == b, 1 if a > b */
+UNIV_INLINE
+int
+ut_dulint_cmp(
+/*==========*/
+	dulint	a,	/*!< in: dulint */
+	dulint	b)	/*!< in: dulint */
+{
+	if (a.high > b.high) {
+		return(1);
+	} else if (a.high < b.high) {
+		return(-1);
+	} else if (a.low > b.low) {
+		return(1);
+	} else if (a.low < b.low) {
+		return(-1);
+	} else {
+		return(0);
+	}
+}
+
+/*******************************************************//**
+Calculates the max of two dulints.
+@return	max(a, b) */
+UNIV_INLINE
+dulint
+ut_dulint_get_max(
+/*==============*/
+	dulint	a,	/*!< in: dulint */
+	dulint	b)	/*!< in: dulint */
+{
+	if (ut_dulint_cmp(a, b) > 0) {
+
+		return(a);
+	}
+
+	return(b);
+}
+
+/*******************************************************//**
+Calculates the min of two dulints.
+@return	min(a, b) */
+UNIV_INLINE
+dulint
+ut_dulint_get_min(
+/*==============*/
+	dulint	a,	/*!< in: dulint */
+	dulint	b)	/*!< in: dulint */
+{
+	if (ut_dulint_cmp(a, b) > 0) {
+
+		return(b);
+	}
+
+	return(a);
+}
+
+/*******************************************************//**
+Adds a ulint to a dulint.
+@return	sum a + b */
+UNIV_INLINE
+dulint
+ut_dulint_add(
+/*==========*/
+	dulint	a,	/*!< in: dulint */
+	ulint	b)	/*!< in: ulint */
+{
+	if (0xFFFFFFFFUL - b >= a.low) {
+		a.low += b;
+
+		return(a);
+	}
+
+	a.low = a.low - (0xFFFFFFFFUL - b) - 1;
+
+	a.high++;
+
+	return(a);
+}
+
+/*******************************************************//**
+Subtracts a ulint from a dulint.
+@return	a - b */
+UNIV_INLINE
+dulint
+ut_dulint_subtract(
+/*===============*/
+	dulint	a,	/*!< in: dulint */
+	ulint	b)	/*!< in: ulint, b <= a */
+{
+	if (a.low >= b) {
+		a.low -= b;
+
+		return(a);
+	}
+
+	b -= a.low + 1;
+
+	a.low = 0xFFFFFFFFUL - b;
+
+	ut_ad(a.high > 0);
+
+	a.high--;
+
+	return(a);
+}
+
+/*******************************************************//**
+Subtracts a dulint from another. NOTE that the difference must be positive
+and smaller that 4G.
+@return	a - b */
+UNIV_INLINE
+ulint
+ut_dulint_minus(
+/*============*/
+	dulint	a,	/*!< in: dulint; NOTE a must be >= b and at most
+			2 to power 32 - 1 greater */
+	dulint	b)	/*!< in: dulint */
+{
+	ulint	diff;
+
+	if (a.high == b.high) {
+		ut_ad(a.low >= b.low);
+
+		return(a.low - b.low);
+	}
+
+	ut_ad(a.high == b.high + 1);
+
+	diff = (ulint)(0xFFFFFFFFUL - b.low);
+	diff += 1 + a.low;
+
+	ut_ad(diff > a.low);
+
+	return(diff);
+}
+
+/********************************************************//**
+Rounds a dulint downward to a multiple of a power of 2.
+@return	rounded value */
+UNIV_INLINE
+dulint
+ut_dulint_align_down(
+/*=================*/
+	dulint	 n,		/*!< in: number to be rounded */
+	ulint	 align_no)	/*!< in: align by this number which must be a
+				power of 2 */
+{
+	ulint	low, high;
+
+	ut_ad(align_no > 0);
+	ut_ad(((align_no - 1) & align_no) == 0);
+
+	low = ut_dulint_get_low(n);
+	high = ut_dulint_get_high(n);
+
+	low = low & ~(align_no - 1);
+
+	return(ut_dulint_create(high, low));
+}
+
+/********************************************************//**
+Rounds a dulint upward to a multiple of a power of 2.
+@return	rounded value */
+UNIV_INLINE
+dulint
+ut_dulint_align_up(
+/*===============*/
+	dulint	 n,		/*!< in: number to be rounded */
+	ulint	 align_no)	/*!< in: align by this number which must be a
+				power of 2 */
+{
+	return(ut_dulint_align_down(ut_dulint_add(n, align_no - 1), align_no));
+}
+
+/********************************************************//**
+Rounds ib_uint64_t downward to a multiple of a power of 2.
+@return	rounded value */
+UNIV_INLINE
+ib_uint64_t
+ut_uint64_align_down(
+/*=================*/
+	ib_uint64_t	 n,		/*!< in: number to be rounded */
+	ulint		 align_no)	/*!< in: align by this number
+					which must be a power of 2 */
+{
+	ut_ad(align_no > 0);
+	ut_ad(ut_is_2pow(align_no));
+
+	return(n & ~((ib_uint64_t) align_no - 1));
+}
+
+/********************************************************//**
+Rounds ib_uint64_t upward to a multiple of a power of 2.
+@return	rounded value */
+UNIV_INLINE
+ib_uint64_t
+ut_uint64_align_up(
+/*===============*/
+	ib_uint64_t	 n,		/*!< in: number to be rounded */
+	ulint		 align_no)	/*!< in: align by this number
+					which must be a power of 2 */
+{
+	ib_uint64_t	align_1 = (ib_uint64_t) align_no - 1;
+
+	ut_ad(align_no > 0);
+	ut_ad(ut_is_2pow(align_no));
+
+	return((n + align_1) & ~align_1);
+}
+
+/*********************************************************//**
+The following function rounds up a pointer to the nearest aligned address.
+@return	aligned pointer */
+UNIV_INLINE
+void*
+ut_align(
+/*=====*/
+	void*	ptr,		/*!< in: pointer */
+	ulint	align_no)	/*!< in: align by this number */
+{
+	ut_ad(align_no > 0);
+	ut_ad(((align_no - 1) & align_no) == 0);
+	ut_ad(ptr);
+
+	ut_ad(sizeof(void*) == sizeof(ulint));
+
+	return((void*)((((ulint)ptr) + align_no - 1) & ~(align_no - 1)));
+}
+
+/*********************************************************//**
+The following function rounds down a pointer to the nearest
+aligned address.
+@return	aligned pointer */
+UNIV_INLINE
+void*
+ut_align_down(
+/*==========*/
+	const void*	ptr,		/*!< in: pointer */
+	ulint		align_no)	/*!< in: align by this number */
+{
+	ut_ad(align_no > 0);
+	ut_ad(((align_no - 1) & align_no) == 0);
+	ut_ad(ptr);
+
+	ut_ad(sizeof(void*) == sizeof(ulint));
+
+	return((void*)((((ulint)ptr)) & ~(align_no - 1)));
+}
+
+/*********************************************************//**
+The following function computes the offset of a pointer from the nearest
+aligned address.
+@return	distance from aligned pointer */
+UNIV_INLINE
+ulint
+ut_align_offset(
+/*============*/
+	const void*	ptr,		/*!< in: pointer */
+	ulint		align_no)	/*!< in: align by this number */
+{
+	ut_ad(align_no > 0);
+	ut_ad(((align_no - 1) & align_no) == 0);
+	ut_ad(ptr);
+
+	ut_ad(sizeof(void*) == sizeof(ulint));
+
+	return(((ulint)ptr) & (align_no - 1));
+}
+
+/*****************************************************************//**
+Gets the nth bit of a ulint.
+@return	TRUE if nth bit is 1; 0th bit is defined to be the least significant */
+UNIV_INLINE
+ibool
+ut_bit_get_nth(
+/*===========*/
+	ulint	a,	/*!< in: ulint */
+	ulint	n)	/*!< in: nth bit requested */
+{
+	ut_ad(n < 8 * sizeof(ulint));
+#if TRUE != 1
+# error "TRUE != 1"
+#endif
+	return(1 & (a >> n));
+}
+
+/*****************************************************************//**
+Sets the nth bit of a ulint.
+@return	the ulint with the bit set as requested */
+UNIV_INLINE
+ulint
+ut_bit_set_nth(
+/*===========*/
+	ulint	a,	/*!< in: ulint */
+	ulint	n,	/*!< in: nth bit requested */
+	ibool	val)	/*!< in: value for the bit to set */
+{
+	ut_ad(n < 8 * sizeof(ulint));
+#if TRUE != 1
+# error "TRUE != 1"
+#endif
+	if (val) {
+		return(((ulint) 1 << n) | a);
+	} else {
+		return(~((ulint) 1 << n) & a);
+	}
+}
diff --git a/storage/innodb_plugin/include/ut0dbg.h b/storage/innodb_plugin/include/ut0dbg.h
new file mode 100644
index 00000000000..78b525c38ab
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0dbg.h
@@ -0,0 +1,175 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*****************************************************************//**
+@file include/ut0dbg.h
+Debug utilities for Innobase
+
+Created 1/30/1994 Heikki Tuuri
+**********************************************************************/
+
+#ifndef ut0dbg_h
+#define ut0dbg_h
+
+#include "univ.i"
+#include <stdlib.h>
+#include "os0thread.h"
+
+#if defined(__GNUC__) && (__GNUC__ > 2)
+/** Test if an assertion fails.
+@param EXPR	assertion expression
+@return		nonzero if EXPR holds, zero if not */
+# define UT_DBG_FAIL(EXPR) UNIV_UNLIKELY(!((ulint)(EXPR)))
+#else
+/** This is used to eliminate compiler warnings */
+extern ulint	ut_dbg_zero;
+/** Test if an assertion fails.
+@param EXPR	assertion expression
+@return		nonzero if EXPR holds, zero if not */
+# define UT_DBG_FAIL(EXPR) !((ulint)(EXPR) + ut_dbg_zero)
+#endif
+
+/*************************************************************//**
+Report a failed assertion. */
+UNIV_INTERN
+void
+ut_dbg_assertion_failed(
+/*====================*/
+	const char* expr,	/*!< in: the failed assertion */
+	const char* file,	/*!< in: source file containing the assertion */
+	ulint line);		/*!< in: line number of the assertion */
+
+#ifdef __NETWARE__
+/** Flag for ignoring further assertion failures.  This is set to TRUE
+when on NetWare there happens an InnoDB assertion failure or other
+fatal error condition that requires an immediate shutdown. */
+extern ibool	panic_shutdown;
+/* Abort the execution. */
+void ut_dbg_panic(void);
+# define UT_DBG_PANIC ut_dbg_panic()
+/* Stop threads in ut_a(). */
+# define UT_DBG_STOP	do {} while (0)	/* We do not do this on NetWare */
+#else /* __NETWARE__ */
+# if defined(__WIN__) || defined(__INTEL_COMPILER)
+#  undef UT_DBG_USE_ABORT
+# elif defined(__GNUC__) && (__GNUC__ > 2)
+#  define UT_DBG_USE_ABORT
+# endif
+
+# ifndef UT_DBG_USE_ABORT
+/** A null pointer that will be dereferenced to trigger a memory trap */
+extern ulint*	ut_dbg_null_ptr;
+# endif
+
+# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
+/** If this is set to TRUE by ut_dbg_assertion_failed(), all threads
+will stop at the next ut_a() or ut_ad(). */
+extern ibool	ut_dbg_stop_threads;
+
+/*************************************************************//**
+Stop a thread after assertion failure. */
+UNIV_INTERN
+void
+ut_dbg_stop_thread(
+/*===============*/
+	const char*	file,
+	ulint		line);
+# endif
+
+# ifdef UT_DBG_USE_ABORT
+/** Abort the execution. */
+#  define UT_DBG_PANIC abort()
+/** Stop threads (null operation) */
+#  define UT_DBG_STOP do {} while (0)
+# else /* UT_DBG_USE_ABORT */
+/** Abort the execution. */
+#  define UT_DBG_PANIC					\
+	if (*(ut_dbg_null_ptr)) ut_dbg_null_ptr = NULL
+/** Stop threads in ut_a(). */
+#  define UT_DBG_STOP do						\
+	if (UNIV_UNLIKELY(ut_dbg_stop_threads)) {		\
+		ut_dbg_stop_thread(__FILE__, (ulint) __LINE__);	\
+	} while (0)
+# endif /* UT_DBG_USE_ABORT */
+#endif /* __NETWARE__ */
+
+/** Abort execution if EXPR does not evaluate to nonzero.
+@param EXPR	assertion expression that should hold */
+#define ut_a(EXPR) do {						\
+	if (UT_DBG_FAIL(EXPR)) {				\
+		ut_dbg_assertion_failed(#EXPR,			\
+				__FILE__, (ulint) __LINE__);	\
+		UT_DBG_PANIC;					\
+	}							\
+	UT_DBG_STOP;						\
+} while (0)
+
+/** Abort execution. */
+#define ut_error do {						\
+	ut_dbg_assertion_failed(0, __FILE__, (ulint) __LINE__);	\
+	UT_DBG_PANIC;						\
+} while (0)
+
+#ifdef UNIV_DEBUG
+/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */
+#define ut_ad(EXPR)	ut_a(EXPR)
+/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
+#define ut_d(EXPR)	do {EXPR;} while (0)
+#else
+/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */
+#define ut_ad(EXPR)
+/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
+#define ut_d(EXPR)
+#endif
+
+/** Silence warnings about an unused variable by doing a null assignment.
+@param A	the unused variable */
+#define UT_NOT_USED(A)	A = A
+
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/** structure used for recording usage statistics */
+typedef struct speedo_struct {
+	struct rusage	ru;	/*!< getrusage() result */
+	struct timeval	tv;	/*!< gettimeofday() result */
+} speedo_t;
+
+/*******************************************************************//**
+Resets a speedo (records the current time in it). */
+UNIV_INTERN
+void
+speedo_reset(
+/*=========*/
+	speedo_t*	speedo);	/*!< out: speedo */
+
+/*******************************************************************//**
+Shows the time elapsed and usage statistics since the last reset of a
+speedo. */
+UNIV_INTERN
+void
+speedo_show(
+/*========*/
+	const speedo_t*	speedo);	/*!< in: speedo */
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
+
+#endif
diff --git a/storage/innobase/include/ut0list.h b/storage/innodb_plugin/include/ut0list.h
similarity index 52%
rename from storage/innobase/include/ut0list.h
rename to storage/innodb_plugin/include/ut0list.h
index c35cf202600..ec67f4e2a0f 100644
--- a/storage/innobase/include/ut0list.h
+++ b/storage/innodb_plugin/include/ut0list.h
@@ -1,4 +1,29 @@
-/***********************************************************************
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0list.h
+A double-linked list
+
+Created 4/26/2006 Osku Salerma
+************************************************************************/
+
+/*******************************************************************//**
 A double-linked list. This differs from the one in ut0lst.h in that in this
 one, each list node contains a pointer to the data, whereas the one in
 ut0lst.h uses a strategy where the list pointers are embedded in the data
@@ -18,7 +43,6 @@ automatically freeing the list node when the item's heap is freed.
 
 ************************************************************************/
 
-
 #ifndef IB_LIST_H
 #define IB_LIST_H
 
@@ -28,117 +52,117 @@ typedef struct ib_list_struct ib_list_t;
 typedef struct ib_list_node_struct ib_list_node_t;
 typedef struct ib_list_helper_struct ib_list_helper_t;
 
-/********************************************************************
+/****************************************************************//**
 Create a new list using mem_alloc. Lists created with this function must be
-freed with ib_list_free. */
-
+freed with ib_list_free.
+@return	list */
+UNIV_INTERN
 ib_list_t*
 ib_list_create(void);
 /*=================*/
-			/* out: list */
 
 
-/********************************************************************
+/****************************************************************//**
 Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
-lists created with this function. */
-
+lists created with this function.
+@return	list */
+UNIV_INTERN
 ib_list_t*
 ib_list_create_heap(
 /*================*/
-				/* out: list */
-	mem_heap_t*	heap);	/* in: memory heap to use */
+	mem_heap_t*	heap);	/*!< in: memory heap to use */
 
-/********************************************************************
+/****************************************************************//**
 Free a list. */
-
+UNIV_INTERN
 void
 ib_list_free(
 /*=========*/
-	ib_list_t*	list);	/* in: list */
-
-/********************************************************************
-Add the data to the start of the list. */
+	ib_list_t*	list);	/*!< in: list */
 
+/****************************************************************//**
+Add the data to the start of the list.
+@return	new list node */
+UNIV_INTERN
 ib_list_node_t*
 ib_list_add_first(
 /*==============*/
-				/* out: new list node*/
-	ib_list_t*	list,	/* in: list */
-	void*		data,	/* in: data */
-	mem_heap_t*	heap);	/* in: memory heap to use */
-
-/********************************************************************
-Add the data to the end of the list. */
+	ib_list_t*	list,	/*!< in: list */
+	void*		data,	/*!< in: data */
+	mem_heap_t*	heap);	/*!< in: memory heap to use */
 
+/****************************************************************//**
+Add the data to the end of the list.
+@return	new list node */
+UNIV_INTERN
 ib_list_node_t*
 ib_list_add_last(
 /*=============*/
-				/* out: new list node*/
-	ib_list_t*	list,	/* in: list */
-	void*		data,	/* in: data */
-	mem_heap_t*	heap);	/* in: memory heap to use */
-
-/********************************************************************
-Add the data after the indicated node. */
+	ib_list_t*	list,	/*!< in: list */
+	void*		data,	/*!< in: data */
+	mem_heap_t*	heap);	/*!< in: memory heap to use */
 
+/****************************************************************//**
+Add the data after the indicated node.
+@return	new list node */
+UNIV_INTERN
 ib_list_node_t*
 ib_list_add_after(
 /*==============*/
-					/* out: new list node*/
-	ib_list_t*	list,		/* in: list */
-	ib_list_node_t*	prev_node,	/* in: node preceding new node (can
+	ib_list_t*	list,		/*!< in: list */
+	ib_list_node_t*	prev_node,	/*!< in: node preceding new node (can
 					be NULL) */
-	void*		data,		/* in: data */
-	mem_heap_t*	heap);		/* in: memory heap to use */
+	void*		data,		/*!< in: data */
+	mem_heap_t*	heap);		/*!< in: memory heap to use */
 
-/********************************************************************
+/****************************************************************//**
 Remove the node from the list. */
-
+UNIV_INTERN
 void
 ib_list_remove(
 /*===========*/
-	ib_list_t*	list,	/* in: list */
-	ib_list_node_t*	node);	/* in: node to remove */
+	ib_list_t*	list,	/*!< in: list */
+	ib_list_node_t*	node);	/*!< in: node to remove */
 
-/********************************************************************
-Get the first node in the list. */
+/****************************************************************//**
+Get the first node in the list.
+@return	first node, or NULL */
 UNIV_INLINE
 ib_list_node_t*
 ib_list_get_first(
 /*==============*/
-				/* out: first node, or NULL */
-	ib_list_t*	list);	/* in: list */
+	ib_list_t*	list);	/*!< in: list */
 
-/********************************************************************
-Get the last node in the list. */
+/****************************************************************//**
+Get the last node in the list.
+@return	last node, or NULL */
 UNIV_INLINE
 ib_list_node_t*
 ib_list_get_last(
 /*=============*/
-				/* out: last node, or NULL */
-	ib_list_t*	list);	/* in: list */
+	ib_list_t*	list);	/*!< in: list */
 
 /* List. */
 struct ib_list_struct {
-	ib_list_node_t*		first;		/* first node */
-	ib_list_node_t*		last;		/* last node */
-	ibool			is_heap_list;	/* TRUE if this list was
+	ib_list_node_t*		first;		/*!< first node */
+	ib_list_node_t*		last;		/*!< last node */
+	ibool			is_heap_list;	/*!< TRUE if this list was
 						allocated through a heap */
 };
 
 /* A list node. */
 struct ib_list_node_struct {
-	ib_list_node_t*		prev;		/* previous node */
-	ib_list_node_t*		next;		/* next node */
-	void*			data;		/* user data */
+	ib_list_node_t*		prev;		/*!< previous node */
+	ib_list_node_t*		next;		/*!< next node */
+	void*			data;		/*!< user data */
 };
 
 /* Quite often, the only additional piece of data you need is the per-item
 memory heap, so we have this generic struct available to use in those
 cases. */
 struct ib_list_helper_struct {
-	mem_heap_t*	heap;		/* memory heap */
-	void*		data;		/* user data */
+	mem_heap_t*	heap;		/*!< memory heap */
+	void*		data;		/*!< user data */
 };
 
 #ifndef UNIV_NONINL
diff --git a/storage/innodb_plugin/include/ut0list.ic b/storage/innodb_plugin/include/ut0list.ic
new file mode 100644
index 00000000000..eb5c62796e8
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0list.ic
@@ -0,0 +1,48 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0list.ic
+A double-linked list
+
+Created 4/26/2006 Osku Salerma
+************************************************************************/
+
+/****************************************************************//**
+Get the first node in the list.
+@return	first node, or NULL */
+UNIV_INLINE
+ib_list_node_t*
+ib_list_get_first(
+/*==============*/
+	ib_list_t*	list)	/*!< in: list */
+{
+	return(list->first);
+}
+
+/****************************************************************//**
+Get the last node in the list.
+@return	last node, or NULL */
+UNIV_INLINE
+ib_list_node_t*
+ib_list_get_last(
+/*=============*/
+	ib_list_t*	list)	/*!< in: list */
+{
+	return(list->last);
+}
diff --git a/storage/innobase/include/ut0lst.h b/storage/innodb_plugin/include/ut0lst.h
similarity index 51%
rename from storage/innobase/include/ut0lst.h
rename to storage/innodb_plugin/include/ut0lst.h
index ebe2803fe23..261d33963dc 100644
--- a/storage/innobase/include/ut0lst.h
+++ b/storage/innodb_plugin/include/ut0lst.h
@@ -1,7 +1,24 @@
-/**********************************************************************
-List utilities
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0lst.h
+List utilities
 
 Created 9/10/1995 Heikki Tuuri
 ***********************************************************************/
@@ -16,45 +33,46 @@ if a list is used in the database. Note that a single struct may belong
 to two or more lists, provided that the list are given different names.
 An example of the usage of the lists can be found in fil0fil.c. */
 
-/***********************************************************************
+/*******************************************************************//**
 This macro expands to the unnamed type definition of a struct which acts
 as the two-way list base node. The base node contains pointers
 to both ends of the list and a count of nodes in the list (excluding
-the base node from the count). TYPE should be the list node type name. */
-
+the base node from the count).
+@param TYPE	the name of the list node data type */
 #define UT_LIST_BASE_NODE_T(TYPE)\
 struct {\
-	ulint	count;	/* count of nodes in list */\
-	TYPE *	start;	/* pointer to list start, NULL if empty */\
-	TYPE *	end;	/* pointer to list end, NULL if empty */\
+	ulint	count;	/*!< count of nodes in list */\
+	TYPE *	start;	/*!< pointer to list start, NULL if empty */\
+	TYPE *	end;	/*!< pointer to list end, NULL if empty */\
 }\
 
-/***********************************************************************
+/*******************************************************************//**
 This macro expands to the unnamed type definition of a struct which
 should be embedded in the nodes of the list, the node type must be a struct.
 This struct contains the pointers to next and previous nodes in the list.
 The name of the field in the node struct should be the name given
-to the list. TYPE should be the list node type name. Example of usage:
-
+to the list.
+@param TYPE	the list node type name */
+/* Example:
 typedef struct LRU_node_struct	LRU_node_t;
 struct LRU_node_struct {
 	UT_LIST_NODE_T(LRU_node_t)	LRU_list;
 	...
 }
 The example implements an LRU list of name LRU_list. Its nodes are of type
-LRU_node_t.
-*/
+LRU_node_t. */
 
 #define UT_LIST_NODE_T(TYPE)\
 struct {\
-	TYPE *	prev;	/* pointer to the previous node,\
+	TYPE *	prev;	/*!< pointer to the previous node,\
 			NULL if start of list */\
-	TYPE *	next;	/* pointer to next node, NULL if end of list */\
+	TYPE *	next;	/*!< pointer to next node, NULL if end of list */\
 }\
 
-/***********************************************************************
-Initializes the base node of a two-way list. */
-
+/*******************************************************************//**
+Initializes the base node of a two-way list.
+@param BASE	the list base node
+*/
 #define UT_LIST_INIT(BASE)\
 {\
 	(BASE).count = 0;\
@@ -62,32 +80,34 @@ Initializes the base node of a two-way list. */
 	(BASE).end   = NULL;\
 }\
 
-/***********************************************************************
+/*******************************************************************//**
 Adds the node as the first element in a two-way linked list.
-BASE has to be the base node (not a pointer to it). N has to be
-the pointer to the node to be added to the list. NAME is the list name. */
-
+@param NAME	list name
+@param BASE	the base node (not a pointer to it)
+@param N	pointer to the node to be added to the list.
+*/
 #define UT_LIST_ADD_FIRST(NAME, BASE, N)\
 {\
 	ut_ad(N);\
 	((BASE).count)++;\
 	((N)->NAME).next = (BASE).start;\
 	((N)->NAME).prev = NULL;\
-	if ((BASE).start != NULL) {\
+	if (UNIV_LIKELY((BASE).start != NULL)) {\
 		ut_ad((BASE).start != (N));\
 		(((BASE).start)->NAME).prev = (N);\
 	}\
 	(BASE).start = (N);\
-	if ((BASE).end == NULL) {\
+	if (UNIV_UNLIKELY((BASE).end == NULL)) {\
 		(BASE).end = (N);\
 	}\
 }\
 
-/***********************************************************************
+/*******************************************************************//**
 Adds the node as the last element in a two-way linked list.
-BASE has to be the base node (not a pointer to it). N has to be
-the pointer to the node to be added to the list. NAME is the list name. */
-
+@param NAME	list name
+@param BASE	the base node (not a pointer to it)
+@param N	pointer to the node to be added to the list
+*/
 #define UT_LIST_ADD_LAST(NAME, BASE, N)\
 {\
 	ut_ad(N);\
@@ -104,11 +124,13 @@ the pointer to the node to be added to the list. NAME is the list name. */
 	}\
 }\
 
-/***********************************************************************
+/*******************************************************************//**
 Inserts a NODE2 after NODE1 in a list.
-BASE has to be the base node (not a pointer to it). NAME is the list
-name, NODE1 and NODE2 are pointers to nodes. */
-
+@param NAME	list name
+@param BASE	the base node (not a pointer to it)
+@param NODE1	pointer to node after which NODE2 is inserted
+@param NODE2	pointer to node being inserted after NODE1
+*/
 #define UT_LIST_INSERT_AFTER(NAME, BASE, NODE1, NODE2)\
 {\
 	ut_ad(NODE1);\
@@ -126,19 +148,25 @@ name, NODE1 and NODE2 are pointers to nodes. */
 	}\
 }\
 
-/* Invalidate the pointers in a list node. */
 #ifdef UNIV_LIST_DEBUG
+/** Invalidate the pointers in a list node.
+@param NAME	list name
+@param N	pointer to the node that was removed */
 # define UT_LIST_REMOVE_CLEAR(NAME, N)		\
 ((N)->NAME.prev = (N)->NAME.next = (void*) -1)
 #else
+/** Invalidate the pointers in a list node.
+@param NAME	list name
+@param N	pointer to the node that was removed */
 # define UT_LIST_REMOVE_CLEAR(NAME, N) while (0)
 #endif
 
-/***********************************************************************
-Removes a node from a two-way linked list. BASE has to be the base node
-(not a pointer to it). N has to be the pointer to the node to be removed
-from the list. NAME is the list name. */
-
+/*******************************************************************//**
+Removes a node from a two-way linked list.
+@param NAME	list name
+@param BASE	the base node (not a pointer to it)
+@param N	pointer to the node to be removed from the list
+*/
 #define UT_LIST_REMOVE(NAME, BASE, N)					\
 do {									\
 	ut_ad(N);							\
@@ -157,71 +185,77 @@ do {									\
 	UT_LIST_REMOVE_CLEAR(NAME, N);					\
 } while (0)
 
-/************************************************************************
-Gets the next node in a two-way list. NAME is the name of the list
-and N is pointer to a node. */
-
+/********************************************************************//**
+Gets the next node in a two-way list.
+@param NAME	list name
+@param N	pointer to a node
+@return		the successor of N in NAME, or NULL */
 #define UT_LIST_GET_NEXT(NAME, N)\
 	(((N)->NAME).next)
 
-/************************************************************************
-Gets the previous node in a two-way list. NAME is the name of the list
-and N is pointer to a node. */
-
+/********************************************************************//**
+Gets the previous node in a two-way list.
+@param NAME	list name
+@param N	pointer to a node
+@return		the predecessor of N in NAME, or NULL */
 #define UT_LIST_GET_PREV(NAME, N)\
 	(((N)->NAME).prev)
 
-/************************************************************************
+/********************************************************************//**
 Alternative macro to get the number of nodes in a two-way list, i.e.,
-its length. BASE is the base node (not a pointer to it). */
-
+its length.
+@param BASE	the base node (not a pointer to it).
+@return		the number of nodes in the list */
 #define UT_LIST_GET_LEN(BASE)\
 	(BASE).count
 
-/************************************************************************
-Gets the first node in a two-way list, or returns NULL,
-if the list is empty. BASE is the base node (not a pointer to it). */
-
+/********************************************************************//**
+Gets the first node in a two-way list.
+@param BASE	the base node (not a pointer to it)
+@return		first node, or NULL if the list is empty */
 #define UT_LIST_GET_FIRST(BASE)\
 	(BASE).start
 
-/************************************************************************
-Gets the last node in a two-way list, or returns NULL,
-if the list is empty. BASE is the base node (not a pointer to it). */
-
+/********************************************************************//**
+Gets the last node in a two-way list.
+@param BASE	the base node (not a pointer to it)
+@return		last node, or NULL if the list is empty */
 #define UT_LIST_GET_LAST(BASE)\
 	(BASE).end
 
-/************************************************************************
-Checks the consistency of a two-way list. NAME is the name of the list,
-TYPE is the node type, and BASE is the base node (not a pointer to it). */
-
-#define UT_LIST_VALIDATE(NAME, TYPE, BASE)\
-{\
-	ulint	ut_list_i_313;\
-	TYPE *	ut_list_node_313;\
-\
-	ut_list_node_313 = (BASE).start;\
-\
-	for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\
-						ut_list_i_313++) {\
-		ut_a(ut_list_node_313);\
-		ut_list_node_313 = (ut_list_node_313->NAME).next;\
-	}\
-\
-	ut_a(ut_list_node_313 == NULL);\
-\
-	ut_list_node_313 = (BASE).end;\
-\
-	for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\
-						ut_list_i_313++) {\
-		ut_a(ut_list_node_313);\
-		ut_list_node_313 = (ut_list_node_313->NAME).prev;\
-	}\
-\
-	ut_a(ut_list_node_313 == NULL);\
-}\
-
+/********************************************************************//**
+Checks the consistency of a two-way list.
+@param NAME		the name of the list
+@param TYPE		node type
+@param BASE		base node (not a pointer to it)
+@param ASSERTION	a condition on ut_list_node_313 */
+#define UT_LIST_VALIDATE(NAME, TYPE, BASE, ASSERTION)			\
+do {									\
+	ulint	ut_list_i_313;						\
+	TYPE*	ut_list_node_313;					\
+									\
+	ut_list_node_313 = (BASE).start;				\
+									\
+	for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) {		\
+		ut_a(ut_list_node_313);					\
+		ASSERTION;						\
+		ut_ad((ut_list_node_313->NAME).next || !ut_list_i_313);	\
+		ut_list_node_313 = (ut_list_node_313->NAME).next;	\
+	}								\
+									\
+	ut_a(ut_list_node_313 == NULL);					\
+									\
+	ut_list_node_313 = (BASE).end;					\
+									\
+	for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) {		\
+		ut_a(ut_list_node_313);					\
+		ASSERTION;						\
+		ut_ad((ut_list_node_313->NAME).prev || !ut_list_i_313);	\
+		ut_list_node_313 = (ut_list_node_313->NAME).prev;	\
+	}								\
+									\
+	ut_a(ut_list_node_313 == NULL);					\
+} while (0)
 
 #endif
 
diff --git a/storage/innodb_plugin/include/ut0mem.h b/storage/innodb_plugin/include/ut0mem.h
new file mode 100644
index 00000000000..cf41cba4643
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0mem.h
@@ -0,0 +1,306 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0mem.h
+Memory primitives
+
+Created 5/30/1994 Heikki Tuuri
+************************************************************************/
+
+#ifndef ut0mem_h
+#define ut0mem_h
+
+#include "univ.i"
+#include <string.h>
+#ifndef UNIV_HOTBACKUP
+# include "os0sync.h"
+
+/** The total amount of memory currently allocated from the operating
+system with os_mem_alloc_large() or malloc().  Does not count malloc()
+if srv_use_sys_malloc is set.  Protected by ut_list_mutex. */
+extern ulint		ut_total_allocated_memory;
+
+/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */
+extern os_fast_mutex_t	ut_list_mutex;
+#endif /* !UNIV_HOTBACKUP */
+
+/** Wrapper for memcpy(3).  Copy memory area when the source and
+target are not overlapping.
+* @param dest	in: copy to
+* @param sour	in: copy from
+* @param n	in: number of bytes to copy
+* @return	dest */
+UNIV_INLINE
+void*
+ut_memcpy(void* dest, const void* sour, ulint n);
+
+/** Wrapper for memmove(3).  Copy memory area when the source and
+target are overlapping.
+* @param dest	in: copy to
+* @param sour	in: copy from
+* @param n	in: number of bytes to copy
+* @return	dest */
+UNIV_INLINE
+void*
+ut_memmove(void* dest, const void* sour, ulint n);
+
+/** Wrapper for memcmp(3).  Compare memory areas.
+* @param str1	in: first memory block to compare
+* @param str2	in: second memory block to compare
+* @param n	in: number of bytes to compare
+* @return	negative, 0, or positive if str1 is smaller, equal,
+		or greater than str2, respectively. */
+UNIV_INLINE
+int
+ut_memcmp(const void* str1, const void* str2, ulint n);
+
+/**********************************************************************//**
+Initializes the mem block list at database startup. */
+UNIV_INTERN
+void
+ut_mem_init(void);
+/*=============*/
+
+/**********************************************************************//**
+Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
+defined and set_to_zero is TRUE.
+@return	own: allocated memory */
+UNIV_INTERN
+void*
+ut_malloc_low(
+/*==========*/
+	ulint	n,			/*!< in: number of bytes to allocate */
+	ibool	set_to_zero,		/*!< in: TRUE if allocated memory
+					should be set to zero if
+					UNIV_SET_MEM_TO_ZERO is defined */
+	ibool	assert_on_error);	/*!< in: if TRUE, we crash mysqld if
+					the memory cannot be allocated */
+/**********************************************************************//**
+Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
+defined.
+@return	own: allocated memory */
+UNIV_INTERN
+void*
+ut_malloc(
+/*======*/
+	ulint	n);	/*!< in: number of bytes to allocate */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs
+out. It cannot be used if we want to return an error message. Prints to
+stderr a message if fails.
+@return	TRUE if succeeded */
+UNIV_INTERN
+ibool
+ut_test_malloc(
+/*===========*/
+	ulint	n);	/*!< in: try to allocate this many bytes */
+#endif /* !UNIV_HOTBACKUP */
+/**********************************************************************//**
+Frees a memory block allocated with ut_malloc. */
+UNIV_INTERN
+void
+ut_free(
+/*====*/
+	void* ptr);  /*!< in, own: memory block */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not
+use this function because the allocation functions in mem0mem.h are the
+recommended ones in InnoDB.
+
+man realloc in Linux, 2004:
+
+       realloc()  changes the size of the memory block pointed to
+       by ptr to size bytes.  The contents will be  unchanged  to
+       the minimum of the old and new sizes; newly allocated mem�
+       ory will be uninitialized.  If ptr is NULL,  the	 call  is
+       equivalent  to malloc(size); if size is equal to zero, the
+       call is equivalent to free(ptr).	 Unless ptr is	NULL,  it
+       must  have  been	 returned by an earlier call to malloc(),
+       calloc() or realloc().
+
+RETURN VALUE
+       realloc() returns a pointer to the newly allocated memory,
+       which is suitably aligned for any kind of variable and may
+       be different from ptr, or NULL if the  request  fails.  If
+       size  was equal to 0, either NULL or a pointer suitable to
+       be passed to free() is returned.	 If realloc()  fails  the
+       original	 block	is  left  untouched  - it is not freed or
+       moved.
+@return	own: pointer to new mem block or NULL */
+UNIV_INTERN
+void*
+ut_realloc(
+/*=======*/
+	void*	ptr,	/*!< in: pointer to old block or NULL */
+	ulint	size);	/*!< in: desired size */
+/**********************************************************************//**
+Frees in shutdown all allocated memory not freed yet. */
+UNIV_INTERN
+void
+ut_free_all_mem(void);
+/*=================*/
+#endif /* !UNIV_HOTBACKUP */
+
+/** Wrapper for strcpy(3).  Copy a NUL-terminated string.
+* @param dest	in: copy to
+* @param sour	in: copy from
+* @return	dest */
+UNIV_INLINE
+char*
+ut_strcpy(char* dest, const char* sour);
+
+/** Wrapper for strlen(3).  Determine the length of a NUL-terminated string.
+* @param str	in: string
+* @return	length of the string in bytes, excluding the terminating NUL */
+UNIV_INLINE
+ulint
+ut_strlen(const char* str);
+
+/** Wrapper for strcmp(3).  Compare NUL-terminated strings.
+* @param str1	in: first string to compare
+* @param str2	in: second string to compare
+* @return	negative, 0, or positive if str1 is smaller, equal,
+		or greater than str2, respectively. */
+UNIV_INLINE
+int
+ut_strcmp(const char* str1, const char* str2);
+
+/**********************************************************************//**
+Copies up to size - 1 characters from the NUL-terminated string src to
+dst, NUL-terminating the result. Returns strlen(src), so truncation
+occurred if the return value >= size.
+@return	strlen(src) */
+UNIV_INTERN
+ulint
+ut_strlcpy(
+/*=======*/
+	char*		dst,	/*!< in: destination buffer */
+	const char*	src,	/*!< in: source buffer */
+	ulint		size);	/*!< in: size of destination buffer */
+
+/**********************************************************************//**
+Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
+(size - 1) bytes of src, not the first.
+@return	strlen(src) */
+UNIV_INTERN
+ulint
+ut_strlcpy_rev(
+/*===========*/
+	char*		dst,	/*!< in: destination buffer */
+	const char*	src,	/*!< in: source buffer */
+	ulint		size);	/*!< in: size of destination buffer */
+
+/**********************************************************************//**
+Compute strlen(ut_strcpyq(str, q)).
+@return	length of the string when quoted */
+UNIV_INLINE
+ulint
+ut_strlenq(
+/*=======*/
+	const char*	str,	/*!< in: null-terminated string */
+	char		q);	/*!< in: the quote character */
+
+/**********************************************************************//**
+Make a quoted copy of a NUL-terminated string.	Leading and trailing
+quotes will not be included; only embedded quotes will be escaped.
+See also ut_strlenq() and ut_memcpyq().
+@return	pointer to end of dest */
+UNIV_INTERN
+char*
+ut_strcpyq(
+/*=======*/
+	char*		dest,	/*!< in: output buffer */
+	char		q,	/*!< in: the quote character */
+	const char*	src);	/*!< in: null-terminated string */
+
+/**********************************************************************//**
+Make a quoted copy of a fixed-length string.  Leading and trailing
+quotes will not be included; only embedded quotes will be escaped.
+See also ut_strlenq() and ut_strcpyq().
+@return	pointer to end of dest */
+UNIV_INTERN
+char*
+ut_memcpyq(
+/*=======*/
+	char*		dest,	/*!< in: output buffer */
+	char		q,	/*!< in: the quote character */
+	const char*	src,	/*!< in: string to be quoted */
+	ulint		len);	/*!< in: length of src */
+
+/**********************************************************************//**
+Return the number of times s2 occurs in s1. Overlapping instances of s2
+are only counted once.
+@return	the number of times s2 occurs in s1 */
+UNIV_INTERN
+ulint
+ut_strcount(
+/*========*/
+	const char*	s1,	/*!< in: string to search in */
+	const char*	s2);	/*!< in: string to search for */
+
+/**********************************************************************//**
+Replace every occurrence of s1 in str with s2. Overlapping instances of s1
+are only replaced once.
+@return	own: modified string, must be freed with mem_free() */
+UNIV_INTERN
+char*
+ut_strreplace(
+/*==========*/
+	const char*	str,	/*!< in: string to operate on */
+	const char*	s1,	/*!< in: string to replace */
+	const char*	s2);	/*!< in: string to replace s1 with */
+
+/**********************************************************************//**
+Converts a raw binary data to a NUL-terminated hex string. The output is
+truncated if there is not enough space in "hex", make sure "hex_size" is at
+least (2 * raw_size + 1) if you do not want this to happen. Returns the
+actual number of characters written to "hex" (including the NUL).
+@return	number of chars written */
+UNIV_INLINE
+ulint
+ut_raw_to_hex(
+/*==========*/
+	const void*	raw,		/*!< in: raw data */
+	ulint		raw_size,	/*!< in: "raw" length in bytes */
+	char*		hex,		/*!< out: hex string */
+	ulint		hex_size);	/*!< in: "hex" size in bytes */
+
+/*******************************************************************//**
+Adds single quotes to the start and end of string and escapes any quotes
+by doubling them. Returns the number of bytes that were written to "buf"
+(including the terminating NUL). If buf_size is too small then the
+trailing bytes from "str" are discarded.
+@return	number of bytes that were written */
+UNIV_INLINE
+ulint
+ut_str_sql_format(
+/*==============*/
+	const char*	str,		/*!< in: string */
+	ulint		str_len,	/*!< in: string length in bytes */
+	char*		buf,		/*!< out: output buffer */
+	ulint		buf_size);	/*!< in: output buffer size
+					in bytes */
+
+#ifndef UNIV_NONINL
+#include "ut0mem.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/ut0mem.ic b/storage/innodb_plugin/include/ut0mem.ic
new file mode 100644
index 00000000000..f36c28f1989
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0mem.ic
@@ -0,0 +1,338 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0mem.ic
+Memory primitives
+
+Created 5/30/1994 Heikki Tuuri
+************************************************************************/
+
+#include "ut0byte.h"
+#include "mach0data.h"
+
+/** Wrapper for memcpy(3).  Copy memory area when the source and
+target are not overlapping.
+* @param dest	in: copy to
+* @param sour	in: copy from
+* @param n	in: number of bytes to copy
+* @return	dest */
+UNIV_INLINE
+void*
+ut_memcpy(void* dest, const void* sour, ulint n)
+{
+	return(memcpy(dest, sour, n));
+}
+
+/** Wrapper for memmove(3).  Copy memory area when the source and
+target are overlapping.
+* @param dest	in: copy to
+* @param sour	in: copy from
+* @param n	in: number of bytes to copy
+* @return	dest */
+UNIV_INLINE
+void*
+ut_memmove(void* dest, const void* sour, ulint n)
+{
+	return(memmove(dest, sour, n));
+}
+
+/** Wrapper for memcmp(3).  Compare memory areas.
+* @param str1	in: first memory block to compare
+* @param str2	in: second memory block to compare
+* @param n	in: number of bytes to compare
+* @return	negative, 0, or positive if str1 is smaller, equal,
+		or greater than str2, respectively. */
+UNIV_INLINE
+int
+ut_memcmp(const void* str1, const void* str2, ulint n)
+{
+	return(memcmp(str1, str2, n));
+}
+
+/** Wrapper for strcpy(3).  Copy a NUL-terminated string.
+* @param dest	in: copy to
+* @param sour	in: copy from
+* @return	dest */
+UNIV_INLINE
+char*
+ut_strcpy(char* dest, const char* sour)
+{
+	return(strcpy(dest, sour));
+}
+
+/** Wrapper for strlen(3).  Determine the length of a NUL-terminated string.
+* @param str	in: string
+* @return	length of the string in bytes, excluding the terminating NUL */
+UNIV_INLINE
+ulint
+ut_strlen(const char* str)
+{
+	return(strlen(str));
+}
+
+/** Wrapper for strcmp(3).  Compare NUL-terminated strings.
+* @param str1	in: first string to compare
+* @param str2	in: second string to compare
+* @return	negative, 0, or positive if str1 is smaller, equal,
+		or greater than str2, respectively. */
+UNIV_INLINE
+int
+ut_strcmp(const char* str1, const char* str2)
+{
+	return(strcmp(str1, str2));
+}
+
+/**********************************************************************//**
+Compute strlen(ut_strcpyq(str, q)).
+@return	length of the string when quoted */
+UNIV_INLINE
+ulint
+ut_strlenq(
+/*=======*/
+	const char*	str,	/*!< in: null-terminated string */
+	char		q)	/*!< in: the quote character */
+{
+	ulint len;
+
+	for (len = 0; *str; len++, str++) {
+		if (*str == q) {
+			len++;
+		}
+	}
+
+	return(len);
+}
+
+/**********************************************************************//**
+Converts a raw binary data to a NUL-terminated hex string. The output is
+truncated if there is not enough space in "hex", make sure "hex_size" is at
+least (2 * raw_size + 1) if you do not want this to happen. Returns the
+actual number of characters written to "hex" (including the NUL).
+@return	number of chars written */
+UNIV_INLINE
+ulint
+ut_raw_to_hex(
+/*==========*/
+	const void*	raw,		/*!< in: raw data */
+	ulint		raw_size,	/*!< in: "raw" length in bytes */
+	char*		hex,		/*!< out: hex string */
+	ulint		hex_size)	/*!< in: "hex" size in bytes */
+{
+
+#ifdef WORDS_BIGENDIAN
+
+#define MK_UINT16(a, b) (((uint16) (a)) << 8 | (uint16) (b))
+
+#define UINT16_GET_A(u)	((unsigned char) ((u) >> 8))
+#define UINT16_GET_B(u)	((unsigned char) ((u) & 0xFF))
+
+#else /* WORDS_BIGENDIAN */
+
+#define MK_UINT16(a, b) (((uint16) (b)) << 8 | (uint16) (a))
+
+#define UINT16_GET_A(u)	((unsigned char) ((u) & 0xFF))
+#define UINT16_GET_B(u)	((unsigned char) ((u) >> 8))
+
+#endif /* WORDS_BIGENDIAN */
+
+#define MK_ALL_UINT16_WITH_A(a)	\
+	MK_UINT16(a, '0'),	\
+	MK_UINT16(a, '1'),	\
+	MK_UINT16(a, '2'),	\
+	MK_UINT16(a, '3'),	\
+	MK_UINT16(a, '4'),	\
+	MK_UINT16(a, '5'),	\
+	MK_UINT16(a, '6'),	\
+	MK_UINT16(a, '7'),	\
+	MK_UINT16(a, '8'),	\
+	MK_UINT16(a, '9'),	\
+	MK_UINT16(a, 'A'),	\
+	MK_UINT16(a, 'B'),	\
+	MK_UINT16(a, 'C'),	\
+	MK_UINT16(a, 'D'),	\
+	MK_UINT16(a, 'E'),	\
+	MK_UINT16(a, 'F')
+
+	static const uint16	hex_map[256] = {
+		MK_ALL_UINT16_WITH_A('0'),
+		MK_ALL_UINT16_WITH_A('1'),
+		MK_ALL_UINT16_WITH_A('2'),
+		MK_ALL_UINT16_WITH_A('3'),
+		MK_ALL_UINT16_WITH_A('4'),
+		MK_ALL_UINT16_WITH_A('5'),
+		MK_ALL_UINT16_WITH_A('6'),
+		MK_ALL_UINT16_WITH_A('7'),
+		MK_ALL_UINT16_WITH_A('8'),
+		MK_ALL_UINT16_WITH_A('9'),
+		MK_ALL_UINT16_WITH_A('A'),
+		MK_ALL_UINT16_WITH_A('B'),
+		MK_ALL_UINT16_WITH_A('C'),
+		MK_ALL_UINT16_WITH_A('D'),
+		MK_ALL_UINT16_WITH_A('E'),
+		MK_ALL_UINT16_WITH_A('F')
+	};
+	const unsigned char*	rawc;
+	ulint			read_bytes;
+	ulint			write_bytes;
+	ulint			i;
+
+	rawc = (const unsigned char*) raw;
+
+	if (hex_size == 0) {
+
+		return(0);
+	}
+
+	if (hex_size <= 2 * raw_size) {
+
+		read_bytes = hex_size / 2;
+		write_bytes = hex_size;
+	} else {
+
+		read_bytes = raw_size;
+		write_bytes = 2 * raw_size + 1;
+	}
+
+#define LOOP_READ_BYTES(ASSIGN)			\
+	for (i = 0; i < read_bytes; i++) {	\
+		ASSIGN;				\
+		hex += 2;			\
+		rawc++;				\
+	}
+
+	if (ut_align_offset(hex, 2) == 0) {
+
+		LOOP_READ_BYTES(
+			*(uint16*) hex = hex_map[*rawc]
+		);
+	} else {
+
+		LOOP_READ_BYTES(
+			*hex       = UINT16_GET_A(hex_map[*rawc]);
+			*(hex + 1) = UINT16_GET_B(hex_map[*rawc])
+		);
+	}
+
+	if (hex_size <= 2 * raw_size && hex_size % 2 == 0) {
+
+		hex--;
+	}
+
+	*hex = '\0';
+
+	return(write_bytes);
+}
+
+/*******************************************************************//**
+Adds single quotes to the start and end of string and escapes any quotes
+by doubling them. Returns the number of bytes that were written to "buf"
+(including the terminating NUL). If buf_size is too small then the
+trailing bytes from "str" are discarded.
+@return	number of bytes that were written */
+UNIV_INLINE
+ulint
+ut_str_sql_format(
+/*==============*/
+	const char*	str,		/*!< in: string */
+	ulint		str_len,	/*!< in: string length in bytes */
+	char*		buf,		/*!< out: output buffer */
+	ulint		buf_size)	/*!< in: output buffer size
+					in bytes */
+{
+	ulint	str_i;
+	ulint	buf_i;
+
+	buf_i = 0;
+
+	switch (buf_size) {
+	case 3:
+
+		if (str_len == 0) {
+
+			buf[buf_i] = '\'';
+			buf_i++;
+			buf[buf_i] = '\'';
+			buf_i++;
+		}
+		/* FALLTHROUGH */
+	case 2:
+	case 1:
+
+		buf[buf_i] = '\0';
+		buf_i++;
+		/* FALLTHROUGH */
+	case 0:
+
+		return(buf_i);
+	}
+
+	/* buf_size >= 4 */
+
+	buf[0] = '\'';
+	buf_i = 1;
+
+	for (str_i = 0; str_i < str_len; str_i++) {
+
+		char	ch;
+
+		if (buf_size - buf_i == 2) {
+
+			break;
+		}
+
+		ch = str[str_i];
+
+		switch (ch) {
+		case '\0':
+
+			if (UNIV_UNLIKELY(buf_size - buf_i < 4)) {
+
+				goto func_exit;
+			}
+			buf[buf_i] = '\\';
+			buf_i++;
+			buf[buf_i] = '0';
+			buf_i++;
+			break;
+		case '\'':
+		case '\\':
+
+			if (UNIV_UNLIKELY(buf_size - buf_i < 4)) {
+
+				goto func_exit;
+			}
+			buf[buf_i] = ch;
+			buf_i++;
+			/* FALLTHROUGH */
+		default:
+
+			buf[buf_i] = ch;
+			buf_i++;
+		}
+	}
+
+func_exit:
+
+	buf[buf_i] = '\'';
+	buf_i++;
+	buf[buf_i] = '\0';
+	buf_i++;
+
+	return(buf_i);
+}
diff --git a/storage/innodb_plugin/include/ut0rnd.h b/storage/innodb_plugin/include/ut0rnd.h
new file mode 100644
index 00000000000..ce5152e942f
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0rnd.h
@@ -0,0 +1,143 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0rnd.h
+Random numbers and hashing
+
+Created 1/20/1994 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0rnd_h
+#define ut0rnd_h
+
+#include "univ.i"
+
+#include "ut0byte.h"
+
+/** The 'character code' for end of field or string (used
+in folding records */
+#define UT_END_OF_FIELD		257
+
+/********************************************************//**
+This is used to set the random number seed. */
+UNIV_INLINE
+void
+ut_rnd_set_seed(
+/*============*/
+	ulint	 seed);		 /*!< in: seed */
+/********************************************************//**
+The following function generates a series of 'random' ulint integers.
+@return	the next 'random' number */
+UNIV_INLINE
+ulint
+ut_rnd_gen_next_ulint(
+/*==================*/
+	ulint	rnd);	/*!< in: the previous random number value */
+/*********************************************************//**
+The following function generates 'random' ulint integers which
+enumerate the value space (let there be N of them) of ulint integers
+in a pseudo-random fashion. Note that the same integer is repeated
+always after N calls to the generator.
+@return	the 'random' number */
+UNIV_INLINE
+ulint
+ut_rnd_gen_ulint(void);
+/*==================*/
+/********************************************************//**
+Generates a random integer from a given interval.
+@return	the 'random' number */
+UNIV_INLINE
+ulint
+ut_rnd_interval(
+/*============*/
+	ulint	low,	/*!< in: low limit; can generate also this value */
+	ulint	high);	/*!< in: high limit; can generate also this value */
+/*********************************************************//**
+Generates a random iboolean value.
+@return	the random value */
+UNIV_INLINE
+ibool
+ut_rnd_gen_ibool(void);
+/*=================*/
+/*******************************************************//**
+The following function generates a hash value for a ulint integer
+to a hash table of size table_size, which should be a prime or some
+random number to work reliably.
+@return	hash value */
+UNIV_INLINE
+ulint
+ut_hash_ulint(
+/*==========*/
+	ulint	 key,		/*!< in: value to be hashed */
+	ulint	 table_size);	/*!< in: hash table size */
+/*************************************************************//**
+Folds a pair of ulints.
+@return	folded value */
+UNIV_INLINE
+ulint
+ut_fold_ulint_pair(
+/*===============*/
+	ulint	n1,	/*!< in: ulint */
+	ulint	n2)	/*!< in: ulint */
+	__attribute__((const));
+/*************************************************************//**
+Folds a dulint.
+@return	folded value */
+UNIV_INLINE
+ulint
+ut_fold_dulint(
+/*===========*/
+	dulint	d)	/*!< in: dulint */
+	__attribute__((const));
+/*************************************************************//**
+Folds a character string ending in the null character.
+@return	folded value */
+UNIV_INLINE
+ulint
+ut_fold_string(
+/*===========*/
+	const char*	str)	/*!< in: null-terminated string */
+	__attribute__((pure));
+/*************************************************************//**
+Folds a binary string.
+@return	folded value */
+UNIV_INLINE
+ulint
+ut_fold_binary(
+/*===========*/
+	const byte*	str,	/*!< in: string of bytes */
+	ulint		len)	/*!< in: length */
+	__attribute__((pure));
+/***********************************************************//**
+Looks for a prime number slightly greater than the given argument.
+The prime is chosen so that it is not near any power of 2.
+@return	prime */
+UNIV_INTERN
+ulint
+ut_find_prime(
+/*==========*/
+	ulint	n)	/*!< in: positive number > 100 */
+	__attribute__((const));
+
+
+#ifndef UNIV_NONINL
+#include "ut0rnd.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/ut0rnd.ic b/storage/innodb_plugin/include/ut0rnd.ic
similarity index 52%
rename from storage/innobase/include/ut0rnd.ic
rename to storage/innodb_plugin/include/ut0rnd.ic
index 625c378489a..763469142ec 100644
--- a/storage/innobase/include/ut0rnd.ic
+++ b/storage/innodb_plugin/include/ut0rnd.ic
@@ -1,7 +1,24 @@
-/******************************************************************
-Random numbers and hashing
+/*****************************************************************************
 
-(c) 1994, 1995 Innobase Oy
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************************//**
+@file include/ut0rnd.ic
+Random numbers and hashing
 
 Created 5/30/1994 Heikki Tuuri
 *******************************************************************/
@@ -18,27 +35,28 @@ Created 5/30/1994 Heikki Tuuri
 #define UT_XOR_RND1		187678878
 #define UT_XOR_RND2		143537923
 
+/** Seed value of ut_rnd_gen_ulint() */
 extern	ulint	 ut_rnd_ulint_counter;
 
-/************************************************************
+/********************************************************//**
 This is used to set the random number seed. */
 UNIV_INLINE
 void
 ut_rnd_set_seed(
 /*============*/
-	ulint	 seed)		 /* in: seed */
+	ulint	 seed)		 /*!< in: seed */
 {
 	ut_rnd_ulint_counter = seed;
 }
 
-/************************************************************
-The following function generates a series of 'random' ulint integers. */
+/********************************************************//**
+The following function generates a series of 'random' ulint integers.
+@return	the next 'random' number */
 UNIV_INLINE
 ulint
 ut_rnd_gen_next_ulint(
 /*==================*/
-			/* out: the next 'random' number */
-	ulint	rnd)	/* in: the previous random number value */
+	ulint	rnd)	/*!< in: the previous random number value */
 {
 	ulint	n_bits;
 
@@ -55,16 +73,16 @@ ut_rnd_gen_next_ulint(
 	return(rnd);
 }
 
-/************************************************************
+/********************************************************//**
 The following function generates 'random' ulint integers which
 enumerate the value space of ulint integers in a pseudo random
 fashion. Note that the same integer is repeated always after
-2 to power 32 calls to the generator (if ulint is 32-bit). */
+2 to power 32 calls to the generator (if ulint is 32-bit).
+@return	the 'random' number */
 UNIV_INLINE
 ulint
 ut_rnd_gen_ulint(void)
 /*==================*/
-			/* out: the 'random' number */
 {
 	ulint	rnd;
 	ulint	n_bits;
@@ -78,15 +96,15 @@ ut_rnd_gen_ulint(void)
 	return(rnd);
 }
 
-/************************************************************
-Generates a random integer from a given interval. */
+/********************************************************//**
+Generates a random integer from a given interval.
+@return	the 'random' number */
 UNIV_INLINE
 ulint
 ut_rnd_interval(
 /*============*/
-			/* out: the 'random' number */
-	ulint	low,	/* in: low limit; can generate also this value */
-	ulint	high)	/* in: high limit; can generate also this value */
+	ulint	low,	/*!< in: low limit; can generate also this value */
+	ulint	high)	/*!< in: high limit; can generate also this value */
 {
 	ulint	rnd;
 
@@ -102,13 +120,13 @@ ut_rnd_interval(
 	return(low + (rnd % (high - low + 1)));
 }
 
-/*************************************************************
-Generates a random iboolean value. */
+/*********************************************************//**
+Generates a random iboolean value.
+@return	the random value */
 UNIV_INLINE
 ibool
 ut_rnd_gen_ibool(void)
 /*=================*/
-			/* out: the random value */
 {
 	ulint	 x;
 
@@ -122,73 +140,64 @@ ut_rnd_gen_ibool(void)
 	return(FALSE);
 }
 
-/***********************************************************
+/*******************************************************//**
 The following function generates a hash value for a ulint integer
 to a hash table of size table_size, which should be a prime
-or some random number for the hash table to work reliably. */
+or some random number for the hash table to work reliably.
+@return	hash value */
 UNIV_INLINE
 ulint
 ut_hash_ulint(
 /*==========*/
-				/* out: hash value */
-	ulint	 key,		/* in: value to be hashed */
-	ulint	 table_size)	/* in: hash table size */
+	ulint	 key,		/*!< in: value to be hashed */
+	ulint	 table_size)	/*!< in: hash table size */
 {
 	key = key ^ UT_HASH_RANDOM_MASK2;
 
 	return(key % table_size);
 }
 
-/*****************************************************************
-Folds a pair of ulints. */
+/*************************************************************//**
+Folds a pair of ulints.
+@return	folded value */
 UNIV_INLINE
 ulint
 ut_fold_ulint_pair(
 /*===============*/
-			/* out: folded value */
-	ulint	n1,	/* in: ulint */
-	ulint	n2)	/* in: ulint */
+	ulint	n1,	/*!< in: ulint */
+	ulint	n2)	/*!< in: ulint */
 {
 	return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1)
 		^ UT_HASH_RANDOM_MASK) + n2);
 }
 
-/*****************************************************************
-Folds a dulint. */
+/*************************************************************//**
+Folds a dulint.
+@return	folded value */
 UNIV_INLINE
 ulint
 ut_fold_dulint(
 /*===========*/
-			/* out: folded value */
-	dulint	d)	/* in: dulint */
+	dulint	d)	/*!< in: dulint */
 {
 	return(ut_fold_ulint_pair(ut_dulint_get_low(d),
 				  ut_dulint_get_high(d)));
 }
 
-/*****************************************************************
-Folds a character string ending in the null character. */
+/*************************************************************//**
+Folds a character string ending in the null character.
+@return	folded value */
 UNIV_INLINE
 ulint
 ut_fold_string(
 /*===========*/
-				/* out: folded value */
-	const char*	str)	/* in: null-terminated string */
+	const char*	str)	/*!< in: null-terminated string */
 {
-#ifdef UNIV_DEBUG
-	ulint	i = 0;
-#endif
 	ulint	fold = 0;
 
 	ut_ad(str);
 
 	while (*str != '\0') {
-
-#ifdef UNIV_DEBUG
-		i++;
-		ut_a(i < 100);
-#endif
-
 		fold = ut_fold_ulint_pair(fold, (ulint)(*str));
 		str++;
 	}
@@ -196,20 +205,20 @@ ut_fold_string(
 	return(fold);
 }
 
-/*****************************************************************
-Folds a binary string. */
+/*************************************************************//**
+Folds a binary string.
+@return	folded value */
 UNIV_INLINE
 ulint
 ut_fold_binary(
 /*===========*/
-				/* out: folded value */
-	const byte*	str,	/* in: string of bytes */
-	ulint		len)	/* in: length */
+	const byte*	str,	/*!< in: string of bytes */
+	ulint		len)	/*!< in: length */
 {
 	const byte*	str_end	= str + len;
 	ulint		fold = 0;
 
-	ut_ad(str);
+	ut_ad(str || !len);
 
 	while (str < str_end) {
 		fold = ut_fold_ulint_pair(fold, (ulint)(*str));
diff --git a/storage/innobase/include/ut0sort.h b/storage/innodb_plugin/include/ut0sort.h
similarity index 70%
rename from storage/innobase/include/ut0sort.h
rename to storage/innodb_plugin/include/ut0sort.h
index 87d30dee6f2..5c6647dda9e 100644
--- a/storage/innobase/include/ut0sort.h
+++ b/storage/innodb_plugin/include/ut0sort.h
@@ -1,7 +1,24 @@
-/**********************************************************************
-Sort utility
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0sort.h
+Sort utility
 
 Created 11/9/1995 Heikki Tuuri
 ***********************************************************************/
@@ -18,7 +35,7 @@ the macro. The sort algorithm is mergesort which has logarithmic
 worst case.
 */
 
-/***********************************************************************
+/*******************************************************************//**
 This macro expands to the body of a standard sort function.
 The sort function uses mergesort and must be defined separately
 for each type of array.
@@ -30,8 +47,7 @@ and the low (LOW), inclusive, and high (HIGH), noninclusive,
 limits for the sort interval as arguments.
 CMP_FUN is the comparison function name. It takes as arguments
 two elements from the array and returns 1, if the first is bigger,
-0 if equal, and -1 if the second bigger. For an eaxmaple of use
-see test program in tsut.c. */
+0 if equal, and -1 if the second bigger. */
 
 #define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\
 {\
@@ -81,9 +97,8 @@ see test program in tsut.c. */
 		}\
 	}\
 \
-	for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\
-		(ARR)[ut_sort_i77] = (AUX_ARR)[ut_sort_i77];\
-	}\
+	memcpy((void*) ((ARR) + (LOW)), (AUX_ARR) + (LOW),\
+	       ((HIGH) - (LOW)) * sizeof *(ARR));\
 }\
 
 
diff --git a/storage/innodb_plugin/include/ut0ut.h b/storage/innodb_plugin/include/ut0ut.h
new file mode 100644
index 00000000000..80094321041
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0ut.h
@@ -0,0 +1,385 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Sun Microsystems, Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
+are described briefly in the InnoDB documentation. The contributions by
+Sun Microsystems are incorporated with their permission, and subject to the
+conditions contained in the file COPYING.Sun_Microsystems.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0ut.h
+Various utilities
+
+Created 1/20/1994 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0ut_h
+#define ut0ut_h
+
+#include "univ.i"
+#include <time.h>
+#ifndef MYSQL_SERVER
+#include <ctype.h>
+#endif
+
+/** Index name prefix in fast index creation */
+#define	TEMP_INDEX_PREFIX	'\377'
+/** Index name prefix in fast index creation, as a string constant */
+#define TEMP_INDEX_PREFIX_STR	"\377"
+
+/** Time stamp */
+typedef time_t	ib_time_t;
+
+#if defined(IB_HAVE_PAUSE_INSTRUCTION)
+#  ifdef WIN32
+     /* In the Win32 API, the x86 PAUSE instruction is executed by calling
+     the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
+     independent way by using YieldProcessor.*/
+#    define UT_RELAX_CPU() YieldProcessor()
+#  else
+     /* According to the gcc info page, asm volatile means that the
+     instruction has important side-effects and must not be removed.
+     Also asm volatile may trigger a memory barrier (spilling all registers
+     to memory). */
+#    define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
+#  endif
+#elif defined(HAVE_ATOMIC_BUILTINS)
+#  define UT_RELAX_CPU() do { \
+     volatile lint	volatile_var; \
+     os_compare_and_swap_lint(&volatile_var, 0, 1); \
+   } while (0)
+#else
+#  define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */
+#endif
+
+/*********************************************************************//**
+Delays execution for at most max_wait_us microseconds or returns earlier
+if cond becomes true.
+@param cond		in: condition to wait for; evaluated every 2 ms
+@param max_wait_us	in: maximum delay to wait, in microseconds */
+#define UT_WAIT_FOR(cond, max_wait_us)				\
+do {								\
+	ullint	start_us;					\
+	start_us = ut_time_us(NULL);				\
+	while (!(cond) 						\
+	       && ut_time_us(NULL) - start_us < (max_wait_us)) {\
+								\
+		os_thread_sleep(2000 /* 2 ms */);		\
+	}							\
+} while (0)
+
+/********************************************************//**
+Gets the high 32 bits in a ulint. That is makes a shift >> 32,
+but since there seem to be compiler bugs in both gcc and Visual C++,
+we do this by a special conversion.
+@return	a >> 32 */
+UNIV_INTERN
+ulint
+ut_get_high32(
+/*==========*/
+	ulint	a);	/*!< in: ulint */
+/******************************************************//**
+Calculates the minimum of two ulints.
+@return	minimum */
+UNIV_INLINE
+ulint
+ut_min(
+/*===*/
+	ulint	 n1,	/*!< in: first number */
+	ulint	 n2);	/*!< in: second number */
+/******************************************************//**
+Calculates the maximum of two ulints.
+@return	maximum */
+UNIV_INLINE
+ulint
+ut_max(
+/*===*/
+	ulint	 n1,	/*!< in: first number */
+	ulint	 n2);	/*!< in: second number */
+/****************************************************************//**
+Calculates minimum of two ulint-pairs. */
+UNIV_INLINE
+void
+ut_pair_min(
+/*========*/
+	ulint*	a,	/*!< out: more significant part of minimum */
+	ulint*	b,	/*!< out: less significant part of minimum */
+	ulint	a1,	/*!< in: more significant part of first pair */
+	ulint	b1,	/*!< in: less significant part of first pair */
+	ulint	a2,	/*!< in: more significant part of second pair */
+	ulint	b2);	/*!< in: less significant part of second pair */
+/******************************************************//**
+Compares two ulints.
+@return	1 if a > b, 0 if a == b, -1 if a < b */
+UNIV_INLINE
+int
+ut_ulint_cmp(
+/*=========*/
+	ulint	a,	/*!< in: ulint */
+	ulint	b);	/*!< in: ulint */
+/*******************************************************//**
+Compares two pairs of ulints.
+@return	-1 if a < b, 0 if a == b, 1 if a > b */
+UNIV_INLINE
+int
+ut_pair_cmp(
+/*========*/
+	ulint	a1,	/*!< in: more significant part of first pair */
+	ulint	a2,	/*!< in: less significant part of first pair */
+	ulint	b1,	/*!< in: more significant part of second pair */
+	ulint	b2);	/*!< in: less significant part of second pair */
+/*************************************************************//**
+Determines if a number is zero or a power of two.
+@param n	in: number
+@return		nonzero if n is zero or a power of two; zero otherwise */
+#define ut_is_2pow(n) UNIV_LIKELY(!((n) & ((n) - 1)))
+/*************************************************************//**
+Calculates fast the remainder of n/m when m is a power of two.
+@param n	in: numerator
+@param m	in: denominator, must be a power of two
+@return		the remainder of n/m */
+#define ut_2pow_remainder(n, m) ((n) & ((m) - 1))
+/*************************************************************//**
+Calculates the biggest multiple of m that is not bigger than n
+when m is a power of two.  In other words, rounds n down to m * k.
+@param n	in: number to round down
+@param m	in: alignment, must be a power of two
+@return		n rounded down to the biggest possible integer multiple of m */
+#define ut_2pow_round(n, m) ((n) & ~((m) - 1))
+/** Align a number down to a multiple of a power of two.
+@param n	in: number to round down
+@param m	in: alignment, must be a power of two
+@return		n rounded down to the biggest possible integer multiple of m */
+#define ut_calc_align_down(n, m) ut_2pow_round(n, m)
+/********************************************************//**
+Calculates the smallest multiple of m that is not smaller than n
+when m is a power of two.  In other words, rounds n up to m * k.
+@param n	in: number to round up
+@param m	in: alignment, must be a power of two
+@return		n rounded up to the smallest possible integer multiple of m */
+#define ut_calc_align(n, m) (((n) + ((m) - 1)) & ~((m) - 1))
+/*************************************************************//**
+Calculates fast the 2-logarithm of a number, rounded upward to an
+integer.
+@return	logarithm in the base 2, rounded upward */
+UNIV_INLINE
+ulint
+ut_2_log(
+/*=====*/
+	ulint	n);	/*!< in: number */
+/*************************************************************//**
+Calculates 2 to power n.
+@return	2 to power n */
+UNIV_INLINE
+ulint
+ut_2_exp(
+/*=====*/
+	ulint	n);	/*!< in: number */
+/*************************************************************//**
+Calculates fast the number rounded up to the nearest power of 2.
+@return	first power of 2 which is >= n */
+UNIV_INTERN
+ulint
+ut_2_power_up(
+/*==========*/
+	ulint	n)	/*!< in: number != 0 */
+	__attribute__((const));
+
+/** Determine how many bytes (groups of 8 bits) are needed to
+store the given number of bits.
+@param b	in: bits
+@return		number of bytes (octets) needed to represent b */
+#define UT_BITS_IN_BYTES(b) (((b) + 7) / 8)
+
+/**********************************************************//**
+Returns system time. We do not specify the format of the time returned:
+the only way to manipulate it is to use the function ut_difftime.
+@return	system time */
+UNIV_INTERN
+ib_time_t
+ut_time(void);
+/*=========*/
+/**********************************************************//**
+Returns system time.
+Upon successful completion, the value 0 is returned; otherwise the
+value -1 is returned and the global variable errno is set to indicate the
+error.
+@return	0 on success, -1 otherwise */
+UNIV_INTERN
+int
+ut_usectime(
+/*========*/
+	ulint*	sec,	/*!< out: seconds since the Epoch */
+	ulint*	ms);	/*!< out: microseconds since the Epoch+*sec */
+
+/**********************************************************//**
+Returns the number of microseconds since epoch. Similar to
+time(3), the return value is also stored in *tloc, provided
+that tloc is non-NULL.
+@return	us since epoch */
+UNIV_INTERN
+ullint
+ut_time_us(
+/*=======*/
+	ullint*	tloc);	/*!< out: us since epoch, if non-NULL */
+
+/**********************************************************//**
+Returns the difference of two times in seconds.
+@return	time2 - time1 expressed in seconds */
+UNIV_INTERN
+double
+ut_difftime(
+/*========*/
+	ib_time_t	time2,	/*!< in: time */
+	ib_time_t	time1);	/*!< in: time */
+/**********************************************************//**
+Prints a timestamp to a file. */
+UNIV_INTERN
+void
+ut_print_timestamp(
+/*===============*/
+	FILE*  file); /*!< in: file where to print */
+/**********************************************************//**
+Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
+UNIV_INTERN
+void
+ut_sprintf_timestamp(
+/*=================*/
+	char*	buf); /*!< in: buffer where to sprintf */
+#ifdef UNIV_HOTBACKUP
+/**********************************************************//**
+Sprintfs a timestamp to a buffer with no spaces and with ':' characters
+replaced by '_'. */
+UNIV_INTERN
+void
+ut_sprintf_timestamp_without_extra_chars(
+/*=====================================*/
+	char*	buf); /*!< in: buffer where to sprintf */
+/**********************************************************//**
+Returns current year, month, day. */
+UNIV_INTERN
+void
+ut_get_year_month_day(
+/*==================*/
+	ulint*	year,	/*!< out: current year */
+	ulint*	month,	/*!< out: month */
+	ulint*	day);	/*!< out: day */
+#else /* UNIV_HOTBACKUP */
+/*************************************************************//**
+Runs an idle loop on CPU. The argument gives the desired delay
+in microseconds on 100 MHz Pentium + Visual C++.
+@return	dummy value */
+UNIV_INTERN
+ulint
+ut_delay(
+/*=====*/
+	ulint	delay);	/*!< in: delay in microseconds on 100 MHz Pentium */
+#endif /* UNIV_HOTBACKUP */
+/*************************************************************//**
+Prints the contents of a memory buffer in hex and ascii. */
+UNIV_INTERN
+void
+ut_print_buf(
+/*=========*/
+	FILE*		file,	/*!< in: file where to print */
+	const void*	buf,	/*!< in: memory buffer */
+	ulint		len);	/*!< in: length of the buffer */
+
+/**********************************************************************//**
+Outputs a NUL-terminated file name, quoted with apostrophes. */
+UNIV_INTERN
+void
+ut_print_filename(
+/*==============*/
+	FILE*		f,	/*!< in: output stream */
+	const char*	name);	/*!< in: name to print */
+
+#ifndef UNIV_HOTBACKUP
+/* Forward declaration of transaction handle */
+struct trx_struct;
+
+/**********************************************************************//**
+Outputs a fixed-length string, quoted as an SQL identifier.
+If the string contains a slash '/', the string will be
+output as two identifiers separated by a period (.),
+as in SQL database_name.identifier. */
+UNIV_INTERN
+void
+ut_print_name(
+/*==========*/
+	FILE*		f,	/*!< in: output stream */
+	struct trx_struct*trx,	/*!< in: transaction */
+	ibool		table_id,/*!< in: TRUE=print a table name,
+				FALSE=print other identifier */
+	const char*	name);	/*!< in: name to print */
+
+/**********************************************************************//**
+Outputs a fixed-length string, quoted as an SQL identifier.
+If the string contains a slash '/', the string will be
+output as two identifiers separated by a period (.),
+as in SQL database_name.identifier. */
+UNIV_INTERN
+void
+ut_print_namel(
+/*===========*/
+	FILE*		f,	/*!< in: output stream */
+	struct trx_struct*trx,	/*!< in: transaction (NULL=no quotes) */
+	ibool		table_id,/*!< in: TRUE=print a table name,
+				FALSE=print other identifier */
+	const char*	name,	/*!< in: name to print */
+	ulint		namelen);/*!< in: length of name */
+
+/**********************************************************************//**
+Catenate files. */
+UNIV_INTERN
+void
+ut_copy_file(
+/*=========*/
+	FILE*	dest,	/*!< in: output file */
+	FILE*	src);	/*!< in: input file to be appended to output */
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef __WIN__
+/**********************************************************************//**
+A substitute for snprintf(3), formatted output conversion into
+a limited buffer.
+@return number of characters that would have been printed if the size
+were unlimited, not including the terminating '\0'. */
+UNIV_INTERN
+int
+ut_snprintf(
+/*========*/
+	char*		str,	/*!< out: string */
+	size_t		size,	/*!< in: str size */
+	const char*	fmt,	/*!< in: format */
+	...);			/*!< in: format values */
+#else
+/**********************************************************************//**
+A wrapper for snprintf(3), formatted output conversion into
+a limited buffer. */
+# define ut_snprintf	snprintf
+#endif /* __WIN__ */
+
+#ifndef UNIV_NONINL
+#include "ut0ut.ic"
+#endif
+
+#endif
+
diff --git a/storage/innodb_plugin/include/ut0ut.ic b/storage/innodb_plugin/include/ut0ut.ic
new file mode 100644
index 00000000000..6f55c7e410e
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0ut.ic
@@ -0,0 +1,162 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************************//**
+@file include/ut0ut.ic
+Various utilities
+
+Created 5/30/1994 Heikki Tuuri
+*******************************************************************/
+
+/******************************************************//**
+Calculates the minimum of two ulints.
+@return	minimum */
+UNIV_INLINE
+ulint
+ut_min(
+/*===*/
+	ulint	 n1,	/*!< in: first number */
+	ulint	 n2)	/*!< in: second number */
+{
+	return((n1 <= n2) ? n1 : n2);
+}
+
+/******************************************************//**
+Calculates the maximum of two ulints.
+@return	maximum */
+UNIV_INLINE
+ulint
+ut_max(
+/*===*/
+	ulint	 n1,	/*!< in: first number */
+	ulint	 n2)	/*!< in: second number */
+{
+	return((n1 <= n2) ? n2 : n1);
+}
+
+/****************************************************************//**
+Calculates minimum of two ulint-pairs. */
+UNIV_INLINE
+void
+ut_pair_min(
+/*========*/
+	ulint*	a,	/*!< out: more significant part of minimum */
+	ulint*	b,	/*!< out: less significant part of minimum */
+	ulint	a1,	/*!< in: more significant part of first pair */
+	ulint	b1,	/*!< in: less significant part of first pair */
+	ulint	a2,	/*!< in: more significant part of second pair */
+	ulint	b2)	/*!< in: less significant part of second pair */
+{
+	if (a1 == a2) {
+		*a = a1;
+		*b = ut_min(b1, b2);
+	} else if (a1 < a2) {
+		*a = a1;
+		*b = b1;
+	} else {
+		*a = a2;
+		*b = b2;
+	}
+}
+
+/******************************************************//**
+Compares two ulints.
+@return	1 if a > b, 0 if a == b, -1 if a < b */
+UNIV_INLINE
+int
+ut_ulint_cmp(
+/*=========*/
+	ulint	a,	/*!< in: ulint */
+	ulint	b)	/*!< in: ulint */
+{
+	if (a < b) {
+		return(-1);
+	} else if (a == b) {
+		return(0);
+	} else {
+		return(1);
+	}
+}
+
+/*******************************************************//**
+Compares two pairs of ulints.
+@return	-1 if a < b, 0 if a == b, 1 if a > b */
+UNIV_INLINE
+int
+ut_pair_cmp(
+/*========*/
+	ulint	a1,	/*!< in: more significant part of first pair */
+	ulint	a2,	/*!< in: less significant part of first pair */
+	ulint	b1,	/*!< in: more significant part of second pair */
+	ulint	b2)	/*!< in: less significant part of second pair */
+{
+	if (a1 > b1) {
+		return(1);
+	} else if (a1 < b1) {
+		return(-1);
+	} else if (a2 > b2) {
+		return(1);
+	} else if (a2 < b2) {
+		return(-1);
+	} else {
+		return(0);
+	}
+}
+
+/*************************************************************//**
+Calculates fast the 2-logarithm of a number, rounded upward to an
+integer.
+@return	logarithm in the base 2, rounded upward */
+UNIV_INLINE
+ulint
+ut_2_log(
+/*=====*/
+	ulint	n)	/*!< in: number != 0 */
+{
+	ulint	res;
+
+	res = 0;
+
+	ut_ad(n > 0);
+
+	n = n - 1;
+
+	for (;;) {
+		n = n / 2;
+
+		if (n == 0) {
+			break;
+		}
+
+		res++;
+	}
+
+	return(res + 1);
+}
+
+/*************************************************************//**
+Calculates 2 to power n.
+@return	2 to power n */
+UNIV_INLINE
+ulint
+ut_2_exp(
+/*=====*/
+	ulint	n)	/*!< in: number */
+{
+	return((ulint) 1 << n);
+}
diff --git a/storage/innodb_plugin/include/ut0vec.h b/storage/innodb_plugin/include/ut0vec.h
new file mode 100644
index 00000000000..a770f671cfc
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0vec.h
@@ -0,0 +1,125 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0vec.h
+A vector of pointers to data items
+
+Created 4/6/2006 Osku Salerma
+************************************************************************/
+
+#ifndef IB_VECTOR_H
+#define IB_VECTOR_H
+
+#include "univ.i"
+#include "mem0mem.h"
+
+/** An automatically resizing vector data type. */
+typedef struct ib_vector_struct ib_vector_t;
+
+/* An automatically resizing vector datatype with the following properties:
+
+ -Contains void* items.
+
+ -The items are owned by the caller.
+
+ -All memory allocation is done through a heap owned by the caller, who is
+ responsible for freeing it when done with the vector.
+
+ -When the vector is resized, the old memory area is left allocated since it
+ uses the same heap as the new memory area, so this is best used for
+ relatively small or short-lived uses.
+*/
+
+/****************************************************************//**
+Create a new vector with the given initial size.
+@return	vector */
+UNIV_INTERN
+ib_vector_t*
+ib_vector_create(
+/*=============*/
+	mem_heap_t*	heap,	/*!< in: heap */
+	ulint		size);	/*!< in: initial size */
+
+/****************************************************************//**
+Push a new element to the vector, increasing its size if necessary. */
+UNIV_INTERN
+void
+ib_vector_push(
+/*===========*/
+	ib_vector_t*	vec,	/*!< in: vector */
+	void*		elem);	/*!< in: data element */
+
+/****************************************************************//**
+Get the number of elements in the vector.
+@return	number of elements in vector */
+UNIV_INLINE
+ulint
+ib_vector_size(
+/*===========*/
+	const ib_vector_t*	vec);	/*!< in: vector */
+
+/****************************************************************//**
+Test whether a vector is empty or not.
+@return	TRUE if empty */
+UNIV_INLINE
+ibool
+ib_vector_is_empty(
+/*===============*/
+	const ib_vector_t*	vec);	/*!< in: vector */
+
+/****************************************************************//**
+Get the n'th element.
+@return	n'th element */
+UNIV_INLINE
+void*
+ib_vector_get(
+/*==========*/
+	ib_vector_t*	vec,	/*!< in: vector */
+	ulint		n);	/*!< in: element index to get */
+
+/****************************************************************//**
+Remove the last element from the vector. */
+UNIV_INLINE
+void*
+ib_vector_pop(
+/*==========*/
+	ib_vector_t*	vec);	/*!< in: vector */
+
+/****************************************************************//**
+Free the underlying heap of the vector. Note that vec is invalid
+after this call. */
+UNIV_INLINE
+void
+ib_vector_free(
+/*===========*/
+	ib_vector_t*	vec);	/*!< in,own: vector */
+
+/** An automatically resizing vector data type. */
+struct ib_vector_struct {
+	mem_heap_t*	heap;	/*!< heap */
+	void**		data;	/*!< data elements */
+	ulint		used;	/*!< number of elements currently used */
+	ulint		total;	/*!< number of elements allocated */
+};
+
+#ifndef UNIV_NONINL
+#include "ut0vec.ic"
+#endif
+
+#endif
diff --git a/storage/innodb_plugin/include/ut0vec.ic b/storage/innodb_plugin/include/ut0vec.ic
new file mode 100644
index 00000000000..02e881f9bca
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0vec.ic
@@ -0,0 +1,96 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0vec.ic
+A vector of pointers to data items
+
+Created 4/6/2006 Osku Salerma
+************************************************************************/
+
+/****************************************************************//**
+Get number of elements in vector.
+@return	number of elements in vector */
+UNIV_INLINE
+ulint
+ib_vector_size(
+/*===========*/
+	const ib_vector_t*	vec)	/*!< in: vector */
+{
+	return(vec->used);
+}
+
+/****************************************************************//**
+Get n'th element.
+@return	n'th element */
+UNIV_INLINE
+void*
+ib_vector_get(
+/*==========*/
+	ib_vector_t*	vec,	/*!< in: vector */
+	ulint		n)	/*!< in: element index to get */
+{
+	ut_a(n < vec->used);
+
+	return(vec->data[n]);
+}
+
+/****************************************************************//**
+Remove the last element from the vector.
+@return	last vector element */
+UNIV_INLINE
+void*
+ib_vector_pop(
+/*==========*/
+	ib_vector_t*    vec)    /*!< in/out: vector */
+{
+	void*           elem;
+
+	ut_a(vec->used > 0);
+	--vec->used;
+	elem = vec->data[vec->used];
+
+	ut_d(vec->data[vec->used] = NULL);
+	UNIV_MEM_INVALID(&vec->data[vec->used], sizeof(*vec->data));
+
+	return(elem);
+}
+
+/****************************************************************//**
+Free the underlying heap of the vector. Note that vec is invalid
+after this call. */
+UNIV_INLINE
+void
+ib_vector_free(
+/*===========*/
+	ib_vector_t*    vec)    /*!< in, own: vector */
+{
+	mem_heap_free(vec->heap);
+}
+
+/****************************************************************//**
+Test whether a vector is empty or not.
+@return	TRUE if empty */
+UNIV_INLINE
+ibool
+ib_vector_is_empty(
+/*===============*/
+	const ib_vector_t*	vec)	/*!< in: vector */
+{
+	return(ib_vector_size(vec) == 0);
+}
diff --git a/storage/innodb_plugin/include/ut0wqueue.h b/storage/innodb_plugin/include/ut0wqueue.h
new file mode 100644
index 00000000000..2ec0f16ab05
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0wqueue.h
@@ -0,0 +1,85 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0wqueue.h
+A work queue
+
+Created 4/26/2006 Osku Salerma
+************************************************************************/
+
+/*******************************************************************//**
+A Work queue. Threads can add work items to the queue and other threads can
+wait for work items to be available and take them off the queue for
+processing.
+************************************************************************/
+
+#ifndef IB_WORK_QUEUE_H
+#define IB_WORK_QUEUE_H
+
+#include "ut0list.h"
+#include "mem0mem.h"
+#include "os0sync.h"
+#include "sync0types.h"
+
+typedef struct ib_wqueue_struct ib_wqueue_t;
+
+/****************************************************************//**
+Create a new work queue.
+@return	work queue */
+UNIV_INTERN
+ib_wqueue_t*
+ib_wqueue_create(void);
+/*===================*/
+
+/****************************************************************//**
+Free a work queue. */
+UNIV_INTERN
+void
+ib_wqueue_free(
+/*===========*/
+	ib_wqueue_t*	wq);	/*!< in: work queue */
+
+/****************************************************************//**
+Add a work item to the queue. */
+UNIV_INTERN
+void
+ib_wqueue_add(
+/*==========*/
+	ib_wqueue_t*	wq,	/*!< in: work queue */
+	void*		item,	/*!< in: work item */
+	mem_heap_t*	heap);	/*!< in: memory heap to use for allocating the
+				list node */
+
+/****************************************************************//**
+Wait for a work item to appear in the queue.
+@return	work item */
+UNIV_INTERN
+void*
+ib_wqueue_wait(
+/*===========*/
+	ib_wqueue_t*	wq);	/*!< in: work queue */
+
+/* Work queue. */
+struct ib_wqueue_struct {
+	mutex_t		mutex;	/*!< mutex protecting everything */
+	ib_list_t*	items;	/*!< work item list */
+	os_event_t	event;	/*!< event we use to signal additions to list */
+};
+
+#endif
diff --git a/storage/innobase/lock/lock0iter.c b/storage/innodb_plugin/lock/lock0iter.c
similarity index 54%
rename from storage/innobase/lock/lock0iter.c
rename to storage/innodb_plugin/lock/lock0iter.c
index 0afa7019c86..51d1802ccde 100644
--- a/storage/innobase/lock/lock0iter.c
+++ b/storage/innodb_plugin/lock/lock0iter.c
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file lock/lock0iter.c
 Lock queue iterator. Can iterate over table and record
 lock queues.
 
-(c) 2007 Innobase Oy
-
 Created July 16, 2007 Vasil Dimov
 *******************************************************/
 
@@ -15,8 +32,11 @@ Created July 16, 2007 Vasil Dimov
 #include "lock0priv.h"
 #include "ut0dbg.h"
 #include "ut0lst.h"
+#ifdef UNIV_DEBUG
+# include "srv0srv.h" /* kernel_mutex */
+#endif /* UNIV_DEBUG */
 
-/***********************************************************************
+/*******************************************************************//**
 Initialize lock queue iterator so that it starts to iterate from
 "lock". bit_no specifies the record number within the heap where the
 record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
@@ -25,15 +45,17 @@ record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
    bit_no is calculated in this function by using
    lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
    of a wait lock. */
-
+UNIV_INTERN
 void
 lock_queue_iterator_reset(
 /*======================*/
-	lock_queue_iterator_t*	iter,	/* out: iterator */
-	lock_t*			lock,	/* in: lock to start from */
-	ulint			bit_no)	/* in: record number in the
+	lock_queue_iterator_t*	iter,	/*!< out: iterator */
+	const lock_t*		lock,	/*!< in: lock to start from */
+	ulint			bit_no)	/*!< in: record number in the
 					heap */
 {
+	ut_ad(mutex_own(&kernel_mutex));
+
 	iter->current_lock = lock;
 
 	if (bit_no != ULINT_UNDEFINED) {
@@ -41,7 +63,7 @@ lock_queue_iterator_reset(
 		iter->bit_no = bit_no;
 	} else {
 
-		switch (lock_get_type(lock)) {
+		switch (lock_get_type_low(lock)) {
 		case LOCK_TABLE:
 			iter->bit_no = ULINT_UNDEFINED;
 			break;
@@ -55,20 +77,22 @@ lock_queue_iterator_reset(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Gets the previous lock in the lock queue, returns NULL if there are no
 more locks (i.e. the current lock is the first one). The iterator is
-receded (if not-NULL is returned). */
-
-lock_t*
+receded (if not-NULL is returned).
+@return	previous lock or NULL */
+UNIV_INTERN
+const lock_t*
 lock_queue_iterator_get_prev(
 /*=========================*/
-					/* out: previous lock or NULL */
-	lock_queue_iterator_t*	iter)	/* in/out: iterator */
+	lock_queue_iterator_t*	iter)	/*!< in/out: iterator */
 {
-	lock_t*	prev_lock;
+	const lock_t*	prev_lock;
 
-	switch (lock_get_type(iter->current_lock)) {
+	ut_ad(mutex_own(&kernel_mutex));
+
+	switch (lock_get_type_low(iter->current_lock)) {
 	case LOCK_REC:
 		prev_lock = lock_rec_get_prev(
 			iter->current_lock, iter->bit_no);
diff --git a/storage/innobase/lock/lock0lock.c b/storage/innodb_plugin/lock/lock0lock.c
similarity index 62%
rename from storage/innobase/lock/lock0lock.c
rename to storage/innodb_plugin/lock/lock0lock.c
index 5afd19aa7e7..fcd8d268331 100644
--- a/storage/innobase/lock/lock0lock.c
+++ b/storage/innodb_plugin/lock/lock0lock.c
@@ -1,7 +1,24 @@
-/******************************************************
-The transaction lock system
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file lock/lock0lock.c
+The transaction lock system
 
 Created 5/7/1996 Heikki Tuuri
 *******************************************************/
@@ -16,37 +33,12 @@ Created 5/7/1996 Heikki Tuuri
 #include "lock0priv.ic"
 #endif
 
+#include "ha_prototypes.h"
 #include "usr0sess.h"
 #include "trx0purge.h"
 #include "dict0mem.h"
 #include "trx0sys.h"
 
-
-/* 2 function prototypes copied from ha_innodb.cc: */
-
-/*****************************************************************
-If you want to print a thd that is not associated with the current thread,
-you must call this function before reserving the InnoDB kernel_mutex, to
-protect MySQL from setting thd->query NULL. If you print a thd of the current
-thread, we know that MySQL cannot modify thd->query, and it is not necessary
-to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
-the kernel_mutex.
-NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
-function! */
-
-void
-innobase_mysql_prepare_print_arbitrary_thd(void);
-/*============================================*/
-
-/*****************************************************************
-Relases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
-NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
-function! */
-
-void
-innobase_mysql_end_print_arbitrary_thd(void);
-/*========================================*/
-
 /* Restricts the length of search we will do in the waits-for
 graph of transactions */
 #define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000
@@ -315,75 +307,124 @@ locks on the inserted record. */
  * statement-level MySQL binlog.
  * See also lock_mode_compatible().
  */
+#define LK(a,b) (1 << ((a) * LOCK_NUM + (b)))
+#define LKS(a,b) LK(a,b) | LK(b,a)
+
+/* Define the lock compatibility matrix in a ulint.  The first line below
+defines the diagonal entries.  The following lines define the compatibility
+for LOCK_IX, LOCK_S, and LOCK_AUTO_INC using LKS(), since the matrix
+is symmetric. */
+#define LOCK_MODE_COMPATIBILITY 0					\
+ | LK(LOCK_IS, LOCK_IS) | LK(LOCK_IX, LOCK_IX) | LK(LOCK_S, LOCK_S)	\
+ | LKS(LOCK_IX, LOCK_IS) | LKS(LOCK_IS, LOCK_AUTO_INC)			\
+ | LKS(LOCK_S, LOCK_IS)							\
+ | LKS(LOCK_AUTO_INC, LOCK_IS) | LKS(LOCK_AUTO_INC, LOCK_IX)
+
+/* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column)
+ *    IS IX S  X  AI
+ * IS +  -  -  -  -
+ * IX +  +  -  -  -
+ * S  +  -  +  -  -
+ * X  +  +  +  +  +
+ * AI -  -  -  -  +
+ * See lock_mode_stronger_or_eq().
+ */
+
+/* Define the stronger-or-equal lock relation in a ulint.  This relation
+contains all pairs LK(mode1, mode2) where mode1 is stronger than or
+equal to mode2. */
+#define LOCK_MODE_STRONGER_OR_EQ 0					\
+ | LK(LOCK_IS, LOCK_IS)							\
+ | LK(LOCK_IX, LOCK_IS) | LK(LOCK_IX, LOCK_IX)				\
+ | LK(LOCK_S, LOCK_IS) | LK(LOCK_S, LOCK_S)				\
+ | LK(LOCK_AUTO_INC, LOCK_AUTO_INC)					\
+ | LK(LOCK_X, LOCK_IS) | LK(LOCK_X, LOCK_IX) | LK(LOCK_X, LOCK_S)	\
+ | LK(LOCK_X, LOCK_AUTO_INC) | LK(LOCK_X, LOCK_X)
 
 #ifdef UNIV_DEBUG
-ibool	lock_print_waits	= FALSE;
+UNIV_INTERN ibool	lock_print_waits	= FALSE;
+
+/*********************************************************************//**
+Validates the lock system.
+@return	TRUE if ok */
+static
+ibool
+lock_validate(void);
+/*===============*/
+
+/*********************************************************************//**
+Validates the record lock queues on a page.
+@return	TRUE if ok */
+static
+ibool
+lock_rec_validate_page(
+/*===================*/
+	ulint	space,	/*!< in: space id */
+	ulint	page_no);/*!< in: page number */
+
+/* Define the following in order to enable lock_rec_validate_page() checks. */
+# undef UNIV_DEBUG_LOCK_VALIDATE
 #endif /* UNIV_DEBUG */
 
 /* The lock system */
-lock_sys_t*	lock_sys	= NULL;
+UNIV_INTERN lock_sys_t*	lock_sys	= NULL;
 
 /* We store info on the latest deadlock error to this buffer. InnoDB
 Monitor will then fetch it and print */
-ibool	lock_deadlock_found = FALSE;
-FILE*	lock_latest_err_file;
+UNIV_INTERN ibool	lock_deadlock_found = FALSE;
+UNIV_INTERN FILE*	lock_latest_err_file;
 
 /* Flags for recursive deadlock search */
 #define LOCK_VICTIM_IS_START	1
 #define LOCK_VICTIM_IS_OTHER	2
 
-/************************************************************************
-Checks if a lock request results in a deadlock. */
+/********************************************************************//**
+Checks if a lock request results in a deadlock.
+@return TRUE if a deadlock was detected and we chose trx as a victim;
+FALSE if no deadlock, or there was a deadlock, but we chose other
+transaction(s) as victim(s) */
 static
 ibool
 lock_deadlock_occurs(
 /*=================*/
-			/* out: TRUE if a deadlock was detected and we
-			chose trx as a victim; FALSE if no deadlock, or
-			there was a deadlock, but we chose other
-			transaction(s) as victim(s) */
-	lock_t*	lock,	/* in: lock the transaction is requesting */
-	trx_t*	trx);	/* in: transaction */
-/************************************************************************
-Looks recursively for a deadlock. */
+	lock_t*	lock,	/*!< in: lock the transaction is requesting */
+	trx_t*	trx);	/*!< in: transaction */
+/********************************************************************//**
+Looks recursively for a deadlock.
+@return 0 if no deadlock found, LOCK_VICTIM_IS_START if there was a
+deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
+deadlock was found and we chose some other trx as a victim: we must do
+the search again in this last case because there may be another
+deadlock! */
 static
 ulint
 lock_deadlock_recursive(
 /*====================*/
-				/* out: 0 if no deadlock found,
-				LOCK_VICTIM_IS_START if there was a deadlock
-				and we chose 'start' as the victim,
-				LOCK_VICTIM_IS_OTHER if a deadlock
-				was found and we chose some other trx as a
-				victim: we must do the search again in this
-				last case because there may be another
-				deadlock! */
-	trx_t*	start,		/* in: recursion starting point */
-	trx_t*	trx,		/* in: a transaction waiting for a lock */
-	lock_t*	wait_lock,	/* in: the lock trx is waiting to be granted */
-	ulint*	cost,		/* in/out: number of calculation steps thus
+	trx_t*	start,		/*!< in: recursion starting point */
+	trx_t*	trx,		/*!< in: a transaction waiting for a lock */
+	lock_t*	wait_lock,	/*!< in: the lock trx is waiting to be granted */
+	ulint*	cost,		/*!< in/out: number of calculation steps thus
 				far: if this exceeds LOCK_MAX_N_STEPS_...
 				we return LOCK_VICTIM_IS_START */
-	ulint	depth);		/* in: recursion depth: if this exceeds
+	ulint	depth);		/*!< in: recursion depth: if this exceeds
 				LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
 				return LOCK_VICTIM_IS_START */
 
-/*************************************************************************
-Gets the nth bit of a record lock. */
+/*********************************************************************//**
+Gets the nth bit of a record lock.
+@return	TRUE if bit set */
 UNIV_INLINE
 ibool
 lock_rec_get_nth_bit(
 /*=================*/
-			/* out: TRUE if bit set */
-	lock_t*	lock,	/* in: record lock */
-	ulint	i)	/* in: index of the bit */
+	const lock_t*	lock,	/*!< in: record lock */
+	ulint		i)	/*!< in: index of the bit */
 {
 	ulint	byte_index;
 	ulint	bit_index;
-	ulint	b;
 
 	ut_ad(lock);
-	ut_ad(lock_get_type(lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
 
 	if (i >= lock->un_member.rec_lock.n_bits) {
 
@@ -393,9 +434,7 @@ lock_rec_get_nth_bit(
 	byte_index = i / 8;
 	bit_index = i % 8;
 
-	b = (ulint)*((byte*)lock + sizeof(lock_t) + byte_index);
-
-	return(ut_bit_get_nth(b, bit_index));
+	return(1 & ((const byte*) &lock[1])[byte_index] >> bit_index);
 }
 
 /*************************************************************************/
@@ -403,18 +442,18 @@ lock_rec_get_nth_bit(
 #define lock_mutex_enter_kernel()	mutex_enter(&kernel_mutex)
 #define lock_mutex_exit_kernel()	mutex_exit(&kernel_mutex)
 
-/*************************************************************************
-Checks that a transaction id is sensible, i.e., not in the future. */
-
+/*********************************************************************//**
+Checks that a transaction id is sensible, i.e., not in the future.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 lock_check_trx_id_sanity(
 /*=====================*/
-					/* out: TRUE if ok */
-	dulint		trx_id,		/* in: trx id */
-	rec_t*		rec,		/* in: user record */
-	dict_index_t*	index,		/* in: index */
-	const ulint*	offsets,	/* in: rec_get_offsets(rec, index) */
-	ibool		has_kernel_mutex)/* in: TRUE if the caller owns the
+	trx_id_t	trx_id,		/*!< in: trx id */
+	const rec_t*	rec,		/*!< in: user record */
+	dict_index_t*	index,		/*!< in: index */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
+	ibool		has_kernel_mutex)/*!< in: TRUE if the caller owns the
 					kernel mutex */
 {
 	ibool	is_ok		= TRUE;
@@ -437,14 +476,12 @@ lock_check_trx_id_sanity(
 		fputs("InnoDB: in ", stderr);
 		dict_index_name_print(stderr, NULL, index);
 		fprintf(stderr, "\n"
-			"InnoDB: is %lu %lu which is higher than the"
-			" global trx id counter %lu %lu!\n"
+			"InnoDB: is " TRX_ID_FMT " which is higher than the"
+			" global trx id counter " TRX_ID_FMT "!\n"
 			"InnoDB: The table is corrupt. You have to do"
 			" dump + drop + reimport.\n",
-			(ulong) ut_dulint_get_high(trx_id),
-			(ulong) ut_dulint_get_low(trx_id),
-			(ulong) ut_dulint_get_high(trx_sys->max_trx_id),
-			(ulong) ut_dulint_get_low(trx_sys->max_trx_id));
+			TRX_ID_PREP_PRINTF(trx_id),
+			TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
 
 		is_ok = FALSE;
 	}
@@ -456,23 +493,23 @@ lock_check_trx_id_sanity(
 	return(is_ok);
 }
 
-/*************************************************************************
-Checks that a record is seen in a consistent read. */
-
+/*********************************************************************//**
+Checks that a record is seen in a consistent read.
+@return TRUE if sees, or FALSE if an earlier version of the record
+should be retrieved */
+UNIV_INTERN
 ibool
 lock_clust_rec_cons_read_sees(
 /*==========================*/
-				/* out: TRUE if sees, or FALSE if an earlier
-				version of the record should be retrieved */
-	rec_t*		rec,	/* in: user record which should be read or
+	const rec_t*	rec,	/*!< in: user record which should be read or
 				passed over by a read cursor */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	read_view_t*	view)	/* in: consistent read view */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	read_view_t*	view)	/*!< in: consistent read view */
 {
-	dulint	trx_id;
+	trx_id_t	trx_id;
 
-	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(dict_index_is_clust(index));
 	ut_ad(page_rec_is_user_rec(rec));
 	ut_ad(rec_offs_validate(rec, index, offsets));
 
@@ -485,30 +522,27 @@ lock_clust_rec_cons_read_sees(
 	return(read_view_sees_trx_id(view, trx_id));
 }
 
-/*************************************************************************
-Checks that a non-clustered index record is seen in a consistent read. */
+/*********************************************************************//**
+Checks that a non-clustered index record is seen in a consistent read.
 
+NOTE that a non-clustered index page contains so little information on
+its modifications that also in the case FALSE, the present version of
+rec may be the right, but we must check this from the clustered index
+record.
+
+@return TRUE if certainly sees, or FALSE if an earlier version of the
+clustered index record might be needed */
+UNIV_INTERN
 ulint
 lock_sec_rec_cons_read_sees(
 /*========================*/
-				/* out: TRUE if certainly sees, or FALSE if an
-				earlier version of the clustered index record
-				might be needed: NOTE that a non-clustered
-				index page contains so little information on
-				its modifications that also in the case FALSE,
-				the present version of rec may be the right,
-				but we must check this from the clustered
-				index record */
-	rec_t*		rec,	/* in: user record which should be read or
-				passed over by a read cursor */
-	dict_index_t*	index,	/* in: non-clustered index */
-	read_view_t*	view)	/* in: consistent read view */
+	const rec_t*		rec,	/*!< in: user record which
+					should be read or passed over
+					by a read cursor */
+	const read_view_t*	view)	/*!< in: consistent read view */
 {
-	dulint	max_trx_id;
+	trx_id_t	max_trx_id;
 
-	UT_NOT_USED(index);
-
-	ut_ad(!(index->type & DICT_CLUSTERED));
 	ut_ad(page_rec_is_user_rec(rec));
 
 	/* NOTE that we might call this function while holding the search
@@ -520,23 +554,19 @@ lock_sec_rec_cons_read_sees(
 		return(FALSE);
 	}
 
-	max_trx_id = page_get_max_trx_id(buf_frame_align(rec));
+	max_trx_id = page_get_max_trx_id(page_align(rec));
+	ut_ad(!ut_dulint_is_zero(max_trx_id));
 
-	if (ut_dulint_cmp(max_trx_id, view->up_limit_id) >= 0) {
-
-		return(FALSE);
-	}
-
-	return(TRUE);
+	return(ut_dulint_cmp(max_trx_id, view->up_limit_id) < 0);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Creates the lock system at database start. */
-
+UNIV_INTERN
 void
 lock_sys_create(
 /*============*/
-	ulint	n_cells)	/* in: number of slots in lock hash table */
+	ulint	n_cells)	/*!< in: number of slots in lock hash table */
 {
 	lock_sys = mem_alloc(sizeof(lock_sys_t));
 
@@ -548,43 +578,43 @@ lock_sys_create(
 	ut_a(lock_latest_err_file);
 }
 
-/*************************************************************************
-Gets the size of a lock struct. */
-
+/*********************************************************************//**
+Gets the size of a lock struct.
+@return	size in bytes */
+UNIV_INTERN
 ulint
 lock_get_size(void)
 /*===============*/
-			/* out: size in bytes */
 {
 	return((ulint)sizeof(lock_t));
 }
 
-/*************************************************************************
-Gets the mode of a lock. */
+/*********************************************************************//**
+Gets the mode of a lock.
+@return	mode */
 UNIV_INLINE
-ulint
+enum lock_mode
 lock_get_mode(
 /*==========*/
-				/* out: mode */
-	const lock_t*	lock)	/* in: lock */
+	const lock_t*	lock)	/*!< in: lock */
 {
 	ut_ad(lock);
 
 	return(lock->type_mode & LOCK_MODE_MASK);
 }
 
-/*************************************************************************
-Gets the wait flag of a lock. */
+/*********************************************************************//**
+Gets the wait flag of a lock.
+@return	TRUE if waiting */
 UNIV_INLINE
 ibool
 lock_get_wait(
 /*==========*/
-			/* out: TRUE if waiting */
-	lock_t*	lock)	/* in: lock */
+	const lock_t*	lock)	/*!< in: lock */
 {
 	ut_ad(lock);
 
-	if (lock->type_mode & LOCK_WAIT) {
+	if (UNIV_UNLIKELY(lock->type_mode & LOCK_WAIT)) {
 
 		return(TRUE);
 	}
@@ -592,21 +622,20 @@ lock_get_wait(
 	return(FALSE);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Gets the source table of an ALTER TABLE transaction.  The table must be
-covered by an IX or IS table lock. */
-
+covered by an IX or IS table lock.
+@return the source table of transaction, if it is covered by an IX or
+IS table lock; dest if there is no source table, and NULL if the
+transaction is locking more than two tables or an inconsistency is
+found */
+UNIV_INTERN
 dict_table_t*
 lock_get_src_table(
 /*===============*/
-				/* out: the source table of transaction,
-				if it is covered by an IX or IS table lock;
-				dest if there is no source table, and
-				NULL if the transaction is locking more than
-				two tables or an inconsistency is found */
-	trx_t*		trx,	/* in: transaction */
-	dict_table_t*	dest,	/* in: destination of ALTER TABLE */
-	ulint*		mode)	/* out: lock mode of the source table */
+	trx_t*		trx,	/*!< in: transaction */
+	dict_table_t*	dest,	/*!< in: destination of ALTER TABLE */
+	enum lock_mode*	mode)	/*!< out: lock mode of the source table */
 {
 	dict_table_t*	src;
 	lock_t*		lock;
@@ -618,8 +647,8 @@ lock_get_src_table(
 	     lock;
 	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
 		lock_table_t*	tab_lock;
-		ulint		lock_mode;
-		if (!(lock_get_type(lock) & LOCK_TABLE)) {
+		enum lock_mode	lock_mode;
+		if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
 			/* We are only interested in table locks. */
 			continue;
 		}
@@ -645,15 +674,12 @@ lock_get_src_table(
 		/* Check that the source table is locked by
 		LOCK_IX or LOCK_IS. */
 		lock_mode = lock_get_mode(lock);
-		switch (lock_mode) {
-		case LOCK_IX:
-		case LOCK_IS:
+		if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) {
 			if (*mode != LOCK_NONE && *mode != lock_mode) {
 				/* There are multiple locks on src. */
 				return(NULL);
 			}
 			*mode = lock_mode;
-			break;
 		}
 	}
 
@@ -665,21 +691,21 @@ lock_get_src_table(
 	return(src);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Determine if the given table is exclusively "owned" by the given
 transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
-on the table. */
-
+on the table.
+@return TRUE if table is only locked by trx, with LOCK_IX, and
+possibly LOCK_AUTO_INC */
+UNIV_INTERN
 ibool
 lock_is_table_exclusive(
 /*====================*/
-				/* out: TRUE if table is only locked by trx,
-				with LOCK_IX, and possibly LOCK_AUTO_INC */
-	dict_table_t*	table,	/* in: table */
-	trx_t*		trx)	/* in: transaction */
+	dict_table_t*	table,	/*!< in: table */
+	trx_t*		trx)	/*!< in: transaction */
 {
-	lock_t*	lock;
-	ibool	ok	= FALSE;
+	const lock_t*	lock;
+	ibool		ok	= FALSE;
 
 	ut_ad(table);
 	ut_ad(trx);
@@ -695,7 +721,7 @@ lock_is_table_exclusive(
 			goto not_ok;
 		}
 
-		if (!(lock_get_type(lock) & LOCK_TABLE)) {
+		if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
 			/* We are interested in table locks only. */
 			continue;
 		}
@@ -722,30 +748,30 @@ func_exit:
 	return(ok);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Sets the wait flag of a lock and the back pointer in trx to lock. */
 UNIV_INLINE
 void
 lock_set_lock_and_trx_wait(
 /*=======================*/
-	lock_t*	lock,	/* in: lock */
-	trx_t*	trx)	/* in: trx */
+	lock_t*	lock,	/*!< in: lock */
+	trx_t*	trx)	/*!< in: trx */
 {
 	ut_ad(lock);
 	ut_ad(trx->wait_lock == NULL);
 
 	trx->wait_lock = lock;
-	lock->type_mode = lock->type_mode | LOCK_WAIT;
+	lock->type_mode |= LOCK_WAIT;
 }
 
-/**************************************************************************
+/**********************************************************************//**
 The back pointer to a waiting lock request in the transaction is set to NULL
 and the wait bit in lock type_mode is reset. */
 UNIV_INLINE
 void
 lock_reset_lock_and_trx_wait(
 /*=========================*/
-	lock_t*	lock)	/* in: record lock */
+	lock_t*	lock)	/*!< in: record lock */
 {
 	ut_ad((lock->trx)->wait_lock == lock);
 	ut_ad(lock_get_wait(lock));
@@ -753,20 +779,20 @@ lock_reset_lock_and_trx_wait(
 	/* Reset the back pointer in trx to this waiting lock request */
 
 	(lock->trx)->wait_lock = NULL;
-	lock->type_mode = lock->type_mode & ~LOCK_WAIT;
+	lock->type_mode &= ~LOCK_WAIT;
 }
 
-/*************************************************************************
-Gets the gap flag of a record lock. */
+/*********************************************************************//**
+Gets the gap flag of a record lock.
+@return	TRUE if gap flag set */
 UNIV_INLINE
 ibool
 lock_rec_get_gap(
 /*=============*/
-			/* out: TRUE if gap flag set */
-	lock_t*	lock)	/* in: record lock */
+	const lock_t*	lock)	/*!< in: record lock */
 {
 	ut_ad(lock);
-	ut_ad(lock_get_type(lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
 
 	if (lock->type_mode & LOCK_GAP) {
 
@@ -776,17 +802,17 @@ lock_rec_get_gap(
 	return(FALSE);
 }
 
-/*************************************************************************
-Gets the LOCK_REC_NOT_GAP flag of a record lock. */
+/*********************************************************************//**
+Gets the LOCK_REC_NOT_GAP flag of a record lock.
+@return	TRUE if LOCK_REC_NOT_GAP flag set */
 UNIV_INLINE
 ibool
 lock_rec_get_rec_not_gap(
 /*=====================*/
-			/* out: TRUE if LOCK_REC_NOT_GAP flag set */
-	lock_t*	lock)	/* in: record lock */
+	const lock_t*	lock)	/*!< in: record lock */
 {
 	ut_ad(lock);
-	ut_ad(lock_get_type(lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
 
 	if (lock->type_mode & LOCK_REC_NOT_GAP) {
 
@@ -796,17 +822,17 @@ lock_rec_get_rec_not_gap(
 	return(FALSE);
 }
 
-/*************************************************************************
-Gets the waiting insert flag of a record lock. */
+/*********************************************************************//**
+Gets the waiting insert flag of a record lock.
+@return	TRUE if gap flag set */
 UNIV_INLINE
 ibool
 lock_rec_get_insert_intention(
 /*==========================*/
-			/* out: TRUE if gap flag set */
-	lock_t*	lock)	/* in: record lock */
+	const lock_t*	lock)	/*!< in: record lock */
 {
 	ut_ad(lock);
-	ut_ad(lock_get_type(lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
 
 	if (lock->type_mode & LOCK_INSERT_INTENTION) {
 
@@ -816,108 +842,65 @@ lock_rec_get_insert_intention(
 	return(FALSE);
 }
 
-/*************************************************************************
-Calculates if lock mode 1 is stronger or equal to lock mode 2. */
+/*********************************************************************//**
+Calculates if lock mode 1 is stronger or equal to lock mode 2.
+@return	nonzero if mode1 stronger or equal to mode2 */
 UNIV_INLINE
-ibool
+ulint
 lock_mode_stronger_or_eq(
 /*=====================*/
-			/* out: TRUE if mode1 stronger or equal to mode2 */
-	ulint	mode1,	/* in: lock mode */
-	ulint	mode2)	/* in: lock mode */
+	enum lock_mode	mode1,	/*!< in: lock mode */
+	enum lock_mode	mode2)	/*!< in: lock mode */
 {
 	ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
 	      || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
 	ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
 	      || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
-	if (mode1 == LOCK_X) {
 
-		return(TRUE);
-
-	} else if (mode1 == LOCK_AUTO_INC && mode2 == LOCK_AUTO_INC) {
-
-		return(TRUE);
-
-	} else if (mode1 == LOCK_S
-		   && (mode2 == LOCK_S || mode2 == LOCK_IS)) {
-		return(TRUE);
-
-	} else if (mode1 == LOCK_IS && mode2 == LOCK_IS) {
-
-		return(TRUE);
-
-	} else if (mode1 == LOCK_IX && (mode2 == LOCK_IX
-					|| mode2 == LOCK_IS)) {
-		return(TRUE);
-	}
-
-	return(FALSE);
+	return((LOCK_MODE_STRONGER_OR_EQ) & LK(mode1, mode2));
 }
 
-/*************************************************************************
-Calculates if lock mode 1 is compatible with lock mode 2. */
+/*********************************************************************//**
+Calculates if lock mode 1 is compatible with lock mode 2.
+@return	nonzero if mode1 compatible with mode2 */
 UNIV_INLINE
-ibool
+ulint
 lock_mode_compatible(
 /*=================*/
-			/* out: TRUE if mode1 compatible with mode2 */
-	ulint	mode1,	/* in: lock mode */
-	ulint	mode2)	/* in: lock mode */
+	enum lock_mode	mode1,	/*!< in: lock mode */
+	enum lock_mode	mode2)	/*!< in: lock mode */
 {
 	ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
 	      || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
 	ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
 	      || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
 
-	if (mode1 == LOCK_S && (mode2 == LOCK_IS || mode2 == LOCK_S)) {
-
-		return(TRUE);
-
-	} else if (mode1 == LOCK_X) {
-
-		return(FALSE);
-
-	} else if (mode1 == LOCK_AUTO_INC && (mode2 == LOCK_IS
-					      || mode2 == LOCK_IX)) {
-		return(TRUE);
-
-	} else if (mode1 == LOCK_IS && (mode2 == LOCK_IS
-					|| mode2 == LOCK_IX
-					|| mode2 == LOCK_AUTO_INC
-					|| mode2 == LOCK_S)) {
-		return(TRUE);
-
-	} else if (mode1 == LOCK_IX && (mode2 == LOCK_IS
-					|| mode2 == LOCK_AUTO_INC
-					|| mode2 == LOCK_IX)) {
-		return(TRUE);
-	}
-
-	return(FALSE);
+	return((LOCK_MODE_COMPATIBILITY) & LK(mode1, mode2));
 }
 
-/*************************************************************************
-Checks if a lock request for a new lock has to wait for request lock2. */
+/*********************************************************************//**
+Checks if a lock request for a new lock has to wait for request lock2.
+@return	TRUE if new lock has to wait for lock2 to be removed */
 UNIV_INLINE
 ibool
 lock_rec_has_to_wait(
 /*=================*/
-			/* out: TRUE if new lock has to wait for lock2 to be
-			removed */
-	trx_t*	trx,	/* in: trx of new lock */
-	ulint	type_mode,/* in: precise mode of the new lock to set:
-			LOCK_S or LOCK_X, possibly ORed to
-			LOCK_GAP or LOCK_REC_NOT_GAP, LOCK_INSERT_INTENTION */
-	lock_t*	lock2,	/* in: another record lock; NOTE that it is assumed
-			that this has a lock bit set on the same record as
-			in the new lock we are setting */
-	ibool lock_is_on_supremum)  /* in: TRUE if we are setting the lock
-			on the 'supremum' record of an index
-			page: we know then that the lock request
-			is really for a 'gap' type lock */
+	const trx_t*	trx,	/*!< in: trx of new lock */
+	ulint		type_mode,/*!< in: precise mode of the new lock
+				to set: LOCK_S or LOCK_X, possibly
+				ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
+				LOCK_INSERT_INTENTION */
+	const lock_t*	lock2,	/*!< in: another record lock; NOTE that
+				it is assumed that this has a lock bit
+				set on the same record as in the new
+				lock we are setting */
+	ibool lock_is_on_supremum)  /*!< in: TRUE if we are setting the
+				lock on the 'supremum' record of an
+				index page: we know then that the lock
+				request is really for a 'gap' type lock */
 {
 	ut_ad(trx && lock2);
-	ut_ad(lock_get_type(lock2) == LOCK_REC);
+	ut_ad(lock_get_type_low(lock2) == LOCK_REC);
 
 	if (trx != lock2->trx
 	    && !lock_mode_compatible(LOCK_MODE_MASK & type_mode,
@@ -977,26 +960,26 @@ lock_rec_has_to_wait(
 	return(FALSE);
 }
 
-/*************************************************************************
-Checks if a lock request lock1 has to wait for request lock2. */
-
+/*********************************************************************//**
+Checks if a lock request lock1 has to wait for request lock2.
+@return	TRUE if lock1 has to wait for lock2 to be removed */
+UNIV_INTERN
 ibool
 lock_has_to_wait(
 /*=============*/
-			/* out: TRUE if lock1 has to wait for lock2 to be
-			removed */
-	lock_t*	lock1,	/* in: waiting lock */
-	lock_t*	lock2)	/* in: another lock; NOTE that it is assumed that this
-			has a lock bit set on the same record as in lock1 if
-			the locks are record locks */
+	const lock_t*	lock1,	/*!< in: waiting lock */
+	const lock_t*	lock2)	/*!< in: another lock; NOTE that it is
+				assumed that this has a lock bit set
+				on the same record as in lock1 if the
+				locks are record locks */
 {
 	ut_ad(lock1 && lock2);
 
 	if (lock1->trx != lock2->trx
 	    && !lock_mode_compatible(lock_get_mode(lock1),
 				     lock_get_mode(lock2))) {
-		if (lock_get_type(lock1) == LOCK_REC) {
-			ut_ad(lock_get_type(lock2) == LOCK_REC);
+		if (lock_get_type_low(lock1) == LOCK_REC) {
+			ut_ad(lock_get_type_low(lock2) == LOCK_REC);
 
 			/* If this lock request is for a supremum record
 			then the second bit on the lock bitmap is set */
@@ -1015,58 +998,50 @@ lock_has_to_wait(
 
 /*============== RECORD LOCK BASIC FUNCTIONS ============================*/
 
-/*************************************************************************
-Gets the number of bits in a record lock bitmap. */
+/*********************************************************************//**
+Gets the number of bits in a record lock bitmap.
+@return	number of bits */
 UNIV_INLINE
 ulint
 lock_rec_get_n_bits(
 /*================*/
-			/* out: number of bits */
-	lock_t*	lock)	/* in: record lock */
+	const lock_t*	lock)	/*!< in: record lock */
 {
 	return(lock->un_member.rec_lock.n_bits);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Sets the nth bit of a record lock to TRUE. */
 UNIV_INLINE
 void
 lock_rec_set_nth_bit(
 /*=================*/
-	lock_t*	lock,	/* in: record lock */
-	ulint	i)	/* in: index of the bit */
+	lock_t*	lock,	/*!< in: record lock */
+	ulint	i)	/*!< in: index of the bit */
 {
 	ulint	byte_index;
 	ulint	bit_index;
-	byte*	ptr;
-	ulint	b;
 
 	ut_ad(lock);
-	ut_ad(lock_get_type(lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
 	ut_ad(i < lock->un_member.rec_lock.n_bits);
 
 	byte_index = i / 8;
 	bit_index = i % 8;
 
-	ptr = (byte*)lock + sizeof(lock_t) + byte_index;
-
-	b = (ulint)*ptr;
-
-	b = ut_bit_set_nth(b, bit_index, TRUE);
-
-	*ptr = (byte)b;
+	((byte*) &lock[1])[byte_index] |= 1 << bit_index;
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
-if none found. */
-
+if none found.
+@return bit index == heap number of the record, or ULINT_UNDEFINED if
+none found */
+UNIV_INTERN
 ulint
 lock_rec_find_set_bit(
 /*==================*/
-			/* out: bit index == heap number of the record, or
-			ULINT_UNDEFINED if none found */
-	lock_t*	lock)	/* in: record lock with at least one bit set */
+	const lock_t*	lock)	/*!< in: record lock with at least one bit set */
 {
 	ulint	i;
 
@@ -1081,51 +1056,43 @@ lock_rec_find_set_bit(
 	return(ULINT_UNDEFINED);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Resets the nth bit of a record lock. */
 UNIV_INLINE
 void
 lock_rec_reset_nth_bit(
 /*===================*/
-	lock_t*	lock,	/* in: record lock */
-	ulint	i)	/* in: index of the bit which must be set to TRUE
+	lock_t*	lock,	/*!< in: record lock */
+	ulint	i)	/*!< in: index of the bit which must be set to TRUE
 			when this function is called */
 {
 	ulint	byte_index;
 	ulint	bit_index;
-	byte*	ptr;
-	ulint	b;
 
 	ut_ad(lock);
-	ut_ad(lock_get_type(lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
 	ut_ad(i < lock->un_member.rec_lock.n_bits);
 
 	byte_index = i / 8;
 	bit_index = i % 8;
 
-	ptr = (byte*)lock + sizeof(lock_t) + byte_index;
-
-	b = (ulint)*ptr;
-
-	b = ut_bit_set_nth(b, bit_index, FALSE);
-
-	*ptr = (byte)b;
+	((byte*) &lock[1])[byte_index] &= ~(1 << bit_index);
 }
 
-/*************************************************************************
-Gets the first or next record lock on a page. */
+/*********************************************************************//**
+Gets the first or next record lock on a page.
+@return	next lock, NULL if none exists */
 UNIV_INLINE
 lock_t*
 lock_rec_get_next_on_page(
 /*======================*/
-			/* out: next lock, NULL if none exists */
-	lock_t*	lock)	/* in: a record lock */
+	lock_t*	lock)	/*!< in: a record lock */
 {
 	ulint	space;
 	ulint	page_no;
 
 	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(lock_get_type(lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
 
 	space = lock->un_member.rec_lock.space;
 	page_no = lock->un_member.rec_lock.page_no;
@@ -1148,16 +1115,16 @@ lock_rec_get_next_on_page(
 	return(lock);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Gets the first record lock on a page, where the page is identified by its
-file address. */
+file address.
+@return	first lock, NULL if none exists */
 UNIV_INLINE
 lock_t*
 lock_rec_get_first_on_page_addr(
 /*============================*/
-			/* out: first lock, NULL if none exists */
-	ulint	space,	/* in: space */
-	ulint	page_no)/* in: page number */
+	ulint	space,	/*!< in: space */
+	ulint	page_no)/*!< in: page number */
 {
 	lock_t*	lock;
 
@@ -1178,16 +1145,15 @@ lock_rec_get_first_on_page_addr(
 	return(lock);
 }
 
-/*************************************************************************
-Returns TRUE if there are explicit record locks on a page. */
-
+/*********************************************************************//**
+Returns TRUE if there are explicit record locks on a page.
+@return	TRUE if there are explicit record locks on the page */
+UNIV_INTERN
 ibool
 lock_rec_expl_exist_on_page(
 /*========================*/
-			/* out: TRUE if there are explicit record locks on
-			the page */
-	ulint	space,	/* in: space id */
-	ulint	page_no)/* in: page number */
+	ulint	space,	/*!< in: space id */
+	ulint	page_no)/*!< in: page number */
 {
 	ibool	ret;
 
@@ -1204,31 +1170,28 @@ lock_rec_expl_exist_on_page(
 	return(ret);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Gets the first record lock on a page, where the page is identified by a
-pointer to it. */
+pointer to it.
+@return	first lock, NULL if none exists */
 UNIV_INLINE
 lock_t*
 lock_rec_get_first_on_page(
 /*=======================*/
-			/* out: first lock, NULL if none exists */
-	byte*	ptr)	/* in: pointer to somewhere on the page */
+	const buf_block_t*	block)	/*!< in: buffer block */
 {
 	ulint	hash;
 	lock_t*	lock;
-	ulint	space;
-	ulint	page_no;
+	ulint	space	= buf_block_get_space(block);
+	ulint	page_no	= buf_block_get_page_no(block);
 
 	ut_ad(mutex_own(&kernel_mutex));
 
-	hash = buf_frame_get_lock_hash_val(ptr);
+	hash = buf_block_get_lock_hash_val(block);
 
 	lock = HASH_GET_FIRST(lock_sys->rec_hash, hash);
 
 	while (lock) {
-		space = buf_frame_get_space_id(ptr);
-		page_no = buf_frame_get_page_no(ptr);
-
 		if ((lock->un_member.rec_lock.space == space)
 		    && (lock->un_member.rec_lock.page_no == page_no)) {
 
@@ -1241,60 +1204,51 @@ lock_rec_get_first_on_page(
 	return(lock);
 }
 
-/*************************************************************************
-Gets the next explicit lock request on a record. */
+/*********************************************************************//**
+Gets the next explicit lock request on a record.
+@return	next lock, NULL if none exists */
 UNIV_INLINE
 lock_t*
 lock_rec_get_next(
 /*==============*/
-			/* out: next lock, NULL if none exists */
-	rec_t*	rec,	/* in: record on a page */
-	lock_t*	lock)	/* in: lock */
+	ulint	heap_no,/*!< in: heap number of the record */
+	lock_t*	lock)	/*!< in: lock */
 {
 	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(lock_get_type(lock) == LOCK_REC);
 
-	if (page_rec_is_comp(rec)) {
-		do {
-			lock = lock_rec_get_next_on_page(lock);
-		} while (lock && !lock_rec_get_nth_bit(
-				 lock, rec_get_heap_no(rec, TRUE)));
-	} else {
-		do {
-			lock = lock_rec_get_next_on_page(lock);
-		} while (lock && !lock_rec_get_nth_bit(
-				 lock, rec_get_heap_no(rec, FALSE)));
-	}
+	do {
+		ut_ad(lock_get_type_low(lock) == LOCK_REC);
+		lock = lock_rec_get_next_on_page(lock);
+	} while (lock && !lock_rec_get_nth_bit(lock, heap_no));
 
 	return(lock);
 }
 
-/*************************************************************************
-Gets the first explicit lock request on a record. */
+/*********************************************************************//**
+Gets the first explicit lock request on a record.
+@return	first lock, NULL if none exists */
 UNIV_INLINE
 lock_t*
 lock_rec_get_first(
 /*===============*/
-			/* out: first lock, NULL if none exists */
-	rec_t*	rec)	/* in: record on a page */
+	const buf_block_t*	block,	/*!< in: block containing the record */
+	ulint			heap_no)/*!< in: heap number of the record */
 {
 	lock_t*	lock;
 
 	ut_ad(mutex_own(&kernel_mutex));
 
-	lock = lock_rec_get_first_on_page(rec);
-	if (UNIV_LIKELY_NULL(lock)) {
-		ulint	heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-
-		while (lock && !lock_rec_get_nth_bit(lock, heap_no)) {
-			lock = lock_rec_get_next_on_page(lock);
+	for (lock = lock_rec_get_first_on_page(block); lock;
+	     lock = lock_rec_get_next_on_page(lock)) {
+		if (lock_rec_get_nth_bit(lock, heap_no)) {
+			break;
 		}
 	}
 
 	return(lock);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
 pointer in the transaction! This function is used in lock object creation
 and resetting. */
@@ -1302,64 +1256,50 @@ static
 void
 lock_rec_bitmap_reset(
 /*==================*/
-	lock_t*	lock)	/* in: record lock */
+	lock_t*	lock)	/*!< in: record lock */
 {
-	byte*	ptr;
 	ulint	n_bytes;
-	ulint	i;
 
-	ut_ad(lock_get_type(lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
 
 	/* Reset to zero the bitmap which resides immediately after the lock
 	struct */
 
-	ptr = (byte*)lock + sizeof(lock_t);
-
 	n_bytes = lock_rec_get_n_bits(lock) / 8;
 
 	ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);
 
-	for (i = 0; i < n_bytes; i++) {
-
-		*ptr = 0;
-		ptr++;
-	}
+	memset(&lock[1], 0, n_bytes);
 }
 
-/*************************************************************************
-Copies a record lock to heap. */
+/*********************************************************************//**
+Copies a record lock to heap.
+@return	copy of lock */
 static
 lock_t*
 lock_rec_copy(
 /*==========*/
-				/* out: copy of lock */
-	lock_t*		lock,	/* in: record lock */
-	mem_heap_t*	heap)	/* in: memory heap */
+	const lock_t*	lock,	/*!< in: record lock */
+	mem_heap_t*	heap)	/*!< in: memory heap */
 {
-	lock_t*	dupl_lock;
 	ulint	size;
 
-	ut_ad(lock_get_type(lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
 
 	size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;
 
-	dupl_lock = mem_heap_alloc(heap, size);
-
-	ut_memcpy(dupl_lock, lock, size);
-
-	return(dupl_lock);
+	return(mem_heap_dup(heap, lock, size));
 }
 
-/*************************************************************************
-Gets the previous record lock set on a record. */
-
-lock_t*
+/*********************************************************************//**
+Gets the previous record lock set on a record.
+@return	previous lock on the same record, NULL if none exists */
+UNIV_INTERN
+const lock_t*
 lock_rec_get_prev(
 /*==============*/
-			/* out: previous lock on the same record, NULL if
-			none exists */
-	lock_t*	in_lock,/* in: record lock */
-	ulint	heap_no)/* in: heap number of the record */
+	const lock_t*	in_lock,/*!< in: record lock */
+	ulint		heap_no)/*!< in: heap number of the record */
 {
 	lock_t*	lock;
 	ulint	space;
@@ -1367,7 +1307,7 @@ lock_rec_get_prev(
 	lock_t*	found_lock	= NULL;
 
 	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(lock_get_type(in_lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
 
 	space = in_lock->un_member.rec_lock.space;
 	page_no = in_lock->un_member.rec_lock.page_no;
@@ -1393,16 +1333,16 @@ lock_rec_get_prev(
 
 /*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/
 
-/*************************************************************************
-Checks if a transaction has the specified table lock, or stronger. */
+/*********************************************************************//**
+Checks if a transaction has the specified table lock, or stronger.
+@return	lock or NULL */
 UNIV_INLINE
 lock_t*
 lock_table_has(
 /*===========*/
-				/* out: lock or NULL */
-	trx_t*		trx,	/* in: transaction */
-	dict_table_t*	table,	/* in: table */
-	ulint		mode)	/* in: lock mode */
+	trx_t*		trx,	/*!< in: transaction */
+	dict_table_t*	table,	/*!< in: table */
+	enum lock_mode	mode)	/*!< in: lock mode */
 {
 	lock_t*	lock;
 
@@ -1433,20 +1373,23 @@ lock_table_has(
 
 /*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
-to precise_mode. */
+to precise_mode.
+@return	lock or NULL */
 UNIV_INLINE
 lock_t*
 lock_rec_has_expl(
 /*==============*/
-			/* out: lock or NULL */
-	ulint	precise_mode,/* in: LOCK_S or LOCK_X possibly ORed to
-			LOCK_GAP or LOCK_REC_NOT_GAP,
-			for a supremum record we regard this always a gap
-			type request */
-	rec_t*	rec,	/* in: record */
-	trx_t*	trx)	/* in: transaction */
+	ulint			precise_mode,/*!< in: LOCK_S or LOCK_X
+					possibly ORed to LOCK_GAP or
+					LOCK_REC_NOT_GAP, for a
+					supremum record we regard this
+					always a gap type request */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no,/*!< in: heap number of the record */
+	trx_t*			trx)	/*!< in: transaction */
 {
 	lock_t*	lock;
 
@@ -1455,7 +1398,7 @@ lock_rec_has_expl(
 	      || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
 	ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
 
-	lock = lock_rec_get_first(rec);
+	lock = lock_rec_get_first(block, heap_no);
 
 	while (lock) {
 		if (lock->trx == trx
@@ -1464,37 +1407,42 @@ lock_rec_has_expl(
 		    && !lock_get_wait(lock)
 		    && (!lock_rec_get_rec_not_gap(lock)
 			|| (precise_mode & LOCK_REC_NOT_GAP)
-			|| page_rec_is_supremum(rec))
+			|| heap_no == PAGE_HEAP_NO_SUPREMUM)
 		    && (!lock_rec_get_gap(lock)
 			|| (precise_mode & LOCK_GAP)
-			|| page_rec_is_supremum(rec))
+			|| heap_no == PAGE_HEAP_NO_SUPREMUM)
 		    && (!lock_rec_get_insert_intention(lock))) {
 
 			return(lock);
 		}
 
-		lock = lock_rec_get_next(rec, lock);
+		lock = lock_rec_get_next(heap_no, lock);
 	}
 
 	return(NULL);
 }
 
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Checks if some other transaction has a lock request in the queue. */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Checks if some other transaction has a lock request in the queue.
+@return	lock or NULL */
 static
 lock_t*
 lock_rec_other_has_expl_req(
 /*========================*/
-			/* out: lock or NULL */
-	ulint	mode,	/* in: LOCK_S or LOCK_X */
-	ulint	gap,	/* in: LOCK_GAP if also gap locks are taken
-			into account, or 0 if not */
-	ulint	wait,	/* in: LOCK_WAIT if also waiting locks are
-			taken into account, or 0 if not */
-	rec_t*	rec,	/* in: record to look at */
-	trx_t*	trx)	/* in: transaction, or NULL if requests by all
-			transactions are taken into account */
+	enum lock_mode		mode,	/*!< in: LOCK_S or LOCK_X */
+	ulint			gap,	/*!< in: LOCK_GAP if also gap
+					locks are taken into account,
+					or 0 if not */
+	ulint			wait,	/*!< in: LOCK_WAIT if also
+					waiting locks are taken into
+					account, or 0 if not */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no,/*!< in: heap number of the record */
+	const trx_t*		trx)	/*!< in: transaction, or NULL if
+					requests by all transactions
+					are taken into account */
 {
 	lock_t*	lock;
 
@@ -1503,80 +1451,92 @@ lock_rec_other_has_expl_req(
 	ut_ad(gap == 0 || gap == LOCK_GAP);
 	ut_ad(wait == 0 || wait == LOCK_WAIT);
 
-	lock = lock_rec_get_first(rec);
+	lock = lock_rec_get_first(block, heap_no);
 
 	while (lock) {
 		if (lock->trx != trx
 		    && (gap
 			|| !(lock_rec_get_gap(lock)
-			     || page_rec_is_supremum(rec)))
+			     || heap_no == PAGE_HEAP_NO_SUPREMUM))
 		    && (wait || !lock_get_wait(lock))
 		    && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {
 
 			return(lock);
 		}
 
-		lock = lock_rec_get_next(rec, lock);
+		lock = lock_rec_get_next(heap_no, lock);
 	}
 
 	return(NULL);
 }
-#endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if some other transaction has a conflicting explicit lock request
-in the queue, so that we have to wait. */
+in the queue, so that we have to wait.
+@return	lock or NULL */
 static
 lock_t*
 lock_rec_other_has_conflicting(
 /*===========================*/
-			/* out: lock or NULL */
-	ulint	mode,	/* in: LOCK_S or LOCK_X,
-			possibly ORed to LOCK_GAP or LOC_REC_NOT_GAP,
-			LOCK_INSERT_INTENTION */
-	rec_t*	rec,	/* in: record to look at */
-	trx_t*	trx)	/* in: our transaction */
+	enum lock_mode		mode,	/*!< in: LOCK_S or LOCK_X,
+					possibly ORed to LOCK_GAP or
+					LOC_REC_NOT_GAP,
+					LOCK_INSERT_INTENTION */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no,/*!< in: heap number of the record */
+	trx_t*			trx)	/*!< in: our transaction */
 {
 	lock_t*	lock;
 
 	ut_ad(mutex_own(&kernel_mutex));
 
-	lock = lock_rec_get_first(rec);
+	lock = lock_rec_get_first(block, heap_no);
 
-	while (lock) {
-		if (lock_rec_has_to_wait(trx, mode, lock,
-					 page_rec_is_supremum(rec))) {
+	if (UNIV_LIKELY_NULL(lock)) {
+		if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
 
-			return(lock);
+			do {
+				if (lock_rec_has_to_wait(trx, mode, lock,
+							 TRUE)) {
+					return(lock);
+				}
+
+				lock = lock_rec_get_next(heap_no, lock);
+			} while (lock);
+		} else {
+
+			do {
+				if (lock_rec_has_to_wait(trx, mode, lock,
+							 FALSE)) {
+					return(lock);
+				}
+
+				lock = lock_rec_get_next(heap_no, lock);
+			} while (lock);
 		}
-
-		lock = lock_rec_get_next(rec, lock);
 	}
 
 	return(NULL);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Looks for a suitable type record lock struct by the same trx on the same page.
 This can be used to save space when a new record lock should be set on a page:
-no new struct is needed, if a suitable old is found. */
+no new struct is needed, if a suitable old is found.
+@return	lock or NULL */
 UNIV_INLINE
 lock_t*
 lock_rec_find_similar_on_page(
 /*==========================*/
-				/* out: lock or NULL */
-	ulint	type_mode,	/* in: lock type_mode field */
-	rec_t*	rec,		/* in: record */
-	trx_t*	trx)		/* in: transaction */
+	ulint		type_mode,	/*!< in: lock type_mode field */
+	ulint		heap_no,	/*!< in: heap number of the record */
+	lock_t*		lock,		/*!< in: lock_rec_get_first_on_page() */
+	const trx_t*	trx)		/*!< in: transaction */
 {
-	lock_t*	lock;
-	ulint	heap_no;
-
 	ut_ad(mutex_own(&kernel_mutex));
 
-	heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-	lock = lock_rec_get_first_on_page(rec);
-
 	while (lock != NULL) {
 		if (lock->trx == trx
 		    && lock->type_mode == type_mode
@@ -1591,28 +1551,25 @@ lock_rec_find_similar_on_page(
 	return(NULL);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if some transaction has an implicit x-lock on a record in a secondary
-index. */
-
+index.
+@return	transaction which has the x-lock, or NULL */
+static
 trx_t*
 lock_sec_rec_some_has_impl_off_kernel(
 /*==================================*/
-				/* out: transaction which has the x-lock, or
-				NULL */
-	rec_t*		rec,	/* in: user record */
-	dict_index_t*	index,	/* in: secondary index */
-	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
+	const rec_t*	rec,	/*!< in: user record */
+	dict_index_t*	index,	/*!< in: secondary index */
+	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
 {
-	page_t*	page;
+	const page_t*	page = page_align(rec);
 
 	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(!(index->type & DICT_CLUSTERED));
+	ut_ad(!dict_index_is_clust(index));
 	ut_ad(page_rec_is_user_rec(rec));
 	ut_ad(rec_offs_validate(rec, index, offsets));
 
-	page = buf_frame_align(rec);
-
 	/* Some transaction may have an implicit x-lock on the record only
 	if the max trx id for the page >= min trx id for the trx list, or
 	database recovery is running. We do not write the changes of a page
@@ -1631,7 +1588,7 @@ lock_sec_rec_some_has_impl_off_kernel(
 
 	if (!lock_check_trx_id_sanity(page_get_max_trx_id(page),
 				      rec, index, offsets, TRUE)) {
-		buf_page_print(page);
+		buf_page_print(page, 0);
 
 		/* The page is corrupt: try to avoid a crash by returning
 		NULL */
@@ -1641,15 +1598,15 @@ lock_sec_rec_some_has_impl_off_kernel(
 	return(row_vers_impl_x_locked_off_kernel(rec, index, offsets));
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Return approximate number or record locks (bits set in the bitmap) for
 this transaction. Since delete-marked records may be removed, the
 record count will not be precise. */
-
+UNIV_INTERN
 ulint
 lock_number_of_rows_locked(
 /*=======================*/
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in: transaction */
 {
 	lock_t*	lock;
 	ulint   n_records = 0;
@@ -1659,7 +1616,7 @@ lock_number_of_rows_locked(
 	lock = UT_LIST_GET_FIRST(trx->trx_locks);
 
 	while (lock) {
-		if (lock_get_type(lock) == LOCK_REC) {
+		if (lock_get_type_low(lock) == LOCK_REC) {
 			n_bits = lock_rec_get_n_bits(lock);
 
 			for (n_bit = 0; n_bit < n_bits; n_bit++) {
@@ -1677,34 +1634,35 @@ lock_number_of_rows_locked(
 
 /*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
 
-/*************************************************************************
+/*********************************************************************//**
 Creates a new record lock and inserts it to the lock queue. Does NOT check
-for deadlocks or lock compatibility! */
+for deadlocks or lock compatibility!
+@return	created lock */
 static
 lock_t*
 lock_rec_create(
 /*============*/
-				/* out: created lock */
-	ulint		type_mode,/* in: lock mode and wait flag, type is
-				ignored and replaced by LOCK_REC */
-	rec_t*		rec,	/* in: record on page */
-	dict_index_t*	index,	/* in: index of record */
-	trx_t*		trx)	/* in: transaction */
+	ulint			type_mode,/*!< in: lock mode and wait
+					flag, type is ignored and
+					replaced by LOCK_REC */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no,/*!< in: heap number of the record */
+	dict_index_t*		index,	/*!< in: index of record */
+	trx_t*			trx)	/*!< in: transaction */
 {
-	page_t*	page;
-	lock_t*	lock;
-	ulint	page_no;
-	ulint	heap_no;
-	ulint	space;
-	ulint	n_bits;
-	ulint	n_bytes;
+	lock_t*		lock;
+	ulint		page_no;
+	ulint		space;
+	ulint		n_bits;
+	ulint		n_bytes;
+	const page_t*	page;
 
 	ut_ad(mutex_own(&kernel_mutex));
 
-	page = buf_frame_align(rec);
-	space = buf_frame_get_space_id(page);
-	page_no	= buf_frame_get_page_no(page);
-	heap_no = rec_get_heap_no(rec, page_is_comp(page));
+	space = buf_block_get_space(block);
+	page_no	= buf_block_get_page_no(block);
+	page = block->frame;
 
 	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
 
@@ -1712,7 +1670,7 @@ lock_rec_create(
 	LOCK_REC_NOT_GAP bits, as all locks on the supremum are
 	automatically of the gap type */
 
-	if (rec == page_get_supremum_rec(page)) {
+	if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
 		ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
 
 		type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
@@ -1745,7 +1703,7 @@ lock_rec_create(
 
 	HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
 		    lock_rec_fold(space, page_no), lock);
-	if (type_mode & LOCK_WAIT) {
+	if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
 
 		lock_set_lock_and_trx_wait(lock, trx);
 	}
@@ -1753,28 +1711,31 @@ lock_rec_create(
 	return(lock);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Enqueues a waiting request for a lock which cannot be granted immediately.
-Checks for deadlocks. */
+Checks for deadlocks.
+@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
+DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
+transaction was chosen as a victim, and we got the lock immediately:
+no need to wait then */
 static
 ulint
 lock_rec_enqueue_waiting(
 /*=====================*/
-				/* out: DB_LOCK_WAIT, DB_DEADLOCK, or
-				DB_QUE_THR_SUSPENDED, or DB_SUCCESS;
-				DB_SUCCESS means that there was a deadlock,
-				but another transaction was chosen as a
-				victim, and we got the lock immediately:
-				no need to wait then */
-	ulint		type_mode,/* in: lock mode this transaction is
-				requesting: LOCK_S or LOCK_X, possibly ORed
-				with LOCK_GAP or LOCK_REC_NOT_GAP, ORed
-				with LOCK_INSERT_INTENTION if this waiting
-				lock request is set when performing an
-				insert of an index record */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: index of record */
-	que_thr_t*	thr)	/* in: query thread */
+	ulint			type_mode,/*!< in: lock mode this
+					transaction is requesting:
+					LOCK_S or LOCK_X, possibly
+					ORed with LOCK_GAP or
+					LOCK_REC_NOT_GAP, ORed with
+					LOCK_INSERT_INTENTION if this
+					waiting lock request is set
+					when performing an insert of
+					an index record */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no,/*!< in: heap number of the record */
+	dict_index_t*		index,	/*!< in: index of record */
+	que_thr_t*		thr)	/*!< in: query thread */
 {
 	lock_t*	lock;
 	trx_t*	trx;
@@ -1785,7 +1746,7 @@ lock_rec_enqueue_waiting(
 	we do not enqueue a lock request if the query thread should be
 	stopped anyway */
 
-	if (que_thr_stop(thr)) {
+	if (UNIV_UNLIKELY(que_thr_stop(thr))) {
 
 		ut_error;
 
@@ -1794,12 +1755,16 @@ lock_rec_enqueue_waiting(
 
 	trx = thr_get_trx(thr);
 
-	if (trx->dict_operation) {
+	switch (trx_get_dict_operation(trx)) {
+	case TRX_DICT_OP_NONE:
+		break;
+	case TRX_DICT_OP_TABLE:
+	case TRX_DICT_OP_INDEX:
 		ut_print_timestamp(stderr);
 		fputs("  InnoDB: Error: a record lock wait happens"
 		      " in a dictionary operation!\n"
-		      "InnoDB: Table name ", stderr);
-		ut_print_name(stderr, trx, TRUE, index->table_name);
+		      "InnoDB: ", stderr);
+		dict_index_name_print(stderr, trx, index);
 		fputs(".\n"
 		      "InnoDB: Submit a detailed bug report"
 		      " to http://bugs.mysql.com\n",
@@ -1807,16 +1772,16 @@ lock_rec_enqueue_waiting(
 	}
 
 	/* Enqueue the lock request that will wait to be granted */
-	lock = lock_rec_create(type_mode | LOCK_WAIT, rec, index, trx);
+	lock = lock_rec_create(type_mode | LOCK_WAIT,
+			       block, heap_no, index, trx);
 
 	/* Check if a deadlock occurs: if yes, remove the lock request and
 	return an error code */
 
-	if (lock_deadlock_occurs(lock, trx)) {
+	if (UNIV_UNLIKELY(lock_deadlock_occurs(lock, trx))) {
 
 		lock_reset_lock_and_trx_wait(lock);
-		lock_rec_reset_nth_bit(lock, rec_get_heap_no(
-					       rec, page_rec_is_comp(rec)));
+		lock_rec_reset_nth_bit(lock, heap_no);
 
 		return(DB_DEADLOCK);
 	}
@@ -1846,47 +1811,58 @@ lock_rec_enqueue_waiting(
 	return(DB_LOCK_WAIT);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Adds a record lock request in the record queue. The request is normally
 added as the last in the queue, but if there are no waiting lock requests
 on the record, and the request to be added is not a waiting request, we
 can reuse a suitable record lock object already existing on the same page,
 just setting the appropriate bit in its bitmap. This is a low-level function
-which does NOT check for deadlocks or lock compatibility! */
+which does NOT check for deadlocks or lock compatibility!
+@return	lock where the bit was set */
 static
 lock_t*
 lock_rec_add_to_queue(
 /*==================*/
-				/* out: lock where the bit was set */
-	ulint		type_mode,/* in: lock mode, wait, gap etc. flags;
-				type is ignored and replaced by LOCK_REC */
-	rec_t*		rec,	/* in: record on page */
-	dict_index_t*	index,	/* in: index of record */
-	trx_t*		trx)	/* in: transaction */
+	ulint			type_mode,/*!< in: lock mode, wait, gap
+					etc. flags; type is ignored
+					and replaced by LOCK_REC */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no,/*!< in: heap number of the record */
+	dict_index_t*		index,	/*!< in: index of record */
+	trx_t*			trx)	/*!< in: transaction */
 {
 	lock_t*	lock;
-	lock_t*	similar_lock	= NULL;
-	ulint	heap_no;
-	ibool	somebody_waits	= FALSE;
 
 	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
-	      || ((type_mode & LOCK_MODE_MASK) != LOCK_S)
-	      || !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT,
-					      rec, trx));
-	ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
-	      || ((type_mode & LOCK_MODE_MASK) != LOCK_X)
-	      || !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
-					      rec, trx));
+#ifdef UNIV_DEBUG
+	switch (type_mode & LOCK_MODE_MASK) {
+	case LOCK_X:
+	case LOCK_S:
+		break;
+	default:
+		ut_error;
+	}
 
-	type_mode = type_mode | LOCK_REC;
+	if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
+		enum lock_mode	mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
+			? LOCK_X
+			: LOCK_S;
+		lock_t*		other_lock
+			= lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT,
+						      block, heap_no, trx);
+		ut_a(!other_lock);
+	}
+#endif /* UNIV_DEBUG */
+
+	type_mode |= LOCK_REC;
 
 	/* If rec is the supremum record, then we can reset the gap bit, as
 	all locks on the supremum are automatically of the gap type, and we
 	try to avoid unnecessary memory consumption of a new record lock
 	struct for a gap type lock */
 
-	if (page_rec_is_supremum(rec)) {
+	if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
 		ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
 
 		/* There should never be LOCK_REC_NOT_GAP on a supremum
@@ -1897,57 +1873,66 @@ lock_rec_add_to_queue(
 
 	/* Look for a waiting lock request on the same record or on a gap */
 
-	heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-	lock = lock_rec_get_first_on_page(rec);
+	lock = lock_rec_get_first_on_page(block);
 
 	while (lock != NULL) {
 		if (lock_get_wait(lock)
 		    && (lock_rec_get_nth_bit(lock, heap_no))) {
 
-			somebody_waits = TRUE;
+			goto somebody_waits;
 		}
 
 		lock = lock_rec_get_next_on_page(lock);
 	}
 
-	/* Look for a similar record lock on the same page: if one is found
-	and there are no waiting lock requests, we can just set the bit */
+	if (UNIV_LIKELY(!(type_mode & LOCK_WAIT))) {
 
-	similar_lock = lock_rec_find_similar_on_page(type_mode, rec, trx);
+		/* Look for a similar record lock on the same page:
+		if one is found and there are no waiting lock requests,
+		we can just set the bit */
 
-	if (similar_lock && !somebody_waits && !(type_mode & LOCK_WAIT)) {
+		lock = lock_rec_find_similar_on_page(
+			type_mode, heap_no,
+			lock_rec_get_first_on_page(block), trx);
 
-		lock_rec_set_nth_bit(similar_lock, heap_no);
+		if (lock) {
 
-		return(similar_lock);
+			lock_rec_set_nth_bit(lock, heap_no);
+
+			return(lock);
+		}
 	}
 
-	return(lock_rec_create(type_mode, rec, index, trx));
+somebody_waits:
+	return(lock_rec_create(type_mode, block, heap_no, index, trx));
 }
 
-/*************************************************************************
+/*********************************************************************//**
 This is a fast routine for locking a record in the most common cases:
 there are no explicit locks on the page, or there is just one lock, owned
 by this transaction, and of the right type_mode. This is a low-level function
 which does NOT look at implicit locks! Checks lock compatibility within
 explicit locks. This function sets a normal next-key lock, or in the case of
-a page supremum record, a gap type lock. */
+a page supremum record, a gap type lock.
+@return	TRUE if locking succeeded */
 UNIV_INLINE
 ibool
 lock_rec_lock_fast(
 /*===============*/
-				/* out: TRUE if locking succeeded */
-	ibool		impl,	/* in: if TRUE, no lock is set if no wait
-				is necessary: we assume that the caller will
-				set an implicit lock */
-	ulint		mode,	/* in: lock mode: LOCK_X or LOCK_S possibly
-				ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: index of record */
-	que_thr_t*	thr)	/* in: query thread */
+	ibool			impl,	/*!< in: if TRUE, no lock is set
+					if no wait is necessary: we
+					assume that the caller will
+					set an implicit lock */
+	ulint			mode,	/*!< in: lock mode: LOCK_X or
+					LOCK_S possibly ORed to either
+					LOCK_GAP or LOCK_REC_NOT_GAP */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no,/*!< in: heap number of record */
+	dict_index_t*		index,	/*!< in: index of record */
+	que_thr_t*		thr)	/*!< in: query thread */
 {
 	lock_t*	lock;
-	ulint	heap_no;
 	trx_t*	trx;
 
 	ut_ad(mutex_own(&kernel_mutex));
@@ -1961,15 +1946,13 @@ lock_rec_lock_fast(
 	      || mode - (LOCK_MODE_MASK & mode) == 0
 	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
 
-	heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-
-	lock = lock_rec_get_first_on_page(rec);
+	lock = lock_rec_get_first_on_page(block);
 
 	trx = thr_get_trx(thr);
 
 	if (lock == NULL) {
 		if (!impl) {
-			lock_rec_create(mode, rec, index, trx);
+			lock_rec_create(mode, block, heap_no, index, trx);
 		}
 
 		return(TRUE);
@@ -1999,25 +1982,28 @@ lock_rec_lock_fast(
 	return(TRUE);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 This is the general, and slower, routine for locking a record. This is a
 low-level function which does NOT look at implicit locks! Checks lock
 compatibility within explicit locks. This function sets a normal next-key
-lock, or in the case of a page supremum record, a gap type lock. */
+lock, or in the case of a page supremum record, a gap type lock.
+@return	DB_SUCCESS, DB_LOCK_WAIT, or error code */
 static
 ulint
 lock_rec_lock_slow(
 /*===============*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
-				code */
-	ibool		impl,	/* in: if TRUE, no lock is set if no wait is
-				necessary: we assume that the caller will set
-				an implicit lock */
-	ulint		mode,	/* in: lock mode: LOCK_X or LOCK_S possibly
-				ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: index of record */
-	que_thr_t*	thr)	/* in: query thread */
+	ibool			impl,	/*!< in: if TRUE, no lock is set
+					if no wait is necessary: we
+					assume that the caller will
+					set an implicit lock */
+	ulint			mode,	/*!< in: lock mode: LOCK_X or
+					LOCK_S possibly ORed to either
+					LOCK_GAP or LOCK_REC_NOT_GAP */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no,/*!< in: heap number of record */
+	dict_index_t*		index,	/*!< in: index of record */
+	que_thr_t*		thr)	/*!< in: query thread */
 {
 	trx_t*	trx;
 	ulint	err;
@@ -2035,24 +2021,25 @@ lock_rec_lock_slow(
 
 	trx = thr_get_trx(thr);
 
-	if (lock_rec_has_expl(mode, rec, trx)) {
+	if (lock_rec_has_expl(mode, block, heap_no, trx)) {
 		/* The trx already has a strong enough lock on rec: do
 		nothing */
 
 		err = DB_SUCCESS;
-	} else if (lock_rec_other_has_conflicting(mode, rec, trx)) {
+	} else if (lock_rec_other_has_conflicting(mode, block, heap_no, trx)) {
 
 		/* If another transaction has a non-gap conflicting request in
 		the queue, as this transaction does not have a lock strong
 		enough already granted on the record, we have to wait. */
 
-		err = lock_rec_enqueue_waiting(mode, rec, index, thr);
+		err = lock_rec_enqueue_waiting(mode, block, heap_no,
+					       index, thr);
 	} else {
 		if (!impl) {
 			/* Set the requested lock on the record */
 
-			lock_rec_add_to_queue(LOCK_REC | mode, rec, index,
-					      trx);
+			lock_rec_add_to_queue(LOCK_REC | mode, block,
+					      heap_no, index, trx);
 		}
 
 		err = DB_SUCCESS;
@@ -2061,26 +2048,29 @@ lock_rec_lock_slow(
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Tries to lock the specified record in the mode requested. If not immediately
 possible, enqueues a waiting lock request. This is a low-level function
 which does NOT look at implicit locks! Checks lock compatibility within
 explicit locks. This function sets a normal next-key lock, or in the case
-of a page supremum record, a gap type lock. */
+of a page supremum record, a gap type lock.
+@return	DB_SUCCESS, DB_LOCK_WAIT, or error code */
 static
 ulint
 lock_rec_lock(
 /*==========*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
-				code */
-	ibool		impl,	/* in: if TRUE, no lock is set if no wait is
-				necessary: we assume that the caller will set
-				an implicit lock */
-	ulint		mode,	/* in: lock mode: LOCK_X or LOCK_S possibly
-				ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: index of record */
-	que_thr_t*	thr)	/* in: query thread */
+	ibool			impl,	/*!< in: if TRUE, no lock is set
+					if no wait is necessary: we
+					assume that the caller will
+					set an implicit lock */
+	ulint			mode,	/*!< in: lock mode: LOCK_X or
+					LOCK_S possibly ORed to either
+					LOCK_GAP or LOCK_REC_NOT_GAP */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no,/*!< in: heap number of record */
+	dict_index_t*		index,	/*!< in: index of record */
+	que_thr_t*		thr)	/*!< in: query thread */
 {
 	ulint	err;
 
@@ -2095,27 +2085,28 @@ lock_rec_lock(
 	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
 	      || mode - (LOCK_MODE_MASK & mode) == 0);
 
-	if (lock_rec_lock_fast(impl, mode, rec, index, thr)) {
+	if (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
 
 		/* We try a simplified and faster subroutine for the most
 		common cases */
 
 		err = DB_SUCCESS;
 	} else {
-		err = lock_rec_lock_slow(impl, mode, rec, index, thr);
+		err = lock_rec_lock_slow(impl, mode, block,
+					 heap_no, index, thr);
 	}
 
 	return(err);
 }
 
-/*************************************************************************
-Checks if a waiting record lock request still has to wait in a queue. */
+/*********************************************************************//**
+Checks if a waiting record lock request still has to wait in a queue.
+@return	TRUE if still has to wait */
 static
 ibool
 lock_rec_has_to_wait_in_queue(
 /*==========================*/
-				/* out: TRUE if still has to wait */
-	lock_t*	wait_lock)	/* in: waiting record lock */
+	lock_t*	wait_lock)	/*!< in: waiting record lock */
 {
 	lock_t*	lock;
 	ulint	space;
@@ -2124,7 +2115,7 @@ lock_rec_has_to_wait_in_queue(
 
 	ut_ad(mutex_own(&kernel_mutex));
 	ut_ad(lock_get_wait(wait_lock));
-	ut_ad(lock_get_type(wait_lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);
 
 	space = wait_lock->un_member.rec_lock.space;
 	page_no = wait_lock->un_member.rec_lock.page_no;
@@ -2146,31 +2137,32 @@ lock_rec_has_to_wait_in_queue(
 	return(FALSE);
 }
 
-/*****************************************************************
+/*************************************************************//**
 Grants a lock to a waiting lock request and releases the waiting
 transaction. */
 static
 void
 lock_grant(
 /*=======*/
-	lock_t*	lock)	/* in: waiting lock request */
+	lock_t*	lock)	/*!< in/out: waiting lock request */
 {
 	ut_ad(mutex_own(&kernel_mutex));
 
 	lock_reset_lock_and_trx_wait(lock);
 
 	if (lock_get_mode(lock) == LOCK_AUTO_INC) {
+		trx_t*		trx = lock->trx;
+		dict_table_t*	table = lock->un_member.tab_lock.table;
 
-		if (lock->trx->auto_inc_lock != NULL) {
+		if (table->autoinc_trx == trx) {
 			fprintf(stderr,
 				"InnoDB: Error: trx already had"
 				" an AUTO-INC lock!\n");
+		} else {
+			table->autoinc_trx = trx;
+
+			ib_vector_push(trx->autoinc_locks, lock);
 		}
-
-		/* Store pointer to lock to trx so that we know to
-		release it at the end of the SQL statement */
-
-		lock->trx->auto_inc_lock = lock;
 	}
 
 #ifdef UNIV_DEBUG
@@ -2190,7 +2182,7 @@ lock_grant(
 	}
 }
 
-/*****************************************************************
+/*************************************************************//**
 Cancels a waiting record lock request and releases the waiting transaction
 that requested it. NOTE: does NOT check if waiting lock requests behind this
 one can now be granted! */
@@ -2198,10 +2190,10 @@ static
 void
 lock_rec_cancel(
 /*============*/
-	lock_t*	lock)	/* in: waiting record lock request */
+	lock_t*	lock)	/*!< in: waiting record lock request */
 {
 	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(lock_get_type(lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
 
 	/* Reset the bit (there can be only one set bit) in the lock bitmap */
 	lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
@@ -2215,7 +2207,7 @@ lock_rec_cancel(
 	trx_end_lock_wait(lock->trx);
 }
 
-/*****************************************************************
+/*************************************************************//**
 Removes a record lock request, waiting or granted, from the queue and
 grants locks to other transactions in the queue if they now are entitled
 to a lock. NOTE: all record locks contained in in_lock are removed. */
@@ -2223,7 +2215,7 @@ static
 void
 lock_rec_dequeue_from_page(
 /*=======================*/
-	lock_t*	in_lock)/* in: record lock object: all record locks which
+	lock_t*	in_lock)/*!< in: record lock object: all record locks which
 			are contained in this lock object are removed;
 			transactions waiting behind will get their lock
 			requests granted, if they are now qualified to it */
@@ -2234,7 +2226,7 @@ lock_rec_dequeue_from_page(
 	trx_t*	trx;
 
 	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(lock_get_type(in_lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
 
 	trx = in_lock->trx;
 
@@ -2263,13 +2255,13 @@ lock_rec_dequeue_from_page(
 	}
 }
 
-/*****************************************************************
+/*************************************************************//**
 Removes a record lock request, waiting or granted, from the queue. */
 static
 void
 lock_rec_discard(
 /*=============*/
-	lock_t*	in_lock)/* in: record lock object: all record locks which
+	lock_t*	in_lock)/*!< in: record lock object: all record locks which
 			are contained in this lock object are removed */
 {
 	ulint	space;
@@ -2277,7 +2269,7 @@ lock_rec_discard(
 	trx_t*	trx;
 
 	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(lock_get_type(in_lock) == LOCK_REC);
+	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
 
 	trx = in_lock->trx;
 
@@ -2290,7 +2282,7 @@ lock_rec_discard(
 	UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
 }
 
-/*****************************************************************
+/*************************************************************//**
 Removes record lock objects set on an index page which is discarded. This
 function does not move locks, or check for waiting locks, therefore the
 lock bitmaps must already be reset when this function is called. */
@@ -2298,7 +2290,7 @@ static
 void
 lock_rec_free_all_from_discard_page(
 /*================================*/
-	page_t*	page)	/* in: page to be discarded */
+	const buf_block_t*	block)	/*!< in: page to be discarded */
 {
 	ulint	space;
 	ulint	page_no;
@@ -2307,8 +2299,8 @@ lock_rec_free_all_from_discard_page(
 
 	ut_ad(mutex_own(&kernel_mutex));
 
-	space = buf_frame_get_space_id(page);
-	page_no = buf_frame_get_page_no(page);
+	space = buf_block_get_space(block);
+	page_no = buf_block_get_page_no(block);
 
 	lock = lock_rec_get_first_on_page_addr(space, page_no);
 
@@ -2326,23 +2318,22 @@ lock_rec_free_all_from_discard_page(
 
 /*============= RECORD LOCK MOVING AND INHERITING ===================*/
 
-/*****************************************************************
+/*************************************************************//**
 Resets the lock bits for a single record. Releases transactions waiting for
 lock requests here. */
 static
 void
 lock_rec_reset_and_release_wait(
 /*============================*/
-	rec_t*	rec)	/* in: record whose locks bits should be reset */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no)/*!< in: heap number of record */
 {
 	lock_t*	lock;
-	ulint	heap_no;
 
 	ut_ad(mutex_own(&kernel_mutex));
 
-	heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-
-	lock = lock_rec_get_first(rec);
+	lock = lock_rec_get_first(block, heap_no);
 
 	while (lock != NULL) {
 		if (lock_get_wait(lock)) {
@@ -2351,28 +2342,35 @@ lock_rec_reset_and_release_wait(
 			lock_rec_reset_nth_bit(lock, heap_no);
 		}
 
-		lock = lock_rec_get_next(rec, lock);
+		lock = lock_rec_get_next(heap_no, lock);
 	}
 }
 
-/*****************************************************************
+/*************************************************************//**
 Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
 of another record as gap type locks, but does not reset the lock bits of
 the other record. Also waiting lock requests on rec are inherited as
 GRANTED gap locks. */
-
+static
 void
 lock_rec_inherit_to_gap(
 /*====================*/
-	rec_t*	heir,	/* in: record which inherits */
-	rec_t*	rec)	/* in: record from which inherited; does NOT reset
-			the locks on this record */
+	const buf_block_t*	heir_block,	/*!< in: block containing the
+						record which inherits */
+	const buf_block_t*	block,		/*!< in: block containing the
+						record from which inherited;
+						does NOT reset the locks on
+						this record */
+	ulint			heir_heap_no,	/*!< in: heap_no of the
+						inheriting record */
+	ulint			heap_no)	/*!< in: heap_no of the
+						donating record */
 {
 	lock_t*	lock;
 
 	ut_ad(mutex_own(&kernel_mutex));
 
-	lock = lock_rec_get_first(rec);
+	lock = lock_rec_get_first(block, heap_no);
 
 	/* If srv_locks_unsafe_for_binlog is TRUE or session is using
 	READ COMMITTED isolation level, we do not want locks set
@@ -2387,16 +2385,17 @@ lock_rec_inherit_to_gap(
 			  == TRX_ISO_READ_COMMITTED)
 			 && lock_get_mode(lock) == LOCK_X)) {
 
-			lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock)
-					      | LOCK_GAP,
-					      heir, lock->index, lock->trx);
+			lock_rec_add_to_queue(LOCK_REC | LOCK_GAP
+					      | lock_get_mode(lock),
+					      heir_block, heir_heap_no,
+					      lock->index, lock->trx);
 		}
 
-		lock = lock_rec_get_next(rec, lock);
+		lock = lock_rec_get_next(heap_no, lock);
 	}
 }
 
-/*****************************************************************
+/*************************************************************//**
 Makes a record to inherit the gap locks (except LOCK_INSERT_INTENTION type)
 of another record as gap type locks, but does not reset the lock bits of the
 other record. Also waiting lock requests are inherited as GRANTED gap locks. */
@@ -2404,99 +2403,103 @@ static
 void
 lock_rec_inherit_to_gap_if_gap_lock(
 /*================================*/
-	rec_t*	heir,	/* in: record which inherits */
-	rec_t*	rec)	/* in: record from which inherited; does NOT reset
-			the locks on this record */
+	const buf_block_t*	block,		/*!< in: buffer block */
+	ulint			heir_heap_no,	/*!< in: heap_no of
+						record which inherits */
+	ulint			heap_no)	/*!< in: heap_no of record
+						from which inherited;
+						does NOT reset the locks
+						on this record */
 {
 	lock_t*	lock;
 
 	ut_ad(mutex_own(&kernel_mutex));
 
-	lock = lock_rec_get_first(rec);
+	lock = lock_rec_get_first(block, heap_no);
 
 	while (lock != NULL) {
 		if (!lock_rec_get_insert_intention(lock)
-		    && (page_rec_is_supremum(rec)
+		    && (heap_no == PAGE_HEAP_NO_SUPREMUM
 			|| !lock_rec_get_rec_not_gap(lock))) {
 
-			lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock)
-					      | LOCK_GAP,
-					      heir, lock->index, lock->trx);
+			lock_rec_add_to_queue(LOCK_REC | LOCK_GAP
+					      | lock_get_mode(lock),
+					      block, heir_heap_no,
+					      lock->index, lock->trx);
 		}
 
-		lock = lock_rec_get_next(rec, lock);
+		lock = lock_rec_get_next(heap_no, lock);
 	}
 }
 
-/*****************************************************************
+/*************************************************************//**
 Moves the locks of a record to another record and resets the lock bits of
 the donating record. */
 static
 void
 lock_rec_move(
 /*==========*/
-	rec_t*	receiver,	/* in: record which gets locks; this record
-				must have no lock requests on it! */
-	rec_t*	donator,	/* in: record which gives locks */
-	ulint	comp)		/* in: nonzero=compact page format */
+	const buf_block_t*	receiver,	/*!< in: buffer block containing
+						the receiving record */
+	const buf_block_t*	donator,	/*!< in: buffer block containing
+						the donating record */
+	ulint			receiver_heap_no,/*!< in: heap_no of the record
+						which gets the locks; there
+						must be no lock requests
+						on it! */
+	ulint			donator_heap_no)/*!< in: heap_no of the record
+						which gives the locks */
 {
 	lock_t*	lock;
-	ulint	heap_no;
-	ulint	type_mode;
 
 	ut_ad(mutex_own(&kernel_mutex));
 
-	heap_no = rec_get_heap_no(donator, comp);
+	lock = lock_rec_get_first(donator, donator_heap_no);
 
-	lock = lock_rec_get_first(donator);
-
-	ut_ad(lock_rec_get_first(receiver) == NULL);
+	ut_ad(lock_rec_get_first(receiver, receiver_heap_no) == NULL);
 
 	while (lock != NULL) {
-		type_mode = lock->type_mode;
+		const ulint	type_mode = lock->type_mode;
 
-		lock_rec_reset_nth_bit(lock, heap_no);
+		lock_rec_reset_nth_bit(lock, donator_heap_no);
 
-		if (lock_get_wait(lock)) {
+		if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
 			lock_reset_lock_and_trx_wait(lock);
 		}
 
 		/* Note that we FIRST reset the bit, and then set the lock:
 		the function works also if donator == receiver */
 
-		lock_rec_add_to_queue(type_mode, receiver, lock->index,
-				      lock->trx);
-		lock = lock_rec_get_next(donator, lock);
+		lock_rec_add_to_queue(type_mode, receiver, receiver_heap_no,
+				      lock->index, lock->trx);
+		lock = lock_rec_get_next(donator_heap_no, lock);
 	}
 
-	ut_ad(lock_rec_get_first(donator) == NULL);
+	ut_ad(lock_rec_get_first(donator, donator_heap_no) == NULL);
 }
 
-/*****************************************************************
+/*************************************************************//**
 Updates the lock table when we have reorganized a page. NOTE: we copy
 also the locks set on the infimum of the page; the infimum may carry
 locks if an update of a record is occurring on the page, and its locks
 were temporarily stored on the infimum. */
-
+UNIV_INTERN
 void
 lock_move_reorganize_page(
 /*======================*/
-	page_t*	page,		/* in: old index page, now reorganized */
-	page_t*	old_page)	/* in: copy of the old, not reorganized page */
+	const buf_block_t*	block,	/*!< in: old index page, now
+					reorganized */
+	const buf_block_t*	oblock)	/*!< in: copy of the old, not
+					reorganized page */
 {
 	lock_t*		lock;
-	lock_t*		old_lock;
-	page_cur_t	cur1;
-	page_cur_t	cur2;
-	ulint		old_heap_no;
 	UT_LIST_BASE_NODE_T(lock_t)	old_locks;
 	mem_heap_t*	heap		= NULL;
-	rec_t*		sup;
 	ulint		comp;
 
 	lock_mutex_enter_kernel();
 
-	lock = lock_rec_get_first_on_page(page);
+	lock = lock_rec_get_first_on_page(block);
 
 	if (lock == NULL) {
 		lock_mutex_exit_kernel();
@@ -2512,10 +2515,9 @@ lock_move_reorganize_page(
 
 	UT_LIST_INIT(old_locks);
 
-	while (lock != NULL) {
-
+	do {
 		/* Make a copy of the lock */
-		old_lock = lock_rec_copy(lock, heap);
+		lock_t*	old_lock = lock_rec_copy(lock, heap);
 
 		UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock);
 
@@ -2527,44 +2529,59 @@ lock_move_reorganize_page(
 		}
 
 		lock = lock_rec_get_next_on_page(lock);
-	}
+	} while (lock != NULL);
 
-	sup = page_get_supremum_rec(page);
+	comp = page_is_comp(block->frame);
+	ut_ad(comp == page_is_comp(oblock->frame));
 
-	lock = UT_LIST_GET_FIRST(old_locks);
-
-	comp = page_is_comp(page);
-	ut_ad(comp == page_is_comp(old_page));
-
-	while (lock) {
+	for (lock = UT_LIST_GET_FIRST(old_locks); lock;
+	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
 		/* NOTE: we copy also the locks set on the infimum and
 		supremum of the page; the infimum may carry locks if an
 		update of a record is occurring on the page, and its locks
 		were temporarily stored on the infimum */
+		page_cur_t	cur1;
+		page_cur_t	cur2;
 
-		page_cur_set_before_first(page, &cur1);
-		page_cur_set_before_first(old_page, &cur2);
+		page_cur_set_before_first(block, &cur1);
+		page_cur_set_before_first(oblock, &cur2);
 
 		/* Set locks according to old locks */
 		for (;;) {
+			ulint	old_heap_no;
+			ulint	new_heap_no;
+
 			ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
 					      page_cur_get_rec(&cur2),
 					      rec_get_data_size_old(
 						      page_cur_get_rec(
 							      &cur2))));
-			old_heap_no = rec_get_heap_no(page_cur_get_rec(&cur2),
-						      comp);
+			if (UNIV_LIKELY(comp)) {
+				old_heap_no = rec_get_heap_no_new(
+					page_cur_get_rec(&cur2));
+				new_heap_no = rec_get_heap_no_new(
+					page_cur_get_rec(&cur1));
+			} else {
+				old_heap_no = rec_get_heap_no_old(
+					page_cur_get_rec(&cur2));
+				new_heap_no = rec_get_heap_no_old(
+					page_cur_get_rec(&cur1));
+			}
 
 			if (lock_rec_get_nth_bit(lock, old_heap_no)) {
 
+				/* Clear the bit in old_lock. */
+				ut_d(lock_rec_reset_nth_bit(lock,
+							    old_heap_no));
+
 				/* NOTE that the old lock bitmap could be too
 				small for the new heap number! */
 
-				lock_rec_add_to_queue(lock->type_mode,
-						      page_cur_get_rec(&cur1),
+				lock_rec_add_to_queue(lock->type_mode, block,
+						      new_heap_no,
 						      lock->index, lock->trx);
 
-				/* if ((page_cur_get_rec(&cur1) == sup)
+				/* if (new_heap_no == PAGE_HEAP_NO_SUPREMUM
 				&& lock_get_wait(lock)) {
 				fprintf(stderr,
 				"---\n--\n!!!Lock reorg: supr type %lu\n",
@@ -2572,8 +2589,10 @@ lock_move_reorganize_page(
 				} */
 			}
 
-			if (page_cur_get_rec(&cur1) == sup) {
+			if (UNIV_UNLIKELY
+			    (new_heap_no == PAGE_HEAP_NO_SUPREMUM)) {
 
+				ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
 				break;
 			}
 
@@ -2581,41 +2600,46 @@ lock_move_reorganize_page(
 			page_cur_move_to_next(&cur2);
 		}
 
-		/* Remember that we chained old locks on the trx_locks field */
+#ifdef UNIV_DEBUG
+		{
+			ulint	i = lock_rec_find_set_bit(lock);
 
-		lock = UT_LIST_GET_NEXT(trx_locks, lock);
+			/* Check that all locks were moved. */
+			if (UNIV_UNLIKELY(i != ULINT_UNDEFINED)) {
+				fprintf(stderr,
+					"lock_move_reorganize_page():"
+					" %lu not moved in %p\n",
+					(ulong) i, (void*) lock);
+				ut_error;
+			}
+		}
+#endif /* UNIV_DEBUG */
 	}
 
 	lock_mutex_exit_kernel();
 
 	mem_heap_free(heap);
 
-#if 0
-	ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
-				     buf_frame_get_page_no(page)));
+#ifdef UNIV_DEBUG_LOCK_VALIDATE
+	ut_ad(lock_rec_validate_page(buf_block_get_space(block),
+				     buf_block_get_page_no(block)));
 #endif
 }
 
-/*****************************************************************
+/*************************************************************//**
 Moves the explicit locks on user records to another page if a record
 list end is moved to another page. */
-
+UNIV_INTERN
 void
 lock_move_rec_list_end(
 /*===================*/
-	page_t*	new_page,	/* in: index page to move to */
-	page_t*	page,		/* in: index page */
-	rec_t*	rec)		/* in: record on page: this is the
-				first record moved */
+	const buf_block_t*	new_block,	/*!< in: index page to move to */
+	const buf_block_t*	block,		/*!< in: index page */
+	const rec_t*		rec)		/*!< in: record on page: this
+						is the first record moved */
 {
 	lock_t*		lock;
-	page_cur_t	cur1;
-	page_cur_t	cur2;
-	ulint		heap_no;
-	rec_t*		sup;
-	ulint		type_mode;
-	ulint		comp;
-	ut_ad(page == buf_frame_align(rec));
+	const ulint	comp	= page_rec_is_comp(rec);
 
 	lock_mutex_enter_kernel();
 
@@ -2625,127 +2649,148 @@ lock_move_rec_list_end(
 	table to the end of the hash chain, and lock_rec_add_to_queue
 	does not reuse locks if there are waiters in the queue. */
 
-	sup = page_get_supremum_rec(page);
+	for (lock = lock_rec_get_first_on_page(block); lock;
+	     lock = lock_rec_get_next_on_page(lock)) {
+		page_cur_t	cur1;
+		page_cur_t	cur2;
+		const ulint	type_mode = lock->type_mode;
 
-	lock = lock_rec_get_first_on_page(page);
-
-	comp = page_is_comp(page);
-
-	while (lock != NULL) {
-
-		page_cur_position(rec, &cur1);
+		page_cur_position(rec, block, &cur1);
 
 		if (page_cur_is_before_first(&cur1)) {
 			page_cur_move_to_next(&cur1);
 		}
 
-		page_cur_set_before_first(new_page, &cur2);
+		page_cur_set_before_first(new_block, &cur2);
 		page_cur_move_to_next(&cur2);
 
 		/* Copy lock requests on user records to new page and
 		reset the lock bits on the old */
 
-		while (page_cur_get_rec(&cur1) != sup) {
-			ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
-					      page_cur_get_rec(&cur2),
-					      rec_get_data_size_old(
-						      page_cur_get_rec(
-							      &cur2))));
-			heap_no = rec_get_heap_no(page_cur_get_rec(&cur1),
-						  comp);
+		while (!page_cur_is_after_last(&cur1)) {
+			ulint	heap_no;
+
+			if (comp) {
+				heap_no = rec_get_heap_no_new(
+					page_cur_get_rec(&cur1));
+			} else {
+				heap_no = rec_get_heap_no_old(
+					page_cur_get_rec(&cur1));
+				ut_ad(!memcmp(page_cur_get_rec(&cur1),
+					 page_cur_get_rec(&cur2),
+					 rec_get_data_size_old(
+						 page_cur_get_rec(&cur2))));
+			}
 
 			if (lock_rec_get_nth_bit(lock, heap_no)) {
-				type_mode = lock->type_mode;
-
 				lock_rec_reset_nth_bit(lock, heap_no);
 
-				if (lock_get_wait(lock)) {
+				if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
 					lock_reset_lock_and_trx_wait(lock);
 				}
 
+				if (comp) {
+					heap_no = rec_get_heap_no_new(
+						page_cur_get_rec(&cur2));
+				} else {
+					heap_no = rec_get_heap_no_old(
+						page_cur_get_rec(&cur2));
+				}
+
 				lock_rec_add_to_queue(type_mode,
-						      page_cur_get_rec(&cur2),
+						      new_block, heap_no,
 						      lock->index, lock->trx);
 			}
 
 			page_cur_move_to_next(&cur1);
 			page_cur_move_to_next(&cur2);
 		}
-
-		lock = lock_rec_get_next_on_page(lock);
 	}
 
 	lock_mutex_exit_kernel();
 
-#if 0
-	ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
-				     buf_frame_get_page_no(page)));
-	ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page),
-				     buf_frame_get_page_no(new_page)));
+#ifdef UNIV_DEBUG_LOCK_VALIDATE
+	ut_ad(lock_rec_validate_page(buf_block_get_space(block),
+				     buf_block_get_page_no(block)));
+	ut_ad(lock_rec_validate_page(buf_block_get_space(new_block),
+				     buf_block_get_page_no(new_block)));
 #endif
 }
 
-/*****************************************************************
+/*************************************************************//**
 Moves the explicit locks on user records to another page if a record
 list start is moved to another page. */
-
+UNIV_INTERN
 void
 lock_move_rec_list_start(
 /*=====================*/
-	page_t*	new_page,	/* in: index page to move to */
-	page_t*	page,		/* in: index page */
-	rec_t*	rec,		/* in: record on page: this is the
-				first record NOT copied */
-	rec_t*	old_end)	/* in: old previous-to-last record on
-				new_page before the records were copied */
+	const buf_block_t*	new_block,	/*!< in: index page to move to */
+	const buf_block_t*	block,		/*!< in: index page */
+	const rec_t*		rec,		/*!< in: record on page:
+						this is the first
+						record NOT copied */
+	const rec_t*		old_end)	/*!< in: old
+						previous-to-last
+						record on new_page
+						before the records
+						were copied */
 {
 	lock_t*		lock;
-	page_cur_t	cur1;
-	page_cur_t	cur2;
-	ulint		heap_no;
-	ulint		type_mode;
-	ulint		comp;
+	const ulint	comp	= page_rec_is_comp(rec);
 
-	ut_a(new_page);
+	ut_ad(block->frame == page_align(rec));
+	ut_ad(new_block->frame == page_align(old_end));
 
 	lock_mutex_enter_kernel();
 
-	lock = lock_rec_get_first_on_page(page);
-	comp = page_is_comp(page);
-	ut_ad(comp == page_is_comp(new_page));
-	ut_ad(page == buf_frame_align(rec));
+	for (lock = lock_rec_get_first_on_page(block); lock;
+	     lock = lock_rec_get_next_on_page(lock)) {
+		page_cur_t	cur1;
+		page_cur_t	cur2;
+		const ulint	type_mode = lock->type_mode;
 
-	while (lock != NULL) {
-
-		page_cur_set_before_first(page, &cur1);
+		page_cur_set_before_first(block, &cur1);
 		page_cur_move_to_next(&cur1);
 
-		page_cur_position(old_end, &cur2);
+		page_cur_position(old_end, new_block, &cur2);
 		page_cur_move_to_next(&cur2);
 
 		/* Copy lock requests on user records to new page and
 		reset the lock bits on the old */
 
 		while (page_cur_get_rec(&cur1) != rec) {
-			ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
+			ulint	heap_no;
+
+			if (comp) {
+				heap_no = rec_get_heap_no_new(
+					page_cur_get_rec(&cur1));
+			} else {
+				heap_no = rec_get_heap_no_old(
+					page_cur_get_rec(&cur1));
+				ut_ad(!memcmp(page_cur_get_rec(&cur1),
 					      page_cur_get_rec(&cur2),
 					      rec_get_data_size_old(
 						      page_cur_get_rec(
 							      &cur2))));
-			heap_no = rec_get_heap_no(page_cur_get_rec(&cur1),
-						  comp);
+			}
 
 			if (lock_rec_get_nth_bit(lock, heap_no)) {
-				type_mode = lock->type_mode;
-
 				lock_rec_reset_nth_bit(lock, heap_no);
 
-				if (lock_get_wait(lock)) {
+				if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
 					lock_reset_lock_and_trx_wait(lock);
 				}
 
+				if (comp) {
+					heap_no = rec_get_heap_no_new(
+						page_cur_get_rec(&cur2));
+				} else {
+					heap_no = rec_get_heap_no_old(
+						page_cur_get_rec(&cur2));
+				}
+
 				lock_rec_add_to_queue(type_mode,
-						      page_cur_get_rec(&cur2),
+						      new_block, heap_no,
 						      lock->index, lock->trx);
 			}
 
@@ -2753,58 +2798,77 @@ lock_move_rec_list_start(
 			page_cur_move_to_next(&cur2);
 		}
 
-		lock = lock_rec_get_next_on_page(lock);
+#ifdef UNIV_DEBUG
+		if (page_rec_is_supremum(rec)) {
+			ulint	i;
+
+			for (i = PAGE_HEAP_NO_USER_LOW;
+			     i < lock_rec_get_n_bits(lock); i++) {
+				if (UNIV_UNLIKELY
+				    (lock_rec_get_nth_bit(lock, i))) {
+
+					fprintf(stderr,
+						"lock_move_rec_list_start():"
+						" %lu not moved in %p\n",
+						(ulong) i, (void*) lock);
+					ut_error;
+				}
+			}
+		}
+#endif /* UNIV_DEBUG */
 	}
 
 	lock_mutex_exit_kernel();
-#if 0
-	ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
-				     buf_frame_get_page_no(page)));
-	ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page),
-				     buf_frame_get_page_no(new_page)));
+
+#ifdef UNIV_DEBUG_LOCK_VALIDATE
+	ut_ad(lock_rec_validate_page(buf_block_get_space(block),
+				     buf_block_get_page_no(block)));
 #endif
 }
 
-/*****************************************************************
+/*************************************************************//**
 Updates the lock table when a page is split to the right. */
-
+UNIV_INTERN
 void
 lock_update_split_right(
 /*====================*/
-	page_t*	right_page,	/* in: right page */
-	page_t*	left_page)	/* in: left page */
+	const buf_block_t*	right_block,	/*!< in: right page */
+	const buf_block_t*	left_block)	/*!< in: left page */
 {
-	ulint	comp;
+	ulint	heap_no = lock_get_min_heap_no(right_block);
+
 	lock_mutex_enter_kernel();
-	comp = page_is_comp(left_page);
-	ut_ad(comp == page_is_comp(right_page));
 
 	/* Move the locks on the supremum of the left page to the supremum
 	of the right page */
 
-	lock_rec_move(page_get_supremum_rec(right_page),
-		      page_get_supremum_rec(left_page), comp);
+	lock_rec_move(right_block, left_block,
+		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
 
 	/* Inherit the locks to the supremum of left page from the successor
 	of the infimum on right page */
 
-	lock_rec_inherit_to_gap(page_get_supremum_rec(left_page),
-				page_rec_get_next(
-					page_get_infimum_rec(right_page)));
+	lock_rec_inherit_to_gap(left_block, right_block,
+				PAGE_HEAP_NO_SUPREMUM, heap_no);
 
 	lock_mutex_exit_kernel();
 }
 
-/*****************************************************************
+/*************************************************************//**
 Updates the lock table when a page is merged to the right. */
-
+UNIV_INTERN
 void
 lock_update_merge_right(
 /*====================*/
-	rec_t*	orig_succ,	/* in: original successor of infimum
-				on the right page before merge */
-	page_t*	left_page)	/* in: merged index page which will be
-				discarded */
+	const buf_block_t*	right_block,	/*!< in: right page to
+						which merged */
+	const rec_t*		orig_succ,	/*!< in: original
+						successor of infimum
+						on the right page
+						before merge */
+	const buf_block_t*	left_block)	/*!< in: merged index
+						page which will be
+						discarded */
 {
 	lock_mutex_enter_kernel();
 
@@ -2812,170 +2876,187 @@ lock_update_merge_right(
 	original successor of infimum on the right page, to which the left
 	page was merged */
 
-	lock_rec_inherit_to_gap(orig_succ, page_get_supremum_rec(left_page));
+	lock_rec_inherit_to_gap(right_block, left_block,
+				page_rec_get_heap_no(orig_succ),
+				PAGE_HEAP_NO_SUPREMUM);
 
 	/* Reset the locks on the supremum of the left page, releasing
 	waiting transactions */
 
-	lock_rec_reset_and_release_wait(page_get_supremum_rec(left_page));
+	lock_rec_reset_and_release_wait(left_block,
+					PAGE_HEAP_NO_SUPREMUM);
 
-	lock_rec_free_all_from_discard_page(left_page);
+	lock_rec_free_all_from_discard_page(left_block);
 
 	lock_mutex_exit_kernel();
 }
 
-/*****************************************************************
+/*************************************************************//**
 Updates the lock table when the root page is copied to another in
 btr_root_raise_and_insert. Note that we leave lock structs on the
 root page, even though they do not make sense on other than leaf
 pages: the reason is that in a pessimistic update the infimum record
 of the root page will act as a dummy carrier of the locks of the record
 to be updated. */
-
+UNIV_INTERN
 void
 lock_update_root_raise(
 /*===================*/
-	page_t*	new_page,	/* in: index page to which copied */
-	page_t*	root)		/* in: root page */
+	const buf_block_t*	block,	/*!< in: index page to which copied */
+	const buf_block_t*	root)	/*!< in: root page */
 {
-	ulint	comp;
 	lock_mutex_enter_kernel();
-	comp = page_is_comp(root);
-	ut_ad(comp == page_is_comp(new_page));
 
 	/* Move the locks on the supremum of the root to the supremum
-	of new_page */
+	of block */
 
-	lock_rec_move(page_get_supremum_rec(new_page),
-		      page_get_supremum_rec(root), comp);
+	lock_rec_move(block, root,
+		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
 	lock_mutex_exit_kernel();
 }
 
-/*****************************************************************
+/*************************************************************//**
 Updates the lock table when a page is copied to another and the original page
 is removed from the chain of leaf pages, except if page is the root! */
-
+UNIV_INTERN
 void
 lock_update_copy_and_discard(
 /*=========================*/
-	page_t*	new_page,	/* in: index page to which copied */
-	page_t*	page)		/* in: index page; NOT the root! */
+	const buf_block_t*	new_block,	/*!< in: index page to
+						which copied */
+	const buf_block_t*	block)		/*!< in: index page;
+						NOT the root! */
 {
-	ulint	comp;
 	lock_mutex_enter_kernel();
-	comp = page_is_comp(page);
-	ut_ad(comp == page_is_comp(new_page));
 
 	/* Move the locks on the supremum of the old page to the supremum
 	of new_page */
 
-	lock_rec_move(page_get_supremum_rec(new_page),
-		      page_get_supremum_rec(page), comp);
-	lock_rec_free_all_from_discard_page(page);
+	lock_rec_move(new_block, block,
+		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
+	lock_rec_free_all_from_discard_page(block);
 
 	lock_mutex_exit_kernel();
 }
 
-/*****************************************************************
+/*************************************************************//**
 Updates the lock table when a page is split to the left. */
-
+UNIV_INTERN
 void
 lock_update_split_left(
 /*===================*/
-	page_t*	right_page,	/* in: right page */
-	page_t*	left_page)	/* in: left page */
+	const buf_block_t*	right_block,	/*!< in: right page */
+	const buf_block_t*	left_block)	/*!< in: left page */
 {
+	ulint	heap_no = lock_get_min_heap_no(right_block);
+
 	lock_mutex_enter_kernel();
 
 	/* Inherit the locks to the supremum of the left page from the
 	successor of the infimum on the right page */
 
-	lock_rec_inherit_to_gap(page_get_supremum_rec(left_page),
-				page_rec_get_next(
-					page_get_infimum_rec(right_page)));
+	lock_rec_inherit_to_gap(left_block, right_block,
+				PAGE_HEAP_NO_SUPREMUM, heap_no);
 
 	lock_mutex_exit_kernel();
 }
 
-/*****************************************************************
+/*************************************************************//**
 Updates the lock table when a page is merged to the left. */
-
+UNIV_INTERN
 void
 lock_update_merge_left(
 /*===================*/
-	page_t*	left_page,	/* in: left page to which merged */
-	rec_t*	orig_pred,	/* in: original predecessor of supremum
-				on the left page before merge */
-	page_t*	right_page)	/* in: merged index page which will be
-				discarded */
+	const buf_block_t*	left_block,	/*!< in: left page to
+						which merged */
+	const rec_t*		orig_pred,	/*!< in: original predecessor
+						of supremum on the left page
+						before merge */
+	const buf_block_t*	right_block)	/*!< in: merged index page
+						which will be discarded */
 {
-	rec_t*	left_next_rec;
-	rec_t*	left_supremum;
-	ulint	comp;
+	const rec_t*	left_next_rec;
+
+	ut_ad(left_block->frame == page_align(orig_pred));
+
 	lock_mutex_enter_kernel();
-	comp = page_is_comp(left_page);
-	ut_ad(comp == page_is_comp(right_page));
-	ut_ad(left_page == buf_frame_align(orig_pred));
 
-	left_next_rec = page_rec_get_next(orig_pred);
-	left_supremum = page_get_supremum_rec(left_page);
+	left_next_rec = page_rec_get_next_const(orig_pred);
 
-	if (UNIV_LIKELY(left_next_rec != left_supremum)) {
+	if (!page_rec_is_supremum(left_next_rec)) {
 
 		/* Inherit the locks on the supremum of the left page to the
 		first record which was moved from the right page */
 
-		lock_rec_inherit_to_gap(left_next_rec, left_supremum);
+		lock_rec_inherit_to_gap(left_block, left_block,
+					page_rec_get_heap_no(left_next_rec),
+					PAGE_HEAP_NO_SUPREMUM);
 
 		/* Reset the locks on the supremum of the left page,
 		releasing waiting transactions */
 
-		lock_rec_reset_and_release_wait(left_supremum);
+		lock_rec_reset_and_release_wait(left_block,
+						PAGE_HEAP_NO_SUPREMUM);
 	}
 
 	/* Move the locks from the supremum of right page to the supremum
 	of the left page */
 
-	lock_rec_move(left_supremum, page_get_supremum_rec(right_page), comp);
+	lock_rec_move(left_block, right_block,
+		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
 
-	lock_rec_free_all_from_discard_page(right_page);
+	lock_rec_free_all_from_discard_page(right_block);
 
 	lock_mutex_exit_kernel();
 }
 
-/*****************************************************************
+/*************************************************************//**
 Resets the original locks on heir and replaces them with gap type locks
 inherited from rec. */
-
+UNIV_INTERN
 void
 lock_rec_reset_and_inherit_gap_locks(
 /*=================================*/
-	rec_t*	heir,	/* in: heir record */
-	rec_t*	rec)	/* in: record */
+	const buf_block_t*	heir_block,	/*!< in: block containing the
+						record which inherits */
+	const buf_block_t*	block,		/*!< in: block containing the
+						record from which inherited;
+						does NOT reset the locks on
+						this record */
+	ulint			heir_heap_no,	/*!< in: heap_no of the
+						inheriting record */
+	ulint			heap_no)	/*!< in: heap_no of the
+						donating record */
 {
 	mutex_enter(&kernel_mutex);
 
-	lock_rec_reset_and_release_wait(heir);
+	lock_rec_reset_and_release_wait(heir_block, heir_heap_no);
 
-	lock_rec_inherit_to_gap(heir, rec);
+	lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
 
 	mutex_exit(&kernel_mutex);
 }
 
-/*****************************************************************
+/*************************************************************//**
 Updates the lock table when a page is discarded. */
-
+UNIV_INTERN
 void
 lock_update_discard(
 /*================*/
-	rec_t*	heir,	/* in: record which will inherit the locks */
-	page_t*	page)	/* in: index page which will be discarded */
+	const buf_block_t*	heir_block,	/*!< in: index page
+						which will inherit the locks */
+	ulint			heir_heap_no,	/*!< in: heap_no of the record
+						which will inherit the locks */
+	const buf_block_t*	block)		/*!< in: index page
+						which will be discarded */
 {
-	rec_t*	rec;
+	const page_t*	page = block->frame;
+	const rec_t*	rec;
+	ulint		heap_no;
 
 	lock_mutex_enter_kernel();
 
-	if (NULL == lock_rec_get_first_on_page(page)) {
+	if (!lock_rec_get_first_on_page(block)) {
 		/* No locks exist on page, nothing to do */
 
 		lock_mutex_exit_kernel();
@@ -2986,128 +3067,179 @@ lock_update_discard(
 	/* Inherit all the locks on the page to the record and reset all
 	the locks on the page */
 
-	rec = page_get_infimum_rec(page);
+	if (page_is_comp(page)) {
+		rec = page + PAGE_NEW_INFIMUM;
 
-	for (;;) {
-		lock_rec_inherit_to_gap(heir, rec);
+		do {
+			heap_no = rec_get_heap_no_new(rec);
 
-		/* Reset the locks on rec, releasing waiting transactions */
+			lock_rec_inherit_to_gap(heir_block, block,
+						heir_heap_no, heap_no);
 
-		lock_rec_reset_and_release_wait(rec);
+			lock_rec_reset_and_release_wait(block, heap_no);
 
-		if (page_rec_is_supremum(rec)) {
+			rec = page + rec_get_next_offs(rec, TRUE);
+		} while (heap_no != PAGE_HEAP_NO_SUPREMUM);
+	} else {
+		rec = page + PAGE_OLD_INFIMUM;
 
-			break;
-		}
+		do {
+			heap_no = rec_get_heap_no_old(rec);
 
-		rec = page_rec_get_next(rec);
+			lock_rec_inherit_to_gap(heir_block, block,
+						heir_heap_no, heap_no);
+
+			lock_rec_reset_and_release_wait(block, heap_no);
+
+			rec = page + rec_get_next_offs(rec, FALSE);
+		} while (heap_no != PAGE_HEAP_NO_SUPREMUM);
 	}
 
-	lock_rec_free_all_from_discard_page(page);
+	lock_rec_free_all_from_discard_page(block);
 
 	lock_mutex_exit_kernel();
 }
 
-/*****************************************************************
+/*************************************************************//**
 Updates the lock table when a new user record is inserted. */
-
+UNIV_INTERN
 void
 lock_update_insert(
 /*===============*/
-	rec_t*	rec)	/* in: the inserted record */
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec)	/*!< in: the inserted record */
 {
-	lock_mutex_enter_kernel();
+	ulint	receiver_heap_no;
+	ulint	donator_heap_no;
+
+	ut_ad(block->frame == page_align(rec));
 
 	/* Inherit the gap-locking locks for rec, in gap mode, from the next
 	record */
 
-	lock_rec_inherit_to_gap_if_gap_lock(rec, page_rec_get_next(rec));
+	if (page_rec_is_comp(rec)) {
+		receiver_heap_no = rec_get_heap_no_new(rec);
+		donator_heap_no = rec_get_heap_no_new(
+			page_rec_get_next_low(rec, TRUE));
+	} else {
+		receiver_heap_no = rec_get_heap_no_old(rec);
+		donator_heap_no = rec_get_heap_no_old(
+			page_rec_get_next_low(rec, FALSE));
+	}
 
+	lock_mutex_enter_kernel();
+	lock_rec_inherit_to_gap_if_gap_lock(block,
+					    receiver_heap_no, donator_heap_no);
 	lock_mutex_exit_kernel();
 }
 
-/*****************************************************************
+/*************************************************************//**
 Updates the lock table when a record is removed. */
-
+UNIV_INTERN
 void
 lock_update_delete(
 /*===============*/
-	rec_t*	rec)	/* in: the record to be removed */
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec)	/*!< in: the record to be removed */
 {
+	const page_t*	page = block->frame;
+	ulint		heap_no;
+	ulint		next_heap_no;
+
+	ut_ad(page == page_align(rec));
+
+	if (page_is_comp(page)) {
+		heap_no = rec_get_heap_no_new(rec);
+		next_heap_no = rec_get_heap_no_new(page
+						   + rec_get_next_offs(rec,
+								       TRUE));
+	} else {
+		heap_no = rec_get_heap_no_old(rec);
+		next_heap_no = rec_get_heap_no_old(page
+						   + rec_get_next_offs(rec,
+								       FALSE));
+	}
+
 	lock_mutex_enter_kernel();
 
 	/* Let the next record inherit the locks from rec, in gap mode */
 
-	lock_rec_inherit_to_gap(page_rec_get_next(rec), rec);
+	lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);
 
 	/* Reset the lock bits on rec and release waiting transactions */
 
-	lock_rec_reset_and_release_wait(rec);
+	lock_rec_reset_and_release_wait(block, heap_no);
 
 	lock_mutex_exit_kernel();
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Stores on the page infimum record the explicit locks of another record.
 This function is used to store the lock state of a record when it is
 updated and the size of the record changes in the update. The record
 is moved in such an update, perhaps to another page. The infimum record
 acts as a dummy carrier record, taking care of lock releases while the
 actual record is being moved. */
-
+UNIV_INTERN
 void
 lock_rec_store_on_page_infimum(
 /*===========================*/
-	page_t*	page,	/* in: page containing the record */
-	rec_t*	rec)	/* in: record whose lock state is stored
-			on the infimum record of the same page; lock
-			bits are reset on the record */
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec)	/*!< in: record whose lock state
+					is stored on the infimum
+					record of the same page; lock
+					bits are reset on the
+					record */
 {
-	ut_ad(page == buf_frame_align(rec));
+	ulint	heap_no = page_rec_get_heap_no(rec);
+
+	ut_ad(block->frame == page_align(rec));
 
 	lock_mutex_enter_kernel();
 
-	lock_rec_move(page_get_infimum_rec(page), rec, page_is_comp(page));
+	lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
 
 	lock_mutex_exit_kernel();
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Restores the state of explicit lock requests on a single record, where the
 state was stored on the infimum of the page. */
-
+UNIV_INTERN
 void
 lock_rec_restore_from_page_infimum(
 /*===============================*/
-	rec_t*	rec,	/* in: record whose lock state is restored */
-	page_t*	page)	/* in: page (rec is not necessarily on this page)
-			whose infimum stored the lock state; lock bits are
-			reset on the infimum */
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec,	/*!< in: record whose lock state
+					is restored */
+	const buf_block_t*	donator)/*!< in: page (rec is not
+					necessarily on this page)
+					whose infimum stored the lock
+					state; lock bits are reset on
+					the infimum */
 {
-	ulint	comp;
-	lock_mutex_enter_kernel();
-	comp = page_is_comp(page);
-	ut_ad(!comp == !page_rec_is_comp(rec));
+	ulint	heap_no = page_rec_get_heap_no(rec);
 
-	lock_rec_move(rec, page_get_infimum_rec(page), comp);
+	lock_mutex_enter_kernel();
+
+	lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
 
 	lock_mutex_exit_kernel();
 }
 
 /*=========== DEADLOCK CHECKING ======================================*/
 
-/************************************************************************
-Checks if a lock request results in a deadlock. */
+/********************************************************************//**
+Checks if a lock request results in a deadlock.
+@return TRUE if a deadlock was detected and we chose trx as a victim;
+FALSE if no deadlock, or there was a deadlock, but we chose other
+transaction(s) as victim(s) */
 static
 ibool
 lock_deadlock_occurs(
 /*=================*/
-			/* out: TRUE if a deadlock was detected and we
-			chose trx as a victim; FALSE if no deadlock, or
-			there was a deadlock, but we chose other
-			transaction(s) as victim(s) */
-	lock_t*	lock,	/* in: lock the transaction is requesting */
-	trx_t*	trx)	/* in: transaction */
+	lock_t*	lock,	/*!< in: lock the transaction is requesting */
+	trx_t*	trx)	/*!< in: transaction */
 {
 	dict_table_t*	table;
 	dict_index_t*	index;
@@ -3139,8 +3271,8 @@ retry:
 		goto retry;
 	}
 
-	if (ret == LOCK_VICTIM_IS_START) {
-		if (lock_get_type(lock) & LOCK_TABLE) {
+	if (UNIV_UNLIKELY(ret == LOCK_VICTIM_IS_START)) {
+		if (lock_get_type_low(lock) & LOCK_TABLE) {
 			table = lock->un_member.tab_lock.table;
 			index = NULL;
 		} else {
@@ -3159,27 +3291,24 @@ retry:
 	return(FALSE);
 }
 
-/************************************************************************
-Looks recursively for a deadlock. */
+/********************************************************************//**
+Looks recursively for a deadlock.
+@return 0 if no deadlock found, LOCK_VICTIM_IS_START if there was a
+deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
+deadlock was found and we chose some other trx as a victim: we must do
+the search again in this last case because there may be another
+deadlock! */
 static
 ulint
 lock_deadlock_recursive(
 /*====================*/
-				/* out: 0 if no deadlock found,
-				LOCK_VICTIM_IS_START if there was a deadlock
-				and we chose 'start' as the victim,
-				LOCK_VICTIM_IS_OTHER if a deadlock
-				was found and we chose some other trx as a
-				victim: we must do the search again in this
-				last case because there may be another
-				deadlock! */
-	trx_t*	start,		/* in: recursion starting point */
-	trx_t*	trx,		/* in: a transaction waiting for a lock */
-	lock_t*	wait_lock,	/* in: the lock trx is waiting to be granted */
-	ulint*	cost,		/* in/out: number of calculation steps thus
+	trx_t*	start,		/*!< in: recursion starting point */
+	trx_t*	trx,		/*!< in: a transaction waiting for a lock */
+	lock_t*	wait_lock,	/*!< in: the lock trx is waiting to be granted */
+	ulint*	cost,		/*!< in/out: number of calculation steps thus
 				far: if this exceeds LOCK_MAX_N_STEPS_...
 				we return LOCK_VICTIM_IS_START */
-	ulint	depth)		/* in: recursion depth: if this exceeds
+	ulint	depth)		/*!< in: recursion depth: if this exceeds
 				LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
 				return LOCK_VICTIM_IS_START */
 {
@@ -3204,7 +3333,7 @@ lock_deadlock_recursive(
 
 	lock = wait_lock;
 
-	if (lock_get_type(wait_lock) == LOCK_REC) {
+	if (lock_get_type_low(wait_lock) == LOCK_REC) {
 
 		bit_no = lock_rec_find_set_bit(wait_lock);
 
@@ -3214,15 +3343,15 @@ lock_deadlock_recursive(
 	/* Look at the locks ahead of wait_lock in the lock queue */
 
 	for (;;) {
-		if (lock_get_type(lock) & LOCK_TABLE) {
+		if (lock_get_type_low(lock) & LOCK_TABLE) {
 
 			lock = UT_LIST_GET_PREV(un_member.tab_lock.locks,
 						lock);
 		} else {
-			ut_ad(lock_get_type(lock) == LOCK_REC);
+			ut_ad(lock_get_type_low(lock) == LOCK_REC);
 			ut_a(bit_no != ULINT_UNDEFINED);
 
-			lock = lock_rec_get_prev(lock, bit_no);
+			lock = (lock_t*) lock_rec_get_prev(lock, bit_no);
 		}
 
 		if (lock == NULL) {
@@ -3258,7 +3387,7 @@ lock_deadlock_recursive(
 				fputs("*** (1) WAITING FOR THIS LOCK"
 				      " TO BE GRANTED:\n", ef);
 
-				if (lock_get_type(wait_lock) == LOCK_REC) {
+				if (lock_get_type_low(wait_lock) == LOCK_REC) {
 					lock_rec_print(ef, wait_lock);
 				} else {
 					lock_table_print(ef, wait_lock);
@@ -3270,7 +3399,7 @@ lock_deadlock_recursive(
 
 				fputs("*** (2) HOLDS THE LOCK(S):\n", ef);
 
-				if (lock_get_type(lock) == LOCK_REC) {
+				if (lock_get_type_low(lock) == LOCK_REC) {
 					lock_rec_print(ef, lock);
 				} else {
 					lock_table_print(ef, lock);
@@ -3279,7 +3408,7 @@ lock_deadlock_recursive(
 				fputs("*** (2) WAITING FOR THIS LOCK"
 				      " TO BE GRANTED:\n", ef);
 
-				if (lock_get_type(start->wait_lock)
+				if (lock_get_type_low(start->wait_lock)
 				    == LOCK_REC) {
 					lock_rec_print(ef, start->wait_lock);
 				} else {
@@ -3355,18 +3484,18 @@ lock_deadlock_recursive(
 
 /*========================= TABLE LOCKS ==============================*/
 
-/*************************************************************************
+/*********************************************************************//**
 Creates a table lock object and adds it as the last in the lock queue
-of the table. Does NOT check for deadlocks or lock compatibility. */
+of the table. Does NOT check for deadlocks or lock compatibility.
+@return	own: new lock object */
 UNIV_INLINE
 lock_t*
 lock_table_create(
 /*==============*/
-				/* out, own: new lock object */
-	dict_table_t*	table,	/* in: database table in dictionary cache */
-	ulint		type_mode,/* in: lock mode possibly ORed with
+	dict_table_t*	table,	/*!< in: database table in dictionary cache */
+	ulint		type_mode,/*!< in: lock mode possibly ORed with
 				LOCK_WAIT */
-	trx_t*		trx)	/* in: trx */
+	trx_t*		trx)	/*!< in: trx */
 {
 	lock_t*	lock;
 
@@ -3377,15 +3506,16 @@ lock_table_create(
 		++table->n_waiting_or_granted_auto_inc_locks;
 	}
 
+	/* For AUTOINC locking we reuse the lock instance only if
+	there is no wait involved else we allocate the waiting lock
+	from the transaction lock heap. */
 	if (type_mode == LOCK_AUTO_INC) {
-		/* Only one trx can have the lock on the table
-		at a time: we may use the memory preallocated
-		to the table object */
 
-		lock = table->auto_inc_lock;
+		lock = table->autoinc_lock;
 
-		ut_a(trx->auto_inc_lock == NULL);
-		trx->auto_inc_lock = lock;
+		table->autoinc_trx = trx;
+
+		ib_vector_push(trx->autoinc_locks, lock);
 	} else {
 		lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t));
 	}
@@ -3399,7 +3529,7 @@ lock_table_create(
 
 	UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
 
-	if (type_mode & LOCK_WAIT) {
+	if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
 
 		lock_set_lock_and_trx_wait(lock, trx);
 	}
@@ -3407,7 +3537,7 @@ lock_table_create(
 	return(lock);
 }
 
-/*****************************************************************
+/*************************************************************//**
 Removes a table lock request from the queue and the trx list of locks;
 this is a low-level function which does NOT check if waiting requests
 can now be granted. */
@@ -3415,18 +3545,41 @@ UNIV_INLINE
 void
 lock_table_remove_low(
 /*==================*/
-	lock_t*	lock)	/* in: table lock */
+	lock_t*	lock)	/*!< in: table lock */
 {
-	dict_table_t*	table;
 	trx_t*		trx;
+	dict_table_t*	table;
 
 	ut_ad(mutex_own(&kernel_mutex));
 
-	table = lock->un_member.tab_lock.table;
 	trx = lock->trx;
+	table = lock->un_member.tab_lock.table;
 
-	if (lock == trx->auto_inc_lock) {
-		trx->auto_inc_lock = NULL;
+	/* Remove the table from the transaction's AUTOINC vector, if
+	the lock that is being release is an AUTOINC lock. */
+	if (lock_get_mode(lock) == LOCK_AUTO_INC) {
+
+		/* The table's AUTOINC lock can get transferred to
+		another transaction before we get here. */
+		if (table->autoinc_trx == trx) {
+			table->autoinc_trx = NULL;
+		}
+
+		/* The locks must be freed in the reverse order from
+		the one in which they were acquired. This is to avoid
+		traversing the AUTOINC lock vector unnecessarily. 
+
+		We only store locks that were granted in the
+		trx->autoinc_locks vector (see lock_table_create()
+		and lock_grant()). Therefore it can be empty and we
+		need to check for that. */
+
+		if (!ib_vector_is_empty(trx->autoinc_locks)) {
+			lock_t*	autoinc_lock;
+
+			autoinc_lock = ib_vector_pop(trx->autoinc_locks);
+			ut_a(autoinc_lock == lock);
+		}
 
 		ut_a(table->n_waiting_or_granted_auto_inc_locks > 0);
 		--table->n_waiting_or_granted_auto_inc_locks;
@@ -3436,23 +3589,21 @@ lock_table_remove_low(
 	UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Enqueues a waiting request for a table lock which cannot be granted
-immediately. Checks for deadlocks. */
+immediately. Checks for deadlocks.
+@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
+DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
+transaction was chosen as a victim, and we got the lock immediately:
+no need to wait then */
 static
 ulint
 lock_table_enqueue_waiting(
 /*=======================*/
-				/* out: DB_LOCK_WAIT, DB_DEADLOCK, or
-				DB_QUE_THR_SUSPENDED, or DB_SUCCESS;
-				DB_SUCCESS means that there was a deadlock,
-				but another transaction was chosen as a
-				victim, and we got the lock immediately:
-				no need to wait then */
-	ulint		mode,	/* in: lock mode this transaction is
+	ulint		mode,	/*!< in: lock mode this transaction is
 				requesting */
-	dict_table_t*	table,	/* in: table */
-	que_thr_t*	thr)	/* in: query thread */
+	dict_table_t*	table,	/*!< in: table */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	lock_t*	lock;
 	trx_t*	trx;
@@ -3471,7 +3622,11 @@ lock_table_enqueue_waiting(
 
 	trx = thr_get_trx(thr);
 
-	if (trx->dict_operation) {
+	switch (trx_get_dict_operation(trx)) {
+	case TRX_DICT_OP_NONE:
+		break;
+	case TRX_DICT_OP_TABLE:
+	case TRX_DICT_OP_INDEX:
 		ut_print_timestamp(stderr);
 		fputs("  InnoDB: Error: a table lock wait happens"
 		      " in a dictionary operation!\n"
@@ -3514,19 +3669,19 @@ lock_table_enqueue_waiting(
 	return(DB_LOCK_WAIT);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if other transactions have an incompatible mode lock request in
 the lock queue. */
 UNIV_INLINE
 ibool
 lock_table_other_has_incompatible(
 /*==============================*/
-	trx_t*		trx,	/* in: transaction, or NULL if all
+	trx_t*		trx,	/*!< in: transaction, or NULL if all
 				transactions should be included */
-	ulint		wait,	/* in: LOCK_WAIT if also waiting locks are
+	ulint		wait,	/*!< in: LOCK_WAIT if also waiting locks are
 				taken into account, or 0 if not */
-	dict_table_t*	table,	/* in: table */
-	ulint		mode)	/* in: lock mode */
+	dict_table_t*	table,	/*!< in: table */
+	enum lock_mode	mode)	/*!< in: lock mode */
 {
 	lock_t*	lock;
 
@@ -3549,20 +3704,19 @@ lock_table_other_has_incompatible(
 	return(FALSE);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Locks the specified database table in the mode given. If the lock cannot
-be granted immediately, the query thread is put to wait. */
-
+be granted immediately, the query thread is put to wait.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
 ulint
 lock_table(
 /*=======*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is set,
 				does nothing */
-	dict_table_t*	table,	/* in: database table in dictionary cache */
-	ulint		mode,	/* in: lock mode */
-	que_thr_t*	thr)	/* in: query thread */
+	dict_table_t*	table,	/*!< in: database table in dictionary cache */
+	enum lock_mode	mode,	/*!< in: lock mode */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	trx_t*	trx;
 	ulint	err;
@@ -3613,40 +3767,14 @@ lock_table(
 	return(DB_SUCCESS);
 }
 
-/*************************************************************************
-Checks if there are any locks set on the table. */
-
-ibool
-lock_is_on_table(
-/*=============*/
-				/* out: TRUE if there are lock(s) */
-	dict_table_t*	table)	/* in: database table in dictionary cache */
-{
-	ibool	ret;
-
-	ut_ad(table);
-
-	lock_mutex_enter_kernel();
-
-	if (UT_LIST_GET_LAST(table->locks)) {
-		ret = TRUE;
-	} else {
-		ret = FALSE;
-	}
-
-	lock_mutex_exit_kernel();
-
-	return(ret);
-}
-
-/*************************************************************************
-Checks if a waiting table lock request still has to wait in a queue. */
+/*********************************************************************//**
+Checks if a waiting table lock request still has to wait in a queue.
+@return	TRUE if still has to wait */
 static
 ibool
 lock_table_has_to_wait_in_queue(
 /*============================*/
-				/* out: TRUE if still has to wait */
-	lock_t*	wait_lock)	/* in: waiting table lock */
+	lock_t*	wait_lock)	/*!< in: waiting table lock */
 {
 	dict_table_t*	table;
 	lock_t*		lock;
@@ -3671,7 +3799,7 @@ lock_table_has_to_wait_in_queue(
 	return(FALSE);
 }
 
-/*****************************************************************
+/*************************************************************//**
 Removes a table lock request, waiting or granted, from the queue and grants
 locks to other transactions in the queue, if they now are entitled to a
 lock. */
@@ -3679,14 +3807,14 @@ static
 void
 lock_table_dequeue(
 /*===============*/
-	lock_t*	in_lock)/* in: table lock object; transactions waiting
+	lock_t*	in_lock)/*!< in: table lock object; transactions waiting
 			behind will get their lock requests granted, if
 			they are now qualified to it */
 {
 	lock_t*	lock;
 
 	ut_ad(mutex_own(&kernel_mutex));
-	ut_a(lock_get_type(in_lock) == LOCK_TABLE);
+	ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
 
 	lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
 
@@ -3710,30 +3838,32 @@ lock_table_dequeue(
 
 /*=========================== LOCK RELEASE ==============================*/
 
-/*****************************************************************
+/*************************************************************//**
 Removes a granted record lock of a transaction from the queue and grants
 locks to other transactions waiting in the queue if they now are entitled
 to a lock. */
-
+UNIV_INTERN
 void
 lock_rec_unlock(
 /*============*/
-	trx_t*	trx,		/* in: transaction that has set a record
-				lock */
-	rec_t*	rec,		/* in: record */
-	ulint	lock_mode)	/* in: LOCK_S or LOCK_X */
+	trx_t*			trx,	/*!< in: transaction that has
+					set a record lock */
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec,	/*!< in: record */
+	enum lock_mode		lock_mode)/*!< in: LOCK_S or LOCK_X */
 {
 	lock_t*	lock;
 	lock_t*	release_lock	= NULL;
 	ulint	heap_no;
 
 	ut_ad(trx && rec);
+	ut_ad(block->frame == page_align(rec));
+
+	heap_no = page_rec_get_heap_no(rec);
 
 	mutex_enter(&kernel_mutex);
 
-	heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-
-	lock = lock_rec_get_first(rec);
+	lock = lock_rec_get_first(block, heap_no);
 
 	/* Find the last lock with the same lock_mode and transaction
 	from the record. */
@@ -3744,7 +3874,7 @@ lock_rec_unlock(
 			ut_a(!lock_get_wait(lock));
 		}
 
-		lock = lock_rec_get_next(rec, lock);
+		lock = lock_rec_get_next(heap_no, lock);
 	}
 
 	/* If a record lock is found, release the record lock */
@@ -3764,7 +3894,7 @@ lock_rec_unlock(
 
 	/* Check if we can now grant waiting lock requests */
 
-	lock = lock_rec_get_first(rec);
+	lock = lock_rec_get_first(block, heap_no);
 
 	while (lock != NULL) {
 		if (lock_get_wait(lock)
@@ -3774,54 +3904,20 @@ lock_rec_unlock(
 			lock_grant(lock);
 		}
 
-		lock = lock_rec_get_next(rec, lock);
+		lock = lock_rec_get_next(heap_no, lock);
 	}
 
 	mutex_exit(&kernel_mutex);
 }
 
-/*************************************************************************
-Releases a table lock.
-Releases possible other transactions waiting for this lock. */
-
-void
-lock_table_unlock(
-/*==============*/
-	lock_t*	lock)	/* in: lock */
-{
-	mutex_enter(&kernel_mutex);
-
-	lock_table_dequeue(lock);
-
-	mutex_exit(&kernel_mutex);
-}
-
-/*************************************************************************
-Releases an auto-inc lock a transaction possibly has on a table.
-Releases possible other transactions waiting for this lock. */
-
-void
-lock_table_unlock_auto_inc(
-/*=======================*/
-	trx_t*	trx)	/* in: transaction */
-{
-	if (trx->auto_inc_lock) {
-		mutex_enter(&kernel_mutex);
-
-		lock_table_dequeue(trx->auto_inc_lock);
-
-		mutex_exit(&kernel_mutex);
-	}
-}
-
-/*************************************************************************
+/*********************************************************************//**
 Releases transaction locks, and releases possible other transactions waiting
 because of these locks. */
-
+UNIV_INTERN
 void
 lock_release_off_kernel(
 /*====================*/
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in: transaction */
 {
 	dict_table_t*	table;
 	ulint		count;
@@ -3837,15 +3933,14 @@ lock_release_off_kernel(
 
 		count++;
 
-		if (lock_get_type(lock) == LOCK_REC) {
+		if (lock_get_type_low(lock) == LOCK_REC) {
 
 			lock_rec_dequeue_from_page(lock);
 		} else {
-			ut_ad(lock_get_type(lock) & LOCK_TABLE);
+			ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
 
 			if (lock_get_mode(lock) != LOCK_IS
-			    && 0 != ut_dulint_cmp(trx->undo_no,
-						  ut_dulint_zero)) {
+			    && !ut_dulint_is_zero(trx->undo_no)) {
 
 				/* The trx may have modified the table. We
 				block the use of the MySQL query cache for
@@ -3874,27 +3969,32 @@ lock_release_off_kernel(
 		lock = UT_LIST_GET_LAST(trx->trx_locks);
 	}
 
-	mem_heap_empty(trx->lock_heap);
+	ut_a(ib_vector_size(trx->autoinc_locks) == 0);
 
-	ut_a(trx->auto_inc_lock == NULL);
+	mem_heap_empty(trx->lock_heap);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Cancels a waiting lock request and releases possible other transactions
 waiting behind it. */
-
+UNIV_INTERN
 void
 lock_cancel_waiting_and_release(
 /*============================*/
-	lock_t*	lock)	/* in: waiting lock request */
+	lock_t*	lock)	/*!< in: waiting lock request */
 {
 	ut_ad(mutex_own(&kernel_mutex));
 
-	if (lock_get_type(lock) == LOCK_REC) {
+	if (lock_get_type_low(lock) == LOCK_REC) {
 
 		lock_rec_dequeue_from_page(lock);
 	} else {
-		ut_ad(lock_get_type(lock) & LOCK_TABLE);
+		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+
+		if (lock->trx->autoinc_locks != NULL) {
+			/* Release the transaction's AUTOINC locks/ */
+			lock_release_autoinc_locks(lock->trx);
+		}
 
 		lock_table_dequeue(lock);
 	}
@@ -3914,7 +4014,7 @@ lock_cancel_waiting_and_release(
 	 || lock_get_mode(lock) == LOCK_X)
 
 
-/*************************************************************************
+/*********************************************************************//**
 Removes locks of a transaction on a table to be dropped.
 If remove_also_table_sx_locks is TRUE then table-level S and X locks are
 also removed in addition to other table-level and record-level locks.
@@ -3923,9 +4023,9 @@ static
 void
 lock_remove_all_on_table_for_trx(
 /*=============================*/
-	dict_table_t*	table,			/* in: table to be dropped */
-	trx_t*		trx,			/* in: a transaction */
-	ibool		remove_also_table_sx_locks)/* in: also removes
+	dict_table_t*	table,			/*!< in: table to be dropped */
+	trx_t*		trx,			/*!< in: a transaction */
+	ibool		remove_also_table_sx_locks)/*!< in: also removes
 						table S and X locks */
 {
 	lock_t*	lock;
@@ -3938,12 +4038,12 @@ lock_remove_all_on_table_for_trx(
 	while (lock != NULL) {
 		prev_lock = UT_LIST_GET_PREV(trx_locks, lock);
 
-		if (lock_get_type(lock) == LOCK_REC
+		if (lock_get_type_low(lock) == LOCK_REC
 		    && lock->index->table == table) {
 			ut_a(!lock_get_wait(lock));
 
 			lock_rec_discard(lock);
-		} else if (lock_get_type(lock) & LOCK_TABLE
+		} else if (lock_get_type_low(lock) & LOCK_TABLE
 			   && lock->un_member.tab_lock.table == table
 			   && (remove_also_table_sx_locks
 			       || !IS_LOCK_S_OR_X(lock))) {
@@ -3957,18 +4057,18 @@ lock_remove_all_on_table_for_trx(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Removes locks on a table to be dropped or truncated.
 If remove_also_table_sx_locks is TRUE then table-level S and X locks are
 also removed in addition to other table-level and record-level locks.
 No lock, that is going to be removed, is allowed to be a wait lock. */
-
+UNIV_INTERN
 void
 lock_remove_all_on_table(
 /*=====================*/
-	dict_table_t*	table,			/* in: table to be dropped
+	dict_table_t*	table,			/*!< in: table to be dropped
 						or truncated */
-	ibool		remove_also_table_sx_locks)/* in: also removes
+	ibool		remove_also_table_sx_locks)/*!< in: also removes
 						table S and X locks */
 {
 	lock_t*	lock;
@@ -4024,23 +4124,23 @@ lock_remove_all_on_table(
 
 /*===================== VALIDATION AND DEBUGGING  ====================*/
 
-/*************************************************************************
+/*********************************************************************//**
 Prints info of a table lock. */
-
+UNIV_INTERN
 void
 lock_table_print(
 /*=============*/
-	FILE*	file,	/* in: file where to print */
-	lock_t*	lock)	/* in: table type lock */
+	FILE*		file,	/*!< in: file where to print */
+	const lock_t*	lock)	/*!< in: table type lock */
 {
 	ut_ad(mutex_own(&kernel_mutex));
-	ut_a(lock_get_type(lock) == LOCK_TABLE);
+	ut_a(lock_get_type_low(lock) == LOCK_TABLE);
 
 	fputs("TABLE LOCK table ", file);
 	ut_print_name(file, lock->trx, TRUE,
 		      lock->un_member.tab_lock.table->name);
-	fprintf(file, " trx id %lu %lu",
-		(ulong) (lock->trx)->id.high, (ulong) (lock->trx)->id.low);
+	fprintf(file, " trx id " TRX_ID_FMT,
+		TRX_ID_PREP_PRINTF(lock->trx->id));
 
 	if (lock_get_mode(lock) == LOCK_S) {
 		fputs(" lock mode S", file);
@@ -4064,27 +4164,27 @@ lock_table_print(
 	putc('\n', file);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Prints info of a record lock. */
-
+UNIV_INTERN
 void
 lock_rec_print(
 /*===========*/
-	FILE*	file,	/* in: file where to print */
-	lock_t*	lock)	/* in: record type lock */
+	FILE*		file,	/*!< in: file where to print */
+	const lock_t*	lock)	/*!< in: record type lock */
 {
-	page_t*		page;
-	ulint		space;
-	ulint		page_no;
-	ulint		i;
-	mtr_t		mtr;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	const buf_block_t*	block;
+	ulint			space;
+	ulint			page_no;
+	ulint			i;
+	mtr_t			mtr;
+	mem_heap_t*		heap		= NULL;
+	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*			offsets		= offsets_;
+	rec_offs_init(offsets_);
 
 	ut_ad(mutex_own(&kernel_mutex));
-	ut_a(lock_get_type(lock) == LOCK_REC);
+	ut_a(lock_get_type_low(lock) == LOCK_REC);
 
 	space = lock->un_member.rec_lock.space;
 	page_no = lock->un_member.rec_lock.page_no;
@@ -4093,9 +4193,8 @@ lock_rec_print(
 		(ulong) space, (ulong) page_no,
 		(ulong) lock_rec_get_n_bits(lock));
 	dict_index_name_print(file, lock->trx, lock->index);
-	fprintf(file, " trx id %lu %lu",
-		(ulong) (lock->trx)->id.high,
-		(ulong) (lock->trx)->id.low);
+	fprintf(file, " trx id " TRX_ID_FMT,
+		TRX_ID_PREP_PRINTF(lock->trx->id));
 
 	if (lock_get_mode(lock) == LOCK_S) {
 		fputs(" lock mode S", file);
@@ -4125,48 +4224,29 @@ lock_rec_print(
 
 	putc('\n', file);
 
-	/* If the page is not in the buffer pool, we cannot load it
-	because we have the kernel mutex and ibuf operations would
-	break the latching order */
+	block = buf_page_try_get(space, page_no, &mtr);
 
-	page = buf_page_get_gen(space, page_no, RW_NO_LATCH,
-				NULL, BUF_GET_IF_IN_POOL,
-				__FILE__, __LINE__, &mtr);
-	if (page) {
-		page = buf_page_get_nowait(space, page_no, RW_S_LATCH, &mtr);
+	if (block) {
+		for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
 
-		if (!page) {
-			/* Let us try to get an X-latch. If the current thread
-			is holding an X-latch on the page, we cannot get an
-			S-latch. */
+			if (lock_rec_get_nth_bit(lock, i)) {
 
-			page = buf_page_get_nowait(space, page_no, RW_X_LATCH,
-						   &mtr);
-		}
-	}
-
-	if (page) {
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-	}
-
-	for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
-
-		if (lock_rec_get_nth_bit(lock, i)) {
-
-			fprintf(file, "Record lock, heap no %lu ", (ulong) i);
-
-			if (page) {
-				rec_t*	rec
-					= page_find_rec_with_heap_no(page, i);
+				const rec_t*	rec
+					= page_find_rec_with_heap_no(
+						buf_block_get_frame(block), i);
 				offsets = rec_get_offsets(
 					rec, lock->index, offsets,
 					ULINT_UNDEFINED, &heap);
-				rec_print_new(file, rec, offsets);
-			}
 
-			putc('\n', file);
+				fprintf(file, "Record lock, heap no %lu ",
+					(ulong) i);
+				rec_print_new(file, rec, offsets);
+				putc('\n', file);
+			}
+		}
+	} else {
+		for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+			fprintf(file, "Record lock, heap no %lu\n", (ulong) i);
 		}
 	}
 
@@ -4176,8 +4256,6 @@ lock_rec_print(
 	}
 }
 
-#ifndef UNIV_HOTBACKUP
-
 #ifdef UNIV_DEBUG
 /* Print the number of lock structs from lock_print_info_summary() only
 in non-production builds for performance reasons, see
@@ -4186,8 +4264,9 @@ http://bugs.mysql.com/36942 */
 #endif /* UNIV_DEBUG */
 
 #ifdef PRINT_NUM_OF_LOCK_STRUCTS
-/*************************************************************************
-Calculates the number of record lock structs in the record lock hash table. */
+/*********************************************************************//**
+Calculates the number of record lock structs in the record lock hash table.
+@return	number of record locks */
 static
 ulint
 lock_get_n_rec_locks(void)
@@ -4214,13 +4293,13 @@ lock_get_n_rec_locks(void)
 }
 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
 
-/*************************************************************************
+/*********************************************************************//**
 Prints info of locks for all transactions. */
-
+UNIV_INTERN
 void
 lock_print_info_summary(
 /*====================*/
-	FILE*	file)	/* in: file where to print */
+	FILE*	file)	/*!< in: file where to print */
 {
 	/* We must protect the MySQL thd->query field with a MySQL mutex, and
 	because the MySQL mutex must be reserved before the kernel_mutex of
@@ -4241,16 +4320,14 @@ lock_print_info_summary(
 	      "TRANSACTIONS\n"
 	      "------------\n", file);
 
-	fprintf(file, "Trx id counter %lu %lu\n",
-		(ulong) ut_dulint_get_high(trx_sys->max_trx_id),
-		(ulong) ut_dulint_get_low(trx_sys->max_trx_id));
+	fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
+		TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
 
 	fprintf(file,
-		"Purge done for trx's n:o < %lu %lu undo n:o < %lu %lu\n",
-		(ulong) ut_dulint_get_high(purge_sys->purge_trx_no),
-		(ulong) ut_dulint_get_low(purge_sys->purge_trx_no),
-		(ulong) ut_dulint_get_high(purge_sys->purge_undo_no),
-		(ulong) ut_dulint_get_low(purge_sys->purge_undo_no));
+		"Purge done for trx's n:o < " TRX_ID_FMT
+		" undo n:o < " TRX_ID_FMT "\n",
+		TRX_ID_PREP_PRINTF(purge_sys->purge_trx_no),
+		TRX_ID_PREP_PRINTF(purge_sys->purge_undo_no));
 
 	fprintf(file,
 		"History list length %lu\n",
@@ -4263,18 +4340,15 @@ lock_print_info_summary(
 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Prints info of locks for each transaction. */
-
+UNIV_INTERN
 void
 lock_print_info_all_transactions(
 /*=============================*/
-	FILE*	file)	/* in: file where to print */
+	FILE*	file)	/*!< in: file where to print */
 {
 	lock_t*	lock;
-	ulint	space;
-	ulint	page_no;
-	page_t*	page;
 	ibool	load_page_first = TRUE;
 	ulint	nth_trx		= 0;
 	ulint	nth_lock	= 0;
@@ -4328,14 +4402,11 @@ loop:
 		if (trx->read_view) {
 			fprintf(file,
 				"Trx read view will not see trx with"
-				" id >= %lu %lu, sees < %lu %lu\n",
-				(ulong) ut_dulint_get_high(
+				" id >= " TRX_ID_FMT
+				", sees < " TRX_ID_FMT "\n",
+				TRX_ID_PREP_PRINTF(
 					trx->read_view->low_limit_id),
-				(ulong) ut_dulint_get_low(
-					trx->read_view->low_limit_id),
-				(ulong) ut_dulint_get_high(
-					trx->read_view->up_limit_id),
-				(ulong) ut_dulint_get_low(
+				TRX_ID_PREP_PRINTF(
 					trx->read_view->up_limit_id));
 		}
 
@@ -4346,7 +4417,7 @@ loop:
 				(ulong) difftime(time(NULL),
 						 trx->wait_started));
 
-			if (lock_get_type(trx->wait_lock) == LOCK_REC) {
+			if (lock_get_type_low(trx->wait_lock) == LOCK_REC) {
 				lock_rec_print(file, trx->wait_lock);
 			} else {
 				lock_table_print(file, trx->wait_lock);
@@ -4380,18 +4451,33 @@ loop:
 		goto loop;
 	}
 
-	if (lock_get_type(lock) == LOCK_REC) {
-		space = lock->un_member.rec_lock.space;
-		page_no = lock->un_member.rec_lock.page_no;
-
+	if (lock_get_type_low(lock) == LOCK_REC) {
 		if (load_page_first) {
+			ulint	space	= lock->un_member.rec_lock.space;
+			ulint	zip_size= fil_space_get_zip_size(space);
+			ulint	page_no = lock->un_member.rec_lock.page_no;
+
+			if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+
+				/* It is a single table tablespace and
+				the .ibd file is missing (TRUNCATE
+				TABLE probably stole the locks): just
+				print the lock without attempting to
+				load the page in the buffer pool. */
+
+				fprintf(file, "RECORD LOCKS on"
+					" non-existing space %lu\n",
+					(ulong) space);
+				goto print_rec;
+			}
+
 			lock_mutex_exit_kernel();
 			innobase_mysql_end_print_arbitrary_thd();
 
 			mtr_start(&mtr);
 
-			page = buf_page_get_with_no_latch(
-				space, page_no, &mtr);
+			buf_page_get_with_no_latch(space, zip_size,
+						   page_no, &mtr);
 
 			mtr_commit(&mtr);
 
@@ -4403,9 +4489,10 @@ loop:
 			goto loop;
 		}
 
+print_rec:
 		lock_rec_print(file, lock);
 	} else {
-		ut_ad(lock_get_type(lock) & LOCK_TABLE);
+		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
 
 		lock_table_print(file, lock);
 	}
@@ -4428,14 +4515,15 @@ loop:
 	goto loop;
 }
 
-/*************************************************************************
-Validates the lock queue on a table. */
-
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Validates the lock queue on a table.
+@return	TRUE if ok */
+static
 ibool
 lock_table_queue_validate(
 /*======================*/
-				/* out: TRUE if ok */
-	dict_table_t*	table)	/* in: table */
+	dict_table_t*	table)	/*!< in: table */
 {
 	lock_t*	lock;
 
@@ -4464,29 +4552,34 @@ lock_table_queue_validate(
 	return(TRUE);
 }
 
-/*************************************************************************
-Validates the lock queue on a single record. */
-
+/*********************************************************************//**
+Validates the lock queue on a single record.
+@return	TRUE if ok */
+static
 ibool
 lock_rec_queue_validate(
 /*====================*/
-				/* out: TRUE if ok */
-	rec_t*		rec,	/* in: record to look at */
-	dict_index_t*	index,	/* in: index, or NULL if not known */
-	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec,	/*!< in: record to look at */
+	dict_index_t*		index,	/*!< in: index, or NULL if not known */
+	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
 {
 	trx_t*	impl_trx;
 	lock_t*	lock;
+	ulint	heap_no;
 
 	ut_a(rec);
+	ut_a(block->frame == page_align(rec));
 	ut_ad(rec_offs_validate(rec, index, offsets));
 	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
 
+	heap_no = page_rec_get_heap_no(rec);
+
 	lock_mutex_enter_kernel();
 
 	if (!page_rec_is_user_rec(rec)) {
 
-		lock = lock_rec_get_first(rec);
+		lock = lock_rec_get_first(block, heap_no);
 
 		while (lock) {
 			switch(lock->trx->conc_state) {
@@ -4508,7 +4601,7 @@ lock_rec_queue_validate(
 				ut_a(lock->index == index);
 			}
 
-			lock = lock_rec_get_next(rec, lock);
+			lock = lock_rec_get_next(heap_no, lock);
 		}
 
 		lock_mutex_exit_kernel();
@@ -4516,19 +4609,19 @@ lock_rec_queue_validate(
 		return(TRUE);
 	}
 
-	if (index && (index->type & DICT_CLUSTERED)) {
+	if (!index);
+	else if (dict_index_is_clust(index)) {
 
 		impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
 
-		if (impl_trx && lock_rec_other_has_expl_req(
-			    LOCK_S, 0, LOCK_WAIT, rec, impl_trx)) {
+		if (impl_trx
+		    && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
+						   block, heap_no, impl_trx)) {
 
 			ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
-					       rec, impl_trx));
+					       block, heap_no, impl_trx));
 		}
-	}
-
-	if (index && !(index->type & DICT_CLUSTERED)) {
+	} else {
 
 		/* The kernel mutex may get released temporarily in the
 		next function call: we have to release lock table mutex
@@ -4537,15 +4630,16 @@ lock_rec_queue_validate(
 		impl_trx = lock_sec_rec_some_has_impl_off_kernel(
 			rec, index, offsets);
 
-		if (impl_trx && lock_rec_other_has_expl_req(
-			    LOCK_S, 0, LOCK_WAIT, rec, impl_trx)) {
+		if (impl_trx
+		    && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
+						   block, heap_no, impl_trx)) {
 
 			ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
-					       rec, impl_trx));
+					       block, heap_no, impl_trx));
 		}
 	}
 
-	lock = lock_rec_get_first(rec);
+	lock = lock_rec_get_first(block, heap_no);
 
 	while (lock) {
 		ut_a(lock->trx->conc_state == TRX_ACTIVE
@@ -4559,7 +4653,7 @@ lock_rec_queue_validate(
 
 		if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
 
-			ulint	mode;
+			enum lock_mode	mode;
 
 			if (lock_get_mode(lock) == LOCK_S) {
 				mode = LOCK_X;
@@ -4567,14 +4661,14 @@ lock_rec_queue_validate(
 				mode = LOCK_S;
 			}
 			ut_a(!lock_rec_other_has_expl_req(
-				     mode, 0, 0, rec, lock->trx));
+				     mode, 0, 0, block, heap_no, lock->trx));
 
 		} else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
 
 			ut_a(lock_rec_has_to_wait_in_queue(lock));
 		}
 
-		lock = lock_rec_get_next(rec, lock);
+		lock = lock_rec_get_next(heap_no, lock);
 	}
 
 	lock_mutex_exit_kernel();
@@ -4582,37 +4676,41 @@ lock_rec_queue_validate(
 	return(TRUE);
 }
 
-/*************************************************************************
-Validates the record lock queues on a page. */
-
+/*********************************************************************//**
+Validates the record lock queues on a page.
+@return	TRUE if ok */
+static
 ibool
 lock_rec_validate_page(
 /*===================*/
-			/* out: TRUE if ok */
-	ulint	space,	/* in: space id */
-	ulint	page_no)/* in: page number */
+	ulint	space,	/*!< in: space id */
+	ulint	page_no)/*!< in: page number */
 {
 	dict_index_t*	index;
-	page_t*	page;
-	lock_t*	lock;
-	rec_t*	rec;
-	ulint	nth_lock		= 0;
-	ulint	nth_bit			= 0;
-	ulint	i;
-	mtr_t	mtr;
+	buf_block_t*	block;
+	const page_t*	page;
+	lock_t*		lock;
+	const rec_t*	rec;
+	ulint		nth_lock	= 0;
+	ulint		nth_bit		= 0;
+	ulint		i;
+	ulint		zip_size;
+	mtr_t		mtr;
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	ut_ad(!mutex_own(&kernel_mutex));
 
 	mtr_start(&mtr);
 
-	page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
+	zip_size = fil_space_get_zip_size(space);
+	ut_ad(zip_size != ULINT_UNDEFINED);
+	block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, &mtr);
+	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+
+	page = block->frame;
 
 	lock_mutex_enter_kernel();
 loop:
@@ -4642,6 +4740,7 @@ loop:
 
 			index = lock->index;
 			rec = page_find_rec_with_heap_no(page, i);
+			ut_a(rec);
 			offsets = rec_get_offsets(rec, index, offsets,
 						  ULINT_UNDEFINED, &heap);
 
@@ -4651,7 +4750,7 @@ loop:
 
 			lock_mutex_exit_kernel();
 
-			lock_rec_queue_validate(rec, index, offsets);
+			lock_rec_queue_validate(block, rec, index, offsets);
 
 			lock_mutex_enter_kernel();
 
@@ -4677,13 +4776,13 @@ function_exit:
 	return(TRUE);
 }
 
-/*************************************************************************
-Validates the lock system. */
-
+/*********************************************************************//**
+Validates the lock system.
+@return	TRUE if ok */
+static
 ibool
 lock_validate(void)
 /*===============*/
-			/* out: TRUE if ok */
 {
 	lock_t*	lock;
 	trx_t*	trx;
@@ -4700,7 +4799,7 @@ lock_validate(void)
 		lock = UT_LIST_GET_FIRST(trx->trx_locks);
 
 		while (lock) {
-			if (lock_get_type(lock) & LOCK_TABLE) {
+			if (lock_get_type_low(lock) & LOCK_TABLE) {
 
 				lock_table_queue_validate(
 					lock->un_member.tab_lock.table);
@@ -4753,65 +4852,74 @@ lock_validate(void)
 
 	return(TRUE);
 }
-#endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
 /*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if locks of other transactions prevent an immediate insert of
 a record. If they do, first tests if the query thread should anyway
 be suspended for some reason; if not, then puts the transaction and
 the query thread to the lock wait state and inserts a waiting request
-for a gap x-lock to the lock queue. */
-
+for a gap x-lock to the lock queue.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
 ulint
 lock_rec_insert_check_and_lock(
 /*===========================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	rec_t*		rec,	/* in: record after which to insert */
-	dict_index_t*	index,	/* in: index */
-	que_thr_t*	thr,	/* in: query thread */
-	ibool*		inherit)/* out: set to TRUE if the new inserted
-				record maybe should inherit LOCK_GAP type
-				locks from the successor record */
+	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is
+				set, does nothing */
+	const rec_t*	rec,	/*!< in: record after which to insert */
+	buf_block_t*	block,	/*!< in/out: buffer block of rec */
+	dict_index_t*	index,	/*!< in: index */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction */
+	ibool*		inherit)/*!< out: set to TRUE if the new
+				inserted record maybe should inherit
+				LOCK_GAP type locks from the successor
+				record */
 {
-	rec_t*	next_rec;
-	trx_t*	trx;
-	lock_t*	lock;
-	ulint	err;
+	const rec_t*	next_rec;
+	trx_t*		trx;
+	lock_t*		lock;
+	ulint		err;
+	ulint		next_rec_heap_no;
+
+	ut_ad(block->frame == page_align(rec));
 
 	if (flags & BTR_NO_LOCKING_FLAG) {
 
 		return(DB_SUCCESS);
 	}
 
-	ut_ad(rec);
-
 	trx = thr_get_trx(thr);
-	next_rec = page_rec_get_next(rec);
-
-	*inherit = FALSE;
+	next_rec = page_rec_get_next((rec_t*) rec);
+	next_rec_heap_no = page_rec_get_heap_no(next_rec);
 
 	lock_mutex_enter_kernel();
 
-	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+	/* When inserting a record into an index, the table must be at
+	least IX-locked or we must be building an index, in which case
+	the table must be at least S-locked. */
+	ut_ad(lock_table_has(trx, index->table, LOCK_IX)
+	      || (*index->name == TEMP_INDEX_PREFIX
+		  && lock_table_has(trx, index->table, LOCK_S)));
 
-	lock = lock_rec_get_first(next_rec);
+	lock = lock_rec_get_first(block, next_rec_heap_no);
 
-	if (lock == NULL) {
+	if (UNIV_LIKELY(lock == NULL)) {
 		/* We optimize CPU time usage in the simplest case */
 
 		lock_mutex_exit_kernel();
 
-		if (!(index->type & DICT_CLUSTERED)) {
-
+		if (!dict_index_is_clust(index)) {
 			/* Update the page max trx id field */
-			page_update_max_trx_id(buf_frame_align(rec),
-					       thr_get_trx(thr)->id);
+			page_update_max_trx_id(block,
+					       buf_block_get_page_zip(block),
+					       trx->id, mtr);
 		}
 
+		*inherit = FALSE;
+
 		return(DB_SUCCESS);
 	}
 
@@ -4828,24 +4936,25 @@ lock_rec_insert_check_and_lock(
 	on the successor, which produced an unnecessary deadlock. */
 
 	if (lock_rec_other_has_conflicting(
-		    LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION, next_rec,
-		    trx)) {
+		    LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
+		    block, next_rec_heap_no, trx)) {
 
 		/* Note that we may get DB_SUCCESS also here! */
 		err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
 					       | LOCK_INSERT_INTENTION,
-					       next_rec, index, thr);
+					       block, next_rec_heap_no,
+					       index, thr);
 	} else {
 		err = DB_SUCCESS;
 	}
 
 	lock_mutex_exit_kernel();
 
-	if (!(index->type & DICT_CLUSTERED) && (err == DB_SUCCESS)) {
-
+	if ((err == DB_SUCCESS) && !dict_index_is_clust(index)) {
 		/* Update the page max trx id field */
-		page_update_max_trx_id(buf_frame_align(rec),
-				       thr_get_trx(thr)->id);
+		page_update_max_trx_id(block,
+				       buf_block_get_page_zip(block),
+				       trx->id, mtr);
 	}
 
 #ifdef UNIV_DEBUG
@@ -4853,11 +4962,12 @@ lock_rec_insert_check_and_lock(
 		mem_heap_t*	heap		= NULL;
 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 		const ulint*	offsets;
-		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+		rec_offs_init(offsets_);
 
 		offsets = rec_get_offsets(next_rec, index, offsets_,
 					  ULINT_UNDEFINED, &heap);
-		ut_ad(lock_rec_queue_validate(next_rec, index, offsets));
+		ut_ad(lock_rec_queue_validate(block,
+					      next_rec, index, offsets));
 		if (UNIV_LIKELY_NULL(heap)) {
 			mem_heap_free(heap);
 		}
@@ -4867,7 +4977,7 @@ lock_rec_insert_check_and_lock(
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 If a transaction has an implicit x-lock on a record, but no explicit x-lock
 set on the record, sets one for it. NOTE that in the case of a secondary
 index, the kernel mutex may get temporarily released. */
@@ -4875,9 +4985,10 @@ static
 void
 lock_rec_convert_impl_to_expl(
 /*==========================*/
-	rec_t*		rec,	/* in: user record on page */
-	dict_index_t*	index,	/* in: index of record */
-	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: user record on page */
+	dict_index_t*		index,	/*!< in: index of record */
+	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
 {
 	trx_t*	impl_trx;
 
@@ -4886,7 +4997,7 @@ lock_rec_convert_impl_to_expl(
 	ut_ad(rec_offs_validate(rec, index, offsets));
 	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
 
-	if (index->type & DICT_CLUSTERED) {
+	if (dict_index_is_clust(index)) {
 		impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
 	} else {
 		impl_trx = lock_sec_rec_some_has_impl_off_kernel(
@@ -4894,49 +5005,58 @@ lock_rec_convert_impl_to_expl(
 	}
 
 	if (impl_trx) {
+		ulint	heap_no = page_rec_get_heap_no(rec);
+
 		/* If the transaction has no explicit x-lock set on the
 		record, set one for it */
 
-		if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec,
-				       impl_trx)) {
+		if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block,
+				       heap_no, impl_trx)) {
 
 			lock_rec_add_to_queue(
 				LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
-				rec, index, impl_trx);
+				block, heap_no, index, impl_trx);
 		}
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if locks of other transactions prevent an immediate modify (update,
 delete mark, or delete unmark) of a clustered index record. If they do,
 first tests if the query thread should anyway be suspended for some
 reason; if not, then puts the transaction and the query thread to the
 lock wait state and inserts a waiting request for a record x-lock to the
-lock queue. */
-
+lock queue.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
 ulint
 lock_clust_rec_modify_check_and_lock(
 /*=================================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	rec_t*		rec,	/* in: record which should be modified */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	que_thr_t*	thr)	/* in: query thread */
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: record which should be
+					modified */
+	dict_index_t*		index,	/*!< in: clustered index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	que_thr_t*		thr)	/*!< in: query thread */
 {
 	ulint	err;
+	ulint	heap_no;
 
 	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(block->frame == page_align(rec));
 
 	if (flags & BTR_NO_LOCKING_FLAG) {
 
 		return(DB_SUCCESS);
 	}
 
+	heap_no = rec_offs_comp(offsets)
+		? rec_get_heap_no_new(rec)
+		: rec_get_heap_no_old(rec);
+
 	lock_mutex_enter_kernel();
 
 	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
@@ -4944,43 +5064,50 @@ lock_clust_rec_modify_check_and_lock(
 	/* If a transaction has no explicit x-lock set on the record, set one
 	for it */
 
-	lock_rec_convert_impl_to_expl(rec, index, offsets);
+	lock_rec_convert_impl_to_expl(block, rec, index, offsets);
 
-	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr);
+	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
+			    block, heap_no, index, thr);
 
 	lock_mutex_exit_kernel();
 
-	ut_ad(lock_rec_queue_validate(rec, index, offsets));
+	ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
 
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if locks of other transactions prevent an immediate modify (delete
-mark or delete unmark) of a secondary index record. */
-
+mark or delete unmark) of a secondary index record.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
 ulint
 lock_sec_rec_modify_check_and_lock(
 /*===============================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	rec_t*		rec,	/* in: record which should be modified;
-				NOTE: as this is a secondary index, we
-				always have to modify the clustered index
-				record first: see the comment below */
-	dict_index_t*	index,	/* in: secondary index */
-	que_thr_t*	thr)	/* in: query thread */
+	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+				bit is set, does nothing */
+	buf_block_t*	block,	/*!< in/out: buffer block of rec */
+	const rec_t*	rec,	/*!< in: record which should be
+				modified; NOTE: as this is a secondary
+				index, we always have to modify the
+				clustered index record first: see the
+				comment below */
+	dict_index_t*	index,	/*!< in: secondary index */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
 {
 	ulint	err;
+	ulint	heap_no;
+
+	ut_ad(!dict_index_is_clust(index));
+	ut_ad(block->frame == page_align(rec));
 
 	if (flags & BTR_NO_LOCKING_FLAG) {
 
 		return(DB_SUCCESS);
 	}
 
-	ut_ad(!(index->type & DICT_CLUSTERED));
+	heap_no = page_rec_get_heap_no(rec);
 
 	/* Another transaction cannot have an implicit lock on the record,
 	because when we come here, we already have modified the clustered
@@ -4991,7 +5118,8 @@ lock_sec_rec_modify_check_and_lock(
 
 	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
 
-	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr);
+	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
+			    block, heap_no, index, thr);
 
 	lock_mutex_exit_kernel();
 
@@ -5000,11 +5128,11 @@ lock_sec_rec_modify_check_and_lock(
 		mem_heap_t*	heap		= NULL;
 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 		const ulint*	offsets;
-		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+		rec_offs_init(offsets_);
 
 		offsets = rec_get_offsets(rec, index, offsets_,
 					  ULINT_UNDEFINED, &heap);
-		ut_ad(lock_rec_queue_validate(rec, index, offsets));
+		ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
 		if (UNIV_LIKELY_NULL(heap)) {
 			mem_heap_free(heap);
 		}
@@ -5013,48 +5141,56 @@ lock_sec_rec_modify_check_and_lock(
 
 	if (err == DB_SUCCESS) {
 		/* Update the page max trx id field */
-
-		page_update_max_trx_id(buf_frame_align(rec),
-				       thr_get_trx(thr)->id);
+		page_update_max_trx_id(block,
+				       buf_block_get_page_zip(block),
+				       thr_get_trx(thr)->id, mtr);
 	}
 
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Like the counterpart for a clustered index below, but now we read a
-secondary index record. */
-
+secondary index record.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
 ulint
 lock_sec_rec_read_check_and_lock(
 /*=============================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	rec_t*		rec,	/* in: user record or page supremum record
-				which should be read or passed over by a read
-				cursor */
-	dict_index_t*	index,	/* in: secondary index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	ulint		mode,	/* in: mode of the lock which the read cursor
-				should set on records: LOCK_S or LOCK_X; the
-				latter is possible in SELECT FOR UPDATE */
-	ulint		gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
-				LOCK_REC_NOT_GAP */
-	que_thr_t*	thr)	/* in: query thread */
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: user record or page
+					supremum record which should
+					be read or passed over by a
+					read cursor */
+	dict_index_t*		index,	/*!< in: secondary index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	enum lock_mode		mode,	/*!< in: mode of the lock which
+					the read cursor should set on
+					records: LOCK_S or LOCK_X; the
+					latter is possible in
+					SELECT FOR UPDATE */
+	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOCK_REC_NOT_GAP */
+	que_thr_t*		thr)	/*!< in: query thread */
 {
 	ulint	err;
+	ulint	heap_no;
 
-	ut_ad(!(index->type & DICT_CLUSTERED));
+	ut_ad(!dict_index_is_clust(index));
+	ut_ad(block->frame == page_align(rec));
 	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
 	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(mode == LOCK_X || mode == LOCK_S);
 
 	if (flags & BTR_NO_LOCKING_FLAG) {
 
 		return(DB_SUCCESS);
 	}
 
+	heap_no = page_rec_get_heap_no(rec);
+
 	lock_mutex_enter_kernel();
 
 	ut_ad(mode != LOCK_X
@@ -5066,53 +5202,59 @@ lock_sec_rec_read_check_and_lock(
 	if the max trx id for the page >= min trx id for the trx list or a
 	database recovery is running. */
 
-	if (((ut_dulint_cmp(page_get_max_trx_id(buf_frame_align(rec)),
+	if (((ut_dulint_cmp(page_get_max_trx_id(block->frame),
 			    trx_list_get_min_trx_id()) >= 0)
 	     || recv_recovery_is_on())
 	    && !page_rec_is_supremum(rec)) {
 
-		lock_rec_convert_impl_to_expl(rec, index, offsets);
+		lock_rec_convert_impl_to_expl(block, rec, index, offsets);
 	}
 
-	err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr);
+	err = lock_rec_lock(FALSE, mode | gap_mode,
+			    block, heap_no, index, thr);
 
 	lock_mutex_exit_kernel();
 
-	ut_ad(lock_rec_queue_validate(rec, index, offsets));
+	ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
 
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if locks of other transactions prevent an immediate read, or passing
 over by a read cursor, of a clustered index record. If they do, first tests
 if the query thread should anyway be suspended for some reason; if not, then
 puts the transaction and the query thread to the lock wait state and inserts a
 waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. */
-
+lock on the record.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
 ulint
 lock_clust_rec_read_check_and_lock(
 /*===============================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	rec_t*		rec,	/* in: user record or page supremum record
-				which should be read or passed over by a read
-				cursor */
-	dict_index_t*	index,	/* in: clustered index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	ulint		mode,	/* in: mode of the lock which the read cursor
-				should set on records: LOCK_S or LOCK_X; the
-				latter is possible in SELECT FOR UPDATE */
-	ulint		gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
-				LOCK_REC_NOT_GAP */
-	que_thr_t*	thr)	/* in: query thread */
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: user record or page
+					supremum record which should
+					be read or passed over by a
+					read cursor */
+	dict_index_t*		index,	/*!< in: clustered index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	enum lock_mode		mode,	/*!< in: mode of the lock which
+					the read cursor should set on
+					records: LOCK_S or LOCK_X; the
+					latter is possible in
+					SELECT FOR UPDATE */
+	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOCK_REC_NOT_GAP */
+	que_thr_t*		thr)	/*!< in: query thread */
 {
 	ulint	err;
+	ulint	heap_no;
 
-	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(block->frame == page_align(rec));
 	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
 	ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
 	      || gap_mode == LOCK_REC_NOT_GAP);
@@ -5123,6 +5265,8 @@ lock_clust_rec_read_check_and_lock(
 		return(DB_SUCCESS);
 	}
 
+	heap_no = page_rec_get_heap_no(rec);
+
 	lock_mutex_enter_kernel();
 
 	ut_ad(mode != LOCK_X
@@ -5130,20 +5274,21 @@ lock_clust_rec_read_check_and_lock(
 	ut_ad(mode != LOCK_S
 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
 
-	if (!page_rec_is_supremum(rec)) {
+	if (UNIV_LIKELY(heap_no != PAGE_HEAP_NO_SUPREMUM)) {
 
-		lock_rec_convert_impl_to_expl(rec, index, offsets);
+		lock_rec_convert_impl_to_expl(block, rec, index, offsets);
 	}
 
-	err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr);
+	err = lock_rec_lock(FALSE, mode | gap_mode,
+			    block, heap_no, index, thr);
 
 	lock_mutex_exit_kernel();
 
-	ut_ad(lock_rec_queue_validate(rec, index, offsets));
+	ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
 
 	return(err);
 }
-/*************************************************************************
+/*********************************************************************//**
 Checks if locks of other transactions prevent an immediate read, or passing
 over by a read cursor, of a clustered index record. If they do, first tests
 if the query thread should anyway be suspended for some reason; if not, then
@@ -5151,35 +5296,38 @@ puts the transaction and the query thread to the lock wait state and inserts a
 waiting request for a record lock to the lock queue. Sets the requested mode
 lock on the record. This is an alternative version of
 lock_clust_rec_read_check_and_lock() that does not require the parameter
-"offsets". */
-
+"offsets".
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
 ulint
 lock_clust_rec_read_check_and_lock_alt(
 /*===================================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	rec_t*		rec,	/* in: user record or page supremum record
-				which should be read or passed over by a read
-				cursor */
-	dict_index_t*	index,	/* in: clustered index */
-	ulint		mode,	/* in: mode of the lock which the read cursor
-				should set on records: LOCK_S or LOCK_X; the
-				latter is possible in SELECT FOR UPDATE */
-	ulint		gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
-				LOCK_REC_NOT_GAP */
-	que_thr_t*	thr)	/* in: query thread */
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: user record or page
+					supremum record which should
+					be read or passed over by a
+					read cursor */
+	dict_index_t*		index,	/*!< in: clustered index */
+	enum lock_mode		mode,	/*!< in: mode of the lock which
+					the read cursor should set on
+					records: LOCK_S or LOCK_X; the
+					latter is possible in
+					SELECT FOR UPDATE */
+	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOCK_REC_NOT_GAP */
+	que_thr_t*		thr)	/*!< in: query thread */
 {
 	mem_heap_t*	tmp_heap	= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
 	ulint		ret;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	offsets = rec_get_offsets(rec, index, offsets,
 				  ULINT_UNDEFINED, &tmp_heap);
-	ret = lock_clust_rec_read_check_and_lock(flags, rec, index,
+	ret = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
 						 offsets, mode, gap_mode, thr);
 	if (tmp_heap) {
 		mem_heap_free(tmp_heap);
@@ -5187,3 +5335,258 @@ lock_clust_rec_read_check_and_lock_alt(
 	return(ret);
 }
 
+/*******************************************************************//**
+Release the last lock from the transaction's autoinc locks. */
+UNIV_INLINE
+void
+lock_release_autoinc_last_lock(
+/*===========================*/
+	ib_vector_t*	autoinc_locks)	/*!< in/out: vector of AUTOINC locks */
+{
+	ulint		last;
+	lock_t*		lock;
+
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_a(!ib_vector_is_empty(autoinc_locks));
+
+	/* The lock to be release must be the last lock acquired. */
+	last = ib_vector_size(autoinc_locks) - 1;
+	lock = ib_vector_get(autoinc_locks, last);
+
+	/* Should have only AUTOINC locks in the vector. */
+	ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
+	ut_a(lock_get_type(lock) == LOCK_TABLE);
+
+	ut_a(lock->un_member.tab_lock.table != NULL);
+
+	/* This will remove the lock from the trx autoinc_locks too. */
+	lock_table_dequeue(lock);
+}
+
+/*******************************************************************//**
+Release all the transaction's autoinc locks. */
+UNIV_INTERN
+void
+lock_release_autoinc_locks(
+/*=======================*/
+	trx_t*		trx)		/*!< in/out: transaction */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+
+	ut_a(trx->autoinc_locks != NULL);
+
+	/* We release the locks in the reverse order. This is to
+	avoid searching the vector for the element to delete at
+	the lower level. See (lock_table_remove_low()) for details. */
+	while (!ib_vector_is_empty(trx->autoinc_locks)) {
+
+		/* lock_table_remove_low() will also remove the lock from
+		the transaction's autoinc_locks vector. */
+		lock_release_autoinc_last_lock(trx->autoinc_locks);
+	}
+
+	/* Should release all locks. */
+	ut_a(ib_vector_is_empty(trx->autoinc_locks));
+}
+
+/*******************************************************************//**
+Gets the type of a lock. Non-inline version for using outside of the
+lock module.
+@return	LOCK_TABLE or LOCK_REC */
+UNIV_INTERN
+ulint
+lock_get_type(
+/*==========*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	return(lock_get_type_low(lock));
+}
+
+/*******************************************************************//**
+Gets the id of the transaction owning a lock.
+@return	transaction id */
+UNIV_INTERN
+ullint
+lock_get_trx_id(
+/*============*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	return(trx_get_id(lock->trx));
+}
+
+/*******************************************************************//**
+Gets the mode of a lock in a human readable string.
+The string should not be free()'d or modified.
+@return	lock mode */
+UNIV_INTERN
+const char*
+lock_get_mode_str(
+/*==============*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ibool	is_gap_lock;
+
+	is_gap_lock = lock_get_type_low(lock) == LOCK_REC
+		&& lock_rec_get_gap(lock);
+
+	switch (lock_get_mode(lock)) {
+	case LOCK_S:
+		if (is_gap_lock) {
+			return("S,GAP");
+		} else {
+			return("S");
+		}
+	case LOCK_X:
+		if (is_gap_lock) {
+			return("X,GAP");
+		} else {
+			return("X");
+		}
+	case LOCK_IS:
+		if (is_gap_lock) {
+			return("IS,GAP");
+		} else {
+			return("IS");
+		}
+	case LOCK_IX:
+		if (is_gap_lock) {
+			return("IX,GAP");
+		} else {
+			return("IX");
+		}
+	case LOCK_AUTO_INC:
+		return("AUTO_INC");
+	default:
+		return("UNKNOWN");
+	}
+}
+
+/*******************************************************************//**
+Gets the type of a lock in a human readable string.
+The string should not be free()'d or modified.
+@return	lock type */
+UNIV_INTERN
+const char*
+lock_get_type_str(
+/*==============*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	switch (lock_get_type_low(lock)) {
+	case LOCK_REC:
+		return("RECORD");
+	case LOCK_TABLE:
+		return("TABLE");
+	default:
+		return("UNKNOWN");
+	}
+}
+
+/*******************************************************************//**
+Gets the table on which the lock is.
+@return	table */
+UNIV_INLINE
+dict_table_t*
+lock_get_table(
+/*===========*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	switch (lock_get_type_low(lock)) {
+	case LOCK_REC:
+		return(lock->index->table);
+	case LOCK_TABLE:
+		return(lock->un_member.tab_lock.table);
+	default:
+		ut_error;
+		return(NULL);
+	}
+}
+
+/*******************************************************************//**
+Gets the id of the table on which the lock is.
+@return	id of the table */
+UNIV_INTERN
+ullint
+lock_get_table_id(
+/*==============*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	dict_table_t*	table;
+
+	table = lock_get_table(lock);
+
+	return((ullint)ut_conv_dulint_to_longlong(table->id));
+}
+
+/*******************************************************************//**
+Gets the name of the table on which the lock is.
+The string should not be free()'d or modified.
+@return	name of the table */
+UNIV_INTERN
+const char*
+lock_get_table_name(
+/*================*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	dict_table_t*	table;
+
+	table = lock_get_table(lock);
+
+	return(table->name);
+}
+
+/*******************************************************************//**
+For a record lock, gets the index on which the lock is.
+@return	index */
+UNIV_INTERN
+const dict_index_t*
+lock_rec_get_index(
+/*===============*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+	return(lock->index);
+}
+
+/*******************************************************************//**
+For a record lock, gets the name of the index on which the lock is.
+The string should not be free()'d or modified.
+@return	name of the index */
+UNIV_INTERN
+const char*
+lock_rec_get_index_name(
+/*====================*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+	return(lock->index->name);
+}
+
+/*******************************************************************//**
+For a record lock, gets the tablespace number on which the lock is.
+@return	tablespace number */
+UNIV_INTERN
+ulint
+lock_rec_get_space_id(
+/*==================*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+	return(lock->un_member.rec_lock.space);
+}
+
+/*******************************************************************//**
+For a record lock, gets the page number on which the lock is.
+@return	page number */
+UNIV_INTERN
+ulint
+lock_rec_get_page_no(
+/*=================*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+	return(lock->un_member.rec_lock.page_no);
+}
diff --git a/storage/innobase/log/log0log.c b/storage/innodb_plugin/log/log0log.c
similarity index 77%
rename from storage/innobase/log/log0log.c
rename to storage/innodb_plugin/log/log0log.c
index 7eb5e3db422..24c828cdf5f 100644
--- a/storage/innobase/log/log0log.c
+++ b/storage/innodb_plugin/log/log0log.c
@@ -1,7 +1,48 @@
-/******************************************************
-Database log
+/*****************************************************************************
 
-(c) 1995-1997 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file log/log0log.c
+Database log
 
 Created 12/9/1995 Heikki Tuuri
 *******************************************************/
@@ -12,6 +53,7 @@ Created 12/9/1995 Heikki Tuuri
 #include "log0log.ic"
 #endif
 
+#ifndef UNIV_HOTBACKUP
 #include "mem0mem.h"
 #include "buf0buf.h"
 #include "buf0flu.h"
@@ -52,26 +94,24 @@ reduce the size of the log.
 
 /* Current free limit of space 0; protected by the log sys mutex; 0 means
 uninitialized */
-ulint	log_fsp_current_free_limit		= 0;
+UNIV_INTERN ulint	log_fsp_current_free_limit		= 0;
 
 /* Global log system variable */
-log_t*	log_sys	= NULL;
+UNIV_INTERN log_t*	log_sys	= NULL;
 
 #ifdef UNIV_DEBUG
-ibool	log_do_write = TRUE;
-
-ibool	log_debug_writes = FALSE;
+UNIV_INTERN ibool	log_do_write = TRUE;
 #endif /* UNIV_DEBUG */
 
 /* These control how often we print warnings if the last checkpoint is too
 old */
-ibool	log_has_printed_chkp_warning = FALSE;
-time_t	log_last_warning_time;
+UNIV_INTERN ibool	log_has_printed_chkp_warning = FALSE;
+UNIV_INTERN time_t	log_last_warning_time;
 
 #ifdef UNIV_LOG_ARCHIVE
 /* Pointer to this variable is used as the i/o-message when we do i/o to an
 archive */
-byte	log_archive_io;
+UNIV_INTERN byte	log_archive_io;
 #endif /* UNIV_LOG_ARCHIVE */
 
 /* A margin for free space in the log buffer before a log entry is catenated */
@@ -113,14 +153,14 @@ the previous */
 #define	LOG_ARCHIVE_READ	1
 #define	LOG_ARCHIVE_WRITE	2
 
-/**********************************************************
+/******************************************************//**
 Completes a checkpoint write i/o to a log file. */
 static
 void
 log_io_complete_checkpoint(void);
 /*============================*/
 #ifdef UNIV_LOG_ARCHIVE
-/**********************************************************
+/******************************************************//**
 Completes an archiving i/o. */
 static
 void
@@ -128,15 +168,15 @@ log_io_complete_archive(void);
 /*=========================*/
 #endif /* UNIV_LOG_ARCHIVE */
 
-/********************************************************************
+/****************************************************************//**
 Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
 so that we know that the limit has been written to a log checkpoint field
 on disk. */
-
+UNIV_INTERN
 void
 log_fsp_current_free_limit_set_and_checkpoint(
 /*==========================================*/
-	ulint	limit)	/* in: limit to set */
+	ulint	limit)	/*!< in: limit to set */
 {
 	ibool	success;
 
@@ -155,21 +195,22 @@ log_fsp_current_free_limit_set_and_checkpoint(
 	}
 }
 
-/********************************************************************
+/****************************************************************//**
 Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
-exists. */
+exists.
+@return	LSN of oldest modification */
 static
-dulint
+ib_uint64_t
 log_buf_pool_get_oldest_modification(void)
 /*======================================*/
 {
-	dulint	lsn;
+	ib_uint64_t	lsn;
 
 	ut_ad(mutex_own(&(log_sys->mutex)));
 
 	lsn = buf_pool_get_oldest_modification();
 
-	if (ut_dulint_is_zero(lsn)) {
+	if (!lsn) {
 
 		lsn = log_sys->lsn;
 	}
@@ -177,15 +218,15 @@ log_buf_pool_get_oldest_modification(void)
 	return(lsn);
 }
 
-/****************************************************************
+/************************************************************//**
 Opens the log for log_write_low. The log must be closed with log_close and
-released with log_release. */
-
-dulint
+released with log_release.
+@return	start lsn of the log record */
+UNIV_INTERN
+ib_uint64_t
 log_reserve_and_open(
 /*=================*/
-			/* out: start lsn of the log record */
-	ulint	len)	/* in: length of data to be catenated */
+	ulint	len)	/*!< in: length of data to be catenated */
 {
 	log_t*	log			= log_sys;
 	ulint	len_upper_limit;
@@ -225,8 +266,7 @@ loop:
 #ifdef UNIV_LOG_ARCHIVE
 	if (log->archiving_state != LOG_ARCH_OFF) {
 
-		archived_lsn_age = ut_dulint_minus(log->lsn,
-						   log->archived_lsn);
+		archived_lsn_age = log->lsn - log->archived_lsn;
 		if (archived_lsn_age + len_upper_limit
 		    > log->max_archived_lsn_age) {
 			/* Not enough free archived space in log groups: do a
@@ -252,15 +292,15 @@ loop:
 	return(log->lsn);
 }
 
-/****************************************************************
+/************************************************************//**
 Writes to the log the string given. It is assumed that the caller holds the
 log mutex. */
-
+UNIV_INTERN
 void
 log_write_low(
 /*==========*/
-	byte*	str,		/* in: string */
-	ulint	str_len)	/* in: string length */
+	byte*	str,		/*!< in: string */
+	ulint	str_len)	/*!< in: string length */
 {
 	log_t*	log	= log_sys;
 	ulint	len;
@@ -302,12 +342,12 @@ part_loop:
 					    log_sys->next_checkpoint_no);
 		len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
 
-		log->lsn = ut_dulint_add(log->lsn, len);
+		log->lsn += len;
 
 		/* Initialize the next block header */
 		log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
 	} else {
-		log->lsn = ut_dulint_add(log->lsn, len);
+		log->lsn += len;
 	}
 
 	log->buf_free += len;
@@ -321,20 +361,20 @@ part_loop:
 	srv_log_write_requests++;
 }
 
-/****************************************************************
-Closes the log. */
-
-dulint
+/************************************************************//**
+Closes the log.
+@return	lsn */
+UNIV_INTERN
+ib_uint64_t
 log_close(void)
 /*===========*/
-			/* out: lsn */
 {
-	byte*	log_block;
-	ulint	first_rec_group;
-	dulint	oldest_lsn;
-	dulint	lsn;
-	log_t*	log	= log_sys;
-	ulint	checkpoint_age;
+	byte*		log_block;
+	ulint		first_rec_group;
+	ib_uint64_t	oldest_lsn;
+	ib_uint64_t	lsn;
+	log_t*		log	= log_sys;
+	ib_uint64_t	checkpoint_age;
 
 	ut_ad(mutex_own(&(log->mutex)));
 
@@ -358,7 +398,7 @@ log_close(void)
 		log->check_flush_or_checkpoint = TRUE;
 	}
 
-	checkpoint_age = ut_dulint_minus(lsn, log->last_checkpoint_lsn);
+	checkpoint_age = lsn - log->last_checkpoint_lsn;
 
 	if (checkpoint_age >= log->log_group_capacity) {
 		/* TODO: split btr_store_big_rec_extern_fields() into small
@@ -396,9 +436,8 @@ log_close(void)
 
 	oldest_lsn = buf_pool_get_oldest_modification();
 
-	if (ut_dulint_is_zero(oldest_lsn)
-	    || (ut_dulint_minus(lsn, oldest_lsn)
-		> log->max_modified_age_async)
+	if (!oldest_lsn
+	    || lsn - oldest_lsn > log->max_modified_age_async
 	    || checkpoint_age > log->max_checkpoint_age_async) {
 
 		log->check_flush_or_checkpoint = TRUE;
@@ -414,7 +453,7 @@ function_exit:
 }
 
 #ifdef UNIV_LOG_ARCHIVE
-/**********************************************************
+/******************************************************//**
 Pads the current log block full with dummy log records. Used in producing
 consistent archived log files. */
 static
@@ -422,10 +461,10 @@ void
 log_pad_current_log_block(void)
 /*===========================*/
 {
-	byte	b		= MLOG_DUMMY_RECORD;
-	ulint	pad_length;
-	ulint	i;
-	dulint	lsn;
+	byte		b		= MLOG_DUMMY_RECORD;
+	ulint		pad_length;
+	ulint		i;
+	ib_uint64_t	lsn;
 
 	/* We retrieve lsn only because otherwise gcc crashed on HP-UX */
 	lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
@@ -443,52 +482,53 @@ log_pad_current_log_block(void)
 	log_close();
 	log_release();
 
-	ut_a((ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
-	     == LOG_BLOCK_HDR_SIZE);
+	ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
 }
 #endif /* UNIV_LOG_ARCHIVE */
 
-/**********************************************************
+/******************************************************//**
 Calculates the data capacity of a log group, when the log file headers are not
-included. */
-
+included.
+@return	capacity in bytes */
+UNIV_INTERN
 ulint
 log_group_get_capacity(
 /*===================*/
-				/* out: capacity in bytes */
-	log_group_t*	group)	/* in: log group */
+	const log_group_t*	group)	/*!< in: log group */
 {
 	ut_ad(mutex_own(&(log_sys->mutex)));
 
 	return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
 }
 
-/**********************************************************
+/******************************************************//**
 Calculates the offset within a log group, when the log file headers are not
-included. */
+included.
+@return	size offset (<= offset) */
 UNIV_INLINE
 ulint
 log_group_calc_size_offset(
 /*=======================*/
-				/* out: size offset (<= offset) */
-	ulint		offset,	/* in: real offset within the log group */
-	log_group_t*	group)	/* in: log group */
+	ulint			offset,	/*!< in: real offset within the
+					log group */
+	const log_group_t*	group)	/*!< in: log group */
 {
 	ut_ad(mutex_own(&(log_sys->mutex)));
 
 	return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
 }
 
-/**********************************************************
+/******************************************************//**
 Calculates the offset within a log group, when the log file headers are
-included. */
+included.
+@return	real offset (>= offset) */
 UNIV_INLINE
 ulint
 log_group_calc_real_offset(
 /*=======================*/
-				/* out: real offset (>= offset) */
-	ulint		offset,	/* in: size offset within the log group */
-	log_group_t*	group)	/* in: log group */
+	ulint			offset,	/*!< in: size offset within the
+					log group */
+	const log_group_t*	group)	/*!< in: log group */
 {
 	ut_ad(mutex_own(&(log_sys->mutex)));
 
@@ -496,22 +536,22 @@ log_group_calc_real_offset(
 	       * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
 }
 
-/**********************************************************
-Calculates the offset of an lsn within a log group. */
+/******************************************************//**
+Calculates the offset of an lsn within a log group.
+@return	offset within the log group */
 static
 ulint
 log_group_calc_lsn_offset(
 /*======================*/
-				/* out: offset within the log group */
-	dulint		lsn,	/* in: lsn, must be within 4 GB of
-				group->lsn */
-	log_group_t*	group)	/* in: log group */
+	ib_uint64_t		lsn,	/*!< in: lsn, must be within 4 GB of
+					group->lsn */
+	const log_group_t*	group)	/*!< in: log group */
 {
-	dulint		gr_lsn;
-	ib_longlong	gr_lsn_size_offset;
-	ib_longlong	difference;
-	ib_longlong	group_size;
-	ib_longlong	offset;
+	ib_uint64_t	gr_lsn;
+	ib_int64_t	gr_lsn_size_offset;
+	ib_int64_t	difference;
+	ib_int64_t	group_size;
+	ib_int64_t	offset;
 
 	ut_ad(mutex_own(&(log_sys->mutex)));
 
@@ -520,16 +560,16 @@ log_group_calc_lsn_offset(
 
 	gr_lsn = group->lsn;
 
-	gr_lsn_size_offset = (ib_longlong)
+	gr_lsn_size_offset = (ib_int64_t)
 		log_group_calc_size_offset(group->lsn_offset, group);
 
-	group_size = (ib_longlong) log_group_get_capacity(group);
+	group_size = (ib_int64_t) log_group_get_capacity(group);
 
-	if (ut_dulint_cmp(lsn, gr_lsn) >= 0) {
+	if (lsn >= gr_lsn) {
 
-		difference = (ib_longlong) ut_dulint_minus(lsn, gr_lsn);
+		difference = (ib_int64_t) (lsn - gr_lsn);
 	} else {
-		difference = (ib_longlong) ut_dulint_minus(gr_lsn, lsn);
+		difference = (ib_int64_t) (gr_lsn - lsn);
 
 		difference = difference % group_size;
 
@@ -538,7 +578,7 @@ log_group_calc_lsn_offset(
 
 	offset = (gr_lsn_size_offset + difference) % group_size;
 
-	ut_a(offset < (((ib_longlong) 1) << 32)); /* offset must be < 4 GB */
+	ut_a(offset < (((ib_int64_t) 1) << 32)); /* offset must be < 4 GB */
 
 	/* fprintf(stderr,
 	"Offset is %lu gr_lsn_offset is %lu difference is %lu\n",
@@ -547,78 +587,78 @@ log_group_calc_lsn_offset(
 
 	return(log_group_calc_real_offset((ulint)offset, group));
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***********************************************************************
-Calculates where in log files we find a specified lsn. */
+#ifdef UNIV_DEBUG
+UNIV_INTERN ibool	log_debug_writes = FALSE;
+#endif /* UNIV_DEBUG */
 
+/*******************************************************************//**
+Calculates where in log files we find a specified lsn.
+@return	log file number */
+UNIV_INTERN
 ulint
 log_calc_where_lsn_is(
 /*==================*/
-						/* out: log file number */
-	ib_longlong*	log_file_offset,	/* out: offset in that file
+	ib_int64_t*	log_file_offset,	/*!< out: offset in that file
 						(including the header) */
-	dulint		first_header_lsn,	/* in: first log file start
+	ib_uint64_t	first_header_lsn,	/*!< in: first log file start
 						lsn */
-	dulint		lsn,			/* in: lsn whose position to
+	ib_uint64_t	lsn,			/*!< in: lsn whose position to
 						determine */
-	ulint		n_log_files,		/* in: total number of log
+	ulint		n_log_files,		/*!< in: total number of log
 						files */
-	ib_longlong	log_file_size)		/* in: log file size
+	ib_int64_t	log_file_size)		/*!< in: log file size
 						(including the header) */
 {
-	ib_longlong	ib_lsn;
-	ib_longlong	ib_first_header_lsn;
-	ib_longlong	capacity	= log_file_size - LOG_FILE_HDR_SIZE;
+	ib_int64_t	capacity	= log_file_size - LOG_FILE_HDR_SIZE;
 	ulint		file_no;
-	ib_longlong	add_this_many;
+	ib_int64_t	add_this_many;
 
-	ib_lsn = ut_conv_dulint_to_longlong(lsn);
-	ib_first_header_lsn = ut_conv_dulint_to_longlong(first_header_lsn);
-
-	if (ib_lsn < ib_first_header_lsn) {
-		add_this_many = 1 + (ib_first_header_lsn - ib_lsn)
-			/ (capacity * (ib_longlong)n_log_files);
-		ib_lsn += add_this_many
-			* capacity * (ib_longlong)n_log_files;
+	if (lsn < first_header_lsn) {
+		add_this_many = 1 + (first_header_lsn - lsn)
+			/ (capacity * (ib_int64_t)n_log_files);
+		lsn += add_this_many
+			* capacity * (ib_int64_t)n_log_files;
 	}
 
-	ut_a(ib_lsn >= ib_first_header_lsn);
+	ut_a(lsn >= first_header_lsn);
 
-	file_no = ((ulint)((ib_lsn - ib_first_header_lsn) / capacity))
+	file_no = ((ulint)((lsn - first_header_lsn) / capacity))
 		% n_log_files;
-	*log_file_offset = (ib_lsn - ib_first_header_lsn) % capacity;
+	*log_file_offset = (lsn - first_header_lsn) % capacity;
 
 	*log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE;
 
 	return(file_no);
 }
 
-/************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
 Sets the field values in group to correspond to a given lsn. For this function
 to work, the values must already be correctly initialized to correspond to
 some lsn, for instance, a checkpoint lsn. */
-
+UNIV_INTERN
 void
 log_group_set_fields(
 /*=================*/
-	log_group_t*	group,	/* in: group */
-	dulint		lsn)	/* in: lsn for which the values should be
+	log_group_t*	group,	/*!< in/out: group */
+	ib_uint64_t	lsn)	/*!< in: lsn for which the values should be
 				set */
 {
 	group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
 	group->lsn = lsn;
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Calculates the recommended highest values for lsn - last_checkpoint_lsn,
-lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age. */
+lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age.
+@return error value FALSE if the smallest log group is too small to
+accommodate the number of OS threads in the database server */
 static
 ibool
 log_calc_max_ages(void)
 /*===================*/
-			/* out: error value FALSE if the smallest log group is
-			too small to accommodate the number of OS threads in
-			the database server */
 {
 	log_group_t*	group;
 	ulint		margin;
@@ -712,8 +752,7 @@ failure:
 			" After an ERROR-FREE shutdown\n"
 			"InnoDB: of mysqld you can adjust the size of"
 			" ib_logfiles, as explained in\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"adding-and-removing.html\n"
+			"InnoDB: " REFMAN "adding-and-removing.html\n"
 			"InnoDB: Cannot continue operation."
 			" Calling exit(1).\n",
 			(ulong)srv_thread_concurrency);
@@ -724,9 +763,9 @@ failure:
 	return(success);
 }
 
-/**********************************************************
+/******************************************************//**
 Initializes the log. */
-
+UNIV_INTERN
 void
 log_init(void)
 /*==========*/
@@ -747,7 +786,7 @@ log_init(void)
 	ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
 	ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
 
-	buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE);
+	buf = mem_alloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 	log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE);
 
 	log_sys->buf_size = LOG_BUFFER_SIZE;
@@ -767,9 +806,9 @@ log_init(void)
 
 	log_sys->buf_next_to_write = 0;
 
-	log_sys->write_lsn = ut_dulint_zero;
-	log_sys->current_flush_lsn = ut_dulint_zero;
-	log_sys->flushed_to_disk_lsn = ut_dulint_zero;
+	log_sys->write_lsn = 0;
+	log_sys->current_flush_lsn = 0;
+	log_sys->flushed_to_disk_lsn = 0;
 
 	log_sys->written_to_some_lsn = log_sys->lsn;
 	log_sys->written_to_all_lsn = log_sys->lsn;
@@ -787,7 +826,7 @@ log_init(void)
 	/*----------------------------*/
 	log_sys->adm_checkpoint_interval = ULINT_MAX;
 
-	log_sys->next_checkpoint_no = ut_dulint_zero;
+	log_sys->next_checkpoint_no = 0;
 	log_sys->last_checkpoint_lsn = log_sys->lsn;
 	log_sys->n_pending_checkpoint_writes = 0;
 
@@ -803,7 +842,7 @@ log_init(void)
 	/* Under MySQL, log archiving is always off */
 	log_sys->archiving_state = LOG_ARCH_OFF;
 	log_sys->archived_lsn = log_sys->lsn;
-	log_sys->next_archived_lsn = ut_dulint_zero;
+	log_sys->next_archived_lsn = 0;
 
 	log_sys->n_pending_archive_ios = 0;
 
@@ -828,36 +867,36 @@ log_init(void)
 	log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
 
 	log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
-	log_sys->lsn = ut_dulint_add(LOG_START_LSN, LOG_BLOCK_HDR_SIZE);
+	log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE;
 
 	mutex_exit(&(log_sys->mutex));
 
 #ifdef UNIV_LOG_DEBUG
 	recv_sys_create();
-	recv_sys_init(FALSE, buf_pool_get_curr_size());
+	recv_sys_init(buf_pool_get_curr_size());
 
 	recv_sys->parse_start_lsn = log_sys->lsn;
 	recv_sys->scanned_lsn = log_sys->lsn;
 	recv_sys->scanned_checkpoint_no = 0;
 	recv_sys->recovered_lsn = log_sys->lsn;
-	recv_sys->limit_lsn = ut_dulint_max;
+	recv_sys->limit_lsn = IB_ULONGLONG_MAX;
 #endif
 }
 
-/**********************************************************************
+/******************************************************************//**
 Inits a log group to the log system. */
-
+UNIV_INTERN
 void
 log_group_init(
 /*===========*/
-	ulint	id,			/* in: group id */
-	ulint	n_files,		/* in: number of log files */
-	ulint	file_size,		/* in: log file size in bytes */
-	ulint	space_id,		/* in: space id of the file space
+	ulint	id,			/*!< in: group id */
+	ulint	n_files,		/*!< in: number of log files */
+	ulint	file_size,		/*!< in: log file size in bytes */
+	ulint	space_id,		/*!< in: space id of the file space
 					which contains the log files of this
 					group */
 	ulint	archive_space_id __attribute__((unused)))
-					/* in: space id of the file space
+					/*!< in: space id of the file space
 					which contains some archived log
 					files for this group; currently, only
 					for the first log group this is
@@ -917,13 +956,13 @@ log_group_init(
 	ut_a(log_calc_max_ages());
 }
 
-/**********************************************************************
+/******************************************************************//**
 Does the unlockings needed in flush i/o completion. */
 UNIV_INLINE
 void
 log_flush_do_unlocks(
 /*=================*/
-	ulint	code)	/* in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
+	ulint	code)	/*!< in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
 			and LOG_UNLOCK_NONE_FLUSHED_LOCK */
 {
 	ut_ad(mutex_own(&(log_sys->mutex)));
@@ -948,15 +987,15 @@ log_flush_do_unlocks(
 	}
 }
 
-/**********************************************************************
+/******************************************************************//**
 Checks if a flush is completed for a log group and does the completion
-routine if yes. */
+routine if yes.
+@return	LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
 UNIV_INLINE
 ulint
 log_group_check_flush_completion(
 /*=============================*/
-				/* out: LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
-	log_group_t*	group)	/* in: log group */
+	log_group_t*	group)	/*!< in: log group */
 {
 	ut_ad(mutex_own(&(log_sys->mutex)));
 
@@ -984,13 +1023,13 @@ log_group_check_flush_completion(
 	return(0);
 }
 
-/**********************************************************
-Checks if a flush is completed and does the completion routine if yes. */
+/******************************************************//**
+Checks if a flush is completed and does the completion routine if yes.
+@return	LOG_UNLOCK_FLUSH_LOCK or 0 */
 static
 ulint
 log_sys_check_flush_completion(void)
 /*================================*/
-			/* out: LOG_UNLOCK_FLUSH_LOCK or 0 */
 {
 	ulint	move_start;
 	ulint	move_end;
@@ -1025,13 +1064,13 @@ log_sys_check_flush_completion(void)
 	return(0);
 }
 
-/**********************************************************
+/******************************************************//**
 Completes an i/o to a log file. */
-
+UNIV_INTERN
 void
 log_io_complete(
 /*============*/
-	log_group_t*	group)	/* in: log group or a dummy pointer */
+	log_group_t*	group)	/*!< in: log group or a dummy pointer */
 {
 	ulint	unlock;
 
@@ -1067,7 +1106,7 @@ log_io_complete(
 		return;
 	}
 
-	ut_error;	/* We currently use synchronous writing of the
+	ut_error;	/*!< We currently use synchronous writing of the
 			logs and cannot end up here! */
 
 	if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
@@ -1093,16 +1132,16 @@ log_io_complete(
 	mutex_exit(&(log_sys->mutex));
 }
 
-/**********************************************************
+/******************************************************//**
 Writes a log file header to a log file space. */
 static
 void
 log_group_file_header_flush(
 /*========================*/
-	log_group_t*	group,		/* in: log group */
-	ulint		nth_file,	/* in: header to the nth file in the
+	log_group_t*	group,		/*!< in: log group */
+	ulint		nth_file,	/*!< in: header to the nth file in the
 					log file space */
-	dulint		start_lsn)	/* in: log file data starts at this
+	ib_uint64_t	start_lsn)	/*!< in: log file data starts at this
 					lsn */
 {
 	byte*	buf;
@@ -1114,7 +1153,7 @@ log_group_file_header_flush(
 	buf = *(group->file_header_bufs + nth_file);
 
 	mach_write_to_4(buf + LOG_GROUP_ID, group->id);
-	mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
+	mach_write_ull(buf + LOG_FILE_START_LSN, start_lsn);
 
 	/* Wipe over possible label of ibbackup --restore */
 	memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, "    ", 4);
@@ -1133,7 +1172,7 @@ log_group_file_header_flush(
 
 		srv_os_log_pending_writes++;
 
-		fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id,
+		fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
 		       dest_offset / UNIV_PAGE_SIZE,
 		       dest_offset % UNIV_PAGE_SIZE,
 		       OS_FILE_LOG_BLOCK_SIZE,
@@ -1143,7 +1182,7 @@ log_group_file_header_flush(
 	}
 }
 
-/**********************************************************
+/******************************************************//**
 Stores a 4-byte checksum to the trailer checksum field of a log block
 before writing it to a log file. This checksum is used in recovery to
 check the consistency of a log block. */
@@ -1151,25 +1190,25 @@ static
 void
 log_block_store_checksum(
 /*=====================*/
-	byte*	block)	/* in/out: pointer to a log block */
+	byte*	block)	/*!< in/out: pointer to a log block */
 {
 	log_block_set_checksum(block, log_block_calc_checksum(block));
 }
 
-/**********************************************************
+/******************************************************//**
 Writes a buffer to a log file group. */
-
+UNIV_INTERN
 void
 log_group_write_buf(
 /*================*/
-	log_group_t*	group,		/* in: log group */
-	byte*		buf,		/* in: buffer */
-	ulint		len,		/* in: buffer len; must be divisible
+	log_group_t*	group,		/*!< in: log group */
+	byte*		buf,		/*!< in: buffer */
+	ulint		len,		/*!< in: buffer len; must be divisible
 					by OS_FILE_LOG_BLOCK_SIZE */
-	dulint		start_lsn,	/* in: start lsn of the buffer; must
+	ib_uint64_t	start_lsn,	/*!< in: start lsn of the buffer; must
 					be divisible by
 					OS_FILE_LOG_BLOCK_SIZE */
-	ulint		new_data_offset)/* in: start offset of new data in
+	ulint		new_data_offset)/*!< in: start offset of new data in
 					buf: this parameter is used to decide
 					if we have to write a new log file
 					header */
@@ -1181,7 +1220,7 @@ log_group_write_buf(
 
 	ut_ad(mutex_own(&(log_sys->mutex)));
 	ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
-	ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+	ut_a(((ulint) start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
 
 	if (new_data_offset == 0) {
 		write_header = TRUE;
@@ -1221,12 +1260,11 @@ loop:
 		fprintf(stderr,
 			"Writing log file segment to group %lu"
 			" offset %lu len %lu\n"
-			"start lsn %lu %lu\n"
+			"start lsn %llu\n"
 			"First block n:o %lu last block n:o %lu\n",
 			(ulong) group->id, (ulong) next_offset,
 			(ulong) write_len,
-			(ulong) ut_dulint_get_high(start_lsn),
-			(ulong) ut_dulint_get_low(start_lsn),
+			start_lsn,
 			(ulong) log_block_get_hdr_no(buf),
 			(ulong) log_block_get_hdr_no(
 				buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
@@ -1253,7 +1291,7 @@ loop:
 
 		srv_os_log_pending_writes++;
 
-		fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id,
+		fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
 		       next_offset / UNIV_PAGE_SIZE,
 		       next_offset % UNIV_PAGE_SIZE, write_len, buf, group);
 
@@ -1264,7 +1302,7 @@ loop:
 	}
 
 	if (write_len < len) {
-		start_lsn = ut_dulint_add(start_lsn, write_len);
+		start_lsn += write_len;
 		len -= write_len;
 		buf += write_len;
 
@@ -1274,22 +1312,23 @@ loop:
 	}
 }
 
-/**********************************************************
+/******************************************************//**
 This function is called, e.g., when a transaction wants to commit. It checks
 that the log has been written to the log file up to the last log entry written
 by the transaction. If there is a flush running, it waits and checks if the
 flush flushed enough. If not, starts a new flush. */
-
+UNIV_INTERN
 void
 log_write_up_to(
 /*============*/
-	dulint	lsn,	/* in: log sequence number up to which the log should
-			be written, ut_dulint_max if not specified */
-	ulint	wait,	/* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
-			or LOG_WAIT_ALL_GROUPS */
-	ibool	flush_to_disk)
-			/* in: TRUE if we want the written log also to be
-			flushed to disk */
+	ib_uint64_t	lsn,	/*!< in: log sequence number up to which
+				the log should be written,
+				IB_ULONGLONG_MAX if not specified */
+	ulint		wait,	/*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+				or LOG_WAIT_ALL_GROUPS */
+	ibool		flush_to_disk)
+				/*!< in: TRUE if we want the written log
+				also to be flushed to disk */
 {
 	log_group_t*	group;
 	ulint		start_offset;
@@ -1324,7 +1363,7 @@ loop:
 	mutex_enter(&(log_sys->mutex));
 
 	if (flush_to_disk
-	    && ut_dulint_cmp(log_sys->flushed_to_disk_lsn, lsn) >= 0) {
+	    && log_sys->flushed_to_disk_lsn >= lsn) {
 
 		mutex_exit(&(log_sys->mutex));
 
@@ -1332,9 +1371,8 @@ loop:
 	}
 
 	if (!flush_to_disk
-	    && (ut_dulint_cmp(log_sys->written_to_all_lsn, lsn) >= 0
-		|| (ut_dulint_cmp(log_sys->written_to_some_lsn, lsn)
-		    >= 0
+	    && (log_sys->written_to_all_lsn >= lsn
+		|| (log_sys->written_to_some_lsn >= lsn
 		    && wait != LOG_WAIT_ALL_GROUPS))) {
 
 		mutex_exit(&(log_sys->mutex));
@@ -1346,8 +1384,7 @@ loop:
 		/* A write (+ possibly flush to disk) is running */
 
 		if (flush_to_disk
-		    && ut_dulint_cmp(log_sys->current_flush_lsn, lsn)
-		    >= 0) {
+		    && log_sys->current_flush_lsn >= lsn) {
 			/* The write + flush will write enough: wait for it to
 			complete  */
 
@@ -1355,7 +1392,7 @@ loop:
 		}
 
 		if (!flush_to_disk
-		    && ut_dulint_cmp(log_sys->write_lsn, lsn) >= 0) {
+		    && log_sys->write_lsn >= lsn) {
 			/* The write will write enough: wait for it to
 			complete  */
 
@@ -1384,19 +1421,15 @@ loop:
 #ifdef UNIV_DEBUG
 	if (log_debug_writes) {
 		fprintf(stderr,
-			"Writing log from %lu %lu up to lsn %lu %lu\n",
-			(ulong) ut_dulint_get_high(
-				log_sys->written_to_all_lsn),
-			(ulong) ut_dulint_get_low(
-				log_sys->written_to_all_lsn),
-			(ulong) ut_dulint_get_high(log_sys->lsn),
-			(ulong)	ut_dulint_get_low(log_sys->lsn));
+			"Writing log from %llu up to lsn %llu\n",
+			log_sys->written_to_all_lsn,
+			log_sys->lsn);
 	}
 #endif /* UNIV_DEBUG */
 	log_sys->n_pending_writes++;
 
 	group = UT_LIST_GET_FIRST(log_sys->log_groups);
-	group->n_pending_writes++;	/* We assume here that we have only
+	group->n_pending_writes++;	/*!< We assume here that we have only
 					one log group! */
 
 	os_event_reset(log_sys->no_flush_event);
@@ -1442,7 +1475,7 @@ loop:
 		log_group_write_buf(
 			group, log_sys->buf + area_start,
 			area_end - area_start,
-			ut_dulint_align_down(log_sys->written_to_all_lsn,
+			ut_uint64_align_down(log_sys->written_to_all_lsn,
 					     OS_FILE_LOG_BLOCK_SIZE),
 			start_offset - area_start);
 
@@ -1489,23 +1522,30 @@ loop:
 do_waits:
 	mutex_exit(&(log_sys->mutex));
 
-	if (wait == LOG_WAIT_ONE_GROUP) {
+	switch (wait) {
+	case LOG_WAIT_ONE_GROUP:
 		os_event_wait(log_sys->one_flushed_event);
-	} else if (wait == LOG_WAIT_ALL_GROUPS) {
+		break;
+	case LOG_WAIT_ALL_GROUPS:
 		os_event_wait(log_sys->no_flush_event);
-	} else {
-		ut_ad(wait == LOG_NO_WAIT);
+		break;
+#ifdef UNIV_DEBUG
+	case LOG_NO_WAIT:
+		break;
+	default:
+		ut_error;
+#endif /* UNIV_DEBUG */
 	}
 }
 
-/********************************************************************
+/****************************************************************//**
 Does a syncronous flush of the log buffer to disk. */
-
+UNIV_INTERN
 void
 log_buffer_flush_to_disk(void)
 /*==========================*/
 {
-	dulint	lsn;
+	ib_uint64_t	lsn;
 
 	mutex_enter(&(log_sys->mutex));
 
@@ -1516,15 +1556,18 @@ log_buffer_flush_to_disk(void)
 	log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
 }
 
-/********************************************************************
-Flush the log buffer. Force it to disk depending on the value of
-innodb_flush_log_at_trx_commit. */
-
+/****************************************************************//**
+This functions writes the log buffer to the log file and if 'flush'
+is set it forces a flush of the log file as well. This is meant to be
+called from background master thread only as it does not wait for
+the write (+ possible flush) to finish. */
+UNIV_INTERN
 void
-log_buffer_flush_maybe_sync(void)
+log_buffer_sync_in_background(
 /*==========================*/
+	ibool	flush)	/*!< in: flush the logs to disk */
 {
-	dulint	lsn;
+	ib_uint64_t	lsn;
 
 	mutex_enter(&(log_sys->mutex));
 
@@ -1532,11 +1575,11 @@ log_buffer_flush_maybe_sync(void)
 
 	mutex_exit(&(log_sys->mutex));
 
-	/* Force log buffer to disk when innodb_flush_log_at_trx_commit = 1. */
-	log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS,
-			srv_flush_log_at_trx_commit == 1 ? TRUE : FALSE);
+	log_write_up_to(lsn, LOG_NO_WAIT, flush);
 }
+
 /********************************************************************
+
 Tries to establish a big enough margin of free space in the log buffer, such
 that a new log entry can be catenated without an immediate need for a flush. */
 static
@@ -1544,9 +1587,8 @@ void
 log_flush_margin(void)
 /*==================*/
 {
-	ibool	do_flush	= FALSE;
-	log_t*	log		= log_sys;
-	dulint	lsn;
+	log_t*		log	= log_sys;
+	ib_uint64_t	lsn	= 0;
 
 	mutex_enter(&(log->mutex));
 
@@ -1556,33 +1598,32 @@ log_flush_margin(void)
 			/* A flush is running: hope that it will provide enough
 			free space */
 		} else {
-			do_flush = TRUE;
 			lsn = log->lsn;
 		}
 	}
 
 	mutex_exit(&(log->mutex));
 
-	if (do_flush) {
+	if (lsn) {
 		log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
 	}
 }
 
-/********************************************************************
+/****************************************************************//**
 Advances the smallest lsn for which there are unflushed dirty blocks in the
 buffer pool. NOTE: this function may only be called if the calling thread owns
-no synchronization objects! */
-
+no synchronization objects!
+@return FALSE if there was a flush batch of the same type running,
+which means that we could not start this flush batch */
+UNIV_INTERN
 ibool
 log_preflush_pool_modified_pages(
 /*=============================*/
-				/* out: FALSE if there was a flush batch of
-				the same type running, which means that we
-				could not start this flush batch */
-	dulint	new_oldest,	/* in: try to advance oldest_modified_lsn
-				at least to this lsn */
-	ibool	sync)		/* in: TRUE if synchronous operation is
-				desired */
+	ib_uint64_t	new_oldest,	/*!< in: try to advance
+					oldest_modified_lsn at least
+					to this lsn */
+	ibool		sync)		/*!< in: TRUE if synchronous
+					operation is desired */
 {
 	ulint	n_pages;
 
@@ -1613,7 +1654,7 @@ log_preflush_pool_modified_pages(
 	return(TRUE);
 }
 
-/**********************************************************
+/******************************************************//**
 Completes a checkpoint. */
 static
 void
@@ -1623,15 +1664,14 @@ log_complete_checkpoint(void)
 	ut_ad(mutex_own(&(log_sys->mutex)));
 	ut_ad(log_sys->n_pending_checkpoint_writes == 0);
 
-	log_sys->next_checkpoint_no
-		= ut_dulint_add(log_sys->next_checkpoint_no, 1);
+	log_sys->next_checkpoint_no++;
 
 	log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
 
 	rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
 }
 
-/**********************************************************
+/******************************************************//**
 Completes an asynchronous checkpoint info write i/o to a log file. */
 static
 void
@@ -1651,16 +1691,16 @@ log_io_complete_checkpoint(void)
 	mutex_exit(&(log_sys->mutex));
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Writes info to a checkpoint about a log group. */
 static
 void
 log_checkpoint_set_nth_group_info(
 /*==============================*/
-	byte*	buf,	/* in: buffer for checkpoint info */
-	ulint	n,	/* in: nth slot */
-	ulint	file_no,/* in: archived file number */
-	ulint	offset)	/* in: archived file offset */
+	byte*	buf,	/*!< in: buffer for checkpoint info */
+	ulint	n,	/*!< in: nth slot */
+	ulint	file_no,/*!< in: archived file number */
+	ulint	offset)	/*!< in: archived file offset */
 {
 	ut_ad(n < LOG_MAX_N_GROUPS);
 
@@ -1670,16 +1710,16 @@ log_checkpoint_set_nth_group_info(
 			+ 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Gets info from a checkpoint about a log group. */
-
+UNIV_INTERN
 void
 log_checkpoint_get_nth_group_info(
 /*==============================*/
-	byte*	buf,	/* in: buffer containing checkpoint info */
-	ulint	n,	/* in: nth slot */
-	ulint*	file_no,/* out: archived file number */
-	ulint*	offset)	/* out: archived file offset */
+	const byte*	buf,	/*!< in: buffer containing checkpoint info */
+	ulint		n,	/*!< in: nth slot */
+	ulint*		file_no,/*!< out: archived file number */
+	ulint*		offset)	/*!< out: archived file offset */
 {
 	ut_ad(n < LOG_MAX_N_GROUPS);
 
@@ -1689,23 +1729,23 @@ log_checkpoint_get_nth_group_info(
 				   + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET);
 }
 
-/**********************************************************
+/******************************************************//**
 Writes the checkpoint info to a log group header. */
 static
 void
 log_group_checkpoint(
 /*=================*/
-	log_group_t*	group)	/* in: log group */
+	log_group_t*	group)	/*!< in: log group */
 {
 	log_group_t*	group2;
 #ifdef UNIV_LOG_ARCHIVE
-	dulint	archived_lsn;
-	dulint	next_archived_lsn;
+	ib_uint64_t	archived_lsn;
+	ib_uint64_t	next_archived_lsn;
 #endif /* UNIV_LOG_ARCHIVE */
-	ulint	write_offset;
-	ulint	fold;
-	byte*	buf;
-	ulint	i;
+	ulint		write_offset;
+	ulint		fold;
+	byte*		buf;
+	ulint		i;
 
 	ut_ad(mutex_own(&(log_sys->mutex)));
 #if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
@@ -1714,9 +1754,8 @@ log_group_checkpoint(
 
 	buf = group->checkpoint_buf;
 
-	mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
-	mach_write_to_8(buf + LOG_CHECKPOINT_LSN,
-			log_sys->next_checkpoint_lsn);
+	mach_write_ull(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
+	mach_write_ull(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
 
 	mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
 			log_group_calc_lsn_offset(
@@ -1726,20 +1765,19 @@ log_group_checkpoint(
 
 #ifdef UNIV_LOG_ARCHIVE
 	if (log_sys->archiving_state == LOG_ARCH_OFF) {
-		archived_lsn = ut_dulint_max;
+		archived_lsn = IB_ULONGLONG_MAX;
 	} else {
 		archived_lsn = log_sys->archived_lsn;
 
-		if (0 != ut_dulint_cmp(archived_lsn,
-				       log_sys->next_archived_lsn)) {
+		if (archived_lsn != log_sys->next_archived_lsn) {
 			next_archived_lsn = log_sys->next_archived_lsn;
 			/* For debugging only */
 		}
 	}
 
-	mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
+	mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
 #else /* UNIV_LOG_ARCHIVE */
-	mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max);
+	mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
 #endif /* UNIV_LOG_ARCHIVE */
 
 	for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
@@ -1780,7 +1818,7 @@ log_group_checkpoint(
 	/* We alternate the physical place of the checkpoint info in the first
 	log file */
 
-	if (ut_dulint_get_low(log_sys->next_checkpoint_no) % 2 == 0) {
+	if ((log_sys->next_checkpoint_no & 1) == 0) {
 		write_offset = LOG_CHECKPOINT_1;
 	} else {
 		write_offset = LOG_CHECKPOINT_2;
@@ -1801,7 +1839,7 @@ log_group_checkpoint(
 		added with 1, as we want to distinguish between a normal log
 		file write and a checkpoint field write */
 
-		fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id,
+		fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id, 0,
 		       write_offset / UNIV_PAGE_SIZE,
 		       write_offset % UNIV_PAGE_SIZE,
 		       OS_FILE_LOG_BLOCK_SIZE,
@@ -1810,28 +1848,30 @@ log_group_checkpoint(
 		ut_ad(((ulint)group & 0x1UL) == 0);
 	}
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/**********************************************************
+#ifdef UNIV_HOTBACKUP
+/******************************************************//**
 Writes info to a buffer of a log group when log files are created in
 backup restoration. */
-
+UNIV_INTERN
 void
 log_reset_first_header_and_checkpoint(
 /*==================================*/
-	byte*	hdr_buf,/* in: buffer which will be written to the start
-			of the first log file */
-	dulint	start)	/* in: lsn of the start of the first log file;
-			we pretend that there is a checkpoint at
-			start + LOG_BLOCK_HDR_SIZE */
+	byte*		hdr_buf,/*!< in: buffer which will be written to the
+				start of the first log file */
+	ib_uint64_t	start)	/*!< in: lsn of the start of the first log file;
+				we pretend that there is a checkpoint at
+				start + LOG_BLOCK_HDR_SIZE */
 {
-	ulint	fold;
-	byte*	buf;
-	dulint	lsn;
+	ulint		fold;
+	byte*		buf;
+	ib_uint64_t	lsn;
 
 	mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
-	mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start);
+	mach_write_ull(hdr_buf + LOG_FILE_START_LSN, start);
 
-	lsn = ut_dulint_add(start, LOG_BLOCK_HDR_SIZE);
+	lsn = start + LOG_BLOCK_HDR_SIZE;
 
 	/* Write the label of ibbackup --restore */
 	strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
@@ -1841,15 +1881,15 @@ log_reset_first_header_and_checkpoint(
 				+ (sizeof "ibbackup ") - 1));
 	buf = hdr_buf + LOG_CHECKPOINT_1;
 
-	mach_write_to_8(buf + LOG_CHECKPOINT_NO, ut_dulint_zero);
-	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn);
+	mach_write_ull(buf + LOG_CHECKPOINT_NO, 0);
+	mach_write_ull(buf + LOG_CHECKPOINT_LSN, lsn);
 
 	mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
 			LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
 
 	mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
 
-	mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max);
+	mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
 
 	fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
 	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
@@ -1862,28 +1902,30 @@ log_reset_first_header_and_checkpoint(
 	allocated size in the tablespace, but unfortunately we do not
 	know it here */
 }
+#endif /* UNIV_HOTBACKUP */
 
-/**********************************************************
+#ifndef UNIV_HOTBACKUP
+/******************************************************//**
 Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
-
+UNIV_INTERN
 void
 log_group_read_checkpoint_info(
 /*===========================*/
-	log_group_t*	group,	/* in: log group */
-	ulint		field)	/* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
+	log_group_t*	group,	/*!< in: log group */
+	ulint		field)	/*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
 {
 	ut_ad(mutex_own(&(log_sys->mutex)));
 
 	log_sys->n_log_ios++;
 
-	fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id,
+	fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id, 0,
 	       field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
 	       OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
 }
 
-/**********************************************************
+/******************************************************//**
 Writes checkpoint info to groups. */
-
+UNIV_INTERN
 void
 log_groups_write_checkpoint_info(void)
 /*==================================*/
@@ -1901,27 +1943,26 @@ log_groups_write_checkpoint_info(void)
 	}
 }
 
-/**********************************************************
+/******************************************************//**
 Makes a checkpoint. Note that this function does not flush dirty
 blocks from the buffer pool: it only checks what is lsn of the oldest
 modification in the pool, and writes information about the lsn in
-log files. Use log_make_checkpoint_at to flush also the pool. */
-
+log files. Use log_make_checkpoint_at to flush also the pool.
+@return	TRUE if success, FALSE if a checkpoint write was already running */
+UNIV_INTERN
 ibool
 log_checkpoint(
 /*===========*/
-				/* out: TRUE if success, FALSE if a checkpoint
-				write was already running */
-	ibool	sync,		/* in: TRUE if synchronous operation is
+	ibool	sync,		/*!< in: TRUE if synchronous operation is
 				desired */
-	ibool	write_always)	/* in: the function normally checks if the
+	ibool	write_always)	/*!< in: the function normally checks if the
 				the new checkpoint would have a greater
 				lsn than the previous one: if not, then no
 				physical write is done; by setting this
 				parameter TRUE, a physical write will always be
 				made to log files */
 {
-	dulint	oldest_lsn;
+	ib_uint64_t	oldest_lsn;
 
 	if (recv_recovery_is_on()) {
 		recv_apply_hashed_log_recs(TRUE);
@@ -1950,14 +1991,14 @@ log_checkpoint(
 	mutex_enter(&(log_sys->mutex));
 
 	if (!write_always
-	    && ut_dulint_cmp(log_sys->last_checkpoint_lsn, oldest_lsn) >= 0) {
+	    && log_sys->last_checkpoint_lsn >= oldest_lsn) {
 
 		mutex_exit(&(log_sys->mutex));
 
 		return(TRUE);
 	}
 
-	ut_ad(ut_dulint_cmp(log_sys->written_to_all_lsn, oldest_lsn) >= 0);
+	ut_ad(log_sys->written_to_all_lsn >= oldest_lsn);
 
 	if (log_sys->n_pending_checkpoint_writes > 0) {
 		/* A checkpoint write is running */
@@ -1977,10 +2018,9 @@ log_checkpoint(
 
 #ifdef UNIV_DEBUG
 	if (log_debug_writes) {
-		fprintf(stderr, "Making checkpoint no %lu at lsn %lu %lu\n",
-			(ulong) ut_dulint_get_low(log_sys->next_checkpoint_no),
-			(ulong) ut_dulint_get_high(oldest_lsn),
-			(ulong) ut_dulint_get_low(oldest_lsn));
+		fprintf(stderr, "Making checkpoint no %lu at lsn %llu\n",
+			(ulong) log_sys->next_checkpoint_no,
+			oldest_lsn);
 	}
 #endif /* UNIV_DEBUG */
 
@@ -1997,40 +2037,31 @@ log_checkpoint(
 	return(TRUE);
 }
 
-/********************************************************************
+/****************************************************************//**
 Makes a checkpoint at a given lsn or later. */
-
+UNIV_INTERN
 void
 log_make_checkpoint_at(
 /*===================*/
-	dulint	lsn,		/* in: make a checkpoint at this or a later
-				lsn, if ut_dulint_max, makes a checkpoint at
-				the latest lsn */
-	ibool	write_always)	/* in: the function normally checks if the
-				the new checkpoint would have a greater
-				lsn than the previous one: if not, then no
-				physical write is done; by setting this
-				parameter TRUE, a physical write will always be
-				made to log files */
+	ib_uint64_t	lsn,		/*!< in: make a checkpoint at this or a
+					later lsn, if IB_ULONGLONG_MAX, makes
+					a checkpoint at the latest lsn */
+	ibool		write_always)	/*!< in: the function normally checks if
+					the the new checkpoint would have a
+					greater lsn than the previous one: if
+					not, then no physical write is done;
+					by setting this parameter TRUE, a
+					physical write will always be made to
+					log files */
 {
-	ibool	success;
-
 	/* Preflush pages synchronously */
 
-	success = FALSE;
+	while (!log_preflush_pool_modified_pages(lsn, TRUE));
 
-	while (!success) {
-		success = log_preflush_pool_modified_pages(lsn, TRUE);
-	}
-
-	success = FALSE;
-
-	while (!success) {
-		success = log_checkpoint(TRUE, write_always);
-	}
+	while (!log_checkpoint(TRUE, write_always));
 }
 
-/********************************************************************
+/****************************************************************//**
 Tries to establish a big enough margin of free space in the log groups, such
 that a new log entry can be catenated without an immediate need for a
 checkpoint. NOTE: this function may only be called if the calling thread
@@ -2040,15 +2071,15 @@ void
 log_checkpoint_margin(void)
 /*=======================*/
 {
-	log_t*	log		= log_sys;
-	ulint	age;
-	ulint	checkpoint_age;
-	ulint	advance;
-	dulint	oldest_lsn;
-	ibool	sync;
-	ibool	checkpoint_sync;
-	ibool	do_checkpoint;
-	ibool	success;
+	log_t*		log		= log_sys;
+	ib_uint64_t	age;
+	ib_uint64_t	checkpoint_age;
+	ib_uint64_t	advance;
+	ib_uint64_t	oldest_lsn;
+	ibool		sync;
+	ibool		checkpoint_sync;
+	ibool		do_checkpoint;
+	ibool		success;
 loop:
 	sync = FALSE;
 	checkpoint_sync = FALSE;
@@ -2064,7 +2095,7 @@ loop:
 
 	oldest_lsn = log_buf_pool_get_oldest_modification();
 
-	age = ut_dulint_minus(log->lsn, oldest_lsn);
+	age = log->lsn - oldest_lsn;
 
 	if (age > log->max_modified_age_sync) {
 
@@ -2080,7 +2111,7 @@ loop:
 		advance = 0;
 	}
 
-	checkpoint_age = ut_dulint_minus(log->lsn, log->last_checkpoint_lsn);
+	checkpoint_age = log->lsn - log->last_checkpoint_lsn;
 
 	if (checkpoint_age > log->max_checkpoint_age) {
 		/* A checkpoint is urgent: we do it synchronously */
@@ -2102,7 +2133,7 @@ loop:
 	mutex_exit(&(log->mutex));
 
 	if (advance) {
-		dulint	new_oldest = ut_dulint_add(oldest_lsn, advance);
+		ib_uint64_t	new_oldest = oldest_lsn + advance;
 
 		success = log_preflush_pool_modified_pages(new_oldest, sync);
 
@@ -2132,17 +2163,17 @@ loop:
 	}
 }
 
-/**********************************************************
+/******************************************************//**
 Reads a specified log segment to a buffer. */
-
+UNIV_INTERN
 void
 log_group_read_log_seg(
 /*===================*/
-	ulint		type,		/* in: LOG_ARCHIVE or LOG_RECOVER */
-	byte*		buf,		/* in: buffer where to read */
-	log_group_t*	group,		/* in: log group */
-	dulint		start_lsn,	/* in: read area start */
-	dulint		end_lsn)	/* in: read area end */
+	ulint		type,		/*!< in: LOG_ARCHIVE or LOG_RECOVER */
+	byte*		buf,		/*!< in: buffer where to read */
+	log_group_t*	group,		/*!< in: log group */
+	ib_uint64_t	start_lsn,	/*!< in: read area start */
+	ib_uint64_t	end_lsn)	/*!< in: read area end */
 {
 	ulint	len;
 	ulint	source_offset;
@@ -2150,15 +2181,11 @@ log_group_read_log_seg(
 
 	ut_ad(mutex_own(&(log_sys->mutex)));
 
-	sync = FALSE;
-
-	if (type == LOG_RECOVER) {
-		sync = TRUE;
-	}
+	sync = (type == LOG_RECOVER);
 loop:
 	source_offset = log_group_calc_lsn_offset(start_lsn, group);
 
-	len = ut_dulint_minus(end_lsn, start_lsn);
+	len = (ulint) (end_lsn - start_lsn);
 
 	ut_ad(len != 0);
 
@@ -2176,46 +2203,46 @@ loop:
 
 	log_sys->n_log_ios++;
 
-	fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id,
+	fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
 	       source_offset / UNIV_PAGE_SIZE, source_offset % UNIV_PAGE_SIZE,
 	       len, buf, NULL);
 
-	start_lsn = ut_dulint_add(start_lsn, len);
+	start_lsn += len;
 	buf += len;
 
-	if (ut_dulint_cmp(start_lsn, end_lsn) != 0) {
+	if (start_lsn != end_lsn) {
 
 		goto loop;
 	}
 }
 
 #ifdef UNIV_LOG_ARCHIVE
-/**********************************************************
+/******************************************************//**
 Generates an archived log file name. */
-
+UNIV_INTERN
 void
 log_archived_file_name_gen(
 /*=======================*/
-	char*	buf,	/* in: buffer where to write */
+	char*	buf,	/*!< in: buffer where to write */
 	ulint	id __attribute__((unused)),
-			/* in: group id;
+			/*!< in: group id;
 			currently we only archive the first group */
-	ulint	file_no)/* in: file number */
+	ulint	file_no)/*!< in: file number */
 {
 	sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, (ulong) file_no);
 }
 
-/**********************************************************
+/******************************************************//**
 Writes a log file header to a log file space. */
 static
 void
 log_group_archive_file_header_write(
 /*================================*/
-	log_group_t*	group,		/* in: log group */
-	ulint		nth_file,	/* in: header to the nth file in the
+	log_group_t*	group,		/*!< in: log group */
+	ulint		nth_file,	/*!< in: header to the nth file in the
 					archive log file space */
-	ulint		file_no,	/* in: archived file number */
-	dulint		start_lsn)	/* in: log file data starts at this
+	ulint		file_no,	/*!< in: archived file number */
+	ib_uint64_t	start_lsn)	/*!< in: log file data starts at this
 					lsn */
 {
 	byte*	buf;
@@ -2228,7 +2255,7 @@ log_group_archive_file_header_write(
 	buf = *(group->archive_file_header_bufs + nth_file);
 
 	mach_write_to_4(buf + LOG_GROUP_ID, group->id);
-	mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
+	mach_write_ull(buf + LOG_FILE_START_LSN, start_lsn);
 	mach_write_to_4(buf + LOG_FILE_NO, file_no);
 
 	mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE);
@@ -2244,16 +2271,16 @@ log_group_archive_file_header_write(
 	       buf, &log_archive_io);
 }
 
-/**********************************************************
+/******************************************************//**
 Writes a log file header to a completed archived log file. */
 static
 void
 log_group_archive_completed_header_write(
 /*=====================================*/
-	log_group_t*	group,		/* in: log group */
-	ulint		nth_file,	/* in: header to the nth file in the
+	log_group_t*	group,		/*!< in: log group */
+	ulint		nth_file,	/*!< in: header to the nth file in the
 					archive log file space */
-	dulint		end_lsn)	/* in: end lsn of the file */
+	ib_uint64_t	end_lsn)	/*!< in: end lsn of the file */
 {
 	byte*	buf;
 	ulint	dest_offset;
@@ -2264,7 +2291,7 @@ log_group_archive_completed_header_write(
 	buf = *(group->archive_file_header_bufs + nth_file);
 
 	mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE);
-	mach_write_to_8(buf + LOG_FILE_END_LSN, end_lsn);
+	mach_write_ull(buf + LOG_FILE_END_LSN, end_lsn);
 
 	dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
 
@@ -2278,34 +2305,34 @@ log_group_archive_completed_header_write(
 	       &log_archive_io);
 }
 
-/**********************************************************
+/******************************************************//**
 Does the archive writes for a single log group. */
 static
 void
 log_group_archive(
 /*==============*/
-	log_group_t*	group)	/* in: log group */
+	log_group_t*	group)	/*!< in: log group */
 {
-	os_file_t file_handle;
-	dulint	start_lsn;
-	dulint	end_lsn;
-	char	name[1024];
-	byte*	buf;
-	ulint	len;
-	ibool	ret;
-	ulint	next_offset;
-	ulint	n_files;
-	ulint	open_mode;
+	os_file_t	 file_handle;
+	ib_uint64_t	start_lsn;
+	ib_uint64_t	end_lsn;
+	char		name[1024];
+	byte*		buf;
+	ulint		len;
+	ibool		ret;
+	ulint		next_offset;
+	ulint		n_files;
+	ulint		open_mode;
 
 	ut_ad(mutex_own(&(log_sys->mutex)));
 
 	start_lsn = log_sys->archived_lsn;
 
-	ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+	ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 
 	end_lsn = log_sys->next_archived_lsn;
 
-	ut_a(ut_dulint_get_low(end_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+	ut_a(end_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 
 	buf = log_sys->archive_buf;
 
@@ -2374,7 +2401,7 @@ loop:
 		}
 	}
 
-	len = ut_dulint_minus(end_lsn, start_lsn);
+	len = end_lsn - start_lsn;
 
 	if (group->file_size < (next_offset % group->file_size) + len) {
 
@@ -2384,10 +2411,9 @@ loop:
 #ifdef UNIV_DEBUG
 	if (log_debug_writes) {
 		fprintf(stderr,
-			"Archiving starting at lsn %lu %lu, len %lu"
+			"Archiving starting at lsn %llu, len %lu"
 			" to group %lu\n",
-			(ulong) ut_dulint_get_high(start_lsn),
-			(ulong) ut_dulint_get_low(start_lsn),
+			start_lsn,
 			(ulong) len, (ulong) group->id);
 	}
 #endif /* UNIV_DEBUG */
@@ -2401,7 +2427,7 @@ loop:
 	       ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
 	       &log_archive_io);
 
-	start_lsn = ut_dulint_add(start_lsn, len);
+	start_lsn += len;
 	next_offset += len;
 	buf += len;
 
@@ -2409,7 +2435,7 @@ loop:
 		n_files++;
 	}
 
-	if (ut_dulint_cmp(end_lsn, start_lsn) != 0) {
+	if (end_lsn != start_lsn) {
 
 		goto loop;
 	}
@@ -2420,7 +2446,7 @@ loop:
 	ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
 }
 
-/*********************************************************
+/*****************************************************//**
 (Writes to the archive of each log group.) Currently, only the first
 group is archived. */
 static
@@ -2437,7 +2463,7 @@ log_archive_groups(void)
 	log_group_archive(group);
 }
 
-/*********************************************************
+/*****************************************************//**
 Completes the archiving write phase for (each log group), currently,
 the first log group. */
 static
@@ -2449,8 +2475,8 @@ log_archive_write_complete_groups(void)
 	ulint		end_offset;
 	ulint		trunc_files;
 	ulint		n_files;
-	dulint		start_lsn;
-	dulint		end_lsn;
+	ib_uint64_t	start_lsn;
+	ib_uint64_t	end_lsn;
 	ulint		i;
 
 	ut_ad(mutex_own(&(log_sys->mutex)));
@@ -2486,16 +2512,14 @@ log_archive_write_complete_groups(void)
 #endif /* UNIV_DEBUG */
 
 	/* Calculate the archive file space start lsn */
-	start_lsn = ut_dulint_subtract(
-		log_sys->next_archived_lsn,
-		end_offset - LOG_FILE_HDR_SIZE + trunc_files
-		* (group->file_size - LOG_FILE_HDR_SIZE));
+	start_lsn = log_sys->next_archived_lsn
+		- (end_offset - LOG_FILE_HDR_SIZE + trunc_files
+		   * (group->file_size - LOG_FILE_HDR_SIZE));
 	end_lsn = start_lsn;
 
 	for (i = 0; i < trunc_files; i++) {
 
-		end_lsn = ut_dulint_add(end_lsn,
-					group->file_size - LOG_FILE_HDR_SIZE);
+		end_lsn += group->file_size - LOG_FILE_HDR_SIZE;
 
 		/* Write a notice to the headers of archived log
 		files that the file write has been completed */
@@ -2513,7 +2537,7 @@ log_archive_write_complete_groups(void)
 #endif /* UNIV_DEBUG */
 }
 
-/**********************************************************
+/******************************************************//**
 Completes an archiving i/o. */
 static
 void
@@ -2549,7 +2573,7 @@ log_archive_check_completion_low(void)
 	}
 }
 
-/**********************************************************
+/******************************************************//**
 Completes an archiving i/o. */
 static
 void
@@ -2577,42 +2601,39 @@ log_io_complete_archive(void)
 	mutex_exit(&(log_sys->mutex));
 }
 
-/************************************************************************
-Starts an archiving operation. */
-
+/********************************************************************//**
+Starts an archiving operation.
+@return	TRUE if succeed, FALSE if an archiving operation was already running */
+UNIV_INTERN
 ibool
 log_archive_do(
 /*===========*/
-			/* out: TRUE if succeed, FALSE if an archiving
-			operation was already running */
-	ibool	sync,	/* in: TRUE if synchronous operation is desired */
-	ulint*	n_bytes)/* out: archive log buffer size, 0 if nothing to
+	ibool	sync,	/*!< in: TRUE if synchronous operation is desired */
+	ulint*	n_bytes)/*!< out: archive log buffer size, 0 if nothing to
 			archive */
 {
-	ibool	calc_new_limit;
-	dulint	start_lsn;
-	dulint	limit_lsn;
+	ibool		calc_new_limit;
+	ib_uint64_t	start_lsn;
+	ib_uint64_t	limit_lsn;
 
 	calc_new_limit = TRUE;
 loop:
 	mutex_enter(&(log_sys->mutex));
 
-	if (log_sys->archiving_state == LOG_ARCH_OFF) {
+	switch (log_sys->archiving_state) {
+	case LOG_ARCH_OFF:
+arch_none:
 		mutex_exit(&(log_sys->mutex));
 
 		*n_bytes = 0;
 
 		return(TRUE);
-
-	} else if (log_sys->archiving_state == LOG_ARCH_STOPPED
-		   || log_sys->archiving_state == LOG_ARCH_STOPPING2) {
-
+	case LOG_ARCH_STOPPED:
+	case LOG_ARCH_STOPPING2:
 		mutex_exit(&(log_sys->mutex));
 
 		os_event_wait(log_sys->archiving_on);
 
-		mutex_enter(&(log_sys->mutex));
-
 		goto loop;
 	}
 
@@ -2620,28 +2641,23 @@ loop:
 
 	if (calc_new_limit) {
 		ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
-		limit_lsn = ut_dulint_add(start_lsn,
-					  log_sys->archive_buf_size);
+		limit_lsn = start_lsn + log_sys->archive_buf_size;
 
 		*n_bytes = log_sys->archive_buf_size;
 
-		if (ut_dulint_cmp(limit_lsn, log_sys->lsn) >= 0) {
+		if (limit_lsn >= log_sys->lsn) {
 
-			limit_lsn = ut_dulint_align_down(
+			limit_lsn = ut_uint64_align_down(
 				log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
 		}
 	}
 
-	if (ut_dulint_cmp(log_sys->archived_lsn, limit_lsn) >= 0) {
+	if (log_sys->archived_lsn >= limit_lsn) {
 
-		mutex_exit(&(log_sys->mutex));
-
-		*n_bytes = 0;
-
-		return(TRUE);
+		goto arch_none;
 	}
 
-	if (ut_dulint_cmp(log_sys->written_to_all_lsn, limit_lsn) < 0) {
+	if (log_sys->written_to_all_lsn < limit_lsn) {
 
 		mutex_exit(&(log_sys->mutex));
 
@@ -2676,11 +2692,8 @@ loop:
 #ifdef UNIV_DEBUG
 	if (log_debug_writes) {
 		fprintf(stderr,
-			"Archiving from lsn %lu %lu to lsn %lu %lu\n",
-			(ulong) ut_dulint_get_high(log_sys->archived_lsn),
-			(ulong) ut_dulint_get_low(log_sys->archived_lsn),
-			(ulong) ut_dulint_get_high(limit_lsn),
-			(ulong) ut_dulint_get_low(limit_lsn));
+			"Archiving from lsn %llu to lsn %llu\n",
+			log_sys->archived_lsn, limit_lsn);
 	}
 #endif /* UNIV_DEBUG */
 
@@ -2702,7 +2715,7 @@ loop:
 	return(TRUE);
 }
 
-/********************************************************************
+/****************************************************************//**
 Writes the log contents to the archive at least up to the lsn when this
 function was called. */
 static
@@ -2710,8 +2723,8 @@ void
 log_archive_all(void)
 /*=================*/
 {
-	dulint	present_lsn;
-	ulint	dummy;
+	ib_uint64_t	present_lsn;
+	ulint		dummy;
 
 	mutex_enter(&(log_sys->mutex));
 
@@ -2730,7 +2743,7 @@ log_archive_all(void)
 	for (;;) {
 		mutex_enter(&(log_sys->mutex));
 
-		if (ut_dulint_cmp(present_lsn, log_sys->archived_lsn) <= 0) {
+		if (present_lsn <= log_sys->archived_lsn) {
 
 			mutex_exit(&(log_sys->mutex));
 
@@ -2743,14 +2756,14 @@ log_archive_all(void)
 	}
 }
 
-/*********************************************************
+/*****************************************************//**
 Closes the possible open archive log file (for each group) the first group,
 and if it was open, increments the group file count by 2, if desired. */
 static
 void
 log_archive_close_groups(
 /*=====================*/
-	ibool	increment_file_count)	/* in: TRUE if we want to increment
+	ibool	increment_file_count)	/*!< in: TRUE if we want to increment
 					the file count */
 {
 	log_group_t*	group;
@@ -2795,16 +2808,16 @@ log_archive_close_groups(
 	}
 }
 
-/********************************************************************
+/****************************************************************//**
 Writes the log contents to the archive up to the lsn when this function was
 called, and stops the archiving. When archiving is started again, the archived
 log file numbers start from 2 higher, so that the archiving will not write
-again to the archived log files which exist when this function returns. */
-
+again to the archived log files which exist when this function returns.
+@return	DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
 ulint
 log_archive_stop(void)
 /*==================*/
-			/* out: DB_SUCCESS or DB_ERROR */
 {
 	ibool	success;
 
@@ -2862,13 +2875,13 @@ log_archive_stop(void)
 	return(DB_SUCCESS);
 }
 
-/********************************************************************
-Starts again archiving which has been stopped. */
-
+/****************************************************************//**
+Starts again archiving which has been stopped.
+@return	DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
 ulint
 log_archive_start(void)
 /*===================*/
-			/* out: DB_SUCCESS or DB_ERROR */
 {
 	mutex_enter(&(log_sys->mutex));
 
@@ -2888,13 +2901,13 @@ log_archive_start(void)
 	return(DB_SUCCESS);
 }
 
-/********************************************************************
-Stop archiving the log so that a gap may occur in the archived log files. */
-
+/****************************************************************//**
+Stop archiving the log so that a gap may occur in the archived log files.
+@return	DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
 ulint
 log_archive_noarchivelog(void)
 /*==========================*/
-			/* out: DB_SUCCESS or DB_ERROR */
 {
 loop:
 	mutex_enter(&(log_sys->mutex));
@@ -2920,13 +2933,13 @@ loop:
 	goto loop;
 }
 
-/********************************************************************
-Start archiving the log so that a gap may occur in the archived log files. */
-
+/****************************************************************//**
+Start archiving the log so that a gap may occur in the archived log files.
+@return	DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
 ulint
 log_archive_archivelog(void)
 /*========================*/
-			/* out: DB_SUCCESS or DB_ERROR */
 {
 	mutex_enter(&(log_sys->mutex));
 
@@ -2935,7 +2948,7 @@ log_archive_archivelog(void)
 		log_sys->archiving_state = LOG_ARCH_ON;
 
 		log_sys->archived_lsn
-			= ut_dulint_align_down(log_sys->lsn,
+			= ut_uint64_align_down(log_sys->lsn,
 					       OS_FILE_LOG_BLOCK_SIZE);
 		mutex_exit(&(log_sys->mutex));
 
@@ -2947,7 +2960,7 @@ log_archive_archivelog(void)
 	return(DB_ERROR);
 }
 
-/********************************************************************
+/****************************************************************//**
 Tries to establish a big enough margin of free space in the log groups, such
 that a new log entry can be catenated without an immediate need for
 archiving. */
@@ -2969,7 +2982,7 @@ loop:
 		return;
 	}
 
-	age = ut_dulint_minus(log->lsn, log->archived_lsn);
+	age = log->lsn - log->archived_lsn;
 
 	if (age > log->max_archived_lsn_age) {
 
@@ -3002,12 +3015,12 @@ loop:
 }
 #endif /* UNIV_LOG_ARCHIVE */
 
-/************************************************************************
+/********************************************************************//**
 Checks that there is enough free space in the log to start a new query step.
 Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
 function may only be called if the calling thread owns no synchronization
 objects! */
-
+UNIV_INTERN
 void
 log_check_margins(void)
 /*===================*/
@@ -3033,18 +3046,18 @@ loop:
 	mutex_exit(&(log_sys->mutex));
 }
 
-/********************************************************************
+/****************************************************************//**
 Makes a checkpoint at the latest lsn and writes it to first page of each
 data file in the database, so that we know that the file spaces contain
 all modifications up to that lsn. This can only be called at database
 shutdown. This function also writes all log in log files to the log archive. */
-
+UNIV_INTERN
 void
 logs_empty_and_mark_files_at_shutdown(void)
 /*=======================================*/
 {
-	dulint	lsn;
-	ulint	arch_log_no;
+	ib_uint64_t	lsn;
+	ulint		arch_log_no;
 
 	if (srv_print_verbose_log) {
 		ut_print_timestamp(stderr);
@@ -3133,19 +3146,16 @@ loop:
 	log_archive_all();
 #endif /* UNIV_LOG_ARCHIVE */
 
-	log_make_checkpoint_at(ut_dulint_max, TRUE);
+	log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
 	mutex_enter(&(log_sys->mutex));
 
 	lsn = log_sys->lsn;
 
-	if ((ut_dulint_cmp(lsn, log_sys->last_checkpoint_lsn) != 0)
+	if (lsn != log_sys->last_checkpoint_lsn
 #ifdef UNIV_LOG_ARCHIVE
 	    || (srv_log_archive_on
-		&& ut_dulint_cmp(lsn,
-				 ut_dulint_add(log_sys->archived_lsn,
-					       LOG_BLOCK_HDR_SIZE))
-		!= 0)
+		&& lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE)
 #endif /* UNIV_LOG_ARCHIVE */
 	    ) {
 
@@ -3200,17 +3210,14 @@ loop:
 	/* Make some checks that the server really is quiet */
 	ut_a(srv_n_threads_active[SRV_MASTER] == 0);
 	ut_a(buf_all_freed());
-	ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn));
+	ut_a(lsn == log_sys->lsn);
 
-	if (ut_dulint_cmp(lsn, srv_start_lsn) < 0) {
+	if (lsn < srv_start_lsn) {
 		fprintf(stderr,
 			"InnoDB: Error: log sequence number"
-			" at shutdown %lu %lu\n"
-			"InnoDB: is lower than at startup %lu %lu!\n",
-			(ulong) ut_dulint_get_high(lsn),
-			(ulong) ut_dulint_get_low(lsn),
-			(ulong) ut_dulint_get_high(srv_start_lsn),
-			(ulong) ut_dulint_get_low(srv_start_lsn));
+			" at shutdown %llu\n"
+			"InnoDB: is lower than at startup %llu!\n",
+			lsn, srv_start_lsn);
 	}
 
 	srv_shutdown_lsn = lsn;
@@ -3224,27 +3231,28 @@ loop:
 	/* Make some checks that the server really is quiet */
 	ut_a(srv_n_threads_active[SRV_MASTER] == 0);
 	ut_a(buf_all_freed());
-	ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn));
+	ut_a(lsn == log_sys->lsn);
 }
 
-/**********************************************************
+/******************************************************//**
 Checks by parsing that the catenated log segment for a single mtr is
 consistent. */
-
+UNIV_INTERN
 ibool
 log_check_log_recs(
 /*===============*/
-	byte*	buf,		/* in: pointer to the start of the log segment
-				in the log_sys->buf log buffer */
-	ulint	len,		/* in: segment length in bytes */
-	dulint	buf_start_lsn)	/* in: buffer start lsn */
+	byte*		buf,		/*!< in: pointer to the start of
+					the log segment in the
+					log_sys->buf log buffer */
+	ulint		len,		/*!< in: segment length in bytes */
+	ib_uint64_t	buf_start_lsn)	/*!< in: buffer start lsn */
 {
-	dulint	contiguous_lsn;
-	dulint	scanned_lsn;
-	byte*	start;
-	byte*	end;
-	byte*	buf1;
-	byte*	scan_buf;
+	ib_uint64_t	contiguous_lsn;
+	ib_uint64_t	scanned_lsn;
+	byte*		start;
+	byte*		end;
+	byte*		buf1;
+	byte*		scan_buf;
 
 	ut_ad(mutex_own(&(log_sys->mutex)));
 
@@ -3261,32 +3269,29 @@ log_check_log_recs(
 
 	ut_memcpy(scan_buf, start, end - start);
 
-	recv_scan_log_recs(TRUE,
-			   (buf_pool->n_frames
+	recv_scan_log_recs((buf_pool->curr_size
 			    - recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
 			   FALSE, scan_buf, end - start,
-			   ut_dulint_align_down(buf_start_lsn,
+			   ut_uint64_align_down(buf_start_lsn,
 						OS_FILE_LOG_BLOCK_SIZE),
 			   &contiguous_lsn, &scanned_lsn);
 
-	ut_a(ut_dulint_cmp(scanned_lsn, ut_dulint_add(buf_start_lsn, len))
-	     == 0);
-	ut_a(ut_dulint_cmp(recv_sys->recovered_lsn, scanned_lsn) == 0);
+	ut_a(scanned_lsn == buf_start_lsn + len);
+	ut_a(recv_sys->recovered_lsn == scanned_lsn);
 
 	mem_free(buf1);
 
 	return(TRUE);
 }
 
-/**********************************************************
-Peeks the current lsn. */
-
+/******************************************************//**
+Peeks the current lsn.
+@return	TRUE if success, FALSE if could not get the log system mutex */
+UNIV_INTERN
 ibool
 log_peek_lsn(
 /*=========*/
-			/* out: TRUE if success, FALSE if could not get the
-			log system mutex */
-	dulint*	lsn)	/* out: if returns TRUE, current lsn is here */
+	ib_uint64_t*	lsn)	/*!< out: if returns TRUE, current lsn is here */
 {
 	if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
 		*lsn = log_sys->lsn;
@@ -3299,13 +3304,13 @@ log_peek_lsn(
 	return(FALSE);
 }
 
-/**********************************************************
+/******************************************************//**
 Prints info of the log. */
-
+UNIV_INTERN
 void
 log_print(
 /*======*/
-	FILE*	file)	/* in: file where to print */
+	FILE*	file)	/*!< in: file where to print */
 {
 	double	time_elapsed;
 	time_t	current_time;
@@ -3313,15 +3318,12 @@ log_print(
 	mutex_enter(&(log_sys->mutex));
 
 	fprintf(file,
-		"Log sequence number %lu %lu\n"
-		"Log flushed up to   %lu %lu\n"
-		"Last checkpoint at  %lu %lu\n",
-		(ulong) ut_dulint_get_high(log_sys->lsn),
-		(ulong) ut_dulint_get_low(log_sys->lsn),
-		(ulong) ut_dulint_get_high(log_sys->flushed_to_disk_lsn),
-		(ulong) ut_dulint_get_low(log_sys->flushed_to_disk_lsn),
-		(ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn),
-		(ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn));
+		"Log sequence number %llu\n"
+		"Log flushed up to   %llu\n"
+		"Last checkpoint at  %llu\n",
+		log_sys->lsn,
+		log_sys->flushed_to_disk_lsn,
+		log_sys->last_checkpoint_lsn);
 
 	current_time = time(NULL);
 
@@ -3342,9 +3344,9 @@ log_print(
 	mutex_exit(&(log_sys->mutex));
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Refreshes the statistics used to print per-second averages. */
-
+UNIV_INTERN
 void
 log_refresh_stats(void)
 /*===================*/
@@ -3352,3 +3354,4 @@ log_refresh_stats(void)
 	log_sys->n_log_ios_old = log_sys->n_log_ios;
 	log_sys->last_printout_time = time(NULL);
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/log/log0recv.c b/storage/innodb_plugin/log/log0recv.c
similarity index 66%
rename from storage/innobase/log/log0recv.c
rename to storage/innodb_plugin/log/log0recv.c
index aef58b7b576..aea29c78517 100644
--- a/storage/innobase/log/log0recv.c
+++ b/storage/innodb_plugin/log/log0recv.c
@@ -1,7 +1,24 @@
-/******************************************************
-Recovery
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file log/log0recv.c
+Recovery
 
 Created 9/20/1997 Heikki Tuuri
 *******************************************************/
@@ -15,49 +32,59 @@ Created 9/20/1997 Heikki Tuuri
 #include "mem0mem.h"
 #include "buf0buf.h"
 #include "buf0flu.h"
-#include "buf0rea.h"
-#include "srv0srv.h"
-#include "srv0start.h"
 #include "mtr0mtr.h"
 #include "mtr0log.h"
-#include "page0page.h"
 #include "page0cur.h"
+#include "page0zip.h"
 #include "btr0btr.h"
 #include "btr0cur.h"
 #include "ibuf0ibuf.h"
 #include "trx0undo.h"
 #include "trx0rec.h"
-#include "trx0roll.h"
-#include "btr0cur.h"
-#include "btr0cur.h"
-#include "btr0cur.h"
-#include "dict0boot.h"
 #include "fil0fil.h"
-#include "sync0sync.h"
+#ifndef UNIV_HOTBACKUP
+# include "buf0rea.h"
+# include "srv0srv.h"
+# include "srv0start.h"
+# include "trx0roll.h"
+# include "row0merge.h"
+# include "sync0sync.h"
+#else /* !UNIV_HOTBACKUP */
 
-#ifdef UNIV_HOTBACKUP
-/* This is set to FALSE if the backup was originally taken with the
+/** This is set to FALSE if the backup was originally taken with the
 ibbackup --include regexp option: then we do not want to create tables in
 directories which were not included */
-ibool	recv_replay_file_ops	= TRUE;
-#endif /* UNIV_HOTBACKUP */
+UNIV_INTERN ibool	recv_replay_file_ops	= TRUE;
+#endif /* !UNIV_HOTBACKUP */
 
-/* Log records are stored in the hash table in chunks at most of this size;
+/** Log records are stored in the hash table in chunks at most of this size;
 this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
 #define RECV_DATA_BLOCK_SIZE	(MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
 
-/* Read-ahead area in applying log records to file pages */
+/** Read-ahead area in applying log records to file pages */
 #define RECV_READ_AHEAD_AREA	32
 
-recv_sys_t*	recv_sys = NULL;
-ibool		recv_recovery_on = FALSE;
-ibool		recv_recovery_from_backup_on = FALSE;
+/** The recovery system */
+UNIV_INTERN recv_sys_t*	recv_sys = NULL;
+/** TRUE when applying redo log records during crash recovery; FALSE
+otherwise.  Note that this is FALSE while a background thread is
+rolling back incomplete transactions. */
+UNIV_INTERN ibool	recv_recovery_on = FALSE;
+#ifdef UNIV_LOG_ARCHIVE
+/** TRUE when applying redo log records from an archived log file */
+UNIV_INTERN ibool	recv_recovery_from_backup_on = FALSE;
+#endif /* UNIV_LOG_ARCHIVE */
 
-ibool		recv_needed_recovery = FALSE;
+#ifndef UNIV_HOTBACKUP
+/** TRUE when recv_init_crash_recovery() has been called. */
+UNIV_INTERN ibool	recv_needed_recovery = FALSE;
 
-ibool		recv_lsn_checks_on = FALSE;
+/** TRUE if buf_page_is_corrupted() should check if the log sequence
+number (FIL_PAGE_LSN) is in the future.  Initially FALSE, and set by
+recv_recovery_from_checkpoint_start_func(). */
+UNIV_INTERN ibool	recv_lsn_checks_on = FALSE;
 
-/* There are two conditions under which we scan the logs, the first
+/** There are two conditions under which we scan the logs, the first
 is normal startup and the second is when we do a recovery from an
 archive.
 This flag is set if we are doing a scan from the last checkpoint during
@@ -65,63 +92,70 @@ startup. If we find log entries that were written after the last checkpoint
 we know that the server was not cleanly shutdown. We must then initialize
 the crash recovery environment before attempting to store these entries in
 the log hash table. */
-ibool	recv_log_scan_is_startup_type = FALSE;
+static ibool		recv_log_scan_is_startup_type = FALSE;
 
-/* If the following is TRUE, the buffer pool file pages must be invalidated
+/** If the following is TRUE, the buffer pool file pages must be invalidated
 after recovery and no ibuf operations are allowed; this becomes TRUE if
 the log record hash table becomes too full, and log records must be merged
 to file pages already before the recovery is finished: in this case no
 ibuf operations are allowed, as they could modify the pages read in the
-buffer pool before the pages have been recovered to the up-to-date state */
+buffer pool before the pages have been recovered to the up-to-date state.
 
-/* Recovery is running and no operations on the log files are allowed
-yet: the variable name is misleading */
-
-ibool	recv_no_ibuf_operations = FALSE;
-
-/* The following counter is used to decide when to print info on
+TRUE means that recovery is running and no operations on the log files
+are allowed yet: the variable name is misleading. */
+UNIV_INTERN ibool	recv_no_ibuf_operations = FALSE;
+/** TRUE when the redo log is being backed up */
+# define recv_is_making_a_backup		FALSE
+/** TRUE when recovering from a backed up redo log file */
+# define recv_is_from_backup			FALSE
+#else /* !UNIV_HOTBACKUP */
+# define recv_needed_recovery			FALSE
+/** TRUE when the redo log is being backed up */
+UNIV_INTERN ibool	recv_is_making_a_backup = FALSE;
+/** TRUE when recovering from a backed up redo log file */
+UNIV_INTERN ibool	recv_is_from_backup	= FALSE;
+# define buf_pool_get_curr_size() (5 * 1024 * 1024)
+#endif /* !UNIV_HOTBACKUP */
+/** The following counter is used to decide when to print info on
 log scan */
-ulint	recv_scan_print_counter	= 0;
+static ulint	recv_scan_print_counter	= 0;
 
-ibool	recv_is_from_backup	= FALSE;
-#ifdef UNIV_HOTBACKUP
-ibool	recv_is_making_a_backup = FALSE;
-#else
-# define recv_is_making_a_backup FALSE
-#endif /* UNIV_HOTBACKUP */
+/** The type of the previous parsed redo log record */
+static ulint	recv_previous_parsed_rec_type	= 999999;
+/** The offset of the previous parsed redo log record */
+static ulint	recv_previous_parsed_rec_offset	= 0;
+/** The 'multi' flag of the previous parsed redo log record */
+static ulint	recv_previous_parsed_rec_is_multi = 0;
 
-ulint	recv_previous_parsed_rec_type	= 999999;
-ulint	recv_previous_parsed_rec_offset	= 0;
-ulint	recv_previous_parsed_rec_is_multi = 0;
+/** Maximum page number encountered in the redo log */
+UNIV_INTERN ulint	recv_max_parsed_page_no		= 0;
 
-ulint	recv_max_parsed_page_no		= 0;
-
-/* This many frames must be left free in the buffer pool when we scan
+/** This many frames must be left free in the buffer pool when we scan
 the log and store the scanned log records in the buffer pool: we will
 use these free frames to read in pages when we start applying the
 log records to the database. */
+UNIV_INTERN ulint	recv_n_pool_free_frames		= 256;
 
-ulint	recv_n_pool_free_frames		= 256;
-
-/* The maximum lsn we see for a page during the recovery process. If this
+/** The maximum lsn we see for a page during the recovery process. If this
 is bigger than the lsn we are able to scan up to, that is an indication that
 the recovery failed and the database may be corrupt. */
-
-dulint	recv_max_page_lsn;
+UNIV_INTERN ib_uint64_t	recv_max_page_lsn;
 
 /* prototypes */
 
-/***********************************************************
+#ifndef UNIV_HOTBACKUP
+/*******************************************************//**
 Initialize crash recovery environment. Can be called iff
 recv_needed_recovery == FALSE. */
 static
 void
 recv_init_crash_recovery(void);
 /*===========================*/
+#endif /* !UNIV_HOTBACKUP */
 
-/************************************************************
+/********************************************************//**
 Creates the recovery system. */
-
+UNIV_INTERN
 void
 recv_sys_create(void)
 /*=================*/
@@ -139,15 +173,13 @@ recv_sys_create(void)
 	recv_sys->addr_hash = NULL;
 }
 
-/************************************************************
+/********************************************************//**
 Inits the recovery system for a recovery operation. */
-
+UNIV_INTERN
 void
 recv_sys_init(
 /*==========*/
-	ibool	recover_from_backup,	/* in: TRUE if this is called
-					to recover from a hot backup */
-	ulint	available_memory)	/* in: available memory in bytes */
+	ulint	available_memory)	/*!< in: available memory in bytes */
 {
 	if (recv_sys->heap != NULL) {
 
@@ -156,12 +188,12 @@ recv_sys_init(
 
 	mutex_enter(&(recv_sys->mutex));
 
-	if (!recover_from_backup) {
-		recv_sys->heap = mem_heap_create_in_buffer(256);
-	} else {
-		recv_sys->heap = mem_heap_create(256);
-		recv_is_from_backup = TRUE;
-	}
+#ifndef UNIV_HOTBACKUP
+	recv_sys->heap = mem_heap_create_in_buffer(256);
+#else /* !UNIV_HOTBACKUP */
+	recv_sys->heap = mem_heap_create(256);
+	recv_is_from_backup = TRUE;
+#endif /* !UNIV_HOTBACKUP */
 
 	recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
 	recv_sys->len = 0;
@@ -179,12 +211,12 @@ recv_sys_init(
 					OS_FILE_LOG_BLOCK_SIZE);
 	recv_sys->found_corrupt_log = FALSE;
 
-	recv_max_page_lsn = ut_dulint_zero;
+	recv_max_page_lsn = 0;
 
 	mutex_exit(&(recv_sys->mutex));
 }
 
-/************************************************************
+/********************************************************//**
 Empties the hash table when it has been fully processed. */
 static
 void
@@ -210,8 +242,9 @@ recv_sys_empty_hash(void)
 	recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
 }
 
-#ifndef UNIV_LOG_DEBUG
-/************************************************************
+#ifndef UNIV_HOTBACKUP
+# ifndef UNIV_LOG_DEBUG
+/********************************************************//**
 Frees the recovery system. */
 static
 void
@@ -230,55 +263,54 @@ recv_sys_free(void)
 
 	mutex_exit(&(recv_sys->mutex));
 }
-#endif /* UNIV_LOG_DEBUG */
+# endif /* UNIV_LOG_DEBUG */
 
-/************************************************************
+/********************************************************//**
 Truncates possible corrupted or extra records from a log group. */
 static
 void
 recv_truncate_group(
 /*================*/
-	log_group_t*	group,		/* in: log group */
-	dulint		recovered_lsn,	/* in: recovery succeeded up to this
+	log_group_t*	group,		/*!< in: log group */
+	ib_uint64_t	recovered_lsn,	/*!< in: recovery succeeded up to this
 					lsn */
-	dulint		limit_lsn,	/* in: this was the limit for
+	ib_uint64_t	limit_lsn,	/*!< in: this was the limit for
 					recovery */
-	dulint		checkpoint_lsn,	/* in: recovery was started from this
+	ib_uint64_t	checkpoint_lsn,	/*!< in: recovery was started from this
 					checkpoint */
-	dulint		archived_lsn)	/* in: the log has been archived up to
+	ib_uint64_t	archived_lsn)	/*!< in: the log has been archived up to
 					this lsn */
 {
-	dulint	start_lsn;
-	dulint	end_lsn;
-	dulint	finish_lsn1;
-	dulint	finish_lsn2;
-	dulint	finish_lsn;
-	ulint	len;
-	ulint	i;
+	ib_uint64_t	start_lsn;
+	ib_uint64_t	end_lsn;
+	ib_uint64_t	finish_lsn1;
+	ib_uint64_t	finish_lsn2;
+	ib_uint64_t	finish_lsn;
+	ulint		len;
+	ulint		i;
 
-	if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
+	if (archived_lsn == IB_ULONGLONG_MAX) {
 		/* Checkpoint was taken in the NOARCHIVELOG mode */
 		archived_lsn = checkpoint_lsn;
 	}
 
-	finish_lsn1 = ut_dulint_add(ut_dulint_align_down(
-					    archived_lsn,
-					    OS_FILE_LOG_BLOCK_SIZE),
-				    log_group_get_capacity(group));
+	finish_lsn1 = ut_uint64_align_down(archived_lsn,
+					   OS_FILE_LOG_BLOCK_SIZE)
+		+ log_group_get_capacity(group);
 
-	finish_lsn2 = ut_dulint_add(ut_dulint_align_up(
-					    recovered_lsn,
-					    OS_FILE_LOG_BLOCK_SIZE),
-				    recv_sys->last_log_buf_size);
+	finish_lsn2 = ut_uint64_align_up(recovered_lsn,
+					 OS_FILE_LOG_BLOCK_SIZE)
+		+ recv_sys->last_log_buf_size;
 
-	if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
+	if (limit_lsn != IB_ULONGLONG_MAX) {
 		/* We do not know how far we should erase log records: erase
 		as much as possible */
 
 		finish_lsn = finish_lsn1;
 	} else {
 		/* It is enough to erase the length of the log buffer */
-		finish_lsn = ut_dulint_get_min(finish_lsn1, finish_lsn2);
+		finish_lsn = finish_lsn1 < finish_lsn2
+			? finish_lsn1 : finish_lsn2;
 	}
 
 	ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
@@ -289,36 +321,36 @@ recv_truncate_group(
 		*(log_sys->buf + i) = '\0';
 	}
 
-	start_lsn = ut_dulint_align_down(recovered_lsn,
+	start_lsn = ut_uint64_align_down(recovered_lsn,
 					 OS_FILE_LOG_BLOCK_SIZE);
 
-	if (ut_dulint_cmp(start_lsn, recovered_lsn) != 0) {
+	if (start_lsn != recovered_lsn) {
 		/* Copy the last incomplete log block to the log buffer and
 		edit its data length: */
 
 		ut_memcpy(log_sys->buf, recv_sys->last_block,
 			  OS_FILE_LOG_BLOCK_SIZE);
-		log_block_set_data_len(log_sys->buf, ut_dulint_minus(
-					       recovered_lsn, start_lsn));
+		log_block_set_data_len(log_sys->buf,
+				       (ulint) (recovered_lsn - start_lsn));
 	}
 
-	if (ut_dulint_cmp(start_lsn, finish_lsn) >= 0) {
+	if (start_lsn >= finish_lsn) {
 
 		return;
 	}
 
 	for (;;) {
-		end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
+		end_lsn = start_lsn + RECV_SCAN_SIZE;
 
-		if (ut_dulint_cmp(end_lsn, finish_lsn) > 0) {
+		if (end_lsn > finish_lsn) {
 
 			end_lsn = finish_lsn;
 		}
 
-		len = ut_dulint_minus(end_lsn, start_lsn);
+		len = (ulint) (end_lsn - start_lsn);
 
 		log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
-		if (ut_dulint_cmp(end_lsn, finish_lsn) >= 0) {
+		if (end_lsn >= finish_lsn) {
 
 			return;
 		}
@@ -333,49 +365,49 @@ recv_truncate_group(
 	}
 }
 
-/************************************************************
+/********************************************************//**
 Copies the log segment between group->recovered_lsn and recovered_lsn from the
 most up-to-date log group to group, so that it contains the latest log data. */
 static
 void
 recv_copy_group(
 /*============*/
-	log_group_t*	up_to_date_group,	/* in: the most up-to-date log
+	log_group_t*	up_to_date_group,	/*!< in: the most up-to-date log
 						group */
-	log_group_t*	group,			/* in: copy to this log
+	log_group_t*	group,			/*!< in: copy to this log
 						group */
-	dulint		recovered_lsn)		/* in: recovery succeeded up
+	ib_uint64_t	recovered_lsn)		/*!< in: recovery succeeded up
 						to this lsn */
 {
-	dulint	start_lsn;
-	dulint	end_lsn;
-	ulint	len;
+	ib_uint64_t	start_lsn;
+	ib_uint64_t	end_lsn;
+	ulint		len;
 
-	if (ut_dulint_cmp(group->scanned_lsn, recovered_lsn) >= 0) {
+	if (group->scanned_lsn >= recovered_lsn) {
 
 		return;
 	}
 
 	ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
 
-	start_lsn = ut_dulint_align_down(group->scanned_lsn,
+	start_lsn = ut_uint64_align_down(group->scanned_lsn,
 					 OS_FILE_LOG_BLOCK_SIZE);
 	for (;;) {
-		end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
+		end_lsn = start_lsn + RECV_SCAN_SIZE;
 
-		if (ut_dulint_cmp(end_lsn, recovered_lsn) > 0) {
-			end_lsn = ut_dulint_align_up(recovered_lsn,
+		if (end_lsn > recovered_lsn) {
+			end_lsn = ut_uint64_align_up(recovered_lsn,
 						     OS_FILE_LOG_BLOCK_SIZE);
 		}
 
 		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
 				       up_to_date_group, start_lsn, end_lsn);
 
-		len = ut_dulint_minus(end_lsn, start_lsn);
+		len = (ulint) (end_lsn - start_lsn);
 
 		log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
 
-		if (ut_dulint_cmp(end_lsn, recovered_lsn) >= 0) {
+		if (end_lsn >= recovered_lsn) {
 
 			return;
 		}
@@ -384,7 +416,7 @@ recv_copy_group(
 	}
 }
 
-/************************************************************
+/********************************************************//**
 Copies a log segment from the most up-to-date log group to the other log
 groups, so that they all contain the latest log data. Also writes the info
 about the latest checkpoint to the groups, and inits the fields in the group
@@ -393,14 +425,14 @@ static
 void
 recv_synchronize_groups(
 /*====================*/
-	log_group_t*	up_to_date_group)	/* in: the most up-to-date
+	log_group_t*	up_to_date_group)	/*!< in: the most up-to-date
 						log group */
 {
 	log_group_t*	group;
-	dulint		start_lsn;
-	dulint		end_lsn;
-	dulint		recovered_lsn;
-	dulint		limit_lsn;
+	ib_uint64_t	start_lsn;
+	ib_uint64_t	end_lsn;
+	ib_uint64_t	recovered_lsn;
+	ib_uint64_t	limit_lsn;
 
 	recovered_lsn = recv_sys->recovered_lsn;
 	limit_lsn = recv_sys->limit_lsn;
@@ -408,11 +440,11 @@ recv_synchronize_groups(
 	/* Read the last recovered log block to the recovery system buffer:
 	the block is always incomplete */
 
-	start_lsn = ut_dulint_align_down(recovered_lsn,
+	start_lsn = ut_uint64_align_down(recovered_lsn,
 					 OS_FILE_LOG_BLOCK_SIZE);
-	end_lsn = ut_dulint_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
+	end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
 
-	ut_a(ut_dulint_cmp(start_lsn, end_lsn) != 0);
+	ut_a(start_lsn != end_lsn);
 
 	log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
 			       up_to_date_group, start_lsn, end_lsn);
@@ -451,15 +483,16 @@ recv_synchronize_groups(
 
 	mutex_enter(&(log_sys->mutex));
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***************************************************************************
-Checks the consistency of the checkpoint info */
+/***********************************************************************//**
+Checks the consistency of the checkpoint info
+@return	TRUE if ok */
 static
 ibool
 recv_check_cp_is_consistent(
 /*========================*/
-			/* out: TRUE if ok */
-	byte*	buf)	/* in: buffer containing checkpoint info */
+	const byte*	buf)	/*!< in: buffer containing checkpoint info */
 {
 	ulint	fold;
 
@@ -481,26 +514,27 @@ recv_check_cp_is_consistent(
 	return(TRUE);
 }
 
-/************************************************************
-Looks for the maximum consistent checkpoint from the log groups. */
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Looks for the maximum consistent checkpoint from the log groups.
+@return	error code or DB_SUCCESS */
 static
 ulint
 recv_find_max_checkpoint(
 /*=====================*/
-					/* out: error code or DB_SUCCESS */
-	log_group_t**	max_group,	/* out: max group */
-	ulint*		max_field)	/* out: LOG_CHECKPOINT_1 or
+	log_group_t**	max_group,	/*!< out: max group */
+	ulint*		max_field)	/*!< out: LOG_CHECKPOINT_1 or
 					LOG_CHECKPOINT_2 */
 {
 	log_group_t*	group;
-	dulint		max_no;
-	dulint		checkpoint_no;
+	ib_uint64_t	max_no;
+	ib_uint64_t	checkpoint_no;
 	ulint		field;
 	byte*		buf;
 
 	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
-	max_no = ut_dulint_zero;
+	max_no = 0;
 	*max_group = NULL;
 	*max_field = 0;
 
@@ -533,11 +567,11 @@ recv_find_max_checkpoint(
 
 			group->state = LOG_GROUP_OK;
 
-			group->lsn = mach_read_from_8(
+			group->lsn = mach_read_ull(
 				buf + LOG_CHECKPOINT_LSN);
 			group->lsn_offset = mach_read_from_4(
 				buf + LOG_CHECKPOINT_OFFSET);
-			checkpoint_no = mach_read_from_8(
+			checkpoint_no = mach_read_ull(
 				buf + LOG_CHECKPOINT_NO);
 
 #ifdef UNIV_DEBUG
@@ -545,13 +579,12 @@ recv_find_max_checkpoint(
 				fprintf(stderr,
 					"InnoDB: Checkpoint number %lu"
 					" found in group %lu\n",
-					(ulong) ut_dulint_get_low(
-						checkpoint_no),
+					(ulong) checkpoint_no,
 					(ulong) group->id);
 			}
 #endif /* UNIV_DEBUG */
 
-			if (ut_dulint_cmp(checkpoint_no, max_no) >= 0) {
+			if (checkpoint_no >= max_no) {
 				*max_group = group;
 				*max_field = field;
 				max_no = checkpoint_no;
@@ -575,46 +608,47 @@ not_consistent:
 			"InnoDB: to create the InnoDB data files,"
 			" but log file creation failed.\n"
 			"InnoDB: If that is the case, please refer to\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"error-creating-innodb.html\n");
+			"InnoDB: " REFMAN "error-creating-innodb.html\n");
 		return(DB_ERROR);
 	}
 
 	return(DB_SUCCESS);
 }
-
-/***********************************************************************
-Reads the checkpoint info needed in hot backup. */
-
+#else /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
+Reads the checkpoint info needed in hot backup.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 recv_read_cp_info_for_backup(
 /*=========================*/
-			/* out: TRUE if success */
-	byte*	hdr,	/* in: buffer containing the log group header */
-	dulint*	lsn,	/* out: checkpoint lsn */
-	ulint*	offset,	/* out: checkpoint offset in the log group */
-	ulint*	fsp_limit,/* out: fsp limit of space 0, 1000000000 if the
-			database is running with < version 3.23.50 of InnoDB */
-	dulint*	cp_no,	/* out: checkpoint number */
-	dulint*	first_header_lsn)
-			/* out: lsn of of the start of the first log file */
+	const byte*	hdr,	/*!< in: buffer containing the log group
+				header */
+	ib_uint64_t*	lsn,	/*!< out: checkpoint lsn */
+	ulint*		offset,	/*!< out: checkpoint offset in the log group */
+	ulint*		fsp_limit,/*!< out: fsp limit of space 0,
+				1000000000 if the database is running
+				with < version 3.23.50 of InnoDB */
+	ib_uint64_t*	cp_no,	/*!< out: checkpoint number */
+	ib_uint64_t*	first_header_lsn)
+				/*!< out: lsn of of the start of the
+				first log file */
 {
-	ulint	max_cp		= 0;
-	dulint	max_cp_no	= ut_dulint_zero;
-	byte*	cp_buf;
+	ulint		max_cp		= 0;
+	ib_uint64_t	max_cp_no	= 0;
+	const byte*	cp_buf;
 
 	cp_buf = hdr + LOG_CHECKPOINT_1;
 
 	if (recv_check_cp_is_consistent(cp_buf)) {
-		max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
+		max_cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
 		max_cp = LOG_CHECKPOINT_1;
 	}
 
 	cp_buf = hdr + LOG_CHECKPOINT_2;
 
 	if (recv_check_cp_is_consistent(cp_buf)) {
-		if (ut_dulint_cmp(mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO),
-				  max_cp_no) > 0) {
+		if (mach_read_ull(cp_buf + LOG_CHECKPOINT_NO) > max_cp_no) {
 			max_cp = LOG_CHECKPOINT_2;
 		}
 	}
@@ -625,7 +659,7 @@ recv_read_cp_info_for_backup(
 
 	cp_buf = hdr + max_cp;
 
-	*lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN);
+	*lsn = mach_read_ull(cp_buf + LOG_CHECKPOINT_LSN);
 	*offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
 
 	/* If the user is running a pre-3.23.50 version of InnoDB, its
@@ -645,24 +679,25 @@ recv_read_cp_info_for_backup(
 
 	/*	fprintf(stderr, "fsp limit %lu MB\n", *fsp_limit); */
 
-	*cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
+	*cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
 
-	*first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN);
+	*first_header_lsn = mach_read_ull(hdr + LOG_FILE_START_LSN);
 
 	return(TRUE);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/**********************************************************
-Checks the 4-byte checksum to the trailer checksum field of a log block.
-We also accept a log block in the old format < InnoDB-3.23.52 where the
-checksum field contains the log block number. */
+/******************************************************//**
+Checks the 4-byte checksum to the trailer checksum field of a log
+block.  We also accept a log block in the old format before
+InnoDB-3.23.52 where the checksum field contains the log block number.
+@return TRUE if ok, or if the log block may be in the format of InnoDB
+version predating 3.23.52 */
 static
 ibool
 log_block_checksum_is_ok_or_old_format(
 /*===================================*/
-			/* out: TRUE if ok, or if the log block may be in the
-			format of InnoDB version < 3.23.52 */
-	byte*	block)	/* in: pointer to a log block */
+	const byte*	block)	/*!< in: pointer to a log block */
 {
 #ifdef UNIV_LOG_DEBUG
 	return(TRUE);
@@ -688,22 +723,23 @@ log_block_checksum_is_ok_or_old_format(
 	return(FALSE);
 }
 
-/***********************************************************************
+#ifdef UNIV_HOTBACKUP
+/*******************************************************************//**
 Scans the log segment and n_bytes_scanned is set to the length of valid
 log scanned. */
-
+UNIV_INTERN
 void
 recv_scan_log_seg_for_backup(
 /*=========================*/
-	byte*		buf,		/* in: buffer containing log data */
-	ulint		buf_len,	/* in: data length in that buffer */
-	dulint*		scanned_lsn,	/* in/out: lsn of buffer start,
+	byte*		buf,		/*!< in: buffer containing log data */
+	ulint		buf_len,	/*!< in: data length in that buffer */
+	ib_uint64_t*	scanned_lsn,	/*!< in/out: lsn of buffer start,
 					we return scanned lsn */
 	ulint*		scanned_checkpoint_no,
-					/* in/out: 4 lowest bytes of the
+					/*!< in/out: 4 lowest bytes of the
 					highest scanned checkpoint number so
 					far */
-	ulint*		n_bytes_scanned)/* out: how much we were able to
+	ulint*		n_bytes_scanned)/*!< out: how much we were able to
 					scan, smaller than buf_len if log
 					data ended here */
 {
@@ -762,7 +798,7 @@ recv_scan_log_seg_for_backup(
 
 		*scanned_checkpoint_no
 			= log_block_get_checkpoint_no(log_block);
-		*scanned_lsn = ut_dulint_add(*scanned_lsn, data_len);
+		*scanned_lsn += data_len;
 
 		*n_bytes_scanned += data_len;
 
@@ -777,32 +813,126 @@ recv_scan_log_seg_for_backup(
 		}
 	}
 }
+#endif /* UNIV_HOTBACKUP */
 
-/***********************************************************************
+/*******************************************************************//**
 Tries to parse a single log record body and also applies it to a page if
-specified. File ops are parsed, but not applied in this function. */
+specified. File ops are parsed, but not applied in this function.
+@return	log record end, NULL if not a complete record */
 static
 byte*
 recv_parse_or_apply_log_rec_body(
 /*=============================*/
-			/* out: log record end, NULL if not a complete
-			record */
-	byte	type,	/* in: type */
-	byte*	ptr,	/* in: pointer to a buffer */
-	byte*	end_ptr,/* in: pointer to the buffer end */
-	page_t*	page,	/* in: buffer page or NULL; if not NULL, then the log
-			record is applied to the page, and the log record
-			should be complete then */
-	mtr_t*	mtr)	/* in: mtr or NULL; should be non-NULL if and only if
-			page is non-NULL */
+	byte		type,	/*!< in: type */
+	byte*		ptr,	/*!< in: pointer to a buffer */
+	byte*		end_ptr,/*!< in: pointer to the buffer end */
+	buf_block_t*	block,	/*!< in/out: buffer block or NULL; if
+				not NULL, then the log record is
+				applied to the page, and the log
+				record should be complete then */
+	mtr_t*		mtr)	/*!< in: mtr or NULL; should be non-NULL
+				if and only if block is non-NULL */
 {
-	dict_index_t*	index = NULL;
+	dict_index_t*	index	= NULL;
+	page_t*		page;
+	page_zip_des_t*	page_zip;
+#ifdef UNIV_DEBUG
+	ulint		page_type;
+#endif /* UNIV_DEBUG */
+
+	ut_ad(!block == !mtr);
+
+	if (block) {
+		page = block->frame;
+		page_zip = buf_block_get_page_zip(block);
+		ut_d(page_type = fil_page_get_type(page));
+	} else {
+		page = NULL;
+		page_zip = NULL;
+		ut_d(page_type = FIL_PAGE_TYPE_ALLOCATED);
+	}
 
 	switch (type) {
 	case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
-		ptr = mlog_parse_nbytes(type, ptr, end_ptr, page);
+#ifdef UNIV_DEBUG
+		if (page && page_type == FIL_PAGE_TYPE_ALLOCATED
+		    && end_ptr >= ptr + 2) {
+			/* It is OK to set FIL_PAGE_TYPE and certain
+			list node fields on an empty page.  Any other
+			write is not OK. */
+
+			/* NOTE: There may be bogus assertion failures for
+			dict_hdr_create(), trx_rseg_header_create(),
+			trx_sys_create_doublewrite_buf(), and
+			trx_sysf_create().
+			These are only called during database creation. */
+			ulint	offs = mach_read_from_2(ptr);
+
+			switch (type) {
+			default:
+				ut_error;
+			case MLOG_2BYTES:
+				/* Note that this can fail when the
+				redo log been written with something
+				older than InnoDB Plugin 1.0.4. */
+				ut_ad(offs == FIL_PAGE_TYPE
+				      || offs == IBUF_TREE_SEG_HEADER
+				      + IBUF_HEADER + FSEG_HDR_OFFSET
+				      || offs == PAGE_BTR_IBUF_FREE_LIST
+				      + PAGE_HEADER + FIL_ADDR_BYTE
+				      || offs == PAGE_BTR_IBUF_FREE_LIST
+				      + PAGE_HEADER + FIL_ADDR_BYTE
+				      + FIL_ADDR_SIZE
+				      || offs == PAGE_BTR_SEG_LEAF
+				      + PAGE_HEADER + FSEG_HDR_OFFSET
+				      || offs == PAGE_BTR_SEG_TOP
+				      + PAGE_HEADER + FSEG_HDR_OFFSET
+				      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+				      + PAGE_HEADER + FIL_ADDR_BYTE
+				      + 0 /*FLST_PREV*/
+				      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+				      + PAGE_HEADER + FIL_ADDR_BYTE
+				      + FIL_ADDR_SIZE /*FLST_NEXT*/);
+				break;
+			case MLOG_4BYTES:
+				/* Note that this can fail when the
+				redo log been written with something
+				older than InnoDB Plugin 1.0.4. */
+				ut_ad(0
+				      || offs == IBUF_TREE_SEG_HEADER
+				      + IBUF_HEADER + FSEG_HDR_SPACE
+				      || offs == IBUF_TREE_SEG_HEADER
+				      + IBUF_HEADER + FSEG_HDR_PAGE_NO
+				      || offs == PAGE_BTR_IBUF_FREE_LIST
+				      + PAGE_HEADER/* flst_init */
+				      || offs == PAGE_BTR_IBUF_FREE_LIST
+				      + PAGE_HEADER + FIL_ADDR_PAGE
+				      || offs == PAGE_BTR_IBUF_FREE_LIST
+				      + PAGE_HEADER + FIL_ADDR_PAGE
+				      + FIL_ADDR_SIZE
+				      || offs == PAGE_BTR_SEG_LEAF
+				      + PAGE_HEADER + FSEG_HDR_PAGE_NO
+				      || offs == PAGE_BTR_SEG_LEAF
+				      + PAGE_HEADER + FSEG_HDR_SPACE
+				      || offs == PAGE_BTR_SEG_TOP
+				      + PAGE_HEADER + FSEG_HDR_PAGE_NO
+				      || offs == PAGE_BTR_SEG_TOP
+				      + PAGE_HEADER + FSEG_HDR_SPACE
+				      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+				      + PAGE_HEADER + FIL_ADDR_PAGE
+				      + 0 /*FLST_PREV*/
+				      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+				      + PAGE_HEADER + FIL_ADDR_PAGE
+				      + FIL_ADDR_SIZE /*FLST_NEXT*/);
+				break;
+			}
+		}
+#endif /* UNIV_DEBUG */
+		ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip);
 		break;
 	case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
+		ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
 		if (NULL != (ptr = mlog_parse_index(
 				     ptr, end_ptr,
 				     type == MLOG_COMP_REC_INSERT,
@@ -811,10 +941,12 @@ recv_parse_or_apply_log_rec_body(
 			     || (ibool)!!page_is_comp(page)
 			     == dict_table_is_comp(index->table));
 			ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
-							index, page, mtr);
+							block, index, mtr);
 		}
 		break;
 	case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
+		ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
 		if (NULL != (ptr = mlog_parse_index(
 				     ptr, end_ptr,
 				     type == MLOG_COMP_REC_CLUST_DELETE_MARK,
@@ -823,22 +955,28 @@ recv_parse_or_apply_log_rec_body(
 			     || (ibool)!!page_is_comp(page)
 			     == dict_table_is_comp(index->table));
 			ptr = btr_cur_parse_del_mark_set_clust_rec(
-				ptr, end_ptr, index, page);
+				ptr, end_ptr, page, page_zip, index);
 		}
 		break;
 	case MLOG_COMP_REC_SEC_DELETE_MARK:
+		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 		/* This log record type is obsolete, but we process it for
 		backward compatibility with MySQL 5.0.3 and 5.0.4. */
 		ut_a(!page || page_is_comp(page));
+		ut_a(!page_zip);
 		ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
 		if (!ptr) {
 			break;
 		}
 		/* Fall through */
 	case MLOG_REC_SEC_DELETE_MARK:
-		ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, page);
+		ut_ad(!page || page_type == FIL_PAGE_INDEX);
+		ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
+							 page, page_zip);
 		break;
 	case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
+		ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
 		if (NULL != (ptr = mlog_parse_index(
 				     ptr, end_ptr,
 				     type == MLOG_COMP_REC_UPDATE_IN_PLACE,
@@ -846,12 +984,14 @@ recv_parse_or_apply_log_rec_body(
 			ut_a(!page
 			     || (ibool)!!page_is_comp(page)
 			     == dict_table_is_comp(index->table));
-			ptr = btr_cur_parse_update_in_place(ptr, end_ptr,
-							    page, index);
+			ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page,
+							    page_zip, index);
 		}
 		break;
 	case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
 	case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
+		ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
 		if (NULL != (ptr = mlog_parse_index(
 				     ptr, end_ptr,
 				     type == MLOG_COMP_LIST_END_DELETE
@@ -861,10 +1001,12 @@ recv_parse_or_apply_log_rec_body(
 			     || (ibool)!!page_is_comp(page)
 			     == dict_table_is_comp(index->table));
 			ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
-							 index, page, mtr);
+							 block, index, mtr);
 		}
 		break;
 	case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
+		ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
 		if (NULL != (ptr = mlog_parse_index(
 				     ptr, end_ptr,
 				     type == MLOG_COMP_LIST_END_COPY_CREATED,
@@ -873,10 +1015,12 @@ recv_parse_or_apply_log_rec_body(
 			     || (ibool)!!page_is_comp(page)
 			     == dict_table_is_comp(index->table));
 			ptr = page_parse_copy_rec_list_to_created_page(
-				ptr, end_ptr, index, page, mtr);
+				ptr, end_ptr, block, index, mtr);
 		}
 		break;
 	case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
+		ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
 		if (NULL != (ptr = mlog_parse_index(
 				     ptr, end_ptr,
 				     type == MLOG_COMP_PAGE_REORGANIZE,
@@ -885,37 +1029,52 @@ recv_parse_or_apply_log_rec_body(
 			     || (ibool)!!page_is_comp(page)
 			     == dict_table_is_comp(index->table));
 			ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
-							page, mtr);
+							block, mtr);
 		}
 		break;
 	case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
+		/* Allow anything in page_type when creating a page. */
+		ut_a(!page_zip);
 		ptr = page_parse_create(ptr, end_ptr,
 					type == MLOG_COMP_PAGE_CREATE,
-					page, mtr);
+					block, mtr);
 		break;
 	case MLOG_UNDO_INSERT:
+		ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 		ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
 		break;
 	case MLOG_UNDO_ERASE_END:
+		ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 		ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
 		break;
 	case MLOG_UNDO_INIT:
+		/* Allow anything in page_type when creating a page. */
 		ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
 		break;
 	case MLOG_UNDO_HDR_DISCARD:
+		ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 		ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
 		break;
 	case MLOG_UNDO_HDR_CREATE:
 	case MLOG_UNDO_HDR_REUSE:
+		ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 		ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
 						 page, mtr);
 		break;
 	case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
+		ut_ad(!page || page_type == FIL_PAGE_INDEX);
+		/* On a compressed page, MLOG_COMP_REC_MIN_MARK
+		will be followed by MLOG_COMP_REC_DELETE
+		or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL)
+		in the same mini-transaction. */
+		ut_a(type == MLOG_COMP_REC_MIN_MARK || !page_zip);
 		ptr = btr_parse_set_min_rec_mark(
 			ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
 			page, mtr);
 		break;
 	case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
+		ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
 		if (NULL != (ptr = mlog_parse_index(
 				     ptr, end_ptr,
 				     type == MLOG_COMP_REC_DELETE,
@@ -924,23 +1083,46 @@ recv_parse_or_apply_log_rec_body(
 			     || (ibool)!!page_is_comp(page)
 			     == dict_table_is_comp(index->table));
 			ptr = page_cur_parse_delete_rec(ptr, end_ptr,
-							index, page, mtr);
+							block, index, mtr);
 		}
 		break;
 	case MLOG_IBUF_BITMAP_INIT:
-		ptr = ibuf_parse_bitmap_init(ptr, end_ptr, page, mtr);
+		/* Allow anything in page_type when creating a page. */
+		ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
 		break;
 	case MLOG_INIT_FILE_PAGE:
-		ptr = fsp_parse_init_file_page(ptr, end_ptr, page);
+		/* Allow anything in page_type when creating a page. */
+		ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
 		break;
 	case MLOG_WRITE_STRING:
-		ptr = mlog_parse_string(ptr, end_ptr, page);
+		ut_ad(!page || page_type != FIL_PAGE_TYPE_ALLOCATED);
+		ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
 		break;
 	case MLOG_FILE_CREATE:
 	case MLOG_FILE_RENAME:
 	case MLOG_FILE_DELETE:
-		ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE,
-						 ULINT_UNDEFINED);
+	case MLOG_FILE_CREATE2:
+		ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0);
+		break;
+	case MLOG_ZIP_WRITE_NODE_PTR:
+		ut_ad(!page || page_type == FIL_PAGE_INDEX);
+		ptr = page_zip_parse_write_node_ptr(ptr, end_ptr,
+						    page, page_zip);
+		break;
+	case MLOG_ZIP_WRITE_BLOB_PTR:
+		ut_ad(!page || page_type == FIL_PAGE_INDEX);
+		ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr,
+						    page, page_zip);
+		break;
+	case MLOG_ZIP_WRITE_HEADER:
+		ut_ad(!page || page_type == FIL_PAGE_INDEX);
+		ptr = page_zip_parse_write_header(ptr, end_ptr,
+						  page, page_zip);
+		break;
+	case MLOG_ZIP_PAGE_COMPRESS:
+		/* Allow anything in page_type when creating a page. */
+		ptr = page_zip_parse_compress(ptr, end_ptr,
+					      page, page_zip);
 		break;
 	default:
 		ptr = NULL;
@@ -957,44 +1139,43 @@ recv_parse_or_apply_log_rec_body(
 	return(ptr);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Calculates the fold value of a page file address: used in inserting or
-searching for a log record in the hash table. */
+searching for a log record in the hash table.
+@return	folded value */
 UNIV_INLINE
 ulint
 recv_fold(
 /*======*/
-			/* out: folded value */
-	ulint	space,	/* in: space */
-	ulint	page_no)/* in: page number */
+	ulint	space,	/*!< in: space */
+	ulint	page_no)/*!< in: page number */
 {
 	return(ut_fold_ulint_pair(space, page_no));
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Calculates the hash value of a page file address: used in inserting or
-searching for a log record in the hash table. */
+searching for a log record in the hash table.
+@return	folded value */
 UNIV_INLINE
 ulint
 recv_hash(
 /*======*/
-			/* out: folded value */
-	ulint	space,	/* in: space */
-	ulint	page_no)/* in: page number */
+	ulint	space,	/*!< in: space */
+	ulint	page_no)/*!< in: page number */
 {
 	return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
 }
 
-/*************************************************************************
-Gets the hashed file address struct for a page. */
+/*********************************************************************//**
+Gets the hashed file address struct for a page.
+@return	file address struct, NULL if not found from the hash table */
 static
 recv_addr_t*
 recv_get_fil_addr_struct(
 /*=====================*/
-			/* out: file address struct, NULL if not found from
-			the hash table */
-	ulint	space,	/* in: space id */
-	ulint	page_no)/* in: page number */
+	ulint	space,	/*!< in: space id */
+	ulint	page_no)/*!< in: page number */
 {
 	recv_addr_t*	recv_addr;
 
@@ -1013,19 +1194,19 @@ recv_get_fil_addr_struct(
 	return(recv_addr);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Adds a new log record to the hash table of log records. */
 static
 void
 recv_add_to_hash_table(
 /*===================*/
-	byte	type,		/* in: log record type */
-	ulint	space,		/* in: space id */
-	ulint	page_no,	/* in: page number */
-	byte*	body,		/* in: log record body */
-	byte*	rec_end,	/* in: log record end */
-	dulint	start_lsn,	/* in: start lsn of the mtr */
-	dulint	end_lsn)	/* in: end lsn of the mtr */
+	byte		type,		/*!< in: log record type */
+	ulint		space,		/*!< in: space id */
+	ulint		page_no,	/*!< in: page number */
+	byte*		body,		/*!< in: log record body */
+	byte*		rec_end,	/*!< in: log record end */
+	ib_uint64_t	start_lsn,	/*!< in: start lsn of the mtr */
+	ib_uint64_t	end_lsn)	/*!< in: end lsn of the mtr */
 {
 	recv_t*		recv;
 	ulint		len;
@@ -1098,14 +1279,14 @@ recv_add_to_hash_table(
 	*prev_field = NULL;
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Copies the log record body from recv to buf. */
 static
 void
 recv_data_copy_to_buf(
 /*==================*/
-	byte*	buf,	/* in: buffer of length at least recv->len */
-	recv_t*	recv)	/* in: log record */
+	byte*	buf,	/*!< in: buffer of length at least recv->len */
+	recv_t*	recv)	/*!< in: log record */
 {
 	recv_data_t*	recv_data;
 	ulint		part_len;
@@ -1130,34 +1311,33 @@ recv_data_copy_to_buf(
 	}
 }
 
-/****************************************************************************
+/************************************************************************//**
 Applies the hashed log records to the page, if the page lsn is less than the
 lsn of a log record. This can be called when a buffer page has just been
 read in, or also for a page already in the buffer pool. */
-
+UNIV_INTERN
 void
-recv_recover_page(
-/*==============*/
-	ibool	recover_backup,	/* in: TRUE if we are recovering a backup
-				page: then we do not acquire any latches
-				since the page was read in outside the
-				buffer pool */
-	ibool	just_read_in,	/* in: TRUE if the i/o-handler calls this for
-				a freshly read page */
-	page_t*	page,		/* in: buffer page */
-	ulint	space,		/* in: space id */
-	ulint	page_no)	/* in: page number */
+recv_recover_page_func(
+/*===================*/
+#ifndef UNIV_HOTBACKUP
+	ibool		just_read_in,
+				/*!< in: TRUE if the i/o handler calls
+				this for a freshly read page */
+#endif /* !UNIV_HOTBACKUP */
+	buf_block_t*	block)	/*!< in/out: buffer block */
 {
-	buf_block_t*	block		= NULL;
+	page_t*		page;
 	recv_addr_t*	recv_addr;
 	recv_t*		recv;
 	byte*		buf;
-	dulint		start_lsn;
-	dulint		end_lsn;
-	dulint		page_lsn;
-	dulint		page_newest_lsn;
+	ib_uint64_t	start_lsn;
+	ib_uint64_t	end_lsn;
+	ib_uint64_t	page_lsn;
+	ib_uint64_t	page_newest_lsn;
 	ibool		modification_to_page;
+#ifndef UNIV_HOTBACKUP
 	ibool		success;
+#endif /* !UNIV_HOTBACKUP */
 	mtr_t		mtr;
 
 	mutex_enter(&(recv_sys->mutex));
@@ -1171,7 +1351,8 @@ recv_recover_page(
 		return;
 	}
 
-	recv_addr = recv_get_fil_addr_struct(space, page_no);
+	recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block),
+					     buf_block_get_page_no(block));
 
 	if ((recv_addr == NULL)
 	    || (recv_addr->state == RECV_BEING_PROCESSED)
@@ -1183,7 +1364,8 @@ recv_recover_page(
 	}
 
 #if 0
-	fprintf(stderr, "Recovering space %lu, page %lu\n", space, page_no);
+	fprintf(stderr, "Recovering space %lu, page %lu\n",
+		buf_block_get_space(block), buf_block_get_page_no(block));
 #endif
 
 	recv_addr->state = RECV_BEING_PROCESSED;
@@ -1193,52 +1375,47 @@ recv_recover_page(
 	mtr_start(&mtr);
 	mtr_set_log_mode(&mtr, MTR_LOG_NONE);
 
-	if (!recover_backup) {
-		block = buf_block_align(page);
+	page = block->frame;
 
-		if (just_read_in) {
-			/* Move the ownership of the x-latch on the
-			page to this OS thread, so that we can acquire
-			a second x-latch on it. This is needed for the
-			operations to the page to pass the debug
-			checks. */
+#ifndef UNIV_HOTBACKUP
+	if (just_read_in) {
+		/* Move the ownership of the x-latch on the page to
+		this OS thread, so that we can acquire a second
+		x-latch on it.  This is needed for the operations to
+		the page to pass the debug checks. */
 
-			rw_lock_x_lock_move_ownership(&(block->lock));
-		}
-
-		success = buf_page_get_known_nowait(RW_X_LATCH, page,
-						    BUF_KEEP_OLD,
-						    __FILE__, __LINE__,
-						    &mtr);
-		ut_a(success);
-
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
+		rw_lock_x_lock_move_ownership(&block->lock);
 	}
 
+	success = buf_page_get_known_nowait(RW_X_LATCH, block,
+					    BUF_KEEP_OLD,
+					    __FILE__, __LINE__,
+					    &mtr);
+	ut_a(success);
+
+	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+#endif /* !UNIV_HOTBACKUP */
+
 	/* Read the newest modification lsn from the page */
-	page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
+	page_lsn = mach_read_ull(page + FIL_PAGE_LSN);
 
-	if (!recover_backup) {
-		/* It may be that the page has been modified in the buffer
-		pool: read the newest modification lsn there */
+#ifndef UNIV_HOTBACKUP
+	/* It may be that the page has been modified in the buffer
+	pool: read the newest modification lsn there */
 
-		page_newest_lsn = buf_frame_get_newest_modification(page);
+	page_newest_lsn = buf_page_get_newest_modification(&block->page);
 
-		if (!ut_dulint_is_zero(page_newest_lsn)) {
+	if (page_newest_lsn) {
 
-			page_lsn = page_newest_lsn;
-		}
-	} else {
-		/* In recovery from a backup we do not really use the buffer
-		pool */
-
-		page_newest_lsn = ut_dulint_zero;
+		page_lsn = page_newest_lsn;
 	}
+#else /* !UNIV_HOTBACKUP */
+	/* In recovery from a backup we do not really use the buffer pool */
+	page_newest_lsn = 0;
+#endif /* !UNIV_HOTBACKUP */
 
 	modification_to_page = FALSE;
-	start_lsn = end_lsn = ut_dulint_zero;
+	start_lsn = end_lsn = 0;
 
 	recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
 
@@ -1259,13 +1436,12 @@ recv_recover_page(
 		if (recv->type == MLOG_INIT_FILE_PAGE) {
 			page_lsn = page_newest_lsn;
 
-			mach_write_to_8(page + UNIV_PAGE_SIZE
-					- FIL_PAGE_END_LSN_OLD_CHKSUM,
-					ut_dulint_zero);
-			mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
+			mach_write_ull(page + UNIV_PAGE_SIZE
+				       - FIL_PAGE_END_LSN_OLD_CHKSUM, 0);
+			mach_write_ull(page + FIL_PAGE_LSN, 0);
 		}
 
-		if (ut_dulint_cmp(recv->start_lsn, page_lsn) >= 0) {
+		if (recv->start_lsn >= page_lsn) {
 
 			if (!modification_to_page) {
 
@@ -1287,14 +1463,12 @@ recv_recover_page(
 
 			recv_parse_or_apply_log_rec_body(recv->type, buf,
 							 buf + recv->len,
-							 page, &mtr);
-			mach_write_to_8(page + UNIV_PAGE_SIZE
-					- FIL_PAGE_END_LSN_OLD_CHKSUM,
-					ut_dulint_add(recv->start_lsn,
-						      recv->len));
-			mach_write_to_8(page + FIL_PAGE_LSN,
-					ut_dulint_add(recv->start_lsn,
-						      recv->len));
+							 block, &mtr);
+			mach_write_ull(page + UNIV_PAGE_SIZE
+				       - FIL_PAGE_END_LSN_OLD_CHKSUM,
+				       recv->start_lsn + recv->len);
+			mach_write_ull(page + FIL_PAGE_LSN,
+				       recv->start_lsn + recv->len);
 		}
 
 		if (recv->len > RECV_DATA_BLOCK_SIZE) {
@@ -1304,9 +1478,19 @@ recv_recover_page(
 		recv = UT_LIST_GET_NEXT(rec_list, recv);
 	}
 
+#ifdef UNIV_ZIP_DEBUG
+	if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
+		page_zip_des_t*	page_zip = buf_block_get_page_zip(block);
+
+		if (page_zip) {
+			ut_a(page_zip_validate_low(page_zip, page, FALSE));
+		}
+	}
+#endif /* UNIV_ZIP_DEBUG */
+
 	mutex_enter(&(recv_sys->mutex));
 
-	if (ut_dulint_cmp(recv_max_page_lsn, page_lsn) < 0) {
+	if (recv_max_page_lsn < page_lsn) {
 		recv_max_page_lsn = page_lsn;
 	}
 
@@ -1317,11 +1501,13 @@ recv_recover_page(
 
 	mutex_exit(&(recv_sys->mutex));
 
-	if (!recover_backup && modification_to_page) {
+#ifndef UNIV_HOTBACKUP
+	if (modification_to_page) {
 		ut_a(block);
 
 		buf_flush_recv_note_modification(block, start_lsn, end_lsn);
 	}
+#endif /* !UNIV_HOTBACKUP */
 
 	/* Make sure that committing mtr does not change the modification
 	lsn values of page */
@@ -1331,16 +1517,18 @@ recv_recover_page(
 	mtr_commit(&mtr);
 }
 
-/***********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
 Reads in pages which have hashed log records, from an area around a given
-page number. */
+page number.
+@return	number of pages found */
 static
 ulint
 recv_read_in_area(
 /*==============*/
-			/* out: number of pages found */
-	ulint	space,	/* in: space */
-	ulint	page_no)/* in: page number */
+	ulint	space,	/*!< in: space */
+	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint	page_no)/*!< in: page number */
 {
 	recv_addr_t* recv_addr;
 	ulint	page_nos[RECV_READ_AHEAD_AREA];
@@ -1371,21 +1559,21 @@ recv_read_in_area(
 		}
 	}
 
-	buf_read_recv_pages(FALSE, space, page_nos, n);
+	buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
 	/*
 	fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
 	*/
 	return(n);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Empties the hash table of stored log records, applying them to appropriate
 pages. */
-
+UNIV_INTERN
 void
 recv_apply_hashed_log_recs(
 /*=======================*/
-	ibool	allow_ibuf)	/* in: if TRUE, also ibuf operations are
+	ibool	allow_ibuf)	/*!< in: if TRUE, also ibuf operations are
 				allowed during the application; if FALSE,
 				no ibuf operations are allowed, and after
 				the application all file pages are flushed to
@@ -1396,10 +1584,7 @@ recv_apply_hashed_log_recs(
 				mutex */
 {
 	recv_addr_t* recv_addr;
-	page_t*	page;
 	ulint	i;
-	ulint	space;
-	ulint	page_no;
 	ulint	n_pages;
 	ibool	has_printed	= FALSE;
 	mtr_t	mtr;
@@ -1429,8 +1614,9 @@ loop:
 		recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
 
 		while (recv_addr) {
-			space = recv_addr->space;
-			page_no = recv_addr->page_no;
+			ulint	space = recv_addr->space;
+			ulint	zip_size = fil_space_get_zip_size(space);
+			ulint	page_no = recv_addr->page_no;
 
 			if (recv_addr->state == RECV_NOT_PROCESSED) {
 				if (!has_printed) {
@@ -1446,21 +1632,21 @@ loop:
 				mutex_exit(&(recv_sys->mutex));
 
 				if (buf_page_peek(space, page_no)) {
+					buf_block_t*	block;
 
 					mtr_start(&mtr);
 
-					page = buf_page_get(space, page_no,
-							    RW_X_LATCH, &mtr);
+					block = buf_page_get(
+						space, zip_size, page_no,
+						RW_X_LATCH, &mtr);
+					buf_block_dbg_add_level(
+						block, SYNC_NO_ORDER_CHECK);
 
-#ifdef UNIV_SYNC_DEBUG
-					buf_page_dbg_add_level(
-						page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-					recv_recover_page(FALSE, FALSE, page,
-							  space, page_no);
+					recv_recover_page(FALSE, block);
 					mtr_commit(&mtr);
 				} else {
-					recv_read_in_area(space, page_no);
+					recv_read_in_area(space, zip_size,
+							  page_no);
 				}
 
 				mutex_enter(&(recv_sys->mutex));
@@ -1504,7 +1690,7 @@ loop:
 		mutex_exit(&(log_sys->mutex));
 
 		n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
-					  ut_dulint_max);
+					  IB_ULONGLONG_MAX);
 		ut_a(n_pages != ULINT_UNDEFINED);
 
 		buf_flush_wait_batch_end(BUF_FLUSH_LIST);
@@ -1528,21 +1714,17 @@ loop:
 
 	mutex_exit(&(recv_sys->mutex));
 }
-
-/* This page is allocated from the buffer pool and used in the function
-below */
-static page_t* recv_backup_application_page	= NULL;
-
-/***********************************************************************
+#else /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
 Applies log records in the hash table to a backup. */
-
+UNIV_INTERN
 void
 recv_apply_log_recs_for_backup(void)
 /*================================*/
 {
 	recv_addr_t*	recv_addr;
 	ulint		n_hash_cells;
-	byte*		page;
+	buf_block_t*	block;
 	ulint		actual_size;
 	ibool		success;
 	ulint		error;
@@ -1551,11 +1733,7 @@ recv_apply_log_recs_for_backup(void)
 	recv_sys->apply_log_recs = TRUE;
 	recv_sys->apply_batch_on = TRUE;
 
-	if (recv_backup_application_page == NULL) {
-		recv_backup_application_page = buf_frame_alloc();
-	}
-
-	page = recv_backup_application_page;
+	block = back_block1;
 
 	fputs("InnoDB: Starting an apply batch of log records"
 	      " to the database...\n"
@@ -1569,7 +1747,10 @@ recv_apply_log_recs_for_backup(void)
 
 		while (recv_addr != NULL) {
 
-			if (!fil_tablespace_exists_in_mem(recv_addr->space)) {
+			ulint	zip_size
+				= fil_space_get_zip_size(recv_addr->space);
+
+			if (zip_size == ULINT_UNDEFINED) {
 #if 0
 				fprintf(stderr,
 					"InnoDB: Warning: cannot apply"
@@ -1588,14 +1769,12 @@ recv_apply_log_recs_for_backup(void)
 			}
 
 			/* We simulate a page read made by the buffer pool, to
-			make sure the recovery apparatus works ok, for
-			example, the buf_frame_align() function. We must init
-			the block corresponding to buf_pool->frame_zero
-			(== page). */
+			make sure the recovery apparatus works ok. We must init
+			the block. */
 
 			buf_page_init_for_backup_restore(
 				recv_addr->space, recv_addr->page_no,
-				buf_block_align(page));
+				zip_size, block);
 
 			/* Extend the tablespace's last file if the page_no
 			does not fall inside its bounds; we assume the last
@@ -1617,9 +1796,23 @@ recv_apply_log_recs_for_backup(void)
 			/* Read the page from the tablespace file using the
 			fil0fil.c routines */
 
-			error = fil_io(OS_FILE_READ, TRUE, recv_addr->space,
-				       recv_addr->page_no, 0, UNIV_PAGE_SIZE,
-				       page, NULL);
+			if (zip_size) {
+				error = fil_io(OS_FILE_READ, TRUE,
+					       recv_addr->space, zip_size,
+					       recv_addr->page_no, 0, zip_size,
+					       block->page.zip.data, NULL);
+				if (error == DB_SUCCESS
+				    && !buf_zip_decompress(block, TRUE)) {
+					exit(1);
+				}
+			} else {
+				error = fil_io(OS_FILE_READ, TRUE,
+					       recv_addr->space, 0,
+					       recv_addr->page_no, 0,
+					       UNIV_PAGE_SIZE,
+					       block->frame, NULL);
+			}
+
 			if (error != DB_SUCCESS) {
 				fprintf(stderr,
 					"InnoDB: Fatal error: cannot read"
@@ -1632,19 +1825,28 @@ recv_apply_log_recs_for_backup(void)
 			}
 
 			/* Apply the log records to this page */
-			recv_recover_page(TRUE, FALSE, page, recv_addr->space,
-					  recv_addr->page_no);
+			recv_recover_page(FALSE, block);
 
 			/* Write the page back to the tablespace file using the
 			fil0fil.c routines */
 
 			buf_flush_init_for_writing(
-				page, mach_read_from_8(page + FIL_PAGE_LSN),
-				recv_addr->space, recv_addr->page_no);
+				block->frame, buf_block_get_page_zip(block),
+				mach_read_ull(block->frame + FIL_PAGE_LSN));
 
-			error = fil_io(OS_FILE_WRITE, TRUE, recv_addr->space,
-				       recv_addr->page_no, 0, UNIV_PAGE_SIZE,
-				       page, NULL);
+			if (zip_size) {
+				error = fil_io(OS_FILE_WRITE, TRUE,
+					       recv_addr->space, zip_size,
+					       recv_addr->page_no, 0,
+					       zip_size,
+					       block->page.zip.data, NULL);
+			} else {
+				error = fil_io(OS_FILE_WRITE, TRUE,
+					       recv_addr->space, 0,
+					       recv_addr->page_no, 0,
+					       UNIV_PAGE_SIZE,
+					       block->frame, NULL);
+			}
 skip_this_recv_addr:
 			recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
 		}
@@ -1659,21 +1861,21 @@ skip_this_recv_addr:
 
 	recv_sys_empty_hash();
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***********************************************************************
-Tries to parse a single log record and returns its length. */
+/*******************************************************************//**
+Tries to parse a single log record and returns its length.
+@return	length of the record, or 0 if the record was not complete */
 static
 ulint
 recv_parse_log_rec(
 /*===============*/
-			/* out: length of the record, or 0 if the record was
-			not complete */
-	byte*	ptr,	/* in: pointer to a buffer */
-	byte*	end_ptr,/* in: pointer to the buffer end */
-	byte*	type,	/* out: type */
-	ulint*	space,	/* out: space id */
-	ulint*	page_no,/* out: page number */
-	byte**	body)	/* out: log record body start */
+	byte*	ptr,	/*!< in: pointer to a buffer */
+	byte*	end_ptr,/*!< in: pointer to the buffer end */
+	byte*	type,	/*!< out: type */
+	ulint*	space,	/*!< out: space id */
+	ulint*	page_no,/*!< out: page number */
+	byte**	body)	/*!< out: log record body start */
 {
 	byte*	new_ptr;
 
@@ -1731,40 +1933,42 @@ recv_parse_log_rec(
 	return(new_ptr - ptr);
 }
 
-/***********************************************************
+/*******************************************************//**
 Calculates the new value for lsn when more data is added to the log. */
 static
-dulint
+ib_uint64_t
 recv_calc_lsn_on_data_add(
 /*======================*/
-	dulint	lsn,	/* in: old lsn */
-	ulint	len)	/* in: this many bytes of data is added, log block
-			headers not included */
+	ib_uint64_t	lsn,	/*!< in: old lsn */
+	ib_uint64_t	len)	/*!< in: this many bytes of data is
+				added, log block headers not included */
 {
 	ulint	frag_len;
 	ulint	lsn_len;
 
-	frag_len = (ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
+	frag_len = (((ulint) lsn) % OS_FILE_LOG_BLOCK_SIZE)
 		- LOG_BLOCK_HDR_SIZE;
 	ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
 	      - LOG_BLOCK_TRL_SIZE);
-	lsn_len = len + ((len + frag_len)
-			 / (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
-			    - LOG_BLOCK_TRL_SIZE))
+	lsn_len = (ulint) len;
+	lsn_len += (lsn_len + frag_len)
+		/ (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
+		   - LOG_BLOCK_TRL_SIZE)
 		* (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
 
-	return(ut_dulint_add(lsn, lsn_len));
+	return(lsn + lsn_len);
 }
 
-/***********************************************************
+#ifdef UNIV_LOG_DEBUG
+/*******************************************************//**
 Checks that the parser recognizes incomplete initial segments of a log
 record as incomplete. */
-
+static
 void
 recv_check_incomplete_log_recs(
 /*===========================*/
-	byte*	ptr,	/* in: pointer to a complete log record */
-	ulint	len)	/* in: length of the log record */
+	byte*	ptr,	/*!< in: pointer to a complete log record */
+	ulint	len)	/*!< in: length of the log record */
 {
 	ulint	i;
 	byte	type;
@@ -1777,27 +1981,27 @@ recv_check_incomplete_log_recs(
 					     &page_no, &body));
 	}
 }
+#endif /* UNIV_LOG_DEBUG */
 
-/***********************************************************
+/*******************************************************//**
 Prints diagnostic info of corrupt log. */
 static
 void
 recv_report_corrupt_log(
 /*====================*/
-	byte*	ptr,	/* in: pointer to corrupt log record */
-	byte	type,	/* in: type of the record */
-	ulint	space,	/* in: space id, this may also be garbage */
-	ulint	page_no)/* in: page number, this may also be garbage */
+	byte*	ptr,	/*!< in: pointer to corrupt log record */
+	byte	type,	/*!< in: type of the record */
+	ulint	space,	/*!< in: space id, this may also be garbage */
+	ulint	page_no)/*!< in: page number, this may also be garbage */
 {
 	fprintf(stderr,
 		"InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
 		"InnoDB: Log record type %lu, space id %lu, page number %lu\n"
-		"InnoDB: Log parsing proceeded successfully up to %lu %lu\n"
+		"InnoDB: Log parsing proceeded successfully up to %llu\n"
 		"InnoDB: Previous log record type %lu, is multi %lu\n"
 		"InnoDB: Recv offset %lu, prev %lu\n",
 		(ulong) type, (ulong) space, (ulong) page_no,
-		(ulong) ut_dulint_get_high(recv_sys->recovered_lsn),
-		(ulong) ut_dulint_get_low(recv_sys->recovered_lsn),
+		recv_sys->recovered_lsn,
 		(ulong) recv_previous_parsed_rec_type,
 		(ulong) recv_previous_parsed_rec_is_multi,
 		(ulong) (ptr - recv_sys->buf),
@@ -1828,40 +2032,39 @@ recv_report_corrupt_log(
 	      "InnoDB: far enough in recovery! Please run CHECK TABLE\n"
 	      "InnoDB: on your InnoDB tables to check that they are ok!\n"
 	      "InnoDB: If mysqld crashes after this recovery, look at\n"
-	      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-	      "forcing-recovery.html\n"
+	      "InnoDB: " REFMAN "forcing-recovery.html\n"
 	      "InnoDB: about forcing recovery.\n", stderr);
 
 	fflush(stderr);
 }
 
-/***********************************************************
+/*******************************************************//**
 Parses log records from a buffer and stores them to a hash table to wait
-merging to file pages. */
+merging to file pages.
+@return	currently always returns FALSE */
 static
 ibool
 recv_parse_log_recs(
 /*================*/
-				/* out: currently always returns FALSE */
-	ibool	store_to_hash)	/* in: TRUE if the records should be stored
+	ibool	store_to_hash)	/*!< in: TRUE if the records should be stored
 				to the hash table; this is set to FALSE if just
 				debug checking is needed */
 {
-	byte*	ptr;
-	byte*	end_ptr;
-	ulint	single_rec;
-	ulint	len;
-	ulint	total_len;
-	dulint	new_recovered_lsn;
-	dulint	old_lsn;
-	byte	type;
-	ulint	space;
-	ulint	page_no;
-	byte*	body;
-	ulint	n_recs;
+	byte*		ptr;
+	byte*		end_ptr;
+	ulint		single_rec;
+	ulint		len;
+	ulint		total_len;
+	ib_uint64_t	new_recovered_lsn;
+	ib_uint64_t	old_lsn;
+	byte		type;
+	ulint		space;
+	ulint		page_no;
+	byte*		body;
+	ulint		n_recs;
 
 	ut_ad(mutex_own(&(log_sys->mutex)));
-	ut_ad(!ut_dulint_is_zero(recv_sys->parse_start_lsn));
+	ut_ad(recv_sys->parse_start_lsn != 0);
 loop:
 	ptr = recv_sys->buf + recv_sys->recovered_offset;
 
@@ -1897,8 +2100,7 @@ loop:
 
 		new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
 
-		if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
-		    > 0) {
+		if (new_recovered_lsn > recv_sys->scanned_lsn) {
 			/* The log record filled a log block, and we require
 			that also the next log block should have been scanned
 			in */
@@ -1926,9 +2128,19 @@ loop:
 		if (type == MLOG_DUMMY_RECORD) {
 			/* Do nothing */
 
-		} else if (store_to_hash && (type == MLOG_FILE_CREATE
-					     || type == MLOG_FILE_RENAME
-					     || type == MLOG_FILE_DELETE)) {
+		} else if (!store_to_hash) {
+			/* In debug checking, update a replicate page
+			according to the log record, and check that it
+			becomes identical with the original page */
+#ifdef UNIV_LOG_DEBUG
+			recv_check_incomplete_log_recs(ptr, len);
+#endif/* UNIV_LOG_DEBUG */
+
+		} else if (type == MLOG_FILE_CREATE
+			   || type == MLOG_FILE_CREATE2
+			   || type == MLOG_FILE_RENAME
+			   || type == MLOG_FILE_DELETE) {
+			ut_a(space);
 #ifdef UNIV_HOTBACKUP
 			if (recv_replay_file_ops) {
 
@@ -1938,8 +2150,8 @@ loop:
 				point to the datadir we should use there */
 
 				if (NULL == fil_op_log_parse_or_replay(
-					    body, end_ptr, type, TRUE,
-					    space)) {
+					    body, end_ptr, type,
+					    space, page_no)) {
 					fprintf(stderr,
 						"InnoDB: Error: file op"
 						" log record of type %lu"
@@ -1949,20 +2161,16 @@ loop:
 						(ulint)type, space,
 						(char*)(body + 2));
 
-					ut_a(0);
+					ut_error;
 				}
 			}
 #endif
 			/* In normal mysqld crash recovery we do not try to
 			replay file operations */
-		} else if (store_to_hash) {
+		} else {
 			recv_add_to_hash_table(type, space, page_no, body,
 					       ptr + len, old_lsn,
 					       recv_sys->recovered_lsn);
-		} else {
-#ifdef UNIV_LOG_DEBUG
-			recv_check_incomplete_log_recs(ptr, len);
-#endif/* UNIV_LOG_DEBUG */
 		}
 	} else {
 		/* Check that all the records associated with the single mtr
@@ -2023,8 +2231,7 @@ loop:
 		new_recovered_lsn = recv_calc_lsn_on_data_add(
 			recv_sys->recovered_lsn, total_len);
 
-		if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
-		    > 0) {
+		if (new_recovered_lsn > recv_sys->scanned_lsn) {
 			/* The log record filled a log block, and we require
 			that also the next log block should have been scanned
 			in */
@@ -2073,26 +2280,26 @@ loop:
 	goto loop;
 }
 
-/***********************************************************
+/*******************************************************//**
 Adds data from a new log block to the parsing buffer of recv_sys if
-recv_sys->parse_start_lsn is non-zero. */
+recv_sys->parse_start_lsn is non-zero.
+@return	TRUE if more data added */
 static
 ibool
 recv_sys_add_to_parsing_buf(
 /*========================*/
-				/* out: TRUE if more data added */
-	byte*	log_block,	/* in: log block */
-	dulint	scanned_lsn)	/* in: lsn of how far we were able to find
-				data in this log block */
+	const byte*	log_block,	/*!< in: log block */
+	ib_uint64_t	scanned_lsn)	/*!< in: lsn of how far we were able
+					to find data in this log block */
 {
 	ulint	more_len;
 	ulint	data_len;
 	ulint	start_offset;
 	ulint	end_offset;
 
-	ut_ad(ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) >= 0);
+	ut_ad(scanned_lsn >= recv_sys->scanned_lsn);
 
-	if (ut_dulint_is_zero(recv_sys->parse_start_lsn)) {
+	if (!recv_sys->parse_start_lsn) {
 		/* Cannot start parsing yet because no start point for
 		it found */
 
@@ -2101,20 +2308,18 @@ recv_sys_add_to_parsing_buf(
 
 	data_len = log_block_get_data_len(log_block);
 
-	if (ut_dulint_cmp(recv_sys->parse_start_lsn, scanned_lsn) >= 0) {
+	if (recv_sys->parse_start_lsn >= scanned_lsn) {
 
 		return(FALSE);
 
-	} else if (ut_dulint_cmp(recv_sys->scanned_lsn, scanned_lsn) >= 0) {
+	} else if (recv_sys->scanned_lsn >= scanned_lsn) {
 
 		return(FALSE);
 
-	} else if (ut_dulint_cmp(recv_sys->parse_start_lsn,
-				 recv_sys->scanned_lsn) > 0) {
-		more_len = ut_dulint_minus(scanned_lsn,
-					   recv_sys->parse_start_lsn);
+	} else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) {
+		more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn);
 	} else {
-		more_len = ut_dulint_minus(scanned_lsn, recv_sys->scanned_lsn);
+		more_len = (ulint) (scanned_lsn - recv_sys->scanned_lsn);
 	}
 
 	if (more_len == 0) {
@@ -2150,7 +2355,7 @@ recv_sys_add_to_parsing_buf(
 	return(TRUE);
 }
 
-/***********************************************************
+/*******************************************************//**
 Moves the parsing buffer data left to the buffer start. */
 static
 void
@@ -2165,44 +2370,43 @@ recv_sys_justify_left_parsing_buf(void)
 	recv_sys->recovered_offset = 0;
 }
 
-/***********************************************************
-Scans log from a buffer and stores new log data to the parsing buffer. Parses
-and hashes the log records if new data found. */
-
+/*******************************************************//**
+Scans log from a buffer and stores new log data to the parsing buffer.
+Parses and hashes the log records if new data found.  Unless
+UNIV_HOTBACKUP is defined, this function will apply log records
+automatically when the hash table becomes full.
+@return TRUE if limit_lsn has been reached, or not able to scan any
+more in this log group */
+UNIV_INTERN
 ibool
 recv_scan_log_recs(
 /*===============*/
-				/* out: TRUE if limit_lsn has been reached, or
-				not able to scan any more in this log group */
-	ibool	apply_automatically,/* in: TRUE if we want this function to
-				apply log records automatically when the
-				hash table becomes full; in the hot backup tool
-				the tool does the applying, not this
-				function */
-	ulint	available_memory,/* in: we let the hash table of recs to grow
-				to this size, at the maximum */
-	ibool	store_to_hash,	/* in: TRUE if the records should be stored
-				to the hash table; this is set to FALSE if just
-				debug checking is needed */
-	byte*	buf,		/* in: buffer containing a log segment or
-				garbage */
-	ulint	len,		/* in: buffer length */
-	dulint	start_lsn,	/* in: buffer start lsn */
-	dulint*	contiguous_lsn,	/* in/out: it is known that all log groups
-				contain contiguous log data up to this lsn */
-	dulint*	group_scanned_lsn)/* out: scanning succeeded up to this lsn */
+	ulint		available_memory,/*!< in: we let the hash table of recs
+					to grow to this size, at the maximum */
+	ibool		store_to_hash,	/*!< in: TRUE if the records should be
+					stored to the hash table; this is set
+					to FALSE if just debug checking is
+					needed */
+	const byte*	buf,		/*!< in: buffer containing a log
+					segment or garbage */
+	ulint		len,		/*!< in: buffer length */
+	ib_uint64_t	start_lsn,	/*!< in: buffer start lsn */
+	ib_uint64_t*	contiguous_lsn,	/*!< in/out: it is known that all log
+					groups contain contiguous log data up
+					to this lsn */
+	ib_uint64_t*	group_scanned_lsn)/*!< out: scanning succeeded up to
+					this lsn */
 {
-	byte*	log_block;
-	ulint	no;
-	dulint	scanned_lsn;
-	ibool	finished;
-	ulint	data_len;
-	ibool	more_data;
+	const byte*	log_block;
+	ulint		no;
+	ib_uint64_t	scanned_lsn;
+	ibool		finished;
+	ulint		data_len;
+	ibool		more_data;
 
-	ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+	ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 	ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
 	ut_ad(len > 0);
-	ut_a(apply_automatically <= TRUE);
 	ut_a(store_to_hash <= TRUE);
 
 	finished = FALSE;
@@ -2228,13 +2432,11 @@ recv_scan_log_recs(
 				    log_block)) {
 				fprintf(stderr,
 					"InnoDB: Log block no %lu at"
-					" lsn %lu %lu has\n"
+					" lsn %llu has\n"
 					"InnoDB: ok header, but checksum field"
 					" contains %lu, should be %lu\n",
 					(ulong) no,
-					(ulong) ut_dulint_get_high(
-						scanned_lsn),
-					(ulong) ut_dulint_get_low(scanned_lsn),
+					scanned_lsn,
 					(ulong) log_block_get_checksum(
 						log_block),
 					(ulong) log_block_calc_checksum(
@@ -2256,7 +2458,7 @@ recv_scan_log_recs(
 			we know that log data is contiguous up to scanned_lsn
 			in all non-corrupt log groups. */
 
-			if (ut_dulint_cmp(scanned_lsn, *contiguous_lsn) > 0) {
+			if (scanned_lsn > *contiguous_lsn) {
 				*contiguous_lsn = scanned_lsn;
 			}
 		}
@@ -2264,8 +2466,7 @@ recv_scan_log_recs(
 		data_len = log_block_get_data_len(log_block);
 
 		if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
-		    && (ut_dulint_cmp(ut_dulint_add(scanned_lsn, data_len),
-				      recv_sys->scanned_lsn) > 0)
+		    && scanned_lsn + data_len > recv_sys->scanned_lsn
 		    && (recv_sys->scanned_checkpoint_no > 0)
 		    && (log_block_get_checkpoint_no(log_block)
 			< recv_sys->scanned_checkpoint_no)
@@ -2286,40 +2487,37 @@ recv_scan_log_recs(
 			break;
 		}
 
-		if (ut_dulint_is_zero(recv_sys->parse_start_lsn)
+		if (!recv_sys->parse_start_lsn
 		    && (log_block_get_first_rec_group(log_block) > 0)) {
 
 			/* We found a point from which to start the parsing
 			of log records */
 
-			recv_sys->parse_start_lsn
-				= ut_dulint_add(scanned_lsn,
-						log_block_get_first_rec_group(
-							log_block));
+			recv_sys->parse_start_lsn = scanned_lsn
+				+ log_block_get_first_rec_group(log_block);
 			recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
 			recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
 		}
 
-		scanned_lsn = ut_dulint_add(scanned_lsn, data_len);
+		scanned_lsn += data_len;
 
-		if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) {
+		if (scanned_lsn > recv_sys->scanned_lsn) {
 
 			/* We have found more entries. If this scan is
  			of startup type, we must initiate crash recovery
 			environment before parsing these log records. */
 
+#ifndef UNIV_HOTBACKUP
 			if (recv_log_scan_is_startup_type
 			    && !recv_needed_recovery) {
 
 				fprintf(stderr,
 					"InnoDB: Log scan progressed"
-					" past the checkpoint lsn %lu %lu\n",
-					(ulong) ut_dulint_get_high(
-						recv_sys->scanned_lsn),
-					(ulong) ut_dulint_get_low(
-						recv_sys->scanned_lsn));
+					" past the checkpoint lsn %llu\n",
+					recv_sys->scanned_lsn);
 				recv_init_crash_recovery();
 			}
+#endif /* !UNIV_HOTBACKUP */
 
 			/* We were able to find more log data: add it to the
 			parsing buffer if parse_start_lsn is already
@@ -2363,9 +2561,8 @@ recv_scan_log_recs(
 
 			fprintf(stderr,
 				"InnoDB: Doing recovery: scanned up to"
-				" log sequence number %lu %lu\n",
-				(ulong) ut_dulint_get_high(*group_scanned_lsn),
-				(ulong) ut_dulint_get_low(*group_scanned_lsn));
+				" log sequence number %llu\n",
+				*group_scanned_lsn);
 		}
 	}
 
@@ -2374,9 +2571,9 @@ recv_scan_log_recs(
 
 		recv_parse_log_recs(store_to_hash);
 
+#ifndef UNIV_HOTBACKUP
 		if (store_to_hash && mem_heap_get_size(recv_sys->heap)
-		    > available_memory
-		    && apply_automatically) {
+		    > available_memory) {
 
 			/* Hash table of log records has grown too big:
 			empty it; FALSE means no ibuf operations
@@ -2386,6 +2583,7 @@ recv_scan_log_recs(
 
 			recv_apply_hashed_log_recs(FALSE);
 		}
+#endif /* !UNIV_HOTBACKUP */
 
 		if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
 			/* Move parsing buffer data to the buffer start */
@@ -2397,34 +2595,37 @@ recv_scan_log_recs(
 	return(finished);
 }
 
-/***********************************************************
+#ifndef UNIV_HOTBACKUP
+/*******************************************************//**
 Scans log from a buffer and stores new log data to the parsing buffer. Parses
 and hashes the log records if new data found. */
 static
 void
 recv_group_scan_log_recs(
 /*=====================*/
-	log_group_t* group,	/* in: log group */
-	dulint*	contiguous_lsn,	/* in/out: it is known that all log groups
-				contain contiguous log data up to this lsn */
-	dulint*	group_scanned_lsn)/* out: scanning succeeded up to this lsn */
+	log_group_t*	group,		/*!< in: log group */
+	ib_uint64_t*	contiguous_lsn,	/*!< in/out: it is known that all log
+					groups contain contiguous log data up
+					to this lsn */
+	ib_uint64_t*	group_scanned_lsn)/*!< out: scanning succeeded up to
+					this lsn */
 {
-	ibool	finished;
-	dulint	start_lsn;
-	dulint	end_lsn;
+	ibool		finished;
+	ib_uint64_t	start_lsn;
+	ib_uint64_t	end_lsn;
 
 	finished = FALSE;
 
 	start_lsn = *contiguous_lsn;
 
 	while (!finished) {
-		end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
+		end_lsn = start_lsn + RECV_SCAN_SIZE;
 
 		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
 				       group, start_lsn, end_lsn);
 
 		finished = recv_scan_log_recs(
-			TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
+			(buf_pool->curr_size - recv_n_pool_free_frames)
 			* UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
 			start_lsn, contiguous_lsn, group_scanned_lsn);
 		start_lsn = end_lsn;
@@ -2434,15 +2635,14 @@ recv_group_scan_log_recs(
 	if (log_debug_writes) {
 		fprintf(stderr,
 			"InnoDB: Scanned group %lu up to"
-			" log sequence number %lu %lu\n",
+			" log sequence number %llu\n",
 			(ulong) group->id,
-			(ulong) ut_dulint_get_high(*group_scanned_lsn),
-			(ulong) ut_dulint_get_low(*group_scanned_lsn));
+			*group_scanned_lsn);
 	}
 #endif /* UNIV_DEBUG */
 }
 
-/***********************************************************
+/*******************************************************//**
 Initialize crash recovery environment. Can be called iff
 recv_needed_recovery == FALSE. */
 static
@@ -2483,42 +2683,57 @@ recv_init_crash_recovery(void)
 	}
 }
 
-/************************************************************
+/********************************************************//**
 Recovers from a checkpoint. When this function returns, the database is able
 to start processing of new user transactions, but the function
 recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it. */
-
+the recovery and free the resources used in it.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 ulint
-recv_recovery_from_checkpoint_start(
-/*================================*/
-				/* out: error code or DB_SUCCESS */
-	ulint	type,		/* in: LOG_CHECKPOINT or LOG_ARCHIVE */
-	dulint	limit_lsn,	/* in: recover up to this lsn if possible */
-	dulint	min_flushed_lsn,/* in: min flushed lsn from data files */
-	dulint	max_flushed_lsn)/* in: max flushed lsn from data files */
+recv_recovery_from_checkpoint_start_func(
+/*=====================================*/
+#ifdef UNIV_LOG_ARCHIVE
+	ulint		type,		/*!< in: LOG_CHECKPOINT or
+					LOG_ARCHIVE */
+	ib_uint64_t	limit_lsn,	/*!< in: recover up to this lsn
+					if possible */
+#endif /* UNIV_LOG_ARCHIVE */
+	ib_uint64_t	min_flushed_lsn,/*!< in: min flushed lsn from
+					data files */
+	ib_uint64_t	max_flushed_lsn)/*!< in: max flushed lsn from
+					data files */
 {
 	log_group_t*	group;
 	log_group_t*	max_cp_group;
 	log_group_t*	up_to_date_group;
 	ulint		max_cp_field;
-	dulint		checkpoint_lsn;
-	dulint		checkpoint_no;
-	dulint		old_scanned_lsn;
-	dulint		group_scanned_lsn;
-	dulint		contiguous_lsn;
-	dulint		archived_lsn;
-	ulint		capacity;
+	ib_uint64_t	checkpoint_lsn;
+	ib_uint64_t	checkpoint_no;
+	ib_uint64_t	old_scanned_lsn;
+	ib_uint64_t	group_scanned_lsn;
+	ib_uint64_t	contiguous_lsn;
+	ib_uint64_t	archived_lsn;
 	byte*		buf;
 	byte		log_hdr_buf[LOG_FILE_HDR_SIZE];
 	ulint		err;
 
-	ut_ad((type != LOG_CHECKPOINT)
-	      || (ut_dulint_cmp(limit_lsn, ut_dulint_max) == 0));
+#ifdef UNIV_LOG_ARCHIVE
+	ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
+/** TRUE when recovering from a checkpoint */
+# define TYPE_CHECKPOINT	(type == LOG_CHECKPOINT)
+/** Recover up to this log sequence number */
+# define LIMIT_LSN		limit_lsn
+#else /* UNIV_LOG_ARCHIVE */
+/** TRUE when recovering from a checkpoint */
+# define TYPE_CHECKPOINT	1
+/** Recover up to this log sequence number */
+# define LIMIT_LSN		IB_ULONGLONG_MAX
+#endif /* UNIV_LOG_ARCHIVE */
 
-	if (type == LOG_CHECKPOINT) {
+	if (TYPE_CHECKPOINT) {
 		recv_sys_create();
-		recv_sys_init(FALSE, buf_pool_get_curr_size());
+		recv_sys_init(buf_pool_get_curr_size());
 	}
 
 	if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
@@ -2532,7 +2747,7 @@ recv_recovery_from_checkpoint_start(
 
 	recv_recovery_on = TRUE;
 
-	recv_sys->limit_lsn = limit_lsn;
+	recv_sys->limit_lsn = LIMIT_LSN;
 
 	mutex_enter(&(log_sys->mutex));
 
@@ -2551,14 +2766,14 @@ recv_recovery_from_checkpoint_start(
 
 	buf = log_sys->checkpoint_buf;
 
-	checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
-	checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
-	archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
+	checkpoint_lsn = mach_read_ull(buf + LOG_CHECKPOINT_LSN);
+	checkpoint_no = mach_read_ull(buf + LOG_CHECKPOINT_NO);
+	archived_lsn = mach_read_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
 
 	/* Read the first log file header to print a note if this is
 	a recovery from a restored InnoDB Hot Backup */
 
-	fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id,
+	fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id, 0,
 	       0, 0, LOG_FILE_HDR_SIZE,
 	       log_hdr_buf, max_cp_group);
 
@@ -2582,7 +2797,7 @@ recv_recovery_from_checkpoint_start(
 		       ' ', 4);
 		/* Write to the log file to wipe over the label */
 		fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
-		       max_cp_group->space_id,
+		       max_cp_group->space_id, 0,
 		       0, 0, OS_FILE_LOG_BLOCK_SIZE,
 		       log_hdr_buf, max_cp_group);
 	}
@@ -2599,7 +2814,7 @@ recv_recovery_from_checkpoint_start(
 	}
 #endif /* UNIV_LOG_ARCHIVE */
 
-	if (type == LOG_CHECKPOINT) {
+	if (TYPE_CHECKPOINT) {
 		/* Start reading the log groups from the checkpoint lsn up. The
 		variable contiguous_lsn contains an lsn up to which the log is
 		known to be contiguously written to all log groups. */
@@ -2612,20 +2827,22 @@ recv_recovery_from_checkpoint_start(
 		srv_start_lsn = checkpoint_lsn;
 	}
 
-	contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn,
+	contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
 					      OS_FILE_LOG_BLOCK_SIZE);
-	if (type == LOG_ARCHIVE) {
+	if (TYPE_CHECKPOINT) {
+		up_to_date_group = max_cp_group;
+#ifdef UNIV_LOG_ARCHIVE
+	} else {
+		ulint	capacity;
+
 		/* Try to recover the remaining part from logs: first from
 		the logs of the archived group */
 
 		group = recv_sys->archive_group;
 		capacity = log_group_get_capacity(group);
 
-		if ((ut_dulint_cmp(recv_sys->scanned_lsn, ut_dulint_add(
-					   checkpoint_lsn, capacity)) > 0)
-		    || (ut_dulint_cmp(checkpoint_lsn, ut_dulint_add(
-					      recv_sys->scanned_lsn, capacity))
-			> 0)) {
+		if (recv_sys->scanned_lsn > checkpoint_lsn + capacity
+		    || checkpoint_lsn > recv_sys->scanned_lsn + capacity) {
 
 			mutex_exit(&(log_sys->mutex));
 
@@ -2637,7 +2854,7 @@ recv_recovery_from_checkpoint_start(
 
 		recv_group_scan_log_recs(group, &contiguous_lsn,
 					 &group_scanned_lsn);
-		if (ut_dulint_cmp(recv_sys->scanned_lsn, checkpoint_lsn) < 0) {
+		if (recv_sys->scanned_lsn < checkpoint_lsn) {
 
 			mutex_exit(&(log_sys->mutex));
 
@@ -2650,20 +2867,21 @@ recv_recovery_from_checkpoint_start(
 
 		group->scanned_lsn = group_scanned_lsn;
 		up_to_date_group = group;
-	} else {
-		up_to_date_group = max_cp_group;
+#endif /* UNIV_LOG_ARCHIVE */
 	}
 
 	ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
 
 	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
+#ifdef UNIV_LOG_ARCHIVE
 	if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
 		group = UT_LIST_GET_NEXT(log_groups, group);
 	}
+#endif /* UNIV_LOG_ARCHIVE */
 
 	/* Set the flag to publish that we are doing startup scan. */
-	recv_log_scan_is_startup_type = (type == LOG_CHECKPOINT);
+	recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
 	while (group) {
 		old_scanned_lsn = recv_sys->scanned_lsn;
 
@@ -2671,32 +2889,33 @@ recv_recovery_from_checkpoint_start(
 					 &group_scanned_lsn);
 		group->scanned_lsn = group_scanned_lsn;
 
-		if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) {
+		if (old_scanned_lsn < group_scanned_lsn) {
 			/* We found a more up-to-date group */
 
 			up_to_date_group = group;
 		}
 
+#ifdef UNIV_LOG_ARCHIVE
 		if ((type == LOG_ARCHIVE)
 		    && (group == recv_sys->archive_group)) {
 			group = UT_LIST_GET_NEXT(log_groups, group);
 		}
+#endif /* UNIV_LOG_ARCHIVE */
 
 		group = UT_LIST_GET_NEXT(log_groups, group);
 	}
 
 	/* Done with startup scan. Clear the flag. */
 	recv_log_scan_is_startup_type = FALSE;
-	if (type == LOG_CHECKPOINT) {
+	if (TYPE_CHECKPOINT) {
 		/* NOTE: we always do a 'recovery' at startup, but only if
 		there is something wrong we will print a message to the
 		user about recovery: */
 
-		if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
-		    || ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
+		if (checkpoint_lsn != max_flushed_lsn
+		    || checkpoint_lsn != min_flushed_lsn) {
 
-			if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
-			    < 0) {
+			if (checkpoint_lsn < max_flushed_lsn) {
 				fprintf(stderr,
 					"InnoDB: #########################"
 					"#################################\n"
@@ -2710,26 +2929,15 @@ recv_recovery_from_checkpoint_start(
 					" ib_logfiles to start up"
 					" the database?\n"
 					"InnoDB: Log sequence number in"
-					" ib_logfiles is %lu %lu, log\n"
+					" ib_logfiles is %llu, log\n"
 					"InnoDB: sequence numbers stamped"
 					" to ibdata file headers are between\n"
-					"InnoDB: %lu %lu and %lu %lu.\n"
+					"InnoDB: %llu and %llu.\n"
 					"InnoDB: #########################"
 					"#################################\n",
-					(ulong) ut_dulint_get_high(
-						checkpoint_lsn),
-					(ulong) ut_dulint_get_low(
-						checkpoint_lsn),
-					(ulong) ut_dulint_get_high(
-						min_flushed_lsn),
-					(ulong) ut_dulint_get_low(
-						min_flushed_lsn),
-					(ulong) ut_dulint_get_high(
-						max_flushed_lsn),
-					(ulong) ut_dulint_get_low(
-						max_flushed_lsn));
-
-
+					checkpoint_lsn,
+					min_flushed_lsn,
+					max_flushed_lsn);
 			}
 
 			if (!recv_needed_recovery) {
@@ -2740,8 +2948,8 @@ recv_recovery_from_checkpoint_start(
 					" in the ib_logfiles!\n");
 				recv_init_crash_recovery();
 			}
-
 		}
+
 		if (!recv_needed_recovery) {
 			/* Init the doublewrite buffer memory structure */
 			trx_sys_doublewrite_init_or_restore_pages(FALSE);
@@ -2749,39 +2957,35 @@ recv_recovery_from_checkpoint_start(
 	}
 
 	/* We currently have only one log group */
-	if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) {
+	if (group_scanned_lsn < checkpoint_lsn) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr,
 			"  InnoDB: ERROR: We were only able to scan the log"
 			" up to\n"
-			"InnoDB: %lu %lu, but a checkpoint was at %lu %lu.\n"
+			"InnoDB: %llu, but a checkpoint was at %llu.\n"
 			"InnoDB: It is possible that"
 			" the database is now corrupt!\n",
-			(ulong) ut_dulint_get_high(group_scanned_lsn),
-			(ulong) ut_dulint_get_low(group_scanned_lsn),
-			(ulong) ut_dulint_get_high(checkpoint_lsn),
-			(ulong) ut_dulint_get_low(checkpoint_lsn));
+			group_scanned_lsn,
+			checkpoint_lsn);
 	}
 
-	if (ut_dulint_cmp(group_scanned_lsn, recv_max_page_lsn) < 0) {
+	if (group_scanned_lsn < recv_max_page_lsn) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr,
 			"  InnoDB: ERROR: We were only able to scan the log"
-			" up to %lu %lu\n"
-			"InnoDB: but a database page a had an lsn %lu %lu."
+			" up to %llu\n"
+			"InnoDB: but a database page a had an lsn %llu."
 			" It is possible that the\n"
 			"InnoDB: database is now corrupt!\n",
-			(ulong) ut_dulint_get_high(group_scanned_lsn),
-			(ulong) ut_dulint_get_low(group_scanned_lsn),
-			(ulong) ut_dulint_get_high(recv_max_page_lsn),
-			(ulong) ut_dulint_get_low(recv_max_page_lsn));
+			group_scanned_lsn,
+			recv_max_page_lsn);
 	}
 
-	if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) {
+	if (recv_sys->recovered_lsn < checkpoint_lsn) {
 
 		mutex_exit(&(log_sys->mutex));
 
-		if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) >= 0) {
+		if (recv_sys->recovered_lsn >= LIMIT_LSN) {
 
 			return(DB_SUCCESS);
 		}
@@ -2795,7 +2999,7 @@ recv_recovery_from_checkpoint_start(
 	group; we also copy checkpoint info to groups */
 
 	log_sys->next_checkpoint_lsn = checkpoint_lsn;
-	log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
+	log_sys->next_checkpoint_no = checkpoint_no + 1;
 
 #ifdef UNIV_LOG_ARCHIVE
 	log_sys->archived_lsn = archived_lsn;
@@ -2804,9 +3008,7 @@ recv_recovery_from_checkpoint_start(
 	recv_synchronize_groups(up_to_date_group);
 
 	if (!recv_needed_recovery) {
-		ut_a(ut_dulint_cmp(checkpoint_lsn,
-				   recv_sys->recovered_lsn) == 0);
-
+		ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
 	} else {
 		srv_start_lsn = recv_sys->recovered_lsn;
 	}
@@ -2815,18 +3017,17 @@ recv_recovery_from_checkpoint_start(
 
 	ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
 
-	log_sys->buf_free = ut_dulint_get_low(log_sys->lsn)
-		% OS_FILE_LOG_BLOCK_SIZE;
+	log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
 	log_sys->buf_next_to_write = log_sys->buf_free;
 	log_sys->written_to_some_lsn = log_sys->lsn;
 	log_sys->written_to_all_lsn = log_sys->lsn;
 
 	log_sys->last_checkpoint_lsn = checkpoint_lsn;
 
-	log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
+	log_sys->next_checkpoint_no = checkpoint_no + 1;
 
 #ifdef UNIV_LOG_ARCHIVE
-	if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
+	if (archived_lsn == IB_ULONGLONG_MAX) {
 
 		log_sys->archiving_state = LOG_ARCH_OFF;
 	}
@@ -2847,11 +3048,14 @@ recv_recovery_from_checkpoint_start(
 	records in the hash table can be run in background. */
 
 	return(DB_SUCCESS);
+
+#undef TYPE_CHECKPOINT
+#undef LIMIT_LSN
 }
 
-/************************************************************
+/********************************************************//**
 Completes recovery from a checkpoint. */
-
+UNIV_INTERN
 void
 recv_recovery_from_checkpoint_finish(void)
 /*======================================*/
@@ -2901,6 +3105,9 @@ recv_recovery_from_checkpoint_finish(void)
 	recv_sys_free();
 #endif
 
+	/* Drop partially created indexes. */
+	row_merge_drop_temp_indexes();
+
 #ifdef UNIV_SYNC_DEBUG
 	/* Wait for a while so that created threads have time to suspend
 	themselves before we switch the latching order checks on */
@@ -2913,32 +3120,35 @@ recv_recovery_from_checkpoint_finish(void)
 		/* Rollback the uncommitted transactions which have no user
 		session */
 
-		os_thread_create(trx_rollback_or_clean_all_without_sess,
+		os_thread_create(trx_rollback_or_clean_all_recovered,
 				 (void *)&i, NULL);
 	}
 }
 
-/**********************************************************
+/******************************************************//**
 Resets the logs. The contents of log files will be lost! */
-
+UNIV_INTERN
 void
 recv_reset_logs(
 /*============*/
-	dulint	lsn,		/* in: reset to this lsn rounded up to
-				be divisible by OS_FILE_LOG_BLOCK_SIZE,
-				after which we add LOG_BLOCK_HDR_SIZE */
+	ib_uint64_t	lsn,		/*!< in: reset to this lsn
+					rounded up to be divisible by
+					OS_FILE_LOG_BLOCK_SIZE, after
+					which we add
+					LOG_BLOCK_HDR_SIZE */
 #ifdef UNIV_LOG_ARCHIVE
-	ulint	arch_log_no,	/* in: next archived log file number */
+	ulint		arch_log_no,	/*!< in: next archived log file number */
 #endif /* UNIV_LOG_ARCHIVE */
-	ibool	new_logs_created)/* in: TRUE if resetting logs is done
-				at the log creation; FALSE if it is done
-				after archive recovery */
+	ibool		new_logs_created)/*!< in: TRUE if resetting logs
+					is done at the log creation;
+					FALSE if it is done after
+					archive recovery */
 {
 	log_group_t*	group;
 
 	ut_ad(mutex_own(&(log_sys->mutex)));
 
-	log_sys->lsn = ut_dulint_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
+	log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
 
 	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
@@ -2962,8 +3172,8 @@ recv_reset_logs(
 	log_sys->written_to_some_lsn = log_sys->lsn;
 	log_sys->written_to_all_lsn = log_sys->lsn;
 
-	log_sys->next_checkpoint_no = ut_dulint_zero;
-	log_sys->last_checkpoint_lsn = ut_dulint_zero;
+	log_sys->next_checkpoint_no = 0;
+	log_sys->last_checkpoint_lsn = 0;
 
 #ifdef UNIV_LOG_ARCHIVE
 	log_sys->archived_lsn = log_sys->lsn;
@@ -2973,29 +3183,30 @@ recv_reset_logs(
 	log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
 
 	log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
-	log_sys->lsn = ut_dulint_add(log_sys->lsn, LOG_BLOCK_HDR_SIZE);
+	log_sys->lsn += LOG_BLOCK_HDR_SIZE;
 
 	mutex_exit(&(log_sys->mutex));
 
 	/* Reset the checkpoint fields in logs */
 
-	log_make_checkpoint_at(ut_dulint_max, TRUE);
-	log_make_checkpoint_at(ut_dulint_max, TRUE);
+	log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+	log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
 	mutex_enter(&(log_sys->mutex));
 }
+#endif /* !UNIV_HOTBACKUP */
 
 #ifdef UNIV_HOTBACKUP
-/**********************************************************
+/******************************************************//**
 Creates new log files after a backup has been restored. */
-
+UNIV_INTERN
 void
 recv_reset_log_files_for_backup(
 /*============================*/
-	const char*	log_dir,	/* in: log file directory path */
-	ulint		n_log_files,	/* in: number of log files */
-	ulint		log_file_size,	/* in: log file size */
-	dulint		lsn)		/* in: new start lsn, must be
+	const char*	log_dir,	/*!< in: log file directory path */
+	ulint		n_log_files,	/*!< in: number of log files */
+	ulint		log_file_size,	/*!< in: log file size */
+	ib_uint64_t	lsn)		/*!< in: new start lsn, must be
 					divisible by OS_FILE_LOG_BLOCK_SIZE */
 {
 	os_file_t	log_file;
@@ -3078,29 +3289,28 @@ recv_reset_log_files_for_backup(
 #endif /* UNIV_HOTBACKUP */
 
 #ifdef UNIV_LOG_ARCHIVE
-/**********************************************************
-Reads from the archive of a log group and performs recovery. */
+/******************************************************//**
+Reads from the archive of a log group and performs recovery.
+@return	TRUE if no more complete consistent archive files */
 static
 ibool
 log_group_recover_from_archive_file(
 /*================================*/
-					/* out: TRUE if no more complete
-					consistent archive files */
-	log_group_t*	group)		/* in: log group */
+	log_group_t*	group)		/*!< in: log group */
 {
-	os_file_t file_handle;
-	dulint	start_lsn;
-	dulint	file_end_lsn;
-	dulint	dummy_lsn;
-	dulint	scanned_lsn;
-	ulint	len;
-	ibool	ret;
-	byte*	buf;
-	ulint	read_offset;
-	ulint	file_size;
-	ulint	file_size_high;
-	int	input_char;
-	char	name[10000];
+	os_file_t	file_handle;
+	ib_uint64_t	start_lsn;
+	ib_uint64_t	file_end_lsn;
+	ib_uint64_t	dummy_lsn;
+	ib_uint64_t	scanned_lsn;
+	ulint		len;
+	ibool		ret;
+	byte*		buf;
+	ulint		read_offset;
+	ulint		file_size;
+	ulint		file_size_high;
+	int		input_char;
+	char		name[10000];
 
 	ut_a(0);
 
@@ -3188,12 +3398,12 @@ ask_again:
 		return(TRUE);
 	}
 
-	start_lsn = mach_read_from_8(buf + LOG_FILE_START_LSN);
-	file_end_lsn = mach_read_from_8(buf + LOG_FILE_END_LSN);
+	start_lsn = mach_read_ull(buf + LOG_FILE_START_LSN);
+	file_end_lsn = mach_read_ull(buf + LOG_FILE_END_LSN);
 
-	if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
+	if (!recv_sys->scanned_lsn) {
 
-		if (ut_dulint_cmp(recv_sys->parse_start_lsn, start_lsn) < 0) {
+		if (recv_sys->parse_start_lsn < start_lsn) {
 			fprintf(stderr,
 				"InnoDB: Archive log file %s"
 				" starts from too big a lsn\n",
@@ -3204,7 +3414,7 @@ ask_again:
 		recv_sys->scanned_lsn = start_lsn;
 	}
 
-	if (ut_dulint_cmp(recv_sys->scanned_lsn, start_lsn) != 0) {
+	if (recv_sys->scanned_lsn != start_lsn) {
 
 		fprintf(stderr,
 			"InnoDB: Archive log file %s starts from"
@@ -3232,9 +3442,8 @@ ask_again:
 		if (log_debug_writes) {
 			fprintf(stderr,
 				"InnoDB: Archive read starting at"
-				" lsn %lu %lu, len %lu from file %s\n",
-				(ulong) ut_dulint_get_high(start_lsn),
-				(ulong) ut_dulint_get_low(start_lsn),
+				" lsn %llu, len %lu from file %s\n",
+				start_lsn,
 				(ulong) len, name);
 		}
 #endif /* UNIV_DEBUG */
@@ -3244,11 +3453,11 @@ ask_again:
 		       read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
 
 		ret = recv_scan_log_recs(
-			TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
+			(buf_pool->n_frames - recv_n_pool_free_frames)
 			* UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
 			&dummy_lsn, &scanned_lsn);
 
-		if (ut_dulint_cmp(scanned_lsn, file_end_lsn) == 0) {
+		if (scanned_lsn == file_end_lsn) {
 
 			return(FALSE);
 		}
@@ -3262,28 +3471,30 @@ ask_again:
 		}
 
 		read_offset += len;
-		start_lsn = ut_dulint_add(start_lsn, len);
+		start_lsn += len;
 
-		ut_ad(ut_dulint_cmp(start_lsn, scanned_lsn) == 0);
+		ut_ad(start_lsn == scanned_lsn);
 	}
 
 	return(FALSE);
 }
 
-/************************************************************
-Recovers from archived log files, and also from log files, if they exist. */
-
+/********************************************************//**
+Recovers from archived log files, and also from log files, if they exist.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 ulint
 recv_recovery_from_archive_start(
 /*=============================*/
-				/* out: error code or DB_SUCCESS */
-	dulint	min_flushed_lsn,/* in: min flushed lsn field from the
-				data files */
-	dulint	limit_lsn,	/* in: recover up to this lsn if possible */
-	ulint	first_log_no)	/* in: number of the first archived log file
-				to use in the recovery; the file will be
-				searched from INNOBASE_LOG_ARCH_DIR specified
-				in server config file */
+	ib_uint64_t	min_flushed_lsn,/*!< in: min flushed lsn field from the
+					data files */
+	ib_uint64_t	limit_lsn,	/*!< in: recover up to this lsn if
+					possible */
+	ulint		first_log_no)	/*!< in: number of the first archived
+					log file to use in the recovery; the
+					file will be searched from
+					INNOBASE_LOG_ARCH_DIR specified in
+					server config file */
 {
 	log_group_t*	group;
 	ulint		group_id;
@@ -3294,7 +3505,7 @@ recv_recovery_from_archive_start(
 	ut_a(0);
 
 	recv_sys_create();
-	recv_sys_init(FALSE, buf_pool_get_curr_size());
+	recv_sys_init(buf_pool_get_curr_size());
 
 	recv_recovery_on = TRUE;
 	recv_recovery_from_backup_on = TRUE;
@@ -3325,7 +3536,7 @@ recv_recovery_from_archive_start(
 
 	recv_sys->parse_start_lsn = min_flushed_lsn;
 
-	recv_sys->scanned_lsn = ut_dulint_zero;
+	recv_sys->scanned_lsn = 0;
 	recv_sys->scanned_checkpoint_no = 0;
 	recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
 
@@ -3351,9 +3562,9 @@ recv_recovery_from_archive_start(
 		group->archived_file_no++;
 	}
 
-	if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) < 0) {
+	if (recv_sys->recovered_lsn < limit_lsn) {
 
-		if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
+		if (!recv_sys->scanned_lsn) {
 
 			recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
 		}
@@ -3362,8 +3573,8 @@ recv_recovery_from_archive_start(
 
 		err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
 							  limit_lsn,
-							  ut_dulint_max,
-							  ut_dulint_max);
+							  IB_ULONGLONG_MAX,
+							  IB_ULONGLONG_MAX);
 		if (err != DB_SUCCESS) {
 
 			return(err);
@@ -3372,7 +3583,7 @@ recv_recovery_from_archive_start(
 		mutex_enter(&(log_sys->mutex));
 	}
 
-	if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
+	if (limit_lsn != IB_ULONGLONG_MAX) {
 
 		recv_apply_hashed_log_recs(FALSE);
 
@@ -3384,9 +3595,9 @@ recv_recovery_from_archive_start(
 	return(DB_SUCCESS);
 }
 
-/************************************************************
+/********************************************************//**
 Completes recovery from archive. */
-
+UNIV_INTERN
 void
 recv_recovery_from_archive_finish(void)
 /*===================================*/
diff --git a/storage/innobase/mach/mach0data.c b/storage/innodb_plugin/mach/mach0data.c
similarity index 52%
rename from storage/innobase/mach/mach0data.c
rename to storage/innodb_plugin/mach/mach0data.c
index b92293fd037..e030ce9aadf 100644
--- a/storage/innobase/mach/mach0data.c
+++ b/storage/innodb_plugin/mach/mach0data.c
@@ -1,9 +1,26 @@
-/**********************************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file mach/mach0data.c
 Utilities for converting data from the database file
 to the machine format.
 
-(c) 1995 Innobase Oy
-
 Created 11/28/1995 Heikki Tuuri
 ***********************************************************************/
 
@@ -13,17 +30,16 @@ Created 11/28/1995 Heikki Tuuri
 #include "mach0data.ic"
 #endif
 
-/*************************************************************
-Reads a ulint in a compressed form if the log record fully contains it. */
-
+/*********************************************************//**
+Reads a ulint in a compressed form if the log record fully contains it.
+@return	pointer to end of the stored field, NULL if not complete */
+UNIV_INTERN
 byte*
 mach_parse_compressed(
 /*==================*/
-			/* out: pointer to end of the stored field, NULL if
-			not complete */
-	byte*	ptr,	/* in: pointer to buffer from where to read */
-	byte*	end_ptr,/* in: pointer to end of the buffer */
-	ulint*	val)	/* out: read value (< 2^32) */
+	byte*	ptr,	/*!< in: pointer to buffer from where to read */
+	byte*	end_ptr,/*!< in: pointer to end of the buffer */
+	ulint*	val)	/*!< out: read value (< 2^32) */
 {
 	ulint	flag;
 
@@ -77,17 +93,16 @@ mach_parse_compressed(
 	}
 }
 
-/*************************************************************
-Reads a dulint in a compressed form if the log record fully contains it. */
-
+/*********************************************************//**
+Reads a dulint in a compressed form if the log record fully contains it.
+@return	pointer to end of the stored field, NULL if not complete */
+UNIV_INTERN
 byte*
 mach_dulint_parse_compressed(
 /*=========================*/
-			/* out: pointer to end of the stored field, NULL if
-			not complete */
-	byte*	ptr,	/* in: pointer to buffer from where to read */
-	byte*	end_ptr,/* in: pointer to end of the buffer */
-	dulint*	val)	/* out: read value */
+	byte*	ptr,	/*!< in: pointer to buffer from where to read */
+	byte*	end_ptr,/*!< in: pointer to end of the buffer */
+	dulint*	val)	/*!< out: read value */
 {
 	ulint	high;
 	ulint	low;
diff --git a/storage/innobase/mem/mem0dbg.c b/storage/innodb_plugin/mem/mem0dbg.c
similarity index 81%
rename from storage/innobase/mem/mem0dbg.c
rename to storage/innodb_plugin/mem/mem0dbg.c
index 72452907c3f..a20eb2ad7d2 100644
--- a/storage/innobase/mem/mem0dbg.c
+++ b/storage/innodb_plugin/mem/mem0dbg.c
@@ -1,28 +1,50 @@
-/************************************************************************
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file mem/mem0dbg.c
 The memory management: the debug code. This is not a compilation module,
 but is included in mem0mem.* !
 
-(c) 1994, 1995 Innobase Oy
-
 Created 6/9/1994 Heikki Tuuri
 *************************************************************************/
 
 #ifdef UNIV_MEM_DEBUG
-mutex_t	mem_hash_mutex;	 /* The mutex which protects in the
-			debug version the hash table containing
-			the list of live memory heaps, and
-			also the global variables below. */
+# ifndef UNIV_HOTBACKUP
+/* The mutex which protects in the debug version the hash table
+containing the list of live memory heaps, and also the global
+variables below. */
+UNIV_INTERN mutex_t	mem_hash_mutex;
+# endif /* !UNIV_HOTBACKUP */
 
 /* The following variables contain information about the
 extent of memory allocations. Only used in the debug version.
 Protected by mem_hash_mutex above. */
 
-static ulint	mem_n_created_heaps		= 0;
-static ulint	mem_n_allocations		= 0;
-static ulint	mem_total_allocated_memory	= 0;
-ulint		mem_current_allocated_memory	= 0;
-static ulint	mem_max_allocated_memory	= 0;
-static ulint	mem_last_print_info		= 0;
+static ulint		mem_n_created_heaps		= 0;
+static ulint		mem_n_allocations		= 0;
+static ulint		mem_total_allocated_memory	= 0;
+UNIV_INTERN ulint	mem_current_allocated_memory	= 0;
+static ulint		mem_max_allocated_memory	= 0;
+# ifndef UNIV_HOTBACKUP
+static ulint		mem_last_print_info		= 0;
+static ibool		mem_hash_initialized		= FALSE;
+# endif /* !UNIV_HOTBACKUP */
 
 /* Size of the hash table for memory management tracking */
 #define	MEM_HASH_SIZE	997
@@ -32,10 +54,10 @@ static ulint	mem_last_print_info		= 0;
 typedef struct mem_hash_node_struct mem_hash_node_t;
 struct mem_hash_node_struct {
 	UT_LIST_NODE_T(mem_hash_node_t)
-				list;	/* hash list node */
-	mem_heap_t*		heap;	/* memory heap */
+				list;	/*!< hash list node */
+	mem_heap_t*		heap;	/*!< memory heap */
 	const char*		file_name;/* file where heap was created*/
-	ulint			line;	/* file line of creation */
+	ulint			line;	/*!< file line of creation */
 	ulint			nth_heap;/* this is the nth heap created */
 	UT_LIST_NODE_T(mem_hash_node_t)
 				all_list;/* list of all created heaps */
@@ -49,7 +71,6 @@ static mem_hash_cell_t		mem_hash_table[MEM_HASH_SIZE];
 /* The base node of the list of all allocated heaps */
 static mem_hash_cell_t		mem_all_list_base;
 
-static ibool	mem_hash_initialized	= FALSE;
 
 
 UNIV_INLINE
@@ -68,37 +89,42 @@ mem_hash_get_nth_cell(ulint i)
 }
 
 /* Accessor functions for a memory field in the debug version */
-
+UNIV_INTERN
 void
 mem_field_header_set_len(byte* field, ulint len)
 {
 	mach_write_to_4(field - 2 * sizeof(ulint), len);
 }
 
+UNIV_INTERN
 ulint
 mem_field_header_get_len(byte* field)
 {
 	return(mach_read_from_4(field - 2 * sizeof(ulint)));
 }
 
+UNIV_INTERN
 void
 mem_field_header_set_check(byte* field, ulint check)
 {
 	mach_write_to_4(field - sizeof(ulint), check);
 }
 
+UNIV_INTERN
 ulint
 mem_field_header_get_check(byte* field)
 {
 	return(mach_read_from_4(field - sizeof(ulint)));
 }
 
+UNIV_INTERN
 void
 mem_field_trailer_set_check(byte* field, ulint check)
 {
 	mach_write_to_4(field + mem_field_header_get_len(field), check);
 }
 
+UNIV_INTERN
 ulint
 mem_field_trailer_get_check(byte* field)
 {
@@ -107,13 +133,14 @@ mem_field_trailer_get_check(byte* field)
 }
 #endif /* UNIV_MEM_DEBUG */
 
-/**********************************************************************
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
 Initializes the memory system. */
-
+UNIV_INTERN
 void
 mem_init(
 /*=====*/
-	ulint	size)	/* in: common pool size in bytes */
+	ulint	size)	/*!< in: common pool size in bytes */
 {
 #ifdef UNIV_MEM_DEBUG
 
@@ -133,18 +160,27 @@ mem_init(
 	mem_hash_initialized = TRUE;
 #endif
 
+	if (UNIV_LIKELY(srv_use_sys_malloc)) {
+		/* When innodb_use_sys_malloc is set, the
+		mem_comm_pool won't be used for any allocations.  We
+		create a dummy mem_comm_pool, because some statistics
+		and debugging code relies on it being initialized. */
+		size = 1;
+	}
+
 	mem_comm_pool = mem_pool_create(size);
 }
+#endif /* !UNIV_HOTBACKUP */
 
 #ifdef UNIV_MEM_DEBUG
-/**********************************************************************
+/******************************************************************//**
 Initializes an allocated memory field in the debug version. */
-
+UNIV_INTERN
 void
 mem_field_init(
 /*===========*/
-	byte*	buf,	/* in: memory field */
-	ulint	n)	/* in: how many bytes the user requested */
+	byte*	buf,	/*!< in: memory field */
+	ulint	n)	/*!< in: how many bytes the user requested */
 {
 	ulint	rnd;
 	byte*	usr_buf;
@@ -184,15 +220,15 @@ mem_field_init(
 	mem_init_buf(usr_buf, n);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Erases an allocated memory field in the debug version. */
-
+UNIV_INTERN
 void
 mem_field_erase(
 /*============*/
-	byte*	buf,	/* in: memory field */
+	byte*	buf,	/*!< in: memory field */
 	ulint	n __attribute__((unused)))
-			/* in: how many bytes the user requested */
+			/*!< in: how many bytes the user requested */
 {
 	byte*	usr_buf;
 
@@ -211,15 +247,15 @@ mem_field_erase(
 	mem_erase_buf(buf, MEM_SPACE_NEEDED(n));
 }
 
-/*******************************************************************
+/***************************************************************//**
 Initializes a buffer to a random combination of hex BA and BE.
 Used to initialize allocated memory. */
-
+UNIV_INTERN
 void
 mem_init_buf(
 /*=========*/
-	byte*	buf,	/* in: pointer to buffer */
-	ulint	 n)	/* in: length of buffer */
+	byte*	buf,	/*!< in: pointer to buffer */
+	ulint	 n)	/*!< in: length of buffer */
 {
 	byte*	ptr;
 
@@ -237,15 +273,15 @@ mem_init_buf(
 	UNIV_MEM_INVALID(buf, n);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Initializes a buffer to a random combination of hex DE and AD.
-Used to erase freed memory.*/
-
+Used to erase freed memory. */
+UNIV_INTERN
 void
 mem_erase_buf(
 /*==========*/
-	byte*	buf,	/* in: pointer to buffer */
-	ulint	 n)	 /* in: length of buffer */
+	byte*	buf,	/*!< in: pointer to buffer */
+	ulint	n)	/*!< in: length of buffer */
 {
 	byte*	ptr;
 
@@ -262,16 +298,16 @@ mem_erase_buf(
 	UNIV_MEM_FREE(buf, n);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Inserts a created memory heap to the hash table of current allocated
 memory heaps. */
-
+UNIV_INTERN
 void
 mem_hash_insert(
 /*============*/
-	mem_heap_t*	heap,	   /* in: the created heap */
-	const char*	file_name, /* in: file name of creation */
-	ulint		line)	   /* in: line where created */
+	mem_heap_t*	heap,	   /*!< in: the created heap */
+	const char*	file_name, /*!< in: file name of creation */
+	ulint		line)	   /*!< in: line where created */
 {
 	mem_hash_node_t*	new_node;
 	ulint			cell_no	;
@@ -300,7 +336,7 @@ mem_hash_insert(
 	mutex_exit(&mem_hash_mutex);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Removes a memory heap (which is going to be freed by the caller)
 from the list of live memory heaps. Returns the size of the heap
 in terms of how much memory in bytes was allocated for the user of
@@ -308,13 +344,13 @@ the heap (not the total space occupied by the heap).
 Also validates the heap.
 NOTE: This function does not free the storage occupied by the
 heap itself, only the node in the list of heaps. */
-
+UNIV_INTERN
 void
 mem_hash_remove(
 /*============*/
-	mem_heap_t*	heap,	   /* in: the heap to be freed */
-	const char*	file_name, /* in: file name of freeing */
-	ulint		line)	   /* in: line where freed */
+	mem_heap_t*	heap,	   /*!< in: the heap to be freed */
+	const char*	file_name, /*!< in: file name of freeing */
+	ulint		line)	   /*!< in: line where freed */
 {
 	mem_hash_node_t*	node;
 	ulint			cell_no;
@@ -381,34 +417,34 @@ mem_hash_remove(
 #endif /* UNIV_MEM_DEBUG */
 
 #if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/*******************************************************************
+/***************************************************************//**
 Checks a memory heap for consistency and prints the contents if requested.
 Outputs the sum of sizes of buffers given to the user (only in
 the debug version), the physical size of the heap and the number of
 blocks in the heap. In case of error returns 0 as sizes and number
 of blocks. */
-
+UNIV_INTERN
 void
 mem_heap_validate_or_print(
 /*=======================*/
-	mem_heap_t*	heap,	/* in: memory heap */
+	mem_heap_t*	heap,	/*!< in: memory heap */
 	byte*		top __attribute__((unused)),
-				/* in: calculate and validate only until
+				/*!< in: calculate and validate only until
 				this top pointer in the heap is reached,
 				if this pointer is NULL, ignored */
-	ibool		print,	/* in: if TRUE, prints the contents
+	ibool		print,	/*!< in: if TRUE, prints the contents
 				of the heap; works only in
 				the debug version */
-	ibool*		error,	/* out: TRUE if error */
-	ulint*		us_size,/* out: allocated memory
+	ibool*		error,	/*!< out: TRUE if error */
+	ulint*		us_size,/*!< out: allocated memory
 				(for the user) in the heap,
 				if a NULL pointer is passed as this
 				argument, it is ignored; in the
 				non-debug version this is always -1 */
-	ulint*		ph_size,/* out: physical size of the heap,
+	ulint*		ph_size,/*!< out: physical size of the heap,
 				if a NULL pointer is passed as this
 				argument, it is ignored */
-	ulint*		n_blocks) /* out: number of blocks in the heap,
+	ulint*		n_blocks) /*!< out: number of blocks in the heap,
 				if a NULL pointer is passed as this
 				argument, it is ignored */
 {
@@ -486,6 +522,7 @@ mem_heap_validate_or_print(
 
 			if (print) {
 				ut_print_buf(stderr, user_field, len);
+				putc('\n', stderr);
 			}
 
 			total_len += len;
@@ -555,13 +592,13 @@ completed:
 	*error = FALSE;
 }
 
-/******************************************************************
+/**************************************************************//**
 Prints the contents of a memory heap. */
 static
 void
 mem_heap_print(
 /*===========*/
-	mem_heap_t*	heap)	/* in: memory heap */
+	mem_heap_t*	heap)	/*!< in: memory heap */
 {
 	ibool	error;
 	ulint	us_size;
@@ -580,14 +617,14 @@ mem_heap_print(
 	ut_a(!error);
 }
 
-/******************************************************************
-Validates the contents of a memory heap. */
-
+/**************************************************************//**
+Validates the contents of a memory heap.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 mem_heap_validate(
 /*==============*/
-				/* out: TRUE if ok */
-	mem_heap_t*	heap)	/* in: memory heap */
+	mem_heap_t*	heap)	/*!< in: memory heap */
 {
 	ibool	error;
 	ulint	us_size;
@@ -609,14 +646,14 @@ mem_heap_validate(
 #endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
 
 #ifdef UNIV_DEBUG
-/******************************************************************
-Checks that an object is a memory heap (or a block of it). */
-
+/**************************************************************//**
+Checks that an object is a memory heap (or a block of it).
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 mem_heap_check(
 /*===========*/
-				/* out: TRUE if ok */
-	mem_heap_t*	heap)	/* in: memory heap */
+	mem_heap_t*	heap)	/*!< in: memory heap */
 {
 	ut_a(heap->magic_n == MEM_BLOCK_MAGIC_N);
 
@@ -625,13 +662,13 @@ mem_heap_check(
 #endif /* UNIV_DEBUG */
 
 #ifdef UNIV_MEM_DEBUG
-/*********************************************************************
-TRUE if no memory is currently allocated. */
-
+/*****************************************************************//**
+TRUE if no memory is currently allocated.
+@return	TRUE if no heaps exist */
+UNIV_INTERN
 ibool
 mem_all_freed(void)
 /*===============*/
-			/* out: TRUE if no heaps exist */
 {
 	mem_hash_node_t*	node;
 	ulint			heap_count	= 0;
@@ -653,8 +690,9 @@ mem_all_freed(void)
 	mutex_exit(&mem_hash_mutex);
 
 	if (heap_count == 0) {
-
+# ifndef UNIV_HOTBACKUP
 		ut_a(mem_pool_get_reserved(mem_comm_pool) == 0);
+# endif /* !UNIV_HOTBACKUP */
 
 		return(TRUE);
 	} else {
@@ -662,13 +700,13 @@ mem_all_freed(void)
 	}
 }
 
-/*********************************************************************
-Validates the dynamic memory allocation system. */
-
+/*****************************************************************//**
+Validates the dynamic memory allocation system.
+@return	TRUE if error */
+UNIV_INTERN
 ibool
 mem_validate_no_assert(void)
 /*========================*/
-			/* out: TRUE if error */
 {
 	mem_hash_node_t*	node;
 	ulint			n_heaps			= 0;
@@ -679,7 +717,9 @@ mem_validate_no_assert(void)
 	ulint			n_blocks;
 	ulint			i;
 
+# ifndef UNIV_HOTBACKUP
 	mem_pool_validate(mem_comm_pool);
+# endif /* !UNIV_HOTBACKUP */
 
 	mutex_enter(&mem_hash_mutex);
 
@@ -735,13 +775,13 @@ mem_validate_no_assert(void)
 	return(error);
 }
 
-/****************************************************************
-Validates the dynamic memory */
-
+/************************************************************//**
+Validates the dynamic memory
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 mem_validate(void)
 /*==============*/
-			/* out: TRUE if ok */
 {
 	ut_a(!mem_validate_no_assert());
 
@@ -749,14 +789,14 @@ mem_validate(void)
 }
 #endif /* UNIV_MEM_DEBUG */
 
-/****************************************************************
+/************************************************************//**
 Tries to find neigboring memory allocation blocks and dumps to stderr
 the neighborhood of a given pointer. */
-
+UNIV_INTERN
 void
 mem_analyze_corruption(
 /*===================*/
-	void*	ptr)	/* in: pointer to place of possible corruption */
+	void*	ptr)	/*!< in: pointer to place of possible corruption */
 {
 	byte*	p;
 	ulint	i;
@@ -857,14 +897,15 @@ mem_analyze_corruption(
 	}
 }
 
-/*********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*****************************************************************//**
 Prints information of dynamic memory usage and currently allocated
 memory heaps or buffers. Can only be used in the debug version. */
 static
 void
 mem_print_info_low(
 /*===============*/
-	ibool	print_all)	/* in: if TRUE, all heaps are printed,
+	ibool	print_all)	/*!< in: if TRUE, all heaps are printed,
 				else only the heaps allocated after the
 				previous call of this function */
 {
@@ -961,10 +1002,10 @@ next_heap:
 #endif
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Prints information of dynamic memory usage and currently allocated memory
 heaps or buffers. Can only be used in the debug version. */
-
+UNIV_INTERN
 void
 mem_print_info(void)
 /*================*/
@@ -972,13 +1013,14 @@ mem_print_info(void)
 	mem_print_info_low(TRUE);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Prints information of dynamic memory usage and currently allocated memory
 heaps or buffers since the last ..._print_info or..._print_new_info. */
-
+UNIV_INTERN
 void
 mem_print_new_info(void)
 /*====================*/
 {
 	mem_print_info_low(FALSE);
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/mem/mem0mem.c b/storage/innodb_plugin/mem/mem0mem.c
similarity index 68%
rename from storage/innobase/mem/mem0mem.c
rename to storage/innodb_plugin/mem/mem0mem.c
index f4fd178a39c..e0dc8716f13 100644
--- a/storage/innobase/mem/mem0mem.c
+++ b/storage/innodb_plugin/mem/mem0mem.c
@@ -1,20 +1,34 @@
-/************************************************************************
-The memory management
+/*****************************************************************************
 
-(c) 1994, 1995 Innobase Oy
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file mem/mem0mem.c
+The memory management
 
 Created 6/9/1994 Heikki Tuuri
 *************************************************************************/
 
-
 #include "mem0mem.h"
 #ifdef UNIV_NONINL
 #include "mem0mem.ic"
 #endif
 
-#include "mach0data.h"
 #include "buf0buf.h"
-#include "btr0sea.h"
 #include "srv0srv.h"
 #include "mem0dbg.c"
 #include <stdarg.h>
@@ -84,81 +98,43 @@ UT_LIST_BASE_NODE_T(mem_block_t)	mem_block_list;
 
 #endif
 
-/*******************************************************************
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free. */
-
-void*
-mem_alloc_func_noninline(
-/*=====================*/
-					/* out, own: free storage */
-	ulint		n,		/* in: desired number of bytes */
-	const char*	file_name,	/* in: file name where created */
-	ulint		line)		/* in: line where created */
-{
-	return(mem_alloc_func(n, file_name, line));
-}
-
-/**************************************************************************
-Duplicates a NUL-terminated string, allocated from a memory heap. */
-
+/**********************************************************************//**
+Duplicates a NUL-terminated string, allocated from a memory heap.
+@return	own: a copy of the string */
+UNIV_INTERN
 char*
 mem_heap_strdup(
 /*============*/
-				/* out, own: a copy of the string */
-	mem_heap_t*	heap,	/* in: memory heap where string is allocated */
-	const char*	str)	/* in: string to be copied */
+	mem_heap_t*	heap,	/*!< in: memory heap where string is allocated */
+	const char*	str)	/*!< in: string to be copied */
 {
 	return(mem_heap_dup(heap, str, strlen(str) + 1));
 }
 
-/**************************************************************************
-Duplicate a block of data, allocated from a memory heap. */
-
+/**********************************************************************//**
+Duplicate a block of data, allocated from a memory heap.
+@return	own: a copy of the data */
+UNIV_INTERN
 void*
 mem_heap_dup(
 /*=========*/
-				/* out, own: a copy of the data */
-	mem_heap_t*	heap,	/* in: memory heap where copy is allocated */
-	const void*	data,	/* in: data to be copied */
-	ulint		len)	/* in: length of data, in bytes */
+	mem_heap_t*	heap,	/*!< in: memory heap where copy is allocated */
+	const void*	data,	/*!< in: data to be copied */
+	ulint		len)	/*!< in: length of data, in bytes */
 {
 	return(memcpy(mem_heap_alloc(heap, len), data, len));
 }
 
-/**************************************************************************
-Concatenate two memory blocks and return the result, using a memory heap. */
-
-void*
-mem_heap_cat(
-/*=========*/
-				/* out, own: the result */
-	mem_heap_t*	heap,	/* in: memory heap where result is allocated */
-	const void*	b1,	/* in: block 1 */
-	ulint		len1,	/* in: length of b1, in bytes */
-	const void*	b2,	/* in: block 2 */
-	ulint		len2)	/* in: length of b2, in bytes */
-{
-	void*	res = mem_heap_alloc(heap, len1 + len2);
-
-	memcpy(res, b1, len1);
-	memcpy((char*)res + len1, b2, len2);
-
-	return(res);
-}
-
-/**************************************************************************
-Concatenate two strings and return the result, using a memory heap. */
-
+/**********************************************************************//**
+Concatenate two strings and return the result, using a memory heap.
+@return	own: the result */
+UNIV_INTERN
 char*
 mem_heap_strcat(
 /*============*/
-				/* out, own: the result */
-	mem_heap_t*	heap,	/* in: memory heap where string is allocated */
-	const char*	s1,	/* in: string 1 */
-	const char*	s2)	/* in: string 2 */
+	mem_heap_t*	heap,	/*!< in: memory heap where string is allocated */
+	const char*	s1,	/*!< in: string 1 */
+	const char*	s2)	/*!< in: string 2 */
 {
 	char*	s;
 	ulint	s1_len = strlen(s1);
@@ -175,18 +151,17 @@ mem_heap_strcat(
 }
 
 
-/********************************************************************
-Helper function for mem_heap_printf. */
+/****************************************************************//**
+Helper function for mem_heap_printf.
+@return	length of formatted string, including terminating NUL */
 static
 ulint
 mem_heap_printf_low(
 /*================*/
-				/* out: length of formatted string,
-				including terminating NUL */
-	char*		buf,	/* in/out: buffer to store formatted string
+	char*		buf,	/*!< in/out: buffer to store formatted string
 				in, or NULL to just calculate length */
-	const char*	format,	/* in: format string */
-	va_list		ap)	/* in: arguments */
+	const char*	format,	/*!< in: format string */
+	va_list		ap)	/*!< in: arguments */
 {
 	ulint 		len = 0;
 
@@ -285,18 +260,18 @@ mem_heap_printf_low(
 	return(len);
 }
 
-/********************************************************************
+/****************************************************************//**
 A simple (s)printf replacement that dynamically allocates the space for the
 formatted string from the given heap. This supports a very limited set of
 the printf syntax: types 's' and 'u' and length modifier 'l' (which is
-required for the 'u' type). */
-
+required for the 'u' type).
+@return	heap-allocated formatted string */
+UNIV_INTERN
 char*
 mem_heap_printf(
 /*============*/
-				/* out: heap-allocated formatted string */
-	mem_heap_t*	heap,	/* in: memory heap */
-	const char*	format,	/* in: format string */
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	const char*	format,	/*!< in: format string */
 	...)
 {
 	va_list		ap;
@@ -318,26 +293,25 @@ mem_heap_printf(
 	return(str);
 }
 
-/*******************************************************************
-Creates a memory heap block where data can be allocated. */
-
+/***************************************************************//**
+Creates a memory heap block where data can be allocated.
+@return own: memory heap block, NULL if did not succeed (only possible
+for MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INTERN
 mem_block_t*
 mem_heap_create_block(
 /*==================*/
-				/* out, own: memory heap block, NULL if
-				did not succeed (only possible for
-				MEM_HEAP_BTR_SEARCH type heaps) */
-	mem_heap_t*	heap,	/* in: memory heap or NULL if first block
+	mem_heap_t*	heap,	/*!< in: memory heap or NULL if first block
 				should be created */
-	ulint		n,	/* in: number of bytes needed for user data, or
-				if init_block is not NULL, its size in bytes */
-	void*		init_block, /* in: init block in fast create,
-				type must be MEM_HEAP_DYNAMIC */
-	ulint		type,	/* in: type of heap: MEM_HEAP_DYNAMIC or
+	ulint		n,	/*!< in: number of bytes needed for user data */
+	ulint		type,	/*!< in: type of heap: MEM_HEAP_DYNAMIC or
 				MEM_HEAP_BUFFER */
-	const char*	file_name,/* in: file name where created */
-	ulint		line)	/* in: line where created */
+	const char*	file_name,/*!< in: file name where created */
+	ulint		line)	/*!< in: line where created */
 {
+#ifndef UNIV_HOTBACKUP
+	buf_block_t*	buf_block = NULL;
+#endif /* !UNIV_HOTBACKUP */
 	mem_block_t*	block;
 	ulint		len;
 
@@ -349,47 +323,44 @@ mem_heap_create_block(
 	}
 
 	/* In dynamic allocation, calculate the size: block header + data. */
+	len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
 
-	if (init_block != NULL) {
-		ut_ad(type == MEM_HEAP_DYNAMIC);
-		ut_ad(n > MEM_BLOCK_START_SIZE + MEM_BLOCK_HEADER_SIZE);
-		len = n;
-		block = init_block;
+#ifndef UNIV_HOTBACKUP
+	if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) {
 
-	} else if (type == MEM_HEAP_DYNAMIC) {
+		ut_ad(type == MEM_HEAP_DYNAMIC || n <= MEM_MAX_ALLOC_IN_BUF);
 
-		len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
-		block = mem_area_alloc(len, mem_comm_pool);
+		block = mem_area_alloc(&len, mem_comm_pool);
 	} else {
-		ut_ad(n <= MEM_MAX_ALLOC_IN_BUF);
+		len = UNIV_PAGE_SIZE;
 
-		len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
+		if ((type & MEM_HEAP_BTR_SEARCH) && heap) {
+			/* We cannot allocate the block from the
+			buffer pool, but must get the free block from
+			the heap header free block field */
 
-		if (len < UNIV_PAGE_SIZE / 2) {
+			buf_block = heap->free_block;
+			heap->free_block = NULL;
 
-			block = mem_area_alloc(len, mem_comm_pool);
-		} else {
-			len = UNIV_PAGE_SIZE;
+			if (UNIV_UNLIKELY(!buf_block)) {
 
-			if ((type & MEM_HEAP_BTR_SEARCH) && heap) {
-				/* We cannot allocate the block from the
-				buffer pool, but must get the free block from
-				the heap header free block field */
-
-				block = (mem_block_t*)heap->free_block;
-				heap->free_block = NULL;
-			} else {
-				block = (mem_block_t*)buf_frame_alloc();
+				return(NULL);
 			}
+		} else {
+			buf_block = buf_block_alloc(0);
 		}
+
+		block = (mem_block_t*) buf_block->frame;
 	}
 
-	if (block == NULL) {
-		/* Only MEM_HEAP_BTR_SEARCH allocation should ever fail. */
-		ut_a(type & MEM_HEAP_BTR_SEARCH);
-
-		return(NULL);
-	}
+	ut_ad(block);
+	block->buf_block = buf_block;
+	block->free_block = NULL;
+#else /* !UNIV_HOTBACKUP */
+	len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
+	block = ut_malloc(len);
+	ut_ad(block);
+#endif /* !UNIV_HOTBACKUP */
 
 	block->magic_n = MEM_BLOCK_MAGIC_N;
 	ut_strlcpy_rev(block->file_name, file_name, sizeof(block->file_name));
@@ -412,25 +383,21 @@ mem_heap_create_block(
 	mem_block_set_free(block, MEM_BLOCK_HEADER_SIZE);
 	mem_block_set_start(block, MEM_BLOCK_HEADER_SIZE);
 
-	block->free_block = NULL;
-	block->init_block = (init_block != NULL);
-
 	ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len);
 
 	return(block);
 }
 
-/*******************************************************************
-Adds a new block to a memory heap. */
-
+/***************************************************************//**
+Adds a new block to a memory heap.
+@return created block, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INTERN
 mem_block_t*
 mem_heap_add_block(
 /*===============*/
-				/* out: created block, NULL if did not
-				succeed (only possible for
-				MEM_HEAP_BTR_SEARCH type heaps)*/
-	mem_heap_t*	heap,	/* in: memory heap */
-	ulint		n)	/* in: number of bytes user needs */
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n)	/*!< in: number of bytes user needs */
 {
 	mem_block_t*	block;
 	mem_block_t*	new_block;
@@ -462,7 +429,7 @@ mem_heap_add_block(
 		new_size = n;
 	}
 
-	new_block = mem_heap_create_block(heap, new_size, NULL, heap->type,
+	new_block = mem_heap_create_block(heap, new_size, heap->type,
 					  heap->file_name, heap->line);
 	if (new_block == NULL) {
 
@@ -476,18 +443,20 @@ mem_heap_add_block(
 	return(new_block);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Frees a block from a memory heap. */
-
+UNIV_INTERN
 void
 mem_heap_block_free(
 /*================*/
-	mem_heap_t*	heap,	/* in: heap */
-	mem_block_t*	block)	/* in: block to free */
+	mem_heap_t*	heap,	/*!< in: heap */
+	mem_block_t*	block)	/*!< in: block to free */
 {
-	ulint	type;
-	ulint	len;
-	ibool	init_block;
+	ulint		type;
+	ulint		len;
+#ifndef UNIV_HOTBACKUP
+	buf_block_t*	buf_block	= block->buf_block;
+#endif /* !UNIV_HOTBACKUP */
 
 	if (block->magic_n != MEM_BLOCK_MAGIC_N) {
 		mem_analyze_corruption(block);
@@ -504,7 +473,6 @@ mem_heap_block_free(
 #endif
 	type = heap->type;
 	len = block->len;
-	init_block = block->init_block;
 	block->magic_n = MEM_FREED_BLOCK_MAGIC_N;
 
 #ifdef UNIV_MEM_DEBUG
@@ -516,44 +484,44 @@ mem_heap_block_free(
 	UNIV_MEM_ASSERT_AND_FREE(block, len);
 #endif /* UNIV_MEM_DEBUG */
 
-	if (init_block) {
-		/* Do not have to free: do nothing */
-
-	} else if (type == MEM_HEAP_DYNAMIC) {
+#ifndef UNIV_HOTBACKUP
+	if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) {
 
+		ut_ad(!buf_block);
 		mem_area_free(block, mem_comm_pool);
 	} else {
 		ut_ad(type & MEM_HEAP_BUFFER);
 
-		if (len >= UNIV_PAGE_SIZE / 2) {
-			buf_frame_free((byte*)block);
-		} else {
-			mem_area_free(block, mem_comm_pool);
-		}
+		buf_block_free(buf_block);
 	}
+#else /* !UNIV_HOTBACKUP */
+	ut_free(block);
+#endif /* !UNIV_HOTBACKUP */
 }
 
-/**********************************************************************
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
 Frees the free_block field from a memory heap. */
-
+UNIV_INTERN
 void
 mem_heap_free_block_free(
 /*=====================*/
-	mem_heap_t*	heap)	/* in: heap */
+	mem_heap_t*	heap)	/*!< in: heap */
 {
-	if (heap->free_block) {
+	if (UNIV_LIKELY_NULL(heap->free_block)) {
 
-		buf_frame_free(heap->free_block);
+		buf_block_free(heap->free_block);
 
 		heap->free_block = NULL;
 	}
 }
+#endif /* !UNIV_HOTBACKUP */
 
 #ifdef MEM_PERIODIC_CHECK
-/**********************************************************************
+/******************************************************************//**
 Goes through the list of all allocated mem blocks, checks their magic
 numbers, and reports possible corruption. */
-
+UNIV_INTERN
 void
 mem_validate_all_blocks(void)
 /*=========================*/
diff --git a/storage/innobase/mem/mem0pool.c b/storage/innodb_plugin/mem/mem0pool.c
similarity index 79%
rename from storage/innobase/mem/mem0pool.c
rename to storage/innodb_plugin/mem/mem0pool.c
index 315f719ca09..c8fea97a6a3 100644
--- a/storage/innobase/mem/mem0pool.c
+++ b/storage/innodb_plugin/mem/mem0pool.c
@@ -1,7 +1,24 @@
-/************************************************************************
-The lowest-level memory management
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file mem/mem0pool.c
+The lowest-level memory management
 
 Created 5/12/1997 Heikki Tuuri
 *************************************************************************/
@@ -11,6 +28,7 @@ Created 5/12/1997 Heikki Tuuri
 #include "mem0pool.ic"
 #endif
 
+#include "srv0srv.h"
 #include "sync0sync.h"
 #include "ut0mem.h"
 #include "ut0lst.h"
@@ -72,39 +90,39 @@ and for the adaptive index. Thus, for each individual transaction, its locks
 can occupy at most about the size of the buffer frame of memory in the common
 pool, and after that its locks will grow into the buffer pool. */
 
-/* Mask used to extract the free bit from area->size */
+/** Mask used to extract the free bit from area->size */
 #define MEM_AREA_FREE	1
 
-/* The smallest memory area total size */
+/** The smallest memory area total size */
 #define MEM_AREA_MIN_SIZE	(2 * MEM_AREA_EXTRA_SIZE)
 
 
-/* Data structure for a memory pool. The space is allocated using the buddy
+/** Data structure for a memory pool. The space is allocated using the buddy
 algorithm, where free list i contains areas of size 2 to power i. */
 struct mem_pool_struct{
-	byte*		buf;		/* memory pool */
-	ulint		size;		/* memory common pool size */
-	ulint		reserved;	/* amount of currently allocated
+	byte*		buf;		/*!< memory pool */
+	ulint		size;		/*!< memory common pool size */
+	ulint		reserved;	/*!< amount of currently allocated
 					memory */
-	mutex_t		mutex;		/* mutex protecting this struct */
+	mutex_t		mutex;		/*!< mutex protecting this struct */
 	UT_LIST_BASE_NODE_T(mem_area_t)
-			free_list[64];	/* lists of free memory areas: an
+			free_list[64];	/*!< lists of free memory areas: an
 					area is put to the list whose number
 					is the 2-logarithm of the area size */
 };
 
-/* The common memory pool */
-mem_pool_t*	mem_comm_pool	= NULL;
+/** The common memory pool */
+UNIV_INTERN mem_pool_t*	mem_comm_pool	= NULL;
 
 /* We use this counter to check that the mem pool mutex does not leak;
 this is to track a strange assertion failure reported at
 mysql@lists.mysql.com */
 
-ulint		mem_n_threads_inside		= 0;
+UNIV_INTERN ulint	mem_n_threads_inside		= 0;
 
-/************************************************************************
+/********************************************************************//**
 Reserves the mem pool mutex. */
-
+UNIV_INTERN
 void
 mem_pool_mutex_enter(void)
 /*======================*/
@@ -112,9 +130,9 @@ mem_pool_mutex_enter(void)
 	mutex_enter(&(mem_comm_pool->mutex));
 }
 
-/************************************************************************
+/********************************************************************//**
 Releases the mem pool mutex. */
-
+UNIV_INTERN
 void
 mem_pool_mutex_exit(void)
 /*=====================*/
@@ -122,39 +140,39 @@ mem_pool_mutex_exit(void)
 	mutex_exit(&(mem_comm_pool->mutex));
 }
 
-/************************************************************************
-Returns memory area size. */
+/********************************************************************//**
+Returns memory area size.
+@return	size */
 UNIV_INLINE
 ulint
 mem_area_get_size(
 /*==============*/
-				/* out: size */
-	mem_area_t*	area)	/* in: area */
+	mem_area_t*	area)	/*!< in: area */
 {
 	return(area->size_and_free & ~MEM_AREA_FREE);
 }
 
-/************************************************************************
+/********************************************************************//**
 Sets memory area size. */
 UNIV_INLINE
 void
 mem_area_set_size(
 /*==============*/
-	mem_area_t*	area,	/* in: area */
-	ulint		size)	/* in: size */
+	mem_area_t*	area,	/*!< in: area */
+	ulint		size)	/*!< in: size */
 {
 	area->size_and_free = (area->size_and_free & MEM_AREA_FREE)
 		| size;
 }
 
-/************************************************************************
-Returns memory area free bit. */
+/********************************************************************//**
+Returns memory area free bit.
+@return	TRUE if free */
 UNIV_INLINE
 ibool
 mem_area_get_free(
 /*==============*/
-				/* out: TRUE if free */
-	mem_area_t*	area)	/* in: area */
+	mem_area_t*	area)	/*!< in: area */
 {
 #if TRUE != MEM_AREA_FREE
 # error "TRUE != MEM_AREA_FREE"
@@ -162,14 +180,14 @@ mem_area_get_free(
 	return(area->size_and_free & MEM_AREA_FREE);
 }
 
-/************************************************************************
+/********************************************************************//**
 Sets memory area free bit. */
 UNIV_INLINE
 void
 mem_area_set_free(
 /*==============*/
-	mem_area_t*	area,	/* in: area */
-	ibool		free)	/* in: free bit value */
+	mem_area_t*	area,	/*!< in: area */
+	ibool		free)	/*!< in: free bit value */
 {
 #if TRUE != MEM_AREA_FREE
 # error "TRUE != MEM_AREA_FREE"
@@ -178,22 +196,20 @@ mem_area_set_free(
 		| free;
 }
 
-/************************************************************************
-Creates a memory pool. */
-
+/********************************************************************//**
+Creates a memory pool.
+@return	memory pool */
+UNIV_INTERN
 mem_pool_t*
 mem_pool_create(
 /*============*/
-			/* out: memory pool */
-	ulint	size)	/* in: pool size in bytes */
+	ulint	size)	/*!< in: pool size in bytes */
 {
 	mem_pool_t*	pool;
 	mem_area_t*	area;
 	ulint		i;
 	ulint		used;
 
-	ut_a(size > 10000);
-
 	pool = ut_malloc(sizeof(mem_pool_t));
 
 	/* We do not set the memory to zero (FALSE) in the pool,
@@ -244,16 +260,15 @@ mem_pool_create(
 	return(pool);
 }
 
-/************************************************************************
-Fills the specified free list. */
+/********************************************************************//**
+Fills the specified free list.
+@return	TRUE if we were able to insert a block to the free list */
 static
 ibool
 mem_pool_fill_free_list(
 /*====================*/
-				/* out: TRUE if we were able to insert a
-				block to the free list */
-	ulint		i,	/* in: free list index */
-	mem_pool_t*	pool)	/* in: memory pool */
+	ulint		i,	/*!< in: free list index */
+	mem_pool_t*	pool)	/*!< in: memory pool */
 {
 	mem_area_t*	area;
 	mem_area_t*	area2;
@@ -261,7 +276,7 @@ mem_pool_fill_free_list(
 
 	ut_ad(mutex_own(&(pool->mutex)));
 
-	if (i >= 63) {
+	if (UNIV_UNLIKELY(i >= 63)) {
 		/* We come here when we have run out of space in the
 		memory pool: */
 
@@ -293,7 +308,7 @@ mem_pool_fill_free_list(
 		area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
 	}
 
-	if (UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0) {
+	if (UNIV_UNLIKELY(UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0)) {
 		mem_analyze_corruption(area);
 
 		ut_error;
@@ -316,27 +331,33 @@ mem_pool_fill_free_list(
 	return(TRUE);
 }
 
-/************************************************************************
+/********************************************************************//**
 Allocates memory from a pool. NOTE: This low-level function should only be
-used in mem0mem.*! */
-
+used in mem0mem.*!
+@return	own: allocated memory buffer */
+UNIV_INTERN
 void*
 mem_area_alloc(
 /*===========*/
-				/* out, own: allocated memory buffer */
-	ulint		size,	/* in: allocated size in bytes; for optimum
+	ulint*		psize,	/*!< in: requested size in bytes; for optimum
 				space usage, the size should be a power of 2
-				minus MEM_AREA_EXTRA_SIZE */
-	mem_pool_t*	pool)	/* in: memory pool */
+				minus MEM_AREA_EXTRA_SIZE;
+				out: allocated size in bytes (greater than
+				or equal to the requested size) */
+	mem_pool_t*	pool)	/*!< in: memory pool */
 {
-#ifdef UNIV_DISABLE_MEM_POOL
-        (void)pool; /* Remove compiler warning */
-        return malloc(size);
-#else /* UNIV_DISABLE_MEM_POOL */
 	mem_area_t*	area;
+	ulint		size;
 	ulint		n;
 	ibool		ret;
 
+	/* If we are using os allocator just make a simple call
+	to malloc */
+	if (UNIV_LIKELY(srv_use_sys_malloc)) {
+		return(malloc(*psize));
+	}
+
+	size = *psize;
 	n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
 
 	mutex_enter(&(pool->mutex));
@@ -407,23 +428,23 @@ mem_area_alloc(
 	mutex_exit(&(pool->mutex));
 
 	ut_ad(mem_pool_validate(pool));
-	UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area,
-		       ut_2_exp(n) - MEM_AREA_EXTRA_SIZE);
+
+	*psize = ut_2_exp(n) - MEM_AREA_EXTRA_SIZE;
+	UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area, *psize);
 
 	return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
-#endif /* UNIV_DISABLE_MEM_POOL */
 }
 
-/************************************************************************
-Gets the buddy of an area, if it exists in pool. */
+/********************************************************************//**
+Gets the buddy of an area, if it exists in pool.
+@return	the buddy, NULL if no buddy in pool */
 UNIV_INLINE
 mem_area_t*
 mem_area_get_buddy(
 /*===============*/
-				/* out: the buddy, NULL if no buddy in pool */
-	mem_area_t*	area,	/* in: memory area */
-	ulint		size,	/* in: memory area size */
-	mem_pool_t*	pool)	/* in: memory pool */
+	mem_area_t*	area,	/*!< in: memory area */
+	ulint		size,	/*!< in: memory area size */
+	mem_pool_t*	pool)	/*!< in: memory pool */
 {
 	mem_area_t*	buddy;
 
@@ -454,26 +475,28 @@ mem_area_get_buddy(
 	return(buddy);
 }
 
-/************************************************************************
+/********************************************************************//**
 Frees memory to a pool. */
-
+UNIV_INTERN
 void
 mem_area_free(
 /*==========*/
-	void*		ptr,	/* in, own: pointer to allocated memory
+	void*		ptr,	/*!< in, own: pointer to allocated memory
 				buffer */
-	mem_pool_t*	pool)	/* in: memory pool */
+	mem_pool_t*	pool)	/*!< in: memory pool */
 {
-#ifdef UNIV_DISABLE_MEM_POOL
-        (void)pool; /* Remove compiler warning */
-        free(ptr);
-#else /* UNIV_DISABLE_MEM_POOL */
 	mem_area_t*	area;
 	mem_area_t*	buddy;
 	void*		new_ptr;
 	ulint		size;
 	ulint		n;
 
+	if (UNIV_LIKELY(srv_use_sys_malloc)) {
+		free(ptr);
+
+		return;
+	}
+
 	/* It may be that the area was really allocated from the OS with
 	regular malloc: check if ptr points within our memory pool */
 
@@ -515,7 +538,7 @@ mem_area_free(
 
 		next_size = mem_area_get_size(
 			(mem_area_t*)(((byte*)area) + size));
-		if (ut_2_power_up(next_size) != next_size) {
+		if (UNIV_UNLIKELY(!next_size || !ut_is_2pow(next_size))) {
 			fprintf(stderr,
 				"InnoDB: Error: Memory area size %lu,"
 				" next area size %lu not a power of 2!\n"
@@ -579,17 +602,16 @@ mem_area_free(
 	mutex_exit(&(pool->mutex));
 
 	ut_ad(mem_pool_validate(pool));
-#endif /* UNIV_DISABLE_MEM_POOL */
 }
 
-/************************************************************************
-Validates a memory pool. */
-
+/********************************************************************//**
+Validates a memory pool.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 mem_pool_validate(
 /*==============*/
-				/* out: TRUE if ok */
-	mem_pool_t*	pool)	/* in: memory pool */
+	mem_pool_t*	pool)	/*!< in: memory pool */
 {
 	mem_area_t*	area;
 	mem_area_t*	buddy;
@@ -602,7 +624,8 @@ mem_pool_validate(
 
 	for (i = 0; i < 64; i++) {
 
-		UT_LIST_VALIDATE(free_list, mem_area_t, pool->free_list[i]);
+		UT_LIST_VALIDATE(free_list, mem_area_t, pool->free_list[i],
+				 (void) 0);
 
 		area = UT_LIST_GET_FIRST(pool->free_list[i]);
 
@@ -628,14 +651,14 @@ mem_pool_validate(
 	return(TRUE);
 }
 
-/************************************************************************
+/********************************************************************//**
 Prints info of a memory pool. */
-
+UNIV_INTERN
 void
 mem_pool_print_info(
 /*================*/
-	FILE*		outfile,/* in: output file to write to */
-	mem_pool_t*	pool)	/* in: memory pool */
+	FILE*		outfile,/*!< in: output file to write to */
+	mem_pool_t*	pool)	/*!< in: memory pool */
 {
 	ulint		i;
 
@@ -661,14 +684,14 @@ mem_pool_print_info(
 	mutex_exit(&(pool->mutex));
 }
 
-/************************************************************************
-Returns the amount of reserved memory. */
-
+/********************************************************************//**
+Returns the amount of reserved memory.
+@return	reserved memory in bytes */
+UNIV_INTERN
 ulint
 mem_pool_get_reserved(
 /*==================*/
-				/* out: reserved memory in bytes */
-	mem_pool_t*	pool)	/* in: memory pool */
+	mem_pool_t*	pool)	/*!< in: memory pool */
 {
 	ulint	reserved;
 
diff --git a/storage/innobase/mtr/mtr0log.c b/storage/innodb_plugin/mtr/mtr0log.c
similarity index 57%
rename from storage/innobase/mtr/mtr0log.c
rename to storage/innodb_plugin/mtr/mtr0log.c
index e5d572bbfa7..3f3dab36b76 100644
--- a/storage/innobase/mtr/mtr0log.c
+++ b/storage/innodb_plugin/mtr/mtr0log.c
@@ -1,7 +1,24 @@
-/******************************************************
-Mini-transaction log routines
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file mtr/mtr0log.c
+Mini-transaction log routines
 
 Created 12/7/1995 Heikki Tuuri
 *******************************************************/
@@ -13,19 +30,22 @@ Created 12/7/1995 Heikki Tuuri
 #endif
 
 #include "buf0buf.h"
-#include "dict0boot.h"
+#include "dict0dict.h"
 #include "log0recv.h"
 #include "page0page.h"
 
-/************************************************************
-Catenates n bytes to the mtr log. */
+#ifndef UNIV_HOTBACKUP
+# include "dict0boot.h"
 
+/********************************************************//**
+Catenates n bytes to the mtr log. */
+UNIV_INTERN
 void
 mlog_catenate_string(
 /*=================*/
-	mtr_t*		mtr,	/* in: mtr */
-	const byte*	str,	/* in: string to write */
-	ulint		len)	/* in: string length */
+	mtr_t*		mtr,	/*!< in: mtr */
+	const byte*	str,	/*!< in: string to write */
+	ulint		len)	/*!< in: string length */
 {
 	dyn_array_t*	mlog;
 
@@ -39,31 +59,25 @@ mlog_catenate_string(
 	dyn_push_string(mlog, str, len);
 }
 
-/************************************************************
+/********************************************************//**
 Writes the initial part of a log record consisting of one-byte item
 type and four-byte space and page numbers. Also pushes info
 to the mtr memo that a buffer page has been modified. */
-
+UNIV_INTERN
 void
 mlog_write_initial_log_record(
 /*==========================*/
-	byte*	ptr,	/* in: pointer to (inside) a buffer frame holding the
-			file page where modification is made */
-	byte	type,	/* in: log item type: MLOG_1BYTE, ... */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
+	const byte*	ptr,	/*!< in: pointer to (inside) a buffer
+				frame holding the file page where
+				modification is made */
+	byte		type,	/*!< in: log item type: MLOG_1BYTE, ... */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
 {
 	byte*	log_ptr;
 
 	ut_ad(type <= MLOG_BIGGEST_TYPE);
 	ut_ad(type > MLOG_8BYTES);
 
-	if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to write to"
-			" a stray memory location %p\n", (void*) ptr);
-		ut_error;
-	}
-
 	log_ptr = mlog_open(mtr, 11);
 
 	/* If no logging is requested, we may return now */
@@ -76,20 +90,20 @@ mlog_write_initial_log_record(
 
 	mlog_close(mtr, log_ptr);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/************************************************************
-Parses an initial log record written by mlog_write_initial_log_record. */
-
+/********************************************************//**
+Parses an initial log record written by mlog_write_initial_log_record.
+@return	parsed record end, NULL if not a complete record */
+UNIV_INTERN
 byte*
 mlog_parse_initial_log_record(
 /*==========================*/
-			/* out: parsed record end, NULL if not a complete
-			record */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	byte*	type,	/* out: log record type: MLOG_1BYTE, ... */
-	ulint*	space,	/* out: space id */
-	ulint*	page_no)/* out: page number */
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	byte*	type,	/*!< out: log record type: MLOG_1BYTE, ... */
+	ulint*	space,	/*!< out: space id */
+	ulint*	page_no)/*!< out: page number */
 {
 	if (end_ptr < ptr + 1) {
 
@@ -118,24 +132,25 @@ mlog_parse_initial_log_record(
 	return(ptr);
 }
 
-/************************************************************
-Parses a log record written by mlog_write_ulint or mlog_write_dulint. */
-
+/********************************************************//**
+Parses a log record written by mlog_write_ulint or mlog_write_dulint.
+@return	parsed record end, NULL if not a complete record or a corrupt record */
+UNIV_INTERN
 byte*
 mlog_parse_nbytes(
 /*==============*/
-			/* out: parsed record end, NULL if not a complete
-			record or a corrupt record */
-	ulint	type,	/* in: log record type: MLOG_1BYTE, ... */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	byte*	page)	/* in: page where to apply the log record, or NULL */
+	ulint	type,	/*!< in: log record type: MLOG_1BYTE, ... */
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	byte*	page,	/*!< in: page where to apply the log record, or NULL */
+	void*	page_zip)/*!< in/out: compressed page, or NULL */
 {
 	ulint	offset;
 	ulint	val;
 	dulint	dval;
 
 	ut_a(type <= MLOG_8BYTES);
+	ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);
 
 	if (end_ptr < ptr + 2) {
 
@@ -160,6 +175,11 @@ mlog_parse_nbytes(
 		}
 
 		if (page) {
+			if (UNIV_LIKELY_NULL(page_zip)) {
+				mach_write_to_8
+					(((page_zip_des_t*) page_zip)->data
+					 + offset, dval);
+			}
 			mach_write_to_8(page + offset, dval);
 		}
 
@@ -173,68 +193,78 @@ mlog_parse_nbytes(
 		return(NULL);
 	}
 
-	if (type == MLOG_1BYTE) {
-		if (val > 0xFFUL) {
-			recv_sys->found_corrupt_log = TRUE;
-
-			return(NULL);
+	switch (type) {
+	case MLOG_1BYTE:
+		if (UNIV_UNLIKELY(val > 0xFFUL)) {
+			goto corrupt;
 		}
-	} else if (type == MLOG_2BYTES) {
-		if (val > 0xFFFFUL) {
-			recv_sys->found_corrupt_log = TRUE;
-
-			return(NULL);
-		}
-	} else {
-		if (type != MLOG_4BYTES) {
-			recv_sys->found_corrupt_log = TRUE;
-
-			return(NULL);
-		}
-	}
-
-	if (page) {
-		if (type == MLOG_1BYTE) {
+		if (page) {
+			if (UNIV_LIKELY_NULL(page_zip)) {
+				mach_write_to_1
+					(((page_zip_des_t*) page_zip)->data
+					 + offset, val);
+			}
 			mach_write_to_1(page + offset, val);
-		} else if (type == MLOG_2BYTES) {
+		}
+		break;
+	case MLOG_2BYTES:
+		if (UNIV_UNLIKELY(val > 0xFFFFUL)) {
+			goto corrupt;
+		}
+		if (page) {
+			if (UNIV_LIKELY_NULL(page_zip)) {
+				mach_write_to_2
+					(((page_zip_des_t*) page_zip)->data
+					 + offset, val);
+			}
 			mach_write_to_2(page + offset, val);
-		} else {
-			ut_a(type == MLOG_4BYTES);
+		}
+		break;
+	case MLOG_4BYTES:
+		if (page) {
+			if (UNIV_LIKELY_NULL(page_zip)) {
+				mach_write_to_4
+					(((page_zip_des_t*) page_zip)->data
+					 + offset, val);
+			}
 			mach_write_to_4(page + offset, val);
 		}
+		break;
+	default:
+	corrupt:
+		recv_sys->found_corrupt_log = TRUE;
+		ptr = NULL;
 	}
 
 	return(ptr);
 }
 
-/************************************************************
+/********************************************************//**
 Writes 1 - 4 bytes to a file page buffered in the buffer pool.
 Writes the corresponding log record to the mini-transaction log. */
-
+UNIV_INTERN
 void
 mlog_write_ulint(
 /*=============*/
-	byte*	ptr,	/* in: pointer where to write */
-	ulint	val,	/* in: value to write */
-	byte	type,	/* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
+	byte*	ptr,	/*!< in: pointer where to write */
+	ulint	val,	/*!< in: value to write */
+	byte	type,	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+	mtr_t*	mtr)	/*!< in: mini-transaction handle */
 {
 	byte*	log_ptr;
 
-	if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to write to"
-			" a stray memory location %p\n", (void*) ptr);
-		ut_error;
-	}
-
-	if (type == MLOG_1BYTE) {
+	switch (type) {
+	case MLOG_1BYTE:
 		mach_write_to_1(ptr, val);
-	} else if (type == MLOG_2BYTES) {
+		break;
+	case MLOG_2BYTES:
 		mach_write_to_2(ptr, val);
-	} else {
-		ut_ad(type == MLOG_4BYTES);
+		break;
+	case MLOG_4BYTES:
 		mach_write_to_4(ptr, val);
+		break;
+	default:
+		ut_error;
 	}
 
 	log_ptr = mlog_open(mtr, 11 + 2 + 5);
@@ -247,7 +277,7 @@ mlog_write_ulint(
 
 	log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
 
-	mach_write_to_2(log_ptr, ptr - buf_frame_align(ptr));
+	mach_write_to_2(log_ptr, page_offset(ptr));
 	log_ptr += 2;
 
 	log_ptr += mach_write_compressed(log_ptr, val);
@@ -255,27 +285,19 @@ mlog_write_ulint(
 	mlog_close(mtr, log_ptr);
 }
 
-/************************************************************
+/********************************************************//**
 Writes 8 bytes to a file page buffered in the buffer pool.
 Writes the corresponding log record to the mini-transaction log. */
-
+UNIV_INTERN
 void
 mlog_write_dulint(
 /*==============*/
-	byte*	ptr,	/* in: pointer where to write */
-	dulint	val,	/* in: value to write */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
+	byte*	ptr,	/*!< in: pointer where to write */
+	dulint	val,	/*!< in: value to write */
+	mtr_t*	mtr)	/*!< in: mini-transaction handle */
 {
 	byte*	log_ptr;
 
-	if (UNIV_UNLIKELY(ptr < buf_pool->frame_zero)
-	    || UNIV_UNLIKELY(ptr >= buf_pool->high_end)) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to write to"
-			" a stray memory location %p\n", (void*) ptr);
-		ut_error;
-	}
-
 	ut_ad(ptr && mtr);
 
 	mach_write_to_8(ptr, val);
@@ -291,7 +313,7 @@ mlog_write_dulint(
 	log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_8BYTES,
 						     log_ptr, mtr);
 
-	mach_write_to_2(log_ptr, ptr - buf_frame_align(ptr));
+	mach_write_to_2(log_ptr, page_offset(ptr));
 	log_ptr += 2;
 
 	log_ptr += mach_dulint_write_compressed(log_ptr, val);
@@ -299,31 +321,42 @@ mlog_write_dulint(
 	mlog_close(mtr, log_ptr);
 }
 
-/************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
 Writes a string to a file page buffered in the buffer pool. Writes the
 corresponding log record to the mini-transaction log. */
-
+UNIV_INTERN
 void
 mlog_write_string(
 /*==============*/
-	byte*		ptr,	/* in: pointer where to write */
-	const byte*	str,	/* in: string to write */
-	ulint		len,	/* in: string length */
-	mtr_t*		mtr)	/* in: mini-transaction handle */
+	byte*		ptr,	/*!< in: pointer where to write */
+	const byte*	str,	/*!< in: string to write */
+	ulint		len,	/*!< in: string length */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
 {
-	byte*	log_ptr;
-
-	if (UNIV_UNLIKELY(ptr < buf_pool->frame_zero)
-	    || UNIV_UNLIKELY(ptr >= buf_pool->high_end)) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to write to"
-			" a stray memory location %p\n", (void*) ptr);
-		ut_error;
-	}
 	ut_ad(ptr && mtr);
 	ut_a(len < UNIV_PAGE_SIZE);
 
-	ut_memcpy(ptr, str, len);
+	memcpy(ptr, str, len);
+
+	mlog_log_string(ptr, len, mtr);
+}
+
+/********************************************************//**
+Logs a write of a string to a file page buffered in the buffer pool.
+Writes the corresponding log record to the mini-transaction log. */
+UNIV_INTERN
+void
+mlog_log_string(
+/*============*/
+	byte*	ptr,	/*!< in: pointer written to */
+	ulint	len,	/*!< in: string length */
+	mtr_t*	mtr)	/*!< in: mini-transaction handle */
+{
+	byte*	log_ptr;
+
+	ut_ad(ptr && mtr);
+	ut_ad(len <= UNIV_PAGE_SIZE);
 
 	log_ptr = mlog_open(mtr, 30);
 
@@ -335,7 +368,7 @@ mlog_write_string(
 
 	log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_WRITE_STRING,
 						     log_ptr, mtr);
-	mach_write_to_2(log_ptr, ptr - buf_frame_align(ptr));
+	mach_write_to_2(log_ptr, page_offset(ptr));
 	log_ptr += 2;
 
 	mach_write_to_2(log_ptr, len);
@@ -343,24 +376,27 @@ mlog_write_string(
 
 	mlog_close(mtr, log_ptr);
 
-	mlog_catenate_string(mtr, str, len);
+	mlog_catenate_string(mtr, ptr, len);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/************************************************************
-Parses a log record written by mlog_write_string. */
-
+/********************************************************//**
+Parses a log record written by mlog_write_string.
+@return	parsed record end, NULL if not a complete record */
+UNIV_INTERN
 byte*
 mlog_parse_string(
 /*==============*/
-			/* out: parsed record end, NULL if not a complete
-			record */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	byte*	page)	/* in: page where to apply the log record, or NULL */
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	byte*	page,	/*!< in: page where to apply the log record, or NULL */
+	void*	page_zip)/*!< in/out: compressed page, or NULL */
 {
 	ulint	offset;
 	ulint	len;
 
+	ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);
+
 	if (end_ptr < ptr + 4) {
 
 		return(NULL);
@@ -368,44 +404,46 @@ mlog_parse_string(
 
 	offset = mach_read_from_2(ptr);
 	ptr += 2;
+	len = mach_read_from_2(ptr);
+	ptr += 2;
 
-	if (offset >= UNIV_PAGE_SIZE) {
+	if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
+			|| UNIV_UNLIKELY(len + offset) > UNIV_PAGE_SIZE) {
 		recv_sys->found_corrupt_log = TRUE;
 
 		return(NULL);
 	}
 
-	len = mach_read_from_2(ptr);
-	ptr += 2;
-
-	ut_a(len + offset < UNIV_PAGE_SIZE);
-
 	if (end_ptr < ptr + len) {
 
 		return(NULL);
 	}
 
 	if (page) {
-		ut_memcpy(page + offset, ptr, len);
+		if (UNIV_LIKELY_NULL(page_zip)) {
+			memcpy(((page_zip_des_t*) page_zip)->data
+				+ offset, ptr, len);
+		}
+		memcpy(page + offset, ptr, len);
 	}
 
 	return(ptr + len);
 }
 
-/************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
 Opens a buffer for mlog, writes the initial log record and,
-if needed, the field lengths of an index. */
-
+if needed, the field lengths of an index.
+@return	buffer, NULL if log mode MTR_LOG_NONE */
+UNIV_INTERN
 byte*
 mlog_open_and_write_index(
 /*======================*/
-				/* out: buffer, NULL if log mode
-				MTR_LOG_NONE */
-	mtr_t*		mtr,	/* in: mtr */
-	byte*		rec,	/* in: index record or page */
-	dict_index_t*	index,	/* in: record descriptor */
-	byte		type,	/* in: log item type */
-	ulint		size)	/* in: requested buffer size in bytes
+	mtr_t*		mtr,	/*!< in: mtr */
+	const byte*	rec,	/*!< in: index record or page */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	byte		type,	/*!< in: log item type */
+	ulint		size)	/*!< in: requested buffer size in bytes
 				(if 0, calls mlog_close() and returns NULL) */
 {
 	byte*		log_ptr;
@@ -489,20 +527,19 @@ mlog_open_and_write_index(
 	}
 	return(log_ptr);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/************************************************************
-Parses a log record written by mlog_open_and_write_index. */
-
+/********************************************************//**
+Parses a log record written by mlog_open_and_write_index.
+@return	parsed record end, NULL if not a complete record */
+UNIV_INTERN
 byte*
 mlog_parse_index(
 /*=============*/
-				/* out: parsed record end,
-				NULL if not a complete record */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-				/* out: new value of log_ptr */
-	ibool		comp,	/* in: TRUE=compact record format */
-	dict_index_t**	index)	/* out, own: dummy index */
+	byte*		ptr,	/*!< in: buffer */
+	const byte*	end_ptr,/*!< in: buffer end */
+	ibool		comp,	/*!< in: TRUE=compact record format */
+	dict_index_t**	index)	/*!< out, own: dummy index */
 {
 	ulint		i, n, n_uniq;
 	dict_table_t*	table;
@@ -549,7 +586,7 @@ mlog_parse_index(
 				len & 0x8000 ? DATA_NOT_NULL : 0,
 				len & 0x7fff);
 
-			dict_index_add_col(ind, table, (dict_col_t*)
+			dict_index_add_col(ind, table,
 					   dict_table_get_nth_col(table, i),
 					   0);
 		}
diff --git a/storage/innobase/mtr/mtr0mtr.c b/storage/innodb_plugin/mtr/mtr0mtr.c
similarity index 63%
rename from storage/innobase/mtr/mtr0mtr.c
rename to storage/innodb_plugin/mtr/mtr0mtr.c
index 365fa15878a..be31c5df801 100644
--- a/storage/innobase/mtr/mtr0mtr.c
+++ b/storage/innodb_plugin/mtr/mtr0mtr.c
@@ -1,7 +1,24 @@
-/******************************************************
-Mini-transaction buffer
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file mtr/mtr0mtr.c
+Mini-transaction buffer
 
 Created 11/26/1995 Heikki Tuuri
 *******************************************************/
@@ -17,28 +34,15 @@ Created 11/26/1995 Heikki Tuuri
 #include "mtr0log.h"
 #include "log0log.h"
 
-/*******************************************************************
-Starts a mini-transaction and creates a mini-transaction handle
-and buffer in the memory buffer given by the caller. */
-
-mtr_t*
-mtr_start_noninline(
-/*================*/
-			/* out: mtr buffer which also acts as
-			the mtr handle */
-	mtr_t*	mtr)	/* in: memory buffer for the mtr buffer */
-{
-	return(mtr_start(mtr));
-}
-
-/*********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*****************************************************************//**
 Releases the item in the slot given. */
 UNIV_INLINE
 void
 mtr_memo_slot_release(
 /*==================*/
-	mtr_t*			mtr,	/* in: mtr */
-	mtr_memo_slot_t*	slot)	/* in: memo slot */
+	mtr_t*			mtr,	/*!< in: mtr */
+	mtr_memo_slot_t*	slot)	/*!< in: memo slot */
 {
 	void*	object;
 	ulint	type;
@@ -54,23 +58,20 @@ mtr_memo_slot_release(
 		} else if (type == MTR_MEMO_S_LOCK) {
 			rw_lock_s_unlock((rw_lock_t*)object);
 #ifdef UNIV_DEBUG
-		} else if (type == MTR_MEMO_X_LOCK) {
-			rw_lock_x_unlock((rw_lock_t*)object);
-		} else {
+		} else if (type != MTR_MEMO_X_LOCK) {
 			ut_ad(type == MTR_MEMO_MODIFY);
 			ut_ad(mtr_memo_contains(mtr, object,
 						MTR_MEMO_PAGE_X_FIX));
-#else
+#endif /* UNIV_DEBUG */
 		} else {
 			rw_lock_x_unlock((rw_lock_t*)object);
-#endif
 		}
 	}
 
 	slot->object = NULL;
 }
 
-/**************************************************************
+/**********************************************************//**
 Releases the mlocks and other objects stored in an mtr memo. They are released
 in the order opposite to which they were pushed to the memo. NOTE! It is
 essential that the x-rw-lock on a modified buffer page is not released before
@@ -81,7 +82,7 @@ UNIV_INLINE
 void
 mtr_memo_pop_all(
 /*=============*/
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	mtr_memo_slot_t* slot;
 	dyn_array_t*	memo;
@@ -103,13 +104,13 @@ mtr_memo_pop_all(
 	}
 }
 
-/****************************************************************
+/************************************************************//**
 Writes the contents of a mini-transaction log, if any, to the database log. */
 static
 void
 mtr_log_reserve_and_write(
 /*======================*/
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	dyn_array_t*	mlog;
 	dyn_block_t*	block;
@@ -161,22 +162,29 @@ mtr_log_reserve_and_write(
 
 	mtr->end_lsn = log_close();
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/*******************************************************************
+/***************************************************************//**
 Commits a mini-transaction. */
-
+UNIV_INTERN
 void
 mtr_commit(
 /*=======*/
-	mtr_t*	mtr)	/* in: mini-transaction */
+	mtr_t*	mtr)	/*!< in: mini-transaction */
 {
+#ifndef UNIV_HOTBACKUP
+	ibool		write_log;
+#endif /* !UNIV_HOTBACKUP */
+
 	ut_ad(mtr);
 	ut_ad(mtr->magic_n == MTR_MAGIC_N);
 	ut_ad(mtr->state == MTR_ACTIVE);
-#ifdef UNIV_DEBUG
-	mtr->state = MTR_COMMITTING;
-#endif
-	if (mtr->modifications) {
+	ut_d(mtr->state = MTR_COMMITTING);
+
+#ifndef UNIV_HOTBACKUP
+	write_log = mtr->modifications && mtr->n_log_recs;
+
+	if (write_log) {
 		mtr_log_reserve_and_write(mtr);
 	}
 
@@ -190,27 +198,27 @@ mtr_commit(
 
 	mtr_memo_pop_all(mtr);
 
-	if (mtr->modifications) {
+	if (write_log) {
 		log_release();
 	}
+#endif /* !UNIV_HOTBACKUP */
 
-#ifdef UNIV_DEBUG
-	mtr->state = MTR_COMMITTED;
-#endif
+	ut_d(mtr->state = MTR_COMMITTED);
 	dyn_array_free(&(mtr->memo));
 	dyn_array_free(&(mtr->log));
 }
 
-/**************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
 Releases the latches stored in an mtr memo down to a savepoint.
 NOTE! The mtr must not have made changes to buffer pages after the
 savepoint, as these can be handled only by mtr_commit. */
-
+UNIV_INTERN
 void
 mtr_rollback_to_savepoint(
 /*======================*/
-	mtr_t*	mtr,		/* in: mtr */
-	ulint	savepoint)	/* in: savepoint */
+	mtr_t*	mtr,		/*!< in: mtr */
+	ulint	savepoint)	/*!< in: savepoint */
 {
 	mtr_memo_slot_t* slot;
 	dyn_array_t*	memo;
@@ -235,15 +243,15 @@ mtr_rollback_to_savepoint(
 	}
 }
 
-/*******************************************************
+/***************************************************//**
 Releases an object in the memo stack. */
-
+UNIV_INTERN
 void
 mtr_memo_release(
 /*=============*/
-	mtr_t*	mtr,	/* in: mtr */
-	void*	object,	/* in: object */
-	ulint	type)	/* in: object type: MTR_MEMO_S_LOCK, ... */
+	mtr_t*	mtr,	/*!< in: mtr */
+	void*	object,	/*!< in: object */
+	ulint	type)	/*!< in: object type: MTR_MEMO_S_LOCK, ... */
 {
 	mtr_memo_slot_t* slot;
 	dyn_array_t*	memo;
@@ -270,24 +278,23 @@ mtr_memo_release(
 		}
 	}
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/************************************************************
-Reads 1 - 4 bytes from a file page buffered in the buffer pool. */
-
+/********************************************************//**
+Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+@return	value read */
+UNIV_INTERN
 ulint
 mtr_read_ulint(
 /*===========*/
-				/* out: value read */
-	byte*		ptr,	/* in: pointer from where to read */
-	ulint		type,	/* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+	const byte*	ptr,	/*!< in: pointer from where to read */
+	ulint		type,	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
 	mtr_t*		mtr __attribute__((unused)))
-				/* in: mini-transaction handle */
+				/*!< in: mini-transaction handle */
 {
 	ut_ad(mtr->state == MTR_ACTIVE);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr),
-				MTR_MEMO_PAGE_S_FIX)
-	      || mtr_memo_contains(mtr, buf_block_align(ptr),
-				   MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_S_FIX)
+	      || mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
 	if (type == MLOG_1BYTE) {
 		return(mach_read_from_1(ptr));
 	} else if (type == MLOG_2BYTES) {
@@ -298,34 +305,46 @@ mtr_read_ulint(
 	}
 }
 
-/************************************************************
-Reads 8 bytes from a file page buffered in the buffer pool. */
-
+/********************************************************//**
+Reads 8 bytes from a file page buffered in the buffer pool.
+@return	value read */
+UNIV_INTERN
 dulint
 mtr_read_dulint(
 /*============*/
-				/* out: value read */
-	byte*		ptr,	/* in: pointer from where to read */
+	const byte*	ptr,	/*!< in: pointer from where to read */
 	mtr_t*		mtr __attribute__((unused)))
-				/* in: mini-transaction handle */
+				/*!< in: mini-transaction handle */
 {
 	ut_ad(mtr->state == MTR_ACTIVE);
-	ut_ad(ptr && mtr);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr),
-				MTR_MEMO_PAGE_S_FIX)
-	      || mtr_memo_contains(mtr, buf_block_align(ptr),
-				   MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_S_FIX)
+	      || mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
 	return(mach_read_from_8(ptr));
 }
 
 #ifdef UNIV_DEBUG
-/*************************************************************
-Prints info of an mtr handle. */
+# ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Checks if memo contains the given page.
+@return	TRUE if contains */
+UNIV_INTERN
+ibool
+mtr_memo_contains_page(
+/*===================*/
+	mtr_t*		mtr,	/*!< in: mtr */
+	const byte*	ptr,	/*!< in: pointer to buffer frame */
+	ulint		type)	/*!< in: type of object */
+{
+	return(mtr_memo_contains(mtr, buf_block_align(ptr), type));
+}
 
+/*********************************************************//**
+Prints info of an mtr handle. */
+UNIV_INTERN
 void
 mtr_print(
 /*======*/
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	fprintf(stderr,
 		"Mini-transaction handle: memo size %lu bytes"
@@ -333,4 +352,5 @@ mtr_print(
 		(ulong) dyn_array_get_data_size(&(mtr->memo)),
 		(ulong) dyn_array_get_data_size(&(mtr->log)));
 }
+# endif /* !UNIV_HOTBACKUP */
 #endif /* UNIV_DEBUG */
diff --git a/storage/innodb_plugin/mysql-test/ctype_innodb_like.inc b/storage/innodb_plugin/mysql-test/ctype_innodb_like.inc
new file mode 100644
index 00000000000..ae43342885a
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/ctype_innodb_like.inc
@@ -0,0 +1,21 @@
+#
+# Bug#11650: LIKE pattern matching using prefix index
+# doesn't return correct result
+#
+--disable_warnings
+#
+# This query creates a column using
+# character_set_connection and
+# collation_connection.
+#
+create table t1 engine=innodb select repeat('a',50) as c1;
+--enable_warnings
+alter table t1 add index(c1(5));
+
+insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111');
+select collation(c1) from t1 limit 1;
+select c1 from t1 where c1 like 'abcdef%' order by c1;
+select c1 from t1 where c1 like 'abcde1%' order by c1;
+select c1 from t1 where c1 like 'abcde11%' order by c1;
+select c1 from t1 where c1 like 'abcde111%' order by c1;
+drop table t1;
diff --git a/storage/innodb_plugin/mysql-test/have_innodb.inc b/storage/innodb_plugin/mysql-test/have_innodb.inc
new file mode 100644
index 00000000000..8944cc46f3e
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/have_innodb.inc
@@ -0,0 +1,4 @@
+disable_query_log;
+--require r/true.require
+select (support = 'YES' or support = 'DEFAULT' or support = 'ENABLED') as `TRUE` from information_schema.engines where engine = 'innodb';
+enable_query_log;
diff --git a/storage/innodb_plugin/mysql-test/innodb-analyze.result b/storage/innodb_plugin/mysql-test/innodb-analyze.result
new file mode 100644
index 00000000000..2aee004a2d6
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-analyze.result
@@ -0,0 +1,2 @@
+Variable_name	Value
+innodb_stats_sample_pages	1
diff --git a/storage/innodb_plugin/mysql-test/innodb-analyze.test b/storage/innodb_plugin/mysql-test/innodb-analyze.test
new file mode 100644
index 00000000000..d5d6d698170
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-analyze.test
@@ -0,0 +1,63 @@
+#
+# Test that mysqld does not crash when running ANALYZE TABLE with
+# different values of the parameter innodb_stats_sample_pages.
+#
+
+-- source include/have_innodb.inc
+
+# we care only that the following SQL commands do not produce errors
+# and do not crash the server
+-- disable_query_log
+-- disable_result_log
+-- enable_warnings
+
+SET GLOBAL innodb_stats_sample_pages=0;
+
+# check that the value has been adjusted to 1
+-- enable_result_log
+SHOW VARIABLES LIKE 'innodb_stats_sample_pages';
+-- disable_result_log
+
+CREATE TABLE innodb_analyze (
+	a INT,
+	b INT,
+	KEY(a),
+	KEY(b,a)
+) ENGINE=InnoDB;
+
+# test with empty table
+
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=2;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=4;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=8;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=16;
+ANALYZE TABLE innodb_analyze;
+
+INSERT INTO innodb_analyze VALUES
+(1,1), (1,1), (1,2), (1,3), (1,4), (1,5),
+(8,1), (8,8), (8,2), (7,1), (1,4), (3,5);
+
+SET GLOBAL innodb_stats_sample_pages=1;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=2;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=4;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=8;
+ANALYZE TABLE innodb_analyze;
+
+SET GLOBAL innodb_stats_sample_pages=16;
+ANALYZE TABLE innodb_analyze;
+
+DROP TABLE innodb_analyze;
diff --git a/storage/innodb_plugin/mysql-test/innodb-autoinc.result b/storage/innodb_plugin/mysql-test/innodb-autoinc.result
new file mode 100644
index 00000000000..d2e8eb19e0c
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-autoinc.result
@@ -0,0 +1,891 @@
+drop table if exists t1;
+CREATE TABLE t1 (c1 BIGINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (9223372036854775807, null);
+INSERT INTO t1 (c2) VALUES ('innodb');
+Got one of the listed errors
+SELECT * FROM t1;
+c1	c2
+9223372036854775807	NULL
+DROP TABLE t1;
+CREATE TABLE t1 (c1 TINYINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (127, null);
+INSERT INTO t1 (c2) VALUES ('innodb');
+Got one of the listed errors
+SELECT * FROM t1;
+c1	c2
+127	NULL
+DROP TABLE t1;
+CREATE TABLE t1 (c1 TINYINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (255, null);
+INSERT INTO t1 (c2) VALUES ('innodb');
+Got one of the listed errors
+SELECT * FROM t1;
+c1	c2
+255	NULL
+DROP TABLE t1;
+CREATE TABLE t1 (c1 SMALLINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (32767, null);
+INSERT INTO t1 (c2) VALUES ('innodb');
+Got one of the listed errors
+SELECT * FROM t1;
+c1	c2
+32767	NULL
+DROP TABLE t1;
+CREATE TABLE t1 (c1 SMALLINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (65535, null);
+INSERT INTO t1 (c2) VALUES ('innodb');
+Got one of the listed errors
+SELECT * FROM t1;
+c1	c2
+65535	NULL
+DROP TABLE t1;
+CREATE TABLE t1 (c1 MEDIUMINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (8388607, null);
+INSERT INTO t1 (c2) VALUES ('innodb');
+Got one of the listed errors
+SELECT * FROM t1;
+c1	c2
+8388607	NULL
+DROP TABLE t1;
+CREATE TABLE t1 (c1 MEDIUMINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (16777215, null);
+INSERT INTO t1 (c2) VALUES ('innodb');
+Got one of the listed errors
+SELECT * FROM t1;
+c1	c2
+16777215	NULL
+DROP TABLE t1;
+CREATE TABLE t1 (c1 INT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (2147483647, null);
+INSERT INTO t1 (c2) VALUES ('innodb');
+Got one of the listed errors
+SELECT * FROM t1;
+c1	c2
+2147483647	NULL
+DROP TABLE t1;
+CREATE TABLE t1 (c1 INT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (4294967295, null);
+INSERT INTO t1 (c2) VALUES ('innodb');
+Got one of the listed errors
+SELECT * FROM t1;
+c1	c2
+4294967295	NULL
+DROP TABLE t1;
+CREATE TABLE t1 (c1 BIGINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (9223372036854775807, null);
+INSERT INTO t1 (c2) VALUES ('innodb');
+Got one of the listed errors
+SELECT * FROM t1;
+c1	c2
+9223372036854775807	NULL
+DROP TABLE t1;
+CREATE TABLE t1 (c1 BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (18446744073709551615, null);
+INSERT INTO t1 (c2) VALUES ('innodb');
+Got one of the listed errors
+SELECT * FROM t1;
+c1	c2
+18446744073709551615	NULL
+DROP TABLE t1;
+CREATE TABLE t1(c1 INT PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1), (2), (3);
+INSERT INTO t1 VALUES (NULL), (NULL), (NULL);
+SELECT c1 FROM t1;
+c1
+1
+2
+3
+4
+5
+6
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `c1` int(11) NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`c1`)
+) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=latin1
+TRUNCATE TABLE t1;
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `c1` int(11) NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (1), (2), (3);
+INSERT INTO t1 VALUES (NULL), (NULL), (NULL);
+SELECT c1 FROM t1;
+c1
+1
+2
+3
+4
+5
+6
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `c1` int(11) NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`c1`)
+) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=latin1
+DROP TABLE t1;
+CREATE TABLE t1(c1 INT PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1), (2), (3);
+INSERT INTO t1 VALUES (NULL), (NULL), (NULL);
+SELECT c1 FROM t1;
+c1
+1
+2
+3
+4
+5
+6
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `c1` int(11) NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`c1`)
+) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=latin1
+DELETE FROM t1;
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `c1` int(11) NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`c1`)
+) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (1), (2), (3);
+INSERT INTO t1 VALUES (NULL), (NULL), (NULL);
+SELECT c1 FROM t1;
+c1
+1
+2
+3
+7
+8
+9
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `c1` int(11) NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`c1`)
+) ENGINE=InnoDB AUTO_INCREMENT=10 DEFAULT CHARSET=latin1
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+CREATE TABLE t1 (c1 INT AUTO_INCREMENT, c2 INT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (NULL, 1);
+DELETE FROM t1 WHERE c1 = 1;
+INSERT INTO t1 VALUES (2,1);
+INSERT INTO t1 VALUES (NULL,8);
+SELECT * FROM t1;
+c1	c2
+2	1
+3	8
+DROP TABLE t1;
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+CREATE TABLE t1 (c1 INT AUTO_INCREMENT, c2 INT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (NULL, 1);
+DELETE FROM t1 WHERE c1 = 1;
+INSERT INTO t1 VALUES (2,1), (NULL, 8);
+INSERT INTO t1 VALUES (NULL,9);
+SELECT * FROM t1;
+c1	c2
+2	1
+3	8
+5	9
+DROP TABLE t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=100, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	100
+auto_increment_offset	10
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+CREATE TABLE t1 (c1 INT AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (NULL),(5),(NULL);
+INSERT INTO t1 VALUES (250),(NULL);
+SELECT * FROM t1;
+c1
+5
+10
+110
+250
+310
+INSERT INTO t1 VALUES (1000);
+SET @@INSERT_ID=400;
+INSERT INTO t1 VALUES(NULL),(NULL);
+SELECT * FROM t1;
+c1
+5
+10
+110
+250
+310
+400
+410
+1000
+DROP TABLE t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	1
+auto_increment_offset	1
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+CREATE TABLE t1 (c1 INT AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(0);
+SELECT * FROM t1;
+c1
+1
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=100, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+INSERT INTO t1 VALUES (-1), (NULL),(2),(NULL);
+INSERT INTO t1 VALUES (250),(NULL);
+SELECT * FROM t1;
+c1
+-1
+1
+2
+10
+110
+250
+410
+SET @@INSERT_ID=400;
+INSERT INTO t1 VALUES(NULL),(NULL);
+Got one of the listed errors
+SELECT * FROM t1;
+c1
+-1
+1
+2
+10
+110
+250
+410
+DROP TABLE t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	1
+auto_increment_offset	1
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+CREATE TABLE t1 (c1 INT AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(-1);
+SELECT * FROM t1;
+c1
+-1
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=100, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	100
+auto_increment_offset	10
+INSERT INTO t1 VALUES (-2), (NULL),(2),(NULL);
+INSERT INTO t1 VALUES (250),(NULL);
+SELECT * FROM t1;
+c1
+-2
+-1
+1
+2
+10
+250
+310
+INSERT INTO t1 VALUES (1000);
+SET @@INSERT_ID=400;
+INSERT INTO t1 VALUES(NULL),(NULL);
+SELECT * FROM t1;
+c1
+-2
+-1
+1
+2
+10
+250
+310
+400
+410
+1000
+DROP TABLE t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	1
+auto_increment_offset	1
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+CREATE TABLE t1 (c1 INT UNSIGNED AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(-1);
+Warnings:
+Warning	1264	Out of range value for column 'c1' at row 1
+SELECT * FROM t1;
+c1
+1
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=100, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	100
+auto_increment_offset	10
+INSERT INTO t1 VALUES (-2);
+Warnings:
+Warning	1264	Out of range value for column 'c1' at row 1
+INSERT INTO t1 VALUES (NULL);
+INSERT INTO t1 VALUES (2);
+INSERT INTO t1 VALUES (NULL);
+INSERT INTO t1 VALUES (250);
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1;
+c1
+1
+2
+10
+110
+210
+250
+310
+INSERT INTO t1 VALUES (1000);
+SET @@INSERT_ID=400;
+INSERT INTO t1 VALUES(NULL);
+INSERT INTO t1 VALUES(NULL);
+SELECT * FROM t1;
+c1
+1
+2
+10
+110
+210
+250
+310
+400
+1000
+1010
+DROP TABLE t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	1
+auto_increment_offset	1
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+CREATE TABLE t1 (c1 INT UNSIGNED AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(-1);
+Warnings:
+Warning	1264	Out of range value for column 'c1' at row 1
+SELECT * FROM t1;
+c1
+1
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=100, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	100
+auto_increment_offset	10
+INSERT INTO t1 VALUES (-2),(NULL),(2),(NULL);
+Warnings:
+Warning	1264	Out of range value for column 'c1' at row 1
+INSERT INTO t1 VALUES (250),(NULL);
+SELECT * FROM t1;
+c1
+1
+2
+10
+110
+210
+250
+410
+INSERT INTO t1 VALUES (1000);
+SET @@INSERT_ID=400;
+INSERT INTO t1 VALUES(NULL),(NULL);
+Got one of the listed errors
+SELECT * FROM t1;
+c1
+1
+2
+10
+110
+210
+250
+410
+1000
+DROP TABLE t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	1
+auto_increment_offset	1
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+CREATE TABLE t1 (c1 BIGINT AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(NULL);
+INSERT INTO t1 VALUES (9223372036854775794);
+SELECT * FROM t1;
+c1
+1
+9223372036854775794
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=2, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	2
+auto_increment_offset	10
+INSERT INTO t1 VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+SELECT * FROM t1;
+c1
+1
+9223372036854775794
+9223372036854775796
+9223372036854775798
+9223372036854775800
+9223372036854775802
+9223372036854775804
+9223372036854775806
+DROP TABLE t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	1
+auto_increment_offset	1
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+CREATE TABLE t1 (c1 BIGINT UNSIGNED AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(NULL);
+INSERT INTO t1 VALUES (18446744073709551603);
+SELECT * FROM t1;
+c1
+1
+18446744073709551603
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=2, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	2
+auto_increment_offset	10
+INSERT INTO t1 VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+SELECT * FROM t1;
+c1
+1
+18446744073709551603
+18446744073709551604
+18446744073709551606
+18446744073709551608
+18446744073709551610
+18446744073709551612
+18446744073709551614
+DROP TABLE t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	1
+auto_increment_offset	1
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+CREATE TABLE t1 (c1 BIGINT UNSIGNED AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(NULL);
+INSERT INTO t1 VALUES (18446744073709551603);
+SELECT * FROM t1;
+c1
+1
+18446744073709551603
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=5, @@SESSION.AUTO_INCREMENT_OFFSET=7;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	5
+auto_increment_offset	7
+INSERT INTO t1 VALUES (NULL),(NULL);
+SELECT * FROM t1;
+c1
+1
+18446744073709551603
+18446744073709551607
+18446744073709551612
+DROP TABLE t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	1
+auto_increment_offset	1
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+CREATE TABLE t1 (c1 BIGINT AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(NULL);
+INSERT INTO t1 VALUES(-9223372036854775806);
+INSERT INTO t1 VALUES(-9223372036854775807);
+INSERT INTO t1 VALUES(-9223372036854775808);
+SELECT * FROM t1;
+c1
+-9223372036854775808
+-9223372036854775807
+-9223372036854775806
+1
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=3, @@SESSION.AUTO_INCREMENT_OFFSET=3;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	3
+auto_increment_offset	3
+INSERT INTO t1 VALUES (NULL),(NULL), (NULL);
+SELECT * FROM t1;
+c1
+-9223372036854775808
+-9223372036854775807
+-9223372036854775806
+1
+3
+6
+9
+DROP TABLE t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	1
+auto_increment_offset	1
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+CREATE TABLE t1 (c1 BIGINT UNSIGNED AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(NULL);
+INSERT INTO t1 VALUES (18446744073709551610);
+SELECT * FROM t1;
+c1
+1
+18446744073709551610
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1152921504606846976, @@SESSION.AUTO_INCREMENT_OFFSET=1152921504606846976;
+Warnings:
+Warning	1292	Truncated incorrect auto_increment_increment value: '1152921504606846976'
+Warning	1292	Truncated incorrect auto_increment_offset value: '1152921504606846976'
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	65535
+auto_increment_offset	65535
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1;
+c1
+1
+18446744073709551610
+18446744073709551615
+DROP TABLE t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+Variable_name	Value
+auto_increment_increment	1
+auto_increment_offset	1
+CREATE TABLE t1 (c1 DOUBLE NOT NULL AUTO_INCREMENT, c2 INT, PRIMARY KEY (c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(NULL, 1);
+INSERT INTO t1 VALUES(NULL, 2);
+SELECT * FROM t1;
+c1	c2
+1	1
+2	2
+ALTER TABLE t1 CHANGE c1 c1 SERIAL;
+SELECT * FROM t1;
+c1	c2
+1	1
+2	2
+INSERT INTO t1 VALUES(NULL, 3);
+INSERT INTO t1 VALUES(NULL, 4);
+SELECT * FROM t1;
+c1	c2
+1	1
+2	2
+3	3
+4	4
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 FLOAT NOT NULL AUTO_INCREMENT, c2 INT, PRIMARY KEY (c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(NULL, 1);
+INSERT INTO t1 VALUES(NULL, 2);
+SELECT * FROM t1;
+c1	c2
+1	1
+2	2
+ALTER TABLE t1 CHANGE c1 c1 SERIAL;
+SELECT * FROM t1;
+c1	c2
+1	1
+2	2
+INSERT INTO t1 VALUES(NULL, 3);
+INSERT INTO t1 VALUES(NULL, 4);
+SELECT * FROM t1;
+c1	c2
+1	1
+2	2
+3	3
+4	4
+DROP TABLE t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=5;
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+DROP TABLE IF EXISTS t2;
+Warnings:
+Note	1051	Unknown table 't2'
+CREATE TABLE t1 (
+a INT(11) UNSIGNED NOT NULL AUTO_INCREMENT,
+b INT(10) UNSIGNED NOT NULL,
+c ENUM('FALSE','TRUE') DEFAULT NULL,
+PRIMARY KEY (a)) ENGINE = InnoDB;
+CREATE TABLE t2 (
+m INT(11) UNSIGNED NOT NULL AUTO_INCREMENT,
+n INT(10) UNSIGNED NOT NULL,
+o enum('FALSE','TRUE') DEFAULT NULL,
+PRIMARY KEY (m)) ENGINE = InnoDB;
+INSERT INTO t2 (n,o) VALUES
+(1 , 'true'), (1 , 'false'), (2 , 'true'), (2 , 'false'), (3 , 'true'),
+(3 , 'false'), (4 , 'true'), (4 , 'false'), (5 , 'true'), (5 , 'false');
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `m` int(11) unsigned NOT NULL AUTO_INCREMENT,
+  `n` int(10) unsigned NOT NULL,
+  `o` enum('FALSE','TRUE') DEFAULT NULL,
+  PRIMARY KEY (`m`)
+) ENGINE=InnoDB AUTO_INCREMENT=15 DEFAULT CHARSET=latin1
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 ;
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) unsigned NOT NULL AUTO_INCREMENT,
+  `b` int(10) unsigned NOT NULL,
+  `c` enum('FALSE','TRUE') DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB AUTO_INCREMENT=13 DEFAULT CHARSET=latin1
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 ;
+SELECT * FROM t1;
+a	b	c
+1	1	TRUE
+2	1	FALSE
+3	2	TRUE
+4	2	FALSE
+5	3	TRUE
+6	3	FALSE
+7	4	TRUE
+8	4	FALSE
+9	5	TRUE
+10	5	FALSE
+13	1	TRUE
+14	1	FALSE
+15	2	TRUE
+16	2	FALSE
+17	3	TRUE
+18	3	FALSE
+19	4	TRUE
+20	4	FALSE
+21	5	TRUE
+22	5	FALSE
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) unsigned NOT NULL AUTO_INCREMENT,
+  `b` int(10) unsigned NOT NULL,
+  `c` enum('FALSE','TRUE') DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB AUTO_INCREMENT=23 DEFAULT CHARSET=latin1
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 WHERE o = 'false';
+SELECT * FROM t1;
+a	b	c
+1	1	TRUE
+2	1	FALSE
+3	2	TRUE
+4	2	FALSE
+5	3	TRUE
+6	3	FALSE
+7	4	TRUE
+8	4	FALSE
+9	5	TRUE
+10	5	FALSE
+13	1	TRUE
+14	1	FALSE
+15	2	TRUE
+16	2	FALSE
+17	3	TRUE
+18	3	FALSE
+19	4	TRUE
+20	4	FALSE
+21	5	TRUE
+22	5	FALSE
+23	1	FALSE
+24	2	FALSE
+25	3	FALSE
+26	4	FALSE
+27	5	FALSE
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) unsigned NOT NULL AUTO_INCREMENT,
+  `b` int(10) unsigned NOT NULL,
+  `c` enum('FALSE','TRUE') DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB AUTO_INCREMENT=30 DEFAULT CHARSET=latin1
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 WHERE o = 'false';
+SELECT * FROM t1;
+a	b	c
+1	1	TRUE
+2	1	FALSE
+3	2	TRUE
+4	2	FALSE
+5	3	TRUE
+6	3	FALSE
+7	4	TRUE
+8	4	FALSE
+9	5	TRUE
+10	5	FALSE
+13	1	TRUE
+14	1	FALSE
+15	2	TRUE
+16	2	FALSE
+17	3	TRUE
+18	3	FALSE
+19	4	TRUE
+20	4	FALSE
+21	5	TRUE
+22	5	FALSE
+23	1	FALSE
+24	2	FALSE
+25	3	FALSE
+26	4	FALSE
+27	5	FALSE
+30	1	FALSE
+31	2	FALSE
+32	3	FALSE
+33	4	FALSE
+34	5	FALSE
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) unsigned NOT NULL AUTO_INCREMENT,
+  `b` int(10) unsigned NOT NULL,
+  `c` enum('FALSE','TRUE') DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB AUTO_INCREMENT=37 DEFAULT CHARSET=latin1
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 WHERE o = 'false';
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) unsigned NOT NULL AUTO_INCREMENT,
+  `b` int(10) unsigned NOT NULL,
+  `c` enum('FALSE','TRUE') DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB AUTO_INCREMENT=44 DEFAULT CHARSET=latin1
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 WHERE o = 'false';
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) unsigned NOT NULL AUTO_INCREMENT,
+  `b` int(10) unsigned NOT NULL,
+  `c` enum('FALSE','TRUE') DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB AUTO_INCREMENT=51 DEFAULT CHARSET=latin1
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 WHERE o = 'false';
+SELECT * FROM t1;
+a	b	c
+1	1	TRUE
+2	1	FALSE
+3	2	TRUE
+4	2	FALSE
+5	3	TRUE
+6	3	FALSE
+7	4	TRUE
+8	4	FALSE
+9	5	TRUE
+10	5	FALSE
+13	1	TRUE
+14	1	FALSE
+15	2	TRUE
+16	2	FALSE
+17	3	TRUE
+18	3	FALSE
+19	4	TRUE
+20	4	FALSE
+21	5	TRUE
+22	5	FALSE
+23	1	FALSE
+24	2	FALSE
+25	3	FALSE
+26	4	FALSE
+27	5	FALSE
+30	1	FALSE
+31	2	FALSE
+32	3	FALSE
+33	4	FALSE
+34	5	FALSE
+37	1	FALSE
+38	2	FALSE
+39	3	FALSE
+40	4	FALSE
+41	5	FALSE
+44	1	FALSE
+45	2	FALSE
+46	3	FALSE
+47	4	FALSE
+48	5	FALSE
+51	1	FALSE
+52	2	FALSE
+53	3	FALSE
+54	4	FALSE
+55	5	FALSE
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) unsigned NOT NULL AUTO_INCREMENT,
+  `b` int(10) unsigned NOT NULL,
+  `c` enum('FALSE','TRUE') DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB AUTO_INCREMENT=58 DEFAULT CHARSET=latin1
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+DROP TABLE IF EXISTS t2;
+Warnings:
+Note	1051	Unknown table 't2'
+CREATE TABLE t1(
+c1 INT(10) UNSIGNED NOT NULL AUTO_INCREMENT
+PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+CREATE TABLE t2(
+c1 TINYINT(3) UNSIGNED NOT NULL AUTO_INCREMENT
+PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t2 SELECT c1 FROM t1;
+Got one of the listed errors
+INSERT INTO t2 SELECT NULL FROM t1;
+Got one of the listed errors
+DROP TABLE t1;
+DROP TABLE t2;
+CREATE TABLE t1 (c1 INT PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (null);
+INSERT INTO t1 VALUES (null);
+ALTER TABLE t1 CHANGE c1 d1 INT NOT NULL AUTO_INCREMENT;
+SELECT * FROM t1;
+d1
+1
+3
+SELECT * FROM t1;
+d1
+1
+3
+INSERT INTO t1 VALUES(null);
+Got one of the listed errors
+ALTER TABLE t1 AUTO_INCREMENT = 3;
+INSERT INTO t1 VALUES(null);
+SELECT * FROM t1;
+d1
+1
+3
+4
+DROP TABLE t1;
diff --git a/storage/innodb_plugin/mysql-test/innodb-autoinc.test b/storage/innodb_plugin/mysql-test/innodb-autoinc.test
new file mode 100644
index 00000000000..61c42f45733
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-autoinc.test
@@ -0,0 +1,500 @@
+-- source include/have_innodb.inc
+# embedded server ignores 'delayed', so skip this
+-- source include/not_embedded.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+#
+# Bug #34335
+#
+CREATE TABLE t1 (c1 BIGINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (9223372036854775807, null);
+-- error ER_DUP_ENTRY,1062
+INSERT INTO t1 (c2) VALUES ('innodb');
+SELECT * FROM t1;
+DROP TABLE t1;
+#
+## Test AUTOINC overflow
+##
+
+# TINYINT
+CREATE TABLE t1 (c1 TINYINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (127, null);
+-- error ER_DUP_ENTRY,1062
+INSERT INTO t1 (c2) VALUES ('innodb');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (c1 TINYINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (255, null);
+-- error ER_DUP_ENTRY,1062
+INSERT INTO t1 (c2) VALUES ('innodb');
+SELECT * FROM t1;
+DROP TABLE t1;
+#
+# SMALLINT
+#
+CREATE TABLE t1 (c1 SMALLINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (32767, null);
+-- error ER_DUP_ENTRY,1062
+INSERT INTO t1 (c2) VALUES ('innodb');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (c1 SMALLINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (65535, null);
+-- error ER_DUP_ENTRY,1062
+INSERT INTO t1 (c2) VALUES ('innodb');
+SELECT * FROM t1;
+DROP TABLE t1;
+#
+# MEDIUMINT
+#
+CREATE TABLE t1 (c1 MEDIUMINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (8388607, null);
+-- error ER_DUP_ENTRY,1062
+INSERT INTO t1 (c2) VALUES ('innodb');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (c1 MEDIUMINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (16777215, null);
+-- error ER_DUP_ENTRY,1062
+INSERT INTO t1 (c2) VALUES ('innodb');
+SELECT * FROM t1;
+DROP TABLE t1;
+#
+# INT
+#
+CREATE TABLE t1 (c1 INT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (2147483647, null);
+-- error ER_DUP_ENTRY,1062
+INSERT INTO t1 (c2) VALUES ('innodb');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (c1 INT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (4294967295, null);
+-- error ER_DUP_ENTRY,1062
+INSERT INTO t1 (c2) VALUES ('innodb');
+SELECT * FROM t1;
+DROP TABLE t1;
+#
+# BIGINT
+#
+CREATE TABLE t1 (c1 BIGINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (9223372036854775807, null);
+-- error ER_DUP_ENTRY,1062
+INSERT INTO t1 (c2) VALUES ('innodb');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (c1 BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (18446744073709551615, null);
+-- error ER_AUTOINC_READ_FAILED,1467
+INSERT INTO t1 (c2) VALUES ('innodb');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+#
+# Bug 37531
+# After truncate, auto_increment behaves incorrectly for InnoDB
+#
+CREATE TABLE t1(c1 INT PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1), (2), (3);
+INSERT INTO t1 VALUES (NULL), (NULL), (NULL);
+SELECT c1 FROM t1;
+SHOW CREATE TABLE t1;
+TRUNCATE TABLE t1;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (1), (2), (3);
+INSERT INTO t1 VALUES (NULL), (NULL), (NULL);
+SELECT c1 FROM t1;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+#
+# Deleting all records should not reset the AUTOINC counter.
+#
+CREATE TABLE t1(c1 INT PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1), (2), (3);
+INSERT INTO t1 VALUES (NULL), (NULL), (NULL);
+SELECT c1 FROM t1;
+SHOW CREATE TABLE t1;
+DELETE FROM t1;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (1), (2), (3);
+INSERT INTO t1 VALUES (NULL), (NULL), (NULL);
+SELECT c1 FROM t1;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+#
+# Bug 38839
+# Reset the last value generated at end of statement
+#
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 INT AUTO_INCREMENT, c2 INT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (NULL, 1);
+DELETE FROM t1 WHERE c1 = 1;
+INSERT INTO t1 VALUES (2,1); 
+INSERT INTO t1 VALUES (NULL,8);
+SELECT * FROM t1;
+DROP TABLE t1;
+# Bug 38839 -- same as above but for multi value insert
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 INT AUTO_INCREMENT, c2 INT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (NULL, 1);
+DELETE FROM t1 WHERE c1 = 1;
+INSERT INTO t1 VALUES (2,1), (NULL, 8);
+INSERT INTO t1 VALUES (NULL,9);
+SELECT * FROM t1;
+DROP TABLE t1;
+
+#
+# Test changes to AUTOINC next value calculation
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=100, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+SHOW VARIABLES LIKE "%auto_inc%";
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 INT AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (NULL),(5),(NULL);
+INSERT INTO t1 VALUES (250),(NULL);
+SELECT * FROM t1;
+INSERT INTO t1 VALUES (1000);
+SET @@INSERT_ID=400;
+INSERT INTO t1 VALUES(NULL),(NULL);
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# Test with SIGNED INT column, by inserting a 0 for the first column value
+# 0 is treated in the same was NULL.
+# Reset the AUTOINC session variables
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 INT AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(0);
+SELECT * FROM t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=100, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+INSERT INTO t1 VALUES (-1), (NULL),(2),(NULL);
+INSERT INTO t1 VALUES (250),(NULL);
+SELECT * FROM t1;
+SET @@INSERT_ID=400;
+# Duplicate error expected here for autoinc_lock_mode != TRADITIONAL
+-- error ER_DUP_ENTRY,1062
+INSERT INTO t1 VALUES(NULL),(NULL);
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# Test with SIGNED INT column
+# Reset the AUTOINC session variables
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 INT AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(-1);
+SELECT * FROM t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=100, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+SHOW VARIABLES LIKE "%auto_inc%";
+INSERT INTO t1 VALUES (-2), (NULL),(2),(NULL);
+INSERT INTO t1 VALUES (250),(NULL);
+SELECT * FROM t1;
+INSERT INTO t1 VALUES (1000);
+SET @@INSERT_ID=400;
+INSERT INTO t1 VALUES(NULL),(NULL);
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# Test with UNSIGNED INT column, single insert
+# The sign in the value is ignored and a new column value is generated
+# Reset the AUTOINC session variables
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 INT UNSIGNED AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(-1);
+SELECT * FROM t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=100, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+SHOW VARIABLES LIKE "%auto_inc%";
+INSERT INTO t1 VALUES (-2);
+INSERT INTO t1 VALUES (NULL);
+INSERT INTO t1 VALUES (2);
+INSERT INTO t1 VALUES (NULL);
+INSERT INTO t1 VALUES (250);
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1;
+INSERT INTO t1 VALUES (1000);
+SET @@INSERT_ID=400;
+INSERT INTO t1 VALUES(NULL);
+INSERT INTO t1 VALUES(NULL);
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# Test with UNSIGNED INT column, multi-value inserts
+# The sign in the value is ignored and a new column value is generated
+# Reset the AUTOINC session variables
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 INT UNSIGNED AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(-1);
+SELECT * FROM t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=100, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+SHOW VARIABLES LIKE "%auto_inc%";
+INSERT INTO t1 VALUES (-2),(NULL),(2),(NULL);
+INSERT INTO t1 VALUES (250),(NULL);
+SELECT * FROM t1;
+INSERT INTO t1 VALUES (1000);
+SET @@INSERT_ID=400;
+# Duplicate error expected here for autoinc_lock_mode != TRADITIONAL
+-- error ER_DUP_ENTRY,1062
+INSERT INTO t1 VALUES(NULL),(NULL);
+SELECT * FROM t1;
+DROP TABLE t1;
+
+#
+# Check for overflow handling when increment is > 1
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 BIGINT AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+# TODO: Fix the autoinc init code
+# We have to do this because of a bug in the AUTOINC init code.
+INSERT INTO t1 VALUES(NULL);
+INSERT INTO t1 VALUES (9223372036854775794); #-- 2^63 - 14
+SELECT * FROM t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=2, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+SHOW VARIABLES LIKE "%auto_inc%";
+# This should just fit
+INSERT INTO t1 VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+SELECT * FROM t1;
+DROP TABLE t1;
+
+#
+# Check for overflow handling when increment and offser are > 1
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 BIGINT UNSIGNED AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+# TODO: Fix the autoinc init code
+# We have to do this because of a bug in the AUTOINC init code.
+INSERT INTO t1 VALUES(NULL);
+INSERT INTO t1 VALUES (18446744073709551603); #-- 2^64 - 13
+SELECT * FROM t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=2, @@SESSION.AUTO_INCREMENT_OFFSET=10;
+SHOW VARIABLES LIKE "%auto_inc%";
+# This should fail because of overflow but it doesn't, it seems to be
+# a MySQL server bug. It wraps around to 0 for the last value.
+# See MySQL Bug# 39828
+#
+# Instead of wrapping around, it asserts when MySQL is compiled --with-debug
+# (see sql/handler.cc:handler::update_auto_increment()).  Don't test for
+# overflow until Bug #39828 is fixed.
+#
+# Since this asserts when compiled --with-debug, we can't properly test this
+# until Bug #39828 is fixed.  For now, this test is meaningless.
+#if Bug #39828 is fixed
+#INSERT INTO t1 VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+#else
+INSERT INTO t1 VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+#endif
+SELECT * FROM t1;
+DROP TABLE t1;
+
+#
+# Check for overflow handling when increment and offset are odd numbers
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 BIGINT UNSIGNED AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+# TODO: Fix the autoinc init code
+# We have to do this because of a bug in the AUTOINC init code.
+INSERT INTO t1 VALUES(NULL);
+INSERT INTO t1 VALUES (18446744073709551603); #-- 2^64 - 13
+SELECT * FROM t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=5, @@SESSION.AUTO_INCREMENT_OFFSET=7;
+SHOW VARIABLES LIKE "%auto_inc%";
+# This should fail because of overflow but it doesn't. It fails with
+# a duplicate entry message because of a MySQL server bug, it wraps
+# around.  See MySQL Bug# 39828, once MySQL fix the bug we can replace
+# the ER_DUP_ENTRY, 1062 below with the appropriate error message
+#
+# Since this asserts when compiled --with-debug, we can't properly test this
+# until Bug #39828 is fixed.  For now, this test is meaningless.
+#if Bug #39828 is fixed
+# Still need to fix this error code, error should mention overflow
+#-- error ER_DUP_ENTRY,1062
+#INSERT INTO t1 VALUES (NULL),(NULL), (NULL);
+#else
+INSERT INTO t1 VALUES (NULL),(NULL);
+#endif
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# Check for overflow handling when increment and offset are odd numbers
+# and check for large -ve numbers
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 BIGINT AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+# TODO: Fix the autoinc init code
+# We have to do this because of a bug in the AUTOINC init code.
+INSERT INTO t1 VALUES(NULL);
+INSERT INTO t1 VALUES(-9223372036854775806); #-- -2^63 + 2
+INSERT INTO t1 VALUES(-9223372036854775807); #-- -2^63 + 1
+INSERT INTO t1 VALUES(-9223372036854775808); #-- -2^63
+SELECT * FROM t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=3, @@SESSION.AUTO_INCREMENT_OFFSET=3;
+SHOW VARIABLES LIKE "%auto_inc%";
+INSERT INTO t1 VALUES (NULL),(NULL), (NULL);
+SELECT * FROM t1;
+DROP TABLE t1;
+#
+# Check for overflow handling when increment and offset are very
+# large numbers 2^60
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 BIGINT UNSIGNED AUTO_INCREMENT, PRIMARY KEY(c1)) ENGINE=InnoDB;
+# TODO: Fix the autoinc init code
+# We have to do this because of a bug in the AUTOINC init code.
+INSERT INTO t1 VALUES(NULL);
+INSERT INTO t1 VALUES (18446744073709551610); #-- 2^64 - 2
+SELECT * FROM t1;
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1152921504606846976, @@SESSION.AUTO_INCREMENT_OFFSET=1152921504606846976;
+SHOW VARIABLES LIKE "%auto_inc%";
+# This should fail because of overflow but it doesn't. It wraps around
+# and the autoinc values look bogus too.
+# See MySQL Bug# 39828, once MySQL fix the bug we can enable the error
+# code expected test.
+# -- error ER_AUTOINC_READ_FAILED,1467
+#
+# Since this asserts when compiled --with-debug, we can't properly test this
+# until Bug #39828 is fixed.  For now, this test is meaningless.
+#if Bug #39828 is fixed
+#-- error ER_AUTOINC_READ_FAILED,1467
+#INSERT INTO t1 VALUES (NULL),(NULL);
+#else
+INSERT INTO t1 VALUES (NULL);
+#endif
+SELECT * FROM t1;
+DROP TABLE t1;
+
+#
+# Check for floating point autoinc column handling
+#
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
+SET @@INSERT_ID=1;
+SHOW VARIABLES LIKE "%auto_inc%";
+CREATE TABLE t1 (c1 DOUBLE NOT NULL AUTO_INCREMENT, c2 INT, PRIMARY KEY (c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(NULL, 1);
+INSERT INTO t1 VALUES(NULL, 2);
+SELECT * FROM t1;
+ALTER TABLE t1 CHANGE c1 c1 SERIAL;
+SELECT * FROM t1;
+INSERT INTO t1 VALUES(NULL, 3);
+INSERT INTO t1 VALUES(NULL, 4);
+SELECT * FROM t1;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 FLOAT NOT NULL AUTO_INCREMENT, c2 INT, PRIMARY KEY (c1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(NULL, 1);
+INSERT INTO t1 VALUES(NULL, 2);
+SELECT * FROM t1;
+ALTER TABLE t1 CHANGE c1 c1 SERIAL;
+SELECT * FROM t1;
+INSERT INTO t1 VALUES(NULL, 3);
+INSERT INTO t1 VALUES(NULL, 4);
+SELECT * FROM t1;
+DROP TABLE t1;
+
+#
+# Bug# 42714: AUTOINC column calculated next value not greater than highest
+# value stored in table.
+#
+SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=5;
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+CREATE TABLE t1 (
+  a INT(11) UNSIGNED NOT NULL AUTO_INCREMENT,
+  b INT(10) UNSIGNED NOT NULL,
+  c ENUM('FALSE','TRUE') DEFAULT NULL,
+  PRIMARY KEY (a)) ENGINE = InnoDB;
+CREATE TABLE t2 (
+  m INT(11) UNSIGNED NOT NULL AUTO_INCREMENT,
+  n INT(10) UNSIGNED NOT NULL,
+  o enum('FALSE','TRUE') DEFAULT NULL,
+  PRIMARY KEY (m)) ENGINE = InnoDB;
+INSERT INTO t2 (n,o) VALUES
+  (1 , 'true'), (1 , 'false'), (2 , 'true'), (2 , 'false'), (3 , 'true'),
+  (3 , 'false'), (4 , 'true'), (4 , 'false'), (5 , 'true'), (5 , 'false');
+SHOW CREATE TABLE t2;
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 ;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 ;
+SELECT * FROM t1;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 WHERE o = 'false';
+SELECT * FROM t1;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 WHERE o = 'false';
+SELECT * FROM t1;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 WHERE o = 'false';
+SHOW CREATE TABLE t1;
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 WHERE o = 'false';
+SHOW CREATE TABLE t1;
+INSERT INTO t1 (b,c) SELECT n,o FROM t2 WHERE o = 'false';
+SELECT * FROM t1;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+DROP TABLE t2;
+#
+# 43203: Overflow from auto incrementing causes server segv
+#
+
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+CREATE TABLE t1(
+   c1 INT(10) UNSIGNED NOT NULL AUTO_INCREMENT
+   PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);
+CREATE TABLE t2(
+    c1 TINYINT(3) UNSIGNED NOT NULL AUTO_INCREMENT
+    PRIMARY KEY) ENGINE=InnoDB;
+-- error ER_DUP_ENTRY,1062
+INSERT INTO t2 SELECT c1 FROM t1;
+-- error ER_DUP_ENTRY,1467
+INSERT INTO t2 SELECT NULL FROM t1;
+DROP TABLE t1;
+DROP TABLE t2;
+#
+# 44030: Error: (1500) Couldn't read the MAX(ID) autoinc value from
+# the index (PRIMARY)
+# This test requires a restart of the server
+CREATE TABLE t1 (c1 INT PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (null);
+INSERT INTO t1 VALUES (null);
+ALTER TABLE t1 CHANGE c1 d1 INT NOT NULL AUTO_INCREMENT;
+SELECT * FROM t1;
+# Restart the server
+-- source include/restart_mysqld.inc
+# The MySQL and InnoDB data dictionaries should now be out of sync.
+# The select should print message to the error log
+SELECT * FROM t1;
+-- error ER_AUTOINC_READ_FAILED,1467
+INSERT INTO t1 VALUES(null);
+ALTER TABLE t1 AUTO_INCREMENT = 3;
+INSERT INTO t1 VALUES(null);
+SELECT * FROM t1;
+DROP TABLE t1;
diff --git a/storage/innodb_plugin/mysql-test/innodb-index.inc b/storage/innodb_plugin/mysql-test/innodb-index.inc
new file mode 100644
index 00000000000..37de3162abe
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-index.inc
@@ -0,0 +1,26 @@
+--eval create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb default charset=$charset
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
+commit;
+--error ER_DUP_ENTRY
+alter table t1 add unique index (b);
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+show create table t1;
+alter table t1 add index (b);
+insert into t1 values(10,10,'kkk','iii');
+select * from t1;
+select * from t1 force index(b) order by b;
+explain select * from t1 force index(b) order by b;
+show create table t1;
+alter table t1 add unique index (c), add index (d);
+insert into t1 values(11,11,'aaa','mmm');
+select * from t1;
+select * from t1 force index(b) order by b;
+select * from t1 force index(c) order by c;
+select * from t1 force index(d) order by d;
+explain select * from t1 force index(b) order by b;
+explain select * from t1 force index(c) order by c;
+explain select * from t1 force index(d) order by d;
+show create table t1;
+check table t1;
+drop table t1;
diff --git a/storage/innodb_plugin/mysql-test/innodb-index.result b/storage/innodb_plugin/mysql-test/innodb-index.result
new file mode 100644
index 00000000000..a7d66b15300
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-index.result
@@ -0,0 +1,1170 @@
+create table t1(a int not null, b int, c char(10) not null, d varchar(20)) engine = innodb;
+insert into t1 values (5,5,'oo','oo'),(4,4,'tr','tr'),(3,4,'ad','ad'),(2,3,'ak','ak');
+commit;
+alter table t1 add index b (b), add index b (b);
+ERROR 42000: Duplicate key name 'b'
+alter table t1 add index (b,b);
+ERROR 42S21: Duplicate column name 'b'
+alter table t1 add index d2 (d);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  KEY `d2` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+explain select * from t1 force index(d2) order by d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	d2	23	NULL	4	
+select * from t1 force index (d2) order by d;
+a	b	c	d
+3	4	ad	ad
+2	3	ak	ak
+5	5	oo	oo
+4	4	tr	tr
+alter table t1 add unique index (b);
+ERROR 23000: Duplicate entry '4' for key 'b'
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  KEY `d2` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 add index (b);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  KEY `d2` (`d`),
+  KEY `b` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+CREATE TABLE `t1#1`(a INT PRIMARY KEY) ENGINE=InnoDB;
+alter table t1 add unique index (c), add index (d);
+ERROR HY000: Table 'test.t1#1' already exists
+rename table `t1#1` to `t1#2`;
+alter table t1 add unique index (c), add index (d);
+ERROR HY000: Table 'test.t1#2' already exists
+drop table `t1#2`;
+alter table t1 add unique index (c), add index (d);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  UNIQUE KEY `c` (`c`),
+  KEY `d2` (`d`),
+  KEY `b` (`b`),
+  KEY `d` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	10	NULL	4	
+alter table t1 add primary key (a), drop index c;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `d2` (`d`),
+  KEY `b` (`b`),
+  KEY `d` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 add primary key (c);
+ERROR 42000: Multiple primary key defined
+alter table t1 drop primary key, add primary key (b);
+ERROR 23000: Duplicate entry '4' for key 'PRIMARY'
+create unique index c on t1 (c);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `c` (`c`),
+  KEY `d2` (`d`),
+  KEY `b` (`b`),
+  KEY `d` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	10	NULL	4	
+select * from t1 force index(c) order by c;
+a	b	c	d
+3	4	ad	ad
+2	3	ak	ak
+5	5	oo	oo
+4	4	tr	tr
+alter table t1 drop index b, add index (b);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `c` (`c`),
+  KEY `d2` (`d`),
+  KEY `d` (`d`),
+  KEY `b` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+insert into t1 values(6,1,'ggg','ggg');
+select * from t1;
+a	b	c	d
+2	3	ak	ak
+3	4	ad	ad
+4	4	tr	tr
+5	5	oo	oo
+6	1	ggg	ggg
+select * from t1 force index(b) order by b;
+a	b	c	d
+6	1	ggg	ggg
+2	3	ak	ak
+3	4	ad	ad
+4	4	tr	tr
+5	5	oo	oo
+select * from t1 force index(c) order by c;
+a	b	c	d
+3	4	ad	ad
+2	3	ak	ak
+6	1	ggg	ggg
+5	5	oo	oo
+4	4	tr	tr
+select * from t1 force index(d) order by d;
+a	b	c	d
+3	4	ad	ad
+2	3	ak	ak
+6	1	ggg	ggg
+5	5	oo	oo
+4	4	tr	tr
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	5	NULL	5	
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	10	NULL	5	
+explain select * from t1 force index(d) order by d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	d	23	NULL	5	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) NOT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `c` (`c`),
+  KEY `d2` (`d`),
+  KEY `d` (`d`),
+  KEY `b` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,3,'ad','ad'),(4,4,'afe','afe');
+commit;
+alter table t1 add index (c(2));
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `c` (`c`(2))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 add unique index (d(10));
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `d` (`d`(10)),
+  KEY `c` (`c`(2))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+insert into t1 values(5,1,'ggg','ggg');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	3	ad	ad
+4	4	afe	afe
+5	1	ggg	ggg
+select * from t1 force index(c) order by c;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	3	ad	ad
+4	4	afe	afe
+5	1	ggg	ggg
+select * from t1 force index(d) order by d;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	3	ad	ad
+4	4	afe	afe
+5	1	ggg	ggg
+explain select * from t1 order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	5	Using filesort
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	5	Using filesort
+explain select * from t1 force index(d) order by d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	5	Using filesort
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `d` (`d`(10)),
+  KEY `c` (`c`(2))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 drop index d;
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	3	ad	ad
+4	4	afe	afe
+5	1	ggg	ggg
+8	9	fff	fff
+select * from t1 force index(c) order by c;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	3	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+5	1	ggg	ggg
+explain select * from t1 order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	6	Using filesort
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	6	Using filesort
+explain select * from t1 order by d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	6	Using filesort
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `c` (`c`(2))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
+commit;
+alter table t1 add unique index (b,c);
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	16	NULL	5	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `b` (`b`,`c`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 add index (b,c);
+insert into t1 values(11,11,'kkk','kkk');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+11	11	kkk	kkk
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+11	11	kkk	kkk
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	16	NULL	6	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `b` (`b`,`c`),
+  KEY `b_2` (`b`,`c`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 add unique index (c,d);
+insert into t1 values(13,13,'yyy','aaa');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+11	11	kkk	kkk
+13	13	yyy	aaa
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+11	11	kkk	kkk
+13	13	yyy	aaa
+select * from t1 force index(c) order by c;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+11	11	kkk	kkk
+13	13	yyy	aaa
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	16	NULL	7	
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	34	NULL	7	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `b` (`b`,`c`),
+  UNIQUE KEY `c` (`c`,`d`),
+  KEY `b_2` (`b`,`c`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, b int not null, c int, primary key (a), key (b)) engine = innodb;
+create table t3(a int not null, c int not null, d int, primary key (a), key (c)) engine = innodb;
+create table t4(a int not null, d int not null, e int, primary key (a), key (d)) engine = innodb;
+create table t2(a int not null, b int not null, c int not null, d int not null, e int,
+foreign key (b) references t1(b) on delete cascade,
+foreign key (c) references t3(c), foreign key (d) references t4(d))
+engine = innodb;
+alter table t1 drop index b;
+ERROR HY000: Cannot drop index 'b': needed in a foreign key constraint
+alter table t3 drop index c;
+ERROR HY000: Cannot drop index 'c': needed in a foreign key constraint
+alter table t4 drop index d;
+ERROR HY000: Cannot drop index 'd': needed in a foreign key constraint
+alter table t2 drop index b;
+ERROR HY000: Cannot drop index 'b': needed in a foreign key constraint
+alter table t2 drop index b, drop index c, drop index d;
+ERROR HY000: Cannot drop index 'b': needed in a foreign key constraint
+create unique index dc on t2 (d,c);
+create index dc on t1 (b,c);
+alter table t2 add primary key (a);
+insert into t1 values (1,1,1);
+insert into t3 values (1,1,1);
+insert into t4 values (1,1,1);
+insert into t2 values (1,1,1,1,1);
+commit;
+alter table t4 add constraint dc foreign key (a) references t1(a);
+show create table t4;
+Table	Create Table
+t4	CREATE TABLE `t4` (
+  `a` int(11) NOT NULL,
+  `d` int(11) NOT NULL,
+  `e` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `d` (`d`),
+  CONSTRAINT `dc` FOREIGN KEY (`a`) REFERENCES `t1` (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t3 add constraint dc foreign key (a) references t1(a);
+ERROR HY000: Can't create table '#sql-temporary' (errno: 121)
+show create table t3;
+Table	Create Table
+t3	CREATE TABLE `t3` (
+  `a` int(11) NOT NULL,
+  `c` int(11) NOT NULL,
+  `d` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `c` (`c`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t2 drop index b, add index (b);
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `a` int(11) NOT NULL,
+  `b` int(11) NOT NULL,
+  `c` int(11) NOT NULL,
+  `d` int(11) NOT NULL,
+  `e` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `dc` (`d`,`c`),
+  KEY `c` (`c`),
+  KEY `b` (`b`),
+  CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`b`) REFERENCES `t1` (`b`) ON DELETE CASCADE,
+  CONSTRAINT `t2_ibfk_2` FOREIGN KEY (`c`) REFERENCES `t3` (`c`),
+  CONSTRAINT `t2_ibfk_3` FOREIGN KEY (`d`) REFERENCES `t4` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+delete from t1;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t4`, CONSTRAINT `dc` FOREIGN KEY (`a`) REFERENCES `t1` (`a`))
+drop index dc on t4;
+ERROR 42000: Can't DROP 'dc'; check that column/key exists
+alter table t3 drop foreign key dc;
+ERROR HY000: Error on rename of './test/t3' to '#sql2-temporary' (errno: 152)
+alter table t4 drop foreign key dc;
+select * from t2;
+a	b	c	d	e
+1	1	1	1	1
+delete from t1;
+select * from t2;
+a	b	c	d	e
+drop table t2,t4,t3,t1;
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb default charset=utf8;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
+commit;
+alter table t1 add unique index (b);
+ERROR 23000: Duplicate entry '2' for key 'b'
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8
+alter table t1 add index (b);
+insert into t1 values(10,10,'kkk','iii');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	5	NULL	6	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `b` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8
+alter table t1 add unique index (c), add index (d);
+insert into t1 values(11,11,'aaa','mmm');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+11	11	aaa	mmm
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+11	11	aaa	mmm
+select * from t1 force index(c) order by c;
+a	b	c	d
+11	11	aaa	mmm
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+select * from t1 force index(d) order by d;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+11	11	aaa	mmm
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	5	NULL	7	
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	31	NULL	7	
+explain select * from t1 force index(d) order by d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	d	63	NULL	7	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `c` (`c`),
+  KEY `b` (`b`),
+  KEY `d` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+drop table t1;
+create table t1(a int not null, b int) engine = innodb;
+insert into t1 values (1,1),(1,1),(1,1),(1,1);
+alter table t1 add unique index (a);
+ERROR 23000: Duplicate entry '1' for key 'a'
+alter table t1 add unique index (b);
+ERROR 23000: Duplicate entry '1' for key 'b'
+alter table t1 add unique index (a), add unique index(b);
+ERROR 23000: Duplicate entry '1' for key 'a'
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, c int not null,b int, primary key(a), unique key(c), key(b)) engine = innodb;
+alter table t1 drop index c, drop index b;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `c` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, b int, primary key(a)) engine = innodb;
+alter table t1 add index (b);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `b` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,3,'ac','ac'),(4,4,'afe','afe'),(5,4,'affe','affe');
+alter table t1 add unique index (b), add unique index (c), add unique index (d);
+ERROR 23000: Duplicate entry '4' for key 'b'
+alter table t1 add unique index (c), add unique index (b), add index (d);
+ERROR 23000: Duplicate entry 'ac' for key 'c'
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1(a int not null, b int not null, c int, primary key (a), key(c)) engine=innodb;
+insert into t1 values (5,1,5),(4,2,4),(3,3,3),(2,4,2),(1,5,1);
+alter table t1 add unique index (b);
+insert into t1 values (10,20,20),(11,19,19),(12,18,18),(13,17,17);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) NOT NULL,
+  `c` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `b` (`b`),
+  KEY `c` (`c`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	5	NULL	9	
+explain select * from t1 order by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	9	
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	4	NULL	9	
+select * from t1 order by a;
+a	b	c
+1	5	1
+2	4	2
+3	3	3
+4	2	4
+5	1	5
+10	20	20
+11	19	19
+12	18	18
+13	17	17
+select * from t1 force index(b) order by b;
+a	b	c
+5	1	5
+4	2	4
+3	3	3
+2	4	2
+1	5	1
+13	17	17
+12	18	18
+11	19	19
+10	20	20
+select * from t1 force index(c) order by c;
+a	b	c
+1	5	1
+2	4	2
+3	3	3
+4	2	4
+5	1	5
+13	17	17
+12	18	18
+11	19	19
+10	20	20
+drop table t1;
+create table t1(a int not null, b int not null) engine=innodb;
+insert into t1 values (1,1);
+alter table t1 add primary key(b);
+insert into t1 values (2,2);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) NOT NULL,
+  PRIMARY KEY (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+select * from t1;
+a	b
+1	1
+2	2
+explain select * from t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	2	
+explain select * from t1 order by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	2	Using filesort
+explain select * from t1 order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	2	
+checksum table t1;
+Table	Checksum
+test.t1	582702641
+drop table t1;
+create table t1(a int not null) engine=innodb;
+insert into t1 values (1);
+alter table t1 add primary key(a);
+insert into t1 values (2);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+commit;
+select * from t1;
+a
+1
+2
+explain select * from t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	2	Using index
+explain select * from t1 order by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	2	Using index
+drop table t1;
+create table t2(d varchar(17) primary key) engine=innodb default charset=utf8;
+create table t3(a int primary key) engine=innodb;
+insert into t3 values(22),(44),(33),(55),(66);
+insert into t2 values ('jejdkrun87'),('adfd72nh9k'),
+('adfdpplkeock'),('adfdijnmnb78k'),('adfdijn0loKNHJik');
+create table t1(a int, b blob, c text, d text not null)
+engine=innodb default charset = utf8;
+insert into t1
+select a,left(repeat(d,100*a),65535),repeat(d,20*a),d from t2,t3;
+drop table t2, t3;
+select count(*) from t1 where a=44;
+count(*)
+5
+select a,
+length(b),b=left(repeat(d,100*a),65535),length(c),c=repeat(d,20*a),d from t1;
+a	length(b)	b=left(repeat(d,100*a),65535)	length(c)	c=repeat(d,20*a)	d
+22	22000	1	4400	1	adfd72nh9k
+22	35200	1	7040	1	adfdijn0loKNHJik
+22	28600	1	5720	1	adfdijnmnb78k
+22	26400	1	5280	1	adfdpplkeock
+22	22000	1	4400	1	jejdkrun87
+33	33000	1	6600	1	adfd72nh9k
+33	52800	1	10560	1	adfdijn0loKNHJik
+33	42900	1	8580	1	adfdijnmnb78k
+33	39600	1	7920	1	adfdpplkeock
+33	33000	1	6600	1	jejdkrun87
+44	44000	1	8800	1	adfd72nh9k
+44	65535	1	14080	1	adfdijn0loKNHJik
+44	57200	1	11440	1	adfdijnmnb78k
+44	52800	1	10560	1	adfdpplkeock
+44	44000	1	8800	1	jejdkrun87
+55	55000	1	11000	1	adfd72nh9k
+55	65535	1	17600	1	adfdijn0loKNHJik
+55	65535	1	14300	1	adfdijnmnb78k
+55	65535	1	13200	1	adfdpplkeock
+55	55000	1	11000	1	jejdkrun87
+66	65535	1	13200	1	adfd72nh9k
+66	65535	1	21120	1	adfdijn0loKNHJik
+66	65535	1	17160	1	adfdijnmnb78k
+66	65535	1	15840	1	adfdpplkeock
+66	65535	1	13200	1	jejdkrun87
+alter table t1 add primary key (a), add key (b(20));
+ERROR 23000: Duplicate entry '22' for key 'PRIMARY'
+delete from t1 where a%2;
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+alter table t1 add primary key (a,b(255),c(255)), add key (b(767));
+select count(*) from t1 where a=44;
+count(*)
+5
+select a,
+length(b),b=left(repeat(d,100*a),65535),length(c),c=repeat(d,20*a),d from t1;
+a	length(b)	b=left(repeat(d,100*a),65535)	length(c)	c=repeat(d,20*a)	d
+22	22000	1	4400	1	adfd72nh9k
+22	35200	1	7040	1	adfdijn0loKNHJik
+22	28600	1	5720	1	adfdijnmnb78k
+22	26400	1	5280	1	adfdpplkeock
+22	22000	1	4400	1	jejdkrun87
+44	44000	1	8800	1	adfd72nh9k
+44	65535	1	14080	1	adfdijn0loKNHJik
+44	57200	1	11440	1	adfdijnmnb78k
+44	52800	1	10560	1	adfdpplkeock
+44	44000	1	8800	1	jejdkrun87
+66	65535	1	13200	1	adfd72nh9k
+66	65535	1	21120	1	adfdijn0loKNHJik
+66	65535	1	17160	1	adfdijnmnb78k
+66	65535	1	15840	1	adfdpplkeock
+66	65535	1	13200	1	jejdkrun87
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL DEFAULT '0',
+  `b` blob NOT NULL,
+  `c` text NOT NULL,
+  `d` text NOT NULL,
+  PRIMARY KEY (`a`,`b`(255),`c`(255)),
+  KEY `b` (`b`(767))
+) ENGINE=InnoDB DEFAULT CHARSET=utf8
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+explain select * from t1 where b like 'adfd%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	b	NULL	NULL	NULL	15	Using where
+create table t2(a int, b varchar(255), primary key(a,b)) engine=innodb;
+insert into t2 select a,left(b,255) from t1;
+drop table t1;
+rename table t2 to t1;
+set innodb_lock_wait_timeout=1;
+begin;
+select a from t1 limit 1 for update;
+a
+22
+set innodb_lock_wait_timeout=1;
+create index t1ba on t1 (b,a);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+commit;
+begin;
+select a from t1 limit 1 lock in share mode;
+a
+22
+create index t1ba on t1 (b,a);
+drop index t1ba on t1;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+commit;
+explain select a from t1 order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	t1ba	261	NULL	15	Using index
+select a,sleep(2+a/100) from t1 order by b limit 3;
+select sleep(1);
+sleep(1)
+0
+drop index t1ba on t1;
+a	sleep(2+a/100)
+22	0
+44	0
+66	0
+explain select a from t1 order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	261	NULL	15	Using index; Using filesort
+select a from t1 order by b limit 3;
+a
+22
+66
+44
+commit;
+drop table t1;
+set global innodb_file_per_table=on;
+set global innodb_file_format='Barracuda';
+create table t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob,h blob,
+i blob,j blob,k blob,l blob,m blob,n blob,o blob,p blob,
+q blob,r blob,s blob,t blob,u blob)
+engine=innodb row_format=dynamic;
+create index t1a on t1 (a(1));
+create index t1b on t1 (b(1));
+create index t1c on t1 (c(1));
+create index t1d on t1 (d(1));
+create index t1e on t1 (e(1));
+create index t1f on t1 (f(1));
+create index t1g on t1 (g(1));
+create index t1h on t1 (h(1));
+create index t1i on t1 (i(1));
+create index t1j on t1 (j(1));
+create index t1k on t1 (k(1));
+create index t1l on t1 (l(1));
+create index t1m on t1 (m(1));
+create index t1n on t1 (n(1));
+create index t1o on t1 (o(1));
+create index t1p on t1 (p(1));
+create index t1q on t1 (q(1));
+create index t1r on t1 (r(1));
+create index t1s on t1 (s(1));
+create index t1t on t1 (t(1));
+create index t1u on t1 (u(1));
+ERROR HY000: Too big row
+create index t1ut on t1 (u(1), t(1));
+ERROR HY000: Too big row
+create index t1st on t1 (s(1), t(1));
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` blob,
+  `b` blob,
+  `c` blob,
+  `d` blob,
+  `e` blob,
+  `f` blob,
+  `g` blob,
+  `h` blob,
+  `i` blob,
+  `j` blob,
+  `k` blob,
+  `l` blob,
+  `m` blob,
+  `n` blob,
+  `o` blob,
+  `p` blob,
+  `q` blob,
+  `r` blob,
+  `s` blob,
+  `t` blob,
+  `u` blob,
+  KEY `t1a` (`a`(1)),
+  KEY `t1b` (`b`(1)),
+  KEY `t1c` (`c`(1)),
+  KEY `t1d` (`d`(1)),
+  KEY `t1e` (`e`(1)),
+  KEY `t1f` (`f`(1)),
+  KEY `t1g` (`g`(1)),
+  KEY `t1h` (`h`(1)),
+  KEY `t1i` (`i`(1)),
+  KEY `t1j` (`j`(1)),
+  KEY `t1k` (`k`(1)),
+  KEY `t1l` (`l`(1)),
+  KEY `t1m` (`m`(1)),
+  KEY `t1n` (`n`(1)),
+  KEY `t1o` (`o`(1)),
+  KEY `t1p` (`p`(1)),
+  KEY `t1q` (`q`(1)),
+  KEY `t1r` (`r`(1)),
+  KEY `t1s` (`s`(1)),
+  KEY `t1t` (`t`(1)),
+  KEY `t1st` (`s`(1),`t`(1))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC
+create index t1u on t1 (u(1));
+ERROR HY000: Too big row
+alter table t1 row_format=compact;
+create index t1u on t1 (u(1));
+drop table t1;
+set global innodb_file_per_table=0;
+set global innodb_file_format=Antelope;
+SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
+SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
+CREATE TABLE t1(
+c1	BIGINT(12) NOT NULL,
+PRIMARY KEY (c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+CREATE TABLE t2(
+c1	BIGINT(16) NOT NULL,
+c2	BIGINT(12) NOT NULL,
+c3	BIGINT(12) NOT NULL,
+PRIMARY KEY (c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3) REFERENCES t1(c1);
+SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
+SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`),
+  KEY `fk_t2_ca` (`c3`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+CREATE INDEX i_t2_c3_c2 ON t2(c3, c2);
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`),
+  KEY `i_t2_c3_c2` (`c3`,`c2`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
+SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
+INSERT INTO t2 VALUES(0,0,0);
+ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`))
+INSERT INTO t1 VALUES(0);
+INSERT INTO t2 VALUES(0,0,0);
+DROP TABLE t2;
+CREATE TABLE t2(
+c1	BIGINT(16) NOT NULL,
+c2	BIGINT(12) NOT NULL,
+c3	BIGINT(12) NOT NULL,
+PRIMARY KEY (c1,c2,c3)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3) REFERENCES t1(c1);
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`,`c2`,`c3`),
+  KEY `fk_t2_ca` (`c3`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+CREATE INDEX i_t2_c3_c2 ON t2(c3, c2);
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`,`c2`,`c3`),
+  KEY `i_t2_c3_c2` (`c3`,`c2`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+INSERT INTO t2 VALUES(0,0,1);
+ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`))
+INSERT INTO t2 VALUES(0,0,0);
+DELETE FROM t1;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`) REFERENCES `t1` (`c1`))
+DELETE FROM t2;
+DROP TABLE t2;
+DROP TABLE t1;
+CREATE TABLE t1(
+c1	BIGINT(12) NOT NULL,
+c2	INT(4) NOT NULL,
+PRIMARY KEY (c2,c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+CREATE TABLE t2(
+c1	BIGINT(16) NOT NULL,
+c2	BIGINT(12) NOT NULL,
+c3	BIGINT(12) NOT NULL,
+PRIMARY KEY (c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3,c2) REFERENCES t1(c1,c1);
+ERROR HY000: Can't create table '#sql-temporary' (errno: 150)
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3,c2) REFERENCES t1(c1,c2);
+ERROR HY000: Can't create table '#sql-temporary' (errno: 150)
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3,c2) REFERENCES t1(c2,c1);
+ERROR HY000: Can't create table '#sql-temporary' (errno: 150)
+ALTER TABLE t1 MODIFY COLUMN c2 BIGINT(12) NOT NULL;
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3,c2) REFERENCES t1(c1,c2);
+ERROR HY000: Can't create table '#sql-temporary' (errno: 150)
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+FOREIGN KEY (c3,c2) REFERENCES t1(c2,c1);
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `c1` bigint(12) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  PRIMARY KEY (`c2`,`c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`),
+  KEY `fk_t2_ca` (`c3`,`c2`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`, `c2`) REFERENCES `t1` (`c2`, `c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+CREATE INDEX i_t2_c2_c1 ON t2(c2, c1);
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`),
+  KEY `fk_t2_ca` (`c3`,`c2`),
+  KEY `i_t2_c2_c1` (`c2`,`c1`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`, `c2`) REFERENCES `t1` (`c2`, `c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+CREATE INDEX i_t2_c3_c1_c2 ON t2(c3, c1, c2);
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`),
+  KEY `fk_t2_ca` (`c3`,`c2`),
+  KEY `i_t2_c2_c1` (`c2`,`c1`),
+  KEY `i_t2_c3_c1_c2` (`c3`,`c1`,`c2`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`, `c2`) REFERENCES `t1` (`c2`, `c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+CREATE INDEX i_t2_c3_c2 ON t2(c3, c2);
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `c1` bigint(16) NOT NULL,
+  `c2` bigint(12) NOT NULL,
+  `c3` bigint(12) NOT NULL,
+  PRIMARY KEY (`c1`),
+  KEY `i_t2_c2_c1` (`c2`,`c1`),
+  KEY `i_t2_c3_c1_c2` (`c3`,`c1`,`c2`),
+  KEY `i_t2_c3_c2` (`c3`,`c2`),
+  CONSTRAINT `fk_t2_ca` FOREIGN KEY (`c3`, `c2`) REFERENCES `t1` (`c2`, `c1`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+DROP TABLE t2;
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (3,'a'),(3,'b'),(1,'c'),(0,'d'),(1,'e');
+BEGIN;
+SELECT * FROM t1;
+a	b
+3	a
+3	b
+1	c
+0	d
+1	e
+CREATE INDEX t1a ON t1(a);
+SELECT * FROM t1;
+a	b
+3	a
+3	b
+1	c
+0	d
+1	e
+SELECT * FROM t1 FORCE INDEX(t1a) ORDER BY a;
+ERROR HY000: Table definition has changed, please retry transaction
+SELECT * FROM t1;
+a	b
+3	a
+3	b
+1	c
+0	d
+1	e
+COMMIT;
+SELECT * FROM t1 FORCE INDEX(t1a) ORDER BY a;
+a	b
+0	d
+1	c
+1	e
+3	a
+3	b
+DROP TABLE t1;
diff --git a/storage/innodb_plugin/mysql-test/innodb-index.test b/storage/innodb_plugin/mysql-test/innodb-index.test
new file mode 100644
index 00000000000..42888ff3686
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-index.test
@@ -0,0 +1,534 @@
+-- source include/have_innodb.inc
+
+create table t1(a int not null, b int, c char(10) not null, d varchar(20)) engine = innodb;
+insert into t1 values (5,5,'oo','oo'),(4,4,'tr','tr'),(3,4,'ad','ad'),(2,3,'ak','ak');
+commit;
+--error ER_DUP_KEYNAME
+alter table t1 add index b (b), add index b (b);
+--error ER_DUP_FIELDNAME
+alter table t1 add index (b,b);
+alter table t1 add index d2 (d);
+show create table t1;
+explain select * from t1 force index(d2) order by d;
+select * from t1 force index (d2) order by d;
+--error ER_DUP_ENTRY
+alter table t1 add unique index (b);
+show create table t1;
+alter table t1 add index (b);
+show create table t1;
+
+# Check how existing tables interfere with temporary tables.
+CREATE TABLE `t1#1`(a INT PRIMARY KEY) ENGINE=InnoDB;
+
+--error 156
+alter table t1 add unique index (c), add index (d);
+rename table `t1#1` to `t1#2`;
+--error 156
+alter table t1 add unique index (c), add index (d);
+drop table `t1#2`;
+
+alter table t1 add unique index (c), add index (d);
+show create table t1;
+explain select * from t1 force index(c) order by c;
+alter table t1 add primary key (a), drop index c;
+show create table t1;
+--error ER_MULTIPLE_PRI_KEY
+alter table t1 add primary key (c);
+--error ER_DUP_ENTRY
+alter table t1 drop primary key, add primary key (b);
+create unique index c on t1 (c);
+show create table t1;
+explain select * from t1 force index(c) order by c;
+select * from t1 force index(c) order by c;
+alter table t1 drop index b, add index (b);
+show create table t1;
+insert into t1 values(6,1,'ggg','ggg');
+select * from t1;
+select * from t1 force index(b) order by b;
+select * from t1 force index(c) order by c;
+select * from t1 force index(d) order by d;
+explain select * from t1 force index(b) order by b;
+explain select * from t1 force index(c) order by c;
+explain select * from t1 force index(d) order by d;
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,3,'ad','ad'),(4,4,'afe','afe');
+commit;
+alter table t1 add index (c(2));
+show create table t1;
+alter table t1 add unique index (d(10));
+show create table t1;
+insert into t1 values(5,1,'ggg','ggg');
+select * from t1;
+select * from t1 force index(c) order by c;
+select * from t1 force index(d) order by d;
+explain select * from t1 order by b;
+explain select * from t1 force index(c) order by c;
+explain select * from t1 force index(d) order by d;
+show create table t1;
+alter table t1 drop index d;
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+select * from t1 force index(c) order by c;
+explain select * from t1 order by b;
+explain select * from t1 force index(c) order by c;
+explain select * from t1 order by d;
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
+commit;
+alter table t1 add unique index (b,c);
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+select * from t1 force index(b) order by b;
+explain select * from t1 force index(b) order by b;
+show create table t1;
+alter table t1 add index (b,c);
+insert into t1 values(11,11,'kkk','kkk');
+select * from t1;
+select * from t1 force index(b) order by b;
+explain select * from t1 force index(b) order by b;
+show create table t1;
+alter table t1 add unique index (c,d);
+insert into t1 values(13,13,'yyy','aaa');
+select * from t1;
+select * from t1 force index(b) order by b;
+select * from t1 force index(c) order by c;
+explain select * from t1 force index(b) order by b;
+explain select * from t1 force index(c) order by c;
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, b int not null, c int, primary key (a), key (b)) engine = innodb;
+create table t3(a int not null, c int not null, d int, primary key (a), key (c)) engine = innodb;
+create table t4(a int not null, d int not null, e int, primary key (a), key (d)) engine = innodb;
+create table t2(a int not null, b int not null, c int not null, d int not null, e int,
+foreign key (b) references t1(b) on delete cascade,
+foreign key (c) references t3(c), foreign key (d) references t4(d))
+engine = innodb;
+--error ER_DROP_INDEX_FK
+alter table t1 drop index b;
+--error ER_DROP_INDEX_FK
+alter table t3 drop index c;
+--error ER_DROP_INDEX_FK
+alter table t4 drop index d;
+--error ER_DROP_INDEX_FK
+alter table t2 drop index b;
+--error ER_DROP_INDEX_FK
+alter table t2 drop index b, drop index c, drop index d;
+# Apparently, the following makes mysql_alter_table() drop index d.
+create unique index dc on t2 (d,c);
+create index dc on t1 (b,c);
+# This should preserve the foreign key constraints.
+alter table t2 add primary key (a);
+insert into t1 values (1,1,1);
+insert into t3 values (1,1,1);
+insert into t4 values (1,1,1);
+insert into t2 values (1,1,1,1,1);
+commit;
+alter table t4 add constraint dc foreign key (a) references t1(a);
+show create table t4;
+--replace_regex /'test\.#sql-[0-9a-f_]*'/'#sql-temporary'/
+# a foreign key 'test/dc' already exists
+--error ER_CANT_CREATE_TABLE
+alter table t3 add constraint dc foreign key (a) references t1(a);
+show create table t3;
+alter table t2 drop index b, add index (b);
+show create table t2;
+--error ER_ROW_IS_REFERENCED_2
+delete from t1;
+--error ER_CANT_DROP_FIELD_OR_KEY
+drop index dc on t4;
+# there is no foreign key dc on t3
+--replace_regex /'\.\/test\/#sql2-[0-9a-f-]*'/'#sql2-temporary'/
+--error ER_ERROR_ON_RENAME
+alter table t3 drop foreign key dc;
+alter table t4 drop foreign key dc;
+select * from t2;
+delete from t1;
+select * from t2;
+
+drop table t2,t4,t3,t1;
+
+-- let charset = utf8
+-- source include/innodb-index.inc
+
+create table t1(a int not null, b int) engine = innodb;
+insert into t1 values (1,1),(1,1),(1,1),(1,1);
+--error ER_DUP_ENTRY
+alter table t1 add unique index (a);
+--error ER_DUP_ENTRY
+alter table t1 add unique index (b);
+--error ER_DUP_ENTRY
+alter table t1 add unique index (a), add unique index(b);
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, c int not null,b int, primary key(a), unique key(c), key(b)) engine = innodb;
+alter table t1 drop index c, drop index b;
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, b int, primary key(a)) engine = innodb;
+alter table t1 add index (b);
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,3,'ac','ac'),(4,4,'afe','afe'),(5,4,'affe','affe');
+--error ER_DUP_ENTRY
+alter table t1 add unique index (b), add unique index (c), add unique index (d);
+--error ER_DUP_ENTRY
+alter table t1 add unique index (c), add unique index (b), add index (d);
+show create table t1;
+drop table t1;
+
+create table t1(a int not null, b int not null, c int, primary key (a), key(c)) engine=innodb;
+insert into t1 values (5,1,5),(4,2,4),(3,3,3),(2,4,2),(1,5,1);
+alter table t1 add unique index (b);
+insert into t1 values (10,20,20),(11,19,19),(12,18,18),(13,17,17);
+show create table t1;
+check table t1;
+explain select * from t1 force index(c) order by c;
+explain select * from t1 order by a;
+explain select * from t1 force index(b) order by b;
+select * from t1 order by a;
+select * from t1 force index(b) order by b;
+select * from t1 force index(c) order by c;
+drop table t1;
+
+create table t1(a int not null, b int not null) engine=innodb;
+insert into t1 values (1,1);
+alter table t1 add primary key(b);
+insert into t1 values (2,2);
+show create table t1;
+check table t1;
+select * from t1;
+explain select * from t1;
+explain select * from t1 order by a;
+explain select * from t1 order by b;
+checksum table t1;
+drop table t1;
+
+create table t1(a int not null) engine=innodb;
+insert into t1 values (1);
+alter table t1 add primary key(a);
+insert into t1 values (2);
+show create table t1;
+check table t1;
+commit;
+select * from t1;
+explain select * from t1;
+explain select * from t1 order by a;
+drop table t1;
+
+create table t2(d varchar(17) primary key) engine=innodb default charset=utf8;
+create table t3(a int primary key) engine=innodb;
+
+insert into t3 values(22),(44),(33),(55),(66);
+
+insert into t2 values ('jejdkrun87'),('adfd72nh9k'),
+('adfdpplkeock'),('adfdijnmnb78k'),('adfdijn0loKNHJik');
+
+create table t1(a int, b blob, c text, d text not null)
+engine=innodb default charset = utf8;
+
+# r2667 The following test is disabled because MySQL behavior changed.
+# r2667 The test was added with this comment:
+# r2667 
+# r2667 ------------------------------------------------------------------------
+# r2667 r1699 | marko | 2007-08-10 19:53:19 +0300 (Fri, 10 Aug 2007) | 5 lines
+# r2667 
+# r2667 branches/zip: Add changes that accidentally omitted from r1698:
+# r2667 
+# r2667 innodb-index.test, innodb-index.result: Add a test for creating
+# r2667 a PRIMARY KEY on a column that contains a NULL value.
+# r2667 ------------------------------------------------------------------------
+# r2667 
+# r2667 but in BZR-r2667:
+# r2667 http://bazaar.launchpad.net/~mysql/mysql-server/mysql-5.1/revision/davi%40mysql.com-20080617141221-8yre8ys9j4uw3xx5?start_revid=joerg%40mysql.com-20080630105418-7qoe5ehomgrcdb89
+# r2667 MySQL changed the behavior to do full table copy when creating PRIMARY INDEX
+# r2667 on a non-NULL column instead of calling ::add_index() which would fail (and
+# r2667 this is what we were testing here). Before r2667 the code execution path was
+# r2667 like this (when adding PRIMARY INDEX on a non-NULL column with ALTER TABLE):
+# r2667 
+# r2667 mysql_alter_table()
+# r2667   compare_tables()  // would return ALTER_TABLE_INDEX_CHANGED
+# r2667   ::add_index()  // would fail with "primary index cannot contain NULL"
+# r2667 
+# r2667 after r2667 the code execution path is the following:
+# r2667 
+# r2667 mysql_alter_table()
+# r2667   compare_tables()  // returns ALTER_TABLE_DATA_CHANGED
+# r2667   full copy is done, without calling ::add_index()
+# r2667 
+# r2667 To enable, remove "# r2667: " below.
+# r2667 
+# r2667: insert into t1 values (null,null,null,'null');
+insert into t1
+select a,left(repeat(d,100*a),65535),repeat(d,20*a),d from t2,t3;
+drop table t2, t3;
+select count(*) from t1 where a=44;
+select a,
+length(b),b=left(repeat(d,100*a),65535),length(c),c=repeat(d,20*a),d from t1;
+# r2667: --error ER_PRIMARY_CANT_HAVE_NULL
+# r2667: alter table t1 add primary key (a), add key (b(20));
+# r2667: delete from t1 where d='null';
+--error ER_DUP_ENTRY
+alter table t1 add primary key (a), add key (b(20));
+delete from t1 where a%2;
+check table t1;
+alter table t1 add primary key (a,b(255),c(255)), add key (b(767));
+select count(*) from t1 where a=44;
+select a,
+length(b),b=left(repeat(d,100*a),65535),length(c),c=repeat(d,20*a),d from t1;
+show create table t1;
+check table t1;
+explain select * from t1 where b like 'adfd%';
+
+#
+# Test locking
+#
+
+create table t2(a int, b varchar(255), primary key(a,b)) engine=innodb;
+insert into t2 select a,left(b,255) from t1;
+drop table t1;
+rename table t2 to t1;
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+connection a;
+set innodb_lock_wait_timeout=1;
+begin;
+# Obtain an IX lock on the table
+select a from t1 limit 1 for update;
+connection b;
+set innodb_lock_wait_timeout=1;
+# This would require an S lock on the table, conflicting with the IX lock.
+--error ER_LOCK_WAIT_TIMEOUT
+create index t1ba on t1 (b,a);
+connection a;
+commit;
+begin;
+# Obtain an IS lock on the table
+select a from t1 limit 1 lock in share mode;
+connection b;
+# This will require an S lock on the table.  No conflict with the IS lock.
+create index t1ba on t1 (b,a);
+# This would require an X lock on the table, conflicting with the IS lock.
+--error ER_LOCK_WAIT_TIMEOUT
+drop index t1ba on t1;
+connection a;
+commit;
+explain select a from t1 order by b;
+--send
+select a,sleep(2+a/100) from t1 order by b limit 3;
+
+# The following DROP INDEX will succeed, altough the SELECT above has
+# opened a read view.  However, during the execution of the SELECT,
+# MySQL should hold a table lock that should block the execution
+# of the DROP INDEX below.
+
+connection b;
+select sleep(1);
+drop index t1ba on t1;
+
+# After the index was dropped, subsequent SELECTs will use the same
+# read view, but they should not be accessing the dropped index any more.
+
+connection a;
+reap;
+explain select a from t1 order by b;
+select a from t1 order by b limit 3;
+commit;
+
+connection default;
+disconnect a;
+disconnect b;
+
+drop table t1;
+
+let $per_table=`select @@innodb_file_per_table`;
+let $format=`select @@innodb_file_format`;
+set global innodb_file_per_table=on;
+set global innodb_file_format='Barracuda';
+# Test creating a table that could lead to undo log overflow.
+# In the undo log, we write a 768-byte prefix (REC_MAX_INDEX_COL_LEN)
+# of each externally stored column that appears as a column prefix in an index.
+# For this test case, it would suffice to write 1 byte, though.
+create table t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob,h blob,
+		i blob,j blob,k blob,l blob,m blob,n blob,o blob,p blob,
+		q blob,r blob,s blob,t blob,u blob)
+	engine=innodb row_format=dynamic;
+create index t1a on t1 (a(1));
+create index t1b on t1 (b(1));
+create index t1c on t1 (c(1));
+create index t1d on t1 (d(1));
+create index t1e on t1 (e(1));
+create index t1f on t1 (f(1));
+create index t1g on t1 (g(1));
+create index t1h on t1 (h(1));
+create index t1i on t1 (i(1));
+create index t1j on t1 (j(1));
+create index t1k on t1 (k(1));
+create index t1l on t1 (l(1));
+create index t1m on t1 (m(1));
+create index t1n on t1 (n(1));
+create index t1o on t1 (o(1));
+create index t1p on t1 (p(1));
+create index t1q on t1 (q(1));
+create index t1r on t1 (r(1));
+create index t1s on t1 (s(1));
+create index t1t on t1 (t(1));
+--error 139
+create index t1u on t1 (u(1));
+--error 139
+create index t1ut on t1 (u(1), t(1));
+create index t1st on t1 (s(1), t(1));
+show create table t1;
+--error 139
+create index t1u on t1 (u(1));
+alter table t1 row_format=compact;
+create index t1u on t1 (u(1));
+
+drop table t1;
+eval set global innodb_file_per_table=$per_table;
+eval set global innodb_file_format=$format;
+
+#
+# Test to check whether CREATE INDEX handles implicit foreign key
+# constraint modifications (Issue #70, Bug #38786)
+#
+SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
+SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
+
+CREATE TABLE t1(
+  c1	BIGINT(12) NOT NULL,
+  PRIMARY KEY (c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+CREATE TABLE t2(
+  c1	BIGINT(16) NOT NULL,
+  c2	BIGINT(12) NOT NULL,
+  c3	BIGINT(12) NOT NULL,
+  PRIMARY KEY (c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3) REFERENCES t1(c1);
+
+SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
+SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
+
+SHOW CREATE TABLE t2;
+
+CREATE INDEX i_t2_c3_c2 ON t2(c3, c2);
+
+SHOW CREATE TABLE t2;
+
+SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
+SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
+
+--error ER_NO_REFERENCED_ROW_2
+INSERT INTO t2 VALUES(0,0,0);
+INSERT INTO t1 VALUES(0);
+INSERT INTO t2 VALUES(0,0,0);
+
+DROP TABLE t2;
+
+CREATE TABLE t2(
+  c1	BIGINT(16) NOT NULL,
+  c2	BIGINT(12) NOT NULL,
+  c3	BIGINT(12) NOT NULL,
+  PRIMARY KEY (c1,c2,c3)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3) REFERENCES t1(c1);
+
+SHOW CREATE TABLE t2;
+
+CREATE INDEX i_t2_c3_c2 ON t2(c3, c2);
+
+SHOW CREATE TABLE t2;
+--error ER_NO_REFERENCED_ROW_2
+INSERT INTO t2 VALUES(0,0,1);
+INSERT INTO t2 VALUES(0,0,0);
+--error ER_ROW_IS_REFERENCED_2
+DELETE FROM t1;
+DELETE FROM t2;
+
+DROP TABLE t2;
+DROP TABLE t1;
+
+CREATE TABLE t1(
+  c1	BIGINT(12) NOT NULL,
+  c2	INT(4) NOT NULL,
+  PRIMARY KEY (c2,c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+CREATE TABLE t2(
+  c1	BIGINT(16) NOT NULL,
+  c2	BIGINT(12) NOT NULL,
+  c3	BIGINT(12) NOT NULL,
+  PRIMARY KEY (c1)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+--replace_regex /'test\.#sql-[0-9_a-f-]*'/'#sql-temporary'/
+--error ER_CANT_CREATE_TABLE
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3,c2) REFERENCES t1(c1,c1);
+--replace_regex /'test\.#sql-[0-9_a-f-]*'/'#sql-temporary'/
+--error ER_CANT_CREATE_TABLE
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3,c2) REFERENCES t1(c1,c2);
+--replace_regex /'test\.#sql-[0-9_a-f-]*'/'#sql-temporary'/
+--error ER_CANT_CREATE_TABLE
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3,c2) REFERENCES t1(c2,c1);
+ALTER TABLE t1 MODIFY COLUMN c2 BIGINT(12) NOT NULL;
+--replace_regex /'test\.#sql-[0-9_a-f-]*'/'#sql-temporary'/
+--error ER_CANT_CREATE_TABLE
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3,c2) REFERENCES t1(c1,c2);
+
+ALTER TABLE t2 ADD CONSTRAINT fk_t2_ca
+ FOREIGN KEY (c3,c2) REFERENCES t1(c2,c1);
+SHOW CREATE TABLE t1;
+SHOW CREATE TABLE t2;
+CREATE INDEX i_t2_c2_c1 ON t2(c2, c1);
+SHOW CREATE TABLE t2;
+CREATE INDEX i_t2_c3_c1_c2 ON t2(c3, c1, c2);
+SHOW CREATE TABLE t2;
+CREATE INDEX i_t2_c3_c2 ON t2(c3, c2);
+SHOW CREATE TABLE t2;
+
+DROP TABLE t2;
+DROP TABLE t1;
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+connection a;
+CREATE TABLE t1 (a INT, b CHAR(1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (3,'a'),(3,'b'),(1,'c'),(0,'d'),(1,'e');
+connection b;
+BEGIN;
+SELECT * FROM t1;
+connection a;
+CREATE INDEX t1a ON t1(a);
+connection b;
+SELECT * FROM t1;
+--error ER_TABLE_DEF_CHANGED
+SELECT * FROM t1 FORCE INDEX(t1a) ORDER BY a;
+SELECT * FROM t1;
+COMMIT;
+SELECT * FROM t1 FORCE INDEX(t1a) ORDER BY a;
+connection default;
+disconnect a;
+disconnect b;
+
+DROP TABLE t1;
diff --git a/storage/innodb_plugin/mysql-test/innodb-index_ucs2.result b/storage/innodb_plugin/mysql-test/innodb-index_ucs2.result
new file mode 100644
index 00000000000..c8a1e8c7da1
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-index_ucs2.result
@@ -0,0 +1,116 @@
+create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a)) engine = innodb default charset=ucs2;
+insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
+commit;
+alter table t1 add unique index (b);
+ERROR 23000: Duplicate entry '2' for key 'b'
+insert into t1 values(8,9,'fff','fff');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=ucs2
+alter table t1 add index (b);
+insert into t1 values(10,10,'kkk','iii');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	5	NULL	6	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `b` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=ucs2
+alter table t1 add unique index (c), add index (d);
+insert into t1 values(11,11,'aaa','mmm');
+select * from t1;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+11	11	aaa	mmm
+select * from t1 force index(b) order by b;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+11	11	aaa	mmm
+select * from t1 force index(c) order by c;
+a	b	c	d
+11	11	aaa	mmm
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+select * from t1 force index(d) order by d;
+a	b	c	d
+1	1	ab	ab
+2	2	ac	ac
+3	2	ad	ad
+4	4	afe	afe
+8	9	fff	fff
+10	10	kkk	iii
+11	11	aaa	mmm
+explain select * from t1 force index(b) order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	5	NULL	7	
+explain select * from t1 force index(c) order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	21	NULL	7	
+explain select * from t1 force index(d) order by d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	d	43	NULL	7	
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `c` (`c`),
+  KEY `b` (`b`),
+  KEY `d` (`d`)
+) ENGINE=InnoDB DEFAULT CHARSET=ucs2
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+drop table t1;
diff --git a/storage/innodb_plugin/mysql-test/innodb-index_ucs2.test b/storage/innodb_plugin/mysql-test/innodb-index_ucs2.test
new file mode 100644
index 00000000000..fff9a4da1a8
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-index_ucs2.test
@@ -0,0 +1,5 @@
+-- source include/have_innodb.inc
+-- source include/have_ucs2.inc
+
+-- let charset = ucs2
+-- source include/innodb-index.inc
diff --git a/storage/innodb_plugin/mysql-test/innodb-lock.result b/storage/innodb_plugin/mysql-test/innodb-lock.result
new file mode 100644
index 00000000000..4ace4065c34
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-lock.result
@@ -0,0 +1,57 @@
+set global innodb_table_locks=1;
+select @@innodb_table_locks;
+@@innodb_table_locks
+1
+drop table if exists t1;
+set @@innodb_table_locks=1;
+create table t1 (id integer, x integer) engine=INNODB;
+insert into t1 values(0, 0);
+set autocommit=0;
+SELECT * from t1 where id = 0 FOR UPDATE;
+id	x
+0	0
+set autocommit=0;
+lock table t1 write;
+update t1 set x=1 where id = 0;
+select * from t1;
+id	x
+0	1
+commit;
+update t1 set x=2 where id = 0;
+commit;
+unlock tables;
+select * from t1;
+id	x
+0	2
+commit;
+drop table t1;
+set @@innodb_table_locks=0;
+create table t1 (id integer primary key, x integer) engine=INNODB;
+insert into t1 values(0, 0),(1,1),(2,2);
+commit;
+SELECT * from t1 where id = 0 FOR UPDATE;
+id	x
+0	0
+set autocommit=0;
+set @@innodb_table_locks=0;
+lock table t1 write;
+update t1 set x=10 where id = 2;
+SELECT * from t1 where id = 2;
+id	x
+2	2
+UPDATE t1 set x=3 where id = 2;
+commit;
+SELECT * from t1;
+id	x
+0	0
+1	1
+2	3
+commit;
+unlock tables;
+commit;
+select * from t1;
+id	x
+0	0
+1	1
+2	10
+drop table t1;
diff --git a/storage/innodb_plugin/mysql-test/innodb-lock.test b/storage/innodb_plugin/mysql-test/innodb-lock.test
new file mode 100644
index 00000000000..eacf7e562be
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-lock.test
@@ -0,0 +1,102 @@
+-- source include/have_innodb.inc
+
+#
+# Check and select innodb lock type
+#
+
+set global innodb_table_locks=1;
+
+select @@innodb_table_locks;
+
+#
+# Testing of explicit table locks with enforced table locks
+#
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+#
+# Testing of explicit table locks with enforced table locks
+#
+
+set @@innodb_table_locks=1;
+
+connection con1;
+create table t1 (id integer, x integer) engine=INNODB;
+insert into t1 values(0, 0);
+set autocommit=0;
+SELECT * from t1 where id = 0 FOR UPDATE;
+
+connection con2;
+set autocommit=0;
+
+# The following statement should hang because con1 is locking the page
+--send
+lock table t1 write;
+--sleep 2
+
+connection con1;
+update t1 set x=1 where id = 0;
+select * from t1;
+commit;
+
+connection con2;
+reap;
+update t1 set x=2 where id = 0;
+commit;
+unlock tables;
+
+connection con1;
+select * from t1;
+commit;
+
+drop table t1;
+
+#
+# Try with old lock method (where LOCK TABLE is ignored by InnoDB)
+#
+
+set @@innodb_table_locks=0;
+
+create table t1 (id integer primary key, x integer) engine=INNODB;
+insert into t1 values(0, 0),(1,1),(2,2);
+commit;
+SELECT * from t1 where id = 0 FOR UPDATE;
+
+connection con2;
+set autocommit=0;
+set @@innodb_table_locks=0;
+
+# The following statement should work becase innodb doesn't check table locks
+lock table t1 write;
+
+connection con1;
+
+# This will be locked by MySQL
+--send
+update t1 set x=10 where id = 2;
+--sleep 2
+
+connection con2;
+
+# Note that we will get a deadlock if we try to select any rows marked
+# for update by con1 !
+
+SELECT * from t1 where id = 2;
+UPDATE t1 set x=3 where id = 2;
+commit;
+SELECT * from t1;
+commit;
+unlock tables;
+
+connection con1;
+reap;
+commit;
+select * from t1;
+drop table t1;
+
+# End of 4.1 tests
diff --git a/storage/innodb_plugin/mysql-test/innodb-master.opt b/storage/innodb_plugin/mysql-test/innodb-master.opt
new file mode 100644
index 00000000000..4901efb416c
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-master.opt
@@ -0,0 +1 @@
+--binlog_cache_size=32768 --innodb_lock_wait_timeout=1
diff --git a/storage/innodb_plugin/mysql-test/innodb-replace.result b/storage/innodb_plugin/mysql-test/innodb-replace.result
new file mode 100644
index 00000000000..c926bb89a2e
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-replace.result
@@ -0,0 +1,13 @@
+drop table if exists t1;
+create table t1 (c1 char(5) unique not null, c2 int, stamp timestamp) engine=innodb;
+select * from t1;
+c1	c2	stamp
+replace delayed into t1 (c1, c2)  values ( "text1","11");
+ERROR HY000: DELAYED option not supported for table 't1'
+select * from t1;
+c1	c2	stamp
+replace delayed into t1 (c1, c2)  values ( "text1","12");
+ERROR HY000: DELAYED option not supported for table 't1'
+select * from t1;
+c1	c2	stamp
+drop table t1;
diff --git a/storage/innodb_plugin/mysql-test/innodb-replace.test b/storage/innodb_plugin/mysql-test/innodb-replace.test
new file mode 100644
index 00000000000..8c3aacde5e8
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-replace.test
@@ -0,0 +1,22 @@
+-- source include/have_innodb.inc
+# embedded server ignores 'delayed', so skip this
+-- source include/not_embedded.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+#
+# Bug #1078
+#
+create table t1 (c1 char(5) unique not null, c2 int, stamp timestamp) engine=innodb;
+select * from t1;
+--error ER_DELAYED_NOT_SUPPORTED
+replace delayed into t1 (c1, c2)  values ( "text1","11");
+select * from t1;
+--error ER_DELAYED_NOT_SUPPORTED
+replace delayed into t1 (c1, c2)  values ( "text1","12");
+select * from t1;
+drop table t1;
+
+# End of 4.1 tests
diff --git a/storage/innodb_plugin/mysql-test/innodb-semi-consistent-master.opt b/storage/innodb_plugin/mysql-test/innodb-semi-consistent-master.opt
new file mode 100644
index 00000000000..e76299453d3
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-semi-consistent-master.opt
@@ -0,0 +1 @@
+--innodb_lock_wait_timeout=2
diff --git a/storage/innodb_plugin/mysql-test/innodb-semi-consistent.result b/storage/innodb_plugin/mysql-test/innodb-semi-consistent.result
new file mode 100644
index 00000000000..ca0e362ef80
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-semi-consistent.result
@@ -0,0 +1,47 @@
+drop table if exists t1;
+set binlog_format=mixed;
+set session transaction isolation level repeatable read;
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7);
+set autocommit=0;
+select * from t1 where a=3 lock in share mode;
+a
+3
+set binlog_format=mixed;
+set session transaction isolation level repeatable read;
+set autocommit=0;
+update t1 set a=10 where a=5;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+commit;
+set session transaction isolation level read committed;
+update t1 set a=10 where a=5;
+select * from t1 where a=2 for update;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+select * from t1 where a=2 limit 1 for update;
+a
+2
+update t1 set a=11 where a=6;
+update t1 set a=12 where a=2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+update t1 set a=13 where a=1;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+commit;
+update t1 set a=14 where a=1;
+commit;
+select * from t1;
+a
+14
+2
+3
+4
+10
+11
+7
+drop table t1;
+create table t1 (a int, b int) engine=myisam;
+create table t2 (c int, d int, key (c)) engine=innodb;
+insert into t1 values (1,1);
+insert into t2 values (1,2);
+set session transaction isolation level read committed;
+delete from t1 using t1 join t2 on t1.a = t2.c where t2.d in (1);
+drop table t1, t2;
diff --git a/storage/innodb_plugin/mysql-test/innodb-semi-consistent.test b/storage/innodb_plugin/mysql-test/innodb-semi-consistent.test
new file mode 100644
index 00000000000..61ad7815ca9
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-semi-consistent.test
@@ -0,0 +1,68 @@
+-- source include/not_embedded.inc
+-- source include/have_innodb.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+# basic tests of semi-consistent reads
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+connection a;
+set binlog_format=mixed;
+set session transaction isolation level repeatable read;
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7);
+set autocommit=0;
+# this should lock the entire table
+select * from t1 where a=3 lock in share mode;
+connection b;
+set binlog_format=mixed;
+set session transaction isolation level repeatable read;
+set autocommit=0;
+-- error ER_LOCK_WAIT_TIMEOUT
+update t1 set a=10 where a=5;
+connection a;
+commit;
+connection b;
+# perform a semi-consisent read (and unlock non-matching rows)
+set session transaction isolation level read committed;
+update t1 set a=10 where a=5;
+connection a;
+-- error ER_LOCK_WAIT_TIMEOUT
+select * from t1 where a=2 for update;
+# this should lock the records (1),(2)
+select * from t1 where a=2 limit 1 for update;
+connection b;
+# semi-consistent read will skip non-matching locked rows a=1, a=2
+update t1 set a=11 where a=6;
+-- error ER_LOCK_WAIT_TIMEOUT
+update t1 set a=12 where a=2;
+-- error ER_LOCK_WAIT_TIMEOUT
+update t1 set a=13 where a=1;
+connection a;
+commit;
+connection b;
+update t1 set a=14 where a=1;
+commit;
+connection a;
+select * from t1;
+drop table t1;
+
+connection default;
+disconnect a;
+disconnect b;
+
+# Bug 39320
+create table t1 (a int, b int) engine=myisam;
+create table t2 (c int, d int, key (c)) engine=innodb;
+insert into t1 values (1,1);
+insert into t2 values (1,2);
+connect (a,localhost,root,,);
+connection a;
+set session transaction isolation level read committed;
+delete from t1 using t1 join t2 on t1.a = t2.c where t2.d in (1);
+connection default;
+disconnect a;
+drop table t1, t2;
diff --git a/storage/innodb_plugin/mysql-test/innodb-timeout.result b/storage/innodb_plugin/mysql-test/innodb-timeout.result
new file mode 100644
index 00000000000..be9a688cd72
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-timeout.result
@@ -0,0 +1,38 @@
+set global innodb_lock_wait_timeout=42;
+select @@innodb_lock_wait_timeout;
+@@innodb_lock_wait_timeout
+42
+set innodb_lock_wait_timeout=1;
+select @@innodb_lock_wait_timeout;
+@@innodb_lock_wait_timeout
+1
+select @@innodb_lock_wait_timeout;
+@@innodb_lock_wait_timeout
+42
+set global innodb_lock_wait_timeout=347;
+select @@innodb_lock_wait_timeout;
+@@innodb_lock_wait_timeout
+42
+set innodb_lock_wait_timeout=1;
+select @@innodb_lock_wait_timeout;
+@@innodb_lock_wait_timeout
+1
+select @@innodb_lock_wait_timeout;
+@@innodb_lock_wait_timeout
+347
+create table t1(a int primary key)engine=innodb;
+begin;
+insert into t1 values(1),(2),(3);
+select * from t1 for update;
+commit;
+a
+1
+2
+3
+begin;
+insert into t1 values(4);
+select * from t1 for update;
+commit;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+drop table t1;
+set global innodb_lock_wait_timeout=50;
diff --git a/storage/innodb_plugin/mysql-test/innodb-timeout.test b/storage/innodb_plugin/mysql-test/innodb-timeout.test
new file mode 100644
index 00000000000..f23fe3cff2d
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-timeout.test
@@ -0,0 +1,64 @@
+-- source include/have_innodb.inc
+
+let $timeout=`select @@innodb_lock_wait_timeout`;
+set global innodb_lock_wait_timeout=42;
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+
+connection a;
+select @@innodb_lock_wait_timeout;
+set innodb_lock_wait_timeout=1;
+select @@innodb_lock_wait_timeout;
+
+connection b;
+select @@innodb_lock_wait_timeout;
+set global innodb_lock_wait_timeout=347;
+select @@innodb_lock_wait_timeout;
+set innodb_lock_wait_timeout=1;
+select @@innodb_lock_wait_timeout;
+
+connect (c,localhost,root,,);
+connection c;
+select @@innodb_lock_wait_timeout;
+connection default;
+disconnect c;
+
+connection a;
+create table t1(a int primary key)engine=innodb;
+begin;
+insert into t1 values(1),(2),(3);
+
+connection b;
+--send
+select * from t1 for update;
+
+connection a;
+commit;
+
+connection b;
+reap;
+
+connection a;
+begin;
+insert into t1 values(4);
+
+connection b;
+--send
+select * from t1 for update;
+
+connection a;
+sleep 2;
+commit;
+
+connection b;
+--error ER_LOCK_WAIT_TIMEOUT
+reap;
+drop table t1;
+
+connection default;
+
+disconnect a;
+disconnect b;
+
+eval set global innodb_lock_wait_timeout=$timeout;
diff --git a/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc-master.opt b/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc-master.opt
new file mode 100644
index 00000000000..889834add01
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc-master.opt
@@ -0,0 +1,2 @@
+--innodb-use-sys-malloc=true
+--innodb-use-sys-malloc=true
diff --git a/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc.result b/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc.result
new file mode 100644
index 00000000000..2ec4c7c8130
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc.result
@@ -0,0 +1,48 @@
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+@@GLOBAL.innodb_use_sys_malloc
+1
+1 Expected
+SET @@GLOBAL.innodb_use_sys_malloc=0;
+ERROR HY000: Variable 'innodb_use_sys_malloc' is a read only variable
+Expected error 'Read only variable'
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+@@GLOBAL.innodb_use_sys_malloc
+1
+1 Expected
+drop table if exists t1;
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7);
+select * from t1;
+a
+1
+2
+3
+4
+5
+6
+7
+drop table t1;
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+@@GLOBAL.innodb_use_sys_malloc
+1
+1 Expected
+SET @@GLOBAL.innodb_use_sys_malloc=0;
+ERROR HY000: Variable 'innodb_use_sys_malloc' is a read only variable
+Expected error 'Read only variable'
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+@@GLOBAL.innodb_use_sys_malloc
+1
+1 Expected
+drop table if exists t1;
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7);
+select * from t1;
+a
+1
+2
+3
+4
+5
+6
+7
+drop table t1;
diff --git a/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc.test b/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc.test
new file mode 100644
index 00000000000..325dd19d086
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc.test
@@ -0,0 +1,48 @@
+--source include/have_innodb.inc
+
+#display current value of innodb_use_sys_malloc
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+--echo 1 Expected
+
+#try changing it. Should fail.
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+SET @@GLOBAL.innodb_use_sys_malloc=0;
+--echo Expected error 'Read only variable'
+
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+--echo 1 Expected
+
+
+#do some stuff to see if it works.
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7);
+select * from t1;
+drop table t1;
+--source include/have_innodb.inc
+
+#display current value of innodb_use_sys_malloc
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+--echo 1 Expected
+
+#try changing it. Should fail.
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+SET @@GLOBAL.innodb_use_sys_malloc=0;
+--echo Expected error 'Read only variable'
+
+SELECT @@GLOBAL.innodb_use_sys_malloc;
+--echo 1 Expected
+
+
+#do some stuff to see if it works.
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2),(3),(4),(5),(6),(7);
+select * from t1;
+drop table t1;
diff --git a/storage/innodb_plugin/mysql-test/innodb-zip.result b/storage/innodb_plugin/mysql-test/innodb-zip.result
new file mode 100644
index 00000000000..c81401743a5
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-zip.result
@@ -0,0 +1,421 @@
+set global innodb_file_per_table=off;
+set global innodb_file_format=`0`;
+create table t0(a int primary key) engine=innodb row_format=compressed;
+Warnings:
+Warning	1478	InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_per_table.
+Warning	1478	InnoDB: assuming ROW_FORMAT=COMPACT.
+create table t00(a int primary key) engine=innodb
+key_block_size=4 row_format=compressed;
+Warnings:
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=4.
+Warning	1478	InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_per_table.
+Warning	1478	InnoDB: assuming ROW_FORMAT=COMPACT.
+create table t1(a int primary key) engine=innodb row_format=dynamic;
+Warnings:
+Warning	1478	InnoDB: ROW_FORMAT=DYNAMIC requires innodb_file_per_table.
+Warning	1478	InnoDB: assuming ROW_FORMAT=COMPACT.
+create table t2(a int primary key) engine=innodb row_format=redundant;
+create table t3(a int primary key) engine=innodb row_format=compact;
+create table t4(a int primary key) engine=innodb key_block_size=9;
+Warnings:
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=9.
+create table t5(a int primary key) engine=innodb
+key_block_size=1 row_format=redundant;
+Warnings:
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=1.
+set global innodb_file_per_table=on;
+create table t6(a int primary key) engine=innodb
+key_block_size=1 row_format=redundant;
+Warnings:
+Warning	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=1.
+set global innodb_file_format=`1`;
+create table t7(a int primary key) engine=innodb
+key_block_size=1 row_format=redundant;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED.
+create table t8(a int primary key) engine=innodb
+key_block_size=1 row_format=fixed;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED.
+Warning	1478	InnoDB: assuming ROW_FORMAT=COMPACT.
+create table t9(a int primary key) engine=innodb
+key_block_size=1 row_format=compact;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED.
+create table t10(a int primary key) engine=innodb
+key_block_size=1 row_format=dynamic;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED.
+create table t11(a int primary key) engine=innodb
+key_block_size=1 row_format=compressed;
+create table t12(a int primary key) engine=innodb
+key_block_size=1;
+create table t13(a int primary key) engine=innodb
+row_format=compressed;
+create table t14(a int primary key) engine=innodb key_block_size=9;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=9.
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t0	Compact
+test	t00	Compact
+test	t1	Compact
+test	t10	Dynamic
+test	t11	Compressed
+test	t12	Compressed
+test	t13	Compressed
+test	t14	Compact
+test	t2	Redundant
+test	t3	Compact
+test	t4	Compact
+test	t5	Redundant
+test	t6	Redundant
+test	t7	Redundant
+test	t8	Compact
+test	t9	Compact
+drop table t0,t00,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14;
+alter table t1 key_block_size=0;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=0.
+alter table t1 row_format=dynamic;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t1	Dynamic
+alter table t1 row_format=compact;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t1	Compact
+alter table t1 row_format=redundant;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t1	Redundant
+drop table t1;
+create table t1(a int not null, b text, index(b(10))) engine=innodb
+key_block_size=1;
+create table t2(b text)engine=innodb;
+insert into t2 values(concat('1abcdefghijklmnopqrstuvwxyz', repeat('A',5000)));
+insert into t1 select 1, b from t2;
+commit;
+begin;
+update t1 set b=repeat('B',100);
+select a,left(b,40) from t1 natural join t2;
+a	left(b,40)
+1	1abcdefghijklmnopqrstuvwxyzAAAAAAAAAAAAA
+rollback;
+select a,left(b,40) from t1 natural join t2;
+a	left(b,40)
+1	1abcdefghijklmnopqrstuvwxyzAAAAAAAAAAAAA
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t1	Compressed
+test	t2	Compact
+drop table t1,t2;
+SET SESSION innodb_strict_mode = off;
+CREATE TABLE t1(
+c TEXT NOT NULL, d TEXT NOT NULL,
+PRIMARY KEY (c(767),d(767)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII;
+ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs
+CREATE TABLE t1(
+c TEXT NOT NULL, d TEXT NOT NULL,
+PRIMARY KEY (c(767),d(767)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII;
+ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs
+CREATE TABLE t1(
+c TEXT NOT NULL, d TEXT NOT NULL,
+PRIMARY KEY (c(767),d(767)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII;
+drop table t1;
+CREATE TABLE t1(c TEXT, PRIMARY KEY (c(440)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII;
+ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs
+CREATE TABLE t1(c TEXT, PRIMARY KEY (c(439)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII;
+INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512));
+DROP TABLE t1;
+create table t1( c1 int not null, c2 blob, c3 blob, c4 blob,
+primary key(c1, c2(22), c3(22)))
+engine = innodb row_format = dynamic;
+begin;
+insert into t1 values(1, repeat('A', 20000), repeat('B', 20000),
+repeat('C', 20000));
+update t1 set c3 = repeat('D', 20000) where c1 = 1;
+commit;
+select count(*) from t1 where c2 = repeat('A', 20000);
+count(*)
+1
+select count(*) from t1 where c3 = repeat('D', 20000);
+count(*)
+1
+select count(*) from t1 where c4 = repeat('C', 20000);
+count(*)
+1
+update t1 set c3 = repeat('E', 20000) where c1 = 1;
+drop table t1;
+set global innodb_file_format=`0`;
+select @@innodb_file_format;
+@@innodb_file_format
+Antelope
+set global innodb_file_format=`1`;
+select @@innodb_file_format;
+@@innodb_file_format
+Barracuda
+set global innodb_file_format=`2`;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=`-1`;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=`Antelope`;
+set global innodb_file_format=`Barracuda`;
+set global innodb_file_format=`Cheetah`;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=`abc`;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=`1a`;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=``;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_per_table = on;
+set global innodb_file_format = `1`;
+set innodb_strict_mode = off;
+create table t1 (id int primary key) engine = innodb key_block_size = 0;
+Warnings:
+Warning	1478	InnoDB: ignoring KEY_BLOCK_SIZE=0.
+drop table t1;
+set innodb_strict_mode = on;
+create table t1 (id int primary key) engine = innodb key_block_size = 0;
+ERROR HY000: Can't create table 'test.t1' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: invalid KEY_BLOCK_SIZE = 0. Valid values are [1, 2, 4, 8, 16]
+Error	1005	Can't create table 'test.t1' (errno: 1478)
+create table t2 (id int primary key) engine = innodb key_block_size = 9;
+ERROR HY000: Can't create table 'test.t2' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: invalid KEY_BLOCK_SIZE = 9. Valid values are [1, 2, 4, 8, 16]
+Error	1005	Can't create table 'test.t2' (errno: 1478)
+create table t3 (id int primary key) engine = innodb key_block_size = 1;
+create table t4 (id int primary key) engine = innodb key_block_size = 2;
+create table t5 (id int primary key) engine = innodb key_block_size = 4;
+create table t6 (id int primary key) engine = innodb key_block_size = 8;
+create table t7 (id int primary key) engine = innodb key_block_size = 16;
+create table t8 (id int primary key) engine = innodb row_format = compressed;
+create table t9 (id int primary key) engine = innodb row_format = dynamic;
+create table t10(id int primary key) engine = innodb row_format = compact;
+create table t11(id int primary key) engine = innodb row_format = redundant;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t10	Compact
+test	t11	Redundant
+test	t3	Compressed
+test	t4	Compressed
+test	t5	Compressed
+test	t6	Compressed
+test	t7	Compressed
+test	t8	Compressed
+test	t9	Dynamic
+drop table t3, t4, t5, t6, t7, t8, t9, t10, t11;
+create table t1 (id int primary key) engine = innodb
+key_block_size = 8 row_format = compressed;
+create table t2 (id int primary key) engine = innodb
+key_block_size = 8 row_format = redundant;
+ERROR HY000: Can't create table 'test.t2' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: cannot specify ROW_FORMAT = REDUNDANT with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t2' (errno: 1478)
+create table t3 (id int primary key) engine = innodb
+key_block_size = 8 row_format = compact;
+ERROR HY000: Can't create table 'test.t3' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: cannot specify ROW_FORMAT = COMPACT with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t3' (errno: 1478)
+create table t4 (id int primary key) engine = innodb
+key_block_size = 8 row_format = dynamic;
+ERROR HY000: Can't create table 'test.t4' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: cannot specify ROW_FORMAT = DYNAMIC with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t4' (errno: 1478)
+create table t5 (id int primary key) engine = innodb
+key_block_size = 8 row_format = default;
+ERROR HY000: Can't create table 'test.t5' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: cannot specify ROW_FORMAT = COMPACT with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t5' (errno: 1478)
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t1	Compressed
+drop table t1;
+create table t1 (id int primary key) engine = innodb
+key_block_size = 9 row_format = redundant;
+ERROR HY000: Can't create table 'test.t1' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: invalid KEY_BLOCK_SIZE = 9. Valid values are [1, 2, 4, 8, 16]
+Error	1478	InnoDB: cannot specify ROW_FORMAT = REDUNDANT with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t1' (errno: 1478)
+create table t2 (id int primary key) engine = innodb
+key_block_size = 9 row_format = compact;
+ERROR HY000: Can't create table 'test.t2' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: invalid KEY_BLOCK_SIZE = 9. Valid values are [1, 2, 4, 8, 16]
+Error	1478	InnoDB: cannot specify ROW_FORMAT = COMPACT with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t2' (errno: 1478)
+create table t2 (id int primary key) engine = innodb
+key_block_size = 9 row_format = dynamic;
+ERROR HY000: Can't create table 'test.t2' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: invalid KEY_BLOCK_SIZE = 9. Valid values are [1, 2, 4, 8, 16]
+Error	1478	InnoDB: cannot specify ROW_FORMAT = DYNAMIC with KEY_BLOCK_SIZE.
+Error	1005	Can't create table 'test.t2' (errno: 1478)
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+set global innodb_file_per_table = off;
+create table t1 (id int primary key) engine = innodb key_block_size = 1;
+ERROR HY000: Can't create table 'test.t1' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t1' (errno: 1478)
+create table t2 (id int primary key) engine = innodb key_block_size = 2;
+ERROR HY000: Can't create table 'test.t2' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t2' (errno: 1478)
+create table t3 (id int primary key) engine = innodb key_block_size = 4;
+ERROR HY000: Can't create table 'test.t3' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t3' (errno: 1478)
+create table t4 (id int primary key) engine = innodb key_block_size = 8;
+ERROR HY000: Can't create table 'test.t4' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t4' (errno: 1478)
+create table t5 (id int primary key) engine = innodb key_block_size = 16;
+ERROR HY000: Can't create table 'test.t5' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t5' (errno: 1478)
+create table t6 (id int primary key) engine = innodb row_format = compressed;
+ERROR HY000: Can't create table 'test.t6' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t6' (errno: 1478)
+create table t7 (id int primary key) engine = innodb row_format = dynamic;
+ERROR HY000: Can't create table 'test.t7' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: ROW_FORMAT=DYNAMIC requires innodb_file_per_table.
+Error	1005	Can't create table 'test.t7' (errno: 1478)
+create table t8 (id int primary key) engine = innodb row_format = compact;
+create table t9 (id int primary key) engine = innodb row_format = redundant;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t8	Compact
+test	t9	Redundant
+drop table t8, t9;
+set global innodb_file_per_table = on;
+set global innodb_file_format = `0`;
+create table t1 (id int primary key) engine = innodb key_block_size = 1;
+ERROR HY000: Can't create table 'test.t1' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t1' (errno: 1478)
+create table t2 (id int primary key) engine = innodb key_block_size = 2;
+ERROR HY000: Can't create table 'test.t2' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t2' (errno: 1478)
+create table t3 (id int primary key) engine = innodb key_block_size = 4;
+ERROR HY000: Can't create table 'test.t3' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t3' (errno: 1478)
+create table t4 (id int primary key) engine = innodb key_block_size = 8;
+ERROR HY000: Can't create table 'test.t4' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t4' (errno: 1478)
+create table t5 (id int primary key) engine = innodb key_block_size = 16;
+ERROR HY000: Can't create table 'test.t5' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t5' (errno: 1478)
+create table t6 (id int primary key) engine = innodb row_format = compressed;
+ERROR HY000: Can't create table 'test.t6' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t6' (errno: 1478)
+create table t7 (id int primary key) engine = innodb row_format = dynamic;
+ERROR HY000: Can't create table 'test.t7' (errno: 1478)
+show errors;
+Level	Code	Message
+Error	1478	InnoDB: ROW_FORMAT=DYNAMIC requires innodb_file_format > Antelope.
+Error	1005	Can't create table 'test.t7' (errno: 1478)
+create table t8 (id int primary key) engine = innodb row_format = compact;
+create table t9 (id int primary key) engine = innodb row_format = redundant;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+table_schema	table_name	row_format
+test	t8	Compact
+test	t9	Redundant
+drop table t8, t9;
+set global innodb_file_per_table=0;
+set global innodb_file_format=Antelope;
+set global innodb_file_per_table=on;
+set global innodb_file_format=`Barracuda`;
+set global innodb_file_format_check=`Antelope`;
+create table normal_table (
+c1 int
+) engine = innodb;
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Antelope
+create table zip_table (
+c1 int
+) engine = innodb key_block_size = 8;
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Barracuda
+set global innodb_file_format_check=`Antelope`;
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Antelope
+show table status;
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Barracuda
+drop table normal_table, zip_table;
diff --git a/storage/innodb_plugin/mysql-test/innodb-zip.test b/storage/innodb_plugin/mysql-test/innodb-zip.test
new file mode 100644
index 00000000000..ddc39d44487
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb-zip.test
@@ -0,0 +1,343 @@
+-- source include/have_innodb.inc
+
+let $per_table=`select @@innodb_file_per_table`;
+let $format=`select @@innodb_file_format`;
+let $innodb_file_format_check_orig=`select @@innodb_file_format_check`;
+set global innodb_file_per_table=off;
+set global innodb_file_format=`0`;
+
+create table t0(a int primary key) engine=innodb row_format=compressed;
+create table t00(a int primary key) engine=innodb
+key_block_size=4 row_format=compressed;
+create table t1(a int primary key) engine=innodb row_format=dynamic;
+create table t2(a int primary key) engine=innodb row_format=redundant;
+create table t3(a int primary key) engine=innodb row_format=compact;
+create table t4(a int primary key) engine=innodb key_block_size=9;
+create table t5(a int primary key) engine=innodb
+key_block_size=1 row_format=redundant;
+
+set global innodb_file_per_table=on;
+create table t6(a int primary key) engine=innodb
+key_block_size=1 row_format=redundant;
+set global innodb_file_format=`1`;
+create table t7(a int primary key) engine=innodb
+key_block_size=1 row_format=redundant;
+create table t8(a int primary key) engine=innodb
+key_block_size=1 row_format=fixed;
+create table t9(a int primary key) engine=innodb
+key_block_size=1 row_format=compact;
+create table t10(a int primary key) engine=innodb
+key_block_size=1 row_format=dynamic;
+create table t11(a int primary key) engine=innodb
+key_block_size=1 row_format=compressed;
+create table t12(a int primary key) engine=innodb
+key_block_size=1;
+create table t13(a int primary key) engine=innodb
+row_format=compressed;
+create table t14(a int primary key) engine=innodb key_block_size=9;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+
+drop table t0,t00,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14;
+alter table t1 key_block_size=0;
+alter table t1 row_format=dynamic;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+alter table t1 row_format=compact;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+alter table t1 row_format=redundant;
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+drop table t1;
+
+create table t1(a int not null, b text, index(b(10))) engine=innodb
+key_block_size=1;
+
+create table t2(b text)engine=innodb;
+insert into t2 values(concat('1abcdefghijklmnopqrstuvwxyz', repeat('A',5000)));
+
+insert into t1 select 1, b from t2;
+commit;
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+
+connection a;
+begin;
+update t1 set b=repeat('B',100);
+
+connection b;
+select a,left(b,40) from t1 natural join t2;
+
+connection a;
+rollback;
+
+connection b;
+select a,left(b,40) from t1 natural join t2;
+
+connection default;
+disconnect a;
+disconnect b;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+drop table t1,t2;
+
+# The following should fail even in non-strict mode.
+SET SESSION innodb_strict_mode = off;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE t1(
+	c TEXT NOT NULL, d TEXT NOT NULL,
+	PRIMARY KEY (c(767),d(767)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE t1(
+	c TEXT NOT NULL, d TEXT NOT NULL,
+	PRIMARY KEY (c(767),d(767)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII;
+CREATE TABLE t1(
+	c TEXT NOT NULL, d TEXT NOT NULL,
+	PRIMARY KEY (c(767),d(767)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII;
+drop table t1;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE t1(c TEXT, PRIMARY KEY (c(440)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII;
+CREATE TABLE t1(c TEXT, PRIMARY KEY (c(439)))
+ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII;
+INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512));
+DROP TABLE t1;
+
+#
+# Test blob column inheritance (mantis issue#36)
+#
+
+create table t1( c1 int not null, c2 blob, c3 blob, c4 blob,
+		primary key(c1, c2(22), c3(22)))
+		engine = innodb row_format = dynamic;
+begin;
+insert into t1 values(1, repeat('A', 20000), repeat('B', 20000),
+			repeat('C', 20000));
+
+update t1 set c3 = repeat('D', 20000) where c1 = 1;
+commit;
+
+# one blob column which is unchanged in update and part of PK
+# one blob column which is changed and part of of PK
+# one blob column which is not part of PK and is unchanged
+select count(*) from t1 where c2 = repeat('A', 20000);
+select count(*) from t1 where c3 = repeat('D', 20000);
+select count(*) from t1 where c4 = repeat('C', 20000);
+
+update t1 set c3 = repeat('E', 20000) where c1 = 1;
+drop table t1;
+
+#
+#
+# Test innodb_file_format
+#
+set global innodb_file_format=`0`;
+select @@innodb_file_format;
+set global innodb_file_format=`1`;
+select @@innodb_file_format;
+-- error ER_WRONG_ARGUMENTS
+set global innodb_file_format=`2`;
+-- error ER_WRONG_ARGUMENTS
+set global innodb_file_format=`-1`;
+set global innodb_file_format=`Antelope`;
+set global innodb_file_format=`Barracuda`;
+-- error ER_WRONG_ARGUMENTS
+set global innodb_file_format=`Cheetah`;
+-- error ER_WRONG_ARGUMENTS
+set global innodb_file_format=`abc`;
+-- error ER_WRONG_ARGUMENTS
+set global innodb_file_format=`1a`;
+-- error ER_WRONG_ARGUMENTS
+set global innodb_file_format=``;
+
+#test strict mode.
+# this does not work anymore, has been removed from mysqltest
+# -- enable_errors
+set global innodb_file_per_table = on;
+set global innodb_file_format = `1`;
+
+set innodb_strict_mode = off;
+create table t1 (id int primary key) engine = innodb key_block_size = 0;
+drop table t1;
+
+#set strict_mode
+set innodb_strict_mode = on;
+
+#Test different values of KEY_BLOCK_SIZE
+
+--error ER_CANT_CREATE_TABLE
+create table t1 (id int primary key) engine = innodb key_block_size = 0;
+show errors;
+
+--error ER_CANT_CREATE_TABLE
+create table t2 (id int primary key) engine = innodb key_block_size = 9;
+show errors;
+
+
+create table t3 (id int primary key) engine = innodb key_block_size = 1;
+create table t4 (id int primary key) engine = innodb key_block_size = 2;
+create table t5 (id int primary key) engine = innodb key_block_size = 4;
+create table t6 (id int primary key) engine = innodb key_block_size = 8;
+create table t7 (id int primary key) engine = innodb key_block_size = 16;
+
+#check various ROW_FORMAT values.
+create table t8 (id int primary key) engine = innodb row_format = compressed;
+create table t9 (id int primary key) engine = innodb row_format = dynamic;
+create table t10(id int primary key) engine = innodb row_format = compact;
+create table t11(id int primary key) engine = innodb row_format = redundant;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+drop table t3, t4, t5, t6, t7, t8, t9, t10, t11;
+
+#test different values of ROW_FORMAT with KEY_BLOCK_SIZE
+create table t1 (id int primary key) engine = innodb
+key_block_size = 8 row_format = compressed;
+
+--error ER_CANT_CREATE_TABLE
+create table t2 (id int primary key) engine = innodb
+key_block_size = 8 row_format = redundant;
+show errors;
+
+--error ER_CANT_CREATE_TABLE
+create table t3 (id int primary key) engine = innodb
+key_block_size = 8 row_format = compact;
+show errors;
+
+--error ER_CANT_CREATE_TABLE
+create table t4 (id int primary key) engine = innodb
+key_block_size = 8 row_format = dynamic;
+show errors;
+
+--error ER_CANT_CREATE_TABLE
+create table t5 (id int primary key) engine = innodb
+key_block_size = 8 row_format = default;
+show errors;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+drop table t1;
+
+#test multiple errors
+--error ER_CANT_CREATE_TABLE
+create table t1 (id int primary key) engine = innodb
+key_block_size = 9 row_format = redundant;
+show errors;
+
+--error ER_CANT_CREATE_TABLE
+create table t2 (id int primary key) engine = innodb
+key_block_size = 9 row_format = compact;
+show errors;
+
+--error ER_CANT_CREATE_TABLE
+create table t2 (id int primary key) engine = innodb
+key_block_size = 9 row_format = dynamic;
+show errors;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+
+#test valid values with innodb_file_per_table unset
+set global innodb_file_per_table = off;
+
+--error ER_CANT_CREATE_TABLE
+create table t1 (id int primary key) engine = innodb key_block_size = 1;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t2 (id int primary key) engine = innodb key_block_size = 2;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t3 (id int primary key) engine = innodb key_block_size = 4;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t4 (id int primary key) engine = innodb key_block_size = 8;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t5 (id int primary key) engine = innodb key_block_size = 16;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t6 (id int primary key) engine = innodb row_format = compressed;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t7 (id int primary key) engine = innodb row_format = dynamic;
+show errors;
+create table t8 (id int primary key) engine = innodb row_format = compact;
+create table t9 (id int primary key) engine = innodb row_format = redundant;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+drop table t8, t9;
+
+#test valid values with innodb_file_format unset
+set global innodb_file_per_table = on;
+set global innodb_file_format = `0`; 
+
+--error ER_CANT_CREATE_TABLE
+create table t1 (id int primary key) engine = innodb key_block_size = 1;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t2 (id int primary key) engine = innodb key_block_size = 2;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t3 (id int primary key) engine = innodb key_block_size = 4;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t4 (id int primary key) engine = innodb key_block_size = 8;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t5 (id int primary key) engine = innodb key_block_size = 16;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t6 (id int primary key) engine = innodb row_format = compressed;
+show errors;
+--error ER_CANT_CREATE_TABLE
+create table t7 (id int primary key) engine = innodb row_format = dynamic;
+show errors;
+create table t8 (id int primary key) engine = innodb row_format = compact;
+create table t9 (id int primary key) engine = innodb row_format = redundant;
+
+SELECT table_schema, table_name, row_format
+FROM information_schema.tables WHERE engine='innodb';
+drop table t8, t9;
+
+eval set global innodb_file_per_table=$per_table;
+eval set global innodb_file_format=$format;
+#
+# Testing of tablespace tagging
+#
+-- disable_info
+set global innodb_file_per_table=on;
+set global innodb_file_format=`Barracuda`;
+set global innodb_file_format_check=`Antelope`;
+create table normal_table (
+  c1 int
+) engine = innodb;
+select @@innodb_file_format_check;
+create table zip_table (
+  c1 int
+) engine = innodb key_block_size = 8;
+select @@innodb_file_format_check;
+set global innodb_file_format_check=`Antelope`;
+select @@innodb_file_format_check;
+-- disable_result_log
+show table status;
+-- enable_result_log
+select @@innodb_file_format_check;
+drop table normal_table, zip_table;
+-- disable_result_log
+
+#
+# restore environment to the state it was before this test execution
+#
+
+-- disable_query_log
+eval set global innodb_file_format=$format;
+eval set global innodb_file_per_table=$per_table;
+eval set global innodb_file_format_check=$innodb_file_format_check_orig;
diff --git a/storage/innodb_plugin/mysql-test/innodb.result b/storage/innodb_plugin/mysql-test/innodb.result
new file mode 100644
index 00000000000..bdae7633fd1
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb.result
@@ -0,0 +1,3310 @@
+drop table if exists t1,t2,t3,t4;
+drop database if exists mysqltest;
+create table t1 (id int unsigned not null auto_increment, code tinyint unsigned not null, name char(20) not null, primary key (id), key (code), unique (name)) engine=innodb;
+insert into t1 (code, name) values (1, 'Tim'), (1, 'Monty'), (2, 'David'), (2, 'Erik'), (3, 'Sasha'), (3, 'Jeremy'), (4, 'Matt');
+select id, code, name from t1 order by id;
+id	code	name
+1	1	Tim
+2	1	Monty
+3	2	David
+4	2	Erik
+5	3	Sasha
+6	3	Jeremy
+7	4	Matt
+update ignore t1 set id = 8, name = 'Sinisa' where id < 3;
+select id, code, name from t1 order by id;
+id	code	name
+2	1	Monty
+3	2	David
+4	2	Erik
+5	3	Sasha
+6	3	Jeremy
+7	4	Matt
+8	1	Sinisa
+update ignore t1 set id = id + 10, name = 'Ralph' where id < 4;
+select id, code, name from t1 order by id;
+id	code	name
+3	2	David
+4	2	Erik
+5	3	Sasha
+6	3	Jeremy
+7	4	Matt
+8	1	Sinisa
+12	1	Ralph
+drop table t1;
+CREATE TABLE t1 (
+id int(11) NOT NULL auto_increment,
+parent_id int(11) DEFAULT '0' NOT NULL,
+level tinyint(4) DEFAULT '0' NOT NULL,
+PRIMARY KEY (id),
+KEY parent_id (parent_id),
+KEY level (level)
+) engine=innodb;
+INSERT INTO t1 VALUES (1,0,0),(3,1,1),(4,1,1),(8,2,2),(9,2,2),(17,3,2),(22,4,2),(24,4,2),(28,5,2),(29,5,2),(30,5,2),(31,6,2),(32,6,2),(33,6,2),(203,7,2),(202,7,2),(20,3,2),(157,0,0),(193,5,2),(40,7,2),(2,1,1),(15,2,2),(6,1,1),(34,6,2),(35,6,2),(16,3,2),(7,1,1),(36,7,2),(18,3,2),(26,5,2),(27,5,2),(183,4,2),(38,7,2),(25,5,2),(37,7,2),(21,4,2),(19,3,2),(5,1,1),(179,5,2);
+update t1 set parent_id=parent_id+100;
+select * from t1 where parent_id=102;
+id	parent_id	level
+8	102	2
+9	102	2
+15	102	2
+update t1 set id=id+1000;
+update t1 set id=1024 where id=1009;
+Got one of the listed errors
+select * from t1;
+id	parent_id	level
+1001	100	0
+1002	101	1
+1003	101	1
+1004	101	1
+1005	101	1
+1006	101	1
+1007	101	1
+1008	102	2
+1009	102	2
+1015	102	2
+1016	103	2
+1017	103	2
+1018	103	2
+1019	103	2
+1020	103	2
+1021	104	2
+1022	104	2
+1024	104	2
+1025	105	2
+1026	105	2
+1027	105	2
+1028	105	2
+1029	105	2
+1030	105	2
+1031	106	2
+1032	106	2
+1033	106	2
+1034	106	2
+1035	106	2
+1036	107	2
+1037	107	2
+1038	107	2
+1040	107	2
+1157	100	0
+1179	105	2
+1183	104	2
+1193	105	2
+1202	107	2
+1203	107	2
+update ignore t1 set id=id+1;
+select * from t1;
+id	parent_id	level
+1001	100	0
+1002	101	1
+1003	101	1
+1004	101	1
+1005	101	1
+1006	101	1
+1007	101	1
+1008	102	2
+1010	102	2
+1015	102	2
+1016	103	2
+1017	103	2
+1018	103	2
+1019	103	2
+1020	103	2
+1021	104	2
+1023	104	2
+1024	104	2
+1025	105	2
+1026	105	2
+1027	105	2
+1028	105	2
+1029	105	2
+1030	105	2
+1031	106	2
+1032	106	2
+1033	106	2
+1034	106	2
+1035	106	2
+1036	107	2
+1037	107	2
+1039	107	2
+1041	107	2
+1158	100	0
+1180	105	2
+1184	104	2
+1194	105	2
+1202	107	2
+1204	107	2
+update ignore t1 set id=1023 where id=1010;
+select * from t1 where parent_id=102;
+id	parent_id	level
+1008	102	2
+1010	102	2
+1015	102	2
+explain select level from t1 where level=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	level	level	1	const	#	Using index
+explain select level,id from t1 where level=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	level	level	1	const	#	Using index
+explain select level,id,parent_id from t1 where level=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	level	level	1	const	#	
+select level,id from t1 where level=1;
+level	id
+1	1002
+1	1003
+1	1004
+1	1005
+1	1006
+1	1007
+select level,id,parent_id from t1 where level=1;
+level	id	parent_id
+1	1002	101
+1	1003	101
+1	1004	101
+1	1005	101
+1	1006	101
+1	1007	101
+optimize table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	optimize	note	Table does not support optimize, doing recreate + analyze instead
+test.t1	optimize	status	OK
+show keys from t1;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t1	0	PRIMARY	1	id	A	#	NULL	NULL		BTREE	
+t1	1	parent_id	1	parent_id	A	#	NULL	NULL		BTREE	
+t1	1	level	1	level	A	#	NULL	NULL		BTREE	
+drop table t1;
+CREATE TABLE t1 (
+gesuchnr int(11) DEFAULT '0' NOT NULL,
+benutzer_id int(11) DEFAULT '0' NOT NULL,
+PRIMARY KEY (gesuchnr,benutzer_id)
+) engine=innodb;
+replace into t1 (gesuchnr,benutzer_id) values (2,1);
+replace into t1 (gesuchnr,benutzer_id) values (1,1);
+replace into t1 (gesuchnr,benutzer_id) values (1,1);
+select * from t1;
+gesuchnr	benutzer_id
+1	1
+2	1
+drop table t1;
+create table t1 (a int) engine=innodb;
+insert into t1 values (1), (2);
+optimize table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	optimize	note	Table does not support optimize, doing recreate + analyze instead
+test.t1	optimize	status	OK
+delete from t1 where a = 1;
+select * from t1;
+a
+2
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+drop table t1;
+create table t1 (a int,b varchar(20)) engine=innodb;
+insert into t1 values (1,""), (2,"testing");
+delete from t1 where a = 1;
+select * from t1;
+a	b
+2	testing
+create index skr on t1 (a);
+insert into t1 values (3,""), (4,"testing");
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+show keys from t1;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t1	1	skr	1	a	A	#	NULL	NULL	YES	BTREE	
+drop table t1;
+create table t1 (a int,b varchar(20),key(a)) engine=innodb;
+insert into t1 values (1,""), (2,"testing");
+select * from t1 where a = 1;
+a	b
+1	
+drop table t1;
+create table t1 (n int not null primary key) engine=innodb;
+set autocommit=0;
+insert into t1 values (4);
+rollback;
+select n, "after rollback" from t1;
+n	after rollback
+insert into t1 values (4);
+commit;
+select n, "after commit" from t1;
+n	after commit
+4	after commit
+commit;
+insert into t1 values (5);
+insert into t1 values (4);
+ERROR 23000: Duplicate entry '4' for key 'PRIMARY'
+commit;
+select n, "after commit" from t1;
+n	after commit
+4	after commit
+5	after commit
+set autocommit=1;
+insert into t1 values (6);
+insert into t1 values (4);
+ERROR 23000: Duplicate entry '4' for key 'PRIMARY'
+select n from t1;
+n
+4
+5
+6
+set autocommit=0;
+begin;
+savepoint `my_savepoint`;
+insert into t1 values (7);
+savepoint `savept2`;
+insert into t1 values (3);
+select n from t1;
+n
+3
+4
+5
+6
+7
+savepoint savept3;
+rollback to savepoint savept2;
+rollback to savepoint savept3;
+ERROR 42000: SAVEPOINT savept3 does not exist
+rollback to savepoint savept2;
+release savepoint `my_savepoint`;
+select n from t1;
+n
+4
+5
+6
+7
+rollback to savepoint `my_savepoint`;
+ERROR 42000: SAVEPOINT my_savepoint does not exist
+rollback to savepoint savept2;
+ERROR 42000: SAVEPOINT savept2 does not exist
+insert into t1 values (8);
+savepoint sv;
+commit;
+savepoint sv;
+set autocommit=1;
+rollback;
+drop table t1;
+create table t1 (n int not null primary key) engine=innodb;
+start transaction;
+insert into t1 values (4);
+flush tables with read lock;
+commit;
+unlock tables;
+commit;
+select * from t1;
+n
+4
+drop table t1;
+create table t1 ( id int NOT NULL PRIMARY KEY, nom varchar(64)) engine=innodb;
+begin;
+insert into t1 values(1,'hamdouni');
+select id as afterbegin_id,nom as afterbegin_nom from t1;
+afterbegin_id	afterbegin_nom
+1	hamdouni
+rollback;
+select id as afterrollback_id,nom as afterrollback_nom from t1;
+afterrollback_id	afterrollback_nom
+set autocommit=0;
+insert into t1 values(2,'mysql');
+select id as afterautocommit0_id,nom as afterautocommit0_nom from t1;
+afterautocommit0_id	afterautocommit0_nom
+2	mysql
+rollback;
+select id as afterrollback_id,nom as afterrollback_nom from t1;
+afterrollback_id	afterrollback_nom
+set autocommit=1;
+drop table t1;
+CREATE TABLE t1 (id char(8) not null primary key, val int not null) engine=innodb;
+insert into t1 values ('pippo', 12);
+insert into t1 values ('pippo', 12);
+ERROR 23000: Duplicate entry 'pippo' for key 'PRIMARY'
+delete from t1;
+delete from t1 where id = 'pippo';
+select * from t1;
+id	val
+insert into t1 values ('pippo', 12);
+set autocommit=0;
+delete from t1;
+rollback;
+select * from t1;
+id	val
+pippo	12
+delete from t1;
+commit;
+select * from t1;
+id	val
+drop table t1;
+create table t1 (a integer) engine=innodb;
+start transaction;
+rename table t1 to t2;
+create table t1 (b integer) engine=innodb;
+insert into t1 values (1);
+rollback;
+drop table t1;
+rename table t2 to t1;
+drop table t1;
+set autocommit=1;
+CREATE TABLE t1 (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR(64)) ENGINE=innodb;
+INSERT INTO t1 VALUES (1, 'Jochen');
+select * from t1;
+ID	NAME
+1	Jochen
+drop table t1;
+CREATE TABLE t1 ( _userid VARCHAR(60) NOT NULL PRIMARY KEY) ENGINE=innodb;
+set autocommit=0;
+INSERT INTO t1  SET _userid='marc@anyware.co.uk';
+COMMIT;
+SELECT * FROM t1;
+_userid
+marc@anyware.co.uk
+SELECT _userid FROM t1 WHERE _userid='marc@anyware.co.uk';
+_userid
+marc@anyware.co.uk
+drop table t1;
+set autocommit=1;
+CREATE TABLE t1 (
+user_id int(10) DEFAULT '0' NOT NULL,
+name varchar(100),
+phone varchar(100),
+ref_email varchar(100) DEFAULT '' NOT NULL,
+detail varchar(200),
+PRIMARY KEY (user_id,ref_email)
+)engine=innodb;
+INSERT INTO t1 VALUES (10292,'sanjeev','29153373','sansh777@hotmail.com','xxx'),(10292,'shirish','2333604','shirish@yahoo.com','ddsds'),(10292,'sonali','323232','sonali@bolly.com','filmstar');
+select * from t1 where user_id=10292;
+user_id	name	phone	ref_email	detail
+10292	sanjeev	29153373	sansh777@hotmail.com	xxx
+10292	shirish	2333604	shirish@yahoo.com	ddsds
+10292	sonali	323232	sonali@bolly.com	filmstar
+INSERT INTO t1 VALUES (10291,'sanjeev','29153373','sansh777@hotmail.com','xxx'),(10293,'shirish','2333604','shirish@yahoo.com','ddsds');
+select * from t1 where user_id=10292;
+user_id	name	phone	ref_email	detail
+10292	sanjeev	29153373	sansh777@hotmail.com	xxx
+10292	shirish	2333604	shirish@yahoo.com	ddsds
+10292	sonali	323232	sonali@bolly.com	filmstar
+select * from t1 where user_id>=10292;
+user_id	name	phone	ref_email	detail
+10292	sanjeev	29153373	sansh777@hotmail.com	xxx
+10292	shirish	2333604	shirish@yahoo.com	ddsds
+10292	sonali	323232	sonali@bolly.com	filmstar
+10293	shirish	2333604	shirish@yahoo.com	ddsds
+select * from t1 where user_id>10292;
+user_id	name	phone	ref_email	detail
+10293	shirish	2333604	shirish@yahoo.com	ddsds
+select * from t1 where user_id<10292;
+user_id	name	phone	ref_email	detail
+10291	sanjeev	29153373	sansh777@hotmail.com	xxx
+drop table t1;
+CREATE TABLE t1 (a int not null, b int not null,c int not null,
+key(a),primary key(a,b), unique(c),key(a),unique(b));
+show index from t1;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t1	0	PRIMARY	1	a	A	#	NULL	NULL		BTREE	
+t1	0	PRIMARY	2	b	A	#	NULL	NULL		BTREE	
+t1	0	c	1	c	A	#	NULL	NULL		BTREE	
+t1	0	b	1	b	A	#	NULL	NULL		BTREE	
+t1	1	a	1	a	A	#	NULL	NULL		BTREE	
+t1	1	a_2	1	a	A	#	NULL	NULL		BTREE	
+drop table t1;
+create table t1 (col1 int not null, col2 char(4) not null, primary key(col1));
+alter table t1 engine=innodb;
+insert into t1 values ('1','1'),('5','2'),('2','3'),('3','4'),('4','4');
+select * from t1;
+col1	col2
+1	1
+2	3
+3	4
+4	4
+5	2
+update t1 set col2='7' where col1='4';
+select * from t1;
+col1	col2
+1	1
+2	3
+3	4
+4	7
+5	2
+alter table t1 add co3 int not null;
+select * from t1;
+col1	col2	co3
+1	1	0
+2	3	0
+3	4	0
+4	7	0
+5	2	0
+update t1 set col2='9' where col1='2';
+select * from t1;
+col1	col2	co3
+1	1	0
+2	9	0
+3	4	0
+4	7	0
+5	2	0
+drop table t1;
+create table t1 (a int not null , b int, primary key (a)) engine = innodb;
+create table t2 (a int not null , b int, primary key (a)) engine = myisam;
+insert into t1 VALUES (1,3) , (2,3), (3,3);
+select * from t1;
+a	b
+1	3
+2	3
+3	3
+insert into t2 select * from t1;
+select * from t2;
+a	b
+1	3
+2	3
+3	3
+delete from t1 where b = 3;
+select * from t1;
+a	b
+insert into t1 select * from t2;
+select * from t1;
+a	b
+1	3
+2	3
+3	3
+select * from t2;
+a	b
+1	3
+2	3
+3	3
+drop table t1,t2;
+CREATE TABLE t1 (
+user_name varchar(12),
+password text,
+subscribed char(1),
+user_id int(11) DEFAULT '0' NOT NULL,
+quota bigint(20),
+weight double,
+access_date date,
+access_time time,
+approved datetime,
+dummy_primary_key int(11) NOT NULL auto_increment,
+PRIMARY KEY (dummy_primary_key)
+) ENGINE=innodb;
+INSERT INTO t1 VALUES ('user_0','somepassword','N',0,0,0,'2000-09-07','23:06:59','2000-09-07 23:06:59',1);
+INSERT INTO t1 VALUES ('user_1','somepassword','Y',1,1,1,'2000-09-07','23:06:59','2000-09-07 23:06:59',2);
+INSERT INTO t1 VALUES ('user_2','somepassword','N',2,2,1.4142135623731,'2000-09-07','23:06:59','2000-09-07 23:06:59',3);
+INSERT INTO t1 VALUES ('user_3','somepassword','Y',3,3,1.7320508075689,'2000-09-07','23:06:59','2000-09-07 23:06:59',4);
+INSERT INTO t1 VALUES ('user_4','somepassword','N',4,4,2,'2000-09-07','23:06:59','2000-09-07 23:06:59',5);
+select  user_name, password , subscribed, user_id, quota, weight, access_date, access_time, approved, dummy_primary_key from t1 order by user_name;
+user_name	password	subscribed	user_id	quota	weight	access_date	access_time	approved	dummy_primary_key
+user_0	somepassword	N	0	0	0	2000-09-07	23:06:59	2000-09-07 23:06:59	1
+user_1	somepassword	Y	1	1	1	2000-09-07	23:06:59	2000-09-07 23:06:59	2
+user_2	somepassword	N	2	2	1.4142135623731	2000-09-07	23:06:59	2000-09-07 23:06:59	3
+user_3	somepassword	Y	3	3	1.7320508075689	2000-09-07	23:06:59	2000-09-07 23:06:59	4
+user_4	somepassword	N	4	4	2	2000-09-07	23:06:59	2000-09-07 23:06:59	5
+drop table t1;
+CREATE TABLE t1 (
+id int(11) NOT NULL auto_increment,
+parent_id int(11) DEFAULT '0' NOT NULL,
+level tinyint(4) DEFAULT '0' NOT NULL,
+KEY (id),
+KEY parent_id (parent_id),
+KEY level (level)
+) engine=innodb;
+INSERT INTO t1 VALUES (1,0,0),(3,1,1),(4,1,1),(8,2,2),(9,2,2),(17,3,2),(22,4,2),(24,4,2),(28,5,2),(29,5,2),(30,5,2),(31,6,2),(32,6,2),(33,6,2),(203,7,2),(202,7,2),(20,3,2),(157,0,0),(193,5,2),(40,7,2),(2,1,1),(15,2,2),(6,1,1),(34,6,2),(35,6,2),(16,3,2),(7,1,1),(36,7,2),(18,3,2),(26,5,2),(27,5,2),(183,4,2),(38,7,2),(25,5,2),(37,7,2),(21,4,2),(19,3,2),(5,1,1);
+INSERT INTO t1 values (179,5,2);
+update t1 set parent_id=parent_id+100;
+select * from t1 where parent_id=102;
+id	parent_id	level
+8	102	2
+9	102	2
+15	102	2
+update t1 set id=id+1000;
+update t1 set id=1024 where id=1009;
+select * from t1;
+id	parent_id	level
+1001	100	0
+1003	101	1
+1004	101	1
+1008	102	2
+1024	102	2
+1017	103	2
+1022	104	2
+1024	104	2
+1028	105	2
+1029	105	2
+1030	105	2
+1031	106	2
+1032	106	2
+1033	106	2
+1203	107	2
+1202	107	2
+1020	103	2
+1157	100	0
+1193	105	2
+1040	107	2
+1002	101	1
+1015	102	2
+1006	101	1
+1034	106	2
+1035	106	2
+1016	103	2
+1007	101	1
+1036	107	2
+1018	103	2
+1026	105	2
+1027	105	2
+1183	104	2
+1038	107	2
+1025	105	2
+1037	107	2
+1021	104	2
+1019	103	2
+1005	101	1
+1179	105	2
+update ignore t1 set id=id+1;
+select * from t1;
+id	parent_id	level
+1002	100	0
+1004	101	1
+1005	101	1
+1009	102	2
+1025	102	2
+1018	103	2
+1023	104	2
+1025	104	2
+1029	105	2
+1030	105	2
+1031	105	2
+1032	106	2
+1033	106	2
+1034	106	2
+1204	107	2
+1203	107	2
+1021	103	2
+1158	100	0
+1194	105	2
+1041	107	2
+1003	101	1
+1016	102	2
+1007	101	1
+1035	106	2
+1036	106	2
+1017	103	2
+1008	101	1
+1037	107	2
+1019	103	2
+1027	105	2
+1028	105	2
+1184	104	2
+1039	107	2
+1026	105	2
+1038	107	2
+1022	104	2
+1020	103	2
+1006	101	1
+1180	105	2
+update ignore t1 set id=1023 where id=1010;
+select * from t1 where parent_id=102;
+id	parent_id	level
+1009	102	2
+1025	102	2
+1016	102	2
+explain select level from t1 where level=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	level	level	1	const	#	Using index
+select level,id from t1 where level=1;
+level	id
+1	1004
+1	1005
+1	1003
+1	1007
+1	1008
+1	1006
+select level,id,parent_id from t1 where level=1;
+level	id	parent_id
+1	1004	101
+1	1005	101
+1	1003	101
+1	1007	101
+1	1008	101
+1	1006	101
+select level,id from t1 where level=1 order by id;
+level	id
+1	1003
+1	1004
+1	1005
+1	1006
+1	1007
+1	1008
+delete from t1 where level=1;
+select * from t1;
+id	parent_id	level
+1002	100	0
+1009	102	2
+1025	102	2
+1018	103	2
+1023	104	2
+1025	104	2
+1029	105	2
+1030	105	2
+1031	105	2
+1032	106	2
+1033	106	2
+1034	106	2
+1204	107	2
+1203	107	2
+1021	103	2
+1158	100	0
+1194	105	2
+1041	107	2
+1016	102	2
+1035	106	2
+1036	106	2
+1017	103	2
+1037	107	2
+1019	103	2
+1027	105	2
+1028	105	2
+1184	104	2
+1039	107	2
+1026	105	2
+1038	107	2
+1022	104	2
+1020	103	2
+1180	105	2
+drop table t1;
+CREATE TABLE t1 (
+sca_code char(6) NOT NULL,
+cat_code char(6) NOT NULL,
+sca_desc varchar(50),
+lan_code char(2) NOT NULL,
+sca_pic varchar(100),
+sca_sdesc varchar(50),
+sca_sch_desc varchar(16),
+PRIMARY KEY (sca_code, cat_code, lan_code),
+INDEX sca_pic (sca_pic)
+) engine = innodb ;
+INSERT INTO t1 ( sca_code, cat_code, sca_desc, lan_code, sca_pic, sca_sdesc, sca_sch_desc) VALUES ( 'PD', 'J', 'PENDANT', 'EN', NULL, NULL, 'PENDANT'),( 'RI', 'J', 'RING', 'EN', NULL, NULL, 'RING'),( 'QQ', 'N', 'RING', 'EN', 'not null', NULL, 'RING');
+select count(*) from t1 where sca_code = 'PD';
+count(*)
+1
+select count(*) from t1 where sca_code <= 'PD';
+count(*)
+1
+select count(*) from t1 where sca_pic is null;
+count(*)
+2
+alter table t1 drop index sca_pic, add index sca_pic (cat_code, sca_pic);
+select count(*) from t1 where sca_code='PD' and sca_pic is null;
+count(*)
+1
+select count(*) from t1 where cat_code='E';
+count(*)
+0
+alter table t1 drop index sca_pic, add index (sca_pic, cat_code);
+select count(*) from t1 where sca_code='PD' and sca_pic is null;
+count(*)
+1
+select count(*) from t1 where sca_pic >= 'n';
+count(*)
+1
+select sca_pic from t1 where sca_pic is null;
+sca_pic
+NULL
+NULL
+update t1 set sca_pic="test" where sca_pic is null;
+delete from t1 where sca_code='pd';
+drop table t1;
+set @a:=now();
+CREATE TABLE t1 (a int not null, b timestamp not null, primary key (a)) engine=innodb;
+insert into t1 (a) values(1),(2),(3);
+select t1.a from t1 natural join t1 as t2 where t1.b >= @a order by t1.a;
+a
+1
+2
+3
+select a from t1 natural join t1 as t2 where b >= @a order by a;
+a
+1
+2
+3
+update t1 set a=5 where a=1;
+select a from t1;
+a
+2
+3
+5
+drop table t1;
+create table t1 (a varchar(100) not null, primary key(a), b int not null) engine=innodb;
+insert into t1 values("hello",1),("world",2);
+select * from t1 order by b desc;
+a	b
+world	2
+hello	1
+optimize table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	optimize	note	Table does not support optimize, doing recreate + analyze instead
+test.t1	optimize	status	OK
+show keys from t1;
+Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment
+t1	0	PRIMARY	1	a	A	#	NULL	NULL		BTREE	
+drop table t1;
+create table t1 (i int, j int ) ENGINE=innodb;
+insert into t1 values (1,2);
+select * from t1 where i=1 and j=2;
+i	j
+1	2
+create index ax1 on t1 (i,j);
+select * from t1 where i=1 and j=2;
+i	j
+1	2
+drop table t1;
+CREATE TABLE t1 (
+a int3 unsigned NOT NULL,
+b int1 unsigned NOT NULL,
+UNIQUE (a, b)
+) ENGINE = innodb;
+INSERT INTO t1 VALUES (1, 1);
+SELECT MIN(B),MAX(b) FROM t1 WHERE t1.a = 1;
+MIN(B)	MAX(b)
+1	1
+drop table t1;
+CREATE TABLE t1 (a int unsigned NOT NULL) engine=innodb;
+INSERT INTO t1 VALUES (1);
+SELECT * FROM t1;
+a
+1
+DROP TABLE t1;
+create table t1 (a int  primary key,b int, c int, d int, e int, f int, g int, h int, i int, j int, k int, l int, m int, n int, o int, p int, q int, r int, s int, t int, u int, v int, w int, x int, y int, z int, a1 int, a2 int, a3 int, a4 int, a5 int, a6 int, a7 int, a8 int, a9 int, b1 int, b2 int, b3 int, b4 int, b5 int, b6 int) engine = innodb;
+insert into t1 values (1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1);
+explain select * from t1 where a > 0 and a < 50;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	PRIMARY	PRIMARY	4	NULL	#	Using where
+drop table t1;
+create table t1 (id int NOT NULL,id2 int NOT NULL,id3 int NOT NULL,dummy1 char(30),primary key (id,id2),index index_id3 (id3)) engine=innodb;
+insert into t1 values (0,0,0,'ABCDEFGHIJ'),(2,2,2,'BCDEFGHIJK'),(1,1,1,'CDEFGHIJKL');
+LOCK TABLES t1 WRITE;
+insert into t1 values (99,1,2,'D'),(1,1,2,'D');
+ERROR 23000: Duplicate entry '1-1' for key 'PRIMARY'
+select id from t1;
+id
+0
+1
+2
+select id from t1;
+id
+0
+1
+2
+UNLOCK TABLES;
+DROP TABLE t1;
+create table t1 (id int NOT NULL,id2 int NOT NULL,id3 int NOT NULL,dummy1 char(30),primary key (id,id2),index index_id3 (id3)) engine=innodb;
+insert into t1 values (0,0,0,'ABCDEFGHIJ'),(2,2,2,'BCDEFGHIJK'),(1,1,1,'CDEFGHIJKL');
+LOCK TABLES t1 WRITE;
+begin;
+insert into t1 values (99,1,2,'D'),(1,1,2,'D');
+ERROR 23000: Duplicate entry '1-1' for key 'PRIMARY'
+select id from t1;
+id
+0
+1
+2
+insert ignore into t1 values (100,1,2,'D'),(1,1,99,'D');
+commit;
+select id,id3 from t1;
+id	id3
+0	0
+1	1
+2	2
+100	2
+UNLOCK TABLES;
+DROP TABLE t1;
+create table t1 (a char(20), unique (a(5))) engine=innodb;
+drop table t1;
+create table t1 (a char(20), index (a(5))) engine=innodb;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` char(20) DEFAULT NULL,
+  KEY `a` (`a`(5))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create temporary table t1 (a int not null auto_increment, primary key(a)) engine=innodb;
+insert into t1 values (NULL),(NULL),(NULL);
+delete from t1 where a=3;
+insert into t1 values (NULL);
+select * from t1;
+a
+1
+2
+4
+alter table t1 add b int;
+select * from t1;
+a	b
+1	NULL
+2	NULL
+4	NULL
+drop table t1;
+create table t1
+(
+id int auto_increment primary key,
+name varchar(32) not null,
+value text not null,
+uid int not null,
+unique key(name,uid)
+) engine=innodb;
+insert into t1 values (1,'one','one value',101),
+(2,'two','two value',102),(3,'three','three value',103);
+set insert_id=5;
+replace into t1 (value,name,uid) values ('other value','two',102);
+delete from t1 where uid=102;
+set insert_id=5;
+replace into t1 (value,name,uid) values ('other value','two',102);
+set insert_id=6;
+replace into t1 (value,name,uid) values ('other value','two',102);
+select * from t1;
+id	name	value	uid
+1	one	one value	101
+3	three	three value	103
+6	two	other value	102
+drop table t1;
+create database mysqltest;
+create table mysqltest.t1 (a int not null) engine= innodb;
+insert into mysqltest.t1 values(1);
+create table mysqltest.t2 (a int not null) engine= myisam;
+insert into mysqltest.t2 values(1);
+create table mysqltest.t3 (a int not null) engine= heap;
+insert into mysqltest.t3 values(1);
+commit;
+drop database mysqltest;
+show tables from mysqltest;
+ERROR 42000: Unknown database 'mysqltest'
+set autocommit=0;
+create table t1 (a int not null) engine= innodb;
+insert into t1 values(1),(2);
+truncate table t1;
+commit;
+truncate table t1;
+truncate table t1;
+select * from t1;
+a
+insert into t1 values(1),(2);
+delete from t1;
+select * from t1;
+a
+commit;
+drop table t1;
+set autocommit=1;
+create table t1 (a int not null) engine= innodb;
+insert into t1 values(1),(2);
+truncate table t1;
+insert into t1 values(1),(2);
+select * from t1;
+a
+1
+2
+truncate table t1;
+insert into t1 values(1),(2);
+delete from t1;
+select * from t1;
+a
+drop table t1;
+create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b)) engine=innodb;
+insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4);
+explain select * from t1 order by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	#	
+explain select * from t1 order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	#	Using filesort
+explain select * from t1 order by c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	#	Using filesort
+explain select a from t1 order by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	#	Using index
+explain select b from t1 order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	4	NULL	#	Using index
+explain select a,b from t1 order by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	4	NULL	#	Using index
+explain select a,b from t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	b	4	NULL	#	Using index
+explain select a,b,c from t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	#	
+drop table t1;
+create table t1 (t int not null default 1, key (t)) engine=innodb;
+desc t1;
+Field	Type	Null	Key	Default	Extra
+t	int(11)	NO	MUL	1	
+drop table t1;
+CREATE TABLE t1 (
+number bigint(20) NOT NULL default '0',
+cname char(15) NOT NULL default '',
+carrier_id smallint(6) NOT NULL default '0',
+privacy tinyint(4) NOT NULL default '0',
+last_mod_date timestamp NOT NULL,
+last_mod_id smallint(6) NOT NULL default '0',
+last_app_date timestamp NOT NULL,
+last_app_id smallint(6) default '-1',
+version smallint(6) NOT NULL default '0',
+assigned_scps int(11) default '0',
+status tinyint(4) default '0'
+) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (4077711111,'SeanWheeler',90,2,20020111112846,500,00000000000000,-1,2,3,1);
+INSERT INTO t1 VALUES (9197722223,'berry',90,3,20020111112809,500,20020102114532,501,4,10,0);
+INSERT INTO t1 VALUES (650,'San Francisco',0,0,20011227111336,342,00000000000000,-1,1,24,1);
+INSERT INTO t1 VALUES (302467,'Sue\'s Subshop',90,3,20020109113241,500,20020102115111,501,7,24,0);
+INSERT INTO t1 VALUES (6014911113,'SudzCarwash',520,1,20020102115234,500,20020102115259,501,33,32768,0);
+INSERT INTO t1 VALUES (333,'tubs',99,2,20020109113440,501,20020109113440,500,3,10,0);
+CREATE TABLE t2 (
+number bigint(20) NOT NULL default '0',
+cname char(15) NOT NULL default '',
+carrier_id smallint(6) NOT NULL default '0',
+privacy tinyint(4) NOT NULL default '0',
+last_mod_date timestamp NOT NULL,
+last_mod_id smallint(6) NOT NULL default '0',
+last_app_date timestamp NOT NULL,
+last_app_id smallint(6) default '-1',
+version smallint(6) NOT NULL default '0',
+assigned_scps int(11) default '0',
+status tinyint(4) default '0'
+) ENGINE=InnoDB;
+INSERT INTO t2 VALUES (4077711111,'SeanWheeler',0,2,20020111112853,500,00000000000000,-1,2,3,1);
+INSERT INTO t2 VALUES (9197722223,'berry',90,3,20020111112818,500,20020102114532,501,4,10,0);
+INSERT INTO t2 VALUES (650,'San Francisco',90,0,20020109113158,342,00000000000000,-1,1,24,1);
+INSERT INTO t2 VALUES (333,'tubs',99,2,20020109113453,501,20020109113453,500,3,10,0);
+select * from t1;
+number	cname	carrier_id	privacy	last_mod_date	last_mod_id	last_app_date	last_app_id	version	assigned_scps	status
+4077711111	SeanWheeler	90	2	2002-01-11 11:28:46	500	0000-00-00 00:00:00	-1	2	3	1
+9197722223	berry	90	3	2002-01-11 11:28:09	500	2002-01-02 11:45:32	501	4	10	0
+650	San Francisco	0	0	2001-12-27 11:13:36	342	0000-00-00 00:00:00	-1	1	24	1
+302467	Sue's Subshop	90	3	2002-01-09 11:32:41	500	2002-01-02 11:51:11	501	7	24	0
+6014911113	SudzCarwash	520	1	2002-01-02 11:52:34	500	2002-01-02 11:52:59	501	33	32768	0
+333	tubs	99	2	2002-01-09 11:34:40	501	2002-01-09 11:34:40	500	3	10	0
+select * from t2;
+number	cname	carrier_id	privacy	last_mod_date	last_mod_id	last_app_date	last_app_id	version	assigned_scps	status
+4077711111	SeanWheeler	0	2	2002-01-11 11:28:53	500	0000-00-00 00:00:00	-1	2	3	1
+9197722223	berry	90	3	2002-01-11 11:28:18	500	2002-01-02 11:45:32	501	4	10	0
+650	San Francisco	90	0	2002-01-09 11:31:58	342	0000-00-00 00:00:00	-1	1	24	1
+333	tubs	99	2	2002-01-09 11:34:53	501	2002-01-09 11:34:53	500	3	10	0
+delete t1, t2 from t1 left join t2 on t1.number=t2.number where (t1.carrier_id=90 and t1.number=t2.number) or (t2.carrier_id=90 and t1.number=t2.number) or  (t1.carrier_id=90 and t2.number is null);
+select * from t1;
+number	cname	carrier_id	privacy	last_mod_date	last_mod_id	last_app_date	last_app_id	version	assigned_scps	status
+6014911113	SudzCarwash	520	1	2002-01-02 11:52:34	500	2002-01-02 11:52:59	501	33	32768	0
+333	tubs	99	2	2002-01-09 11:34:40	501	2002-01-09 11:34:40	500	3	10	0
+select * from t2;
+number	cname	carrier_id	privacy	last_mod_date	last_mod_id	last_app_date	last_app_id	version	assigned_scps	status
+333	tubs	99	2	2002-01-09 11:34:53	501	2002-01-09 11:34:53	500	3	10	0
+select * from t2;
+number	cname	carrier_id	privacy	last_mod_date	last_mod_id	last_app_date	last_app_id	version	assigned_scps	status
+333	tubs	99	2	2002-01-09 11:34:53	501	2002-01-09 11:34:53	500	3	10	0
+drop table t1,t2;
+create table t1 (id int unsigned not null auto_increment, code tinyint unsigned not null, name char(20) not null, primary key (id), key (code), unique (name)) engine=innodb;
+BEGIN;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+SELECT @@tx_isolation,@@global.tx_isolation;
+@@tx_isolation	@@global.tx_isolation
+SERIALIZABLE	REPEATABLE-READ
+insert into t1 (code, name) values (1, 'Tim'), (1, 'Monty'), (2, 'David');
+select id, code, name from t1 order by id;
+id	code	name
+1	1	Tim
+2	1	Monty
+3	2	David
+COMMIT;
+BEGIN;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+insert into t1 (code, name) values (2, 'Erik'), (3, 'Sasha');
+select id, code, name from t1 order by id;
+id	code	name
+1	1	Tim
+2	1	Monty
+3	2	David
+4	2	Erik
+5	3	Sasha
+COMMIT;
+SET binlog_format='MIXED';
+BEGIN;
+SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
+insert into t1 (code, name) values (3, 'Jeremy'), (4, 'Matt');
+select id, code, name from t1 order by id;
+id	code	name
+1	1	Tim
+2	1	Monty
+3	2	David
+4	2	Erik
+5	3	Sasha
+6	3	Jeremy
+7	4	Matt
+COMMIT;
+DROP TABLE t1;
+create table t1 (n int(10), d int(10)) engine=innodb;
+create table t2 (n int(10), d int(10)) engine=innodb;
+insert into t1 values(1,1),(1,2);
+insert into t2 values(1,10),(2,20);
+UPDATE t1,t2 SET t1.d=t2.d,t2.d=30 WHERE t1.n=t2.n;
+select * from t1;
+n	d
+1	10
+1	10
+select * from t2;
+n	d
+1	30
+2	20
+drop table t1,t2;
+drop table if exists t1, t2;
+CREATE TABLE t1 (a int, PRIMARY KEY (a));
+CREATE TABLE t2 (a int, PRIMARY KEY (a)) ENGINE=InnoDB;
+create trigger trg_del_t2 after  delete on t2 for each row
+insert into t1 values (1);
+insert into t1 values (1);
+insert into t2 values (1),(2);
+delete t2 from t2;
+ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
+select count(*) from t2 /* must be 2 as restored after rollback caused by the error */;
+count(*)
+2
+drop table t1, t2;
+drop table if exists t1, t2;
+CREATE TABLE t1 (a int, PRIMARY KEY (a));
+CREATE TABLE t2 (a int, PRIMARY KEY (a)) ENGINE=InnoDB;
+create trigger trg_del_t2 after  delete on t2 for each row
+insert into t1 values (1);
+insert into t1 values (1);
+insert into t2 values (1),(2);
+delete t2 from t2;
+ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
+select count(*) from t2 /* must be 2 as restored after rollback caused by the error */;
+count(*)
+2
+drop table t1, t2;
+create table t1 (a int, b int) engine=innodb;
+insert into t1 values(20,null);
+select t2.b, ifnull(t2.b,"this is null") from t1 as t2 left join t1 as t3 on
+t2.b=t3.a;
+b	ifnull(t2.b,"this is null")
+NULL	this is null
+select t2.b, ifnull(t2.b,"this is null") from t1 as t2 left join t1 as t3 on
+t2.b=t3.a order by 1;
+b	ifnull(t2.b,"this is null")
+NULL	this is null
+insert into t1 values(10,null);
+select t2.b, ifnull(t2.b,"this is null") from t1 as t2 left join t1 as t3 on
+t2.b=t3.a order by 1;
+b	ifnull(t2.b,"this is null")
+NULL	this is null
+NULL	this is null
+drop table t1;
+create table t1 (a varchar(10) not null) engine=myisam;
+create table t2 (b varchar(10) not null unique) engine=innodb;
+select t1.a from t1,t2 where t1.a=t2.b;
+a
+drop table t1,t2;
+create table t1 (a int not null, b int, primary key (a)) engine = innodb;
+create table t2 (a int not null, b int, primary key (a)) engine = innodb;
+insert into t1 values (10, 20);
+insert into t2 values (10, 20);
+update t1, t2 set t1.b = 150, t2.b = t1.b where t2.a = t1.a and t1.a = 10;
+drop table t1,t2;
+CREATE TABLE t1 (id INT NOT NULL, PRIMARY KEY (id)) ENGINE=INNODB;
+CREATE TABLE t2 (id INT PRIMARY KEY, t1_id INT, INDEX par_ind (t1_id), FOREIGN KEY (t1_id) REFERENCES t1(id)  ON DELETE CASCADE ) ENGINE=INNODB;
+insert into t1 set id=1;
+insert into t2 set id=1, t1_id=1;
+delete t1,t2 from t1,t2 where t1.id=t2.t1_id;
+select * from t1;
+id
+select * from t2;
+id	t1_id
+drop table t2,t1;
+CREATE TABLE t1(id INT NOT NULL,  PRIMARY KEY (id)) ENGINE=INNODB;
+CREATE TABLE t2(id  INT PRIMARY KEY, t1_id INT, INDEX par_ind (t1_id)  ) ENGINE=INNODB;
+INSERT INTO t1 VALUES(1);
+INSERT INTO t2 VALUES(1, 1);
+SELECT * from t1;
+id
+1
+UPDATE t1,t2 SET t1.id=t1.id+1, t2.t1_id=t1.id+1;
+SELECT * from t1;
+id
+2
+UPDATE t1,t2 SET t1.id=t1.id+1 where t1.id!=t2.id;
+SELECT * from t1;
+id
+3
+DROP TABLE t1,t2;
+set autocommit=0;
+CREATE TABLE t1 (id CHAR(15) NOT NULL, value CHAR(40) NOT NULL, PRIMARY KEY(id)) ENGINE=InnoDB;
+CREATE TABLE t2 (id CHAR(15) NOT NULL, value CHAR(40) NOT NULL, PRIMARY KEY(id)) ENGINE=InnoDB;
+CREATE TABLE t3 (id1 CHAR(15) NOT NULL, id2 CHAR(15) NOT NULL, PRIMARY KEY(id1, id2)) ENGINE=InnoDB;
+INSERT INTO t3 VALUES("my-test-1", "my-test-2");
+COMMIT;
+INSERT INTO t1 VALUES("this-key", "will disappear");
+INSERT INTO t2 VALUES("this-key", "will also disappear");
+DELETE FROM t3 WHERE id1="my-test-1";
+SELECT * FROM t1;
+id	value
+this-key	will disappear
+SELECT * FROM t2;
+id	value
+this-key	will also disappear
+SELECT * FROM t3;
+id1	id2
+ROLLBACK;
+SELECT * FROM t1;
+id	value
+SELECT * FROM t2;
+id	value
+SELECT * FROM t3;
+id1	id2
+my-test-1	my-test-2
+SELECT * FROM t3 WHERE id1="my-test-1" LOCK IN SHARE MODE;
+id1	id2
+my-test-1	my-test-2
+COMMIT;
+set autocommit=1;
+DROP TABLE t1,t2,t3;
+CREATE TABLE t1 (a int not null primary key, b int not null, unique (b)) engine=innodb;
+INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9);
+UPDATE t1 set a=a+100 where b between 2 and 3 and a < 1000;
+SELECT * from t1;
+a	b
+1	1
+102	2
+103	3
+4	4
+5	5
+6	6
+7	7
+8	8
+9	9
+drop table t1;
+CREATE TABLE t1 (a int not null primary key, b int not null, key (b)) engine=innodb;
+CREATE TABLE t2 (a int not null primary key, b int not null, key (b)) engine=innodb;
+INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),(11,11),(12,12);
+INSERT INTO t2 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9);
+update t1,t2 set t1.a=t1.a+100;
+select * from t1;
+a	b
+101	1
+102	2
+103	3
+104	4
+105	5
+106	6
+107	7
+108	8
+109	9
+110	10
+111	11
+112	12
+update t1,t2 set t1.a=t1.a+100 where t1.a=101;
+select * from t1;
+a	b
+201	1
+102	2
+103	3
+104	4
+105	5
+106	6
+107	7
+108	8
+109	9
+110	10
+111	11
+112	12
+update t1,t2 set t1.b=t1.b+10 where t1.b=2;
+select * from t1;
+a	b
+201	1
+103	3
+104	4
+105	5
+106	6
+107	7
+108	8
+109	9
+110	10
+111	11
+102	12
+112	12
+update t1,t2 set t1.b=t1.b+2,t2.b=t1.b+10 where t1.b between 3 and 5 and t1.a=t2.a+100;
+select * from t1;
+a	b
+201	1
+103	5
+104	6
+106	6
+105	7
+107	7
+108	8
+109	9
+110	10
+111	11
+102	12
+112	12
+select * from t2;
+a	b
+1	1
+2	2
+6	6
+7	7
+8	8
+9	9
+3	13
+4	14
+5	15
+drop table t1,t2;
+CREATE TABLE t2 (   NEXT_T         BIGINT NOT NULL PRIMARY KEY) ENGINE=MyISAM;
+CREATE TABLE t1 (  B_ID           INTEGER NOT NULL PRIMARY KEY) ENGINE=InnoDB;
+SET AUTOCOMMIT=0;
+INSERT INTO t1 ( B_ID ) VALUES ( 1 );
+INSERT INTO t2 ( NEXT_T ) VALUES ( 1 );
+ROLLBACK;
+Warnings:
+Warning	1196	Some non-transactional changed tables couldn't be rolled back
+SELECT * FROM t1;
+B_ID
+drop table  t1,t2;
+create table t1  ( pk         int primary key,    parent     int not null,    child      int not null,       index (parent)  ) engine = innodb;
+insert into t1 values   (1,0,4),  (2,1,3),  (3,2,1),  (4,1,2);
+select distinct  parent,child   from t1   order by parent;
+parent	child
+0	4
+1	2
+1	3
+2	1
+drop table t1;
+create table t1 (a int not null auto_increment primary key, b int, c int, key(c)) engine=innodb;
+create table t2 (a int not null auto_increment primary key, b int);
+insert into t1 (b) values (null),(null),(null),(null),(null),(null),(null);
+insert into t2 (a) select b from t1;
+insert into t1 (b) select b from t2;
+insert into t2 (a) select b from t1;
+insert into t1 (a) select b from t2;
+insert into t2 (a) select b from t1;
+insert into t1 (a) select b from t2;
+insert into t2 (a) select b from t1;
+insert into t1 (a) select b from t2;
+insert into t2 (a) select b from t1;
+insert into t1 (a) select b from t2;
+select count(*) from t1;
+count(*)
+623
+explain select * from t1 where c between 1 and 2500;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	c	c	5	NULL	#	Using where
+update t1 set c=a;
+explain select * from t1 where c between 1 and 2500;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	c	NULL	NULL	NULL	#	Using where
+drop table t1,t2;
+create table t1 (id int primary key auto_increment, fk int, index index_fk (fk)) engine=innodb;
+insert into t1 (id) values (null),(null),(null),(null),(null);
+update t1 set fk=69 where fk is null order by id limit 1;
+SELECT * from t1;
+id	fk
+2	NULL
+3	NULL
+4	NULL
+5	NULL
+1	69
+drop table t1;
+create table t1 (a int not null, b int not null, key (a));
+insert into t1 values (1,1),(1,2),(1,3),(3,1),(3,2),(3,3),(3,1),(3,2),(3,3),(2,1),(2,2),(2,3);
+SET @tmp=0;
+update t1 set b=(@tmp:=@tmp+1) order by a;
+update t1 set b=99 where a=1 order by b asc limit 1;
+update t1 set b=100 where a=1 order by b desc limit 2;
+update t1 set a=a+10+b where a=1 order by b;
+select * from t1 order by a,b;
+a	b
+2	4
+2	5
+2	6
+3	7
+3	8
+3	9
+3	10
+3	11
+3	12
+13	2
+111	100
+111	100
+drop table t1;
+create table t1 ( c char(8) not null ) engine=innodb;
+insert into t1 values ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7'),('8'),('9');
+insert into t1 values ('A'),('B'),('C'),('D'),('E'),('F');
+alter table t1 add b char(8) not null;
+alter table t1 add a char(8) not null;
+alter table t1 add primary key (a,b,c);
+update t1 set a=c, b=c;
+create table t2 (c char(8) not null, b char(8) not null, a char(8) not null, primary key(a,b,c)) engine=innodb;
+insert into t2 select * from t1;
+delete t1,t2 from t2,t1 where t1.a<'B' and t2.b=t1.b;
+drop table t1,t2;
+SET AUTOCOMMIT=1;
+create table t1 (a integer auto_increment primary key) engine=innodb;
+insert into t1 (a) values (NULL),(NULL);
+truncate table t1;
+insert into t1 (a) values (NULL),(NULL);
+SELECT * from t1;
+a
+1
+2
+drop table t1;
+CREATE TABLE t1 (`id 1` INT NOT NULL, PRIMARY KEY (`id 1`)) ENGINE=INNODB;
+CREATE TABLE t2 (id INT PRIMARY KEY, t1_id INT, INDEX par_ind (t1_id), FOREIGN KEY (`t1_id`) REFERENCES `t1`(`id 1`)  ON DELETE CASCADE ) ENGINE=INNODB;
+drop table t2,t1;
+create table `t1` (`id` int( 11 ) not null  ,primary key ( `id` )) engine = innodb;
+insert into `t1`values ( 1 ) ;
+create table `t2` (`id` int( 11 ) not null default '0',unique key `id` ( `id` ) ,constraint `t1_id_fk` foreign key ( `id` ) references `t1` (`id` )) engine = innodb;
+insert into `t2`values ( 1 ) ;
+create table `t3` (`id` int( 11 ) not null default '0',key `id` ( `id` ) ,constraint `t2_id_fk` foreign key ( `id` ) references `t2` (`id` )) engine = innodb;
+insert into `t3`values ( 1 ) ;
+delete t3,t2,t1 from t1,t2,t3 where t1.id =1 and t2.id = t1.id and t3.id = t2.id;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `t1_id_fk` FOREIGN KEY (`id`) REFERENCES `t1` (`id`))
+update t1,t2,t3 set t3.id=5, t2.id=6, t1.id=7  where t1.id =1 and t2.id = t1.id and t3.id = t2.id;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `t1_id_fk` FOREIGN KEY (`id`) REFERENCES `t1` (`id`))
+update t3 set  t3.id=7  where t1.id =1 and t2.id = t1.id and t3.id = t2.id;
+ERROR 42S22: Unknown column 't1.id' in 'where clause'
+drop table t3,t2,t1;
+create table t1(
+id int primary key,
+pid int,
+index(pid),
+foreign key(pid) references t1(id) on delete cascade) engine=innodb;
+insert into t1 values(0,0),(1,0),(2,1),(3,2),(4,3),(5,4),(6,5),(7,6),
+(8,7),(9,8),(10,9),(11,10),(12,11),(13,12),(14,13),(15,14);
+delete from t1 where id=0;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t1`, CONSTRAINT `t1_ibfk_1` FOREIGN KEY (`pid`) REFERENCES `t1` (`id`) ON DELETE CASCADE)
+delete from t1 where id=15;
+delete from t1 where id=0;
+drop table t1;
+CREATE TABLE t1 (col1 int(1))ENGINE=InnoDB;
+CREATE TABLE t2 (col1 int(1),stamp TIMESTAMP,INDEX stamp_idx
+(stamp))ENGINE=InnoDB;
+insert into t1 values (1),(2),(3);
+insert into t2 values (1, 20020204130000),(2, 20020204130000),(4,20020204310000 ),(5,20020204230000);
+Warnings:
+Warning	1265	Data truncated for column 'stamp' at row 3
+SELECT col1 FROM t1 UNION SELECT col1 FROM t2 WHERE stamp <
+'20020204120000' GROUP BY col1;
+col1
+1
+2
+3
+4
+drop table t1,t2;
+CREATE TABLE t1 (
+`id` int(10) unsigned NOT NULL auto_increment,
+`id_object` int(10) unsigned default '0',
+`id_version` int(10) unsigned NOT NULL default '1',
+`label` varchar(100) NOT NULL default '',
+`description` text,
+PRIMARY KEY  (`id`),
+KEY `id_object` (`id_object`),
+KEY `id_version` (`id_version`)
+) ENGINE=InnoDB;
+INSERT INTO t1 VALUES("6", "3382", "9", "Test", NULL), ("7", "102", "5", "Le Pekin (Test)", NULL),("584", "1794", "4", "Test de resto", NULL),("837", "1822", "6", "Test 3", NULL),("1119", "3524", "1", "Societe Test", NULL),("1122", "3525", "1", "Fournisseur Test", NULL);
+CREATE TABLE t2 (
+`id` int(10) unsigned NOT NULL auto_increment,
+`id_version` int(10) unsigned NOT NULL default '1',
+PRIMARY KEY  (`id`),
+KEY `id_version` (`id_version`)
+) ENGINE=InnoDB;
+INSERT INTO t2 VALUES("3524", "1"),("3525", "1"),("1794", "4"),("102", "5"),("1822", "6"),("3382", "9");
+SELECT t2.id, t1.`label` FROM t2 INNER JOIN
+(SELECT t1.id_object as id_object FROM t1 WHERE t1.`label` LIKE '%test%') AS lbl 
+ON (t2.id = lbl.id_object) INNER JOIN t1 ON (t2.id = t1.id_object);
+id	label
+3382	Test
+102	Le Pekin (Test)
+1794	Test de resto
+1822	Test 3
+3524	Societe Test
+3525	Fournisseur Test
+drop table t1,t2;
+create table t1 (a int, b varchar(200), c text not null) checksum=1 engine=myisam;
+create table t2 (a int, b varchar(200), c text not null) checksum=0 engine=innodb;
+create table t3 (a int, b varchar(200), c text not null) checksum=1 engine=innodb;
+insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, "");
+insert t2 select * from t1;
+insert t3 select * from t1;
+checksum table t1, t2, t3, t4 quick;
+Table	Checksum
+test.t1	2948697075
+test.t2	NULL
+test.t3	NULL
+test.t4	NULL
+Warnings:
+Error	1146	Table 'test.t4' doesn't exist
+checksum table t1, t2, t3, t4;
+Table	Checksum
+test.t1	2948697075
+test.t2	2948697075
+test.t3	2948697075
+test.t4	NULL
+Warnings:
+Error	1146	Table 'test.t4' doesn't exist
+checksum table t1, t2, t3, t4 extended;
+Table	Checksum
+test.t1	2948697075
+test.t2	2948697075
+test.t3	2948697075
+test.t4	NULL
+Warnings:
+Error	1146	Table 'test.t4' doesn't exist
+drop table t1,t2,t3;
+create table t1 (id int,  name char(10) not null,  name2 char(10) not null) engine=innodb;
+insert into t1 values(1,'first','fff'),(2,'second','sss'),(3,'third','ttt');
+select trim(name2) from t1  union all  select trim(name) from t1 union all select trim(id) from t1;
+trim(name2)
+fff
+sss
+ttt
+first
+second
+third
+1
+2
+3
+drop table t1;
+create table t1 (a int) engine=innodb;
+create table t2 like t1;
+drop table t1,t2;
+create table t1 (id int(11) not null, id2 int(11) not null, unique (id,id2)) engine=innodb;
+create table t2 (id int(11) not null, constraint t1_id_fk foreign key ( id ) references t1 (id)) engine = innodb;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `id` int(11) NOT NULL,
+  `id2` int(11) NOT NULL,
+  UNIQUE KEY `id` (`id`,`id2`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `id` int(11) NOT NULL,
+  KEY `t1_id_fk` (`id`),
+  CONSTRAINT `t1_id_fk` FOREIGN KEY (`id`) REFERENCES `t1` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+create index id on t2 (id);
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `id` int(11) NOT NULL,
+  KEY `id` (`id`),
+  CONSTRAINT `t1_id_fk` FOREIGN KEY (`id`) REFERENCES `t1` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+create index id2 on t2 (id);
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `id` int(11) NOT NULL,
+  KEY `id` (`id`),
+  KEY `id2` (`id`),
+  CONSTRAINT `t1_id_fk` FOREIGN KEY (`id`) REFERENCES `t1` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop index id2 on t2;
+drop index id on t2;
+ERROR HY000: Cannot drop index 'id': needed in a foreign key constraint
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `id` int(11) NOT NULL,
+  KEY `id` (`id`),
+  CONSTRAINT `t1_id_fk` FOREIGN KEY (`id`) REFERENCES `t1` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t2;
+create table t2 (id int(11) not null, id2 int(11) not null, constraint t1_id_fk foreign key (id,id2) references t1 (id,id2)) engine = innodb;
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `id` int(11) NOT NULL,
+  `id2` int(11) NOT NULL,
+  KEY `t1_id_fk` (`id`,`id2`),
+  CONSTRAINT `t1_id_fk` FOREIGN KEY (`id`, `id2`) REFERENCES `t1` (`id`, `id2`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+create unique index id on t2 (id,id2);
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `id` int(11) NOT NULL,
+  `id2` int(11) NOT NULL,
+  UNIQUE KEY `id` (`id`,`id2`),
+  CONSTRAINT `t1_id_fk` FOREIGN KEY (`id`, `id2`) REFERENCES `t1` (`id`, `id2`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t2;
+create table t2 (id int(11) not null, id2 int(11) not null, unique (id,id2),constraint t1_id_fk foreign key (id2,id) references t1 (id,id2)) engine = innodb;
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `id` int(11) NOT NULL,
+  `id2` int(11) NOT NULL,
+  UNIQUE KEY `id` (`id`,`id2`),
+  KEY `t1_id_fk` (`id2`,`id`),
+  CONSTRAINT `t1_id_fk` FOREIGN KEY (`id2`, `id`) REFERENCES `t1` (`id`, `id2`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t2;
+create table t2 (id int(11) not null, id2 int(11) not null, unique (id,id2), constraint t1_id_fk foreign key (id) references t1 (id)) engine = innodb;
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `id` int(11) NOT NULL,
+  `id2` int(11) NOT NULL,
+  UNIQUE KEY `id` (`id`,`id2`),
+  CONSTRAINT `t1_id_fk` FOREIGN KEY (`id`) REFERENCES `t1` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t2;
+create table t2 (id int(11) not null, id2 int(11) not null, unique (id,id2),constraint t1_id_fk foreign key (id2,id) references t1 (id,id2)) engine = innodb;
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `id` int(11) NOT NULL,
+  `id2` int(11) NOT NULL,
+  UNIQUE KEY `id` (`id`,`id2`),
+  KEY `t1_id_fk` (`id2`,`id`),
+  CONSTRAINT `t1_id_fk` FOREIGN KEY (`id2`, `id`) REFERENCES `t1` (`id`, `id2`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t2;
+create table t2 (id int(11) not null auto_increment, id2 int(11) not null, constraint t1_id_fk foreign key (id) references t1 (id), primary key (id), index (id,id2)) engine = innodb;
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `id` int(11) NOT NULL AUTO_INCREMENT,
+  `id2` int(11) NOT NULL,
+  PRIMARY KEY (`id`),
+  KEY `id` (`id`,`id2`),
+  CONSTRAINT `t1_id_fk` FOREIGN KEY (`id`) REFERENCES `t1` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t2;
+create table t2 (id int(11) not null auto_increment, id2 int(11) not null, constraint t1_id_fk foreign key (id) references t1 (id)) engine= innodb;
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `id` int(11) NOT NULL AUTO_INCREMENT,
+  `id2` int(11) NOT NULL,
+  KEY `t1_id_fk` (`id`),
+  CONSTRAINT `t1_id_fk` FOREIGN KEY (`id`) REFERENCES `t1` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t2 add index id_test (id), add index id_test2 (id,id2);
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `id` int(11) NOT NULL AUTO_INCREMENT,
+  `id2` int(11) NOT NULL,
+  KEY `id_test` (`id`),
+  KEY `id_test2` (`id`,`id2`),
+  CONSTRAINT `t1_id_fk` FOREIGN KEY (`id`) REFERENCES `t1` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t2;
+create table t2 (id int(11) not null, id2 int(11) not null, constraint t1_id_fk foreign key (id2,id) references t1 (id)) engine = innodb;
+ERROR 42000: Incorrect foreign key definition for 't1_id_fk': Key reference and table reference don't match
+create table t2 (a int auto_increment primary key, b int, index(b), foreign key (b) references t1(id), unique(b)) engine=innodb;
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `a` int(11) NOT NULL AUTO_INCREMENT,
+  `b` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `b_2` (`b`),
+  KEY `b` (`b`),
+  CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`b`) REFERENCES `t1` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t2;
+create table t2 (a int auto_increment primary key, b int, foreign key (b) references t1(id), foreign key (b) references t1(id), unique(b)) engine=innodb;
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `a` int(11) NOT NULL AUTO_INCREMENT,
+  `b` int(11) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  UNIQUE KEY `b` (`b`),
+  CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`b`) REFERENCES `t1` (`id`),
+  CONSTRAINT `t2_ibfk_2` FOREIGN KEY (`b`) REFERENCES `t1` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t2, t1;
+create table t1 (c char(10), index (c,c)) engine=innodb;
+ERROR 42S21: Duplicate column name 'c'
+create table t1 (c1 char(10), c2 char(10), index (c1,c2,c1)) engine=innodb;
+ERROR 42S21: Duplicate column name 'c1'
+create table t1 (c1 char(10), c2 char(10), index (c1,c1,c2)) engine=innodb;
+ERROR 42S21: Duplicate column name 'c1'
+create table t1 (c1 char(10), c2 char(10), index (c2,c1,c1)) engine=innodb;
+ERROR 42S21: Duplicate column name 'c1'
+create table t1 (c1 char(10), c2 char(10)) engine=innodb;
+alter table t1 add key (c1,c1);
+ERROR 42S21: Duplicate column name 'c1'
+alter table t1 add key (c2,c1,c1);
+ERROR 42S21: Duplicate column name 'c1'
+alter table t1 add key (c1,c2,c1);
+ERROR 42S21: Duplicate column name 'c1'
+alter table t1 add key (c1,c1,c2);
+ERROR 42S21: Duplicate column name 'c1'
+drop table t1;
+create table t1(a int(1) , b int(1)) engine=innodb;
+insert into t1 values ('1111', '3333');
+select distinct concat(a, b) from t1;
+concat(a, b)
+11113333
+drop table t1;
+CREATE TABLE t1 ( a char(10) ) ENGINE=InnoDB;
+SELECT a FROM t1 WHERE MATCH (a) AGAINST ('test' IN BOOLEAN MODE);
+ERROR HY000: The used table type doesn't support FULLTEXT indexes
+DROP TABLE t1;
+CREATE TABLE t1 (a_id tinyint(4) NOT NULL default '0', PRIMARY KEY  (a_id)) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+INSERT INTO t1 VALUES (1),(2),(3);
+CREATE TABLE t2 (b_id tinyint(4) NOT NULL default '0',b_a tinyint(4) NOT NULL default '0', PRIMARY KEY  (b_id), KEY  (b_a), 
+CONSTRAINT fk_b_a FOREIGN KEY (b_a) REFERENCES t1 (a_id) ON DELETE CASCADE ON UPDATE NO ACTION) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+INSERT INTO t2 VALUES (1,1),(2,1),(3,1),(4,2),(5,2);
+SELECT * FROM (SELECT t1.*,GROUP_CONCAT(t2.b_id SEPARATOR ',') as b_list FROM (t1 LEFT JOIN (t2) on t1.a_id = t2.b_a) GROUP BY t1.a_id ) AS xyz;
+a_id	b_list
+1	1,2,3
+2	4,5
+3	NULL
+DROP TABLE t2;
+DROP TABLE t1;
+create temporary table t1 (a int) engine=innodb;
+insert into t1 values (4711);
+truncate t1;
+insert into t1 values (42);
+select * from t1;
+a
+42
+drop table t1;
+create table t1 (a int) engine=innodb;
+insert into t1 values (4711);
+truncate t1;
+insert into t1 values (42);
+select * from t1;
+a
+42
+drop table t1;
+create table t1 (a int not null, b int not null, c blob not null, d int not null, e int, primary key (a,b,c(255),d)) engine=innodb;
+insert into t1 values (2,2,"b",2,2),(1,1,"a",1,1),(3,3,"ab",3,3);
+select * from t1 order by a,b,c,d;
+a	b	c	d	e
+1	1	a	1	1
+2	2	b	2	2
+3	3	ab	3	3
+explain select * from t1 order by a,b,c,d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	Using filesort
+drop table t1;
+create table t1 (a char(1), b char(1), key(a, b)) engine=innodb;
+insert into t1 values ('8', '6'), ('4', '7');
+select min(a) from t1;
+min(a)
+4
+select min(b) from t1 where a='8';
+min(b)
+6
+drop table t1;
+create table t1 (x bigint unsigned not null primary key) engine=innodb;
+insert into t1(x) values (0xfffffffffffffff0),(0xfffffffffffffff1);
+select * from t1;
+x
+18446744073709551600
+18446744073709551601
+select count(*) from t1 where x>0;
+count(*)
+2
+select count(*) from t1 where x=0;
+count(*)
+0
+select count(*) from t1 where x<0;
+count(*)
+0
+select count(*) from t1 where x < -16;
+count(*)
+0
+select count(*) from t1 where x = -16;
+count(*)
+0
+explain select count(*) from t1 where x > -16;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	PRIMARY	PRIMARY	8	NULL	2	Using where; Using index
+select count(*) from t1 where x > -16;
+count(*)
+2
+select * from t1 where x > -16;
+x
+18446744073709551600
+18446744073709551601
+select count(*) from t1 where x = 18446744073709551601;
+count(*)
+1
+drop table t1;
+SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total';
+variable_value
+8191
+SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_page_size';
+variable_value
+16384
+SELECT variable_value - @innodb_rows_deleted_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_deleted';
+variable_value - @innodb_rows_deleted_orig
+71
+SELECT variable_value - @innodb_rows_inserted_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_inserted';
+variable_value - @innodb_rows_inserted_orig
+1084
+SELECT variable_value - @innodb_rows_updated_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_updated';
+variable_value - @innodb_rows_updated_orig
+885
+SELECT variable_value - @innodb_row_lock_waits_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_waits';
+variable_value - @innodb_row_lock_waits_orig
+0
+SELECT variable_value - @innodb_row_lock_current_waits_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_current_waits';
+variable_value - @innodb_row_lock_current_waits_orig
+0
+SELECT variable_value - @innodb_row_lock_time_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time';
+variable_value - @innodb_row_lock_time_orig
+0
+SELECT variable_value - @innodb_row_lock_time_max_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time_max';
+variable_value - @innodb_row_lock_time_max_orig
+0
+SELECT variable_value - @innodb_row_lock_time_avg_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time_avg';
+variable_value - @innodb_row_lock_time_avg_orig
+0
+SET @innodb_sync_spin_loops_orig = @@innodb_sync_spin_loops;
+show variables like "innodb_sync_spin_loops";
+Variable_name	Value
+innodb_sync_spin_loops	30
+set global innodb_sync_spin_loops=1000;
+show variables like "innodb_sync_spin_loops";
+Variable_name	Value
+innodb_sync_spin_loops	1000
+set global innodb_sync_spin_loops=0;
+show variables like "innodb_sync_spin_loops";
+Variable_name	Value
+innodb_sync_spin_loops	0
+set global innodb_sync_spin_loops=20;
+show variables like "innodb_sync_spin_loops";
+Variable_name	Value
+innodb_sync_spin_loops	20
+set global innodb_sync_spin_loops=@innodb_sync_spin_loops_orig;
+show variables like "innodb_thread_concurrency";
+Variable_name	Value
+innodb_thread_concurrency	0
+set global innodb_thread_concurrency=1001;
+Warnings:
+Warning	1292	Truncated incorrect thread_concurrency value: '1001'
+show variables like "innodb_thread_concurrency";
+Variable_name	Value
+innodb_thread_concurrency	1000
+set global innodb_thread_concurrency=0;
+show variables like "innodb_thread_concurrency";
+Variable_name	Value
+innodb_thread_concurrency	0
+set global innodb_thread_concurrency=16;
+show variables like "innodb_thread_concurrency";
+Variable_name	Value
+innodb_thread_concurrency	16
+show variables like "innodb_concurrency_tickets";
+Variable_name	Value
+innodb_concurrency_tickets	500
+set global innodb_concurrency_tickets=1000;
+show variables like "innodb_concurrency_tickets";
+Variable_name	Value
+innodb_concurrency_tickets	1000
+set global innodb_concurrency_tickets=0;
+Warnings:
+Warning	1292	Truncated incorrect concurrency_tickets value: '0'
+show variables like "innodb_concurrency_tickets";
+Variable_name	Value
+innodb_concurrency_tickets	1
+set global innodb_concurrency_tickets=500;
+show variables like "innodb_concurrency_tickets";
+Variable_name	Value
+innodb_concurrency_tickets	500
+show variables like "innodb_thread_sleep_delay";
+Variable_name	Value
+innodb_thread_sleep_delay	10000
+set global innodb_thread_sleep_delay=100000;
+show variables like "innodb_thread_sleep_delay";
+Variable_name	Value
+innodb_thread_sleep_delay	100000
+set global innodb_thread_sleep_delay=0;
+show variables like "innodb_thread_sleep_delay";
+Variable_name	Value
+innodb_thread_sleep_delay	0
+set global innodb_thread_sleep_delay=10000;
+show variables like "innodb_thread_sleep_delay";
+Variable_name	Value
+innodb_thread_sleep_delay	10000
+set storage_engine=INNODB;
+drop table if exists t1,t2,t3;
+--- Testing varchar ---
+--- Testing varchar ---
+create table t1 (v varchar(10), c char(10), t text);
+insert into t1 values('+ ', '+ ', '+ ');
+set @a=repeat(' ',20);
+insert into t1 values (concat('+',@a),concat('+',@a),concat('+',@a));
+Warnings:
+Note	1265	Data truncated for column 'v' at row 1
+select concat('*',v,'*',c,'*',t,'*') from t1;
+concat('*',v,'*',c,'*',t,'*')
+*+ *+*+ *
+*+         *+*+                    *
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `v` varchar(10) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `t` text
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+create table t2 like t1;
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `v` varchar(10) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `t` text
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+create table t3 select * from t1;
+show create table t3;
+Table	Create Table
+t3	CREATE TABLE `t3` (
+  `v` varchar(10) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `t` text
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 modify c varchar(10);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `v` varchar(10) DEFAULT NULL,
+  `c` varchar(10) DEFAULT NULL,
+  `t` text
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 modify v char(10);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `v` char(10) DEFAULT NULL,
+  `c` varchar(10) DEFAULT NULL,
+  `t` text
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 modify t varchar(10);
+Warnings:
+Note	1265	Data truncated for column 't' at row 2
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `v` char(10) DEFAULT NULL,
+  `c` varchar(10) DEFAULT NULL,
+  `t` varchar(10) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+select concat('*',v,'*',c,'*',t,'*') from t1;
+concat('*',v,'*',c,'*',t,'*')
+*+*+*+ *
+*+*+*+         *
+drop table t1,t2,t3;
+create table t1 (v varchar(10), c char(10), t text, key(v), key(c), key(t(10)));
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `v` varchar(10) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `t` text,
+  KEY `v` (`v`),
+  KEY `c` (`c`),
+  KEY `t` (`t`(10))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+select count(*) from t1;
+count(*)
+270
+insert into t1 values(concat('a',char(1)),concat('a',char(1)),concat('a',char(1)));
+select count(*) from t1 where v='a';
+count(*)
+10
+select count(*) from t1 where c='a';
+count(*)
+10
+select count(*) from t1 where t='a';
+count(*)
+10
+select count(*) from t1 where v='a  ';
+count(*)
+10
+select count(*) from t1 where c='a  ';
+count(*)
+10
+select count(*) from t1 where t='a  ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ' and v between 'a  ' and 'b\n';
+count(*)
+10
+select count(*) from t1 where v like 'a%';
+count(*)
+11
+select count(*) from t1 where c like 'a%';
+count(*)
+11
+select count(*) from t1 where t like 'a%';
+count(*)
+11
+select count(*) from t1 where v like 'a %';
+count(*)
+9
+explain select count(*) from t1 where v='a  ';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	v	v	13	const	#	Using where; Using index
+explain select count(*) from t1 where c='a  ';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	c	c	11	const	#	Using where; Using index
+explain select count(*) from t1 where t='a  ';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	t	t	13	const	#	Using where
+explain select count(*) from t1 where v like 'a%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	13	NULL	#	Using where; Using index
+explain select count(*) from t1 where v between 'a' and 'a ';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	v	v	13	const	#	Using where; Using index
+explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a  ' and 'b\n';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	v	v	13	const	#	Using where; Using index
+alter table t1 add unique(v);
+ERROR 23000: Duplicate entry 'v' for key 'v_2'
+alter table t1 add key(v);
+select concat('*',v,'*',c,'*',t,'*') as qq from t1 where v='a';
+qq
+*a*a*a*
+*a *a*a *
+*a  *a*a  *
+*a   *a*a   *
+*a    *a*a    *
+*a     *a*a     *
+*a      *a*a      *
+*a       *a*a       *
+*a        *a*a        *
+*a         *a*a         *
+explain select * from t1 where v='a';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	v,v_2	#	13	const	#	Using where
+select v,count(*) from t1 group by v limit 10;
+v	count(*)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select v,count(t) from t1 group by v limit 10;
+v	count(t)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select v,count(c) from t1 group by v limit 10;
+v	count(c)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select sql_big_result v,count(t) from t1 group by v limit 10;
+v	count(t)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select sql_big_result v,count(c) from t1 group by v limit 10;
+v	count(c)
+a	1
+a 	10
+b     	10
+c    	10
+d   	10
+e  	10
+f     	10
+g    	10
+h	10
+i     	10
+select c,count(*) from t1 group by c limit 10;
+c	count(*)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select c,count(t) from t1 group by c limit 10;
+c	count(t)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select sql_big_result c,count(t) from t1 group by c limit 10;
+c	count(t)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select t,count(*) from t1 group by t limit 10;
+t	count(*)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select t,count(t) from t1 group by t limit 10;
+t	count(t)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select sql_big_result t,count(t) from t1 group by t limit 10;
+t	count(t)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+alter table t1 modify v varchar(300), drop key v, drop key v_2, add key v (v);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `v` varchar(300) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `t` text,
+  KEY `c` (`c`),
+  KEY `t` (`t`(10)),
+  KEY `v` (`v`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+select count(*) from t1 where v='a';
+count(*)
+10
+select count(*) from t1 where v='a  ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ' and v between 'a  ' and 'b\n';
+count(*)
+10
+select count(*) from t1 where v like 'a%';
+count(*)
+11
+select count(*) from t1 where v like 'a %';
+count(*)
+9
+explain select count(*) from t1 where v='a  ';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	v	v	303	const	#	Using where; Using index
+explain select count(*) from t1 where v like 'a%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	303	NULL	#	Using where; Using index
+explain select count(*) from t1 where v between 'a' and 'a ';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	v	v	303	const	#	Using where; Using index
+explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a  ' and 'b\n';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	v	v	303	const	#	Using where; Using index
+explain select * from t1 where v='a';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	v	v	303	const	#	Using where
+select v,count(*) from t1 group by v limit 10;
+v	count(*)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select v,count(t) from t1 group by v limit 10;
+v	count(t)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select sql_big_result v,count(t) from t1 group by v limit 10;
+v	count(t)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+alter table t1 drop key v, add key v (v(30));
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `v` varchar(300) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `t` text,
+  KEY `c` (`c`),
+  KEY `t` (`t`(10)),
+  KEY `v` (`v`(30))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+select count(*) from t1 where v='a';
+count(*)
+10
+select count(*) from t1 where v='a  ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ' and v between 'a  ' and 'b\n';
+count(*)
+10
+select count(*) from t1 where v like 'a%';
+count(*)
+11
+select count(*) from t1 where v like 'a %';
+count(*)
+9
+explain select count(*) from t1 where v='a  ';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	v	v	33	const	#	Using where
+explain select count(*) from t1 where v like 'a%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	v	v	33	NULL	#	Using where
+explain select count(*) from t1 where v between 'a' and 'a ';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	v	v	33	const	#	Using where
+explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a  ' and 'b\n';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	v	v	33	const	#	Using where
+explain select * from t1 where v='a';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	v	v	33	const	#	Using where
+select v,count(*) from t1 group by v limit 10;
+v	count(*)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select v,count(t) from t1 group by v limit 10;
+v	count(t)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select sql_big_result v,count(t) from t1 group by v limit 10;
+v	count(t)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+alter table t1 modify v varchar(600), drop key v, add key v (v);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `v` varchar(600) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `t` text,
+  KEY `c` (`c`),
+  KEY `t` (`t`(10)),
+  KEY `v` (`v`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+select v,count(*) from t1 group by v limit 10;
+v	count(*)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select v,count(t) from t1 group by v limit 10;
+v	count(t)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+select sql_big_result v,count(t) from t1 group by v limit 10;
+v	count(t)
+a	1
+a	10
+b	10
+c	10
+d	10
+e	10
+f	10
+g	10
+h	10
+i	10
+drop table t1;
+create table t1 (a char(10), unique (a));
+insert into t1 values ('a   ');
+insert into t1 values ('a ');
+ERROR 23000: Duplicate entry 'a' for key 'a'
+alter table t1 modify a varchar(10);
+insert into t1 values ('a '),('a  '),('a   '),('a         ');
+ERROR 23000: Duplicate entry 'a ' for key 'a'
+insert into t1 values ('a     ');
+ERROR 23000: Duplicate entry 'a     ' for key 'a'
+insert into t1 values ('a          ');
+ERROR 23000: Duplicate entry 'a         ' for key 'a'
+insert into t1 values ('a ');
+ERROR 23000: Duplicate entry 'a ' for key 'a'
+update t1 set a='a  ' where a like 'a%';
+select concat(a,'.') from t1;
+concat(a,'.')
+a  .
+update t1 set a='abc    ' where a like 'a ';
+select concat(a,'.') from t1;
+concat(a,'.')
+a  .
+update t1 set a='a      ' where a like 'a %';
+select concat(a,'.') from t1;
+concat(a,'.')
+a      .
+update t1 set a='a  ' where a like 'a      ';
+select concat(a,'.') from t1;
+concat(a,'.')
+a  .
+drop table t1;
+create table t1 (v varchar(10), c char(10), t text, key(v(5)), key(c(5)), key(t(5)));
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `v` varchar(10) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `t` text,
+  KEY `v` (`v`(5)),
+  KEY `c` (`c`(5)),
+  KEY `t` (`t`(5))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (v char(10) character set utf8);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `v` char(10) CHARACTER SET utf8 DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (v varchar(10), c char(10)) row_format=fixed;
+Warnings:
+Warning	1478	InnoDB: assuming ROW_FORMAT=COMPACT.
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `v` varchar(10) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=FIXED
+insert into t1 values('a','a'),('a ','a ');
+select concat('*',v,'*',c,'*') from t1;
+concat('*',v,'*',c,'*')
+*a*a*
+*a *a*
+drop table t1;
+create table t1 (v varchar(65530), key(v(10)));
+insert into t1 values(repeat('a',65530));
+select length(v) from t1 where v=repeat('a',65530);
+length(v)
+65530
+drop table t1;
+create table t1(a int, b varchar(12), key ba(b, a));
+insert into t1 values (1, 'A'), (20, NULL);
+explain select * from t1 where a=20 and b is null;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	ba	ba	20	const,const	1	Using where; Using index
+select * from t1 where a=20 and b is null;
+a	b
+20	NULL
+drop table t1;
+create table t1 (v varchar(65530), key(v));
+Warnings:
+Warning	1071	Specified key was too long; max key length is 767 bytes
+drop table t1;
+create table t1 (v varchar(65536));
+Warnings:
+Note	1246	Converting column 'v' from VARCHAR to TEXT
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `v` mediumtext
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (v varchar(65530) character set utf8);
+Warnings:
+Note	1246	Converting column 'v' from VARCHAR to TEXT
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `v` mediumtext CHARACTER SET utf8
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1;
+set storage_engine=MyISAM;
+create table t1 (v varchar(16384)) engine=innodb;
+drop table t1;
+create table t1 (a char(1), b char(1), key(a, b)) engine=innodb;
+insert into t1 values ('8', '6'), ('4', '7');
+select min(a) from t1;
+min(a)
+4
+select min(b) from t1 where a='8';
+min(b)
+6
+drop table t1;
+CREATE TABLE t1 ( `a` int(11) NOT NULL auto_increment, `b` int(11) default NULL,PRIMARY KEY  (`a`),UNIQUE KEY `b` (`b`)) ENGINE=innodb;
+insert into t1 (b) values (1);
+replace into t1 (b) values (2), (1), (3);
+select * from t1;
+a	b
+3	1
+2	2
+4	3
+truncate table t1;
+insert into t1 (b) values (1);
+replace into t1 (b) values (2);
+replace into t1 (b) values (1);
+replace into t1 (b) values (3);
+select * from t1;
+a	b
+3	1
+2	2
+4	3
+drop table t1;
+create table t1 (rowid int not null auto_increment, val int not null,primary
+key (rowid), unique(val)) engine=innodb;
+replace into t1 (val) values ('1'),('2');
+replace into t1 (val) values ('1'),('2');
+insert into t1 (val) values ('1'),('2');
+ERROR 23000: Duplicate entry '1' for key 'val'
+select * from t1;
+rowid	val
+3	1
+4	2
+drop table t1;
+create table t1 (a int not null auto_increment primary key, val int) engine=InnoDB;
+insert into t1 (val) values (1);
+update t1 set a=2 where a=1;
+insert into t1 (val) values (1);
+ERROR 23000: Duplicate entry '2' for key 'PRIMARY'
+select * from t1;
+a	val
+2	1
+drop table t1;
+CREATE TABLE t1 (GRADE DECIMAL(4) NOT NULL, PRIMARY KEY (GRADE)) ENGINE=INNODB;
+INSERT INTO t1 (GRADE) VALUES (151),(252),(343);
+SELECT GRADE  FROM t1 WHERE GRADE > 160 AND GRADE < 300;
+GRADE
+252
+SELECT GRADE  FROM t1 WHERE GRADE= 151;
+GRADE
+151
+DROP TABLE t1;
+create table t1 (f1 varchar(10), f2 varchar(10), primary key (f1,f2)) engine=innodb;
+create table t2 (f3 varchar(10), f4 varchar(10), key (f4)) engine=innodb;
+insert into t2 values ('aa','cc');
+insert into t1 values ('aa','bb'),('aa','cc');
+delete t1 from t1,t2 where f1=f3 and f4='cc';
+select * from t1;
+f1	f2
+drop table t1,t2;
+CREATE TABLE t1 (
+id INTEGER NOT NULL AUTO_INCREMENT, PRIMARY KEY (id)
+) ENGINE=InnoDB;
+CREATE TABLE t2 (
+id INTEGER NOT NULL,
+FOREIGN KEY (id) REFERENCES t1 (id)
+) ENGINE=InnoDB;
+INSERT INTO t1 (id) VALUES (NULL);
+SELECT * FROM t1;
+id
+1
+TRUNCATE t1;
+INSERT INTO t1 (id) VALUES (NULL);
+SELECT * FROM t1;
+id
+1
+DELETE FROM t1;
+TRUNCATE t1;
+INSERT INTO t1 (id) VALUES (NULL);
+SELECT * FROM t1;
+id
+1
+DROP TABLE t2, t1;
+CREATE TABLE t1
+(
+id INT PRIMARY KEY
+) ENGINE=InnoDB;
+CREATE TEMPORARY TABLE t2
+(
+id INT NOT NULL PRIMARY KEY,
+b INT,
+FOREIGN KEY (b) REFERENCES test.t1(id)
+) ENGINE=InnoDB;
+Got one of the listed errors
+DROP TABLE t1;
+create table t1 (col1 varchar(2000), index (col1(767)))
+character set = latin1 engine = innodb;
+create table t2 (col1 char(255), index (col1))
+character set = latin1 engine = innodb;
+create table t3 (col1 binary(255), index (col1))
+character set = latin1 engine = innodb;
+create table t4 (col1 varchar(767), index (col1))
+character set = latin1 engine = innodb;
+create table t5 (col1 varchar(767) primary key)
+character set = latin1 engine = innodb;
+create table t6 (col1 varbinary(767) primary key)
+character set = latin1 engine = innodb;
+create table t7 (col1 text, index(col1(767)))
+character set = latin1 engine = innodb;
+create table t8 (col1 blob, index(col1(767)))
+character set = latin1 engine = innodb;
+create table t9 (col1 varchar(512), col2 varchar(512), index(col1, col2))
+character set = latin1 engine = innodb;
+show create table t9;
+Table	Create Table
+t9	CREATE TABLE `t9` (
+  `col1` varchar(512) DEFAULT NULL,
+  `col2` varchar(512) DEFAULT NULL,
+  KEY `col1` (`col1`,`col2`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1, t2, t3, t4, t5, t6, t7, t8, t9;
+create table t1 (col1 varchar(768), index(col1))
+character set = latin1 engine = innodb;
+Warnings:
+Warning	1071	Specified key was too long; max key length is 767 bytes
+create table t2 (col1 varbinary(768), index(col1))
+character set = latin1 engine = innodb;
+Warnings:
+Warning	1071	Specified key was too long; max key length is 767 bytes
+create table t3 (col1 text, index(col1(768)))
+character set = latin1 engine = innodb;
+Warnings:
+Warning	1071	Specified key was too long; max key length is 767 bytes
+create table t4 (col1 blob, index(col1(768)))
+character set = latin1 engine = innodb;
+Warnings:
+Warning	1071	Specified key was too long; max key length is 767 bytes
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `col1` varchar(768) DEFAULT NULL,
+  KEY `col1` (`col1`(767))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1, t2, t3, t4;
+create table t1 (col1 varchar(768) primary key)
+character set = latin1 engine = innodb;
+ERROR 42000: Specified key was too long; max key length is 767 bytes
+create table t2 (col1 varbinary(768) primary key)
+character set = latin1 engine = innodb;
+ERROR 42000: Specified key was too long; max key length is 767 bytes
+create table t3 (col1 text, primary key(col1(768)))
+character set = latin1 engine = innodb;
+ERROR 42000: Specified key was too long; max key length is 767 bytes
+create table t4 (col1 blob, primary key(col1(768)))
+character set = latin1 engine = innodb;
+ERROR 42000: Specified key was too long; max key length is 767 bytes
+CREATE TABLE t1
+(
+id INT PRIMARY KEY
+) ENGINE=InnoDB;
+CREATE TABLE t2
+(
+v INT,
+CONSTRAINT c1 FOREIGN KEY (v) REFERENCES t1(id)
+) ENGINE=InnoDB;
+INSERT INTO t2 VALUES(2);
+ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `c1` FOREIGN KEY (`v`) REFERENCES `t1` (`id`))
+INSERT INTO t1 VALUES(1);
+INSERT INTO t2 VALUES(1);
+DELETE FROM t1 WHERE id = 1;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `c1` FOREIGN KEY (`v`) REFERENCES `t1` (`id`))
+DROP TABLE t1;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails
+SET FOREIGN_KEY_CHECKS=0;
+DROP TABLE t1;
+SET FOREIGN_KEY_CHECKS=1;
+INSERT INTO t2 VALUES(3);
+ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `c1` FOREIGN KEY (`v`) REFERENCES `t1` (`id`))
+DROP TABLE t2;
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2);
+set autocommit=0;
+checksum table t1;
+Table	Checksum
+test.t1	1531596814
+insert into t1 values(3);
+checksum table t1;
+Table	Checksum
+test.t1	1531596814
+commit;
+checksum table t1;
+Table	Checksum
+test.t1	2050879373
+commit;
+drop table t1;
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2);
+set autocommit=1;
+checksum table t1;
+Table	Checksum
+test.t1	1531596814
+set autocommit=1;
+insert into t1 values(3);
+checksum table t1;
+Table	Checksum
+test.t1	2050879373
+drop table t1;
+set foreign_key_checks=0;
+create table t2 (a int primary key, b int, foreign key (b) references t1(a)) engine = innodb;
+create table t1(a char(10) primary key, b varchar(20)) engine = innodb;
+ERROR HY000: Can't create table 'test.t1' (errno: 150)
+set foreign_key_checks=1;
+drop table t2;
+set foreign_key_checks=0;
+create table t1(a varchar(10) primary key) engine = innodb DEFAULT CHARSET=latin1;
+create table t2 (a varchar(10), foreign key (a) references t1(a)) engine = innodb DEFAULT CHARSET=utf8;
+ERROR HY000: Can't create table 'test.t2' (errno: 150)
+set foreign_key_checks=1;
+drop table t1;
+set foreign_key_checks=0;
+create table t2 (a varchar(10), foreign key (a) references t1(a)) engine = innodb;
+create table t1(a varchar(10) primary key) engine = innodb;
+alter table t1 modify column a int;
+Got one of the listed errors
+set foreign_key_checks=1;
+drop table t2,t1;
+set foreign_key_checks=0;
+create table t2 (a varchar(10), foreign key (a) references t1(a)) engine = innodb DEFAULT CHARSET=latin1;
+create table t1(a varchar(10) primary key) engine = innodb DEFAULT CHARSET=latin1;
+alter table t1 convert to character set utf8;
+set foreign_key_checks=1;
+drop table t2,t1;
+set foreign_key_checks=0;
+create table t2 (a varchar(10), foreign key (a) references t1(a)) engine = innodb DEFAULT CHARSET=latin1;
+create table t3(a varchar(10) primary key) engine = innodb DEFAULT CHARSET=utf8;
+rename table t3 to t1;
+ERROR HY000: Error on rename of './test/t3' to './test/t1' (errno: 150)
+set foreign_key_checks=1;
+drop table t2,t3;
+create table t1(a int primary key) row_format=redundant engine=innodb;
+create table t2(a int primary key,constraint foreign key(a)references t1(a)) row_format=compact engine=innodb;
+create table t3(a int primary key) row_format=compact engine=innodb;
+create table t4(a int primary key,constraint foreign key(a)references t3(a)) row_format=redundant engine=innodb;
+insert into t1 values(1);
+insert into t3 values(1);
+insert into t2 values(2);
+ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`a`) REFERENCES `t1` (`a`))
+insert into t4 values(2);
+ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`t4`, CONSTRAINT `t4_ibfk_1` FOREIGN KEY (`a`) REFERENCES `t3` (`a`))
+insert into t2 values(1);
+insert into t4 values(1);
+update t1 set a=2;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`a`) REFERENCES `t1` (`a`))
+update t2 set a=2;
+ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`a`) REFERENCES `t1` (`a`))
+update t3 set a=2;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t4`, CONSTRAINT `t4_ibfk_1` FOREIGN KEY (`a`) REFERENCES `t3` (`a`))
+update t4 set a=2;
+ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`t4`, CONSTRAINT `t4_ibfk_1` FOREIGN KEY (`a`) REFERENCES `t3` (`a`))
+truncate t1;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`a`) REFERENCES `t1` (`a`))
+truncate t3;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t4`, CONSTRAINT `t4_ibfk_1` FOREIGN KEY (`a`) REFERENCES `t3` (`a`))
+truncate t2;
+truncate t4;
+truncate t1;
+truncate t3;
+drop table t4,t3,t2,t1;
+create table t1 (a varchar(255) character set utf8,
+b varchar(255) character set utf8,
+c varchar(255) character set utf8,
+d varchar(255) character set utf8,
+key (a,b,c,d)) engine=innodb;
+drop table t1;
+create table t1 (a varchar(255) character set utf8,
+b varchar(255) character set utf8,
+c varchar(255) character set utf8,
+d varchar(255) character set utf8,
+e varchar(255) character set utf8,
+key (a,b,c,d,e)) engine=innodb;
+ERROR 42000: Specified key was too long; max key length is 3072 bytes
+create table t1 (s1 varbinary(2),primary key (s1)) engine=innodb;
+create table t2 (s1 binary(2),primary key (s1)) engine=innodb;
+create table t3 (s1 varchar(2) binary,primary key (s1)) engine=innodb;
+create table t4 (s1 char(2) binary,primary key (s1)) engine=innodb;
+insert into t1 values (0x41),(0x4120),(0x4100);
+insert into t2 values (0x41),(0x4120),(0x4100);
+ERROR 23000: Duplicate entry 'A' for key 'PRIMARY'
+insert into t2 values (0x41),(0x4120);
+insert into t3 values (0x41),(0x4120),(0x4100);
+ERROR 23000: Duplicate entry 'A ' for key 'PRIMARY'
+insert into t3 values (0x41),(0x4100);
+insert into t4 values (0x41),(0x4120),(0x4100);
+ERROR 23000: Duplicate entry 'A' for key 'PRIMARY'
+insert into t4 values (0x41),(0x4100);
+select hex(s1) from t1;
+hex(s1)
+41
+4100
+4120
+select hex(s1) from t2;
+hex(s1)
+4100
+4120
+select hex(s1) from t3;
+hex(s1)
+4100
+41
+select hex(s1) from t4;
+hex(s1)
+4100
+41
+drop table t1,t2,t3,t4;
+create table t1 (a int primary key,s1 varbinary(3) not null unique) engine=innodb;
+create table t2 (s1 binary(2) not null, constraint c foreign key(s1) references t1(s1) on update cascade) engine=innodb;
+insert into t1 values(1,0x4100),(2,0x41),(3,0x4120),(4,0x42);
+insert into t2 values(0x42);
+ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `c` FOREIGN KEY (`s1`) REFERENCES `t1` (`s1`) ON UPDATE CASCADE)
+insert into t2 values(0x41);
+select hex(s1) from t2;
+hex(s1)
+4100
+update t1 set s1=0x123456 where a=2;
+select hex(s1) from t2;
+hex(s1)
+4100
+update t1 set s1=0x12 where a=1;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `c` FOREIGN KEY (`s1`) REFERENCES `t1` (`s1`) ON UPDATE CASCADE)
+update t1 set s1=0x12345678 where a=1;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `c` FOREIGN KEY (`s1`) REFERENCES `t1` (`s1`) ON UPDATE CASCADE)
+update t1 set s1=0x123457 where a=1;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `c` FOREIGN KEY (`s1`) REFERENCES `t1` (`s1`) ON UPDATE CASCADE)
+update t1 set s1=0x1220 where a=1;
+select hex(s1) from t2;
+hex(s1)
+1220
+update t1 set s1=0x1200 where a=1;
+select hex(s1) from t2;
+hex(s1)
+1200
+update t1 set s1=0x4200 where a=1;
+select hex(s1) from t2;
+hex(s1)
+4200
+delete from t1 where a=1;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `c` FOREIGN KEY (`s1`) REFERENCES `t1` (`s1`) ON UPDATE CASCADE)
+delete from t1 where a=2;
+update t2 set s1=0x4120;
+delete from t1;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `c` FOREIGN KEY (`s1`) REFERENCES `t1` (`s1`) ON UPDATE CASCADE)
+delete from t1 where a!=3;
+select a,hex(s1) from t1;
+a	hex(s1)
+3	4120
+select hex(s1) from t2;
+hex(s1)
+4120
+drop table t2,t1;
+create table t1 (a int primary key,s1 varchar(2) binary not null unique) engine=innodb;
+create table t2 (s1 char(2) binary not null, constraint c foreign key(s1) references t1(s1) on update cascade) engine=innodb;
+insert into t1 values(1,0x4100),(2,0x41);
+insert into t2 values(0x41);
+select hex(s1) from t2;
+hex(s1)
+41
+update t1 set s1=0x1234 where a=1;
+select hex(s1) from t2;
+hex(s1)
+41
+update t1 set s1=0x12 where a=2;
+select hex(s1) from t2;
+hex(s1)
+12
+delete from t1 where a=1;
+delete from t1 where a=2;
+ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `c` FOREIGN KEY (`s1`) REFERENCES `t1` (`s1`) ON UPDATE CASCADE)
+select a,hex(s1) from t1;
+a	hex(s1)
+2	12
+select hex(s1) from t2;
+hex(s1)
+12
+drop table t2,t1;
+CREATE TABLE t1(a INT, PRIMARY KEY(a)) ENGINE=InnoDB;
+CREATE TABLE t2(a INT) ENGINE=InnoDB;
+ALTER TABLE t2 ADD FOREIGN KEY (a) REFERENCES t1(a);
+ALTER TABLE t2 DROP FOREIGN KEY t2_ibfk_1;
+ALTER TABLE t2 ADD CONSTRAINT t2_ibfk_0 FOREIGN KEY (a) REFERENCES t1(a);
+ALTER TABLE t2 DROP FOREIGN KEY t2_ibfk_0;
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `a` int(11) DEFAULT NULL,
+  KEY `t2_ibfk_0` (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+DROP TABLE t2,t1;
+create table t1(a int not null, b int, c int, d int, primary key(a)) engine=innodb;
+insert into t1(a) values (1),(2),(3);
+commit;
+set autocommit = 0;
+update t1 set b = 5 where a = 2;
+create trigger t1t before insert on t1 for each row begin set NEW.b = NEW.a * 10 + 5, NEW.c = NEW.a / 10; end |
+set autocommit = 0;
+insert into t1(a) values (10),(20),(30),(40),(50),(60),(70),(80),(90),(100),
+(11),(21),(31),(41),(51),(61),(71),(81),(91),(101),
+(12),(22),(32),(42),(52),(62),(72),(82),(92),(102),
+(13),(23),(33),(43),(53),(63),(73),(83),(93),(103),
+(14),(24),(34),(44),(54),(64),(74),(84),(94),(104);
+commit;
+commit;
+drop trigger t1t;
+drop table t1;
+create table t1(a int not null, b int, c int, d int, primary key(a)) engine=innodb;
+create table t2(a int not null, b int, c int, d int, primary key(a)) engine=innodb;
+create table t3(a int not null, b int, c int, d int, primary key(a)) engine=innodb;
+create table t4(a int not null, b int, c int, d int, primary key(a)) engine=innodb;
+create table t5(a int not null, b int, c int, d int, primary key(a)) engine=innodb;
+insert into t1(a) values (1),(2),(3);
+insert into t2(a) values (1),(2),(3);
+insert into t3(a) values (1),(2),(3);
+insert into t4(a) values (1),(2),(3);
+insert into t3(a) values (5),(7),(8);
+insert into t4(a) values (5),(7),(8);
+insert into t5(a) values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12);
+create trigger t1t before insert on t1 for each row begin 
+INSERT INTO t2 SET a = NEW.a;
+end |
+create trigger t2t before insert on t2 for each row begin
+DELETE FROM t3 WHERE a = NEW.a;
+end |
+create trigger t3t before delete on t3 for each row begin  
+UPDATE t4 SET b = b + 1 WHERE a = OLD.a;
+end |
+create trigger t4t before update on t4 for each row begin
+UPDATE t5 SET b = b + 1 where a = NEW.a;
+end |
+commit;
+set autocommit = 0;
+update t1 set b = b + 5 where a = 1;
+update t2 set b = b + 5 where a = 1;
+update t3 set b = b + 5 where a = 1;
+update t4 set b = b + 5 where a = 1;
+insert into t5(a) values(20);
+set autocommit = 0;
+insert into t1(a) values(7);
+insert into t2(a) values(8);
+delete from t2 where a = 3;
+update t4 set b = b + 1 where a = 3;
+commit;
+drop trigger t1t;
+drop trigger t2t;
+drop trigger t3t;
+drop trigger t4t;
+drop table t1, t2, t3, t4, t5;
+CREATE TABLE t1 (
+field1 varchar(8) NOT NULL DEFAULT '',
+field2 varchar(8) NOT NULL DEFAULT '',
+PRIMARY KEY  (field1, field2)
+) ENGINE=InnoDB;
+CREATE TABLE t2 (
+field1 varchar(8) NOT NULL DEFAULT '' PRIMARY KEY,
+FOREIGN KEY (field1) REFERENCES t1 (field1)
+ON DELETE CASCADE ON UPDATE CASCADE
+) ENGINE=InnoDB;
+INSERT INTO t1 VALUES ('old', 'somevalu');
+INSERT INTO t1 VALUES ('other', 'anyvalue');
+INSERT INTO t2 VALUES ('old');
+INSERT INTO t2 VALUES ('other');
+UPDATE t1 SET field1 = 'other' WHERE field2 = 'somevalu';
+ERROR 23000: Upholding foreign key constraints for table 't1', entry 'other-somevalu', key 1 would lead to a duplicate entry
+DROP TABLE t2;
+DROP TABLE t1;
+create table t1 (
+c1 bigint not null,
+c2 bigint not null,
+primary key (c1),
+unique  key (c2)
+) engine=innodb;
+create table t2 (
+c1 bigint not null,
+primary key (c1)
+) engine=innodb;
+alter table t1 add constraint c2_fk foreign key (c2)
+references t2(c1) on delete cascade;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `c1` bigint(20) NOT NULL,
+  `c2` bigint(20) NOT NULL,
+  PRIMARY KEY (`c1`),
+  UNIQUE KEY `c2` (`c2`),
+  CONSTRAINT `c2_fk` FOREIGN KEY (`c2`) REFERENCES `t2` (`c1`) ON DELETE CASCADE
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table t1 drop foreign key c2_fk;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `c1` bigint(20) NOT NULL,
+  `c2` bigint(20) NOT NULL,
+  PRIMARY KEY (`c1`),
+  UNIQUE KEY `c2` (`c2`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table t1, t2;
+create table t1(a date) engine=innodb;
+create table t2(a date, key(a)) engine=innodb;
+insert into t1 values('2005-10-01');
+insert into t2 values('2005-10-01');
+select * from t1, t2
+where t2.a between t1.a - interval 2 day and t1.a + interval 2 day;
+a	a
+2005-10-01	2005-10-01
+drop table t1, t2;
+create table t1 (id int not null, f_id int not null, f int not null,
+primary key(f_id, id)) engine=innodb;
+create table t2 (id int not null,s_id int not null,s varchar(200),
+primary key(id)) engine=innodb;
+INSERT INTO t1 VALUES (8, 1, 3);
+INSERT INTO t1 VALUES (1, 2, 1);
+INSERT INTO t2 VALUES (1, 0, '');
+INSERT INTO t2 VALUES (8, 1, '');
+commit;
+DELETE ml.* FROM t1 AS ml LEFT JOIN t2 AS mm ON (mm.id=ml.id)
+WHERE mm.id IS NULL;
+select ml.* from t1 as ml left join t2 as mm on (mm.id=ml.id)
+where mm.id is null lock in share mode;
+id	f_id	f
+drop table t1,t2;
+create table t1(a int not null, b int, primary key(a)) engine=innodb;
+insert into t1 values(1,1),(2,2),(3,1),(4,2),(5,1),(6,2),(7,3);
+commit;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+update t1 set b = 5 where b = 1;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+select * from t1 where a = 7 and b = 3 for update;
+a	b
+7	3
+commit;
+commit;
+drop table t1;
+create table t1(a int not null, b int, primary key(a)) engine=innodb;
+insert into t1 values(1,1),(2,2),(3,1),(4,2),(5,1),(6,2);
+commit;
+set autocommit = 0;
+select * from t1 lock in share mode;
+a	b
+1	1
+2	2
+3	1
+4	2
+5	1
+6	2
+update t1 set b = 5 where b = 1;
+set autocommit = 0;
+select * from t1 where a = 2 and b = 2 for update;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+commit;
+commit;
+drop table t1;
+create table t1(a int not null, b int, primary key(a)) engine=innodb;
+insert into t1 values (1,2),(5,3),(4,2);
+create table t2(d int not null, e int, primary key(d)) engine=innodb;
+insert into t2 values (8,6),(12,1),(3,1);
+commit;
+set autocommit = 0;
+select * from t2 for update;
+d	e
+3	1
+8	6
+12	1
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+insert into t1 select * from t2;
+update t1 set b = (select e from t2 where a = d);
+create table t3(d int not null, e int, primary key(d)) engine=innodb
+select * from t2;
+commit;
+commit;
+drop table t1, t2, t3;
+create table t1(a int not null, b int, primary key(a)) engine=innodb;
+insert into t1 values (1,2),(5,3),(4,2);
+create table t2(a int not null, b int, primary key(a)) engine=innodb;
+insert into t2 values (8,6),(12,1),(3,1);
+create table t3(d int not null, b int, primary key(d)) engine=innodb;
+insert into t3 values (8,6),(12,1),(3,1);
+create table t5(a int not null, b int, primary key(a)) engine=innodb;
+insert into t5 values (1,2),(5,3),(4,2);
+create table t6(d int not null, e int, primary key(d)) engine=innodb;
+insert into t6 values (8,6),(12,1),(3,1);
+create table t8(a int not null, b int, primary key(a)) engine=innodb;
+insert into t8 values (1,2),(5,3),(4,2);
+create table t9(d int not null, e int, primary key(d)) engine=innodb;
+insert into t9 values (8,6),(12,1),(3,1);
+commit;
+set autocommit = 0;
+select * from t2 for update;
+a	b
+3	1
+8	6
+12	1
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+insert into t1 select * from t2;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+update t3 set b = (select b from t2 where a = d);
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+create table t4(a int not null, b int, primary key(a)) engine=innodb select * from t2;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+insert into t5 (select * from t2 lock in share mode);
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+update t6 set e = (select b from t2 where a = d lock in share mode);
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+create table t7(a int not null, b int, primary key(a)) engine=innodb select * from t2 lock in share mode;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+insert into t8 (select * from t2 for update);
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+update t9 set e = (select b from t2 where a = d for update);
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+create table t10(a int not null, b int, primary key(a)) engine=innodb select * from t2 for update;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+commit;
+drop table t1, t2, t3, t5, t6, t8, t9;
+CREATE TABLE t1 (DB_ROW_ID int) engine=innodb;
+ERROR HY000: Can't create table 'test.t1' (errno: -1)
+CREATE TABLE t1 (
+a BIGINT(20) NOT NULL,
+PRIMARY KEY  (a)
+) ENGINE=INNODB DEFAULT CHARSET=UTF8;
+CREATE TABLE t2 (
+a BIGINT(20) NOT NULL,
+b VARCHAR(128) NOT NULL,
+c TEXT NOT NULL,
+PRIMARY KEY  (a,b),
+KEY idx_t2_b_c (b,c(200)),
+CONSTRAINT t_fk FOREIGN KEY (a) REFERENCES t1 (a) 
+ON DELETE CASCADE
+) ENGINE=INNODB DEFAULT CHARSET=UTF8;
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (1, 'bar', 'vbar');
+INSERT INTO t2 VALUES (1, 'BAR2', 'VBAR');
+INSERT INTO t2 VALUES (1, 'bar_bar', 'bibi');
+INSERT INTO t2 VALUES (1, 'customer_over', '1');
+SELECT * FROM t2 WHERE b = 'customer_over';
+a	b	c
+1	customer_over	1
+SELECT * FROM t2 WHERE BINARY b = 'customer_over';
+a	b	c
+1	customer_over	1
+SELECT DISTINCT p0.a FROM t2 p0 WHERE p0.b = 'customer_over';
+a
+1
+/* Bang: Empty result set, above was expected: */
+SELECT DISTINCT p0.a FROM t2 p0 WHERE BINARY p0.b = 'customer_over';
+a
+1
+SELECT p0.a FROM t2 p0 WHERE BINARY p0.b = 'customer_over';
+a
+1
+drop table t2, t1;
+CREATE TABLE t1 ( a int ) ENGINE=innodb;
+BEGIN;
+INSERT INTO t1 VALUES (1);
+OPTIMIZE TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	optimize	note	Table does not support optimize, doing recreate + analyze instead
+test.t1	optimize	status	OK
+DROP TABLE t1;
+CREATE TABLE t1 (id int PRIMARY KEY, f int NOT NULL, INDEX(f)) ENGINE=InnoDB;
+CREATE TABLE t2 (id int PRIMARY KEY, f INT NOT NULL,
+CONSTRAINT t2_t1 FOREIGN KEY (id) REFERENCES t1 (id)
+ON DELETE CASCADE ON UPDATE CASCADE) ENGINE=InnoDB;
+ALTER TABLE t2 ADD FOREIGN KEY (f) REFERENCES t1 (f) ON
+DELETE CASCADE ON UPDATE CASCADE;
+SHOW CREATE TABLE t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `id` int(11) NOT NULL,
+  `f` int(11) NOT NULL,
+  PRIMARY KEY (`id`),
+  KEY `f` (`f`),
+  CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`f`) REFERENCES `t1` (`f`) ON DELETE CASCADE ON UPDATE CASCADE,
+  CONSTRAINT `t2_t1` FOREIGN KEY (`id`) REFERENCES `t1` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+DROP TABLE t2, t1;
+CREATE TABLE t1 (a INT, INDEX(a)) ENGINE=InnoDB;
+CREATE TABLE t2 (a INT, INDEX(a)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (1);
+ALTER TABLE t2 ADD FOREIGN KEY (a) REFERENCES t1 (a) ON DELETE SET NULL;
+ALTER TABLE t2 MODIFY a INT NOT NULL;
+ERROR HY000: Error on rename of '#sql-temporary' to './test/t2' (errno: 150)
+DELETE FROM t1;
+DROP TABLE t2,t1;
+CREATE TABLE t1 (a VARCHAR(5) COLLATE utf8_unicode_ci PRIMARY KEY)
+ENGINE=InnoDB;
+INSERT INTO t1 VALUES (0xEFBCA4EFBCA4EFBCA4);
+DELETE FROM t1;
+INSERT INTO t1 VALUES ('DDD');
+SELECT * FROM t1;
+a
+DDD
+DROP TABLE t1;
+CREATE TABLE t1 (id int PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB
+AUTO_INCREMENT=42;
+INSERT INTO t1 VALUES (0),(347),(0);
+SELECT * FROM t1;
+id
+42
+347
+348
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `id` int(11) NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`id`)
+) ENGINE=InnoDB AUTO_INCREMENT=349 DEFAULT CHARSET=latin1
+CREATE TABLE t2 (id int PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t2 VALUES(42),(347),(348);
+ALTER TABLE t1 ADD CONSTRAINT t1_t2 FOREIGN KEY (id) REFERENCES t2(id);
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `id` int(11) NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`id`),
+  CONSTRAINT `t1_t2` FOREIGN KEY (`id`) REFERENCES `t2` (`id`)
+) ENGINE=InnoDB AUTO_INCREMENT=349 DEFAULT CHARSET=latin1
+DROP TABLE t1,t2;
+set innodb_strict_mode=on;
+CREATE TABLE t1 (
+c01 CHAR(255), c02 CHAR(255), c03 CHAR(255), c04 CHAR(255),
+c05 CHAR(255), c06 CHAR(255), c07 CHAR(255), c08 CHAR(255),
+c09 CHAR(255), c10 CHAR(255), c11 CHAR(255), c12 CHAR(255),
+c13 CHAR(255), c14 CHAR(255), c15 CHAR(255), c16 CHAR(255),
+c17 CHAR(255), c18 CHAR(255), c19 CHAR(255), c20 CHAR(255),
+c21 CHAR(255), c22 CHAR(255), c23 CHAR(255), c24 CHAR(255),
+c25 CHAR(255), c26 CHAR(255), c27 CHAR(255), c28 CHAR(255),
+c29 CHAR(255), c30 CHAR(255), c31 CHAR(255), c32 CHAR(255)
+) ENGINE = InnoDB;
+ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note	1051	Unknown table 't1'
+CREATE TABLE t1(
+id BIGINT(20) NOT NULL AUTO_INCREMENT PRIMARY KEY
+) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(-10);
+SELECT * FROM t1;
+id
+-10
+INSERT INTO t1 VALUES(NULL);
+SELECT * FROM t1;
+id
+-10
+1
+DROP TABLE t1;
+SET binlog_format='MIXED';
+SET TX_ISOLATION='read-committed';
+SET AUTOCOMMIT=0;
+DROP TABLE IF EXISTS t1, t2;
+Warnings:
+Note	1051	Unknown table 't1'
+Note	1051	Unknown table 't2'
+CREATE TABLE t1 ( a int ) ENGINE=InnoDB;
+CREATE TABLE t2 LIKE t1;
+SELECT * FROM t2;
+a
+SET binlog_format='MIXED';
+SET TX_ISOLATION='read-committed';
+SET AUTOCOMMIT=0;
+INSERT INTO t1 VALUES (1);
+COMMIT;
+SELECT * FROM t1 WHERE a=1;
+a
+1
+SET binlog_format='MIXED';
+SET TX_ISOLATION='read-committed';
+SET AUTOCOMMIT=0;
+SELECT * FROM t2;
+a
+SET binlog_format='MIXED';
+SET TX_ISOLATION='read-committed';
+SET AUTOCOMMIT=0;
+INSERT INTO t1 VALUES (2);
+COMMIT;
+SELECT * FROM t1 WHERE a=2;
+a
+2
+SELECT * FROM t1 WHERE a=2;
+a
+2
+DROP TABLE t1;
+DROP TABLE t2;
+create table t1 (i int, j int) engine=innodb;
+insert into t1 (i, j) values (1, 1), (2, 2);
+update t1 set j = 2;
+affected rows: 1
+info: Rows matched: 2  Changed: 1  Warnings: 0
+drop table t1;
+create table t1 (id int) comment='this is a comment' engine=innodb;
+select table_comment, data_free > 0 as data_free_is_set
+from information_schema.tables
+where table_schema='test' and table_name = 't1';
+table_comment	data_free_is_set
+this is a comment	1
+drop table t1;
+CREATE TABLE t1 (
+c1 INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
+c2 VARCHAR(128) NOT NULL,
+PRIMARY KEY(c1)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=100;
+CREATE TABLE t2 (
+c1 INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
+c2 INT(10) UNSIGNED DEFAULT NULL,
+PRIMARY KEY(c1)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=200;
+SELECT AUTO_INCREMENT FROM INFORMATION_SCHEMA.TABLES WHERE table_name = 't2';
+AUTO_INCREMENT
+200
+ALTER TABLE t2 ADD CONSTRAINT t1_t2_1 FOREIGN KEY(c1) REFERENCES t1(c1);
+SELECT AUTO_INCREMENT FROM INFORMATION_SCHEMA.TABLES WHERE table_name = 't2';
+AUTO_INCREMENT
+200
+DROP TABLE t2;
+DROP TABLE t1;
+CREATE TABLE t1 (c1 int default NULL,
+c2 int default NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+TRUNCATE TABLE t1;
+affected rows: 0
+INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+affected rows: 5
+info: Records: 5  Duplicates: 0  Warnings: 0
+TRUNCATE TABLE t1;
+affected rows: 0
+DROP TABLE t1;
+Variable_name	Value
+Handler_update	0
+Variable_name	Value
+Handler_delete	0
+Variable_name	Value
+Handler_update	1
+Variable_name	Value
+Handler_delete	1
diff --git a/storage/innodb_plugin/mysql-test/innodb.test b/storage/innodb_plugin/mysql-test/innodb.test
new file mode 100644
index 00000000000..f46a3a70b56
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb.test
@@ -0,0 +1,2569 @@
+#######################################################################
+#                                                                     #
+# Please, DO NOT TOUCH this file as well as the innodb.result file.   #
+# These files are to be modified ONLY BY INNOBASE guys.               #
+#                                                                     #
+# Use innodb_mysql.[test|result] files instead.                       #
+#                                                                     #
+# If nevertheless you need to make some changes here, please, forward #
+# your commit message                                                 #
+# To: innodb_dev_ww@oracle.com                                        #
+# Cc: dev-innodb@mysql.com                                            #
+# (otherwise your changes may be erased).                             #
+#                                                                     #
+#######################################################################
+
+-- source include/have_innodb.inc
+
+# Save the original values of some variables in order to be able to
+# estimate how much they have changed during the tests. Previously this
+# test assumed that e.g. rows_deleted is 0 here and after deleting 23
+# rows it expected that rows_deleted will be 23. Now we do not make
+# assumptions about the values of the variables at the beginning, e.g.
+# rows_deleted should be 23 + "rows_deleted before the test". This allows
+# the test to be run multiple times without restarting the mysqld server.
+# See Bug#43309 Test main.innodb can't be run twice
+-- disable_query_log
+SET @innodb_thread_concurrency_orig = @@innodb_thread_concurrency;
+
+SET @innodb_rows_deleted_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_deleted');
+SET @innodb_rows_inserted_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_inserted');
+SET @innodb_rows_updated_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_updated');
+SET @innodb_row_lock_waits_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_waits');
+SET @innodb_row_lock_current_waits_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_current_waits');
+SET @innodb_row_lock_time_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time');
+SET @innodb_row_lock_time_max_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time_max');
+SET @innodb_row_lock_time_avg_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time_avg');
+-- enable_query_log
+
+--disable_warnings
+drop table if exists t1,t2,t3,t4;
+drop database if exists mysqltest;
+--enable_warnings
+
+#
+# Small basic test with ignore
+#
+
+create table t1 (id int unsigned not null auto_increment, code tinyint unsigned not null, name char(20) not null, primary key (id), key (code), unique (name)) engine=innodb;
+
+insert into t1 (code, name) values (1, 'Tim'), (1, 'Monty'), (2, 'David'), (2, 'Erik'), (3, 'Sasha'), (3, 'Jeremy'), (4, 'Matt');
+select id, code, name from t1 order by id;
+
+update ignore t1 set id = 8, name = 'Sinisa' where id < 3;
+select id, code, name from t1 order by id;
+update ignore t1 set id = id + 10, name = 'Ralph' where id < 4;
+select id, code, name from t1 order by id;
+
+drop table t1;
+
+#
+# A bit bigger test
+# The 'replace_column' statements are needed because the cardinality calculated
+# by innodb is not always the same between runs
+#
+
+CREATE TABLE t1 (
+  id int(11) NOT NULL auto_increment,
+  parent_id int(11) DEFAULT '0' NOT NULL,
+  level tinyint(4) DEFAULT '0' NOT NULL,
+  PRIMARY KEY (id),
+  KEY parent_id (parent_id),
+  KEY level (level)
+) engine=innodb;
+INSERT INTO t1 VALUES (1,0,0),(3,1,1),(4,1,1),(8,2,2),(9,2,2),(17,3,2),(22,4,2),(24,4,2),(28,5,2),(29,5,2),(30,5,2),(31,6,2),(32,6,2),(33,6,2),(203,7,2),(202,7,2),(20,3,2),(157,0,0),(193,5,2),(40,7,2),(2,1,1),(15,2,2),(6,1,1),(34,6,2),(35,6,2),(16,3,2),(7,1,1),(36,7,2),(18,3,2),(26,5,2),(27,5,2),(183,4,2),(38,7,2),(25,5,2),(37,7,2),(21,4,2),(19,3,2),(5,1,1),(179,5,2);
+update t1 set parent_id=parent_id+100;
+select * from t1 where parent_id=102;
+update t1 set id=id+1000;
+-- error ER_DUP_ENTRY,1022
+update t1 set id=1024 where id=1009; 
+select * from t1;
+update ignore t1 set id=id+1; # This will change all rows
+select * from t1;
+update ignore t1 set id=1023 where id=1010;
+select * from t1 where parent_id=102;
+--replace_column 9 #
+explain select level from t1 where level=1;
+--replace_column 9 #
+explain select level,id from t1 where level=1;
+--replace_column 9 #
+explain select level,id,parent_id from t1 where level=1;
+select level,id from t1 where level=1;
+select level,id,parent_id from t1 where level=1;
+optimize table t1;
+--replace_column 7 #
+show keys from t1;
+drop table t1;
+
+#
+# Test replace
+#
+
+CREATE TABLE t1 (
+  gesuchnr int(11) DEFAULT '0' NOT NULL,
+  benutzer_id int(11) DEFAULT '0' NOT NULL,
+  PRIMARY KEY (gesuchnr,benutzer_id)
+) engine=innodb;
+
+replace into t1 (gesuchnr,benutzer_id) values (2,1);
+replace into t1 (gesuchnr,benutzer_id) values (1,1);
+replace into t1 (gesuchnr,benutzer_id) values (1,1);
+select * from t1;
+drop table t1;
+
+#
+# test delete using hidden_primary_key
+#
+
+create table t1 (a int) engine=innodb;
+insert into t1 values (1), (2);
+optimize table t1;
+delete from t1 where a = 1;
+select * from t1;
+check table t1;
+drop table t1;
+
+create table t1 (a int,b varchar(20)) engine=innodb;
+insert into t1 values (1,""), (2,"testing");
+delete from t1 where a = 1;
+select * from t1;
+create index skr on t1 (a);
+insert into t1 values (3,""), (4,"testing");
+analyze table t1;
+--replace_column 7 #
+show keys from t1;
+drop table t1;
+
+
+# Test of reading on secondary key with may be null
+
+create table t1 (a int,b varchar(20),key(a)) engine=innodb;
+insert into t1 values (1,""), (2,"testing");
+select * from t1 where a = 1;
+drop table t1;
+
+#
+# Test rollback
+#
+
+create table t1 (n int not null primary key) engine=innodb;
+set autocommit=0;
+insert into t1 values (4);
+rollback;
+select n, "after rollback" from t1;
+insert into t1 values (4);
+commit;
+select n, "after commit" from t1;
+commit;
+insert into t1 values (5);
+-- error ER_DUP_ENTRY
+insert into t1 values (4);
+commit;
+select n, "after commit" from t1;
+set autocommit=1;
+insert into t1 values (6);
+-- error ER_DUP_ENTRY
+insert into t1 values (4);
+select n from t1;
+set autocommit=0;
+#
+# savepoints
+#
+begin;
+savepoint `my_savepoint`;
+insert into t1 values (7);
+savepoint `savept2`;
+insert into t1 values (3);
+select n from t1;
+savepoint savept3;
+rollback to savepoint savept2;
+--error 1305
+rollback to savepoint savept3;
+rollback to savepoint savept2;
+release savepoint `my_savepoint`;
+select n from t1;
+-- error 1305
+rollback to savepoint `my_savepoint`;
+--error 1305
+rollback to savepoint savept2;
+insert into t1 values (8);
+savepoint sv;
+commit;
+savepoint sv;
+set autocommit=1;
+# nop
+rollback;
+drop table t1;
+
+#
+# Test for commit and FLUSH TABLES WITH READ LOCK
+#
+
+create table t1 (n int not null primary key) engine=innodb;
+start transaction;
+insert into t1 values (4);
+flush tables with read lock;
+#
+# Current code can't handle a read lock in middle of transaction
+#--error 1223;
+commit;
+unlock tables;
+commit;
+select * from t1;
+drop table t1;
+
+#
+# Testing transactions
+#
+
+create table t1 ( id int NOT NULL PRIMARY KEY, nom varchar(64)) engine=innodb;
+begin;
+insert into t1 values(1,'hamdouni');
+select id as afterbegin_id,nom as afterbegin_nom from t1;
+rollback;
+select id as afterrollback_id,nom as afterrollback_nom from t1;
+set autocommit=0;
+insert into t1 values(2,'mysql');
+select id as afterautocommit0_id,nom as afterautocommit0_nom from t1;
+rollback;
+select id as afterrollback_id,nom as afterrollback_nom from t1;
+set autocommit=1;
+drop table t1;
+
+#
+# Simple not autocommit test
+# 
+
+CREATE TABLE t1 (id char(8) not null primary key, val int not null) engine=innodb;
+insert into t1 values ('pippo', 12);
+-- error ER_DUP_ENTRY
+insert into t1 values ('pippo', 12); # Gives error
+delete from t1;
+delete from t1 where id = 'pippo';
+select * from t1;
+
+insert into t1 values ('pippo', 12);
+set autocommit=0;
+delete from t1;
+rollback;
+select * from t1;
+delete from t1;
+commit;
+select * from t1;
+drop table t1;
+
+#
+# Test of active transactions
+#
+
+create table t1 (a integer) engine=innodb;
+start transaction;
+rename table t1 to t2;
+create table t1 (b integer) engine=innodb;
+insert into t1 values (1);
+rollback;
+drop table t1;
+rename table t2 to t1;
+drop table t1;
+set autocommit=1;
+
+#
+# The following simple tests failed at some point
+#
+
+CREATE TABLE t1 (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR(64)) ENGINE=innodb;
+INSERT INTO t1 VALUES (1, 'Jochen');
+select * from t1;
+drop table t1;
+
+CREATE TABLE t1 ( _userid VARCHAR(60) NOT NULL PRIMARY KEY) ENGINE=innodb;
+set autocommit=0;
+INSERT INTO t1  SET _userid='marc@anyware.co.uk';
+COMMIT;
+SELECT * FROM t1;
+SELECT _userid FROM t1 WHERE _userid='marc@anyware.co.uk';
+drop table t1;
+set autocommit=1;
+
+#
+# Test when reading on part of unique key
+#
+CREATE TABLE t1 (
+  user_id int(10) DEFAULT '0' NOT NULL,
+  name varchar(100),
+  phone varchar(100),
+  ref_email varchar(100) DEFAULT '' NOT NULL,
+  detail varchar(200),
+  PRIMARY KEY (user_id,ref_email)
+)engine=innodb;
+
+INSERT INTO t1 VALUES (10292,'sanjeev','29153373','sansh777@hotmail.com','xxx'),(10292,'shirish','2333604','shirish@yahoo.com','ddsds'),(10292,'sonali','323232','sonali@bolly.com','filmstar');
+select * from t1 where user_id=10292;
+INSERT INTO t1 VALUES (10291,'sanjeev','29153373','sansh777@hotmail.com','xxx'),(10293,'shirish','2333604','shirish@yahoo.com','ddsds');
+select * from t1 where user_id=10292;
+select * from t1 where user_id>=10292;
+select * from t1 where user_id>10292;
+select * from t1 where user_id<10292;
+drop table t1;
+
+#
+# Test that keys are created in right order
+#
+
+CREATE TABLE t1 (a int not null, b int not null,c int not null,
+key(a),primary key(a,b), unique(c),key(a),unique(b));
+--replace_column 7 #
+show index from t1;
+drop table t1;
+
+#
+# Test of ALTER TABLE and innodb tables
+#
+
+create table t1 (col1 int not null, col2 char(4) not null, primary key(col1));
+alter table t1 engine=innodb;
+insert into t1 values ('1','1'),('5','2'),('2','3'),('3','4'),('4','4');
+select * from t1;
+update t1 set col2='7' where col1='4';
+select * from t1;
+alter table t1 add co3 int not null;
+select * from t1;
+update t1 set col2='9' where col1='2';
+select * from t1;
+drop table t1;
+
+#
+# INSERT INTO innodb tables
+#
+
+create table t1 (a int not null , b int, primary key (a)) engine = innodb;
+create table t2 (a int not null , b int, primary key (a)) engine = myisam;
+insert into t1 VALUES (1,3) , (2,3), (3,3);
+select * from t1;
+insert into t2 select * from t1;
+select * from t2;
+delete from t1 where b = 3;
+select * from t1;
+insert into t1 select * from t2;
+select * from t1;
+select * from t2;
+drop table t1,t2;
+
+#
+# ORDER BY on not primary key
+#
+
+CREATE TABLE t1 (
+  user_name varchar(12),
+  password text,
+  subscribed char(1),
+  user_id int(11) DEFAULT '0' NOT NULL,
+  quota bigint(20),
+  weight double,
+  access_date date,
+  access_time time,
+  approved datetime,
+  dummy_primary_key int(11) NOT NULL auto_increment,
+  PRIMARY KEY (dummy_primary_key)
+) ENGINE=innodb;
+INSERT INTO t1 VALUES ('user_0','somepassword','N',0,0,0,'2000-09-07','23:06:59','2000-09-07 23:06:59',1);
+INSERT INTO t1 VALUES ('user_1','somepassword','Y',1,1,1,'2000-09-07','23:06:59','2000-09-07 23:06:59',2);
+INSERT INTO t1 VALUES ('user_2','somepassword','N',2,2,1.4142135623731,'2000-09-07','23:06:59','2000-09-07 23:06:59',3);
+INSERT INTO t1 VALUES ('user_3','somepassword','Y',3,3,1.7320508075689,'2000-09-07','23:06:59','2000-09-07 23:06:59',4);
+INSERT INTO t1 VALUES ('user_4','somepassword','N',4,4,2,'2000-09-07','23:06:59','2000-09-07 23:06:59',5);
+select  user_name, password , subscribed, user_id, quota, weight, access_date, access_time, approved, dummy_primary_key from t1 order by user_name;
+drop table t1;
+
+#
+# Testing of tables without primary keys
+#
+
+CREATE TABLE t1 (
+  id int(11) NOT NULL auto_increment,
+  parent_id int(11) DEFAULT '0' NOT NULL,
+  level tinyint(4) DEFAULT '0' NOT NULL,
+  KEY (id),
+  KEY parent_id (parent_id),
+  KEY level (level)
+) engine=innodb;
+INSERT INTO t1 VALUES (1,0,0),(3,1,1),(4,1,1),(8,2,2),(9,2,2),(17,3,2),(22,4,2),(24,4,2),(28,5,2),(29,5,2),(30,5,2),(31,6,2),(32,6,2),(33,6,2),(203,7,2),(202,7,2),(20,3,2),(157,0,0),(193,5,2),(40,7,2),(2,1,1),(15,2,2),(6,1,1),(34,6,2),(35,6,2),(16,3,2),(7,1,1),(36,7,2),(18,3,2),(26,5,2),(27,5,2),(183,4,2),(38,7,2),(25,5,2),(37,7,2),(21,4,2),(19,3,2),(5,1,1);
+INSERT INTO t1 values (179,5,2);
+update t1 set parent_id=parent_id+100;
+select * from t1 where parent_id=102;
+update t1 set id=id+1000;
+update t1 set id=1024 where id=1009; 
+select * from t1;
+update ignore t1 set id=id+1; # This will change all rows
+select * from t1;
+update ignore t1 set id=1023 where id=1010;
+select * from t1 where parent_id=102;
+--replace_column 9 #
+explain select level from t1 where level=1;
+select level,id from t1 where level=1;
+select level,id,parent_id from t1 where level=1;
+select level,id from t1 where level=1 order by id;
+delete from t1 where level=1;
+select * from t1;
+drop table t1;
+
+#
+# Test of index only reads
+#
+CREATE TABLE t1 (
+   sca_code char(6) NOT NULL,
+   cat_code char(6) NOT NULL,
+   sca_desc varchar(50),
+   lan_code char(2) NOT NULL,
+   sca_pic varchar(100),
+   sca_sdesc varchar(50),
+   sca_sch_desc varchar(16),
+   PRIMARY KEY (sca_code, cat_code, lan_code),
+   INDEX sca_pic (sca_pic)
+) engine = innodb ;
+
+INSERT INTO t1 ( sca_code, cat_code, sca_desc, lan_code, sca_pic, sca_sdesc, sca_sch_desc) VALUES ( 'PD', 'J', 'PENDANT', 'EN', NULL, NULL, 'PENDANT'),( 'RI', 'J', 'RING', 'EN', NULL, NULL, 'RING'),( 'QQ', 'N', 'RING', 'EN', 'not null', NULL, 'RING');
+select count(*) from t1 where sca_code = 'PD';
+select count(*) from t1 where sca_code <= 'PD';
+select count(*) from t1 where sca_pic is null;
+alter table t1 drop index sca_pic, add index sca_pic (cat_code, sca_pic);
+select count(*) from t1 where sca_code='PD' and sca_pic is null;
+select count(*) from t1 where cat_code='E';
+
+alter table t1 drop index sca_pic, add index (sca_pic, cat_code);
+select count(*) from t1 where sca_code='PD' and sca_pic is null;
+select count(*) from t1 where sca_pic >= 'n';
+select sca_pic from t1 where sca_pic is null;
+update t1 set sca_pic="test" where sca_pic is null;
+delete from t1 where sca_code='pd';
+drop table t1;
+
+#
+# Test of opening table twice and timestamps
+#
+set @a:=now();
+CREATE TABLE t1 (a int not null, b timestamp not null, primary key (a)) engine=innodb;
+insert into t1 (a) values(1),(2),(3);
+select t1.a from t1 natural join t1 as t2 where t1.b >= @a order by t1.a;
+select a from t1 natural join t1 as t2 where b >= @a order by a;
+update t1 set a=5 where a=1;
+select a from t1;
+drop table t1;
+
+#
+# Test with variable length primary key
+#
+create table t1 (a varchar(100) not null, primary key(a), b int not null) engine=innodb;
+insert into t1 values("hello",1),("world",2);
+select * from t1 order by b desc;
+optimize table t1;
+--replace_column 7 #
+show keys from t1;
+drop table t1;
+
+#
+# Test of create index with NULL columns
+#
+create table t1 (i int, j int ) ENGINE=innodb;
+insert into t1 values (1,2);
+select * from t1 where i=1 and j=2;
+create index ax1 on t1 (i,j);
+select * from t1 where i=1 and j=2;
+drop table t1;
+
+#
+# Test min-max optimization
+#
+
+CREATE TABLE t1 (
+  a int3 unsigned NOT NULL,
+  b int1 unsigned NOT NULL,
+  UNIQUE (a, b)
+) ENGINE = innodb;
+ 
+INSERT INTO t1 VALUES (1, 1);
+SELECT MIN(B),MAX(b) FROM t1 WHERE t1.a = 1;
+drop table t1;
+
+#
+# Test INSERT DELAYED
+#
+
+CREATE TABLE t1 (a int unsigned NOT NULL) engine=innodb;
+# Can't test this in 3.23
+# INSERT DELAYED INTO t1 VALUES (1);
+INSERT INTO t1 VALUES (1);
+SELECT * FROM t1;
+DROP TABLE t1;
+
+
+#
+# Crash when using many tables (Test case by Jeremy D Zawodny)
+#
+
+create table t1 (a int  primary key,b int, c int, d int, e int, f int, g int, h int, i int, j int, k int, l int, m int, n int, o int, p int, q int, r int, s int, t int, u int, v int, w int, x int, y int, z int, a1 int, a2 int, a3 int, a4 int, a5 int, a6 int, a7 int, a8 int, a9 int, b1 int, b2 int, b3 int, b4 int, b5 int, b6 int) engine = innodb;
+insert into t1 values (1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1);
+--replace_column 9 #
+explain select * from t1 where a > 0 and a < 50;
+drop table t1;
+
+#
+# Test lock tables
+#
+
+create table t1 (id int NOT NULL,id2 int NOT NULL,id3 int NOT NULL,dummy1 char(30),primary key (id,id2),index index_id3 (id3)) engine=innodb;
+insert into t1 values (0,0,0,'ABCDEFGHIJ'),(2,2,2,'BCDEFGHIJK'),(1,1,1,'CDEFGHIJKL');
+LOCK TABLES t1 WRITE;
+--error ER_DUP_ENTRY
+insert into t1 values (99,1,2,'D'),(1,1,2,'D');
+select id from t1;
+select id from t1;
+UNLOCK TABLES;
+DROP TABLE t1;
+
+create table t1 (id int NOT NULL,id2 int NOT NULL,id3 int NOT NULL,dummy1 char(30),primary key (id,id2),index index_id3 (id3)) engine=innodb;
+insert into t1 values (0,0,0,'ABCDEFGHIJ'),(2,2,2,'BCDEFGHIJK'),(1,1,1,'CDEFGHIJKL');
+LOCK TABLES t1 WRITE;
+begin;
+--error ER_DUP_ENTRY
+insert into t1 values (99,1,2,'D'),(1,1,2,'D');
+select id from t1;
+insert ignore into t1 values (100,1,2,'D'),(1,1,99,'D');
+commit;
+select id,id3 from t1;
+UNLOCK TABLES;
+DROP TABLE t1;
+
+#
+# Test prefix key
+#
+create table t1 (a char(20), unique (a(5))) engine=innodb;
+drop table t1;
+create table t1 (a char(20), index (a(5))) engine=innodb;
+show create table t1;
+drop table t1;
+
+#
+# Test using temporary table and auto_increment
+#
+
+create temporary table t1 (a int not null auto_increment, primary key(a)) engine=innodb;
+insert into t1 values (NULL),(NULL),(NULL);
+delete from t1 where a=3;
+insert into t1 values (NULL);
+select * from t1;
+alter table t1 add b int;
+select * from t1;
+drop table t1;
+
+#Slashdot bug
+create table t1
+ (
+  id int auto_increment primary key,
+  name varchar(32) not null,
+  value text not null,
+  uid int not null,
+  unique key(name,uid)
+ ) engine=innodb;
+insert into t1 values (1,'one','one value',101),
+ (2,'two','two value',102),(3,'three','three value',103);
+set insert_id=5;
+replace into t1 (value,name,uid) values ('other value','two',102);
+delete from t1 where uid=102;
+set insert_id=5;
+replace into t1 (value,name,uid) values ('other value','two',102);
+set insert_id=6;
+replace into t1 (value,name,uid) values ('other value','two',102);
+select * from t1;
+drop table t1;
+
+#
+# Test DROP DATABASE
+#
+
+create database mysqltest;
+create table mysqltest.t1 (a int not null) engine= innodb;
+insert into mysqltest.t1 values(1);
+create table mysqltest.t2 (a int not null) engine= myisam;
+insert into mysqltest.t2 values(1);
+create table mysqltest.t3 (a int not null) engine= heap;
+insert into mysqltest.t3 values(1);
+commit;
+drop database mysqltest;
+# Don't check error message
+--error 1049
+show tables from mysqltest;
+
+#
+# Test truncate table with and without auto_commit
+#
+
+set autocommit=0;
+create table t1 (a int not null) engine= innodb;
+insert into t1 values(1),(2);
+truncate table t1;
+commit;
+truncate table t1;
+truncate table t1;
+select * from t1;
+insert into t1 values(1),(2);
+delete from t1;
+select * from t1;
+commit;
+drop table t1;
+set autocommit=1;
+
+create table t1 (a int not null) engine= innodb;
+insert into t1 values(1),(2);
+truncate table t1;
+insert into t1 values(1),(2);
+select * from t1;
+truncate table t1;
+insert into t1 values(1),(2);
+delete from t1;
+select * from t1;
+drop table t1;
+
+#
+# Test of how ORDER BY works when doing it on the whole table
+#
+
+create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b)) engine=innodb;
+insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4);
+--replace_column 9 #
+explain select * from t1 order by a;
+--replace_column 9 #
+explain select * from t1 order by b;
+--replace_column 9 #
+explain select * from t1 order by c;
+--replace_column 9 #
+explain select a from t1 order by a;
+--replace_column 9 #
+explain select b from t1 order by b;
+--replace_column 9 #
+explain select a,b from t1 order by b;
+--replace_column 9 #
+explain select a,b from t1;
+--replace_column 9 #
+explain select a,b,c from t1;
+drop table t1;
+
+#
+# Check describe
+#
+
+create table t1 (t int not null default 1, key (t)) engine=innodb;
+desc t1;
+drop table t1;
+
+#
+# Test of multi-table-delete
+#
+
+CREATE TABLE t1 (
+  number bigint(20) NOT NULL default '0',
+  cname char(15) NOT NULL default '',
+  carrier_id smallint(6) NOT NULL default '0',
+  privacy tinyint(4) NOT NULL default '0',
+  last_mod_date timestamp NOT NULL,
+  last_mod_id smallint(6) NOT NULL default '0',
+  last_app_date timestamp NOT NULL,
+  last_app_id smallint(6) default '-1',
+  version smallint(6) NOT NULL default '0',
+  assigned_scps int(11) default '0',
+  status tinyint(4) default '0'
+) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (4077711111,'SeanWheeler',90,2,20020111112846,500,00000000000000,-1,2,3,1);
+INSERT INTO t1 VALUES (9197722223,'berry',90,3,20020111112809,500,20020102114532,501,4,10,0);
+INSERT INTO t1 VALUES (650,'San Francisco',0,0,20011227111336,342,00000000000000,-1,1,24,1);
+INSERT INTO t1 VALUES (302467,'Sue\'s Subshop',90,3,20020109113241,500,20020102115111,501,7,24,0);
+INSERT INTO t1 VALUES (6014911113,'SudzCarwash',520,1,20020102115234,500,20020102115259,501,33,32768,0);
+INSERT INTO t1 VALUES (333,'tubs',99,2,20020109113440,501,20020109113440,500,3,10,0);
+CREATE TABLE t2 (
+  number bigint(20) NOT NULL default '0',
+  cname char(15) NOT NULL default '',
+  carrier_id smallint(6) NOT NULL default '0',
+  privacy tinyint(4) NOT NULL default '0',
+  last_mod_date timestamp NOT NULL,
+  last_mod_id smallint(6) NOT NULL default '0',
+  last_app_date timestamp NOT NULL,
+  last_app_id smallint(6) default '-1',
+  version smallint(6) NOT NULL default '0',
+  assigned_scps int(11) default '0',
+  status tinyint(4) default '0'
+) ENGINE=InnoDB;
+INSERT INTO t2 VALUES (4077711111,'SeanWheeler',0,2,20020111112853,500,00000000000000,-1,2,3,1);
+INSERT INTO t2 VALUES (9197722223,'berry',90,3,20020111112818,500,20020102114532,501,4,10,0);
+INSERT INTO t2 VALUES (650,'San Francisco',90,0,20020109113158,342,00000000000000,-1,1,24,1);
+INSERT INTO t2 VALUES (333,'tubs',99,2,20020109113453,501,20020109113453,500,3,10,0);
+select * from t1;
+select * from t2;
+delete t1, t2 from t1 left join t2 on t1.number=t2.number where (t1.carrier_id=90 and t1.number=t2.number) or (t2.carrier_id=90 and t1.number=t2.number) or  (t1.carrier_id=90 and t2.number is null);
+select * from t1;
+select * from t2; 
+select * from t2;
+drop table t1,t2;
+
+#
+# A simple test with some isolation levels
+# TODO: Make this into a test using replication to really test how
+# this works.
+#
+
+create table t1 (id int unsigned not null auto_increment, code tinyint unsigned not null, name char(20) not null, primary key (id), key (code), unique (name)) engine=innodb;
+
+BEGIN;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+SELECT @@tx_isolation,@@global.tx_isolation;
+insert into t1 (code, name) values (1, 'Tim'), (1, 'Monty'), (2, 'David');
+select id, code, name from t1 order by id;
+COMMIT;
+
+BEGIN;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+insert into t1 (code, name) values (2, 'Erik'), (3, 'Sasha');
+select id, code, name from t1 order by id;
+COMMIT;
+
+SET binlog_format='MIXED';
+BEGIN;
+SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
+insert into t1 (code, name) values (3, 'Jeremy'), (4, 'Matt');
+select id, code, name from t1 order by id;
+COMMIT;
+DROP TABLE t1;
+
+#
+# Test of multi-table-update
+#
+create table t1 (n int(10), d int(10)) engine=innodb;
+create table t2 (n int(10), d int(10)) engine=innodb;
+insert into t1 values(1,1),(1,2);
+insert into t2 values(1,10),(2,20);
+UPDATE t1,t2 SET t1.d=t2.d,t2.d=30 WHERE t1.n=t2.n;
+select * from t1;
+select * from t2;
+drop table t1,t2;
+
+#
+# Bug #29136  	erred multi-delete on trans table does not rollback 
+#
+
+# prepare
+--disable_warnings
+drop table if exists t1, t2;
+--enable_warnings
+CREATE TABLE t1 (a int, PRIMARY KEY (a));
+CREATE TABLE t2 (a int, PRIMARY KEY (a)) ENGINE=InnoDB;
+create trigger trg_del_t2 after  delete on t2 for each row
+       insert into t1 values (1);
+insert into t1 values (1);
+insert into t2 values (1),(2);
+
+
+# exec cases A, B - see multi_update.test
+
+# A. send_error() w/o send_eof() branch
+
+--error ER_DUP_ENTRY
+delete t2 from t2;
+
+# check
+
+select count(*) from t2 /* must be 2 as restored after rollback caused by the error */;
+
+# cleanup bug#29136
+
+drop table t1, t2;
+
+
+#
+# Bug #29136  	erred multi-delete on trans table does not rollback 
+#
+
+# prepare
+--disable_warnings
+drop table if exists t1, t2;
+--enable_warnings
+CREATE TABLE t1 (a int, PRIMARY KEY (a));
+CREATE TABLE t2 (a int, PRIMARY KEY (a)) ENGINE=InnoDB;
+create trigger trg_del_t2 after  delete on t2 for each row
+       insert into t1 values (1);
+insert into t1 values (1);
+insert into t2 values (1),(2);
+
+
+# exec cases A, B - see multi_update.test
+
+# A. send_error() w/o send_eof() branch
+
+--error ER_DUP_ENTRY
+delete t2 from t2;
+
+# check
+
+select count(*) from t2 /* must be 2 as restored after rollback caused by the error */;
+
+# cleanup bug#29136
+
+drop table t1, t2;
+
+
+#
+# Testing of IFNULL
+#
+create table t1 (a int, b int) engine=innodb;
+insert into t1 values(20,null);
+select t2.b, ifnull(t2.b,"this is null") from t1 as t2 left join t1 as t3 on
+t2.b=t3.a;
+select t2.b, ifnull(t2.b,"this is null") from t1 as t2 left join t1 as t3 on
+t2.b=t3.a order by 1;
+insert into t1 values(10,null);
+select t2.b, ifnull(t2.b,"this is null") from t1 as t2 left join t1 as t3 on
+t2.b=t3.a order by 1;
+drop table t1;
+
+#
+# Test of read_through not existing const_table
+#
+
+create table t1 (a varchar(10) not null) engine=myisam;
+create table t2 (b varchar(10) not null unique) engine=innodb;
+select t1.a from t1,t2 where t1.a=t2.b;
+drop table t1,t2;
+create table t1 (a int not null, b int, primary key (a)) engine = innodb;
+create table t2 (a int not null, b int, primary key (a)) engine = innodb;
+insert into t1 values (10, 20);
+insert into t2 values (10, 20);
+update t1, t2 set t1.b = 150, t2.b = t1.b where t2.a = t1.a and t1.a = 10;
+drop table t1,t2;
+
+#
+# Test of multi-table-delete with foreign key constraints
+#
+
+CREATE TABLE t1 (id INT NOT NULL, PRIMARY KEY (id)) ENGINE=INNODB;
+CREATE TABLE t2 (id INT PRIMARY KEY, t1_id INT, INDEX par_ind (t1_id), FOREIGN KEY (t1_id) REFERENCES t1(id)  ON DELETE CASCADE ) ENGINE=INNODB;
+insert into t1 set id=1;
+insert into t2 set id=1, t1_id=1;
+delete t1,t2 from t1,t2 where t1.id=t2.t1_id;
+select * from t1;
+select * from t2;
+drop table t2,t1;
+CREATE TABLE t1(id INT NOT NULL,  PRIMARY KEY (id)) ENGINE=INNODB;
+CREATE TABLE t2(id  INT PRIMARY KEY, t1_id INT, INDEX par_ind (t1_id)  ) ENGINE=INNODB;
+INSERT INTO t1 VALUES(1);
+INSERT INTO t2 VALUES(1, 1);
+SELECT * from t1;
+UPDATE t1,t2 SET t1.id=t1.id+1, t2.t1_id=t1.id+1;
+SELECT * from t1;
+UPDATE t1,t2 SET t1.id=t1.id+1 where t1.id!=t2.id;
+SELECT * from t1;
+DROP TABLE t1,t2;
+
+#
+# Test of range_optimizer
+#
+
+set autocommit=0;
+
+CREATE TABLE t1 (id CHAR(15) NOT NULL, value CHAR(40) NOT NULL, PRIMARY KEY(id)) ENGINE=InnoDB;
+
+CREATE TABLE t2 (id CHAR(15) NOT NULL, value CHAR(40) NOT NULL, PRIMARY KEY(id)) ENGINE=InnoDB;
+
+CREATE TABLE t3 (id1 CHAR(15) NOT NULL, id2 CHAR(15) NOT NULL, PRIMARY KEY(id1, id2)) ENGINE=InnoDB;
+
+INSERT INTO t3 VALUES("my-test-1", "my-test-2");
+COMMIT;
+
+INSERT INTO t1 VALUES("this-key", "will disappear");
+INSERT INTO t2 VALUES("this-key", "will also disappear");
+DELETE FROM t3 WHERE id1="my-test-1";
+
+SELECT * FROM t1;
+SELECT * FROM t2;
+SELECT * FROM t3;
+ROLLBACK;
+
+SELECT * FROM t1;
+SELECT * FROM t2;
+SELECT * FROM t3;
+SELECT * FROM t3 WHERE id1="my-test-1" LOCK IN SHARE MODE;
+COMMIT;
+set autocommit=1;
+DROP TABLE t1,t2,t3;
+
+#
+# Check update with conflicting key
+#
+
+CREATE TABLE t1 (a int not null primary key, b int not null, unique (b)) engine=innodb;
+INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9);
+# We need the a < 1000 test here to quard against the halloween problems
+UPDATE t1 set a=a+100 where b between 2 and 3 and a < 1000;
+SELECT * from t1;
+drop table t1;
+
+#
+# Test multi update with different join methods
+#
+
+CREATE TABLE t1 (a int not null primary key, b int not null, key (b)) engine=innodb;
+CREATE TABLE t2 (a int not null primary key, b int not null, key (b)) engine=innodb;
+INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),(11,11),(12,12);
+INSERT INTO t2 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9);
+
+# Full join, without key
+update t1,t2 set t1.a=t1.a+100;
+select * from t1;
+
+# unique key
+update t1,t2 set t1.a=t1.a+100 where t1.a=101;
+select * from t1;
+
+# ref key
+update t1,t2 set t1.b=t1.b+10 where t1.b=2;
+select * from t1;
+
+# Range key (in t1)
+update t1,t2 set t1.b=t1.b+2,t2.b=t1.b+10 where t1.b between 3 and 5 and t1.a=t2.a+100;
+select * from t1;
+select * from t2;
+
+drop table t1,t2;
+CREATE TABLE t2 (   NEXT_T         BIGINT NOT NULL PRIMARY KEY) ENGINE=MyISAM;
+CREATE TABLE t1 (  B_ID           INTEGER NOT NULL PRIMARY KEY) ENGINE=InnoDB;
+SET AUTOCOMMIT=0;
+INSERT INTO t1 ( B_ID ) VALUES ( 1 );
+INSERT INTO t2 ( NEXT_T ) VALUES ( 1 );
+ROLLBACK;
+SELECT * FROM t1;
+drop table  t1,t2;
+create table t1  ( pk         int primary key,    parent     int not null,    child      int not null,       index (parent)  ) engine = innodb;
+insert into t1 values   (1,0,4),  (2,1,3),  (3,2,1),  (4,1,2);
+select distinct  parent,child   from t1   order by parent;
+drop table t1;
+
+#
+# Test that MySQL priorities clustered indexes
+#
+create table t1 (a int not null auto_increment primary key, b int, c int, key(c)) engine=innodb;
+create table t2 (a int not null auto_increment primary key, b int);
+insert into t1 (b) values (null),(null),(null),(null),(null),(null),(null);
+insert into t2 (a) select b from t1;
+insert into t1 (b) select b from t2;
+insert into t2 (a) select b from t1;
+insert into t1 (a) select b from t2;
+insert into t2 (a) select b from t1;
+insert into t1 (a) select b from t2;
+insert into t2 (a) select b from t1;
+insert into t1 (a) select b from t2;
+insert into t2 (a) select b from t1;
+insert into t1 (a) select b from t2;
+select count(*) from t1;
+--replace_column 9 #
+explain select * from t1 where c between 1 and 2500;
+update t1 set c=a;
+--replace_column 9 #
+explain select * from t1 where c between 1 and 2500;
+drop table t1,t2;
+
+#
+# Test of UPDATE ... ORDER BY
+#
+
+create table t1 (id int primary key auto_increment, fk int, index index_fk (fk)) engine=innodb;
+
+insert into t1 (id) values (null),(null),(null),(null),(null);
+update t1 set fk=69 where fk is null order by id limit 1;
+SELECT * from t1;
+drop table t1;
+
+create table t1 (a int not null, b int not null, key (a));
+insert into t1 values (1,1),(1,2),(1,3),(3,1),(3,2),(3,3),(3,1),(3,2),(3,3),(2,1),(2,2),(2,3);
+SET @tmp=0;
+update t1 set b=(@tmp:=@tmp+1) order by a;
+update t1 set b=99 where a=1 order by b asc limit 1;
+update t1 set b=100 where a=1 order by b desc limit 2;
+update t1 set a=a+10+b where a=1 order by b;
+select * from t1 order by a,b;
+drop table t1;
+
+#
+# Test of multi-table-updates (bug #1980).
+#
+
+create table t1 ( c char(8) not null ) engine=innodb;
+insert into t1 values ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7'),('8'),('9');
+insert into t1 values ('A'),('B'),('C'),('D'),('E'),('F');
+
+alter table t1 add b char(8) not null;
+alter table t1 add a char(8) not null;
+alter table t1 add primary key (a,b,c);
+update t1 set a=c, b=c;
+
+create table t2 (c char(8) not null, b char(8) not null, a char(8) not null, primary key(a,b,c)) engine=innodb;
+insert into t2 select * from t1;
+
+delete t1,t2 from t2,t1 where t1.a<'B' and t2.b=t1.b;
+drop table t1,t2;
+
+#
+# test autoincrement with TRUNCATE
+#
+
+SET AUTOCOMMIT=1;
+create table t1 (a integer auto_increment primary key) engine=innodb;
+insert into t1 (a) values (NULL),(NULL);
+truncate table t1;
+insert into t1 (a) values (NULL),(NULL);
+SELECT * from t1;
+drop table t1;
+
+#
+# Test dictionary handling with spaceand quoting
+#
+
+CREATE TABLE t1 (`id 1` INT NOT NULL, PRIMARY KEY (`id 1`)) ENGINE=INNODB;
+CREATE TABLE t2 (id INT PRIMARY KEY, t1_id INT, INDEX par_ind (t1_id), FOREIGN KEY (`t1_id`) REFERENCES `t1`(`id 1`)  ON DELETE CASCADE ) ENGINE=INNODB;
+#show create table t2;
+drop table t2,t1;
+
+#
+# Test of multi updated and foreign keys
+#
+
+create table `t1` (`id` int( 11 ) not null  ,primary key ( `id` )) engine = innodb;
+insert into `t1`values ( 1 ) ;
+create table `t2` (`id` int( 11 ) not null default '0',unique key `id` ( `id` ) ,constraint `t1_id_fk` foreign key ( `id` ) references `t1` (`id` )) engine = innodb;
+insert into `t2`values ( 1 ) ;
+create table `t3` (`id` int( 11 ) not null default '0',key `id` ( `id` ) ,constraint `t2_id_fk` foreign key ( `id` ) references `t2` (`id` )) engine = innodb;
+insert into `t3`values ( 1 ) ;
+--error 1451
+delete t3,t2,t1 from t1,t2,t3 where t1.id =1 and t2.id = t1.id and t3.id = t2.id;
+--error 1451
+update t1,t2,t3 set t3.id=5, t2.id=6, t1.id=7  where t1.id =1 and t2.id = t1.id and t3.id = t2.id;
+--error 1054
+update t3 set  t3.id=7  where t1.id =1 and t2.id = t1.id and t3.id = t2.id;
+drop table t3,t2,t1;
+
+#
+# test for recursion depth limit
+#
+create table t1(
+	id int primary key,
+	pid int,
+	index(pid),
+	foreign key(pid) references t1(id) on delete cascade) engine=innodb;
+insert into t1 values(0,0),(1,0),(2,1),(3,2),(4,3),(5,4),(6,5),(7,6),
+	(8,7),(9,8),(10,9),(11,10),(12,11),(13,12),(14,13),(15,14);
+-- error 1451
+delete from t1 where id=0;
+delete from t1 where id=15;
+delete from t1 where id=0;
+
+drop table t1;
+
+#
+# Test timestamps
+#
+
+CREATE TABLE t1 (col1 int(1))ENGINE=InnoDB;
+CREATE TABLE t2 (col1 int(1),stamp TIMESTAMP,INDEX stamp_idx
+(stamp))ENGINE=InnoDB;
+insert into t1 values (1),(2),(3);
+# Note that timestamp 3 is wrong
+insert into t2 values (1, 20020204130000),(2, 20020204130000),(4,20020204310000 ),(5,20020204230000);
+SELECT col1 FROM t1 UNION SELECT col1 FROM t2 WHERE stamp <
+'20020204120000' GROUP BY col1;
+drop table t1,t2;
+
+#
+# Test by Francois MASUREL
+#
+
+CREATE TABLE t1 (
+  `id` int(10) unsigned NOT NULL auto_increment,
+  `id_object` int(10) unsigned default '0',
+  `id_version` int(10) unsigned NOT NULL default '1',
+  `label` varchar(100) NOT NULL default '',
+  `description` text,
+  PRIMARY KEY  (`id`),
+  KEY `id_object` (`id_object`),
+  KEY `id_version` (`id_version`)
+) ENGINE=InnoDB;
+
+INSERT INTO t1 VALUES("6", "3382", "9", "Test", NULL), ("7", "102", "5", "Le Pekin (Test)", NULL),("584", "1794", "4", "Test de resto", NULL),("837", "1822", "6", "Test 3", NULL),("1119", "3524", "1", "Societe Test", NULL),("1122", "3525", "1", "Fournisseur Test", NULL);
+
+CREATE TABLE t2 (
+  `id` int(10) unsigned NOT NULL auto_increment,
+  `id_version` int(10) unsigned NOT NULL default '1',
+  PRIMARY KEY  (`id`),
+  KEY `id_version` (`id_version`)
+) ENGINE=InnoDB;
+
+INSERT INTO t2 VALUES("3524", "1"),("3525", "1"),("1794", "4"),("102", "5"),("1822", "6"),("3382", "9");
+
+SELECT t2.id, t1.`label` FROM t2 INNER JOIN
+(SELECT t1.id_object as id_object FROM t1 WHERE t1.`label` LIKE '%test%') AS lbl 
+ON (t2.id = lbl.id_object) INNER JOIN t1 ON (t2.id = t1.id_object);
+drop table t1,t2;
+
+create table t1 (a int, b varchar(200), c text not null) checksum=1 engine=myisam;
+create table t2 (a int, b varchar(200), c text not null) checksum=0 engine=innodb;
+create table t3 (a int, b varchar(200), c text not null) checksum=1 engine=innodb;
+insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, "");
+insert t2 select * from t1;
+insert t3 select * from t1;
+checksum table t1, t2, t3, t4 quick;
+checksum table t1, t2, t3, t4;
+checksum table t1, t2, t3, t4 extended;
+#show table status;
+drop table t1,t2,t3;
+
+#
+# Test problem with refering to different fields in same table in UNION
+# (Bug #2552)
+#
+create table t1 (id int,  name char(10) not null,  name2 char(10) not null) engine=innodb;
+insert into t1 values(1,'first','fff'),(2,'second','sss'),(3,'third','ttt');
+select trim(name2) from t1  union all  select trim(name) from t1 union all select trim(id) from t1;
+drop table t1;
+
+#
+# Bug2160
+#
+create table t1 (a int) engine=innodb;
+create table t2 like t1;
+drop table t1,t2;
+
+#
+# Test of automaticly created foreign keys
+#
+
+create table t1 (id int(11) not null, id2 int(11) not null, unique (id,id2)) engine=innodb;
+create table t2 (id int(11) not null, constraint t1_id_fk foreign key ( id ) references t1 (id)) engine = innodb;
+show create table t1;
+show create table t2;
+create index id on t2 (id);
+show create table t2;
+create index id2 on t2 (id);
+show create table t2;
+drop index id2 on t2;
+--error ER_DROP_INDEX_FK
+drop index id on t2;
+show create table t2;
+drop table t2;
+
+create table t2 (id int(11) not null, id2 int(11) not null, constraint t1_id_fk foreign key (id,id2) references t1 (id,id2)) engine = innodb;
+show create table t2;
+create unique index id on t2 (id,id2);
+show create table t2;
+drop table t2;
+
+# Check foreign key columns created in different order than key columns
+create table t2 (id int(11) not null, id2 int(11) not null, unique (id,id2),constraint t1_id_fk foreign key (id2,id) references t1 (id,id2)) engine = innodb;
+show create table t2;
+drop table t2;
+
+create table t2 (id int(11) not null, id2 int(11) not null, unique (id,id2), constraint t1_id_fk foreign key (id) references t1 (id)) engine = innodb;
+show create table t2;
+drop table t2;
+
+create table t2 (id int(11) not null, id2 int(11) not null, unique (id,id2),constraint t1_id_fk foreign key (id2,id) references t1 (id,id2)) engine = innodb;
+show create table t2;
+drop table t2;
+
+create table t2 (id int(11) not null auto_increment, id2 int(11) not null, constraint t1_id_fk foreign key (id) references t1 (id), primary key (id), index (id,id2)) engine = innodb;
+show create table t2;
+drop table t2;
+
+create table t2 (id int(11) not null auto_increment, id2 int(11) not null, constraint t1_id_fk foreign key (id) references t1 (id)) engine= innodb;
+show create table t2;
+alter table t2 add index id_test (id), add index id_test2 (id,id2);
+show create table t2;
+drop table t2;
+
+# Test error handling
+
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . master-data/ ''
+--error ER_WRONG_FK_DEF
+create table t2 (id int(11) not null, id2 int(11) not null, constraint t1_id_fk foreign key (id2,id) references t1 (id)) engine = innodb;
+
+# bug#3749
+
+create table t2 (a int auto_increment primary key, b int, index(b), foreign key (b) references t1(id), unique(b)) engine=innodb;
+show create table t2;
+drop table t2;
+create table t2 (a int auto_increment primary key, b int, foreign key (b) references t1(id), foreign key (b) references t1(id), unique(b)) engine=innodb;
+show create table t2;
+drop table t2, t1;
+
+
+#
+# Bug #6126: Duplicate columns in keys gives misleading error message
+#
+--error 1060
+create table t1 (c char(10), index (c,c)) engine=innodb;
+--error 1060
+create table t1 (c1 char(10), c2 char(10), index (c1,c2,c1)) engine=innodb;
+--error 1060
+create table t1 (c1 char(10), c2 char(10), index (c1,c1,c2)) engine=innodb;
+--error 1060
+create table t1 (c1 char(10), c2 char(10), index (c2,c1,c1)) engine=innodb;
+create table t1 (c1 char(10), c2 char(10)) engine=innodb;
+--error 1060
+alter table t1 add key (c1,c1);
+--error 1060
+alter table t1 add key (c2,c1,c1);
+--error 1060
+alter table t1 add key (c1,c2,c1);
+--error 1060
+alter table t1 add key (c1,c1,c2);
+drop table t1;
+
+#
+# Bug #4082: integer truncation
+#
+
+create table t1(a int(1) , b int(1)) engine=innodb;
+insert into t1 values ('1111', '3333');
+select distinct concat(a, b) from t1;
+drop table t1;
+
+#
+# BUG#7709 test case - Boolean fulltext query against unsupported 
+#                      engines does not fail
+#
+
+CREATE TABLE t1 ( a char(10) ) ENGINE=InnoDB;
+--error 1214
+SELECT a FROM t1 WHERE MATCH (a) AGAINST ('test' IN BOOLEAN MODE);
+DROP TABLE t1;
+
+#
+# check null values #1
+#
+
+--disable_warnings
+CREATE TABLE t1 (a_id tinyint(4) NOT NULL default '0', PRIMARY KEY  (a_id)) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+INSERT INTO t1 VALUES (1),(2),(3);
+CREATE TABLE t2 (b_id tinyint(4) NOT NULL default '0',b_a tinyint(4) NOT NULL default '0', PRIMARY KEY  (b_id), KEY  (b_a), 
+                CONSTRAINT fk_b_a FOREIGN KEY (b_a) REFERENCES t1 (a_id) ON DELETE CASCADE ON UPDATE NO ACTION) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+--enable_warnings
+INSERT INTO t2 VALUES (1,1),(2,1),(3,1),(4,2),(5,2);
+SELECT * FROM (SELECT t1.*,GROUP_CONCAT(t2.b_id SEPARATOR ',') as b_list FROM (t1 LEFT JOIN (t2) on t1.a_id = t2.b_a) GROUP BY t1.a_id ) AS xyz;
+DROP TABLE t2;
+DROP TABLE t1;
+
+#
+# Bug#11816 - Truncate table doesn't work with temporary innodb tables
+# This is not an innodb bug, but we test it using innodb.
+#
+create temporary table t1 (a int) engine=innodb;
+insert into t1 values (4711);
+truncate t1;
+insert into t1 values (42);
+select * from t1;
+drop table t1;
+# Show that it works with permanent tables too.
+create table t1 (a int) engine=innodb;
+insert into t1 values (4711);
+truncate t1;
+insert into t1 values (42);
+select * from t1;
+drop table t1;
+
+#
+# Bug #13025  Server crash during filesort	
+#
+
+create table t1 (a int not null, b int not null, c blob not null, d int not null, e int, primary key (a,b,c(255),d)) engine=innodb;
+insert into t1 values (2,2,"b",2,2),(1,1,"a",1,1),(3,3,"ab",3,3);
+select * from t1 order by a,b,c,d;
+explain select * from t1 order by a,b,c,d;
+drop table t1;
+
+#
+# BUG#11039,#13218 Wrong key length in min()
+#
+
+create table t1 (a char(1), b char(1), key(a, b)) engine=innodb;
+insert into t1 values ('8', '6'), ('4', '7');
+select min(a) from t1;
+select min(b) from t1 where a='8';
+drop table t1;
+
+# End of 4.1 tests
+
+#
+# range optimizer problem
+#
+
+create table t1 (x bigint unsigned not null primary key) engine=innodb;
+insert into t1(x) values (0xfffffffffffffff0),(0xfffffffffffffff1);
+select * from t1;
+select count(*) from t1 where x>0;
+select count(*) from t1 where x=0;
+select count(*) from t1 where x<0;
+select count(*) from t1 where x < -16;
+select count(*) from t1 where x = -16;
+explain select count(*) from t1 where x > -16;
+select count(*) from t1 where x > -16;
+select * from t1 where x > -16;
+select count(*) from t1 where x = 18446744073709551601;
+drop table t1;
+
+
+# Test for testable InnoDB status variables. This test
+# uses previous ones(pages_created, rows_deleted, ...).
+--replace_result 8192 8191
+SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total';
+SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_page_size';
+SELECT variable_value - @innodb_rows_deleted_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_deleted';
+SELECT variable_value - @innodb_rows_inserted_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_inserted';
+SELECT variable_value - @innodb_rows_updated_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_updated';
+
+# Test for row locks InnoDB status variables.
+SELECT variable_value - @innodb_row_lock_waits_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_waits';
+SELECT variable_value - @innodb_row_lock_current_waits_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_current_waits';
+SELECT variable_value - @innodb_row_lock_time_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time';
+SELECT variable_value - @innodb_row_lock_time_max_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time_max';
+SELECT variable_value - @innodb_row_lock_time_avg_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time_avg';
+
+# Test for innodb_sync_spin_loops variable
+SET @innodb_sync_spin_loops_orig = @@innodb_sync_spin_loops;
+show variables like "innodb_sync_spin_loops";
+set global innodb_sync_spin_loops=1000;
+show variables like "innodb_sync_spin_loops";
+set global innodb_sync_spin_loops=0;
+show variables like "innodb_sync_spin_loops";
+set global innodb_sync_spin_loops=20;
+show variables like "innodb_sync_spin_loops";
+set global innodb_sync_spin_loops=@innodb_sync_spin_loops_orig;
+
+# Test for innodb_thread_concurrency variable
+show variables like "innodb_thread_concurrency";
+set global innodb_thread_concurrency=1001;
+show variables like "innodb_thread_concurrency";
+set global innodb_thread_concurrency=0;
+show variables like "innodb_thread_concurrency";
+set global innodb_thread_concurrency=16;
+show variables like "innodb_thread_concurrency";
+
+# Test for innodb_concurrency_tickets variable
+show variables like "innodb_concurrency_tickets";
+set global innodb_concurrency_tickets=1000;
+show variables like "innodb_concurrency_tickets";
+set global innodb_concurrency_tickets=0;
+show variables like "innodb_concurrency_tickets";
+set global innodb_concurrency_tickets=500;
+show variables like "innodb_concurrency_tickets";
+
+# Test for innodb_thread_sleep_delay variable
+show variables like "innodb_thread_sleep_delay";
+set global innodb_thread_sleep_delay=100000;
+show variables like "innodb_thread_sleep_delay";
+set global innodb_thread_sleep_delay=0;
+show variables like "innodb_thread_sleep_delay";
+set global innodb_thread_sleep_delay=10000;
+show variables like "innodb_thread_sleep_delay";
+
+#
+# Test varchar
+#
+
+let $default=`select @@storage_engine`;
+set storage_engine=INNODB;
+source include/varchar.inc;
+
+#
+# Some errors/warnings on create
+#
+
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . master-data/ ''
+create table t1 (v varchar(65530), key(v));
+drop table t1;
+create table t1 (v varchar(65536));
+show create table t1;
+drop table t1;
+create table t1 (v varchar(65530) character set utf8);
+show create table t1;
+drop table t1;
+
+eval set storage_engine=$default;
+
+# InnoDB specific varchar tests
+create table t1 (v varchar(16384)) engine=innodb;
+drop table t1;
+
+#
+# BUG#11039 Wrong key length in min()
+#
+
+create table t1 (a char(1), b char(1), key(a, b)) engine=innodb;
+insert into t1 values ('8', '6'), ('4', '7');
+select min(a) from t1;
+select min(b) from t1 where a='8';
+drop table t1;
+
+#
+# Bug #11080 & #11005  Multi-row REPLACE fails on a duplicate key error
+#
+
+CREATE TABLE t1 ( `a` int(11) NOT NULL auto_increment, `b` int(11) default NULL,PRIMARY KEY  (`a`),UNIQUE KEY `b` (`b`)) ENGINE=innodb;
+insert into t1 (b) values (1);
+replace into t1 (b) values (2), (1), (3);
+select * from t1;
+truncate table t1;
+insert into t1 (b) values (1);
+replace into t1 (b) values (2);
+replace into t1 (b) values (1);
+replace into t1 (b) values (3);
+select * from t1;
+drop table t1;
+
+create table t1 (rowid int not null auto_increment, val int not null,primary
+key (rowid), unique(val)) engine=innodb;
+replace into t1 (val) values ('1'),('2');
+replace into t1 (val) values ('1'),('2');
+--error ER_DUP_ENTRY
+insert into t1 (val) values ('1'),('2');
+select * from t1;
+drop table t1;
+
+#
+# Test that update does not change internal auto-increment value
+#
+
+create table t1 (a int not null auto_increment primary key, val int) engine=InnoDB;
+insert into t1 (val) values (1);
+update t1 set a=2 where a=1;
+# We should get the following error because InnoDB does not update the counter
+--error ER_DUP_ENTRY
+insert into t1 (val) values (1);
+select * from t1;
+drop table t1;
+#
+# Bug #10465
+#
+
+--disable_warnings
+CREATE TABLE t1 (GRADE DECIMAL(4) NOT NULL, PRIMARY KEY (GRADE)) ENGINE=INNODB;
+--enable_warnings
+INSERT INTO t1 (GRADE) VALUES (151),(252),(343);
+SELECT GRADE  FROM t1 WHERE GRADE > 160 AND GRADE < 300;
+SELECT GRADE  FROM t1 WHERE GRADE= 151;
+DROP TABLE t1;
+
+#
+# Bug #12340 multitable delete deletes only one record
+#
+create table t1 (f1 varchar(10), f2 varchar(10), primary key (f1,f2)) engine=innodb;
+create table t2 (f3 varchar(10), f4 varchar(10), key (f4)) engine=innodb;
+insert into t2 values ('aa','cc');
+insert into t1 values ('aa','bb'),('aa','cc');
+delete t1 from t1,t2 where f1=f3 and f4='cc';
+select * from t1;
+drop table t1,t2;
+
+#
+# Test that the slow TRUNCATE implementation resets autoincrement columns
+# (bug #11946)
+#
+
+CREATE TABLE t1 (
+id INTEGER NOT NULL AUTO_INCREMENT, PRIMARY KEY (id)
+) ENGINE=InnoDB;
+
+CREATE TABLE t2 (
+id INTEGER NOT NULL,
+FOREIGN KEY (id) REFERENCES t1 (id)
+) ENGINE=InnoDB;
+
+INSERT INTO t1 (id) VALUES (NULL);
+SELECT * FROM t1;
+TRUNCATE t1;
+INSERT INTO t1 (id) VALUES (NULL);
+SELECT * FROM t1;
+
+# continued from above; test that doing a slow TRUNCATE on a table with 0
+# rows resets autoincrement columns
+DELETE FROM t1;
+TRUNCATE t1;
+INSERT INTO t1 (id) VALUES (NULL);
+SELECT * FROM t1;
+DROP TABLE t2, t1;
+
+# Test that foreign keys in temporary tables are not accepted (bug #12084)
+CREATE TABLE t1
+(
+ id INT PRIMARY KEY
+) ENGINE=InnoDB;
+
+--error 1005,1005
+CREATE TEMPORARY TABLE t2
+(
+ id INT NOT NULL PRIMARY KEY,
+ b INT,
+ FOREIGN KEY (b) REFERENCES test.t1(id)
+) ENGINE=InnoDB;
+DROP TABLE t1;
+
+#
+# Test that index column max sizes are honored (bug #13315)
+#
+
+# prefix index
+create table t1 (col1 varchar(2000), index (col1(767)))
+ character set = latin1 engine = innodb;
+
+# normal indexes
+create table t2 (col1 char(255), index (col1))
+ character set = latin1 engine = innodb;
+create table t3 (col1 binary(255), index (col1))
+ character set = latin1 engine = innodb;
+create table t4 (col1 varchar(767), index (col1))
+ character set = latin1 engine = innodb;
+create table t5 (col1 varchar(767) primary key)
+ character set = latin1 engine = innodb;
+create table t6 (col1 varbinary(767) primary key)
+ character set = latin1 engine = innodb;
+create table t7 (col1 text, index(col1(767)))
+ character set = latin1 engine = innodb;
+create table t8 (col1 blob, index(col1(767)))
+ character set = latin1 engine = innodb;
+
+# multi-column indexes are allowed to be longer
+create table t9 (col1 varchar(512), col2 varchar(512), index(col1, col2))
+ character set = latin1 engine = innodb;
+
+show create table t9;
+
+drop table t1, t2, t3, t4, t5, t6, t7, t8, t9;
+
+# these should have their index length trimmed
+create table t1 (col1 varchar(768), index(col1))
+ character set = latin1 engine = innodb;
+create table t2 (col1 varbinary(768), index(col1))
+ character set = latin1 engine = innodb;
+create table t3 (col1 text, index(col1(768)))
+ character set = latin1 engine = innodb;
+create table t4 (col1 blob, index(col1(768)))
+ character set = latin1 engine = innodb;
+
+show create table t1;
+
+drop table t1, t2, t3, t4;
+
+# these should be refused
+--error 1071
+create table t1 (col1 varchar(768) primary key)
+ character set = latin1 engine = innodb;
+--error 1071
+create table t2 (col1 varbinary(768) primary key)
+ character set = latin1 engine = innodb;
+--error 1071
+create table t3 (col1 text, primary key(col1(768)))
+ character set = latin1 engine = innodb;
+--error 1071
+create table t4 (col1 blob, primary key(col1(768)))
+ character set = latin1 engine = innodb;
+
+#
+# Test improved foreign key error messages (bug #3443)
+#
+
+CREATE TABLE t1
+(
+ id INT PRIMARY KEY
+) ENGINE=InnoDB;
+
+CREATE TABLE t2
+(
+ v INT,
+ CONSTRAINT c1 FOREIGN KEY (v) REFERENCES t1(id)
+) ENGINE=InnoDB;
+
+--error 1452
+INSERT INTO t2 VALUES(2);
+
+INSERT INTO t1 VALUES(1);
+INSERT INTO t2 VALUES(1);
+
+--error 1451
+DELETE FROM t1 WHERE id = 1;
+
+--error 1217
+DROP TABLE t1;
+
+SET FOREIGN_KEY_CHECKS=0;
+DROP TABLE t1;
+SET FOREIGN_KEY_CHECKS=1;
+
+--error 1452
+INSERT INTO t2 VALUES(3);
+
+DROP TABLE t2;
+#
+# Test that checksum table uses a consistent read Bug #12669
+#
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+connection a;
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2);
+set autocommit=0;
+checksum table t1;
+connection b;
+insert into t1 values(3);
+connection a;
+#
+# Here checksum should not see insert
+#
+checksum table t1;
+connection a;
+commit;
+checksum table t1;
+commit;
+drop table t1;
+#
+# autocommit = 1
+#
+connection a;
+create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
+insert into t1 values (1),(2);
+set autocommit=1;
+checksum table t1;
+connection b;
+set autocommit=1;
+insert into t1 values(3);
+connection a;
+#
+# Here checksum sees insert
+#
+checksum table t1;
+drop table t1;
+
+connection default;
+disconnect a;
+disconnect b;
+
+# tests for bugs #9802 and #13778
+
+# test that FKs between invalid types are not accepted
+
+set foreign_key_checks=0;
+create table t2 (a int primary key, b int, foreign key (b) references t1(a)) engine = innodb;
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . master-data/ ''
+-- error 1005
+create table t1(a char(10) primary key, b varchar(20)) engine = innodb;
+set foreign_key_checks=1;
+drop table t2;
+
+# test that FKs between different charsets are not accepted in CREATE even
+# when f_k_c is 0
+
+set foreign_key_checks=0;
+create table t1(a varchar(10) primary key) engine = innodb DEFAULT CHARSET=latin1;
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . master-data/ ''
+-- error 1005
+create table t2 (a varchar(10), foreign key (a) references t1(a)) engine = innodb DEFAULT CHARSET=utf8;
+set foreign_key_checks=1;
+drop table t1;
+
+# test that invalid datatype conversions with ALTER are not allowed
+
+set foreign_key_checks=0;
+create table t2 (a varchar(10), foreign key (a) references t1(a)) engine = innodb;
+create table t1(a varchar(10) primary key) engine = innodb;
+-- error 1025,1025
+alter table t1 modify column a int;
+set foreign_key_checks=1;
+drop table t2,t1;
+
+# test that charset conversions with ALTER are allowed when f_k_c is 0
+
+set foreign_key_checks=0;
+create table t2 (a varchar(10), foreign key (a) references t1(a)) engine = innodb DEFAULT CHARSET=latin1;
+create table t1(a varchar(10) primary key) engine = innodb DEFAULT CHARSET=latin1;
+alter table t1 convert to character set utf8;
+set foreign_key_checks=1;
+drop table t2,t1;
+
+# test that RENAME does not allow invalid charsets when f_k_c is 0
+
+set foreign_key_checks=0;
+create table t2 (a varchar(10), foreign key (a) references t1(a)) engine = innodb DEFAULT CHARSET=latin1;
+create table t3(a varchar(10) primary key) engine = innodb DEFAULT CHARSET=utf8;
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . master-data/ ''
+-- error 1025
+rename table t3 to t1;
+set foreign_key_checks=1;
+drop table t2,t3;
+
+# test that foreign key errors are reported correctly (Bug #15550)
+
+create table t1(a int primary key) row_format=redundant engine=innodb;
+create table t2(a int primary key,constraint foreign key(a)references t1(a)) row_format=compact engine=innodb;
+create table t3(a int primary key) row_format=compact engine=innodb;
+create table t4(a int primary key,constraint foreign key(a)references t3(a)) row_format=redundant engine=innodb;
+
+insert into t1 values(1);
+insert into t3 values(1);
+-- error 1452
+insert into t2 values(2);
+-- error 1452
+insert into t4 values(2);
+insert into t2 values(1);
+insert into t4 values(1);
+-- error 1451
+update t1 set a=2;
+-- error 1452
+update t2 set a=2;
+-- error 1451
+update t3 set a=2;
+-- error 1452
+update t4 set a=2;
+-- error 1451
+truncate t1;
+-- error 1451
+truncate t3;
+truncate t2;
+truncate t4;
+truncate t1;
+truncate t3;
+
+drop table t4,t3,t2,t1;
+
+
+#
+# Test that we can create a large (>1K) key
+#
+create table t1 (a varchar(255) character set utf8,
+                 b varchar(255) character set utf8,
+                 c varchar(255) character set utf8,
+                 d varchar(255) character set utf8,
+                 key (a,b,c,d)) engine=innodb;
+drop table t1;
+--error ER_TOO_LONG_KEY
+create table t1 (a varchar(255) character set utf8,
+                 b varchar(255) character set utf8,
+                 c varchar(255) character set utf8,
+                 d varchar(255) character set utf8,
+                 e varchar(255) character set utf8,
+                 key (a,b,c,d,e)) engine=innodb;
+
+
+# test the padding of BINARY types and collations (Bug #14189)
+
+create table t1 (s1 varbinary(2),primary key (s1)) engine=innodb;
+create table t2 (s1 binary(2),primary key (s1)) engine=innodb;
+create table t3 (s1 varchar(2) binary,primary key (s1)) engine=innodb;
+create table t4 (s1 char(2) binary,primary key (s1)) engine=innodb;
+
+insert into t1 values (0x41),(0x4120),(0x4100);
+-- error ER_DUP_ENTRY
+insert into t2 values (0x41),(0x4120),(0x4100);
+insert into t2 values (0x41),(0x4120);
+-- error ER_DUP_ENTRY
+insert into t3 values (0x41),(0x4120),(0x4100);
+insert into t3 values (0x41),(0x4100);
+-- error ER_DUP_ENTRY
+insert into t4 values (0x41),(0x4120),(0x4100);
+insert into t4 values (0x41),(0x4100);
+select hex(s1) from t1;
+select hex(s1) from t2;
+select hex(s1) from t3;
+select hex(s1) from t4;
+drop table t1,t2,t3,t4;
+
+create table t1 (a int primary key,s1 varbinary(3) not null unique) engine=innodb;
+create table t2 (s1 binary(2) not null, constraint c foreign key(s1) references t1(s1) on update cascade) engine=innodb;
+
+insert into t1 values(1,0x4100),(2,0x41),(3,0x4120),(4,0x42);
+-- error 1452
+insert into t2 values(0x42);
+insert into t2 values(0x41);
+select hex(s1) from t2;
+update t1 set s1=0x123456 where a=2;
+select hex(s1) from t2;
+-- error 1451
+update t1 set s1=0x12 where a=1;
+-- error 1451
+update t1 set s1=0x12345678 where a=1;
+-- error 1451
+update t1 set s1=0x123457 where a=1;
+update t1 set s1=0x1220 where a=1;
+select hex(s1) from t2;
+update t1 set s1=0x1200 where a=1;
+select hex(s1) from t2;
+update t1 set s1=0x4200 where a=1;
+select hex(s1) from t2;
+-- error 1451
+delete from t1 where a=1;
+delete from t1 where a=2;
+update t2 set s1=0x4120;
+-- error 1451
+delete from t1;
+delete from t1 where a!=3;
+select a,hex(s1) from t1;
+select hex(s1) from t2;
+
+drop table t2,t1;
+
+create table t1 (a int primary key,s1 varchar(2) binary not null unique) engine=innodb;
+create table t2 (s1 char(2) binary not null, constraint c foreign key(s1) references t1(s1) on update cascade) engine=innodb;
+
+insert into t1 values(1,0x4100),(2,0x41);
+insert into t2 values(0x41);
+select hex(s1) from t2;
+update t1 set s1=0x1234 where a=1;
+select hex(s1) from t2;
+update t1 set s1=0x12 where a=2;
+select hex(s1) from t2;
+delete from t1 where a=1;
+-- error 1451
+delete from t1 where a=2;
+select a,hex(s1) from t1;
+select hex(s1) from t2;
+
+drop table t2,t1;
+# Ensure that <tablename>_ibfk_0 is not mistreated as a
+# generated foreign key identifier.  (Bug #16387)
+
+CREATE TABLE t1(a INT, PRIMARY KEY(a)) ENGINE=InnoDB;
+CREATE TABLE t2(a INT) ENGINE=InnoDB;
+ALTER TABLE t2 ADD FOREIGN KEY (a) REFERENCES t1(a);
+ALTER TABLE t2 DROP FOREIGN KEY t2_ibfk_1;
+ALTER TABLE t2 ADD CONSTRAINT t2_ibfk_0 FOREIGN KEY (a) REFERENCES t1(a);
+ALTER TABLE t2 DROP FOREIGN KEY t2_ibfk_0;
+SHOW CREATE TABLE t2;
+DROP TABLE t2,t1;
+
+#
+# Test case for bug #16229: MySQL/InnoDB uses full explicit table locks in trigger processing
+#
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+connection a;
+create table t1(a int not null, b int, c int, d int, primary key(a)) engine=innodb;
+insert into t1(a) values (1),(2),(3);
+commit;
+connection b;
+set autocommit = 0;
+update t1 set b = 5 where a = 2;
+connection a;
+delimiter |;
+create trigger t1t before insert on t1 for each row begin set NEW.b = NEW.a * 10 + 5, NEW.c = NEW.a / 10; end |
+delimiter ;|
+set autocommit = 0;
+connection a;
+insert into t1(a) values (10),(20),(30),(40),(50),(60),(70),(80),(90),(100),
+(11),(21),(31),(41),(51),(61),(71),(81),(91),(101),
+(12),(22),(32),(42),(52),(62),(72),(82),(92),(102),
+(13),(23),(33),(43),(53),(63),(73),(83),(93),(103),
+(14),(24),(34),(44),(54),(64),(74),(84),(94),(104);
+connection b;
+commit;
+connection a;
+commit;
+drop trigger t1t;
+drop table t1;
+disconnect a;
+disconnect b;
+#
+# Another trigger test
+#
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+connection a;
+create table t1(a int not null, b int, c int, d int, primary key(a)) engine=innodb;
+create table t2(a int not null, b int, c int, d int, primary key(a)) engine=innodb;
+create table t3(a int not null, b int, c int, d int, primary key(a)) engine=innodb;
+create table t4(a int not null, b int, c int, d int, primary key(a)) engine=innodb;
+create table t5(a int not null, b int, c int, d int, primary key(a)) engine=innodb;
+insert into t1(a) values (1),(2),(3);
+insert into t2(a) values (1),(2),(3);
+insert into t3(a) values (1),(2),(3);
+insert into t4(a) values (1),(2),(3);
+insert into t3(a) values (5),(7),(8);
+insert into t4(a) values (5),(7),(8);
+insert into t5(a) values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12);
+
+delimiter |;
+create trigger t1t before insert on t1 for each row begin 
+    INSERT INTO t2 SET a = NEW.a;
+end |
+
+create trigger t2t before insert on t2 for each row begin
+    DELETE FROM t3 WHERE a = NEW.a;
+end |
+
+create trigger t3t before delete on t3 for each row begin  
+    UPDATE t4 SET b = b + 1 WHERE a = OLD.a;
+end |
+
+create trigger t4t before update on t4 for each row begin
+    UPDATE t5 SET b = b + 1 where a = NEW.a;
+end |
+delimiter ;|
+commit;
+set autocommit = 0;
+update t1 set b = b + 5 where a = 1;
+update t2 set b = b + 5 where a = 1;
+update t3 set b = b + 5 where a = 1;
+update t4 set b = b + 5 where a = 1;
+insert into t5(a) values(20);
+connection b;
+set autocommit = 0;
+insert into t1(a) values(7);
+insert into t2(a) values(8);
+delete from t2 where a = 3;
+update t4 set b = b + 1 where a = 3;
+commit;
+drop trigger t1t;
+drop trigger t2t;
+drop trigger t3t;
+drop trigger t4t;
+drop table t1, t2, t3, t4, t5;
+connection default;
+disconnect a;
+disconnect b;
+
+#
+# Test that cascading updates leading to duplicate keys give the correct
+# error message (bug #9680)
+#
+
+CREATE TABLE t1 (
+  field1 varchar(8) NOT NULL DEFAULT '',
+  field2 varchar(8) NOT NULL DEFAULT '',
+  PRIMARY KEY  (field1, field2)
+) ENGINE=InnoDB;
+
+CREATE TABLE t2 (
+  field1 varchar(8) NOT NULL DEFAULT '' PRIMARY KEY,
+  FOREIGN KEY (field1) REFERENCES t1 (field1)
+    ON DELETE CASCADE ON UPDATE CASCADE
+) ENGINE=InnoDB;
+
+INSERT INTO t1 VALUES ('old', 'somevalu');
+INSERT INTO t1 VALUES ('other', 'anyvalue');
+
+INSERT INTO t2 VALUES ('old');
+INSERT INTO t2 VALUES ('other');
+
+--error ER_FOREIGN_DUPLICATE_KEY
+UPDATE t1 SET field1 = 'other' WHERE field2 = 'somevalu';
+
+DROP TABLE t2;
+DROP TABLE t1;
+
+#
+# Bug#18477 - MySQL/InnoDB Ignoring Foreign Keys in ALTER TABLE
+#
+create table t1 (
+  c1 bigint not null,
+  c2 bigint not null,
+  primary key (c1),
+  unique  key (c2)
+) engine=innodb;
+#
+create table t2 (
+  c1 bigint not null,
+  primary key (c1)
+) engine=innodb;
+#
+alter table t1 add constraint c2_fk foreign key (c2)
+  references t2(c1) on delete cascade;
+show create table t1;
+#
+alter table t1 drop foreign key c2_fk;
+show create table t1;
+#
+drop table t1, t2;
+
+#
+# Bug #14360: problem with intervals
+#
+
+create table t1(a date) engine=innodb;
+create table t2(a date, key(a)) engine=innodb;
+insert into t1 values('2005-10-01');
+insert into t2 values('2005-10-01');
+select * from t1, t2
+  where t2.a between t1.a - interval 2 day and t1.a + interval 2 day;
+drop table t1, t2;
+
+create table t1 (id int not null, f_id int not null, f int not null,
+primary key(f_id, id)) engine=innodb;
+create table t2 (id int not null,s_id int not null,s varchar(200),
+primary key(id)) engine=innodb;
+INSERT INTO t1 VALUES (8, 1, 3);
+INSERT INTO t1 VALUES (1, 2, 1);
+INSERT INTO t2 VALUES (1, 0, '');
+INSERT INTO t2 VALUES (8, 1, '');
+commit;
+DELETE ml.* FROM t1 AS ml LEFT JOIN t2 AS mm ON (mm.id=ml.id)
+WHERE mm.id IS NULL;
+select ml.* from t1 as ml left join t2 as mm on (mm.id=ml.id)
+where mm.id is null lock in share mode;
+drop table t1,t2;
+
+#
+# Test case where X-locks on unused rows should be released in a
+# update (because READ COMMITTED isolation level)
+#
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+connection a;
+create table t1(a int not null, b int, primary key(a)) engine=innodb;
+insert into t1 values(1,1),(2,2),(3,1),(4,2),(5,1),(6,2),(7,3);
+commit;
+SET binlog_format='MIXED';
+set autocommit = 0; 
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+update t1 set b = 5 where b = 1;
+connection b;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+#
+# X-lock to record (7,3) should be released in a update 
+#
+select * from t1 where a = 7 and b = 3 for update;
+connection a;
+commit;
+connection b;
+commit;
+drop table t1;
+connection default;
+disconnect a;
+disconnect b;
+
+#
+# Test case where no locks should be released (because we are not
+# using READ COMMITTED isolation level)
+#
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+connection a;
+create table t1(a int not null, b int, primary key(a)) engine=innodb;
+insert into t1 values(1,1),(2,2),(3,1),(4,2),(5,1),(6,2);
+commit;
+set autocommit = 0; 
+select * from t1 lock in share mode;
+update t1 set b = 5 where b = 1;
+connection b;
+set autocommit = 0;
+#
+# S-lock to records (2,2),(4,2), and (6,2) should not be released in a update
+#
+--error 1205
+select * from t1 where a = 2 and b = 2 for update;
+#
+# X-lock to record (1,1),(3,1),(5,1) should not be released in a update
+#
+--error 1205
+connection a;
+commit;
+connection b;
+commit;
+connection default;
+disconnect a;
+disconnect b;
+drop table t1;
+
+#
+# Consistent read should be used in following selects
+#
+# 1) INSERT INTO ... SELECT
+# 2) UPDATE ... = ( SELECT ...)
+# 3) CREATE ... SELECT
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+connection a;
+create table t1(a int not null, b int, primary key(a)) engine=innodb;
+insert into t1 values (1,2),(5,3),(4,2);
+create table t2(d int not null, e int, primary key(d)) engine=innodb;
+insert into t2 values (8,6),(12,1),(3,1);
+commit;
+set autocommit = 0;
+select * from t2 for update;
+connection b;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+insert into t1 select * from t2;
+update t1 set b = (select e from t2 where a = d);
+create table t3(d int not null, e int, primary key(d)) engine=innodb
+select * from t2;
+commit;
+connection a;
+commit;
+connection default;
+disconnect a;
+disconnect b;
+drop table t1, t2, t3;
+
+#
+# Consistent read should not be used if 
+#
+# (a) isolation level is serializable OR
+# (b) select ... lock in share mode OR
+# (c) select ... for update
+#
+# in following queries:
+#
+# 1) INSERT INTO ... SELECT
+# 2) UPDATE ... = ( SELECT ...)
+# 3) CREATE ... SELECT
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+connect (c,localhost,root,,);
+connect (d,localhost,root,,);
+connect (e,localhost,root,,);
+connect (f,localhost,root,,);
+connect (g,localhost,root,,);
+connect (h,localhost,root,,);
+connect (i,localhost,root,,);
+connect (j,localhost,root,,);
+connection a;
+create table t1(a int not null, b int, primary key(a)) engine=innodb;
+insert into t1 values (1,2),(5,3),(4,2);
+create table t2(a int not null, b int, primary key(a)) engine=innodb;
+insert into t2 values (8,6),(12,1),(3,1);
+create table t3(d int not null, b int, primary key(d)) engine=innodb;
+insert into t3 values (8,6),(12,1),(3,1);
+create table t5(a int not null, b int, primary key(a)) engine=innodb;
+insert into t5 values (1,2),(5,3),(4,2);
+create table t6(d int not null, e int, primary key(d)) engine=innodb;
+insert into t6 values (8,6),(12,1),(3,1);
+create table t8(a int not null, b int, primary key(a)) engine=innodb;
+insert into t8 values (1,2),(5,3),(4,2);
+create table t9(d int not null, e int, primary key(d)) engine=innodb;
+insert into t9 values (8,6),(12,1),(3,1);
+commit;
+set autocommit = 0;
+select * from t2 for update;
+connection b;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+--send
+insert into t1 select * from t2;
+connection c;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+--send
+update t3 set b = (select b from t2 where a = d);
+connection d;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
+--send
+create table t4(a int not null, b int, primary key(a)) engine=innodb select * from t2;
+connection e;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+--send
+insert into t5 (select * from t2 lock in share mode);
+connection f;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+--send
+update t6 set e = (select b from t2 where a = d lock in share mode);
+connection g;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+--send
+create table t7(a int not null, b int, primary key(a)) engine=innodb select * from t2 lock in share mode;
+connection h;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+--send
+insert into t8 (select * from t2 for update);
+connection i;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+--send
+update t9 set e = (select b from t2 where a = d for update);
+connection j;
+SET binlog_format='MIXED';
+set autocommit = 0;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+--send
+create table t10(a int not null, b int, primary key(a)) engine=innodb select * from t2 for update;
+
+connection b;
+--error 1205
+reap;
+
+connection c;
+--error 1205
+reap;
+
+connection d;
+--error 1205
+reap;
+
+connection e;
+--error 1205
+reap;
+
+connection f;
+--error 1205
+reap;
+
+connection g;
+--error 1205
+reap;
+
+connection h;
+--error 1205
+reap;
+
+connection i;
+--error 1205
+reap;
+
+connection j;
+--error 1205
+reap;
+
+connection a;
+commit;
+
+connection default;
+disconnect a;
+disconnect b;
+disconnect c;
+disconnect d;
+disconnect e;
+disconnect f;
+disconnect g;
+disconnect h;
+disconnect i;
+disconnect j;
+drop table t1, t2, t3, t5, t6, t8, t9;
+
+# bug 18934, "InnoDB crashes when table uses column names like DB_ROW_ID"
+--error 1005
+CREATE TABLE t1 (DB_ROW_ID int) engine=innodb;
+
+#
+# Bug #17152: Wrong result with BINARY comparison on aliased column
+#
+
+CREATE TABLE t1 (
+   a BIGINT(20) NOT NULL,
+    PRIMARY KEY  (a)
+ ) ENGINE=INNODB DEFAULT CHARSET=UTF8;
+
+CREATE TABLE t2 (
+  a BIGINT(20) NOT NULL,
+  b VARCHAR(128) NOT NULL,
+  c TEXT NOT NULL,
+  PRIMARY KEY  (a,b),
+  KEY idx_t2_b_c (b,c(200)),
+  CONSTRAINT t_fk FOREIGN KEY (a) REFERENCES t1 (a) 
+   ON DELETE CASCADE
+ ) ENGINE=INNODB DEFAULT CHARSET=UTF8;
+
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (1, 'bar', 'vbar');
+INSERT INTO t2 VALUES (1, 'BAR2', 'VBAR');
+INSERT INTO t2 VALUES (1, 'bar_bar', 'bibi');
+INSERT INTO t2 VALUES (1, 'customer_over', '1');
+
+SELECT * FROM t2 WHERE b = 'customer_over';
+SELECT * FROM t2 WHERE BINARY b = 'customer_over';
+SELECT DISTINCT p0.a FROM t2 p0 WHERE p0.b = 'customer_over';
+/* Bang: Empty result set, above was expected: */
+SELECT DISTINCT p0.a FROM t2 p0 WHERE BINARY p0.b = 'customer_over';
+SELECT p0.a FROM t2 p0 WHERE BINARY p0.b = 'customer_over';
+
+drop table t2, t1;
+
+#
+# Test optimize on table with open transaction
+#
+
+CREATE TABLE t1 ( a int ) ENGINE=innodb;
+BEGIN;
+INSERT INTO t1 VALUES (1);
+OPTIMIZE TABLE t1;
+DROP TABLE t1;
+
+#
+# Bug #24741 (existing cascade clauses disappear when adding foreign keys)
+#
+
+CREATE TABLE t1 (id int PRIMARY KEY, f int NOT NULL, INDEX(f)) ENGINE=InnoDB;
+
+CREATE TABLE t2 (id int PRIMARY KEY, f INT NOT NULL,
+  CONSTRAINT t2_t1 FOREIGN KEY (id) REFERENCES t1 (id)
+  ON DELETE CASCADE ON UPDATE CASCADE) ENGINE=InnoDB;
+
+ALTER TABLE t2 ADD FOREIGN KEY (f) REFERENCES t1 (f) ON
+DELETE CASCADE ON UPDATE CASCADE;
+
+SHOW CREATE TABLE t2;
+DROP TABLE t2, t1;
+
+#
+# Bug #25927: Prevent ALTER TABLE ... MODIFY ... NOT NULL on columns
+# for which there is a foreign key constraint ON ... SET NULL.
+#
+
+CREATE TABLE t1 (a INT, INDEX(a)) ENGINE=InnoDB;
+CREATE TABLE t2 (a INT, INDEX(a)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (1);
+ALTER TABLE t2 ADD FOREIGN KEY (a) REFERENCES t1 (a) ON DELETE SET NULL;
+# mysqltest first does replace_regex, then replace_result
+--replace_regex /'[^']*test\/#sql-[0-9a-f_]*'/'#sql-temporary'/
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . master-data/ ''
+--error 1025
+ALTER TABLE t2 MODIFY a INT NOT NULL;
+DELETE FROM t1;
+DROP TABLE t2,t1;
+
+#
+# Bug #26835: table corruption after delete+insert
+#
+
+CREATE TABLE t1 (a VARCHAR(5) COLLATE utf8_unicode_ci PRIMARY KEY)
+ENGINE=InnoDB;
+INSERT INTO t1 VALUES (0xEFBCA4EFBCA4EFBCA4);
+DELETE FROM t1;
+INSERT INTO t1 VALUES ('DDD');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+#
+# Bug #23313 (AUTO_INCREMENT=# not reported back for InnoDB tables)
+# Bug #21404 (AUTO_INCREMENT value reset when Adding FKEY (or ALTER?))
+#
+
+CREATE TABLE t1 (id int PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB
+AUTO_INCREMENT=42;
+
+INSERT INTO t1 VALUES (0),(347),(0);
+SELECT * FROM t1;
+
+SHOW CREATE TABLE t1;
+
+CREATE TABLE t2 (id int PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t2 VALUES(42),(347),(348);
+ALTER TABLE t1 ADD CONSTRAINT t1_t2 FOREIGN KEY (id) REFERENCES t2(id);
+SHOW CREATE TABLE t1;
+
+DROP TABLE t1,t2;
+
+#
+# Bug #21101 (Prints wrong error message if max row size is too large)
+#
+set innodb_strict_mode=on;
+--error 1118
+CREATE TABLE t1 (
+	c01 CHAR(255), c02 CHAR(255), c03 CHAR(255), c04 CHAR(255),
+	c05 CHAR(255), c06 CHAR(255), c07 CHAR(255), c08 CHAR(255),
+	c09 CHAR(255), c10 CHAR(255), c11 CHAR(255), c12 CHAR(255),
+	c13 CHAR(255), c14 CHAR(255), c15 CHAR(255), c16 CHAR(255),
+	c17 CHAR(255), c18 CHAR(255), c19 CHAR(255), c20 CHAR(255),
+	c21 CHAR(255), c22 CHAR(255), c23 CHAR(255), c24 CHAR(255),
+	c25 CHAR(255), c26 CHAR(255), c27 CHAR(255), c28 CHAR(255),
+	c29 CHAR(255), c30 CHAR(255), c31 CHAR(255), c32 CHAR(255)
+	) ENGINE = InnoDB;
+
+#
+# Bug #31860 InnoDB assumes AUTOINC values can only be positive.
+#
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1(
+	id BIGINT(20) NOT NULL AUTO_INCREMENT PRIMARY KEY
+	) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(-10);
+SELECT * FROM t1;
+#
+# NOTE: The server really needs to be restarted at this point
+# for the test to be useful.  
+#
+# Without the fix InnoDB would trip over an assertion here.
+INSERT INTO t1 VALUES(NULL);
+# The next value should be 1 and not -9 or a -ve number
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# 
+# Bug #21409 Incorrect result returned when in READ-COMMITTED with
+# query_cache ON
+#
+CONNECT (c1,localhost,root,,);
+CONNECT (c2,localhost,root,,);
+CONNECTION c1;
+SET binlog_format='MIXED';
+SET TX_ISOLATION='read-committed';
+SET AUTOCOMMIT=0;
+DROP TABLE IF EXISTS t1, t2;
+CREATE TABLE t1 ( a int ) ENGINE=InnoDB;
+CREATE TABLE t2 LIKE t1;
+SELECT * FROM t2;
+CONNECTION c2;
+SET binlog_format='MIXED';
+SET TX_ISOLATION='read-committed';
+SET AUTOCOMMIT=0;
+INSERT INTO t1 VALUES (1);
+COMMIT;
+CONNECTION c1;
+SELECT * FROM t1 WHERE a=1;
+DISCONNECT c1;
+DISCONNECT c2;
+CONNECT (c1,localhost,root,,);
+CONNECT (c2,localhost,root,,);
+CONNECTION c1;
+SET binlog_format='MIXED';
+SET TX_ISOLATION='read-committed';
+SET AUTOCOMMIT=0;
+SELECT * FROM t2;
+CONNECTION c2;
+SET binlog_format='MIXED';
+SET TX_ISOLATION='read-committed';
+SET AUTOCOMMIT=0;
+INSERT INTO t1 VALUES (2);
+COMMIT;
+CONNECTION c1;
+# The result set below should be the same for both selects
+SELECT * FROM t1 WHERE a=2;
+SELECT * FROM t1 WHERE a=2;
+DROP TABLE t1;
+DROP TABLE t2;
+DISCONNECT c1;
+DISCONNECT c2;
+CONNECTION default;
+
+#
+# Bug #29157 UPDATE, changed rows incorrect
+#
+create table t1 (i int, j int) engine=innodb;
+insert into t1 (i, j) values (1, 1), (2, 2);
+--enable_info
+update t1 set j = 2;
+--disable_info
+drop table t1;
+
+#
+# Bug #32440 InnoDB free space info does not appear in SHOW TABLE STATUS or
+# I_S
+#
+create table t1 (id int) comment='this is a comment' engine=innodb;
+select table_comment, data_free > 0 as data_free_is_set
+  from information_schema.tables
+  where table_schema='test' and table_name = 't1';
+drop table t1;
+
+#
+# Bug 34920 test
+#
+CONNECTION default;
+CREATE TABLE t1 (
+	c1 INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
+	c2 VARCHAR(128) NOT NULL,
+	PRIMARY KEY(c1)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=100;
+
+CREATE TABLE t2 (
+	c1 INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
+	c2 INT(10) UNSIGNED DEFAULT NULL,
+	PRIMARY KEY(c1)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=200;
+
+SELECT AUTO_INCREMENT FROM INFORMATION_SCHEMA.TABLES WHERE table_name = 't2';
+ALTER TABLE t2 ADD CONSTRAINT t1_t2_1 FOREIGN KEY(c1) REFERENCES t1(c1);
+SELECT AUTO_INCREMENT FROM INFORMATION_SCHEMA.TABLES WHERE table_name = 't2';
+DROP TABLE t2;
+DROP TABLE t1;
+# End 34920 test
+#
+# Bug #29507 TRUNCATE shows to many rows effected
+#
+CONNECTION default;
+CREATE TABLE t1 (c1 int default NULL,
+		 c2 int default NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+
+--enable_info
+TRUNCATE TABLE t1;
+
+INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+TRUNCATE TABLE t1;
+
+--disable_info
+DROP TABLE t1;
+#
+# Bug#35537 Innodb doesn't increment handler_update and handler_delete.
+#
+-- disable_query_log
+-- disable_result_log
+
+CONNECT (c1,localhost,root,,);
+
+DROP TABLE IF EXISTS bug35537;
+CREATE TABLE bug35537 (
+  c1 int
+) ENGINE=InnoDB;
+
+INSERT INTO bug35537 VALUES (1);
+
+-- enable_result_log
+
+SHOW SESSION STATUS LIKE 'Handler_update%';
+SHOW SESSION STATUS LIKE 'Handler_delete%';
+
+UPDATE bug35537 SET c1 = 2 WHERE c1 = 1;
+DELETE FROM bug35537 WHERE c1 = 2;
+
+SHOW SESSION STATUS LIKE 'Handler_update%';
+SHOW SESSION STATUS LIKE 'Handler_delete%';
+
+DROP TABLE bug35537;
+
+DISCONNECT c1;
+CONNECTION default;
+
+SET GLOBAL innodb_thread_concurrency = @innodb_thread_concurrency_orig;
+
+-- enable_query_log
+
+#######################################################################
+#                                                                     #
+# Please, DO NOT TOUCH this file as well as the innodb.result file.   #
+# These files are to be modified ONLY BY INNOBASE guys.               #
+#                                                                     #
+# Use innodb_mysql.[test|result] files instead.                       #
+#                                                                     #
+# If nevertheless you need to make some changes here, please, forward #
+# your commit message                                                 #
+# To: innodb_dev_ww@oracle.com                                        #
+# Cc: dev-innodb@mysql.com                                            #
+# (otherwise your changes may be erased).                             #
+#                                                                     #
+#######################################################################
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug21704.result b/storage/innodb_plugin/mysql-test/innodb_bug21704.result
new file mode 100644
index 00000000000..b8e0b15d50d
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug21704.result
@@ -0,0 +1,55 @@
+#
+# Bug#21704: Renaming column does not update FK definition.
+#
+
+# Test that it's not possible to rename columns participating in a
+# foreign key (either in the referencing or referenced table).
+
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ROW_FORMAT=COMPACT ENGINE=INNODB;
+CREATE TABLE t2 (a INT PRIMARY KEY, b INT,
+CONSTRAINT fk1 FOREIGN KEY (a) REFERENCES t1(a))
+ROW_FORMAT=COMPACT ENGINE=INNODB;
+CREATE TABLE t3 (a INT PRIMARY KEY, b INT, KEY(b), C INT,
+CONSTRAINT fk2 FOREIGN KEY (b) REFERENCES t3 (a))
+ROW_FORMAT=COMPACT ENGINE=INNODB;
+INSERT INTO t1 VALUES (1,1),(2,2),(3,3);
+INSERT INTO t2 VALUES (1,1),(2,2),(3,3);
+INSERT INTO t3 VALUES (1,1,1),(2,2,2),(3,3,3);
+
+# Test renaming the column in the referenced table.
+
+ALTER TABLE t1 CHANGE a c INT;
+ERROR HY000: Error on rename of '#sql-temporary' to './test/t1' (errno: 150)
+# Ensure that online column rename works.
+ALTER TABLE t1 CHANGE b c INT;
+affected rows: 0
+info: Records: 0  Duplicates: 0  Warnings: 0
+
+# Test renaming the column in the referencing table
+
+ALTER TABLE t2 CHANGE a c INT;
+ERROR HY000: Error on rename of '#sql-temporary' to './test/t2' (errno: 150)
+# Ensure that online column rename works.
+ALTER TABLE t2 CHANGE b c INT;
+affected rows: 0
+info: Records: 0  Duplicates: 0  Warnings: 0
+
+# Test with self-referential constraints
+
+ALTER TABLE t3 CHANGE a d INT;
+ERROR HY000: Error on rename of '#sql-temporary' to './test/t3' (errno: 150)
+ALTER TABLE t3 CHANGE b d INT;
+ERROR HY000: Error on rename of '#sql-temporary' to './test/t3' (errno: 150)
+# Ensure that online column rename works.
+ALTER TABLE t3 CHANGE c d INT;
+affected rows: 0
+info: Records: 0  Duplicates: 0  Warnings: 0
+
+# Cleanup.
+
+DROP TABLE t3;
+DROP TABLE t2;
+DROP TABLE t1;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug21704.test b/storage/innodb_plugin/mysql-test/innodb_bug21704.test
new file mode 100644
index 00000000000..c649b61034c
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug21704.test
@@ -0,0 +1,96 @@
+-- source include/have_innodb.inc
+
+--echo #
+--echo # Bug#21704: Renaming column does not update FK definition.
+--echo #
+
+--echo
+--echo # Test that it's not possible to rename columns participating in a
+--echo # foreign key (either in the referencing or referenced table).
+--echo
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+DROP TABLE IF EXISTS t3;
+--enable_warnings
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ROW_FORMAT=COMPACT ENGINE=INNODB;
+
+CREATE TABLE t2 (a INT PRIMARY KEY, b INT,
+                 CONSTRAINT fk1 FOREIGN KEY (a) REFERENCES t1(a))
+ROW_FORMAT=COMPACT ENGINE=INNODB;
+
+CREATE TABLE t3 (a INT PRIMARY KEY, b INT, KEY(b), C INT,
+  CONSTRAINT fk2 FOREIGN KEY (b) REFERENCES t3 (a))
+ROW_FORMAT=COMPACT ENGINE=INNODB;
+
+INSERT INTO t1 VALUES (1,1),(2,2),(3,3);
+INSERT INTO t2 VALUES (1,1),(2,2),(3,3);
+INSERT INTO t3 VALUES (1,1,1),(2,2,2),(3,3,3);
+
+--echo
+--echo # Test renaming the column in the referenced table.
+--echo
+
+# mysqltest first does replace_regex, then replace_result
+--replace_regex /'[^']*test\/#sql-[0-9a-f_]*'/'#sql-temporary'/
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . mysqld.1/data/ ''
+--error ER_ERROR_ON_RENAME
+ALTER TABLE t1 CHANGE a c INT;
+
+--echo # Ensure that online column rename works.
+
+--enable_info
+ALTER TABLE t1 CHANGE b c INT;
+--disable_info
+
+--echo
+--echo # Test renaming the column in the referencing table
+--echo
+
+# mysqltest first does replace_regex, then replace_result
+--replace_regex /'[^']*test\/#sql-[0-9a-f_]*'/'#sql-temporary'/
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . mysqld.1/data/ ''
+--error ER_ERROR_ON_RENAME
+ALTER TABLE t2 CHANGE a c INT;
+
+--echo # Ensure that online column rename works.
+
+--enable_info
+ALTER TABLE t2 CHANGE b c INT;
+--disable_info
+
+--echo
+--echo # Test with self-referential constraints
+--echo
+
+# mysqltest first does replace_regex, then replace_result
+--replace_regex /'[^']*test\/#sql-[0-9a-f_]*'/'#sql-temporary'/
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . mysqld.1/data/ ''
+--error ER_ERROR_ON_RENAME
+ALTER TABLE t3 CHANGE a d INT;
+
+# mysqltest first does replace_regex, then replace_result
+--replace_regex /'[^']*test\/#sql-[0-9a-f_]*'/'#sql-temporary'/
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLTEST_VARDIR . mysqld.1/data/ ''
+--error ER_ERROR_ON_RENAME
+ALTER TABLE t3 CHANGE b d INT;
+
+--echo # Ensure that online column rename works.
+
+--enable_info
+ALTER TABLE t3 CHANGE c d INT;
+--disable_info
+
+--echo
+--echo # Cleanup.
+--echo
+
+DROP TABLE t3;
+DROP TABLE t2;
+DROP TABLE t1;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug34053.result b/storage/innodb_plugin/mysql-test/innodb_bug34053.result
new file mode 100644
index 00000000000..195775f74c8
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug34053.result
@@ -0,0 +1 @@
+SET storage_engine=InnoDB;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug34053.test b/storage/innodb_plugin/mysql-test/innodb_bug34053.test
new file mode 100644
index 00000000000..b935e45c06d
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug34053.test
@@ -0,0 +1,50 @@
+#
+# Make sure http://bugs.mysql.com/34053 remains fixed.
+#
+
+-- source include/not_embedded.inc
+-- source include/have_innodb.inc
+
+SET storage_engine=InnoDB;
+
+# we do not really care about what gets printed, we are only
+# interested in getting success or failure according to our
+# expectations
+-- disable_query_log
+-- disable_result_log
+
+GRANT USAGE ON *.* TO 'shane'@'localhost' IDENTIFIED BY '12345';
+FLUSH PRIVILEGES;
+
+-- connect (con1,localhost,shane,12345,)
+
+-- connection con1
+-- error ER_SPECIFIC_ACCESS_DENIED_ERROR
+CREATE TABLE innodb_monitor (a INT) ENGINE=INNODB;
+-- error ER_SPECIFIC_ACCESS_DENIED_ERROR
+CREATE TABLE innodb_mem_validate (a INT) ENGINE=INNODB;
+CREATE TABLE innodb_monitorx (a INT) ENGINE=INNODB;
+DROP TABLE innodb_monitorx;
+CREATE TABLE innodb_monito (a INT) ENGINE=INNODB;
+DROP TABLE innodb_monito;
+CREATE TABLE xinnodb_monitor (a INT) ENGINE=INNODB;
+DROP TABLE xinnodb_monitor;
+CREATE TABLE nnodb_monitor (a INT) ENGINE=INNODB;
+DROP TABLE nnodb_monitor;
+
+-- connection default
+CREATE TABLE innodb_monitor (a INT) ENGINE=INNODB;
+CREATE TABLE innodb_mem_validate (a INT) ENGINE=INNODB;
+
+-- connection con1
+-- error ER_SPECIFIC_ACCESS_DENIED_ERROR
+DROP TABLE innodb_monitor;
+-- error ER_SPECIFIC_ACCESS_DENIED_ERROR
+DROP TABLE innodb_mem_validate;
+
+-- connection default
+DROP TABLE innodb_monitor;
+DROP TABLE innodb_mem_validate;
+DROP USER 'shane'@'localhost';
+
+-- disconnect con1
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug34300.result b/storage/innodb_plugin/mysql-test/innodb_bug34300.result
new file mode 100644
index 00000000000..ae9fee81ad7
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug34300.result
@@ -0,0 +1,4 @@
+f4	f8
+xxx	zzz
+f4	f8
+xxx	zzz
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug34300.test b/storage/innodb_plugin/mysql-test/innodb_bug34300.test
new file mode 100644
index 00000000000..114bcf98c25
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug34300.test
@@ -0,0 +1,32 @@
+#
+# Bug#34300 Tinyblob & tinytext fields currupted after export/import and alter in 5.1
+# http://bugs.mysql.com/34300
+#
+
+-- source include/have_innodb.inc
+
+-- disable_query_log
+-- disable_result_log
+
+# set packet size and reconnect 
+SET @@global.max_allowed_packet=16777216;
+--connect (newconn, localhost, root,,)
+
+DROP TABLE IF EXISTS bug34300;
+CREATE TABLE bug34300 (
+  f4 TINYTEXT,
+  f6 MEDIUMTEXT,
+  f8 TINYBLOB
+) ENGINE=InnoDB;
+
+INSERT INTO bug34300 VALUES ('xxx', repeat('a', 8459264), 'zzz');
+
+-- enable_result_log
+
+SELECT f4, f8 FROM bug34300;
+
+ALTER TABLE bug34300 ADD COLUMN (f10 INT);
+
+SELECT f4, f8 FROM bug34300;
+
+DROP TABLE bug34300;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug35220.result b/storage/innodb_plugin/mysql-test/innodb_bug35220.result
new file mode 100644
index 00000000000..195775f74c8
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug35220.result
@@ -0,0 +1 @@
+SET storage_engine=InnoDB;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug35220.test b/storage/innodb_plugin/mysql-test/innodb_bug35220.test
new file mode 100644
index 00000000000..26f7d6b1ddd
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug35220.test
@@ -0,0 +1,16 @@
+#
+# Bug#35220 ALTER TABLE too picky on reserved word "foreign"
+# http://bugs.mysql.com/35220
+#
+
+-- source include/have_innodb.inc
+
+SET storage_engine=InnoDB;
+
+# we care only that the following SQL commands do not produce errors
+-- disable_query_log
+-- disable_result_log
+
+CREATE TABLE bug35220 (foreign_col INT, dummy_cant_delete_all_columns INT);
+ALTER TABLE bug35220 DROP foreign_col;
+DROP TABLE bug35220;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug36169.result b/storage/innodb_plugin/mysql-test/innodb_bug36169.result
new file mode 100644
index 00000000000..aa80e4d7aa4
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug36169.result
@@ -0,0 +1,2 @@
+SET GLOBAL innodb_file_format='Barracuda';
+SET GLOBAL innodb_file_per_table=ON;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug36169.test b/storage/innodb_plugin/mysql-test/innodb_bug36169.test
new file mode 100644
index 00000000000..d3566d3eb39
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug36169.test
@@ -0,0 +1,1155 @@
+#
+# Bug#36169 create innodb compressed table with too large row size crashed
+# http://bugs.mysql.com/36169
+#
+
+-- source include/have_innodb.inc
+
+SET GLOBAL innodb_file_format='Barracuda';
+SET GLOBAL innodb_file_per_table=ON;
+
+#
+# The following is copied from http://bugs.mysql.com/36169
+# (http://bugs.mysql.com/file.php?id=9121)
+# Probably it can be simplified but that is not obvious.
+#
+
+# we care only that the following SQL commands do produce errors
+# as expected and do not crash the server
+-- disable_query_log
+-- disable_result_log
+
+# Generating 10 tables
+# Creating a table with 94 columns and 24 indexes
+DROP TABLE IF EXISTS `table0`;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table0`
+(`col0` BOOL,
+`col1` BOOL,
+`col2` TINYINT,
+`col3` DATE,
+`col4` TIME,
+`col5` SET ('test1','test2','test3'),
+`col6` TIME,
+`col7` TEXT,
+`col8` DECIMAL,
+`col9` SET ('test1','test2','test3'),
+`col10` FLOAT,
+`col11` DOUBLE PRECISION,
+`col12` ENUM ('test1','test2','test3'),
+`col13` TINYBLOB,
+`col14` YEAR,
+`col15` SET ('test1','test2','test3'),
+`col16` NUMERIC,
+`col17` NUMERIC,
+`col18` BLOB,
+`col19` DATETIME,
+`col20` DOUBLE PRECISION,
+`col21` DECIMAL,
+`col22` DATETIME,
+`col23` NUMERIC,
+`col24` NUMERIC,
+`col25` LONGTEXT,
+`col26` TINYBLOB,
+`col27` TIME,
+`col28` TINYBLOB,
+`col29` ENUM ('test1','test2','test3'),
+`col30` SMALLINT,
+`col31` REAL,
+`col32` FLOAT,
+`col33` CHAR (175),
+`col34` TINYTEXT,
+`col35` TINYTEXT,
+`col36` TINYBLOB,
+`col37` TINYBLOB,
+`col38` TINYTEXT,
+`col39` MEDIUMBLOB,
+`col40` TIMESTAMP,
+`col41` DOUBLE,
+`col42` SMALLINT,
+`col43` LONGBLOB,
+`col44` VARCHAR (80),
+`col45` MEDIUMTEXT,
+`col46` NUMERIC,
+`col47` BIGINT,
+`col48` DATE,
+`col49` TINYBLOB,
+`col50` DATE,
+`col51` BOOL,
+`col52` MEDIUMINT,
+`col53` FLOAT,
+`col54` TINYBLOB,
+`col55` LONGTEXT,
+`col56` SMALLINT,
+`col57` ENUM ('test1','test2','test3'),
+`col58` DATETIME,
+`col59` MEDIUMTEXT,
+`col60` VARCHAR (232),
+`col61` NUMERIC,
+`col62` YEAR,
+`col63` SMALLINT,
+`col64` TIMESTAMP,
+`col65` BLOB,
+`col66` LONGBLOB,
+`col67` INT,
+`col68` LONGTEXT,
+`col69` ENUM ('test1','test2','test3'),
+`col70` INT,
+`col71` TIME,
+`col72` TIMESTAMP,
+`col73` TIMESTAMP,
+`col74` VARCHAR (170),
+`col75` SET ('test1','test2','test3'),
+`col76` TINYBLOB,
+`col77` BIGINT,
+`col78` NUMERIC,
+`col79` DATETIME,
+`col80` YEAR,
+`col81` NUMERIC,
+`col82` LONGBLOB,
+`col83` TEXT,
+`col84` CHAR (83),
+`col85` DECIMAL,
+`col86` FLOAT,
+`col87` INT,
+`col88` VARCHAR (145),
+`col89` DATE,
+`col90` DECIMAL,
+`col91` DECIMAL,
+`col92` MEDIUMBLOB,
+`col93` TIME,
+KEY `idx0` (`col69`,`col90`,`col8`),
+KEY `idx1` (`col60`),
+KEY `idx2` (`col60`,`col70`,`col74`),
+KEY `idx3` (`col22`,`col32`,`col72`,`col30`),
+KEY `idx4` (`col29`),
+KEY `idx5` (`col19`,`col45`(143)),
+KEY `idx6` (`col46`,`col48`,`col5`,`col39`(118)),
+KEY `idx7` (`col48`,`col61`),
+KEY `idx8` (`col93`),
+KEY `idx9` (`col31`),
+KEY `idx10` (`col30`,`col21`),
+KEY `idx11` (`col67`),
+KEY `idx12` (`col44`,`col6`,`col8`,`col38`(226)),
+KEY `idx13` (`col71`,`col41`,`col15`,`col49`(88)),
+KEY `idx14` (`col78`),
+KEY `idx15` (`col63`,`col67`,`col64`),
+KEY `idx16` (`col17`,`col86`),
+KEY `idx17` (`col77`,`col56`,`col10`,`col55`(24)),
+KEY `idx18` (`col62`),
+KEY `idx19` (`col31`,`col57`,`col56`,`col53`),
+KEY `idx20` (`col46`),
+KEY `idx21` (`col83`(54)),
+KEY `idx22` (`col51`,`col7`(120)),
+KEY `idx23` (`col7`(163),`col31`,`col71`,`col14`)
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+# Creating a table with 10 columns and 32 indexes
+DROP TABLE IF EXISTS `table1`;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table1`
+(`col0` CHAR (113),
+`col1` FLOAT,
+`col2` BIGINT,
+`col3` DECIMAL,
+`col4` BLOB,
+`col5` LONGTEXT,
+`col6` SET ('test1','test2','test3'),
+`col7` BIGINT,
+`col8` BIGINT,
+`col9` TINYBLOB,
+KEY `idx0` (`col5`(101),`col7`,`col8`),
+KEY `idx1` (`col8`),
+KEY `idx2` (`col4`(177),`col9`(126),`col6`,`col3`),
+KEY `idx3` (`col5`(160)),
+KEY `idx4` (`col9`(242)),
+KEY `idx5` (`col4`(139),`col2`,`col3`),
+KEY `idx6` (`col7`),
+KEY `idx7` (`col6`,`col2`,`col0`,`col3`),
+KEY `idx8` (`col9`(66)),
+KEY `idx9` (`col5`(253)),
+KEY `idx10` (`col1`,`col7`,`col2`),
+KEY `idx11` (`col9`(242),`col0`,`col8`,`col5`(163)),
+KEY `idx12` (`col8`),
+KEY `idx13` (`col0`,`col9`(37)),
+KEY `idx14` (`col0`),
+KEY `idx15` (`col5`(111)),
+KEY `idx16` (`col8`,`col0`,`col5`(13)),
+KEY `idx17` (`col4`(139)),
+KEY `idx18` (`col5`(189),`col2`,`col3`,`col9`(136)),
+KEY `idx19` (`col0`,`col3`,`col1`,`col8`),
+KEY `idx20` (`col8`),
+KEY `idx21` (`col0`,`col7`,`col9`(227),`col3`),
+KEY `idx22` (`col0`),
+KEY `idx23` (`col2`),
+KEY `idx24` (`col3`),
+KEY `idx25` (`col2`,`col3`),
+KEY `idx26` (`col0`),
+KEY `idx27` (`col5`(254)),
+KEY `idx28` (`col3`),
+KEY `idx29` (`col3`),
+KEY `idx30` (`col7`,`col3`,`col0`,`col4`(220)),
+KEY `idx31` (`col4`(1),`col0`)
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+# Creating a table with 141 columns and 18 indexes
+DROP TABLE IF EXISTS `table2`;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table2`
+(`col0` BOOL,
+`col1` MEDIUMINT,
+`col2` VARCHAR (209),
+`col3` MEDIUMBLOB,
+`col4` CHAR (13),
+`col5` DOUBLE,
+`col6` TINYTEXT,
+`col7` REAL,
+`col8` SMALLINT,
+`col9` BLOB,
+`col10` TINYINT,
+`col11` DECIMAL,
+`col12` BLOB,
+`col13` DECIMAL,
+`col14` LONGBLOB,
+`col15` SMALLINT,
+`col16` LONGBLOB,
+`col17` TINYTEXT,
+`col18` FLOAT,
+`col19` CHAR (78),
+`col20` MEDIUMTEXT,
+`col21` SET ('test1','test2','test3'),
+`col22` MEDIUMINT,
+`col23` INT,
+`col24` MEDIUMBLOB,
+`col25` ENUM ('test1','test2','test3'),
+`col26` TINYBLOB,
+`col27` VARCHAR (116),
+`col28` TIMESTAMP,
+`col29` BLOB,
+`col30` SMALLINT,
+`col31` DOUBLE PRECISION,
+`col32` DECIMAL,
+`col33` DECIMAL,
+`col34` TEXT,
+`col35` MEDIUMINT,
+`col36` MEDIUMINT,
+`col37` BIGINT,
+`col38` VARCHAR (253),
+`col39` TINYBLOB,
+`col40` MEDIUMBLOB,
+`col41` BIGINT,
+`col42` DOUBLE,
+`col43` TEXT,
+`col44` BLOB,
+`col45` TIME,
+`col46` MEDIUMINT,
+`col47` DOUBLE PRECISION,
+`col48` SET ('test1','test2','test3'),
+`col49` DOUBLE PRECISION,
+`col50` VARCHAR (97),
+`col51` TEXT,
+`col52` NUMERIC,
+`col53` ENUM ('test1','test2','test3'),
+`col54` MEDIUMTEXT,
+`col55` MEDIUMINT,
+`col56` DATETIME,
+`col57` DATETIME,
+`col58` MEDIUMTEXT,
+`col59` CHAR (244),
+`col60` LONGBLOB,
+`col61` MEDIUMBLOB,
+`col62` DOUBLE,
+`col63` SMALLINT,
+`col64` BOOL,
+`col65` SMALLINT,
+`col66` VARCHAR (212),
+`col67` TIME,
+`col68` REAL,
+`col69` BOOL,
+`col70` BIGINT,
+`col71` DATE,
+`col72` TINYINT,
+`col73` ENUM ('test1','test2','test3'),
+`col74` DATE,
+`col75` TIME,
+`col76` DATETIME,
+`col77` BOOL,
+`col78` TINYTEXT,
+`col79` MEDIUMINT,
+`col80` NUMERIC,
+`col81` LONGTEXT,
+`col82` SET ('test1','test2','test3'),
+`col83` DOUBLE PRECISION,
+`col84` NUMERIC,
+`col85` VARCHAR (184),
+`col86` DOUBLE PRECISION,
+`col87` MEDIUMTEXT,
+`col88` MEDIUMBLOB,
+`col89` BOOL,
+`col90` SMALLINT,
+`col91` TINYINT,
+`col92` ENUM ('test1','test2','test3'),
+`col93` BOOL,
+`col94` TIMESTAMP,
+`col95` BOOL,
+`col96` MEDIUMTEXT,
+`col97` DECIMAL,
+`col98` BOOL,
+`col99` DECIMAL,
+`col100` MEDIUMINT,
+`col101` DOUBLE PRECISION,
+`col102` TINYINT,
+`col103` BOOL,
+`col104` MEDIUMINT,
+`col105` DECIMAL,
+`col106` NUMERIC,
+`col107` TIMESTAMP,
+`col108` MEDIUMBLOB,
+`col109` TINYBLOB,
+`col110` SET ('test1','test2','test3'),
+`col111` YEAR,
+`col112` TIMESTAMP,
+`col113` CHAR (201),
+`col114` BOOL,
+`col115` TINYINT,
+`col116` DOUBLE,
+`col117` TINYINT,
+`col118` TIMESTAMP,
+`col119` SET ('test1','test2','test3'),
+`col120` SMALLINT,
+`col121` TINYBLOB,
+`col122` TIMESTAMP,
+`col123` BLOB,
+`col124` DATE,
+`col125` SMALLINT,
+`col126` ENUM ('test1','test2','test3'),
+`col127` MEDIUMBLOB,
+`col128` DOUBLE PRECISION,
+`col129` REAL,
+`col130` VARCHAR (159),
+`col131` MEDIUMBLOB,
+`col132` BIGINT,
+`col133` INT,
+`col134` SET ('test1','test2','test3'),
+`col135` CHAR (198),
+`col136` SET ('test1','test2','test3'),
+`col137` MEDIUMTEXT,
+`col138` SMALLINT,
+`col139` BLOB,
+`col140` LONGBLOB,
+KEY `idx0` (`col14`(139),`col24`(208),`col38`,`col35`),
+KEY `idx1` (`col48`,`col118`,`col29`(131),`col100`),
+KEY `idx2` (`col86`,`col67`,`col43`(175)),
+KEY `idx3` (`col19`),
+KEY `idx4` (`col40`(220),`col67`),
+KEY `idx5` (`col99`,`col56`),
+KEY `idx6` (`col68`,`col28`,`col137`(157)),
+KEY `idx7` (`col51`(160),`col99`,`col45`,`col39`(9)),
+KEY `idx8` (`col15`,`col52`,`col90`,`col94`),
+KEY `idx9` (`col24`(3),`col139`(248),`col108`(118),`col41`),
+KEY `idx10` (`col36`,`col92`,`col114`),
+KEY `idx11` (`col115`,`col9`(116)),
+KEY `idx12` (`col130`,`col93`,`col134`),
+KEY `idx13` (`col123`(65)),
+KEY `idx14` (`col44`(90),`col86`,`col119`),
+KEY `idx15` (`col69`),
+KEY `idx16` (`col132`,`col81`(118),`col18`),
+KEY `idx17` (`col24`(250),`col7`,`col92`,`col45`)
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+# Creating a table with 199 columns and 1 indexes
+DROP TABLE IF EXISTS `table3`;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table3`
+(`col0` SMALLINT,
+`col1` SET ('test1','test2','test3'),
+`col2` TINYTEXT,
+`col3` DOUBLE,
+`col4` NUMERIC,
+`col5` DATE,
+`col6` BIGINT,
+`col7` DOUBLE,
+`col8` TEXT,
+`col9` INT,
+`col10` REAL,
+`col11` TINYINT,
+`col12` NUMERIC,
+`col13` NUMERIC,
+`col14` TIME,
+`col15` DOUBLE,
+`col16` REAL,
+`col17` MEDIUMBLOB,
+`col18` YEAR,
+`col19` TINYTEXT,
+`col20` YEAR,
+`col21` CHAR (250),
+`col22` TINYINT,
+`col23` TINYINT,
+`col24` SMALLINT,
+`col25` DATETIME,
+`col26` MEDIUMINT,
+`col27` LONGBLOB,
+`col28` VARCHAR (106),
+`col29` FLOAT,
+`col30` MEDIUMTEXT,
+`col31` TINYBLOB,
+`col32` BIGINT,
+`col33` YEAR,
+`col34` REAL,
+`col35` MEDIUMBLOB,
+`col36` LONGTEXT,
+`col37` LONGBLOB,
+`col38` BIGINT,
+`col39` FLOAT,
+`col40` TIME,
+`col41` DATETIME,
+`col42` BOOL,
+`col43` BIGINT,
+`col44` SMALLINT,
+`col45` TIME,
+`col46` DOUBLE PRECISION,
+`col47` TIME,
+`col48` TINYTEXT,
+`col49` DOUBLE PRECISION,
+`col50` BIGINT,
+`col51` NUMERIC,
+`col52` TINYBLOB,
+`col53` DATE,
+`col54` DECIMAL,
+`col55` SMALLINT,
+`col56` TINYTEXT,
+`col57` ENUM ('test1','test2','test3'),
+`col58` YEAR,
+`col59` TIME,
+`col60` TINYINT,
+`col61` DECIMAL,
+`col62` DOUBLE,
+`col63` DATE,
+`col64` LONGTEXT,
+`col65` DOUBLE,
+`col66` VARCHAR (88),
+`col67` MEDIUMTEXT,
+`col68` DATE,
+`col69` MEDIUMINT,
+`col70` DECIMAL,
+`col71` MEDIUMTEXT,
+`col72` LONGTEXT,
+`col73` REAL,
+`col74` DOUBLE,
+`col75` TIME,
+`col76` DATE,
+`col77` DECIMAL,
+`col78` MEDIUMBLOB,
+`col79` NUMERIC,
+`col80` BIGINT,
+`col81` YEAR,
+`col82` SMALLINT,
+`col83` MEDIUMINT,
+`col84` TINYINT,
+`col85` MEDIUMBLOB,
+`col86` TIME,
+`col87` MEDIUMBLOB,
+`col88` LONGTEXT,
+`col89` BOOL,
+`col90` BLOB,
+`col91` LONGBLOB,
+`col92` YEAR,
+`col93` BLOB,
+`col94` INT,
+`col95` TINYTEXT,
+`col96` TINYINT,
+`col97` DECIMAL,
+`col98` ENUM ('test1','test2','test3'),
+`col99` MEDIUMINT,
+`col100` TINYINT,
+`col101` MEDIUMBLOB,
+`col102` TINYINT,
+`col103` SET ('test1','test2','test3'),
+`col104` TIMESTAMP,
+`col105` TEXT,
+`col106` DATETIME,
+`col107` MEDIUMTEXT,
+`col108` CHAR (220),
+`col109` TIME,
+`col110` VARCHAR (131),
+`col111` DECIMAL,
+`col112` FLOAT,
+`col113` SMALLINT,
+`col114` BIGINT,
+`col115` LONGBLOB,
+`col116` SET ('test1','test2','test3'),
+`col117` ENUM ('test1','test2','test3'),
+`col118` BLOB,
+`col119` MEDIUMTEXT,
+`col120` SET ('test1','test2','test3'),
+`col121` DATETIME,
+`col122` FLOAT,
+`col123` VARCHAR (242),
+`col124` YEAR,
+`col125` MEDIUMBLOB,
+`col126` TIME,
+`col127` BOOL,
+`col128` TINYBLOB,
+`col129` DOUBLE,
+`col130` TINYINT,
+`col131` BIGINT,
+`col132` SMALLINT,
+`col133` INT,
+`col134` DOUBLE PRECISION,
+`col135` MEDIUMBLOB,
+`col136` SET ('test1','test2','test3'),
+`col137` TINYTEXT,
+`col138` DOUBLE PRECISION,
+`col139` NUMERIC,
+`col140` BLOB,
+`col141` SET ('test1','test2','test3'),
+`col142` INT,
+`col143` VARCHAR (26),
+`col144` BLOB,
+`col145` REAL,
+`col146` SET ('test1','test2','test3'),
+`col147` LONGBLOB,
+`col148` TEXT,
+`col149` BLOB,
+`col150` CHAR (189),
+`col151` LONGTEXT,
+`col152` INT,
+`col153` FLOAT,
+`col154` LONGTEXT,
+`col155` DATE,
+`col156` LONGBLOB,
+`col157` TINYBLOB,
+`col158` REAL,
+`col159` DATE,
+`col160` TIME,
+`col161` YEAR,
+`col162` DOUBLE,
+`col163` VARCHAR (90),
+`col164` FLOAT,
+`col165` NUMERIC,
+`col166` ENUM ('test1','test2','test3'),
+`col167` DOUBLE PRECISION,
+`col168` DOUBLE PRECISION,
+`col169` TINYBLOB,
+`col170` TIME,
+`col171` SMALLINT,
+`col172` TINYTEXT,
+`col173` SMALLINT,
+`col174` DOUBLE,
+`col175` VARCHAR (14),
+`col176` VARCHAR (90),
+`col177` REAL,
+`col178` MEDIUMINT,
+`col179` TINYBLOB,
+`col180` FLOAT,
+`col181` TIMESTAMP,
+`col182` REAL,
+`col183` DOUBLE PRECISION,
+`col184` BIGINT,
+`col185` INT,
+`col186` MEDIUMTEXT,
+`col187` TIME,
+`col188` FLOAT,
+`col189` TIME,
+`col190` INT,
+`col191` FLOAT,
+`col192` MEDIUMINT,
+`col193` TINYINT,
+`col194` MEDIUMTEXT,
+`col195` DATE,
+`col196` TIME,
+`col197` YEAR,
+`col198` CHAR (206),
+KEY `idx0` (`col39`,`col23`)
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+# Creating a table with 133 columns and 16 indexes
+DROP TABLE IF EXISTS `table4`;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table4`
+(`col0` VARCHAR (60),
+`col1` NUMERIC,
+`col2` LONGTEXT,
+`col3` MEDIUMTEXT,
+`col4` LONGTEXT,
+`col5` LONGBLOB,
+`col6` LONGBLOB,
+`col7` DATETIME,
+`col8` TINYTEXT,
+`col9` BLOB,
+`col10` BOOL,
+`col11` BIGINT,
+`col12` TEXT,
+`col13` VARCHAR (213),
+`col14` TINYBLOB,
+`col15` BOOL,
+`col16` MEDIUMTEXT,
+`col17` DOUBLE,
+`col18` TEXT,
+`col19` BLOB,
+`col20` SET ('test1','test2','test3'),
+`col21` TINYINT,
+`col22` DATETIME,
+`col23` TINYINT,
+`col24` ENUM ('test1','test2','test3'),
+`col25` REAL,
+`col26` BOOL,
+`col27` FLOAT,
+`col28` LONGBLOB,
+`col29` DATETIME,
+`col30` FLOAT,
+`col31` SET ('test1','test2','test3'),
+`col32` LONGBLOB,
+`col33` NUMERIC,
+`col34` YEAR,
+`col35` VARCHAR (146),
+`col36` BIGINT,
+`col37` DATETIME,
+`col38` DATE,
+`col39` SET ('test1','test2','test3'),
+`col40` CHAR (112),
+`col41` FLOAT,
+`col42` YEAR,
+`col43` TIME,
+`col44` DOUBLE,
+`col45` NUMERIC,
+`col46` FLOAT,
+`col47` DECIMAL,
+`col48` BIGINT,
+`col49` DECIMAL,
+`col50` YEAR,
+`col51` MEDIUMTEXT,
+`col52` LONGBLOB,
+`col53` SET ('test1','test2','test3'),
+`col54` BLOB,
+`col55` FLOAT,
+`col56` REAL,
+`col57` REAL,
+`col58` TEXT,
+`col59` MEDIUMBLOB,
+`col60` INT,
+`col61` INT,
+`col62` DATE,
+`col63` TEXT,
+`col64` DATE,
+`col65` ENUM ('test1','test2','test3'),
+`col66` DOUBLE PRECISION,
+`col67` TINYTEXT,
+`col68` TINYBLOB,
+`col69` FLOAT,
+`col70` BLOB,
+`col71` DATETIME,
+`col72` DOUBLE,
+`col73` LONGTEXT,
+`col74` TIME,
+`col75` DATETIME,
+`col76` VARCHAR (122),
+`col77` MEDIUMTEXT,
+`col78` MEDIUMTEXT,
+`col79` BOOL,
+`col80` LONGTEXT,
+`col81` TINYTEXT,
+`col82` NUMERIC,
+`col83` DOUBLE PRECISION,
+`col84` DATE,
+`col85` YEAR,
+`col86` BLOB,
+`col87` TINYTEXT,
+`col88` DOUBLE PRECISION,
+`col89` MEDIUMINT,
+`col90` MEDIUMTEXT,
+`col91` NUMERIC,
+`col92` DATETIME,
+`col93` NUMERIC,
+`col94` SET ('test1','test2','test3'),
+`col95` TINYTEXT,
+`col96` SET ('test1','test2','test3'),
+`col97` YEAR,
+`col98` MEDIUMINT,
+`col99` TEXT,
+`col100` TEXT,
+`col101` TIME,
+`col102` VARCHAR (225),
+`col103` TINYTEXT,
+`col104` TEXT,
+`col105` MEDIUMTEXT,
+`col106` TINYINT,
+`col107` TEXT,
+`col108` LONGBLOB,
+`col109` LONGTEXT,
+`col110` TINYTEXT,
+`col111` CHAR (56),
+`col112` YEAR,
+`col113` ENUM ('test1','test2','test3'),
+`col114` TINYBLOB,
+`col115` DATETIME,
+`col116` DATE,
+`col117` TIME,
+`col118` MEDIUMTEXT,
+`col119` DOUBLE PRECISION,
+`col120` FLOAT,
+`col121` TIMESTAMP,
+`col122` MEDIUMINT,
+`col123` YEAR,
+`col124` DATE,
+`col125` TEXT,
+`col126` FLOAT,
+`col127` TINYTEXT,
+`col128` BOOL,
+`col129` NUMERIC,
+`col130` TIMESTAMP,
+`col131` INT,
+`col132` MEDIUMBLOB,
+KEY `idx0` (`col130`),
+KEY `idx1` (`col30`,`col55`,`col19`(31)),
+KEY `idx2` (`col104`(186)),
+KEY `idx3` (`col131`),
+KEY `idx4` (`col64`,`col93`,`col2`(11)),
+KEY `idx5` (`col34`,`col121`,`col22`),
+KEY `idx6` (`col33`,`col55`,`col83`),
+KEY `idx7` (`col17`,`col87`(245),`col99`(17)),
+KEY `idx8` (`col65`,`col120`),
+KEY `idx9` (`col82`),
+KEY `idx10` (`col9`(72)),
+KEY `idx11` (`col88`),
+KEY `idx12` (`col128`,`col9`(200),`col71`,`col66`),
+KEY `idx13` (`col77`(126)),
+KEY `idx14` (`col105`(26),`col13`,`col117`),
+KEY `idx15` (`col4`(246),`col130`,`col115`,`col3`(141))
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+# Creating a table with 176 columns and 13 indexes
+DROP TABLE IF EXISTS `table5`;
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table5`
+(`col0` MEDIUMTEXT,
+`col1` VARCHAR (90),
+`col2` TINYTEXT,
+`col3` TIME,
+`col4` BOOL,
+`col5` TINYTEXT,
+`col6` BOOL,
+`col7` TIMESTAMP,
+`col8` TINYBLOB,
+`col9` TINYINT,
+`col10` YEAR,
+`col11` SET ('test1','test2','test3'),
+`col12` TEXT,
+`col13` CHAR (248),
+`col14` BIGINT,
+`col15` TEXT,
+`col16` TINYINT,
+`col17` NUMERIC,
+`col18` SET ('test1','test2','test3'),
+`col19` LONGBLOB,
+`col20` FLOAT,
+`col21` INT,
+`col22` TEXT,
+`col23` BOOL,
+`col24` DECIMAL,
+`col25` DOUBLE PRECISION,
+`col26` FLOAT,
+`col27` TINYBLOB,
+`col28` NUMERIC,
+`col29` MEDIUMBLOB,
+`col30` DATE,
+`col31` LONGTEXT,
+`col32` DATE,
+`col33` FLOAT,
+`col34` BIGINT,
+`col35` TINYTEXT,
+`col36` MEDIUMTEXT,
+`col37` TIME,
+`col38` INT,
+`col39` TINYINT,
+`col40` SET ('test1','test2','test3'),
+`col41` CHAR (130),
+`col42` SMALLINT,
+`col43` INT,
+`col44` MEDIUMTEXT,
+`col45` VARCHAR (126),
+`col46` INT,
+`col47` DOUBLE PRECISION,
+`col48` BIGINT,
+`col49` MEDIUMTEXT,
+`col50` TINYBLOB,
+`col51` MEDIUMINT,
+`col52` TEXT,
+`col53` VARCHAR (208),
+`col54` VARCHAR (207),
+`col55` NUMERIC,
+`col56` DATETIME,
+`col57` ENUM ('test1','test2','test3'),
+`col58` NUMERIC,
+`col59` TINYBLOB,
+`col60` VARCHAR (73),
+`col61` MEDIUMTEXT,
+`col62` TINYBLOB,
+`col63` DATETIME,
+`col64` NUMERIC,
+`col65` MEDIUMINT,
+`col66` DATETIME,
+`col67` NUMERIC,
+`col68` TINYINT,
+`col69` VARCHAR (58),
+`col70` DECIMAL,
+`col71` MEDIUMTEXT,
+`col72` DATE,
+`col73` TIME,
+`col74` DOUBLE PRECISION,
+`col75` DECIMAL,
+`col76` MEDIUMBLOB,
+`col77` REAL,
+`col78` YEAR,
+`col79` YEAR,
+`col80` LONGBLOB,
+`col81` BLOB,
+`col82` BIGINT,
+`col83` ENUM ('test1','test2','test3'),
+`col84` NUMERIC,
+`col85` SET ('test1','test2','test3'),
+`col86` MEDIUMTEXT,
+`col87` LONGBLOB,
+`col88` TIME,
+`col89` ENUM ('test1','test2','test3'),
+`col90` DECIMAL,
+`col91` FLOAT,
+`col92` DATETIME,
+`col93` TINYTEXT,
+`col94` TIMESTAMP,
+`col95` TIMESTAMP,
+`col96` TEXT,
+`col97` REAL,
+`col98` VARCHAR (198),
+`col99` TIME,
+`col100` TINYINT,
+`col101` BIGINT,
+`col102` LONGBLOB,
+`col103` LONGBLOB,
+`col104` MEDIUMINT,
+`col105` MEDIUMTEXT,
+`col106` TIMESTAMP,
+`col107` SMALLINT,
+`col108` NUMERIC,
+`col109` DECIMAL,
+`col110` FLOAT,
+`col111` DECIMAL,
+`col112` REAL,
+`col113` TINYTEXT,
+`col114` FLOAT,
+`col115` VARCHAR (7),
+`col116` LONGTEXT,
+`col117` DATE,
+`col118` BIGINT,
+`col119` TEXT,
+`col120` BIGINT,
+`col121` BLOB,
+`col122` CHAR (110),
+`col123` NUMERIC,
+`col124` MEDIUMBLOB,
+`col125` NUMERIC,
+`col126` NUMERIC,
+`col127` BOOL,
+`col128` TIME,
+`col129` TINYBLOB,
+`col130` TINYBLOB,
+`col131` DATE,
+`col132` INT,
+`col133` VARCHAR (123),
+`col134` CHAR (238),
+`col135` VARCHAR (225),
+`col136` LONGTEXT,
+`col137` LONGBLOB,
+`col138` REAL,
+`col139` TINYBLOB,
+`col140` DATETIME,
+`col141` TINYTEXT,
+`col142` LONGBLOB,
+`col143` BIGINT,
+`col144` VARCHAR (236),
+`col145` TEXT,
+`col146` YEAR,
+`col147` DECIMAL,
+`col148` TEXT,
+`col149` MEDIUMBLOB,
+`col150` TINYINT,
+`col151` BOOL,
+`col152` VARCHAR (72),
+`col153` INT,
+`col154` VARCHAR (165),
+`col155` TINYINT,
+`col156` MEDIUMTEXT,
+`col157` DOUBLE PRECISION,
+`col158` TIME,
+`col159` MEDIUMBLOB,
+`col160` LONGBLOB,
+`col161` DATETIME,
+`col162` DOUBLE PRECISION,
+`col163` BLOB,
+`col164` ENUM ('test1','test2','test3'),
+`col165` TIMESTAMP,
+`col166` DATE,
+`col167` TINYBLOB,
+`col168` TINYBLOB,
+`col169` LONGBLOB,
+`col170` DATETIME,
+`col171` BIGINT,
+`col172` VARCHAR (30),
+`col173` LONGTEXT,
+`col174` TIME,
+`col175` FLOAT,
+KEY `idx0` (`col16`,`col156`(139),`col97`,`col120`),
+KEY `idx1` (`col24`,`col0`(108)),
+KEY `idx2` (`col117`,`col173`(34),`col132`,`col82`),
+KEY `idx3` (`col2`(86)),
+KEY `idx4` (`col2`(43)),
+KEY `idx5` (`col83`,`col35`(87),`col111`),
+KEY `idx6` (`col6`,`col134`,`col92`),
+KEY `idx7` (`col56`),
+KEY `idx8` (`col30`,`col53`,`col129`(66)),
+KEY `idx9` (`col53`,`col113`(211),`col32`,`col15`(75)),
+KEY `idx10` (`col34`),
+KEY `idx11` (`col126`),
+KEY `idx12` (`col24`)
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+# Creating a table with 179 columns and 46 indexes
+DROP TABLE IF EXISTS `table6`;
+-- error ER_TOO_BIG_ROWSIZE
+--error ER_TOO_BIG_ROWSIZE
+CREATE TABLE IF NOT EXISTS `table6`
+(`col0` ENUM ('test1','test2','test3'),
+`col1` MEDIUMBLOB,
+`col2` MEDIUMBLOB,
+`col3` DATETIME,
+`col4` DATE,
+`col5` YEAR,
+`col6` REAL,
+`col7` NUMERIC,
+`col8` MEDIUMBLOB,
+`col9` TEXT,
+`col10` TIMESTAMP,
+`col11` DOUBLE,
+`col12` DOUBLE,
+`col13` SMALLINT,
+`col14` TIMESTAMP,
+`col15` DECIMAL,
+`col16` DATE,
+`col17` TEXT,
+`col18` LONGBLOB,
+`col19` BIGINT,
+`col20` FLOAT,
+`col21` DATETIME,
+`col22` TINYINT,
+`col23` MEDIUMBLOB,
+`col24` SET ('test1','test2','test3'),
+`col25` TIME,
+`col26` TEXT,
+`col27` LONGTEXT,
+`col28` BIGINT,
+`col29` REAL,
+`col30` YEAR,
+`col31` MEDIUMBLOB,
+`col32` MEDIUMINT,
+`col33` FLOAT,
+`col34` TEXT,
+`col35` DATE,
+`col36` TIMESTAMP,
+`col37` REAL,
+`col38` BLOB,
+`col39` BLOB,
+`col40` BLOB,
+`col41` TINYBLOB,
+`col42` INT,
+`col43` TINYINT,
+`col44` REAL,
+`col45` BIGINT,
+`col46` TIMESTAMP,
+`col47` BLOB,
+`col48` ENUM ('test1','test2','test3'),
+`col49` BOOL,
+`col50` CHAR (109),
+`col51` DOUBLE,
+`col52` DOUBLE PRECISION,
+`col53` ENUM ('test1','test2','test3'),
+`col54` FLOAT,
+`col55` DOUBLE PRECISION,
+`col56` CHAR (166),
+`col57` TEXT,
+`col58` TIME,
+`col59` DECIMAL,
+`col60` TEXT,
+`col61` ENUM ('test1','test2','test3'),
+`col62` LONGTEXT,
+`col63` YEAR,
+`col64` DOUBLE,
+`col65` CHAR (87),
+`col66` DATE,
+`col67` BOOL,
+`col68` MEDIUMBLOB,
+`col69` DATETIME,
+`col70` DECIMAL,
+`col71` TIME,
+`col72` REAL,
+`col73` LONGTEXT,
+`col74` BLOB,
+`col75` REAL,
+`col76` INT,
+`col77` INT,
+`col78` FLOAT,
+`col79` DOUBLE,
+`col80` MEDIUMINT,
+`col81` ENUM ('test1','test2','test3'),
+`col82` VARCHAR (221),
+`col83` BIGINT,
+`col84` TINYINT,
+`col85` BIGINT,
+`col86` FLOAT,
+`col87` MEDIUMBLOB,
+`col88` CHAR (126),
+`col89` MEDIUMBLOB,
+`col90` DATETIME,
+`col91` TINYINT,
+`col92` DOUBLE,
+`col93` NUMERIC,
+`col94` DATE,
+`col95` BLOB,
+`col96` DATETIME,
+`col97` TIME,
+`col98` LONGBLOB,
+`col99` INT,
+`col100` SET ('test1','test2','test3'),
+`col101` TINYBLOB,
+`col102` INT,
+`col103` MEDIUMBLOB,
+`col104` MEDIUMTEXT,
+`col105` FLOAT,
+`col106` TINYBLOB,
+`col107` VARCHAR (26),
+`col108` TINYINT,
+`col109` TIME,
+`col110` TINYBLOB,
+`col111` LONGBLOB,
+`col112` TINYTEXT,
+`col113` FLOAT,
+`col114` TINYINT,
+`col115` NUMERIC,
+`col116` TIME,
+`col117` SET ('test1','test2','test3'),
+`col118` DATE,
+`col119` SMALLINT,
+`col120` BLOB,
+`col121` TINYTEXT,
+`col122` REAL,
+`col123` YEAR,
+`col124` REAL,
+`col125` BOOL,
+`col126` BLOB,
+`col127` REAL,
+`col128` MEDIUMBLOB,
+`col129` TIMESTAMP,
+`col130` LONGBLOB,
+`col131` MEDIUMBLOB,
+`col132` YEAR,
+`col133` YEAR,
+`col134` INT,
+`col135` MEDIUMINT,
+`col136` MEDIUMINT,
+`col137` TINYTEXT,
+`col138` TINYBLOB,
+`col139` BLOB,
+`col140` SET ('test1','test2','test3'),
+`col141` ENUM ('test1','test2','test3'),
+`col142` ENUM ('test1','test2','test3'),
+`col143` TINYTEXT,
+`col144` DATETIME,
+`col145` TEXT,
+`col146` DOUBLE PRECISION,
+`col147` DECIMAL,
+`col148` MEDIUMTEXT,
+`col149` TINYTEXT,
+`col150` SET ('test1','test2','test3'),
+`col151` MEDIUMTEXT,
+`col152` CHAR (126),
+`col153` DOUBLE,
+`col154` CHAR (243),
+`col155` SET ('test1','test2','test3'),
+`col156` SET ('test1','test2','test3'),
+`col157` DATETIME,
+`col158` DOUBLE,
+`col159` NUMERIC,
+`col160` DECIMAL,
+`col161` FLOAT,
+`col162` LONGBLOB,
+`col163` LONGTEXT,
+`col164` INT,
+`col165` TIME,
+`col166` CHAR (27),
+`col167` VARCHAR (63),
+`col168` TEXT,
+`col169` TINYBLOB,
+`col170` TINYBLOB,
+`col171` ENUM ('test1','test2','test3'),
+`col172` INT,
+`col173` TIME,
+`col174` DECIMAL,
+`col175` DOUBLE,
+`col176` MEDIUMBLOB,
+`col177` LONGBLOB,
+`col178` CHAR (43),
+KEY `idx0` (`col131`(219)),
+KEY `idx1` (`col67`,`col122`,`col59`,`col87`(33)),
+KEY `idx2` (`col83`,`col42`,`col57`(152)),
+KEY `idx3` (`col106`(124)),
+KEY `idx4` (`col173`,`col80`,`col165`,`col89`(78)),
+KEY `idx5` (`col174`,`col145`(108),`col23`(228),`col141`),
+KEY `idx6` (`col157`,`col140`),
+KEY `idx7` (`col130`(188),`col15`),
+KEY `idx8` (`col52`),
+KEY `idx9` (`col144`),
+KEY `idx10` (`col155`),
+KEY `idx11` (`col62`(230),`col1`(109)),
+KEY `idx12` (`col151`(24),`col95`(85)),
+KEY `idx13` (`col114`),
+KEY `idx14` (`col42`,`col98`(56),`col146`),
+KEY `idx15` (`col147`,`col39`(254),`col35`),
+KEY `idx16` (`col79`),
+KEY `idx17` (`col65`),
+KEY `idx18` (`col149`(165),`col168`(119),`col32`,`col117`),
+KEY `idx19` (`col64`),
+KEY `idx20` (`col93`),
+KEY `idx21` (`col64`,`col113`,`col104`(182)),
+KEY `idx22` (`col52`,`col111`(189)),
+KEY `idx23` (`col45`),
+KEY `idx24` (`col154`,`col107`,`col110`(159)),
+KEY `idx25` (`col149`(1),`col87`(131)),
+KEY `idx26` (`col58`,`col115`,`col63`),
+KEY `idx27` (`col95`(9),`col0`,`col87`(113)),
+KEY `idx28` (`col92`,`col130`(1)),
+KEY `idx29` (`col151`(129),`col137`(254),`col13`),
+KEY `idx30` (`col49`),
+KEY `idx31` (`col28`),
+KEY `idx32` (`col83`,`col146`),
+KEY `idx33` (`col155`,`col90`,`col17`(245)),
+KEY `idx34` (`col174`,`col169`(44),`col107`),
+KEY `idx35` (`col113`),
+KEY `idx36` (`col52`),
+KEY `idx37` (`col16`,`col120`(190)),
+KEY `idx38` (`col28`),
+KEY `idx39` (`col131`(165)),
+KEY `idx40` (`col135`,`col26`(86)),
+KEY `idx41` (`col69`,`col94`),
+KEY `idx42` (`col105`,`col151`(38),`col97`),
+KEY `idx43` (`col88`),
+KEY `idx44` (`col176`(100),`col42`,`col73`(189),`col94`),
+KEY `idx45` (`col2`(27),`col27`(116))
+)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
+
+DROP TABLE IF EXISTS table0;
+DROP TABLE IF EXISTS table1;
+DROP TABLE IF EXISTS table2;
+DROP TABLE IF EXISTS table3;
+DROP TABLE IF EXISTS table4;
+DROP TABLE IF EXISTS table5;
+DROP TABLE IF EXISTS table6;
+
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug36172.result b/storage/innodb_plugin/mysql-test/innodb_bug36172.result
new file mode 100644
index 00000000000..195775f74c8
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug36172.result
@@ -0,0 +1 @@
+SET storage_engine=InnoDB;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug36172.test b/storage/innodb_plugin/mysql-test/innodb_bug36172.test
new file mode 100644
index 00000000000..666d4a2f4b7
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug36172.test
@@ -0,0 +1,26 @@
+#
+# Test case for bug 36172
+#
+
+-- source include/not_embedded.inc
+-- source include/have_innodb.inc
+
+SET storage_engine=InnoDB;
+
+# we do not really care about what gets printed, we are only
+# interested in getting success or failure according to our
+# expectations
+
+-- disable_query_log
+-- disable_result_log
+
+SET GLOBAL innodb_file_format='Barracuda';
+SET GLOBAL innodb_file_per_table=on;
+
+DROP TABLE IF EXISTS `table0`;
+CREATE TABLE `table0` (   `col0` tinyint(1) DEFAULT NULL,   `col1` tinyint(1) DEFAULT NULL,   `col2` tinyint(4) DEFAULT NULL,   `col3` date DEFAULT NULL,   `col4` time DEFAULT NULL,   `col5` set('test1','test2','test3') DEFAULT NULL,   `col6` time DEFAULT NULL,   `col7` text,   `col8` decimal(10,0) DEFAULT NULL,   `col9` set('test1','test2','test3') DEFAULT NULL,   `col10` float DEFAULT NULL,   `col11` double DEFAULT NULL,   `col12` enum('test1','test2','test3') DEFAULT NULL,   `col13` tinyblob,   `col14` year(4) DEFAULT NULL,   `col15` set('test1','test2','test3') DEFAULT NULL,   `col16` decimal(10,0) DEFAULT NULL,   `col17` decimal(10,0) DEFAULT NULL,   `col18` blob,   `col19` datetime DEFAULT NULL,   `col20` double DEFAULT NULL,   `col21` decimal(10,0) DEFAULT NULL,   `col22` datetime DEFAULT NULL,   `col23` decimal(10,0) DEFAULT NULL,   `col24` decimal(10,0) DEFAULT NULL,   `col25` longtext,   `col26` tinyblob,   `col27` time DEFAULT NULL,   `col28` tinyblob,   `col29` enum('test1','test2','test3') DEFAULT NULL,   `col30` smallint(6) DEFAULT NULL,   `col31` double DEFAULT NULL,   `col32` float DEFAULT NULL,   `col33` char(175) DEFAULT NULL,   `col34` tinytext,   `col35` tinytext,   `col36` tinyblob,   `col37` tinyblob,   `col38` tinytext,   `col39` mediumblob,   `col40` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,   `col41` double DEFAULT NULL,   `col42` smallint(6) DEFAULT NULL,   `col43` longblob,   `col44` varchar(80) DEFAULT NULL,   `col45` mediumtext,   `col46` decimal(10,0) DEFAULT NULL,   `col47` bigint(20) DEFAULT NULL,   `col48` date DEFAULT NULL,   `col49` tinyblob,   `col50` date DEFAULT NULL,   `col51` tinyint(1) DEFAULT NULL,   `col52` mediumint(9) DEFAULT NULL,   `col53` float DEFAULT NULL,   `col54` tinyblob,   `col55` longtext,   `col56` smallint(6) DEFAULT NULL,   `col57` enum('test1','test2','test3') DEFAULT NULL,   `col58` datetime DEFAULT NULL,   `col59` mediumtext,   `col60` varchar(232) DEFAULT NULL,   `col61` decimal(10,0) DEFAULT NULL,   `col62` year(4) DEFAULT NULL,   `col63` smallint(6) DEFAULT NULL,   `col64` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',   `col65` blob,   `col66` longblob,   `col67` int(11) DEFAULT NULL,   `col68` longtext,   `col69` enum('test1','test2','test3') DEFAULT NULL,   `col70` int(11) DEFAULT NULL,   `col71` time DEFAULT NULL,   `col72` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',   `col73` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',   `col74` varchar(170) DEFAULT NULL,   `col75` set('test1','test2','test3') DEFAULT NULL,   `col76` tinyblob,   `col77` bigint(20) DEFAULT NULL,   `col78` decimal(10,0) DEFAULT NULL,   `col79` datetime DEFAULT NULL,   `col80` year(4) DEFAULT NULL,   `col81` decimal(10,0) DEFAULT NULL,   `col82` longblob,   `col83` text,   `col84` char(83) DEFAULT NULL,   `col85` decimal(10,0) DEFAULT NULL,   `col86` float DEFAULT NULL,   `col87` int(11) DEFAULT NULL,   `col88` varchar(145) DEFAULT NULL,   `col89` date DEFAULT NULL,   `col90` decimal(10,0) DEFAULT NULL,   `col91` decimal(10,0) DEFAULT NULL,   `col92` mediumblob,   `col93` time DEFAULT NULL,   KEY `idx0` (`col69`,`col90`,`col8`),   KEY `idx1` (`col60`),   KEY `idx2` (`col60`,`col70`,`col74`),   KEY `idx3` (`col22`,`col32`,`col72`,`col30`),   KEY `idx4` (`col29`),   KEY `idx5` (`col19`,`col45`(143)),   KEY `idx6` (`col46`,`col48`,`col5`,`col39`(118)),   KEY `idx7` (`col48`,`col61`),   KEY `idx8` (`col93`),   KEY `idx9` (`col31`),   KEY `idx10` (`col30`,`col21`),   KEY `idx11` (`col67`),   KEY `idx12` (`col44`,`col6`,`col8`,`col38`(226)),   KEY `idx13` (`col71`,`col41`,`col15`,`col49`(88)),   KEY `idx14` (`col78`),   KEY `idx15` (`col63`,`col67`,`col64`),   KEY `idx16` (`col17`,`col86`),   KEY `idx17` (`col77`,`col56`,`col10`,`col55`(24)),   KEY `idx18` (`col62`),   KEY `idx19` (`col31`,`col57`,`col56`,`col53`),   KEY `idx20` (`col46`),   KEY `idx21` (`col83`(54)),   KEY `idx22` (`col51`,`col7`(120)),   KEY `idx23` (`col7`(163),`col31`,`col71`,`col14`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2;
+insert ignore into `table0` set  `col23` = 7887371.5084383683, `col24` = 4293854615.6906948000, `col25` = 'vitalist', `col26` = 'widespread', `col27` = '3570490', `col28` = 'habitual', `col30` = -5471, `col31` = 4286985783.6771750000, `col32` = 6354540.9826654866, `col33` = 'defoliation', `col34` = 'logarithms', `col35` = 'tegument\'s', `col36` = 'scouting\'s', `col37` = 'intermittency', `col38` = 'elongates', `col39` = 'prophecies', `col40` = '20560103035939', `col41` = 4292809130.0544143000, `col42` = 22057, `col43` = 'Hess\'s', `col44` = 'bandstand', `col45` = 'phenylketonuria', `col46` = 6338767.4018677324, `col47` = 5310247, `col48` = '12592418', `col49` = 'churchman\'s', `col50` = '32226125', `col51` = -58, `col52` = -6207968, `col53` = 1244839.3255104220, `col54` = 'robotized', `col55` = 'monotonous', `col56` = -26909, `col58` = '20720107023550', `col59` = 'suggestiveness\'s', `col60` = 'gemology', `col61` = 4287800670.2229986000, `col62` = '1944', `col63` = -16827, `col64` = '20700107212324', `col65` = 'Nicolais', `col66` = 'apteryx', `col67` = 6935317, `col68` = 'stroganoff', `col70` = 3316430, `col71` = '3277608', `col72` = '19300511045918', `col73` = '20421201003327', `col74` = 'attenuant', `col75` = '15173', `col76` = 'upstroke\'s', `col77` = 8118987, `col78` = 6791516.2735374002, `col79` = '20780701144624', `col80` = '2134', `col81` = 4290682351.3127537000, `col82` = 'unexplainably', `col83` = 'Storm', `col84` = 'Greyso\'s', `col85` = 4289119212.4306774000, `col86` = 7617575.8796655172, `col87` = -6325335, `col88` = 'fondue\'s', `col89` = '40608940', `col90` = 1659421.8093508712, `col91` = 8346904.6584368423, `col92` = 'reloads', `col93` = '5188366';
+CHECK TABLE table0 EXTENDED;
+INSERT IGNORE INTO `table0` SET `col19` = '19940127002709', `col20` = 2383927.9055146948, `col21` = 4293243420.5621204000, `col22` = '20511211123705', `col23` = 4289899778.6573381000, `col24` = 4293449279.0540481000, `col25` = 'emphysemic', `col26` = 'dentally', `col27` = '2347406', `col28` = 'eruct', `col30` = 1222, `col31` = 4294372994.9941406000, `col32` = 4291385574.1173744000, `col33` = 'borrowing\'s', `col34` = 'septics', `col35` = 'ratter\'s', `col36` = 'Kaye', `col37` = 'Florentia', `col38` = 'allium', `col39` = 'barkeep', `col40` = '19510407003441', `col41` = 4293559200.4215522000, `col42` = 22482, `col43` = 'decussate', `col44` = 'Brom\'s', `col45` = 'violated', `col46` = 4925506.4635456400, `col47` = 930549, `col48` = '51296066', `col49` = 'voluminously', `col50` = '29306676', `col51` = -88, `col52` = -2153690, `col53` = 4290250202.1464887000, `col54` = 'expropriation', `col55` = 'Aberdeen\'s', `col56` = 20343, `col58` = '19640415171532', `col59` = 'extern', `col60` = 'Ubana', `col61` = 4290487961.8539081000, `col62` = '2147', `col63` = -24271, `col64` = '20750801194548', `col65` = 'Cunaxa\'s', `col66` = 'pasticcio', `col67` = 2795817, `col68` = 'Indore\'s', `col70` = 6864127, `col71` = '1817832', `col72` = '20540506114211', `col73` = '20040101012300', `col74` = 'rationalized', `col75` = '45522', `col76` = 'indene', `col77` = -6964559, `col78` = 4247535.5266884370, `col79` = '20720416124357', `col80` = '2143', `col81` = 4292060102.4466386000, `col82` = 'striving', `col83` = 'boneblack\'s', `col84` = 'redolent', `col85` = 6489697.9009369183, `col86` = 4287473465.9731131000, `col87` = 7726015, `col88` = 'perplexed', `col89` = '17153791', `col90` = 5478587.1108127078, `col91` = 4287091404.7004304000, `col92` = 'Boulez\'s', `col93` = '2931278';
+CHECK TABLE table0 EXTENDED;
+DROP TABLE table0;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug40360.result b/storage/innodb_plugin/mysql-test/innodb_bug40360.result
new file mode 100644
index 00000000000..ef4cf463903
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug40360.result
@@ -0,0 +1,4 @@
+SET TX_ISOLATION='READ-COMMITTED';
+CREATE TABLE bug40360 (a INT) engine=innodb;
+INSERT INTO bug40360 VALUES (1);
+DROP TABLE bug40360;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug40360.test b/storage/innodb_plugin/mysql-test/innodb_bug40360.test
new file mode 100644
index 00000000000..e88837aab4f
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug40360.test
@@ -0,0 +1,16 @@
+#
+# Make sure http://bugs.mysql.com/40360 remains fixed.
+#
+
+-- source include/not_embedded.inc
+-- source include/have_innodb.inc
+
+SET TX_ISOLATION='READ-COMMITTED';
+
+# This is the default since MySQL 5.1.29 SET BINLOG_FORMAT='STATEMENT';
+
+CREATE TABLE bug40360 (a INT) engine=innodb;
+
+INSERT INTO bug40360 VALUES (1);
+
+DROP TABLE bug40360;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug40565.result b/storage/innodb_plugin/mysql-test/innodb_bug40565.result
new file mode 100644
index 00000000000..21e923d9336
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug40565.result
@@ -0,0 +1,9 @@
+create table bug40565(value decimal(4,2)) engine=innodb;
+insert into bug40565 values (1), (null);
+update bug40565 set value=NULL;
+affected rows: 1
+info: Rows matched: 2  Changed: 1  Warnings: 0
+update bug40565 set value=NULL;
+affected rows: 0
+info: Rows matched: 2  Changed: 0  Warnings: 0
+drop table bug40565;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug40565.test b/storage/innodb_plugin/mysql-test/innodb_bug40565.test
new file mode 100644
index 00000000000..d7aa0fd514a
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug40565.test
@@ -0,0 +1,10 @@
+# Bug #40565 Update Query Results in "1 Row Affected" But Should Be "Zero Rows"
+-- source include/have_innodb.inc
+
+create table bug40565(value decimal(4,2)) engine=innodb;
+insert into bug40565 values (1), (null);
+--enable_info
+update bug40565 set value=NULL;
+update bug40565 set value=NULL;
+--disable_info
+drop table bug40565;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug41904.result b/storage/innodb_plugin/mysql-test/innodb_bug41904.result
new file mode 100644
index 00000000000..6070d32d181
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug41904.result
@@ -0,0 +1,4 @@
+CREATE TABLE bug41904 (id INT PRIMARY KEY, uniquecol CHAR(15)) ENGINE=InnoDB;
+INSERT INTO bug41904 VALUES (1,NULL), (2,NULL);
+CREATE UNIQUE INDEX ui ON bug41904 (uniquecol);
+DROP TABLE bug41904;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug41904.test b/storage/innodb_plugin/mysql-test/innodb_bug41904.test
new file mode 100644
index 00000000000..365c5229adc
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug41904.test
@@ -0,0 +1,14 @@
+#
+# Make sure http://bugs.mysql.com/41904 remains fixed.
+#
+
+-- source include/not_embedded.inc
+-- source include/have_innodb.inc
+
+CREATE TABLE bug41904 (id INT PRIMARY KEY, uniquecol CHAR(15)) ENGINE=InnoDB;
+
+INSERT INTO bug41904 VALUES (1,NULL), (2,NULL);
+
+CREATE UNIQUE INDEX ui ON bug41904 (uniquecol);
+
+DROP TABLE bug41904;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero-master.opt b/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero-master.opt
new file mode 100644
index 00000000000..d71dbe17d5b
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero-master.opt
@@ -0,0 +1 @@
+--innodb_commit_concurrency=1
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero.result b/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero.result
new file mode 100644
index 00000000000..277dfffdd35
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero.result
@@ -0,0 +1,26 @@
+set global innodb_commit_concurrency=0;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+1
+set global innodb_commit_concurrency=1;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+1
+set global innodb_commit_concurrency=42;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+42
+set global innodb_commit_concurrency=DEFAULT;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+1
+set global innodb_commit_concurrency=0;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+1
+set global innodb_commit_concurrency=1;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+1
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero.test b/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero.test
new file mode 100644
index 00000000000..685fdf20489
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero.test
@@ -0,0 +1,21 @@
+#
+# Bug#42101  	Race condition in innodb_commit_concurrency
+# http://bugs.mysql.com/42101
+#
+
+-- source include/have_innodb.inc
+
+--error ER_WRONG_ARGUMENTS
+set global innodb_commit_concurrency=0;
+select @@innodb_commit_concurrency;
+set global innodb_commit_concurrency=1;
+select @@innodb_commit_concurrency;
+set global innodb_commit_concurrency=42;
+select @@innodb_commit_concurrency;
+set global innodb_commit_concurrency=DEFAULT;
+select @@innodb_commit_concurrency;
+--error ER_WRONG_ARGUMENTS
+set global innodb_commit_concurrency=0;
+select @@innodb_commit_concurrency;
+set global innodb_commit_concurrency=1;
+select @@innodb_commit_concurrency;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug42101.result b/storage/innodb_plugin/mysql-test/innodb_bug42101.result
new file mode 100644
index 00000000000..805097ffe9d
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug42101.result
@@ -0,0 +1,22 @@
+set global innodb_commit_concurrency=0;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+0
+set global innodb_commit_concurrency=1;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+0
+set global innodb_commit_concurrency=42;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+0
+set global innodb_commit_concurrency=0;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+0
+set global innodb_commit_concurrency=DEFAULT;
+select @@innodb_commit_concurrency;
+@@innodb_commit_concurrency
+0
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug42101.test b/storage/innodb_plugin/mysql-test/innodb_bug42101.test
new file mode 100644
index 00000000000..b6536490d48
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug42101.test
@@ -0,0 +1,19 @@
+#
+# Bug#42101  	Race condition in innodb_commit_concurrency
+# http://bugs.mysql.com/42101
+#
+
+-- source include/have_innodb.inc
+
+set global innodb_commit_concurrency=0;
+select @@innodb_commit_concurrency;
+--error ER_WRONG_ARGUMENTS
+set global innodb_commit_concurrency=1;
+select @@innodb_commit_concurrency;
+--error ER_WRONG_ARGUMENTS
+set global innodb_commit_concurrency=42;
+select @@innodb_commit_concurrency;
+set global innodb_commit_concurrency=0;
+select @@innodb_commit_concurrency;
+set global innodb_commit_concurrency=DEFAULT;
+select @@innodb_commit_concurrency;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug44032.result b/storage/innodb_plugin/mysql-test/innodb_bug44032.result
new file mode 100644
index 00000000000..da2a000b06e
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug44032.result
@@ -0,0 +1,7 @@
+CREATE TABLE bug44032(c CHAR(3) CHARACTER SET UTF8) ROW_FORMAT=REDUNDANT
+ENGINE=InnoDB;
+INSERT INTO bug44032 VALUES('abc'),(0xEFBCA4EFBCA4EFBCA4);
+UPDATE bug44032 SET c='DDD' WHERE c=0xEFBCA4EFBCA4EFBCA4;
+UPDATE bug44032 SET c=NULL WHERE c='DDD';
+UPDATE bug44032 SET c='DDD' WHERE c IS NULL;
+DROP TABLE bug44032;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug44032.test b/storage/innodb_plugin/mysql-test/innodb_bug44032.test
new file mode 100644
index 00000000000..a963cb8b68f
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug44032.test
@@ -0,0 +1,13 @@
+# Bug44032 no update-in-place of UTF-8 columns in ROW_FORMAT=REDUNDANT
+# (btr_cur_update_in_place not invoked when updating from/to NULL;
+# the update is performed by delete and insert instead)
+
+-- source include/have_innodb.inc
+
+CREATE TABLE bug44032(c CHAR(3) CHARACTER SET UTF8) ROW_FORMAT=REDUNDANT
+ENGINE=InnoDB;
+INSERT INTO bug44032 VALUES('abc'),(0xEFBCA4EFBCA4EFBCA4);
+UPDATE bug44032 SET c='DDD' WHERE c=0xEFBCA4EFBCA4EFBCA4;
+UPDATE bug44032 SET c=NULL WHERE c='DDD';
+UPDATE bug44032 SET c='DDD' WHERE c IS NULL;
+DROP TABLE bug44032;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug45357.result b/storage/innodb_plugin/mysql-test/innodb_bug45357.result
new file mode 100644
index 00000000000..7adeff2062f
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug45357.result
@@ -0,0 +1,7 @@
+set session transaction isolation level read committed;
+create table bug45357(a int, b int,key(b))engine=innodb;
+insert into bug45357 values (25170,6122);
+update bug45357 set a=1 where b=30131;
+delete from bug45357 where b < 20996;
+delete from bug45357 where b < 7001;
+drop table bug45357;
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug45357.test b/storage/innodb_plugin/mysql-test/innodb_bug45357.test
new file mode 100644
index 00000000000..81727f352dd
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_bug45357.test
@@ -0,0 +1,10 @@
+-- source include/have_innodb.inc
+
+set session transaction isolation level read committed;
+
+create table bug45357(a int, b int,key(b))engine=innodb;
+insert into bug45357 values (25170,6122);
+update bug45357 set a=1 where b=30131;
+delete from bug45357 where b < 20996;
+delete from bug45357 where b < 7001;
+drop table bug45357;
diff --git a/storage/innodb_plugin/mysql-test/innodb_file_format.result b/storage/innodb_plugin/mysql-test/innodb_file_format.result
new file mode 100644
index 00000000000..9cfac5f001c
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_file_format.result
@@ -0,0 +1,44 @@
+select @@innodb_file_format;
+@@innodb_file_format
+Antelope
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Antelope
+set global innodb_file_format=antelope;
+set global innodb_file_format=barracuda;
+set global innodb_file_format=cheetah;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_file_format;
+@@innodb_file_format
+Barracuda
+set global innodb_file_format=default;
+select @@innodb_file_format;
+@@innodb_file_format
+Antelope
+set global innodb_file_format=on;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=off;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_file_format;
+@@innodb_file_format
+Antelope
+set global innodb_file_format_check=antelope;
+set global innodb_file_format_check=barracuda;
+set global innodb_file_format_check=cheetah;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Barracuda
+set global innodb_file_format_check=default;
+Warnings:
+Warning	1210	Ignoring SET innodb_file_format=on
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Barracuda
+set global innodb_file_format=on;
+ERROR HY000: Incorrect arguments to SET
+set global innodb_file_format=off;
+ERROR HY000: Incorrect arguments to SET
+select @@innodb_file_format_check;
+@@innodb_file_format_check
+Barracuda
diff --git a/storage/innodb_plugin/mysql-test/innodb_file_format.test b/storage/innodb_plugin/mysql-test/innodb_file_format.test
new file mode 100644
index 00000000000..62ce4157183
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_file_format.test
@@ -0,0 +1,28 @@
+-- source include/have_innodb.inc
+
+select @@innodb_file_format;
+select @@innodb_file_format_check;
+set global innodb_file_format=antelope;
+set global innodb_file_format=barracuda;
+--error ER_WRONG_ARGUMENTS
+set global innodb_file_format=cheetah;
+select @@innodb_file_format;
+set global innodb_file_format=default;
+select @@innodb_file_format;
+--error ER_WRONG_ARGUMENTS
+set global innodb_file_format=on;
+--error ER_WRONG_ARGUMENTS
+set global innodb_file_format=off;
+select @@innodb_file_format;
+set global innodb_file_format_check=antelope;
+set global innodb_file_format_check=barracuda;
+--error ER_WRONG_ARGUMENTS
+set global innodb_file_format_check=cheetah;
+select @@innodb_file_format_check;
+set global innodb_file_format_check=default;
+select @@innodb_file_format_check;
+--error ER_WRONG_ARGUMENTS
+set global innodb_file_format=on;
+--error ER_WRONG_ARGUMENTS
+set global innodb_file_format=off;
+select @@innodb_file_format_check;
diff --git a/storage/innodb_plugin/mysql-test/innodb_information_schema.result b/storage/innodb_plugin/mysql-test/innodb_information_schema.result
new file mode 100644
index 00000000000..396cae579ce
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_information_schema.result
@@ -0,0 +1,23 @@
+lock_mode	lock_type	lock_table	lock_index	lock_rec	lock_data
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	2	'1', 'abc', '''abc', 'abc''', 'a''bc', 'a''bc''', '''abc'''''
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	2	'1', 'abc', '''abc', 'abc''', 'a''bc', 'a''bc''', '''abc'''''
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	3	'2', 'abc', '"abc', 'abc"', 'a"bc', 'a"bc"', '"abc""'
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	3	'2', 'abc', '"abc', 'abc"', 'a"bc', 'a"bc"', '"abc""'
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	4	'3', 'abc', '\\abc', 'abc\\', 'a\\bc', 'a\\bc\\', '\\abc\\\\'
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	4	'3', 'abc', '\\abc', 'abc\\', 'a\\bc', 'a\\bc\\', '\\abc\\\\'
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	5	'4', 'abc', '\0abc', 'abc\0', 'a\0bc', 'a\0bc\0', 'a\0bc\0\0'
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	5	'4', 'abc', '\0abc', 'abc\0', 'a\0bc', 'a\0bc\0', 'a\0bc\0\0'
+X	RECORD	`test`.`t_min`	`PRIMARY`	2	-128, 0, -32768, 0, -8388608, 0, -2147483648, 0, -9223372036854775808, 0
+X	RECORD	`test`.`t_min`	`PRIMARY`	2	-128, 0, -32768, 0, -8388608, 0, -2147483648, 0, -9223372036854775808, 0
+X	RECORD	`test`.`t_max`	`PRIMARY`	2	127, 255, 32767, 65535, 8388607, 16777215, 2147483647, 4294967295, 9223372036854775807, 18446744073709551615
+X	RECORD	`test`.`t_max`	`PRIMARY`	2	127, 255, 32767, 65535, 8388607, 16777215, 2147483647, 4294967295, 9223372036854775807, 18446744073709551615
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	1	supremum pseudo-record
+X	RECORD	`test`.```t'\"_str`	`PRIMARY`	1	supremum pseudo-record
+lock_table	COUNT(*)
+`test`.`t_max`	2
+`test`.`t_min`	2
+`test`.```t'\"_str`	10
+lock_table	COUNT(*)
+"test"."t_max"	2
+"test"."t_min"	2
+"test"."`t'\""_str"	10
diff --git a/storage/innodb_plugin/mysql-test/innodb_information_schema.test b/storage/innodb_plugin/mysql-test/innodb_information_schema.test
new file mode 100644
index 00000000000..eaed653854a
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_information_schema.test
@@ -0,0 +1,145 @@
+# 
+# Test that user data is correctly "visualized" in
+# INFORMATION_SCHEMA.innodb_locks.lock_data
+#
+
+-- source include/have_innodb.inc
+
+-- disable_query_log
+-- disable_result_log
+
+SET storage_engine=InnoDB;
+
+-- disable_warnings
+DROP TABLE IF EXISTS t_min, t_max;
+-- enable_warnings
+
+let $table_def =
+(
+	c01 TINYINT,
+	c02 TINYINT UNSIGNED,
+	c03 SMALLINT,
+	c04 SMALLINT UNSIGNED,
+	c05 MEDIUMINT,
+	c06 MEDIUMINT UNSIGNED,
+	c07 INT,
+	c08 INT UNSIGNED,
+	c09 BIGINT,
+	c10 BIGINT UNSIGNED,
+	PRIMARY KEY(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10)
+);
+
+-- eval CREATE TABLE t_min $table_def;
+INSERT INTO t_min VALUES
+(-128, 0,
+ -32768, 0,
+ -8388608, 0,
+ -2147483648, 0,
+ -9223372036854775808, 0);
+
+-- eval CREATE TABLE t_max $table_def;
+INSERT INTO t_max VALUES
+(127, 255,
+ 32767, 65535,
+ 8388607, 16777215,
+ 2147483647, 4294967295,
+ 9223372036854775807, 18446744073709551615);
+
+CREATE TABLE ```t'\"_str` (
+	c1 VARCHAR(32),
+	c2 VARCHAR(32),
+	c3 VARCHAR(32),
+	c4 VARCHAR(32),
+	c5 VARCHAR(32),
+	c6 VARCHAR(32),
+	c7 VARCHAR(32),
+	PRIMARY KEY(c1, c2, c3, c4, c5, c6, c7)
+);
+INSERT INTO ```t'\"_str` VALUES
+('1', 'abc', '''abc', 'abc''', 'a''bc', 'a''bc''', '''abc''''');
+INSERT INTO ```t'\"_str` VALUES
+('2', 'abc', '"abc', 'abc"', 'a"bc', 'a"bc"', '"abc""');
+INSERT INTO ```t'\"_str` VALUES
+('3', 'abc', '\\abc', 'abc\\', 'a\\bc', 'a\\bc\\', '\\abc\\\\');
+INSERT INTO ```t'\"_str` VALUES
+('4', 'abc', 0x00616263, 0x61626300, 0x61006263, 0x6100626300, 0x610062630000);
+
+-- connect (con_lock,localhost,root,,)
+-- connect (con_min_trylock,localhost,root,,)
+-- connect (con_max_trylock,localhost,root,,)
+-- connect (con_str_insert_supremum,localhost,root,,)
+-- connect (con_str_lock_row1,localhost,root,,)
+-- connect (con_str_lock_row2,localhost,root,,)
+-- connect (con_str_lock_row3,localhost,root,,)
+-- connect (con_str_lock_row4,localhost,root,,)
+-- connect (con_verify_innodb_locks,localhost,root,,)
+
+-- connection con_lock
+SET autocommit=0;
+SELECT * FROM t_min FOR UPDATE;
+SELECT * FROM t_max FOR UPDATE;
+SELECT * FROM ```t'\"_str` FOR UPDATE;
+
+-- connection con_min_trylock
+-- send
+SELECT * FROM t_min FOR UPDATE;
+
+-- connection con_max_trylock
+-- send
+SELECT * FROM t_max FOR UPDATE;
+
+-- connection con_str_insert_supremum
+-- send
+INSERT INTO ```t'\"_str` VALUES
+('z', 'z', 'z', 'z', 'z', 'z', 'z');
+
+-- connection con_str_lock_row1
+-- send
+SELECT * FROM ```t'\"_str` WHERE c1 = '1' FOR UPDATE;
+
+-- connection con_str_lock_row2
+-- send
+SELECT * FROM ```t'\"_str` WHERE c1 = '2' FOR UPDATE;
+
+-- connection con_str_lock_row3
+-- send
+SELECT * FROM ```t'\"_str` WHERE c1 = '3' FOR UPDATE;
+
+-- connection con_str_lock_row4
+-- send
+SELECT * FROM ```t'\"_str` WHERE c1 = '4' FOR UPDATE;
+
+# Give time to the above 2 queries to execute before continuing.
+# Without this sleep it sometimes happens that the SELECT from innodb_locks
+# executes before some of them, resulting in less than expected number
+# of rows being selected from innodb_locks.
+-- sleep 0.1
+
+-- enable_result_log
+-- connection con_verify_innodb_locks
+SELECT lock_mode, lock_type, lock_table, lock_index, lock_rec, lock_data
+FROM INFORMATION_SCHEMA.INNODB_LOCKS ORDER BY lock_data;
+
+SELECT lock_table,COUNT(*) FROM INFORMATION_SCHEMA.INNODB_LOCKS
+GROUP BY lock_table;
+
+set @save_sql_mode = @@sql_mode;
+SET SQL_MODE='ANSI_QUOTES';
+SELECT lock_table,COUNT(*) FROM INFORMATION_SCHEMA.INNODB_LOCKS
+GROUP BY lock_table;
+SET @@sql_mode=@save_sql_mode;
+-- disable_result_log
+
+-- connection default
+
+-- disconnect con_lock
+-- disconnect con_min_trylock
+-- disconnect con_max_trylock
+-- disconnect con_str_insert_supremum
+-- disconnect con_str_lock_row1
+-- disconnect con_str_lock_row2
+-- disconnect con_str_lock_row3
+-- disconnect con_str_lock_row4
+-- disconnect con_verify_innodb_locks
+
+DROP TABLE t_min, t_max, ```t'\"_str`;
diff --git a/storage/innodb_plugin/mysql-test/innodb_trx_weight.inc b/storage/innodb_plugin/mysql-test/innodb_trx_weight.inc
new file mode 100644
index 00000000000..56d3d47da36
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_trx_weight.inc
@@ -0,0 +1,51 @@
+-- connect (con1,localhost,root,,)
+-- connect (con2,localhost,root,,)
+
+-- connection con1
+SET autocommit=0;
+SELECT * FROM t1 FOR UPDATE;
+-- if ($con1_extra_sql_present) {
+	-- eval $con1_extra_sql
+-- }
+
+-- connection con2
+SET autocommit=0;
+SELECT * FROM t2 FOR UPDATE;
+-- if ($con2_extra_sql_present) {
+	-- eval $con2_extra_sql
+-- }
+
+-- if ($con1_should_be_rolledback) {
+	-- connection con1
+	-- send
+	INSERT INTO t2 VALUES (0);
+
+	-- connection con2
+	INSERT INTO t1 VALUES (0);
+	ROLLBACK;
+
+	-- connection con1
+	-- error ER_LOCK_DEADLOCK
+	-- reap
+-- }
+# else
+-- if (!$con1_should_be_rolledback) {
+	-- connection con2
+	-- send
+	INSERT INTO t1 VALUES (0);
+
+	-- connection con1
+	INSERT INTO t2 VALUES (0);
+	ROLLBACK;
+
+	-- connection con2
+	-- error ER_LOCK_DEADLOCK
+	-- reap
+-- }
+
+-- connection default
+
+DELETE FROM t5_nontrans;
+
+-- disconnect con1
+-- disconnect con2
diff --git a/storage/innodb_plugin/mysql-test/innodb_trx_weight.result b/storage/innodb_plugin/mysql-test/innodb_trx_weight.result
new file mode 100644
index 00000000000..195775f74c8
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_trx_weight.result
@@ -0,0 +1 @@
+SET storage_engine=InnoDB;
diff --git a/storage/innodb_plugin/mysql-test/innodb_trx_weight.test b/storage/innodb_plugin/mysql-test/innodb_trx_weight.test
new file mode 100644
index 00000000000..b72eaad345f
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/innodb_trx_weight.test
@@ -0,0 +1,108 @@
+#
+# Ensure that the number of locks (SELECT FOR UPDATE for example) is
+# added to the number of altered rows when choosing the smallest
+# transaction to kill as a victim when a deadlock is detected.
+# Also transactions what had edited non-transactional tables should
+# be heavier than ones that had not.
+#
+
+-- source include/have_innodb.inc
+
+SET storage_engine=InnoDB;
+
+# we do not really care about what gets printed, we are only
+# interested in getting the deadlock resolved according to our
+# expectations
+-- disable_query_log
+-- disable_result_log
+
+# we want to use "-- eval statement1; statement2" which does not work with
+# prepared statements. Because this test should not behave differently with
+# or without prepared statements we disable them so the test does not fail
+# if someone runs ./mysql-test-run.pl --ps-protocol
+-- disable_ps_protocol
+
+-- disable_warnings
+DROP TABLE IF EXISTS t1, t2, t3, t4, t5_nontrans;
+-- enable_warnings
+
+# we will create a simple deadlock with t1, t2 and two connections
+CREATE TABLE t1 (a INT);
+CREATE TABLE t2 (a INT);
+
+# auxiliary table with a bulk of rows which will be locked by a
+# transaction to increase its weight
+CREATE TABLE t3 (a INT);
+
+# auxiliary empty table which will be inserted by a
+# transaction to increase its weight
+CREATE TABLE t4 (a INT);
+
+# auxiliary non-transactional table which will be edited by a
+# transaction to tremendously increase its weight
+CREATE TABLE t5_nontrans (a INT) ENGINE=MyISAM;
+
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (1);
+# insert a lot of rows in t3
+INSERT INTO t3 VALUES (1);
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+
+# test locking weight
+
+-- let $con1_extra_sql =
+-- let $con1_extra_sql_present = 0
+-- let $con2_extra_sql = SELECT * FROM t3 FOR UPDATE
+-- let $con2_extra_sql_present = 1
+-- let $con1_should_be_rolledback = 1
+-- source include/innodb_trx_weight.inc
+
+-- let $con1_extra_sql = INSERT INTO t4 VALUES (1), (1)
+-- let $con1_extra_sql_present = 1
+-- let $con2_extra_sql = SELECT * FROM t3 FOR UPDATE
+-- let $con2_extra_sql_present = 1
+-- let $con1_should_be_rolledback = 1
+-- source include/innodb_trx_weight.inc
+
+-- let $con1_extra_sql = INSERT INTO t4 VALUES (1), (1), (1), (1), (1), (1)
+-- let $con1_extra_sql_present = 1
+-- let $con2_extra_sql = SELECT * FROM t3 FOR UPDATE
+-- let $con2_extra_sql_present = 1
+-- let $con1_should_be_rolledback = 0
+-- source include/innodb_trx_weight.inc
+
+# test weight when non-transactional tables are edited
+
+-- let $con1_extra_sql = INSERT INTO t4 VALUES (1), (1), (1)
+-- let $con1_extra_sql_present = 1
+-- let $con2_extra_sql =
+-- let $con2_extra_sql_present = 0
+-- let $con1_should_be_rolledback = 0
+-- source include/innodb_trx_weight.inc
+
+-- let $con1_extra_sql = INSERT INTO t4 VALUES (1), (1), (1)
+-- let $con1_extra_sql_present = 1
+-- let $con2_extra_sql = INSERT INTO t5_nontrans VALUES (1)
+-- let $con2_extra_sql_present = 1
+-- let $con1_should_be_rolledback = 1
+-- source include/innodb_trx_weight.inc
+
+-- let $con1_extra_sql = INSERT INTO t4 VALUES (1), (1), (1)
+-- let $con1_extra_sql = $con1_extra_sql; INSERT INTO t5_nontrans VALUES (1)
+-- let $con1_extra_sql_present = 1
+-- let $con2_extra_sql = INSERT INTO t5_nontrans VALUES (1)
+-- let $con2_extra_sql_present = 1
+-- let $con1_should_be_rolledback = 0
+-- source include/innodb_trx_weight.inc
+
+DROP TABLE t1, t2, t3, t4, t5_nontrans;
diff --git a/storage/innodb_plugin/mysql-test/patches/README b/storage/innodb_plugin/mysql-test/patches/README
new file mode 100644
index 00000000000..122d756e9e3
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/patches/README
@@ -0,0 +1,30 @@
+This directory contains patches that need to be applied to the MySQL
+source tree in order to get the mysql-test suite to succeed (when
+storage/innobase is replaced with this InnoDB branch). Things to keep
+in mind when adding new patches here:
+
+* The patch must be appliable from the mysql top-level source directory.
+
+* The patch filename must end in ".diff".
+
+* All patches here are expected to apply cleanly to the latest MySQL 5.1
+  tree when storage/innobase is replaced with this InnoDB branch. If
+  changes to either of those cause the patch to fail, then please check
+  whether the patch is still needed and, if yes, adjust it so it applies
+  cleanly.
+
+* If applicable, always submit the patch at http://bugs.mysql.com and
+  name the file here like bug%d.diff. Once the patch is committed to
+  MySQL remove the file from here.
+
+* If the patch cannot be proposed for inclusion in the MySQL source tree
+  (via http://bugs.mysql.com) then add a comment at the beginning of the
+  patch, explaining the problem it is solving, how it does solve it and
+  why it is not applicable for inclusion in the MySQL source tree.
+  Obviously this is a very bad situation and should be avoided at all
+  costs, especially for files that are in the MySQL source repository
+  (not in storage/innobase).
+
+* If you ever need to add a patch here that is not related to mysql-test
+  suite, then please move this directory from ./mysql-test/patches to
+  ./patches and remove this text.
diff --git a/storage/innodb_plugin/mysql-test/patches/index_merge_innodb-explain.diff b/storage/innodb_plugin/mysql-test/patches/index_merge_innodb-explain.diff
new file mode 100644
index 00000000000..d1ed8afc778
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/patches/index_merge_innodb-explain.diff
@@ -0,0 +1,31 @@
+InnoDB's estimate for the index cardinality depends on a pseudo random
+number generator (it picks up random pages to sample). After an
+optimization that was made in r2625 the following EXPLAINs started
+returning a different number of rows (3 instead of 4).
+
+This patch adjusts the result file.
+
+This patch cannot be proposed to MySQL because the failures occur only
+in this tree and do not occur in the standard InnoDB 5.1. Furthermore,
+the file index_merge2.inc is used by other engines too.
+
+--- mysql-test/r/index_merge_innodb.result.orig	2008-09-30 18:32:13.000000000 +0300
++++ mysql-test/r/index_merge_innodb.result	2008-09-30 18:33:01.000000000 +0300
+@@ -111,7 +111,7 @@
+ explain select count(*) from t1 where
+ key1a = 2 and key1b is null and  key2a = 2 and key2b is null;
+ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+-1	SIMPLE	t1	index_merge	i1,i2	i1,i2	10,10	NULL	4	Using intersect(i1,i2); Using where; Using index
++1	SIMPLE	t1	index_merge	i1,i2	i1,i2	10,10	NULL	3	Using intersect(i1,i2); Using where; Using index
+ select count(*) from t1 where
+ key1a = 2 and key1b is null and key2a = 2 and key2b is null;
+ count(*)
+@@ -119,7 +119,7 @@
+ explain select count(*) from t1 where
+ key1a = 2 and key1b is null and key3a = 2 and key3b is null;
+ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+-1	SIMPLE	t1	index_merge	i1,i3	i1,i3	10,10	NULL	4	Using intersect(i1,i3); Using where; Using index
++1	SIMPLE	t1	index_merge	i1,i3	i1,i3	10,10	NULL	3	Using intersect(i1,i3); Using where; Using index
+ select count(*) from t1 where
+ key1a = 2 and key1b is null and key3a = 2 and key3b is null;
+ count(*)
diff --git a/storage/innodb_plugin/mysql-test/patches/information_schema.diff b/storage/innodb_plugin/mysql-test/patches/information_schema.diff
new file mode 100644
index 00000000000..a3a21f7a08d
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/patches/information_schema.diff
@@ -0,0 +1,124 @@
+--- mysql-test/r/information_schema.result.orig	2009-01-31 03:38:50.000000000 +0200
++++ mysql-test/r/information_schema.result	2009-01-31 07:51:58.000000000 +0200
+@@ -71,6 +71,13 @@
+ TRIGGERS
+ USER_PRIVILEGES
+ VIEWS
++INNODB_CMP_RESET
++INNODB_TRX
++INNODB_CMPMEM_RESET
++INNODB_LOCK_WAITS
++INNODB_CMPMEM
++INNODB_CMP
++INNODB_LOCKS
+ columns_priv
+ db
+ event
+@@ -799,6 +806,8 @@
+ TABLES	UPDATE_TIME	datetime
+ TABLES	CHECK_TIME	datetime
+ TRIGGERS	CREATED	datetime
++INNODB_TRX	trx_started	datetime
++INNODB_TRX	trx_wait_started	datetime
+ event	execute_at	datetime
+ event	last_executed	datetime
+ event	starts	datetime
+@@ -852,7 +861,7 @@
+ flush privileges;
+ SELECT table_schema, count(*) FROM information_schema.TABLES WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test', 'mysqltest') AND table_name<>'ndb_binlog_index' AND table_name<>'ndb_apply_status' GROUP BY TABLE_SCHEMA;
+ table_schema	count(*)
+-information_schema	28
++information_schema	35
+ mysql	22
+ create table t1 (i int, j int);
+ create trigger trg1 before insert on t1 for each row
+@@ -1267,6 +1276,13 @@
+ TRIGGERS	TRIGGER_SCHEMA
+ USER_PRIVILEGES	GRANTEE
+ VIEWS	TABLE_SCHEMA
++INNODB_CMP_RESET	page_size
++INNODB_TRX	trx_id
++INNODB_CMPMEM_RESET	page_size
++INNODB_LOCK_WAITS	requesting_trx_id
++INNODB_CMPMEM	page_size
++INNODB_CMP	page_size
++INNODB_LOCKS	lock_id
+ SELECT t.table_name, c1.column_name
+ FROM information_schema.tables t
+ INNER JOIN
+@@ -1310,6 +1326,13 @@
+ TRIGGERS	TRIGGER_SCHEMA
+ USER_PRIVILEGES	GRANTEE
+ VIEWS	TABLE_SCHEMA
++INNODB_CMP_RESET	page_size
++INNODB_TRX	trx_id
++INNODB_CMPMEM_RESET	page_size
++INNODB_LOCK_WAITS	requesting_trx_id
++INNODB_CMPMEM	page_size
++INNODB_CMP	page_size
++INNODB_LOCKS	lock_id
+ SELECT MAX(table_name) FROM information_schema.tables WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test');
+ MAX(table_name)
+ VIEWS
+@@ -1386,6 +1409,13 @@
+ FILES	information_schema.FILES	1
+ GLOBAL_STATUS	information_schema.GLOBAL_STATUS	1
+ GLOBAL_VARIABLES	information_schema.GLOBAL_VARIABLES	1
++INNODB_CMP	information_schema.INNODB_CMP	1
++INNODB_CMPMEM	information_schema.INNODB_CMPMEM	1
++INNODB_CMPMEM_RESET	information_schema.INNODB_CMPMEM_RESET	1
++INNODB_CMP_RESET	information_schema.INNODB_CMP_RESET	1
++INNODB_LOCKS	information_schema.INNODB_LOCKS	1
++INNODB_LOCK_WAITS	information_schema.INNODB_LOCK_WAITS	1
++INNODB_TRX	information_schema.INNODB_TRX	1
+ KEY_COLUMN_USAGE	information_schema.KEY_COLUMN_USAGE	1
+ PARTITIONS	information_schema.PARTITIONS	1
+ PLUGINS	information_schema.PLUGINS	1
+diff mysql-test/r/information_schema_db.result.orig mysql-test/r/information_schema_db.result
+--- mysql-test/r/information_schema_db.result.orig	2008-08-04 09:27:49.000000000 +0300
++++ mysql-test/r/information_schema_db.result	2008-10-07 12:26:31.000000000 +0300
+@@ -33,6 +33,13 @@
+ TRIGGERS
+ USER_PRIVILEGES
+ VIEWS
++INNODB_CMP_RESET
++INNODB_TRX
++INNODB_CMPMEM_RESET
++INNODB_LOCK_WAITS
++INNODB_CMPMEM
++INNODB_CMP
++INNODB_LOCKS
+ show tables from INFORMATION_SCHEMA like 'T%';
+ Tables_in_information_schema (T%)
+ TABLES
+diff mysql-test/r/mysqlshow.result.orig mysql-test/r/mysqlshow.result
+--- mysql-test/r/mysqlshow.result.orig	2008-08-04 09:27:51.000000000 +0300
++++ mysql-test/r/mysqlshow.result	2008-10-07 12:35:39.000000000 +0300
+@@ -107,6 +107,13 @@
+ | TRIGGERS                              |
+ | USER_PRIVILEGES                       |
+ | VIEWS                                 |
++| INNODB_CMP_RESET                      |
++| INNODB_TRX                            |
++| INNODB_CMPMEM_RESET                   |
++| INNODB_LOCK_WAITS                     |
++| INNODB_CMPMEM                         |
++| INNODB_CMP                            |
++| INNODB_LOCKS                          |
+ +---------------------------------------+
+ Database: INFORMATION_SCHEMA
+ +---------------------------------------+
+@@ -140,6 +147,13 @@
+ | TRIGGERS                              |
+ | USER_PRIVILEGES                       |
+ | VIEWS                                 |
++| INNODB_CMP_RESET                      |
++| INNODB_TRX                            |
++| INNODB_CMPMEM_RESET                   |
++| INNODB_LOCK_WAITS                     |
++| INNODB_CMPMEM                         |
++| INNODB_CMP                            |
++| INNODB_LOCKS                          |
+ +---------------------------------------+
+ Wildcard: inf_rmation_schema
+ +--------------------+
diff --git a/storage/innodb_plugin/mysql-test/patches/innodb-index.diff b/storage/innodb_plugin/mysql-test/patches/innodb-index.diff
new file mode 100644
index 00000000000..0b008c96f25
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/patches/innodb-index.diff
@@ -0,0 +1,62 @@
+This part of the innodb-index test causes mysqld to print some warnings
+and subsequently the whole mysql-test suite to fail.
+
+A permanent solution is probably to remove the printouts from the source
+code or to somehow tell the mysql-test suite that warnings are expected.
+Currently we simply do not execute the problematic tests. Please
+coordinate a permanent solution with Marko, who added those tests.
+
+This cannot be proposed to MySQL because it touches files that are not
+in the MySQL source repository.
+
+Index: storage/innobase/mysql-test/innodb-index.result
+===================================================================
+--- storage/innobase/mysql-test/innodb-index.result	(revision 2870)
++++ storage/innobase/mysql-test/innodb-index.result	(working copy)
+@@ -43,19 +43,12 @@ t1	CREATE TABLE `t1` (
+   `b` int(11) DEFAULT NULL,
+   `c` char(10) NOT NULL,
+   `d` varchar(20) DEFAULT NULL,
+   KEY `d2` (`d`),
+   KEY `b` (`b`)
+ ) ENGINE=InnoDB DEFAULT CHARSET=latin1
+-CREATE TABLE `t1#1`(a INT PRIMARY KEY) ENGINE=InnoDB;
+-alter table t1 add unique index (c), add index (d);
+-ERROR HY000: Table 'test.t1#1' already exists
+-rename table `t1#1` to `t1#2`;
+-alter table t1 add unique index (c), add index (d);
+-ERROR HY000: Table 'test.t1#2' already exists
+-drop table `t1#2`;
+ alter table t1 add unique index (c), add index (d);
+ show create table t1;
+ Table	Create Table
+ t1	CREATE TABLE `t1` (
+   `a` int(11) NOT NULL,
+   `b` int(11) DEFAULT NULL,
+Index: storage/innobase/mysql-test/innodb-index.test
+===================================================================
+--- storage/innobase/mysql-test/innodb-index.test	(revision 2870)
++++ storage/innobase/mysql-test/innodb-index.test	(working copy)
+@@ -14,22 +14,12 @@ select * from t1 force index (d2) order 
+ --error ER_DUP_ENTRY
+ alter table t1 add unique index (b);
+ show create table t1;
+ alter table t1 add index (b);
+ show create table t1;
+ 
+-# Check how existing tables interfere with temporary tables.
+-CREATE TABLE `t1#1`(a INT PRIMARY KEY) ENGINE=InnoDB;
+-
+---error 156
+-alter table t1 add unique index (c), add index (d);
+-rename table `t1#1` to `t1#2`;
+---error 156
+-alter table t1 add unique index (c), add index (d);
+-drop table `t1#2`;
+-
+ alter table t1 add unique index (c), add index (d);
+ show create table t1;
+ explain select * from t1 force index(c) order by c;
+ alter table t1 add primary key (a), drop index c;
+ show create table t1;
+ --error ER_MULTIPLE_PRI_KEY
diff --git a/storage/innodb_plugin/mysql-test/patches/innodb_file_per_table.diff b/storage/innodb_plugin/mysql-test/patches/innodb_file_per_table.diff
new file mode 100644
index 00000000000..8b7ae2036c9
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/patches/innodb_file_per_table.diff
@@ -0,0 +1,47 @@
+diff mysql-test/suite/sys_vars/t/innodb_file_per_table_basic.test.orig mysql-test/suite/sys_vars/t/innodb_file_per_table_basic.test
+--- mysql-test/suite/sys_vars/t/innodb_file_per_table_basic.test.orig	2008-10-07 11:32:30.000000000 +0300
++++ mysql-test/suite/sys_vars/t/innodb_file_per_table_basic.test	2008-10-07 11:52:14.000000000 +0300
+@@ -37,10 +37,6 @@
+ #   Check if Value can set                                         #
+ ####################################################################
+ 
+---error ER_INCORRECT_GLOBAL_LOCAL_VAR
+-SET @@GLOBAL.innodb_file_per_table=1;
+---echo Expected error 'Read only variable'
+-
+ SELECT COUNT(@@GLOBAL.innodb_file_per_table);
+ --echo 1 Expected
+ 
+@@ -52,7 +48,7 @@
+ # Check if the value in GLOBAL Table matches value in variable  #
+ #################################################################
+ 
+-SELECT @@GLOBAL.innodb_file_per_table = VARIABLE_VALUE
++SELECT IF(@@GLOBAL.innodb_file_per_table,'ON','OFF') = VARIABLE_VALUE
+ FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
+ WHERE VARIABLE_NAME='innodb_file_per_table';
+ --echo 1 Expected
+diff mysql-test/suite/sys_vars/r/innodb_file_per_table_basic.result.orig mysql-test/suite/sys_vars/r/innodb_file_per_table_basic.result
+--- mysql-test/suite/sys_vars/r/innodb_file_per_table_basic.result.orig	2008-10-07 11:32:02.000000000 +0300
++++ mysql-test/suite/sys_vars/r/innodb_file_per_table_basic.result	2008-10-07 11:52:47.000000000 +0300
+@@ -4,18 +4,15 @@
+ 1
+ 1 Expected
+ '#---------------------BS_STVARS_028_02----------------------#'
+-SET @@GLOBAL.innodb_file_per_table=1;
+-ERROR HY000: Variable 'innodb_file_per_table' is a read only variable
+-Expected error 'Read only variable'
+ SELECT COUNT(@@GLOBAL.innodb_file_per_table);
+ COUNT(@@GLOBAL.innodb_file_per_table)
+ 1
+ 1 Expected
+ '#---------------------BS_STVARS_028_03----------------------#'
+-SELECT @@GLOBAL.innodb_file_per_table = VARIABLE_VALUE
++SELECT IF(@@GLOBAL.innodb_file_per_table,'ON','OFF') = VARIABLE_VALUE
+ FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
+ WHERE VARIABLE_NAME='innodb_file_per_table';
+-@@GLOBAL.innodb_file_per_table = VARIABLE_VALUE
++IF(@@GLOBAL.innodb_file_per_table,'ON','OFF') = VARIABLE_VALUE
+ 1
+ 1 Expected
+ SELECT COUNT(@@GLOBAL.innodb_file_per_table);
diff --git a/storage/innodb_plugin/mysql-test/patches/innodb_lock_wait_timeout.diff b/storage/innodb_plugin/mysql-test/patches/innodb_lock_wait_timeout.diff
new file mode 100644
index 00000000000..bc61a0f5841
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/patches/innodb_lock_wait_timeout.diff
@@ -0,0 +1,55 @@
+--- mysql-test/suite/sys_vars/t/innodb_lock_wait_timeout_basic.test.orig	2008-08-04 09:28:16.000000000 +0300
++++ mysql-test/suite/sys_vars/t/innodb_lock_wait_timeout_basic.test	2008-10-07 11:14:15.000000000 +0300
+@@ -37,10 +37,6 @@
+ #   Check if Value can set                                         #
+ ####################################################################
+ 
+---error ER_INCORRECT_GLOBAL_LOCAL_VAR
+-SET @@GLOBAL.innodb_lock_wait_timeout=1;
+---echo Expected error 'Read only variable'
+-
+ SELECT COUNT(@@GLOBAL.innodb_lock_wait_timeout);
+ --echo 1 Expected
+ 
+@@ -84,13 +80,9 @@
+ SELECT COUNT(@@innodb_lock_wait_timeout);
+ --echo 1 Expected
+ 
+---Error ER_INCORRECT_GLOBAL_LOCAL_VAR
+ SELECT COUNT(@@local.innodb_lock_wait_timeout);
+---echo Expected error 'Variable is a GLOBAL variable'
+ 
+---Error ER_INCORRECT_GLOBAL_LOCAL_VAR
+ SELECT COUNT(@@SESSION.innodb_lock_wait_timeout);
+---echo Expected error 'Variable is a GLOBAL variable'
+ 
+ SELECT COUNT(@@GLOBAL.innodb_lock_wait_timeout);
+ --echo 1 Expected
+--- mysql-test/suite/sys_vars/r/innodb_lock_wait_timeout_basic.result.orig	2008-08-04 09:27:50.000000000 +0300
++++ mysql-test/suite/sys_vars/r/innodb_lock_wait_timeout_basic.result	2008-10-07 11:15:14.000000000 +0300
+@@ -4,9 +4,6 @@
+ 1
+ 1 Expected
+ '#---------------------BS_STVARS_032_02----------------------#'
+-SET @@GLOBAL.innodb_lock_wait_timeout=1;
+-ERROR HY000: Variable 'innodb_lock_wait_timeout' is a read only variable
+-Expected error 'Read only variable'
+ SELECT COUNT(@@GLOBAL.innodb_lock_wait_timeout);
+ COUNT(@@GLOBAL.innodb_lock_wait_timeout)
+ 1
+@@ -39,11 +36,11 @@
+ 1
+ 1 Expected
+ SELECT COUNT(@@local.innodb_lock_wait_timeout);
+-ERROR HY000: Variable 'innodb_lock_wait_timeout' is a GLOBAL variable
+-Expected error 'Variable is a GLOBAL variable'
++COUNT(@@local.innodb_lock_wait_timeout)
++1
+ SELECT COUNT(@@SESSION.innodb_lock_wait_timeout);
+-ERROR HY000: Variable 'innodb_lock_wait_timeout' is a GLOBAL variable
+-Expected error 'Variable is a GLOBAL variable'
++COUNT(@@SESSION.innodb_lock_wait_timeout)
++1
+ SELECT COUNT(@@GLOBAL.innodb_lock_wait_timeout);
+ COUNT(@@GLOBAL.innodb_lock_wait_timeout)
+ 1
diff --git a/storage/innodb_plugin/mysql-test/patches/innodb_thread_concurrency_basic.diff b/storage/innodb_plugin/mysql-test/patches/innodb_thread_concurrency_basic.diff
new file mode 100644
index 00000000000..72e5457905f
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/patches/innodb_thread_concurrency_basic.diff
@@ -0,0 +1,31 @@
+--- mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result.orig	2008-12-04 18:45:52 -06:00
++++ mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result	2009-02-12 02:05:48 -06:00
+@@ -1,19 +1,19 @@
+ SET @global_start_value = @@global.innodb_thread_concurrency;
+ SELECT @global_start_value;
+ @global_start_value
+-8
++0
+ '#--------------------FN_DYNVARS_046_01------------------------#'
+ SET @@global.innodb_thread_concurrency = 0;
+ SET @@global.innodb_thread_concurrency = DEFAULT;
+ SELECT @@global.innodb_thread_concurrency;
+ @@global.innodb_thread_concurrency
+-8
++0
+ '#---------------------FN_DYNVARS_046_02-------------------------#'
+ SET innodb_thread_concurrency = 1;
+ ERROR HY000: Variable 'innodb_thread_concurrency' is a GLOBAL variable and should be set with SET GLOBAL
+ SELECT @@innodb_thread_concurrency;
+ @@innodb_thread_concurrency
+-8
++0
+ SELECT local.innodb_thread_concurrency;
+ ERROR 42S02: Unknown table 'local' in field list
+ SET global innodb_thread_concurrency = 0;
+@@ -93,4 +93,4 @@
+ SET @@global.innodb_thread_concurrency = @global_start_value;
+ SELECT @@global.innodb_thread_concurrency;
+ @@global.innodb_thread_concurrency
+-8
++0
diff --git a/storage/innodb_plugin/mysql-test/patches/partition_innodb.diff b/storage/innodb_plugin/mysql-test/patches/partition_innodb.diff
new file mode 100644
index 00000000000..01bc073008e
--- /dev/null
+++ b/storage/innodb_plugin/mysql-test/patches/partition_innodb.diff
@@ -0,0 +1,59 @@
+The partition_innodb test only fails if run immediately after innodb_trx_weight.
+The reason for this failure is that innodb_trx_weight creates deadlocks and
+leaves something like this in the SHOW ENGINE INNODB STATUS output:
+
+  ------------------------
+  LATEST DETECTED DEADLOCK
+  ------------------------
+  090213 10:26:25
+  *** (1) TRANSACTION:
+  TRANSACTION 313, ACTIVE 0 sec, OS thread id 13644672 inserting
+  mysql tables in use 1, locked 1
+  LOCK WAIT 4 lock struct(s), heap size 488, 3 row lock(s)
+  MySQL thread id 3, query id 36 localhost root update
+
+The regular expressions that partition_innodb is using are intended to extract
+the lock structs and row locks numbers from another part of the output:
+
+  ------------
+  TRANSACTIONS
+  ------------
+  Trx id counter 31D
+  Purge done for trx's n:o < 0 undo n:o < 0
+  History list length 4
+  LIST OF TRANSACTIONS FOR EACH SESSION:
+  ---TRANSACTION 0, not started, OS thread id 13645056
+  0 lock struct(s), heap size 488, 0 row lock(s)
+  MySQL thread id 8, query id 81 localhost root
+
+In the InnoDB Plugin a transaction id is not printed as 2 consecutive
+decimal integers (as it is in InnoDB 5.1) but rather as a single
+hexadecimal integer. Thus the regular expressions somehow pick the wrong
+part of the SHOW ENGINE INNODB STATUS output.
+
+So after the regular expressions are adjusted to the InnoDB Plugin's variant
+of trx_id prinout, then they pick the expected part of the output.
+
+This patch cannot be proposed to MySQL because the failures occur only
+in this tree and do not occur in the standard InnoDB 5.1.
+
+--- mysql-test/t/partition_innodb.test	2008-11-14 22:51:17 +0000
++++ mysql-test/t/partition_innodb.test	2009-02-13 07:36:07 +0000
+@@ -27,14 +27,14 @@
+ 
+ # grouping/referencing in replace_regex is very slow on long strings,
+ # removing all before/after the interesting row before grouping/referencing
+---replace_regex /.*---TRANSACTION [0-9]+ [0-9]+, .*, OS thread id [0-9]+// /MySQL thread id [0-9]+, query id [0-9]+ .*// /.*([0-9]+ lock struct\(s\)), heap size [0-9]+, ([0-9]+ row lock\(s\)).*/\1 \2/
++--replace_regex /.*---TRANSACTION [0-9A-F]+, .*, OS thread id [0-9]+// /MySQL thread id [0-9]+, query id [0-9]+ .*// /.*([0-9]+ lock struct\(s\)), heap size [0-9]+, ([0-9]+ row lock\(s\)).*/\1 \2/
+ SHOW ENGINE InnoDB STATUS;
+ 
+ UPDATE t1 SET data = data*2 WHERE data = 2;
+ 
+ # grouping/referencing in replace_regex is very slow on long strings,
+ # removing all before/after the interesting row before grouping/referencing
+---replace_regex /.*---TRANSACTION [0-9]+ [0-9]+, .*, OS thread id [0-9]+// /MySQL thread id [0-9]+, query id [0-9]+ .*// /.*([0-9]+ lock struct\(s\)), heap size [0-9]+, ([0-9]+ row lock\(s\)).*/\1 \2/
++--replace_regex /.*---TRANSACTION [0-9A-F]+, .*, OS thread id [0-9]+// /MySQL thread id [0-9]+, query id [0-9]+ .*// /.*([0-9]+ lock struct\(s\)), heap size [0-9]+, ([0-9]+ row lock\(s\)).*/\1 \2/
+ SHOW ENGINE InnoDB STATUS;
+ 
+ SET @@session.tx_isolation = @old_tx_isolation;
+
diff --git a/storage/innobase/os/os0file.c b/storage/innodb_plugin/os/os0file.c
similarity index 69%
rename from storage/innobase/os/os0file.c
rename to storage/innodb_plugin/os/os0file.c
index 4a9d3334e7d..d3bd6465f5f 100644
--- a/storage/innobase/os/os0file.c
+++ b/storage/innodb_plugin/os/os0file.c
@@ -1,240 +1,236 @@
-/******************************************************
-The interface to the operating system file i/o primitives
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/***********************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+***********************************************************************/
+
+/**************************************************//**
+@file os/os0file.c
+The interface to the operating system file i/o primitives
 
 Created 10/21/1995 Heikki Tuuri
 *******************************************************/
 
 #include "os0file.h"
-#include "os0sync.h"
-#include "os0thread.h"
 #include "ut0mem.h"
 #include "srv0srv.h"
 #include "srv0start.h"
 #include "fil0fil.h"
 #include "buf0buf.h"
-
-#if defined(UNIV_HOTBACKUP) && defined(__WIN__)
+#ifndef UNIV_HOTBACKUP
+# include "os0sync.h"
+# include "os0thread.h"
+#else /* !UNIV_HOTBACKUP */
+# ifdef __WIN__
 /* Add includes for the _stat() call to compile on Windows */
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <errno.h>
-#endif /* UNIV_HOTBACKUP */
-
-#ifdef POSIX_ASYNC_IO
-/* We assume in this case that the OS has standard Posix aio (at least SunOS
-2.6, HP-UX 11i and AIX 4.3 have) */
-
-#endif
+#  include <sys/types.h>
+#  include <sys/stat.h>
+#  include <errno.h>
+# endif /* __WIN__ */
+#endif /* !UNIV_HOTBACKUP */
 
 /* This specifies the file permissions InnoDB uses when it creates files in
 Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
 my_umask */
 
 #ifndef __WIN__
-ulint	os_innodb_umask		= S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+/** Umask for creating files */
+UNIV_INTERN ulint	os_innodb_umask
+			= S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
 #else
-ulint	os_innodb_umask		= 0;
+/** Umask for creating files */
+UNIV_INTERN ulint	os_innodb_umask		= 0;
 #endif
 
 #ifdef UNIV_DO_FLUSH
 /* If the following is set to TRUE, we do not call os_file_flush in every
 os_file_write. We can set this TRUE when the doublewrite buffer is used. */
-ibool	os_do_not_call_flush_at_each_write	= FALSE;
+UNIV_INTERN ibool	os_do_not_call_flush_at_each_write	= FALSE;
 #else
 /* We do not call os_file_flush in every os_file_write. */
 #endif /* UNIV_DO_FLUSH */
 
+#ifndef UNIV_HOTBACKUP
 /* We use these mutexes to protect lseek + file i/o operation, if the
 OS does not provide an atomic pread or pwrite, or similar */
 #define OS_FILE_N_SEEK_MUTEXES	16
-os_mutex_t	os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
+UNIV_INTERN os_mutex_t	os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
 
 /* In simulated aio, merge at most this many consecutive i/os */
 #define OS_AIO_MERGE_N_CONSECUTIVE	64
 
-/* If this flag is TRUE, then we will use the native aio of the
+/** If this flag is TRUE, then we will use the native aio of the
 OS (provided we compiled Innobase with it in), otherwise we will
 use simulated aio we build below with threads */
 
-ibool	os_aio_use_native_aio	= FALSE;
+UNIV_INTERN ibool	os_aio_use_native_aio	= FALSE;
 
-ibool	os_aio_print_debug	= FALSE;
+/** Flag: enable debug printout for asynchronous i/o */
+UNIV_INTERN ibool	os_aio_print_debug	= FALSE;
 
-/* State for the state of an IO request in simulated AIO.
-   Protocol for simulated aio:
-     client requests IO: find slot with reserved = FALSE. Add entry with
-                         status = OS_AIO_NOT_ISSUED.
-     IO thread wakes: find adjacent slots with reserved = TRUE and status =
-                      OS_AIO_NOT_ISSUED. Change status for slots to
-                      OS_AIO_ISSUED.
-     IO operation completes: set status for slots to OS_AIO_DONE. set status
-                             for the first slot to OS_AIO_CLAIMED and return
-                             result for that slot.
-   When there are multiple read and write threads, they all compete to execute
-   the requests in the array (os_aio_array_t). This avoids the need to load
-   balance requests at the time the request is made at the cost of waking all
-   threads when a request is available.
-*/
-typedef enum {
-	OS_AIO_NOT_ISSUED, /* Available to be processed by an IO thread. */
-	OS_AIO_ISSUED,     /* Being processed by an IO thread. */
-	OS_AIO_DONE,       /* Request processed. */
-	OS_AIO_CLAIMED     /* Result being returned to client. */
-} os_aio_status;
-
-/* The aio array slot structure */
+/** The asynchronous i/o array slot structure */
 typedef struct os_aio_slot_struct	os_aio_slot_t;
 
+/** The asynchronous i/o array slot structure */
 struct os_aio_slot_struct{
-	ibool		is_read;	/* TRUE if a read operation */
-	ulint		pos;		/* index of the slot in the aio
+	ibool		is_read;	/*!< TRUE if a read operation */
+	ulint		pos;		/*!< index of the slot in the aio
 					array */
-	ibool		reserved;	/* TRUE if this slot is reserved */
-	os_aio_status   status;		/* Status for current request. Valid when reserved
-					is TRUE. Used only in simulated aio. */
-	time_t		reservation_time;/* time when reserved */
-	ulint		len;		/* length of the block to read or
+	ibool		reserved;	/*!< TRUE if this slot is reserved */
+	time_t		reservation_time;/*!< time when reserved */
+	ulint		len;		/*!< length of the block to read or
 					write */
-	byte*		buf;		/* buffer used in i/o */
-	ulint		type;		/* OS_FILE_READ or OS_FILE_WRITE */
-	ulint		offset;		/* 32 low bits of file offset in
+	byte*		buf;		/*!< buffer used in i/o */
+	ulint		type;		/*!< OS_FILE_READ or OS_FILE_WRITE */
+	ulint		offset;		/*!< 32 low bits of file offset in
 					bytes */
-	ulint		offset_high;	/* 32 high bits of file offset */
-	os_file_t	file;		/* file where to read or write */
-	const char*	name;		/* file name or path */
-	fil_node_t*	message1;	/* message which is given by the */
-	void*		message2;	/* the requester of an aio operation
+	ulint		offset_high;	/*!< 32 high bits of file offset */
+	os_file_t	file;		/*!< file where to read or write */
+	const char*	name;		/*!< file name or path */
+	ibool		io_already_done;/*!< used only in simulated aio:
+					TRUE if the physical i/o already
+					made and only the slot message
+					needs to be passed to the caller
+					of os_aio_simulated_handle */
+	fil_node_t*	message1;	/*!< message which is given by the */
+	void*		message2;	/*!< the requester of an aio operation
 					and which can be used to identify
 					which pending aio operation was
 					completed */
 #ifdef WIN_ASYNC_IO
-	os_event_t	event;		/* event object we need in the
+	os_event_t	event;		/*!< event object we need in the
 					OVERLAPPED struct */
-	OVERLAPPED	control;	/* Windows control block for the
+	OVERLAPPED	control;	/*!< Windows control block for the
 					aio request */
-#elif defined(POSIX_ASYNC_IO)
-	struct aiocb	control;	/* Posix control block for aio
-					request */
 #endif
 };
 
-/* The aio array structure */
+/** The asynchronous i/o array structure */
 typedef struct os_aio_array_struct	os_aio_array_t;
 
+/** The asynchronous i/o array structure */
 struct os_aio_array_struct{
-	os_mutex_t	mutex;	  /* the mutex protecting the aio array */
-	os_event_t	not_full; /* The event which is set to the signaled
-				  state when there is space in the aio
-				  outside the ibuf segment */
-	os_event_t	is_empty; /* The event which is set to the signaled
-				  state when there are no pending i/os
-				  in this array */
-	ulint		n_slots;  /* Total number of slots in the aio array.
-				  This must be divisible by n_threads. */
-	ulint		n_reserved;/* Number of reserved slots in the
-				  aio array outside the ibuf segment */
-	os_aio_slot_t*	slots;	  /* Pointer to the slots in the array */
+	os_mutex_t	mutex;	/*!< the mutex protecting the aio array */
+	os_event_t	not_full;
+				/*!< The event which is set to the
+				signaled state when there is space in
+				the aio outside the ibuf segment */
+	os_event_t	is_empty;
+				/*!< The event which is set to the
+				signaled state when there are no
+				pending i/os in this array */
+	ulint		n_slots;/*!< Total number of slots in the aio
+				array.  This must be divisible by
+				n_threads. */
+	ulint		n_segments;
+				/*!< Number of segments in the aio
+				array of pending aio requests. A
+				thread can wait separately for any one
+				of the segments. */
+	ulint		n_reserved;
+				/*!< Number of reserved slots in the
+				aio array outside the ibuf segment */
+	os_aio_slot_t*	slots;	/*!< Pointer to the slots in the array */
 #ifdef __WIN__
 	os_native_event_t* native_events;
-				  /* Pointer to an array of OS native event
-				  handles where we copied the handles from
-				  slots, in the same order. This can be used
-				  in WaitForMultipleObjects; used only in
-				  Windows */
+				/*!< Pointer to an array of OS native
+				event handles where we copied the
+				handles from slots, in the same
+				order. This can be used in
+				WaitForMultipleObjects; used only in
+				Windows */
 #endif
 };
 
-/* Array of events used in simulated aio */
-os_event_t*	os_aio_segment_wait_events	= NULL;
+/** Array of events used in simulated aio */
+static os_event_t*	os_aio_segment_wait_events	= NULL;
 
-/* Number of threads for reading and writing. */
-ulint os_aio_read_threads = 0;
-ulint os_aio_write_threads = 0;
+/** The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
+are NULL when the module has not yet been initialized. @{ */
+static os_aio_array_t*	os_aio_read_array	= NULL;	/*!< Reads */
+static os_aio_array_t*	os_aio_write_array	= NULL;	/*!< Writes */
+static os_aio_array_t*	os_aio_ibuf_array	= NULL;	/*!< Insert buffer */
+static os_aio_array_t*	os_aio_log_array	= NULL;	/*!< Redo log */
+static os_aio_array_t*	os_aio_sync_array	= NULL;	/*!< Synchronous I/O */
+/* @} */
 
-/* Number for the first global segment for reading. */
-const ulint os_aio_first_read_segment = 2;
-
-/* Number for the first global segment for writing. Set to
-2 + os_aio_read_write_threads. */
-ulint os_aio_first_write_segment = 0;
-
-/* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
-are NULL when the module has not yet been initialized. */
-static os_aio_array_t*	os_aio_read_array	= NULL;
-static os_aio_array_t*	os_aio_write_array	= NULL;
-static os_aio_array_t*	os_aio_ibuf_array	= NULL;
-static os_aio_array_t*	os_aio_log_array	= NULL;
-static os_aio_array_t*	os_aio_sync_array	= NULL;
-
-/* Per thread buffer used for merged IO requests. Used by
-os_aio_simulated_handle so that a buffer doesn't have to be allocated
-for each request. */
-static char* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS];
-static ulint os_aio_thread_buffer_size[SRV_MAX_N_IO_THREADS];
-
-/* Count pages read and written per thread */
-static ulint os_aio_thread_io_reads[SRV_MAX_N_IO_THREADS];
-static ulint os_aio_thread_io_writes[SRV_MAX_N_IO_THREADS];
-
-/* Number of IO operations done. One request can be for N pages. */
-static ulint os_aio_thread_io_requests[SRV_MAX_N_IO_THREADS];
-
-/* usecs spent blocked on an IO request */
-static double os_aio_thread_io_wait[SRV_MAX_N_IO_THREADS];
-/* max usecs spent blocked on an IO request */
-static double os_aio_thread_max_io_wait[SRV_MAX_N_IO_THREADS];
-
-/* Number of IO global segments. An IO handler thread is created for each
-global segment, except for the segment associated with os_aio_sync_array.
-Several segments can be associated with os_aio_{read,write}_array. One
-segment is created for each of the other arrays. This is also the number
-of valid entries in srv_io_thread_reads, srv_io_thread_writes,
-srv_io_thread_op_info, srv_io_thread_function and os_aio_segment_wait_events. */
+/** Number of asynchronous I/O segments.  Set by os_aio_init(). */
 static ulint	os_aio_n_segments	= ULINT_UNDEFINED;
 
-/* Set to TRUE to temporarily block reads from being scheduled while a batch
-of read requests is added to allow them to be merged by the IO handler thread
-if they are adjacent. Declared volatile because we don't want this to be
-read from a register in a loop when another thread may change the value in
-memory.
-*/
-static volatile ibool	os_aio_recommend_sleep_for_read_threads	= FALSE;
+/** If the following is TRUE, read i/o handler threads try to
+wait until a batch of new read requests have been posted */
+static ibool	os_aio_recommend_sleep_for_read_threads	= FALSE;
+#endif /* !UNIV_HOTBACKUP */
 
-ulint	os_n_file_reads		= 0;
-ulint	os_bytes_read_since_printout = 0;
-ulint	os_n_file_writes	= 0;
-ulint	os_n_fsyncs		= 0;
-ulint	os_n_file_reads_old	= 0;
-ulint	os_n_file_writes_old	= 0;
-ulint	os_n_fsyncs_old		= 0;
-time_t	os_last_printout;
+UNIV_INTERN ulint	os_n_file_reads		= 0;
+UNIV_INTERN ulint	os_bytes_read_since_printout = 0;
+UNIV_INTERN ulint	os_n_file_writes	= 0;
+UNIV_INTERN ulint	os_n_fsyncs		= 0;
+UNIV_INTERN ulint	os_n_file_reads_old	= 0;
+UNIV_INTERN ulint	os_n_file_writes_old	= 0;
+UNIV_INTERN ulint	os_n_fsyncs_old		= 0;
+UNIV_INTERN time_t	os_last_printout;
 
-ibool	os_has_said_disk_full	= FALSE;
+UNIV_INTERN ibool	os_has_said_disk_full	= FALSE;
 
-/* The mutex protecting the following counts of pending I/O operations */
-static os_mutex_t os_file_count_mutex;
-ulint	os_file_n_pending_preads  = 0;
-ulint	os_file_n_pending_pwrites = 0;
-ulint	os_n_pending_writes = 0;
-ulint	os_n_pending_reads = 0;
-
-static double time_usecs() {
-  ulint sec, ms;
-  if (ut_usectime(&sec, &ms))
-    return 0;
-  else
-    return sec * 1000000.0 + ms;
-}
-
-/***************************************************************************
-Gets the operating system version. Currently works only on Windows. */
+#ifndef UNIV_HOTBACKUP
+/** The mutex protecting the following counts of pending I/O operations */
+static os_mutex_t	os_file_count_mutex;
+#endif /* !UNIV_HOTBACKUP */
+/** Number of pending os_file_pread() operations */
+UNIV_INTERN ulint	os_file_n_pending_preads  = 0;
+/** Number of pending os_file_pwrite() operations */
+UNIV_INTERN ulint	os_file_n_pending_pwrites = 0;
+/** Number of pending write operations */
+UNIV_INTERN ulint	os_n_pending_writes = 0;
+/** Number of pending read operations */
+UNIV_INTERN ulint	os_n_pending_reads = 0;
 
+/***********************************************************************//**
+Gets the operating system version. Currently works only on Windows.
+@return	OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */
+UNIV_INTERN
 ulint
 os_get_os_version(void)
 /*===================*/
-		  /* out: OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */
 {
 #ifdef __WIN__
 	OSVERSIONINFO	  os_info;
@@ -264,18 +260,17 @@ os_get_os_version(void)
 #endif
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Retrieves the last error number if an error occurs in a file io function.
 The number should be retrieved before any other OS calls (because they may
 overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned. */
-
+the OS error number + 100 is returned.
+@return	error number, or OS error number + 100 */
+UNIV_INTERN
 ulint
 os_file_get_last_error(
 /*===================*/
-					/* out: error number, or OS error
-					number + 100 */
-	ibool	report_all_errors)	/* in: TRUE if we want an error message
+	ibool	report_all_errors)	/*!< in: TRUE if we want an error message
 					printed of all errors */
 {
 	ulint	err;
@@ -325,7 +320,7 @@ os_file_get_last_error(
 				"InnoDB: Some operating system error numbers"
 				" are described at\n"
 				"InnoDB: "
-				"http://dev.mysql.com/doc/refman/5.1/en/"
+				REFMAN
 				"operating-system-error-codes.html\n");
 		}
 	}
@@ -384,7 +379,7 @@ os_file_get_last_error(
 				"InnoDB: Some operating system"
 				" error numbers are described at\n"
 				"InnoDB: "
-				"http://dev.mysql.com/doc/refman/5.1/en/"
+				REFMAN
 				"operating-system-error-codes.html\n");
 		}
 	}
@@ -393,10 +388,6 @@ os_file_get_last_error(
 
 	if (err == ENOSPC) {
 		return(OS_FILE_DISK_FULL);
-#ifdef POSIX_ASYNC_IO
-	} else if (err == EAGAIN) {
-		return(OS_FILE_AIO_RESOURCES_RESERVED);
-#endif
 	} else if (err == ENOENT) {
 		return(OS_FILE_NOT_FOUND);
 	} else if (err == EEXIST) {
@@ -409,20 +400,18 @@ os_file_get_last_error(
 #endif
 }
 
-/********************************************************************
+/****************************************************************//**
 Does error handling when a file operation fails.
 Conditionally exits (calling exit(3)) based on should_exit value and the
-error type */
-
+error type
+@return	TRUE if we should retry the operation */
 static
 ibool
 os_file_handle_error_cond_exit(
 /*===========================*/
-					/* out: TRUE if we should retry the
-					operation */
-	const char*	name,		/* in: name of a file or NULL */
-	const char*	operation,	/* in: operation */
-	ibool		should_exit)	/* in: call exit(3) if unknown error
+	const char*	name,		/*!< in: name of a file or NULL */
+	const char*	operation,	/*!< in: operation */
+	ibool		should_exit)	/*!< in: call exit(3) if unknown error
 					and this parameter is TRUE */
 {
 	ulint	err;
@@ -485,31 +474,29 @@ os_file_handle_error_cond_exit(
 	return(FALSE);
 }
 
-/********************************************************************
-Does error handling when a file operation fails. */
+/****************************************************************//**
+Does error handling when a file operation fails.
+@return	TRUE if we should retry the operation */
 static
 ibool
 os_file_handle_error(
 /*=================*/
-				/* out: TRUE if we should retry the
-				operation */
-	const char*	name,	/* in: name of a file or NULL */
-	const char*	operation)/* in: operation */
+	const char*	name,	/*!< in: name of a file or NULL */
+	const char*	operation)/*!< in: operation */
 {
 	/* exit in case of unknown error */
 	return(os_file_handle_error_cond_exit(name, operation, TRUE));
 }
 
-/********************************************************************
-Does error handling when a file operation fails. */
+/****************************************************************//**
+Does error handling when a file operation fails.
+@return	TRUE if we should retry the operation */
 static
 ibool
 os_file_handle_error_no_exit(
 /*=========================*/
-				/* out: TRUE if we should retry the
-				operation */
-	const char*	name,	/* in: name of a file or NULL */
-	const char*	operation)/* in: operation */
+	const char*	name,	/*!< in: name of a file or NULL */
+	const char*	operation)/*!< in: operation */
 {
 	/* don't exit in case of unknown error */
 	return(os_file_handle_error_cond_exit(name, operation, FALSE));
@@ -524,15 +511,15 @@ os_file_handle_error_no_exit(
 # undef USE_FILE_LOCK
 #endif
 #ifdef USE_FILE_LOCK
-/********************************************************************
-Obtain an exclusive lock on a file. */
+/****************************************************************//**
+Obtain an exclusive lock on a file.
+@return	0 on success */
 static
 int
 os_file_lock(
 /*=========*/
-				/* out: 0 on success */
-	int		fd,	/* in: file descriptor */
-	const char*	name)	/* in: file name */
+	int		fd,	/*!< in: file descriptor */
+	const char*	name)	/*!< in: file name */
 {
 	struct flock lk;
 	lk.l_type = F_WRLCK;
@@ -557,9 +544,10 @@ os_file_lock(
 }
 #endif /* USE_FILE_LOCK */
 
-/********************************************************************
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
 Creates the seek mutexes used in positioned reads and writes. */
-
+UNIV_INTERN
 void
 os_io_init_simple(void)
 /*===================*/
@@ -573,74 +561,57 @@ os_io_init_simple(void)
 	}
 }
 
-#if !defined(UNIV_HOTBACKUP) && !defined(__NETWARE__)
-/*************************************************************************
-Creates a temporary file that will be deleted on close.
-This function is defined in ha_innodb.cc. */
-
-int
-innobase_mysql_tmpfile(void);
-/*========================*/
-			/* out: temporary file descriptor, or < 0 on error */
-#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */
-
-/***************************************************************************
+/***********************************************************************//**
 Creates a temporary file.  This function is like tmpfile(3), but
 the temporary file is created in the MySQL temporary directory.
 On Netware, this function is like tmpfile(3), because the C run-time
-library of Netware does not expose the delete-on-close flag. */
-
+library of Netware does not expose the delete-on-close flag.
+@return	temporary file handle, or NULL on error */
+UNIV_INTERN
 FILE*
 os_file_create_tmpfile(void)
 /*========================*/
-			/* out: temporary file handle, or NULL on error */
 {
-#ifdef UNIV_HOTBACKUP
-	ut_error;
-
-	return(NULL);
-#else
-# ifdef __NETWARE__
+#ifdef __NETWARE__
 	FILE*	file	= tmpfile();
-# else /* __NETWARE__ */
+#else /* __NETWARE__ */
 	FILE*	file	= NULL;
 	int	fd	= innobase_mysql_tmpfile();
 
 	if (fd >= 0) {
 		file = fdopen(fd, "w+b");
 	}
-# endif /* __NETWARE__ */
+#endif /* __NETWARE__ */
 
 	if (!file) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr,
 			"  InnoDB: Error: unable to create temporary file;"
 			" errno: %d\n", errno);
-# ifndef __NETWARE__
+#ifndef __NETWARE__
 		if (fd >= 0) {
 			close(fd);
 		}
-# endif /* !__NETWARE__ */
+#endif /* !__NETWARE__ */
 	}
 
 	return(file);
-#endif /* UNIV_HOTBACKUP */
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***************************************************************************
+/***********************************************************************//**
 The os_file_opendir() function opens a directory stream corresponding to the
 directory named by the dirname argument. The directory stream is positioned
 at the first entry. In both Unix and Windows we automatically skip the '.'
-and '..' items at the start of the directory listing. */
-
+and '..' items at the start of the directory listing.
+@return	directory stream, NULL if error */
+UNIV_INTERN
 os_file_dir_t
 os_file_opendir(
 /*============*/
-					/* out: directory stream, NULL if
-					error */
-	const char*	dirname,	/* in: directory name; it must not
+	const char*	dirname,	/*!< in: directory name; it must not
 					contain a trailing '\' or '/' */
-	ibool		error_is_fatal)	/* in: TRUE if we should treat an
+	ibool		error_is_fatal)	/*!< in: TRUE if we should treat an
 					error as a fatal error; if we try to
 					open symlinks then we do not wish a
 					fatal error if it happens not to be
@@ -687,14 +658,14 @@ os_file_opendir(
 #endif
 }
 
-/***************************************************************************
-Closes a directory stream. */
-
+/***********************************************************************//**
+Closes a directory stream.
+@return	0 if success, -1 if failure */
+UNIV_INTERN
 int
 os_file_closedir(
 /*=============*/
-				/* out: 0 if success, -1 if failure */
-	os_file_dir_t	dir)	/* in: directory stream */
+	os_file_dir_t	dir)	/*!< in: directory stream */
 {
 #ifdef __WIN__
 	BOOL		ret;
@@ -721,18 +692,17 @@ os_file_closedir(
 #endif
 }
 
-/***************************************************************************
+/***********************************************************************//**
 This function returns information of the next file in the directory. We jump
-over the '.' and '..' entries in the directory. */
-
+over the '.' and '..' entries in the directory.
+@return	0 if ok, -1 if error, 1 if at the end of the directory */
+UNIV_INTERN
 int
 os_file_readdir_next_file(
 /*======================*/
-				/* out: 0 if ok, -1 if error, 1 if at the end
-				of the directory */
-	const char*	dirname,/* in: directory name or path */
-	os_file_dir_t	dir,	/* in: directory stream */
-	os_file_stat_t*	info)	/* in/out: buffer where the info is returned */
+	const char*	dirname,/*!< in: directory name or path */
+	os_file_dir_t	dir,	/*!< in: directory stream */
+	os_file_stat_t*	info)	/*!< in/out: buffer where the info is returned */
 {
 #ifdef __WIN__
 	LPWIN32_FIND_DATA	lpFindFileData;
@@ -754,8 +724,8 @@ next_file:
 
 		strcpy(info->name, (char *) lpFindFileData->cFileName);
 
-		info->size = (ib_longlong)(lpFindFileData->nFileSizeLow)
-			+ (((ib_longlong)(lpFindFileData->nFileSizeHigh))
+		info->size = (ib_int64_t)(lpFindFileData->nFileSizeLow)
+			+ (((ib_int64_t)(lpFindFileData->nFileSizeHigh))
 			   << 32);
 
 		if (lpFindFileData->dwFileAttributes
@@ -764,8 +734,7 @@ next_file:
 			/* TODO: MySQL has apparently its own symlink
 			implementation in Windows, dbname.sym can
 			redirect a database directory:
-			http://dev.mysql.com/doc/refman/5.1/en/
-			windows-symbolic-links.html */
+			REFMAN "windows-symbolic-links.html" */
 			info->type = OS_FILE_TYPE_LINK;
 		} else if (lpFindFileData->dwFileAttributes
 			   & FILE_ATTRIBUTE_DIRECTORY) {
@@ -855,7 +824,7 @@ next_file:
 		return(-1);
 	}
 
-	info->size = (ib_longlong)statinfo.st_size;
+	info->size = (ib_int64_t)statinfo.st_size;
 
 	if (S_ISDIR(statinfo.st_mode)) {
 		info->type = OS_FILE_TYPE_DIR;
@@ -873,20 +842,19 @@ next_file:
 #endif
 }
 
-/*********************************************************************
+/*****************************************************************//**
 This function attempts to create a directory named pathname. The new directory
 gets default permissions. On Unix the permissions are (0770 & ~umask). If the
 directory exists already, nothing is done and the call succeeds, unless the
-fail_if_exists arguments is true. */
-
+fail_if_exists arguments is true.
+@return	TRUE if call succeeds, FALSE on error */
+UNIV_INTERN
 ibool
 os_file_create_directory(
 /*=====================*/
-					/* out: TRUE if call succeeds,
-					FALSE on error */
-	const char*	pathname,	/* in: directory name as
+	const char*	pathname,	/*!< in: directory name as
 					null-terminated string */
-	ibool		fail_if_exists)	/* in: if TRUE, pre-existing directory
+	ibool		fail_if_exists)	/*!< in: if TRUE, pre-existing directory
 					is treated as an error. */
 {
 #ifdef __WIN__
@@ -919,27 +887,26 @@ os_file_create_directory(
 #endif
 }
 
-/********************************************************************
-A simple function to open or create a file. */
-
+/****************************************************************//**
+A simple function to open or create a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
 os_file_t
 os_file_create_simple(
 /*==================*/
-				/* out, own: handle to the file, not defined
-				if error, error number can be retrieved with
-				os_file_get_last_error */
-	const char*	name,	/* in: name of the file or path as a
+	const char*	name,	/*!< in: name of the file or path as a
 				null-terminated string */
-	ulint		create_mode,/* in: OS_FILE_OPEN if an existing file is
+	ulint		create_mode,/*!< in: OS_FILE_OPEN if an existing file is
 				opened (if does not exist, error), or
 				OS_FILE_CREATE if a new file is created
 				(if exists, error), or
 				OS_FILE_CREATE_PATH if new file
 				(if exists, error) and subdirectories along
 				its path are created (if needed)*/
-	ulint		access_type,/* in: OS_FILE_READ_ONLY or
+	ulint		access_type,/*!< in: OS_FILE_READ_ONLY or
 				OS_FILE_READ_WRITE */
-	ibool*		success)/* out: TRUE if succeed, FALSE if error */
+	ibool*		success)/*!< out: TRUE if succeed, FALSE if error */
 {
 #ifdef __WIN__
 	os_file_t	file;
@@ -985,7 +952,7 @@ try_again:
 			  NULL,	/* default security attributes */
 			  create_flag,
 			  attributes,
-			  NULL);	/* no template file */
+			  NULL);	/*!< no template file */
 
 	if (file == INVALID_HANDLE_VALUE) {
 		*success = FALSE;
@@ -1061,26 +1028,25 @@ try_again:
 #endif /* __WIN__ */
 }
 
-/********************************************************************
-A simple function to open or create a file. */
-
+/****************************************************************//**
+A simple function to open or create a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
 os_file_t
 os_file_create_simple_no_error_handling(
 /*====================================*/
-				/* out, own: handle to the file, not defined
-				if error, error number can be retrieved with
-				os_file_get_last_error */
-	const char*	name,	/* in: name of the file or path as a
+	const char*	name,	/*!< in: name of the file or path as a
 				null-terminated string */
-	ulint		create_mode,/* in: OS_FILE_OPEN if an existing file
+	ulint		create_mode,/*!< in: OS_FILE_OPEN if an existing file
 				is opened (if does not exist, error), or
 				OS_FILE_CREATE if a new file is created
 				(if exists, error) */
-	ulint		access_type,/* in: OS_FILE_READ_ONLY,
+	ulint		access_type,/*!< in: OS_FILE_READ_ONLY,
 				OS_FILE_READ_WRITE, or
 				OS_FILE_READ_ALLOW_DELETE; the last option is
 				used by a backup program reading the file */
-	ibool*		success)/* out: TRUE if succeed, FALSE if error */
+	ibool*		success)/*!< out: TRUE if succeed, FALSE if error */
 {
 #ifdef __WIN__
 	os_file_t	file;
@@ -1107,7 +1073,7 @@ os_file_create_simple_no_error_handling(
 	} else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
 		access = GENERIC_READ;
 		share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
-			| FILE_SHARE_WRITE;	/* A backup program has to give
+			| FILE_SHARE_WRITE;	/*!< A backup program has to give
 						mysqld the maximum freedom to
 						do what it likes with the
 						file */
@@ -1122,7 +1088,7 @@ os_file_create_simple_no_error_handling(
 			  NULL,	/* default security attributes */
 			  create_flag,
 			  attributes,
-			  NULL);	/* no template file */
+			  NULL);	/*!< no template file */
 
 	if (file == INVALID_HANDLE_VALUE) {
 		*success = FALSE;
@@ -1174,19 +1140,17 @@ os_file_create_simple_no_error_handling(
 #endif /* __WIN__ */
 }
 
-/********************************************************************
+/****************************************************************//**
 Tries to disable OS caching on an opened file descriptor. */
-
+UNIV_INTERN
 void
 os_file_set_nocache(
 /*================*/
-	int		fd,		/* in: file descriptor to alter */
-	const char*	file_name,	/* in: used in the diagnostic message */
-	const char*	operation_name)	/* in: used in the diagnostic message,
-					we call os_file_set_nocache()
-					immediately after opening or creating
-					a file, so this is either "open" or
-					"create" */
+	int		fd,		/*!< in: file descriptor to alter */
+	const char*	file_name,	/*!< in: file name, used in the
+					diagnostic message */
+	const char*	operation_name)	/*!< in: "open" or "create"; used in the
+					diagnostic message */
 {
 	/* some versions of Solaris may not have DIRECTIO_ON */
 #if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
@@ -1219,18 +1183,17 @@ os_file_set_nocache(
 #endif
 }
 
-/********************************************************************
-Opens an existing file or creates a new. */
-
+/****************************************************************//**
+Opens an existing file or creates a new.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
 os_file_t
 os_file_create(
 /*===========*/
-				/* out, own: handle to the file, not defined
-				if error, error number can be retrieved with
-				os_file_get_last_error */
-	const char*	name,	/* in: name of the file or path as a
+	const char*	name,	/*!< in: name of the file or path as a
 				null-terminated string */
-	ulint		create_mode,/* in: OS_FILE_OPEN if an existing file
+	ulint		create_mode,/*!< in: OS_FILE_OPEN if an existing file
 				is opened (if does not exist, error), or
 				OS_FILE_CREATE if a new file is created
 				(if exists, error),
@@ -1238,15 +1201,15 @@ os_file_create(
 				or an old overwritten;
 				OS_FILE_OPEN_RAW, if a raw device or disk
 				partition should be opened */
-	ulint		purpose,/* in: OS_FILE_AIO, if asynchronous,
+	ulint		purpose,/*!< in: OS_FILE_AIO, if asynchronous,
 				non-buffered i/o is desired,
 				OS_FILE_NORMAL, if any normal file;
 				NOTE that it also depends on type, os_aio_..
 				and srv_.. variables whether we really use
 				async i/o or unbuffered i/o: look in the
 				function source code for the exact rules */
-	ulint		type,	/* in: OS_DATA_FILE or OS_LOG_FILE */
-	ibool*		success)/* out: TRUE if succeed, FALSE if error */
+	ulint		type,	/*!< in: OS_DATA_FILE or OS_LOG_FILE */
+	ibool*		success)/*!< out: TRUE if succeed, FALSE if error */
 {
 #ifdef __WIN__
 	os_file_t	file;
@@ -1325,7 +1288,7 @@ try_again:
 			  NULL,	/* default security attributes */
 			  create_flag,
 			  attributes,
-			  NULL);	/* no template file */
+			  NULL);	/*!< no template file */
 
 	if (file == INVALID_HANDLE_VALUE) {
 		*success = FALSE;
@@ -1478,14 +1441,14 @@ try_again:
 #endif /* __WIN__ */
 }
 
-/***************************************************************************
-Deletes a file if it exists. The file has to be closed before calling this. */
-
+/***********************************************************************//**
+Deletes a file if it exists. The file has to be closed before calling this.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_delete_if_exists(
 /*=====================*/
-				/* out: TRUE if success */
-	const char*	name)	/* in: file path as a null-terminated string */
+	const char*	name)	/*!< in: file path as a null-terminated string */
 {
 #ifdef __WIN__
 	BOOL	ret;
@@ -1528,7 +1491,7 @@ loop:
 #else
 	int	ret;
 
-	ret = unlink((const char*)name);
+	ret = unlink(name);
 
 	if (ret != 0 && errno != ENOENT) {
 		os_file_handle_error_no_exit(name, "delete");
@@ -1540,14 +1503,14 @@ loop:
 #endif
 }
 
-/***************************************************************************
-Deletes a file. The file has to be closed before calling this. */
-
+/***********************************************************************//**
+Deletes a file. The file has to be closed before calling this.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_delete(
 /*===========*/
-				/* out: TRUE if success */
-	const char*	name)	/* in: file path as a null-terminated string */
+	const char*	name)	/*!< in: file path as a null-terminated string */
 {
 #ifdef __WIN__
 	BOOL	ret;
@@ -1591,7 +1554,7 @@ loop:
 #else
 	int	ret;
 
-	ret = unlink((const char*)name);
+	ret = unlink(name);
 
 	if (ret != 0) {
 		os_file_handle_error_no_exit(name, "delete");
@@ -1603,17 +1566,17 @@ loop:
 #endif
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Renames a file (can also move it to another directory). It is safest that the
-file is closed before calling this function. */
-
+file is closed before calling this function.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_rename(
 /*===========*/
-				/* out: TRUE if success */
-	const char*	oldpath,/* in: old file path as a null-terminated
+	const char*	oldpath,/*!< in: old file path as a null-terminated
 				string */
-	const char*	newpath)/* in: new file path */
+	const char*	newpath)/*!< in: new file path */
 {
 #ifdef __WIN__
 	BOOL	ret;
@@ -1630,7 +1593,7 @@ os_file_rename(
 #else
 	int	ret;
 
-	ret = rename((const char*)oldpath, (const char*)newpath);
+	ret = rename(oldpath, newpath);
 
 	if (ret != 0) {
 		os_file_handle_error_no_exit(oldpath, "rename");
@@ -1642,15 +1605,15 @@ os_file_rename(
 #endif
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Closes a file handle. In case of error, error number can be retrieved with
-os_file_get_last_error. */
-
+os_file_get_last_error.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_close(
 /*==========*/
-				/* out: TRUE if success */
-	os_file_t	file)	/* in, own: handle to a file */
+	os_file_t	file)	/*!< in, own: handle to a file */
 {
 #ifdef __WIN__
 	BOOL	ret;
@@ -1681,14 +1644,15 @@ os_file_close(
 #endif
 }
 
-/***************************************************************************
-Closes a file handle. */
-
+#ifdef UNIV_HOTBACKUP
+/***********************************************************************//**
+Closes a file handle.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_close_no_error_handling(
 /*============================*/
-				/* out: TRUE if success */
-	os_file_t	file)	/* in, own: handle to a file */
+	os_file_t	file)	/*!< in, own: handle to a file */
 {
 #ifdef __WIN__
 	BOOL	ret;
@@ -1715,18 +1679,19 @@ os_file_close_no_error_handling(
 	return(TRUE);
 #endif
 }
+#endif /* UNIV_HOTBACKUP */
 
-/***************************************************************************
-Gets a file size. */
-
+/***********************************************************************//**
+Gets a file size.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_get_size(
 /*=============*/
-				/* out: TRUE if success */
-	os_file_t	file,	/* in: handle to a file */
-	ulint*		size,	/* out: least significant 32 bits of file
+	os_file_t	file,	/*!< in: handle to a file */
+	ulint*		size,	/*!< out: least significant 32 bits of file
 				size */
-	ulint*		size_high)/* out: most significant 32 bits of size */
+	ulint*		size_high)/*!< out: most significant 32 bits of size */
 {
 #ifdef __WIN__
 	DWORD	high;
@@ -1764,14 +1729,14 @@ os_file_get_size(
 #endif
 }
 
-/***************************************************************************
-Gets file size as a 64-bit integer ib_longlong. */
-
-ib_longlong
+/***********************************************************************//**
+Gets file size as a 64-bit integer ib_int64_t.
+@return	size in bytes, -1 if error */
+UNIV_INTERN
+ib_int64_t
 os_file_get_size_as_iblonglong(
 /*===========================*/
-				/* out: size in bytes, -1 if error */
-	os_file_t	file)	/* in: handle to a file */
+	os_file_t	file)	/*!< in: handle to a file */
 {
 	ulint	size;
 	ulint	size_high;
@@ -1784,25 +1749,25 @@ os_file_get_size_as_iblonglong(
 		return(-1);
 	}
 
-	return((((ib_longlong)size_high) << 32) + (ib_longlong)size);
+	return((((ib_int64_t)size_high) << 32) + (ib_int64_t)size);
 }
 
-/***************************************************************************
-Write the specified number of zeros to a newly created file. */
-
+/***********************************************************************//**
+Write the specified number of zeros to a newly created file.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_set_size(
 /*=============*/
-				/* out: TRUE if success */
-	const char*	name,	/* in: name of the file or path as a
+	const char*	name,	/*!< in: name of the file or path as a
 				null-terminated string */
-	os_file_t	file,	/* in: handle to a file */
-	ulint		size,	/* in: least significant 32 bits of file
+	os_file_t	file,	/*!< in: handle to a file */
+	ulint		size,	/*!< in: least significant 32 bits of file
 				size */
-	ulint		size_high)/* in: most significant 32 bits of size */
+	ulint		size_high)/*!< in: most significant 32 bits of size */
 {
-	ib_longlong	current_size;
-	ib_longlong	desired_size;
+	ib_int64_t	current_size;
+	ib_int64_t	desired_size;
 	ibool		ret;
 	byte*		buf;
 	byte*		buf2;
@@ -1811,7 +1776,7 @@ os_file_set_size(
 	ut_a(size == (size & 0xFFFFFFFF));
 
 	current_size = 0;
-	desired_size = (ib_longlong)size + (((ib_longlong)size_high) << 32);
+	desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
 
 	/* Write up to 1 megabyte at a time. */
 	buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
@@ -1824,7 +1789,7 @@ os_file_set_size(
 	/* Write buffer full of zeros */
 	memset(buf, 0, buf_size);
 
-	if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) {
+	if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
 
 		fprintf(stderr, "InnoDB: Progress in MB:");
 	}
@@ -1832,7 +1797,7 @@ os_file_set_size(
 	while (current_size < desired_size) {
 		ulint	n_bytes;
 
-		if (desired_size - current_size < (ib_longlong) buf_size) {
+		if (desired_size - current_size < (ib_int64_t) buf_size) {
 			n_bytes = (ulint) (desired_size - current_size);
 		} else {
 			n_bytes = buf_size;
@@ -1848,18 +1813,18 @@ os_file_set_size(
 		}
 
 		/* Print about progress for each 100 MB written */
-		if ((ib_longlong) (current_size + n_bytes) / (ib_longlong)(100 * 1024 * 1024)
-		    != current_size / (ib_longlong)(100 * 1024 * 1024)) {
+		if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024)
+		    != current_size / (ib_int64_t)(100 * 1024 * 1024)) {
 
 			fprintf(stderr, " %lu00",
 				(ulong) ((current_size + n_bytes)
-					 / (ib_longlong)(100 * 1024 * 1024)));
+					 / (ib_int64_t)(100 * 1024 * 1024)));
 		}
 
 		current_size += n_bytes;
 	}
 
-	if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) {
+	if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
 
 		fprintf(stderr, "\n");
 	}
@@ -1876,14 +1841,14 @@ error_handling:
 	return(FALSE);
 }
 
-/***************************************************************************
-Truncates a file at its current position. */
-
+/***********************************************************************//**
+Truncates a file at its current position.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_set_eof(
 /*============*/
-				/* out: TRUE if success */
-	FILE*		file)	/* in: file to be truncated */
+	FILE*		file)	/*!< in: file to be truncated */
 {
 #ifdef __WIN__
 	HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
@@ -1894,17 +1859,17 @@ os_file_set_eof(
 }
 
 #ifndef __WIN__
-/***************************************************************************
+/***********************************************************************//**
 Wrapper to fsync(2) that retries the call on some errors.
 Returns the value 0 if successful; otherwise the value -1 is returned and
-the global variable errno is set to indicate the error. */
+the global variable errno is set to indicate the error.
+@return	0 if success, -1 otherwise */
 
 static
 int
 os_file_fsync(
 /*==========*/
-				/* out: 0 if success, -1 otherwise */
-	os_file_t	file)	/* in: handle to a file */
+	os_file_t	file)	/*!< in: handle to a file */
 {
 	int	ret;
 	int	failures;
@@ -1942,14 +1907,14 @@ os_file_fsync(
 }
 #endif /* !__WIN__ */
 
-/***************************************************************************
-Flushes the write buffers of a given file to the disk. */
-
+/***********************************************************************//**
+Flushes the write buffers of a given file to the disk.
+@return	TRUE if success */
+UNIV_INTERN
 ibool
 os_file_flush(
 /*==========*/
-				/* out: TRUE if success */
-	os_file_t	file)	/* in, own: handle to a file */
+	os_file_t	file)	/*!< in, own: handle to a file */
 {
 #ifdef __WIN__
 	BOOL	ret;
@@ -2041,19 +2006,19 @@ os_file_flush(
 }
 
 #ifndef __WIN__
-/***********************************************************************
-Does a synchronous read operation in Posix. */
+/*******************************************************************//**
+Does a synchronous read operation in Posix.
+@return	number of bytes read, -1 if error */
 static
 ssize_t
 os_file_pread(
 /*==========*/
-				/* out: number of bytes read, -1 if error */
-	os_file_t	file,	/* in: handle to a file */
-	void*		buf,	/* in: buffer where to read */
-	ulint		n,	/* in: number of bytes to read */
-	ulint		offset,	/* in: least significant 32 bits of file
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< in: buffer where to read */
+	ulint		n,	/*!< in: number of bytes to read */
+	ulint		offset,	/*!< in: least significant 32 bits of file
 				offset from where to read */
-	ulint		offset_high) /* in: most significant 32 bits of
+	ulint		offset_high) /*!< in: most significant 32 bits of
 				offset */
 {
 	off_t	offs;
@@ -2126,19 +2091,19 @@ os_file_pread(
 #endif
 }
 
-/***********************************************************************
-Does a synchronous write operation in Posix. */
+/*******************************************************************//**
+Does a synchronous write operation in Posix.
+@return	number of bytes written, -1 if error */
 static
 ssize_t
 os_file_pwrite(
 /*===========*/
-				/* out: number of bytes written, -1 if error */
-	os_file_t	file,	/* in: handle to a file */
-	const void*	buf,	/* in: buffer from where to write */
-	ulint		n,	/* in: number of bytes to write */
-	ulint		offset,	/* in: least significant 32 bits of file
+	os_file_t	file,	/*!< in: handle to a file */
+	const void*	buf,	/*!< in: buffer from where to write */
+	ulint		n,	/*!< in: number of bytes to write */
+	ulint		offset,	/*!< in: least significant 32 bits of file
 				offset where to write */
-	ulint		offset_high) /* in: most significant 32 bits of
+	ulint		offset_high) /*!< in: most significant 32 bits of
 				offset */
 {
 	ssize_t	ret;
@@ -2240,21 +2205,20 @@ func_exit:
 }
 #endif
 
-/***********************************************************************
-Requests a synchronous positioned read operation. */
-
+/*******************************************************************//**
+Requests a synchronous positioned read operation.
+@return	TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
 ibool
 os_file_read(
 /*=========*/
-				/* out: TRUE if request was
-				successful, FALSE if fail */
-	os_file_t	file,	/* in: handle to a file */
-	void*		buf,	/* in: buffer where to read */
-	ulint		offset,	/* in: least significant 32 bits of file
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< in: buffer where to read */
+	ulint		offset,	/*!< in: least significant 32 bits of file
 				offset where to read */
-	ulint		offset_high, /* in: most significant 32 bits of
+	ulint		offset_high, /*!< in: most significant 32 bits of
 				offset */
-	ulint		n)	/* in: number of bytes to read */
+	ulint		n)	/*!< in: number of bytes to read */
 {
 #ifdef __WIN__
 	BOOL		ret;
@@ -2356,22 +2320,21 @@ error_handling:
 	return(FALSE);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Requests a synchronous positioned read operation. This function does not do
-any error handling. In case of error it returns FALSE. */
-
+any error handling. In case of error it returns FALSE.
+@return	TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
 ibool
 os_file_read_no_error_handling(
 /*===========================*/
-				/* out: TRUE if request was
-				successful, FALSE if fail */
-	os_file_t	file,	/* in: handle to a file */
-	void*		buf,	/* in: buffer where to read */
-	ulint		offset,	/* in: least significant 32 bits of file
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< in: buffer where to read */
+	ulint		offset,	/*!< in: least significant 32 bits of file
 				offset where to read */
-	ulint		offset_high, /* in: most significant 32 bits of
+	ulint		offset_high, /*!< in: most significant 32 bits of
 				offset */
-	ulint		n)	/* in: number of bytes to read */
+	ulint		n)	/*!< in: number of bytes to read */
 {
 #ifdef __WIN__
 	BOOL		ret;
@@ -2454,17 +2417,17 @@ error_handling:
 	return(FALSE);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Rewind file to its start, read at most size - 1 bytes from it to str, and
 NUL-terminate str. All errors are silently ignored. This function is
 mostly meant to be used with temporary files. */
-
+UNIV_INTERN
 void
 os_file_read_string(
 /*================*/
-	FILE*	file,	/* in: file to read from */
-	char*	str,	/* in: buffer where to read */
-	ulint	size)	/* in: size of buffer */
+	FILE*	file,	/*!< in: file to read from */
+	char*	str,	/*!< in: buffer where to read */
+	ulint	size)	/*!< in: size of buffer */
 {
 	size_t	flen;
 
@@ -2477,23 +2440,22 @@ os_file_read_string(
 	str[flen] = '\0';
 }
 
-/***********************************************************************
-Requests a synchronous write operation. */
-
+/*******************************************************************//**
+Requests a synchronous write operation.
+@return	TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
 ibool
 os_file_write(
 /*==========*/
-				/* out: TRUE if request was
-				successful, FALSE if fail */
-	const char*	name,	/* in: name of the file or path as a
+	const char*	name,	/*!< in: name of the file or path as a
 				null-terminated string */
-	os_file_t	file,	/* in: handle to a file */
-	const void*	buf,	/* in: buffer from which to write */
-	ulint		offset,	/* in: least significant 32 bits of file
+	os_file_t	file,	/*!< in: handle to a file */
+	const void*	buf,	/*!< in: buffer from which to write */
+	ulint		offset,	/*!< in: least significant 32 bits of file
 				offset where to write */
-	ulint		offset_high, /* in: most significant 32 bits of
+	ulint		offset_high, /*!< in: most significant 32 bits of
 				offset */
-	ulint		n)	/* in: number of bytes to write */
+	ulint		n)	/*!< in: number of bytes to write */
 {
 #ifdef __WIN__
 	BOOL		ret;
@@ -2545,8 +2507,7 @@ retry:
 			"InnoDB: Some operating system error numbers"
 			" are described at\n"
 			"InnoDB: "
-			"http://dev.mysql.com/doc/refman/5.1/en/"
-			"operating-system-error-codes.html\n",
+			REFMAN "operating-system-error-codes.html\n",
 			name, (ulong) offset_high, (ulong) offset,
 			(ulong) GetLastError());
 
@@ -2617,8 +2578,7 @@ retry:
 			"InnoDB: Some operating system error numbers"
 			" are described at\n"
 			"InnoDB: "
-			"http://dev.mysql.com/doc/refman/5.1/en/"
-			"operating-system-error-codes.html\n");
+			REFMAN "operating-system-error-codes.html\n");
 
 		os_has_said_disk_full = TRUE;
 	}
@@ -2660,8 +2620,7 @@ retry:
 			"InnoDB: Some operating system error numbers"
 			" are described at\n"
 			"InnoDB: "
-			"http://dev.mysql.com/doc/refman/5.1/en/"
-			"operating-system-error-codes.html\n");
+			REFMAN "operating-system-error-codes.html\n");
 
 		os_has_said_disk_full = TRUE;
 	}
@@ -2670,16 +2629,16 @@ retry:
 #endif
 }
 
-/***********************************************************************
-Check the existence and type of the given file. */
-
+/*******************************************************************//**
+Check the existence and type of the given file.
+@return	TRUE if call succeeded */
+UNIV_INTERN
 ibool
 os_file_status(
 /*===========*/
-				/* out: TRUE if call succeeded */
-	const char*	path,	/* in:	pathname of the file */
-	ibool*		exists,	/* out: TRUE if file exists */
-	os_file_type_t* type)	/* out: type of the file (if it exists) */
+	const char*	path,	/*!< in:	pathname of the file */
+	ibool*		exists,	/*!< out: TRUE if file exists */
+	os_file_type_t* type)	/*!< out: type of the file (if it exists) */
 {
 #ifdef __WIN__
 	int		ret;
@@ -2742,16 +2701,15 @@ os_file_status(
 #endif
 }
 
-/***********************************************************************
-This function returns information about the specified file */
-
+/*******************************************************************//**
+This function returns information about the specified file
+@return	TRUE if stat information found */
+UNIV_INTERN
 ibool
 os_file_get_status(
 /*===============*/
-					/* out: TRUE if stat
-					information found */
-	const char*	path,		/* in:	pathname of the file */
-	os_file_stat_t* stat_info)	/* information of a file in a
+	const char*	path,		/*!< in:	pathname of the file */
+	os_file_stat_t* stat_info)	/*!< information of a file in a
 					directory */
 {
 #ifdef __WIN__
@@ -2828,7 +2786,7 @@ os_file_get_status(
 #  define OS_FILE_PATH_SEPARATOR	'/'
 #endif
 
-/********************************************************************
+/****************************************************************//**
 The function os_file_dirname returns a directory component of a
 null-terminated pathname string.  In the usual case, dirname returns
 the string up to, but not including, the final '/', and basename
@@ -2854,14 +2812,13 @@ returned by dirname and basename for different paths:
        "/"	      "/"	     "/"
        "."	      "."	     "."
        ".."	      "."	     ".."
-*/
 
+@return	own: directory component of the pathname */
+UNIV_INTERN
 char*
 os_file_dirname(
 /*============*/
-				/* out, own: directory component of the
-				pathname */
-	const char*	path)	/* in: pathname */
+	const char*	path)	/*!< in: pathname */
 {
 	/* Find the offset of the last slash */
 	const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR);
@@ -2884,15 +2841,14 @@ os_file_dirname(
 	return(mem_strdupl(path, last_slash - path));
 }
 
-/********************************************************************
-Creates all missing subdirectories along the given path. */
-
+/****************************************************************//**
+Creates all missing subdirectories along the given path.
+@return	TRUE if call succeeded FALSE otherwise */
+UNIV_INTERN
 ibool
 os_file_create_subdirs_if_needed(
 /*=============================*/
-				/* out: TRUE if call succeeded
-				   FALSE otherwise */
-	const char*	path)	/* in: path name */
+	const char*	path)	/*!< in: path name */
 {
 	char*		subdir;
 	ibool		success, subdir_exists;
@@ -2925,30 +2881,32 @@ os_file_create_subdirs_if_needed(
 	return(success);
 }
 
-/********************************************************************
-Returns a pointer to the nth slot in the aio array. */
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Returns a pointer to the nth slot in the aio array.
+@return	pointer to slot */
 static
 os_aio_slot_t*
 os_aio_array_get_nth_slot(
 /*======================*/
-					/* out: pointer to slot */
-	os_aio_array_t*		array,	/* in: aio array */
-	ulint			index)	/* in: index of the slot */
+	os_aio_array_t*		array,	/*!< in: aio array */
+	ulint			index)	/*!< in: index of the slot */
 {
 	ut_a(index < array->n_slots);
 
 	return((array->slots) + index);
 }
 
-/****************************************************************************
-Creates an aio wait array. */
+/************************************************************************//**
+Creates an aio wait array.
+@return	own: aio array */
 static
 os_aio_array_t*
 os_aio_array_create(
 /*================*/
-				/* out, own: aio array */
-	ulint	n)	/* in: maximum number of pending aio operations
-				allowed */
+	ulint	n,		/*!< in: maximum number of pending aio operations
+				allowed; n must be divisible by n_segments */
+	ulint	n_segments)	/*!< in: number of segments in the aio array */
 {
 	os_aio_array_t*	array;
 	ulint		i;
@@ -2957,6 +2915,7 @@ os_aio_array_create(
 	OVERLAPPED*	over;
 #endif
 	ut_a(n > 0);
+	ut_a(n_segments > 0);
 
 	array = ut_malloc(sizeof(os_aio_array_t));
 
@@ -2967,6 +2926,7 @@ os_aio_array_create(
 	os_event_set(array->is_empty);
 
 	array->n_slots		= n;
+	array->n_segments	= n_segments;
 	array->n_reserved	= 0;
 	array->slots		= ut_malloc(n * sizeof(os_aio_slot_t));
 #ifdef __WIN__
@@ -2991,77 +2951,63 @@ os_aio_array_create(
 	return(array);
 }
 
-/****************************************************************************
-Initializes the asynchronous io system. Calls also os_io_init_simple.
-Creates an aio array for each of non-ibuf read, non-ibuf write, ibuf IO,
-log IO, and synchronous IO. The caller must create i/o handler thread for all
-but the synchronous aio array. Multiple threads can access the same array for
-the non-ibuf read (prefetch) and write (flush dirty buffer pages) arrays.
-Return the number of AIO handler threads. */
-
-ulint
+/***********************************************************************
+Initializes the asynchronous io system. Creates one array each for ibuf
+and log i/o. Also creates one array each for read and write where each
+array is divided logically into n_read_segs and n_write_segs
+respectively. The caller must create an i/o handler thread for each
+segment in these arrays. This function also creates the sync array.
+No i/o handler thread needs to be created for that */
+UNIV_INTERN
+void
 os_aio_init(
 /*========*/
-	ulint	ios_per_array,	/* in: maximum number of pending aio operations
-                                 allowed per array */
-	ulint	n_read_threads, /* in: number of read threads */
-	ulint	n_write_threads, /* in: number of write threads */
-	ulint	n_slots_sync)	/* in: number of slots in the sync aio array */
+	ulint	n_per_seg,	/*<! in: maximum number of pending aio
+				operations allowed per segment */
+	ulint	n_read_segs,	/*<! in: number of reader threads */
+	ulint	n_write_segs,	/*<! in: number of writer threads */
+	ulint	n_slots_sync)	/*<! in: number of slots in the sync aio
+				array */
 {
 	ulint	i;
-	ulint   n_segments = 2 + n_read_threads + n_write_threads;
-#ifdef POSIX_ASYNC_IO
-	sigset_t   sigset;
-#endif
-	ut_a(ios_per_array >= OS_AIO_N_PENDING_IOS_PER_THREAD);
-	ut_a(n_read_threads >= 1 && n_read_threads <= 64);
-	ut_a(n_write_threads >= 1 && n_write_threads <= 64);
-	ut_a(n_segments < SRV_MAX_N_IO_THREADS);
+	ulint 	n_segments = 2 + n_read_segs + n_write_segs;
+
+	ut_ad(n_segments >= 4);
 
 	os_io_init_simple();
 
 	for (i = 0; i < n_segments; i++) {
 		srv_set_io_thread_op_info(i, "not started yet");
-		os_aio_thread_io_reads[i] = 0;
-		os_aio_thread_io_writes[i] = 0;
-		os_aio_thread_io_requests[i] = 0;
-		os_aio_thread_buffer[i] = 0;
-		os_aio_thread_buffer_size[i] = 0;
-		os_aio_thread_io_wait[i] = 0;
-		os_aio_thread_max_io_wait[i] = 0;
 	}
 
- 	os_aio_read_threads = n_read_threads;
- 	os_aio_write_threads = n_write_threads;
- 	os_aio_first_write_segment = os_aio_first_read_segment + os_aio_read_threads;
- 
- 	fprintf(stderr,
- 		"InnoDB: ios_per_array %lu read threads %lu write threads %lu\n",
- 		ios_per_array, os_aio_read_threads, os_aio_write_threads);
 
-	os_aio_ibuf_array = os_aio_array_create(ios_per_array);
+	/* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
+
+	os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
 
 	srv_io_thread_function[0] = "insert buffer thread";
 
-	os_aio_log_array = os_aio_array_create(ios_per_array);
+	os_aio_log_array = os_aio_array_create(n_per_seg, 1);
 
 	srv_io_thread_function[1] = "log thread";
 
-	os_aio_read_array = os_aio_array_create(ios_per_array);
-	for (i = os_aio_first_read_segment; i < os_aio_first_write_segment; i++) {
+	os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
+						n_read_segs);
+	for (i = 2; i < 2 + n_read_segs; i++) {
 		ut_a(i < SRV_MAX_N_IO_THREADS);
 		srv_io_thread_function[i] = "read thread";
 	}
 
-	os_aio_write_array = os_aio_array_create(ios_per_array);
-	for (i = os_aio_first_write_segment; i < n_segments; i++) {
+	os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
+						 n_write_segs);
+	for (i = 2 + n_read_segs; i < n_segments; i++) {
 		ut_a(i < SRV_MAX_N_IO_THREADS);
 		srv_io_thread_function[i] = "write thread";
 	}
 
-	os_aio_sync_array = os_aio_array_create(n_slots_sync);
+	os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
 
-	os_aio_n_segments = 2 + os_aio_read_threads + os_aio_write_threads;
+	os_aio_n_segments = n_segments;
 
 	os_aio_validate();
 
@@ -3073,34 +3019,17 @@ os_aio_init(
 
 	os_last_printout = time(NULL);
 
-#ifdef POSIX_ASYNC_IO
-	/* Block aio signals from the current thread and its children:
-	for this to work, the current thread must be the first created
-	in the database, so that all its children will inherit its
-	signal mask */
-
-	/* TODO: to work MySQL needs the SIGALARM signal; the following
-	will not work yet! */
-	sigemptyset(&sigset);
-	sigaddset(&sigset, SIGRTMIN + 1 + 0);
-	sigaddset(&sigset, SIGRTMIN + 1 + 1);
-	sigaddset(&sigset, SIGRTMIN + 1 + 2);
-	sigaddset(&sigset, SIGRTMIN + 1 + 3);
-
-	pthread_sigmask(SIG_BLOCK, &sigset, NULL); */
-#endif
-	return os_aio_n_segments;
 }
 
 #ifdef WIN_ASYNC_IO
-/****************************************************************************
+/************************************************************************//**
 Wakes up all async i/o threads in the array in Windows async i/o at
 shutdown. */
 static
 void
 os_aio_array_wake_win_aio_at_shutdown(
 /*==================================*/
-	os_aio_array_t*	array)	/* in: aio array */
+	os_aio_array_t*	array)	/*!< in: aio array */
 {
 	ulint	i;
 
@@ -3111,10 +3040,10 @@ os_aio_array_wake_win_aio_at_shutdown(
 }
 #endif
 
-/****************************************************************************
+/************************************************************************//**
 Wakes up all async i/o threads so that they know to exit themselves in
 shutdown. */
-
+UNIV_INTERN
 void
 os_aio_wake_all_threads_at_shutdown(void)
 /*=====================================*/
@@ -3136,10 +3065,10 @@ os_aio_wake_all_threads_at_shutdown(void)
 	}
 }
 
-/****************************************************************************
+/************************************************************************//**
 Waits until there are no pending writes in os_aio_write_array. There can
 be other, synchronous, pending writes. */
-
+UNIV_INTERN
 void
 os_aio_wait_until_no_pending_writes(void)
 /*=====================================*/
@@ -3147,125 +3076,120 @@ os_aio_wait_until_no_pending_writes(void)
 	os_event_wait(os_aio_write_array->is_empty);
 }
 
-/**************************************************************************
-Calculates aio array from global segment number. */
+/**********************************************************************//**
+Calculates segment number for a slot.
+@return segment number (which is the number used by, for example,
+i/o-handler threads) */
 static
-os_aio_array_t*
-os_aio_get_array(
-/*===============================*/
-	/* out: aio wait array */
-	ulint		 global_segment)/* in: global segment number */
+ulint
+os_aio_get_segment_no_from_slot(
+/*============================*/
+	os_aio_array_t*	array,	/*!< in: aio wait array */
+	os_aio_slot_t*	slot)	/*!< in: slot in this array */
 {
+	ulint	segment;
+	ulint	seg_len;
+
+	if (array == os_aio_ibuf_array) {
+		segment = 0;
+
+	} else if (array == os_aio_log_array) {
+		segment = 1;
+
+	} else if (array == os_aio_read_array) {
+		seg_len = os_aio_read_array->n_slots
+			/ os_aio_read_array->n_segments;
+
+		segment = 2 + slot->pos / seg_len;
+	} else {
+		ut_a(array == os_aio_write_array);
+		seg_len = os_aio_write_array->n_slots
+			/ os_aio_write_array->n_segments;
+
+		segment = os_aio_read_array->n_segments + 2
+			+ slot->pos / seg_len;
+	}
+
+	return(segment);
+}
+
+/**********************************************************************//**
+Calculates local segment number and aio array from global segment number.
+@return	local segment number within the aio array */
+static
+ulint
+os_aio_get_array_and_local_segment(
+/*===============================*/
+	os_aio_array_t** array,		/*!< out: aio wait array */
+	ulint		 global_segment)/*!< in: global segment number */
+{
+	ulint	segment;
+
 	ut_a(global_segment < os_aio_n_segments);
 
 	if (global_segment == 0) {
-		return os_aio_ibuf_array;
-  
+		*array = os_aio_ibuf_array;
+		segment = 0;
+
 	} else if (global_segment == 1) {
-		return os_aio_log_array;
+		*array = os_aio_log_array;
+		segment = 0;
 
-	} else if (global_segment < os_aio_first_write_segment) {
-		return os_aio_read_array;
+	} else if (global_segment < os_aio_read_array->n_segments + 2) {
+		*array = os_aio_read_array;
 
+		segment = global_segment - 2;
 	} else {
-		return os_aio_write_array;
-  	}
-}
+		*array = os_aio_write_array;
 
-/***********************************************************************
-Gets an integer value designating a specified aio array. This is used
-to give numbers to signals in Posix aio. */
-
-#if !defined(WIN_ASYNC_IO) && defined(POSIX_ASYNC_IO)
-static
-ulint
-os_aio_get_array_no(
-/*================*/
-	os_aio_array_t*	array)	/* in: aio array */
-{
-	if (array == os_aio_ibuf_array) {
-
-		return(0);
-
-	} else if (array == os_aio_log_array) {
-
-		return(1);
-
-	} else if (array == os_aio_read_array) {
-
-		return(2);
-	} else if (array == os_aio_write_array) {
-
-		return(3);
-	} else {
-		ut_error;
-
-		return(0);
+		segment = global_segment - (os_aio_read_array->n_segments + 2);
 	}
+
+	return(segment);
 }
 
-/***********************************************************************
-Gets the aio array for its number. */
-static
-os_aio_array_t*
-os_aio_get_array_from_no(
-/*=====================*/
-			/* out: aio array */
-	ulint	n)	/* in: array number */
-{
-	if (n == 0) {
-		return(os_aio_ibuf_array);
-	} else if (n == 1) {
-
-		return(os_aio_log_array);
-	} else if (n == 2) {
-
-		return(os_aio_read_array);
-	} else if (n == 3) {
-
-		return(os_aio_write_array);
-	} else {
-		ut_error;
-
-		return(NULL);
-	}
-}
-#endif /* if !defined(WIN_ASYNC_IO) && defined(POSIX_ASYNC_IO) */
-
-/***********************************************************************
+/*******************************************************************//**
 Requests for a slot in the aio array. If no slot is available, waits until
-not_full-event becomes signaled. */
+not_full-event becomes signaled.
+@return	pointer to slot */
 static
 os_aio_slot_t*
 os_aio_array_reserve_slot(
 /*======================*/
-				/* out: pointer to slot */
-	ulint		type,	/* in: OS_FILE_READ or OS_FILE_WRITE */
-	os_aio_array_t*	array,	/* in: aio array */
-	fil_node_t*	message1,/* in: message to be passed along with
+	ulint		type,	/*!< in: OS_FILE_READ or OS_FILE_WRITE */
+	os_aio_array_t*	array,	/*!< in: aio array */
+	fil_node_t*	message1,/*!< in: message to be passed along with
 				the aio operation */
-	void*		message2,/* in: message to be passed along with
+	void*		message2,/*!< in: message to be passed along with
 				the aio operation */
-	os_file_t	file,	/* in: file handle */
-	const char*	name,	/* in: name of the file or path as a
+	os_file_t	file,	/*!< in: file handle */
+	const char*	name,	/*!< in: name of the file or path as a
 				null-terminated string */
-	void*		buf,	/* in: buffer where to read or from which
+	void*		buf,	/*!< in: buffer where to read or from which
 				to write */
-	ulint		offset,	/* in: least significant 32 bits of file
+	ulint		offset,	/*!< in: least significant 32 bits of file
 				offset */
-	ulint		offset_high, /* in: most significant 32 bits of
+	ulint		offset_high, /*!< in: most significant 32 bits of
 				offset */
-	ulint		len)	/* in: length of the block to read or write */
+	ulint		len)	/*!< in: length of the block to read or write */
 {
 	os_aio_slot_t*	slot;
 #ifdef WIN_ASYNC_IO
 	OVERLAPPED*	control;
-
-#elif defined(POSIX_ASYNC_IO)
-
-	struct aiocb*	control;
 #endif
 	ulint		i;
+	ulint		slots_per_seg;
+	ulint		local_seg;
+
+	/* No need of a mutex. Only reading constant fields */
+	slots_per_seg = array->n_slots / array->n_segments;
+
+	/* We attempt to keep adjacent blocks in the same local
+	segment. This can help in merging IO requests when we are
+	doing simulated AIO */
+	local_seg = (offset >> (UNIV_PAGE_SIZE_SHIFT + 6))
+		    % array->n_segments;
+
 loop:
 	os_mutex_enter(array->mutex);
 
@@ -3284,14 +3208,26 @@ loop:
 		goto loop;
 	}
 
+	/* First try to find a slot in the preferred local segment */
+	for (i = local_seg * slots_per_seg; i < array->n_slots; i++) {
+		slot = os_aio_array_get_nth_slot(array, i);
+
+		if (slot->reserved == FALSE) {
+			goto found;
+		}
+	}
+
+	/* Fall back to a full scan. We are guaranteed to find a slot */
 	for (i = 0;; i++) {
 		slot = os_aio_array_get_nth_slot(array, i);
 
 		if (slot->reserved == FALSE) {
-			break;
+			goto found;
 		}
 	}
-	ut_a(i < array->n_slots);
+
+found:
+	ut_a(slot->reserved == FALSE);
 	array->n_reserved++;
 
 	if (array->n_reserved == 1) {
@@ -3313,50 +3249,28 @@ loop:
 	slot->buf      = buf;
 	slot->offset   = offset;
 	slot->offset_high = offset_high;
-	slot->status = OS_AIO_NOT_ISSUED;
+	slot->io_already_done = FALSE;
 
 #ifdef WIN_ASYNC_IO
 	control = &(slot->control);
 	control->Offset = (DWORD)offset;
 	control->OffsetHigh = (DWORD)offset_high;
 	os_event_reset(slot->event);
-
-#elif defined(POSIX_ASYNC_IO)
-
-#if (UNIV_WORD_SIZE == 8)
-	offset = offset + (offset_high << 32);
-#else
-	ut_a(offset_high == 0);
-#endif
-	control = &(slot->control);
-	control->aio_fildes = file;
-	control->aio_buf = buf;
-	control->aio_nbytes = len;
-	control->aio_offset = offset;
-	control->aio_reqprio = 0;
-	control->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
-	control->aio_sigevent.sigev_signo
-		= SIGRTMIN + 1 + os_aio_get_array_no(array);
-	/* TODO: How to choose the signal numbers? */
-	/*
-	fprintf(stderr, "AIO signal number %lu\n",
-	(ulint) control->aio_sigevent.sigev_signo);
-	*/
-	control->aio_sigevent.sigev_value.sival_ptr = slot;
 #endif
+
 	os_mutex_exit(array->mutex);
 
 	return(slot);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Frees a slot in the aio array. */
 static
 void
 os_aio_array_free_slot(
 /*===================*/
-	os_aio_array_t*	array,	/* in: aio array */
-	os_aio_slot_t*	slot)	/* in: pointer to slot */
+	os_aio_array_t*	array,	/*!< in: aio array */
+	os_aio_slot_t*	slot)	/*!< in: pointer to slot */
 {
 	ut_ad(array);
 	ut_ad(slot);
@@ -3366,7 +3280,6 @@ os_aio_array_free_slot(
 	ut_ad(slot->reserved);
 
 	slot->reserved = FALSE;
- 	slot->status = OS_AIO_NOT_ISSUED;
 
 	array->n_reserved--;
 
@@ -3384,74 +3297,57 @@ os_aio_array_free_slot(
 	os_mutex_exit(array->mutex);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Wakes up a simulated aio i/o-handler thread if it has something to do. */
 static
 void
 os_aio_simulated_wake_handler_thread(
 /*=================================*/
-		os_aio_array_t* array)	/* in: aio array for which wakeup is done */
+	ulint	global_segment)	/*!< in: the number of the segment in the aio
+				arrays */
 {
+	os_aio_array_t*	array;
 	os_aio_slot_t*	slot;
+	ulint		segment;
 	ulint		n;
 	ulint		i;
 
 	ut_ad(!os_aio_use_native_aio);
- 	n = array->n_slots;
 
-	/* Look through n slots */
+	segment = os_aio_get_array_and_local_segment(&array, global_segment);
+
+	n = array->n_slots / array->n_segments;
+
+	/* Look through n slots after the segment * n'th slot */
 
 	os_mutex_enter(array->mutex);
 
 	for (i = 0; i < n; i++) {
-		slot = os_aio_array_get_nth_slot(array, i);
- 
-		if (slot->reserved &&
-		    (slot->status == OS_AIO_NOT_ISSUED ||
-		     slot->status == OS_AIO_DONE)) {
-			/* Found an i/o request
-			   OS_AIO_NOT_ISSUED means the read or write request has
-			   * yet to be done. OS_AIO_DONE means the request has been
-			   * done but it was part of a set of requests merged into
-			   * one read or write call and was not the first block in
-			   * the request, so the handling of the IO completion for
-			   * that block has not been done. */
-  			break;
-  		}
+		slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+		if (slot->reserved) {
+			/* Found an i/o request */
+
+			break;
+		}
 	}
 
 	os_mutex_exit(array->mutex);
 
 	if (i < n) {
-		if (array == os_aio_ibuf_array) {
-			os_event_set(os_aio_segment_wait_events[0]);
-
-		} else if (array == os_aio_log_array) {
-			os_event_set(os_aio_segment_wait_events[1]);
-
-		} else if (array == os_aio_read_array) {
-			ulint	x;
-			for (x = os_aio_first_read_segment; x < os_aio_first_write_segment; x++)
-				os_event_set(os_aio_segment_wait_events[x]);
-
-		} else if (array == os_aio_write_array) {
-			ulint	x;
-			for (x = os_aio_first_write_segment; x < os_aio_n_segments; x++)
-				os_event_set(os_aio_segment_wait_events[x]);
-
-		} else {
-			ut_a(0);
-		}
+		os_event_set(os_aio_segment_wait_events[global_segment]);
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Wakes up simulated aio i/o-handler threads if they have something to do. */
-
+UNIV_INTERN
 void
 os_aio_simulated_wake_handler_threads(void)
 /*=======================================*/
 {
+	ulint	i;
+
 	if (os_aio_use_native_aio) {
 		/* We do not use simulated aio: do nothing */
 
@@ -3459,43 +3355,46 @@ os_aio_simulated_wake_handler_threads(void)
 	}
 
 	os_aio_recommend_sleep_for_read_threads	= FALSE;
-  
-	os_aio_simulated_wake_handler_thread(os_aio_ibuf_array);
-	os_aio_simulated_wake_handler_thread(os_aio_log_array);
-	os_aio_simulated_wake_handler_thread(os_aio_read_array);
-	os_aio_simulated_wake_handler_thread(os_aio_write_array);
+
+	for (i = 0; i < os_aio_n_segments; i++) {
+		os_aio_simulated_wake_handler_thread(i);
+	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 This function can be called if one wants to post a batch of reads and
 prefers an i/o-handler thread to handle them all at once later. You must
 call os_aio_simulated_wake_handler_threads later to ensure the threads
 are not left sleeping! */
-
+UNIV_INTERN
 void
 os_aio_simulated_put_read_threads_to_sleep(void)
 /*============================================*/
 {
+	os_aio_array_t*	array;
 	ulint		g;
 
-	/* TODO(mcallaghan): provide similar function for write? */
 	os_aio_recommend_sleep_for_read_threads	= TRUE;
-  
-	for (g = os_aio_first_read_segment; g < os_aio_first_write_segment; g++) {
-		os_event_reset(os_aio_segment_wait_events[g]);
+
+	for (g = 0; g < os_aio_n_segments; g++) {
+		os_aio_get_array_and_local_segment(&array, g);
+
+		if (array == os_aio_read_array) {
+
+			os_event_reset(os_aio_segment_wait_events[g]);
+		}
 	}
 }
 
-/***********************************************************************
-Requests an asynchronous i/o operation. */
-
+/*******************************************************************//**
+Requests an asynchronous i/o operation.
+@return	TRUE if request was queued successfully, FALSE if fail */
+UNIV_INTERN
 ibool
 os_aio(
 /*===*/
-				/* out: TRUE if request was queued
-				successfully, FALSE if fail */
-	ulint		type,	/* in: OS_FILE_READ or OS_FILE_WRITE */
-	ulint		mode,	/* in: OS_AIO_NORMAL, ..., possibly ORed
+	ulint		type,	/*!< in: OS_FILE_READ or OS_FILE_WRITE */
+	ulint		mode,	/*!< in: OS_AIO_NORMAL, ..., possibly ORed
 				to OS_AIO_SIMULATED_WAKE_LATER: the
 				last flag advises this function not to wake
 				i/o-handler threads, but the caller will
@@ -3508,21 +3407,24 @@ os_aio(
 				because i/os are not actually handled until
 				all have been posted: use with great
 				caution! */
-	const char*	name,	/* in: name of the file or path as a
+	const char*	name,	/*!< in: name of the file or path as a
 				null-terminated string */
-	os_file_t	file,	/* in: handle to a file */
-	void*		buf,	/* in: buffer where to read or from which
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< in: buffer where to read or from which
 				to write */
-	ulint		offset,	/* in: least significant 32 bits of file
+	ulint		offset,	/*!< in: least significant 32 bits of file
 				offset where to read or write */
-	ulint		offset_high, /* in: most significant 32 bits of
+	ulint		offset_high, /*!< in: most significant 32 bits of
 				offset */
-	ulint		n,	/* in: number of bytes to read or write */
-	fil_node_t*	message1,/* in: messages for the aio handler (these
-				can be used to identify a completed aio
-				operation); if mode is OS_AIO_SYNC, these
-				are ignored */
-	void*		message2)
+	ulint		n,	/*!< in: number of bytes to read or write */
+	fil_node_t*	message1,/*!< in: message for the aio handler
+				(can be used to identify a completed
+				aio operation); ignored if mode is
+				OS_AIO_SYNC */
+	void*		message2)/*!< in: message for the aio handler
+				(can be used to identify a completed
+				aio operation); ignored if mode is
+				OS_AIO_SYNC */
 {
 	os_aio_array_t*	array;
 	os_aio_slot_t*	slot;
@@ -3605,14 +3507,12 @@ try_again:
 
 			ret = ReadFile(file, buf, (DWORD)n, &len,
 				       &(slot->control));
-#elif defined(POSIX_ASYNC_IO)
-			slot->control.aio_lio_opcode = LIO_READ;
-			err = (ulint) aio_read(&(slot->control));
-			fprintf(stderr, "Starting POSIX aio read %lu\n", err);
 #endif
 		} else {
 			if (!wake_later) {
-				os_aio_simulated_wake_handler_thread(array);
+				os_aio_simulated_wake_handler_thread(
+					os_aio_get_segment_no_from_slot(
+						array, slot));
 			}
 		}
 	} else if (type == OS_FILE_WRITE) {
@@ -3621,14 +3521,12 @@ try_again:
 			os_n_file_writes++;
 			ret = WriteFile(file, buf, (DWORD)n, &len,
 					&(slot->control));
-#elif defined(POSIX_ASYNC_IO)
-			slot->control.aio_lio_opcode = LIO_WRITE;
-			err = (ulint) aio_write(&(slot->control));
-			fprintf(stderr, "Starting POSIX aio write %lu\n", err);
 #endif
 		} else {
 			if (!wake_later) {
-				os_aio_simulated_wake_handler_thread(array);
+				os_aio_simulated_wake_handler_thread(
+					os_aio_get_segment_no_from_slot(
+						array, slot));
 			}
 		}
 	} else {
@@ -3682,19 +3580,19 @@ try_again:
 }
 
 #ifdef WIN_ASYNC_IO
-/**************************************************************************
+/**********************************************************************//**
 This function is only used in Windows asynchronous i/o.
 Waits for an aio operation to complete. This function is used to wait the
 for completed requests. The aio array of pending requests is divided
 into segments. The thread specifies which segment or slot it wants to wait
 for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing! */
-
+therefore no other thread is allowed to do the freeing!
+@return	TRUE if the aio operation succeeded */
+UNIV_INTERN
 ibool
 os_aio_windows_handle(
 /*==================*/
-				/* out: TRUE if the aio operation succeeded */
-	ulint	global_segment,	/* in: the number of the segment in the aio
+	ulint	segment,	/*!< in: the number of the segment in the aio
 				arrays to wait for; segment 0 is the ibuf
 				i/o thread, segment 1 the log i/o thread,
 				then follow the non-ibuf read threads, and as
@@ -3702,16 +3600,17 @@ os_aio_windows_handle(
 				this is ULINT_UNDEFINED, then it means that
 				sync aio is used, and this parameter is
 				ignored */
-	ulint	pos,		/* this parameter is used only in sync aio:
+	ulint	pos,		/*!< this parameter is used only in sync aio:
 				wait for the aio slot at this position */
-	fil_node_t**message1,	/* out: the messages passed with the aio
+	fil_node_t**message1,	/*!< out: the messages passed with the aio
 				request; note that also in the case where
 				the aio operation failed, these output
 				parameters are valid and can be used to
 				restart the operation, for example */
 	void**	message2,
-	ulint*	type)		/* out: OS_FILE_WRITE or ..._READ */
+	ulint*	type)		/*!< out: OS_FILE_WRITE or ..._READ */
 {
+	ulint		orig_seg	= segment;
 	os_aio_array_t*	array;
 	os_aio_slot_t*	slot;
 	ulint		n;
@@ -3720,35 +3619,39 @@ os_aio_windows_handle(
 	BOOL		ret;
 	DWORD		len;
 
-	if (global_segment == ULINT_UNDEFINED) {
+	if (segment == ULINT_UNDEFINED) {
 		array = os_aio_sync_array;
+		segment = 0;
 	} else {
-		array = os_aio_get_array(global_segment);
+		segment = os_aio_get_array_and_local_segment(&array, segment);
 	}
 
 	/* NOTE! We only access constant fields in os_aio_array. Therefore
 	we do not have to acquire the protecting mutex yet */
 
 	ut_ad(os_aio_validate());
+	ut_ad(segment < array->n_segments);
 
-	n = array->n_slots;
+	n = array->n_slots / array->n_segments;
 
 	if (array == os_aio_sync_array) {
 		os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
 		i = pos;
 	} else {
-		srv_set_io_thread_op_info(global_segment, "wait Windows aio");
-		i = os_event_wait_multiple(n, (array->native_events));
+		srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
+		i = os_event_wait_multiple(n,
+					   (array->native_events)
+					   + segment * n);
 	}
 
 	os_mutex_enter(array->mutex);
 
-	slot = os_aio_array_get_nth_slot(array, i);
+	slot = os_aio_array_get_nth_slot(array, i + segment * n);
 
 	ut_a(slot->reserved);
 
-	if (global_segment != ULINT_UNDEFINED) {
-		srv_set_io_thread_op_info(global_segment,
+	if (orig_seg != ULINT_UNDEFINED) {
+		srv_set_io_thread_op_info(orig_seg,
 					  "get windows aio return value");
 	}
 
@@ -3762,12 +3665,12 @@ os_aio_windows_handle(
 	if (ret && len == slot->len) {
 		ret_val = TRUE;
 
-# ifdef UNIV_DO_FLUSH
+#ifdef UNIV_DO_FLUSH
 		if (slot->type == OS_FILE_WRITE
 		    && !os_do_not_call_flush_at_each_write) {
 			ut_a(TRUE == os_file_flush(slot->file));
 		}
-# endif /* UNIV_DO_FLUSH */
+#endif /* UNIV_DO_FLUSH */
 	} else {
 		os_file_handle_error(slot->name, "Windows aio");
 
@@ -3782,155 +3685,36 @@ os_aio_windows_handle(
 }
 #endif
 
-#ifdef POSIX_ASYNC_IO
-
-/**************************************************************************
-This function is only used in Posix asynchronous i/o. Waits for an aio
-operation to complete. */
-
-ibool
-os_aio_posix_handle(
-/*================*/
-				/* out: TRUE if the aio operation succeeded */
-	ulint	array_no,	/* in: array number 0 - 3 */
-	fil_node_t**message1,	/* out: the messages passed with the aio
-				request; note that also in the case where
-				the aio operation failed, these output
-				parameters are valid and can be used to
-				restart the operation, for example */
-	void**	message2)
-{
-	os_aio_array_t*	array;
-	os_aio_slot_t*	slot;
-	siginfo_t	info;
-	sigset_t	sigset;
-	sigset_t	proc_sigset;
-	sigset_t	thr_sigset;
-	int		ret;
-	int		i;
-	int		sig;
-
-	sigemptyset(&sigset);
-	sigaddset(&sigset, SIGRTMIN + 1 + array_no);
-
-	pthread_sigmask(SIG_UNBLOCK, &sigset, NULL);
-
-#if 0
-	sigprocmask(0, NULL, &proc_sigset);
-	pthread_sigmask(0, NULL, &thr_sigset);
-
-	for (i = 32 ; i < 40; i++) {
-		fprintf(stderr, "%lu : %lu %lu\n", (ulint)i,
-			(ulint) sigismember(&proc_sigset, i),
-			(ulint) sigismember(&thr_sigset, i));
-	}
-#endif
-
-	ret = sigwaitinfo(&sigset, &info);
-
-	if (sig != SIGRTMIN + 1 + array_no) {
-
-		ut_error;
-
-		return(FALSE);
-	}
-
-	fputs("Handling POSIX aio\n", stderr);
-
-	array = os_aio_get_array_from_no(array_no);
-
-	os_mutex_enter(array->mutex);
-
-	slot = info.si_value.sival_ptr;
-
-	ut_a(slot->reserved);
-
-	*message1 = slot->message1;
-	*message2 = slot->message2;
-
-# ifdef UNIV_DO_FLUSH
-	if (slot->type == OS_FILE_WRITE
-	    && !os_do_not_call_flush_at_each_write) {
-		ut_a(TRUE == os_file_flush(slot->file));
-	}
-# endif /* UNIV_DO_FLUSH */
-
-	os_mutex_exit(array->mutex);
-
-	os_aio_array_free_slot(array, slot);
-
-	return(TRUE);
-}
-#endif
-
-/**************************************************************************
-Do a 'last millisecond' check that the page end is sensible;
-reported page checksum errors from Linux seem to wipe over the page end. */
-static
-void
-os_file_check_page_trailers(
-/*========================*/
-	byte*	combined_buf,	/* in: combined write buffer */
-	ulint	total_len)	/* in: size of combined_buf, in bytes
-				(a multiple of UNIV_PAGE_SIZE) */
-{
-	ulint	len;
-
-	for (len = 0; len + UNIV_PAGE_SIZE <= total_len;
-	     len += UNIV_PAGE_SIZE) {
-		byte*	buf = combined_buf + len;
-
-		if (UNIV_UNLIKELY
-		    (memcmp(buf + (FIL_PAGE_LSN + 4),
-			    buf + (UNIV_PAGE_SIZE
-				   - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
-		    	ut_print_timestamp(stderr);
-		    	fprintf(stderr,
-				"  InnoDB: ERROR: The page to be written"
-				" seems corrupt!\n"
-				"InnoDB: Writing a block of %lu bytes,"
-				" currently at offset %lu\n",
-				(ulong)total_len, (ulong)len);
-			buf_page_print(buf);
-		    	fprintf(stderr,
-				"InnoDB: ERROR: The page to be written"
-				" seems corrupt!\n");
-		}
-	}
-}
-
-/**************************************************************************
+/**********************************************************************//**
 Does simulated aio. This function should be called by an i/o-handler
-thread. */
-
+thread.
+@return	TRUE if the aio operation succeeded */
+UNIV_INTERN
 ibool
 os_aio_simulated_handle(
 /*====================*/
-				/* out: TRUE if the aio operation succeeded */
-	ulint	global_segment,	/* in: the number of the segment in the aio
+	ulint	global_segment,	/*!< in: the number of the segment in the aio
 				arrays to wait for; segment 0 is the ibuf
 				i/o thread, segment 1 the log i/o thread,
 				then follow the non-ibuf read threads, and as
 				the last are the non-ibuf write threads */
-	fil_node_t**message1,	/* out: the messages passed with the aio
+	fil_node_t**message1,	/*!< out: the messages passed with the aio
 				request; note that also in the case where
 				the aio operation failed, these output
 				parameters are valid and can be used to
 				restart the operation, for example */
 	void**	message2,
-	ulint*	type)		/* out: OS_FILE_WRITE or ..._READ */
+	ulint*	type)		/*!< out: OS_FILE_WRITE or ..._READ */
 {
 	os_aio_array_t*	array;
+	ulint		segment;
 	os_aio_slot_t*	slot;
 	os_aio_slot_t*	slot2;
 	os_aio_slot_t*	consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
-	os_aio_slot_t*  lowest_request;
-	os_aio_slot_t*	oldest_request;
 	ulint		n_consecutive;
 	ulint		total_len;
 	ulint		offs;
 	ulint		lowest_offset;
-	ulint		oldest_offset;
 	ulint		biggest_age;
 	ulint		age;
 	byte*		combined_buf;
@@ -3939,9 +3723,7 @@ os_aio_simulated_handle(
 	ulint		n;
 	ulint		i;
 
- 	double          start_usecs, stop_usecs, elapsed_usecs;
- 	time_t          now;
- 	array = os_aio_get_array(global_segment);	
+	segment = os_aio_get_array_and_local_segment(&array, global_segment);
 
 restart:
 	/* NOTE! We only access constant fields in os_aio_array. Therefore
@@ -3950,10 +3732,11 @@ restart:
 	srv_set_io_thread_op_info(global_segment,
 				  "looking for i/o requests (a)");
 	ut_ad(os_aio_validate());
+	ut_ad(segment < array->n_segments);
 
-	n = array->n_slots;
+	n = array->n_slots / array->n_segments;
 
-	/* Look through n slots */
+	/* Look through n slots after the segment * n'th slot */
 
 	if (array == os_aio_read_array
 	    && os_aio_recommend_sleep_for_read_threads) {
@@ -3973,9 +3756,9 @@ restart:
 	done */
 
 	for (i = 0; i < n; i++) {
-		slot = os_aio_array_get_nth_slot(array, i);
+		slot = os_aio_array_get_nth_slot(array, i + segment * n);
 
-		if (slot->reserved && slot->status == OS_AIO_DONE) {
+		if (slot->reserved && slot->io_already_done) {
 
 			if (os_aio_print_debug) {
 				fprintf(stderr,
@@ -3990,64 +3773,74 @@ restart:
 		}
 	}
 
+	n_consecutive = 0;
+
+	/* If there are at least 2 seconds old requests, then pick the oldest
+	one to prevent starvation. If several requests have the same age,
+	then pick the one at the lowest offset. */
+
 	biggest_age = 0;
-	now = time(NULL);
-        oldest_request = lowest_request = NULL;
-        oldest_offset = lowest_offset = ULINT_MAX;
+	lowest_offset = ULINT_MAX;
 
-        /* Find the oldest request and the request with the smallest offset */
 	for (i = 0; i < n; i++) {
-		slot = os_aio_array_get_nth_slot(array, i);
+		slot = os_aio_array_get_nth_slot(array, i + segment * n);
 
-		if (slot->reserved && slot->status == OS_AIO_NOT_ISSUED) {
-			age = (ulint)difftime(now, slot->reservation_time);
+		if (slot->reserved) {
+			age = (ulint)difftime(time(NULL),
+					      slot->reservation_time);
 
-			/* If there are at least 2 seconds old requests, then pick the oldest
-			   one to prevent starvation. If several requests have the same age,
-			   then pick the one at the lowest offset. */
 			if ((age >= 2 && age > biggest_age)
 			    || (age >= 2 && age == biggest_age
-			        && slot->offset < oldest_offset)) {
+				&& slot->offset < lowest_offset)) {
+
+				/* Found an i/o request */
+				consecutive_ios[0] = slot;
+
+				n_consecutive = 1;
 
-			        /* Found an i/o request */
 				biggest_age = age;
-				oldest_request = slot;
-				oldest_offset = slot->offset;
+				lowest_offset = slot->offset;
 			}
+		}
+	}
 
-			/* Look for an i/o request at the lowest offset in the array
-			 * (we ignore the high 32 bits of the offset) */
-			if (slot->offset < lowest_offset) {
-			        /* Found an i/o request */
-				lowest_request = slot;
+	if (n_consecutive == 0) {
+		/* There were no old requests. Look for an i/o request at the
+		lowest offset in the array (we ignore the high 32 bits of the
+		offset in these heuristics) */
 
+		lowest_offset = ULINT_MAX;
 
+		for (i = 0; i < n; i++) {
+			slot = os_aio_array_get_nth_slot(array,
+							 i + segment * n);
+
+			if (slot->reserved && slot->offset < lowest_offset) {
+
+				/* Found an i/o request */
+				consecutive_ios[0] = slot;
+
+				n_consecutive = 1;
 
 				lowest_offset = slot->offset;
 			}
 		}
 	}
 
-	if (!lowest_request && !oldest_request) {
+	if (n_consecutive == 0) {
 
 		/* No i/o requested at the moment */
 
 		goto wait_for_io;
 	}
 
-        if (oldest_request) {
-		slot = oldest_request;
-        } else {
-		slot = lowest_request;
-        }
-        consecutive_ios[0] = slot;
-	n_consecutive = 1;
-  
+	slot = consecutive_ios[0];
+
 	/* Check if there are several consecutive blocks to read or write */
 
 consecutive_loop:
 	for (i = 0; i < n; i++) {
-		slot2 = os_aio_array_get_nth_slot(array, i);
+		slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
 
 		if (slot2->reserved && slot2 != slot
 		    && slot2->offset == slot->offset + slot->len
@@ -4055,8 +3848,7 @@ consecutive_loop:
 		    && slot->offset + slot->len > slot->offset
 		    && slot2->offset_high == slot->offset_high
 		    && slot2->type == slot->type
-		    && slot2->file == slot->file
-		    && slot2->status == OS_AIO_NOT_ISSUED) { 
+		    && slot2->file == slot->file) {
 
 			/* Found a consecutive i/o request */
 
@@ -4065,8 +3857,7 @@ consecutive_loop:
 
 			slot = slot2;
 
-			if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE &&
- 			    n_consecutive < srv_max_merged_io) {
+			if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
 
 				goto consecutive_loop;
 			} else {
@@ -4086,8 +3877,6 @@ consecutive_loop:
 
 	for (i = 0; i < n_consecutive; i++) {
 		total_len += consecutive_ios[i]->len;
-		ut_a(consecutive_ios[i]->status == OS_AIO_NOT_ISSUED);
-		consecutive_ios[i]->status = OS_AIO_ISSUED;
 	}
 
 	if (n_consecutive == 1) {
@@ -4095,16 +3884,7 @@ consecutive_loop:
 		combined_buf = slot->buf;
 		combined_buf2 = NULL;
 	} else {
-		if ((total_len + UNIV_PAGE_SIZE) > os_aio_thread_buffer_size[global_segment]) {
-
-			if (os_aio_thread_buffer[global_segment])
-				ut_free(os_aio_thread_buffer[global_segment]);
-
-			os_aio_thread_buffer[global_segment] = ut_malloc(total_len + UNIV_PAGE_SIZE);
- 
-			os_aio_thread_buffer_size[global_segment] = total_len + UNIV_PAGE_SIZE;
-		}
-		combined_buf2 = os_aio_thread_buffer[global_segment];
+		combined_buf2 = ut_malloc(total_len + UNIV_PAGE_SIZE);
 
 		ut_a(combined_buf2);
 
@@ -4115,9 +3895,6 @@ consecutive_loop:
 	this assumes that there is just one i/o-handler thread serving
 	a single segment of slots! */
 
-	ut_a(slot->reserved);
-	ut_a(slot->status == OS_AIO_ISSUED);
-
 	os_mutex_exit(array->mutex);
 
 	if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
@@ -4144,45 +3921,13 @@ consecutive_loop:
 
 	/* Do the i/o with ordinary, synchronous i/o functions: */
 	if (slot->type == OS_FILE_WRITE) {
-		os_aio_thread_io_writes[global_segment] += n_consecutive;
-		if (array == os_aio_write_array) {
-			if ((total_len % UNIV_PAGE_SIZE != 0)
-			    || (slot->offset % UNIV_PAGE_SIZE != 0)) {
-				fprintf(stderr,
-					"InnoDB: Error: trying a displaced"
-					" write to %s %lu %lu, len %lu\n",
-					slot->name, (ulong) slot->offset_high,
-					(ulong) slot->offset,
-					(ulong) total_len);
-				ut_error;
-			}
-
-			os_file_check_page_trailers(combined_buf, total_len);
-		}
- 		start_usecs = time_usecs();
 		ret = os_file_write(slot->name, slot->file, combined_buf,
 				    slot->offset, slot->offset_high,
 				    total_len);
- 		stop_usecs = time_usecs();
-                elapsed_usecs = stop_usecs - start_usecs;
-                if (elapsed_usecs < 0) elapsed_usecs = 0;
-
-		if (array == os_aio_write_array) {
-			os_file_check_page_trailers(combined_buf, total_len);
-		}
 	} else {
-		start_usecs = time_usecs();
- 		os_aio_thread_io_reads[global_segment] += n_consecutive;
 		ret = os_file_read(slot->file, combined_buf,
 				   slot->offset, slot->offset_high, total_len);
-		stop_usecs = time_usecs();
-                elapsed_usecs = stop_usecs - start_usecs;
-                if (elapsed_usecs < 0) elapsed_usecs = 0;
 	}
- 	if (elapsed_usecs > os_aio_thread_max_io_wait[global_segment])
- 		os_aio_thread_max_io_wait[global_segment] = elapsed_usecs;
- 	os_aio_thread_io_wait[global_segment] += elapsed_usecs;
- 	os_aio_thread_io_requests[global_segment]++;
 
 	ut_a(ret);
 	srv_set_io_thread_op_info(global_segment, "file i/o done");
@@ -4205,13 +3950,16 @@ consecutive_loop:
 		}
 	}
 
+	if (combined_buf2) {
+		ut_free(combined_buf2);
+	}
+
 	os_mutex_enter(array->mutex);
 
 	/* Mark the i/os done in slots */
 
 	for (i = 0; i < n_consecutive; i++) {
-		ut_a(consecutive_ios[i]->status == OS_AIO_ISSUED);
-		consecutive_ios[i]->status = OS_AIO_DONE;
+		consecutive_ios[i]->io_already_done = TRUE;
 	}
 
 	/* We return the messages for the first slot now, and if there were
@@ -4221,8 +3969,6 @@ consecutive_loop:
 slot_io_done:
 
 	ut_a(slot->reserved);
-	ut_a(slot->status == OS_AIO_DONE);
-	slot->status = OS_AIO_CLAIMED;
 
 	*message1 = slot->message1;
 	*message2 = slot->message2;
@@ -4232,7 +3978,6 @@ slot_io_done:
 	os_mutex_exit(array->mutex);
 
 	os_aio_array_free_slot(array, slot);
- 	srv_set_io_thread_op_info(global_segment, "exited handler");
 
 	return(ret);
 
@@ -4261,14 +4006,14 @@ recommended_sleep:
 	goto restart;
 }
 
-/**************************************************************************
-Validates the consistency of an aio array. */
+/**********************************************************************//**
+Validates the consistency of an aio array.
+@return	TRUE if ok */
 static
 ibool
 os_aio_array_validate(
 /*==================*/
-				/* out: TRUE if ok */
-	os_aio_array_t*	array)	/* in: aio wait array */
+	os_aio_array_t*	array)	/*!< in: aio wait array */
 {
 	os_aio_slot_t*	slot;
 	ulint		n_reserved	= 0;
@@ -4279,6 +4024,7 @@ os_aio_array_validate(
 	os_mutex_enter(array->mutex);
 
 	ut_a(array->n_slots > 0);
+	ut_a(array->n_segments > 0);
 
 	for (i = 0; i < array->n_slots; i++) {
 		slot = os_aio_array_get_nth_slot(array, i);
@@ -4296,13 +4042,13 @@ os_aio_array_validate(
 	return(TRUE);
 }
 
-/**************************************************************************
-Validates the consistency the aio system. */
-
+/**********************************************************************//**
+Validates the consistency the aio system.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 os_aio_validate(void)
 /*=================*/
-				/* out: TRUE if ok */
 {
 	os_aio_array_validate(os_aio_read_array);
 	os_aio_array_validate(os_aio_write_array);
@@ -4313,13 +4059,13 @@ os_aio_validate(void)
 	return(TRUE);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Prints info of the aio arrays. */
-
+UNIV_INTERN
 void
 os_aio_print(
 /*=========*/
-	FILE*	file)	/* in: file where to print */
+	FILE*	file)	/*!< in: file where to print */
 {
 	os_aio_array_t*	array;
 	os_aio_slot_t*	slot;
@@ -4328,19 +4074,11 @@ os_aio_print(
 	double		time_elapsed;
 	double		avg_bytes_read;
 	ulint		i;
- 	ulint		num_issued, num_done, num_claimed;
-  
-	for (i = 0; i < os_aio_n_segments; i++) {
-		fprintf(file,
-			"I/O thread %lu state: %s (%s) reads %lu writes %lu "
-			"requests %lu io secs %lf io msecs/request %lf max_io_wait %lf",
-			i, srv_io_thread_op_info[i], srv_io_thread_function[i],
-			os_aio_thread_io_reads[i], os_aio_thread_io_writes[i],
-			os_aio_thread_io_requests[i],
-			os_aio_thread_io_wait[i] / 1000000.0,
-			os_aio_thread_io_requests[i] ?
-			os_aio_thread_io_wait[i] / os_aio_thread_io_requests[i] / 1000.0 : 0.0,
-			os_aio_thread_max_io_wait[i] / 1000.0);
+
+	for (i = 0; i < srv_n_file_io_threads; i++) {
+		fprintf(file, "I/O thread %lu state: %s (%s)", (ulong) i,
+			srv_io_thread_op_info[i],
+			srv_io_thread_function[i]);
 
 #ifndef __WIN__
 		if (os_aio_segment_wait_events[i]->is_set) {
@@ -4360,21 +4098,14 @@ loop:
 	os_mutex_enter(array->mutex);
 
 	ut_a(array->n_slots > 0);
+	ut_a(array->n_segments > 0);
+
 	n_reserved = 0;
-	num_done = num_issued = num_claimed = 0;
 
 	for (i = 0; i < array->n_slots; i++) {
 		slot = os_aio_array_get_nth_slot(array, i);
 
 		if (slot->reserved) {
- 			if (slot->status == OS_AIO_ISSUED)
- 				num_issued++;
- 			else if (slot->status == OS_AIO_DONE)
- 				num_done++;
- 			else {
- 				ut_ad(slot->status == OS_AIO_CLAIMED);
- 				num_claimed++;
- 			}
 			n_reserved++;
 #if 0
 			fprintf(stderr, "Reserved slot, messages %p %p\n",
@@ -4420,13 +4151,6 @@ loop:
 		goto loop;
 	}
 
- 	putc('\n', file);
- 	fprintf(file,
- 		"Summary of background IO slot status: %lu issued, "
- 		"%lu done, %lu claimed, sleep set %d\n",
- 		num_issued, num_done, num_claimed,
- 		(int)os_aio_recommend_sleep_for_read_threads);
-
 	putc('\n', file);
 	current_time = time(NULL);
 	time_elapsed = 0.001 + difftime(current_time, os_last_printout);
@@ -4472,9 +4196,9 @@ loop:
 	os_last_printout = current_time;
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Refreshes the statistics used to print per-second averages. */
-
+UNIV_INTERN
 void
 os_aio_refresh_stats(void)
 /*======================*/
@@ -4488,14 +4212,14 @@ os_aio_refresh_stats(void)
 }
 
 #ifdef UNIV_DEBUG
-/**************************************************************************
+/**********************************************************************//**
 Checks that all slots in the system have been freed, that is, there are
-no pending io operations. */
-
+no pending io operations.
+@return	TRUE if all free */
+UNIV_INTERN
 ibool
 os_aio_all_slots_free(void)
 /*=======================*/
-				/* out: TRUE if all free */
 {
 	os_aio_array_t*	array;
 	ulint		n_res	= 0;
@@ -4548,3 +4272,5 @@ os_aio_all_slots_free(void)
 	return(FALSE);
 }
 #endif /* UNIV_DEBUG */
+
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/os/os0proc.c b/storage/innodb_plugin/os/os0proc.c
new file mode 100644
index 00000000000..a0ea9a1b258
--- /dev/null
+++ b/storage/innodb_plugin/os/os0proc.c
@@ -0,0 +1,230 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file os/os0proc.c
+The interface to the operating system
+process control primitives
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0proc.h"
+#ifdef UNIV_NONINL
+#include "os0proc.ic"
+#endif
+
+#include "ut0mem.h"
+#include "ut0byte.h"
+
+/* FreeBSD for example has only MAP_ANON, Linux has MAP_ANONYMOUS and
+MAP_ANON but MAP_ANON is marked as deprecated */
+#if defined(MAP_ANONYMOUS)
+#define OS_MAP_ANON	MAP_ANONYMOUS
+#elif defined(MAP_ANON)
+#define OS_MAP_ANON	MAP_ANON
+#endif
+
+UNIV_INTERN ibool os_use_large_pages;
+/* Large page size. This may be a boot-time option on some platforms */
+UNIV_INTERN ulint os_large_page_size;
+
+/****************************************************************//**
+Converts the current process id to a number. It is not guaranteed that the
+number is unique. In Linux returns the 'process number' of the current
+thread. That number is the same as one sees in 'top', for example. In Linux
+the thread id is not the same as one sees in 'top'.
+@return	process id as a number */
+UNIV_INTERN
+ulint
+os_proc_get_number(void)
+/*====================*/
+{
+#ifdef __WIN__
+	return((ulint)GetCurrentProcessId());
+#else
+	return((ulint)getpid());
+#endif
+}
+
+/****************************************************************//**
+Allocates large pages memory.
+@return	allocated memory */
+UNIV_INTERN
+void*
+os_mem_alloc_large(
+/*===============*/
+	ulint*	n)			/*!< in/out: number of bytes */
+{
+	void*	ptr;
+	ulint	size;
+#if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
+	int shmid;
+	struct shmid_ds buf;
+
+	if (!os_use_large_pages || !os_large_page_size) {
+		goto skip;
+	}
+
+	/* Align block size to os_large_page_size */
+	ut_ad(ut_is_2pow(os_large_page_size));
+	size = ut_2pow_round(*n + (os_large_page_size - 1),
+			     os_large_page_size);
+
+	shmid = shmget(IPC_PRIVATE, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W);
+	if (shmid < 0) {
+		fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to allocate"
+			" %lu bytes. errno %d\n", size, errno);
+		ptr = NULL;
+	} else {
+		ptr = shmat(shmid, NULL, 0);
+		if (ptr == (void *)-1) {
+			fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to"
+				" attach shared memory segment, errno %d\n",
+				errno);
+		}
+
+		/* Remove the shared memory segment so that it will be
+		automatically freed after memory is detached or
+		process exits */
+		shmctl(shmid, IPC_RMID, &buf);
+	}
+
+	if (ptr) {
+		*n = size;
+		os_fast_mutex_lock(&ut_list_mutex);
+		ut_total_allocated_memory += size;
+		os_fast_mutex_unlock(&ut_list_mutex);
+# ifdef UNIV_SET_MEM_TO_ZERO
+		memset(ptr, '\0', size);
+# endif
+		UNIV_MEM_ALLOC(ptr, size);
+		return(ptr);
+	}
+
+	fprintf(stderr, "InnoDB HugeTLB: Warning: Using conventional"
+		" memory pool\n");
+skip:
+#endif /* HAVE_LARGE_PAGES && UNIV_LINUX */
+
+#ifdef __WIN__
+	SYSTEM_INFO	system_info;
+	GetSystemInfo(&system_info);
+
+	/* Align block size to system page size */
+	ut_ad(ut_is_2pow(system_info.dwPageSize));
+	/* system_info.dwPageSize is only 32-bit. Casting to ulint is required
+	on 64-bit Windows. */
+	size = *n = ut_2pow_round(*n + (system_info.dwPageSize - 1),
+				  (ulint) system_info.dwPageSize);
+	ptr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE,
+			   PAGE_READWRITE);
+	if (!ptr) {
+		fprintf(stderr, "InnoDB: VirtualAlloc(%lu bytes) failed;"
+			" Windows error %lu\n",
+			(ulong) size, (ulong) GetLastError());
+	} else {
+		os_fast_mutex_lock(&ut_list_mutex);
+		ut_total_allocated_memory += size;
+		os_fast_mutex_unlock(&ut_list_mutex);
+		UNIV_MEM_ALLOC(ptr, size);
+	}
+#elif defined __NETWARE__ || !defined OS_MAP_ANON
+	size = *n;
+	ptr = ut_malloc_low(size, TRUE, FALSE);
+#else
+# ifdef HAVE_GETPAGESIZE
+	size = getpagesize();
+# else
+	size = UNIV_PAGE_SIZE;
+# endif
+	/* Align block size to system page size */
+	ut_ad(ut_is_2pow(size));
+	size = *n = ut_2pow_round(*n + (size - 1), size);
+	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | OS_MAP_ANON, -1, 0);
+	if (UNIV_UNLIKELY(ptr == (void*) -1)) {
+		fprintf(stderr, "InnoDB: mmap(%lu bytes) failed;"
+			" errno %lu\n",
+			(ulong) size, (ulong) errno);
+		ptr = NULL;
+	} else {
+		os_fast_mutex_lock(&ut_list_mutex);
+		ut_total_allocated_memory += size;
+		os_fast_mutex_unlock(&ut_list_mutex);
+		UNIV_MEM_ALLOC(ptr, size);
+	}
+#endif
+	return(ptr);
+}
+
+/****************************************************************//**
+Frees large pages memory. */
+UNIV_INTERN
+void
+os_mem_free_large(
+/*==============*/
+	void	*ptr,			/*!< in: pointer returned by
+					os_mem_alloc_large() */
+	ulint	size)			/*!< in: size returned by
+					os_mem_alloc_large() */
+{
+	os_fast_mutex_lock(&ut_list_mutex);
+	ut_a(ut_total_allocated_memory >= size);
+	os_fast_mutex_unlock(&ut_list_mutex);
+
+#if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
+	if (os_use_large_pages && os_large_page_size && !shmdt(ptr)) {
+		os_fast_mutex_lock(&ut_list_mutex);
+		ut_a(ut_total_allocated_memory >= size);
+		ut_total_allocated_memory -= size;
+		os_fast_mutex_unlock(&ut_list_mutex);
+		UNIV_MEM_FREE(ptr, size);
+		return;
+	}
+#endif /* HAVE_LARGE_PAGES && UNIV_LINUX */
+#ifdef __WIN__
+	/* When RELEASE memory, the size parameter must be 0.
+	Do not use MEM_RELEASE with MEM_DECOMMIT. */
+	if (!VirtualFree(ptr, 0, MEM_RELEASE)) {
+		fprintf(stderr, "InnoDB: VirtualFree(%p, %lu) failed;"
+			" Windows error %lu\n",
+			ptr, (ulong) size, (ulong) GetLastError());
+	} else {
+		os_fast_mutex_lock(&ut_list_mutex);
+		ut_a(ut_total_allocated_memory >= size);
+		ut_total_allocated_memory -= size;
+		os_fast_mutex_unlock(&ut_list_mutex);
+		UNIV_MEM_FREE(ptr, size);
+	}
+#elif defined __NETWARE__ || !defined OS_MAP_ANON
+	ut_free(ptr);
+#else
+	if (munmap(ptr, size)) {
+		fprintf(stderr, "InnoDB: munmap(%p, %lu) failed;"
+			" errno %lu\n",
+			ptr, (ulong) size, (ulong) errno);
+	} else {
+		os_fast_mutex_lock(&ut_list_mutex);
+		ut_a(ut_total_allocated_memory >= size);
+		ut_total_allocated_memory -= size;
+		os_fast_mutex_unlock(&ut_list_mutex);
+		UNIV_MEM_FREE(ptr, size);
+	}
+#endif
+}
diff --git a/storage/innobase/os/os0sync.c b/storage/innodb_plugin/os/os0sync.c
similarity index 71%
rename from storage/innobase/os/os0sync.c
rename to storage/innodb_plugin/os/os0sync.c
index 18fd38f3f9b..4ec340b72b5 100644
--- a/storage/innobase/os/os0sync.c
+++ b/storage/innodb_plugin/os/os0sync.c
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file os/os0sync.c
 The interface to the operating system
 synchronization primitives.
 
-(c) 1995 Innobase Oy
-
 Created 9/6/1995 Heikki Tuuri
 *******************************************************/
 
@@ -21,9 +38,9 @@ Created 9/6/1995 Heikki Tuuri
 
 /* Type definition for an operating system mutex struct */
 struct os_mutex_struct{
-	os_event_t	event;	/* Used by sync0arr.c for queing threads */
-	void*		handle;	/* OS handle to mutex */
-	ulint		count;	/* we use this counter to check
+	os_event_t	event;	/*!< Used by sync0arr.c for queing threads */
+	void*		handle;	/*!< OS handle to mutex */
+	ulint		count;	/*!< we use this counter to check
 				that the same thread does not
 				recursively lock the mutex: we
 				do not assume that the OS mutex
@@ -33,33 +50,35 @@ struct os_mutex_struct{
 				/* list of all 'slow' OS mutexes created */
 };
 
-/* Mutex protecting counts and the lists of OS mutexes and events */
-os_mutex_t	os_sync_mutex;
-ibool		os_sync_mutex_inited	= FALSE;
-ibool		os_sync_free_called	= FALSE;
+/** Mutex protecting counts and the lists of OS mutexes and events */
+UNIV_INTERN os_mutex_t	os_sync_mutex;
+/** TRUE if os_sync_mutex has been initialized */
+static ibool		os_sync_mutex_inited	= FALSE;
+/** TRUE when os_sync_free() is being executed */
+static ibool		os_sync_free_called	= FALSE;
 
-/* This is incremented by 1 in os_thread_create and decremented by 1 in
+/** This is incremented by 1 in os_thread_create and decremented by 1 in
 os_thread_exit */
-ulint	os_thread_count		= 0;
+UNIV_INTERN ulint	os_thread_count		= 0;
 
-/* The list of all events created */
-UT_LIST_BASE_NODE_T(os_event_struct_t)	os_event_list;
+/** The list of all events created */
+static UT_LIST_BASE_NODE_T(os_event_struct_t)	os_event_list;
 
-/* The list of all OS 'slow' mutexes */
-UT_LIST_BASE_NODE_T(os_mutex_str_t)	os_mutex_list;
+/** The list of all OS 'slow' mutexes */
+static UT_LIST_BASE_NODE_T(os_mutex_str_t)	os_mutex_list;
 
-ulint	os_event_count		= 0;
-ulint	os_mutex_count		= 0;
-ulint	os_fast_mutex_count	= 0;
+UNIV_INTERN ulint	os_event_count		= 0;
+UNIV_INTERN ulint	os_mutex_count		= 0;
+UNIV_INTERN ulint	os_fast_mutex_count	= 0;
 
 /* Because a mutex is embedded inside an event and there is an
 event embedded inside a mutex, on free, this generates a recursive call.
 This version of the free event function doesn't acquire the global lock */
 static void os_event_free_internal(os_event_t	event);
 
-/*************************************************************
+/*********************************************************//**
 Initializes global event and OS 'slow' mutex lists. */
-
+UNIV_INTERN
 void
 os_sync_init(void)
 /*==============*/
@@ -72,9 +91,9 @@ os_sync_init(void)
 	os_sync_mutex_inited = TRUE;
 }
 
-/*************************************************************
+/*********************************************************//**
 Frees created events and OS 'slow' mutexes. */
-
+UNIV_INTERN
 void
 os_sync_free(void)
 /*==============*/
@@ -109,16 +128,16 @@ os_sync_free(void)
 	os_sync_free_called = FALSE;
 }
 
-/*************************************************************
+/*********************************************************//**
 Creates an event semaphore, i.e., a semaphore which may just have two
 states: signaled and nonsignaled. The created event is manual reset: it
-must be reset explicitly by calling sync_os_reset_event. */
-
+must be reset explicitly by calling sync_os_reset_event.
+@return	the event handle */
+UNIV_INTERN
 os_event_t
 os_event_create(
 /*============*/
-				/* out: the event handle */
-	const char*	name)	/* in: the name of the event, if NULL
+	const char*	name)	/*!< in: the name of the event, if NULL
 				the event is created without a name */
 {
 #ifdef __WIN__
@@ -145,12 +164,8 @@ os_event_create(
 
 	os_fast_mutex_init(&(event->os_mutex));
 
-#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
-	ut_a(0 == pthread_cond_init(&(event->cond_var),
-				    pthread_condattr_default));
-#else
 	ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
-#endif
+
 	event->is_set = FALSE;
 
 	/* We return this value in os_event_reset(), which can then be
@@ -181,55 +196,14 @@ os_event_create(
 	return(event);
 }
 
-#ifdef __WIN__
-/*************************************************************
-Creates an auto-reset event semaphore, i.e., an event which is automatically
-reset when a single thread is released. Works only in Windows. */
-
-os_event_t
-os_event_create_auto(
-/*=================*/
-				/* out: the event handle */
-	const char*	name)	/* in: the name of the event, if NULL
-				the event is created without a name */
-{
-	os_event_t event;
-
-	event = ut_malloc(sizeof(struct os_event_struct));
-
-	event->handle = CreateEvent(NULL, /* No security attributes */
-				    FALSE, /* Auto-reset */
-				    FALSE, /* Initial state nonsignaled */
-				    (LPCTSTR) name);
-
-	if (!event->handle) {
-		fprintf(stderr,
-			"InnoDB: Could not create a Windows auto"
-			" event semaphore; Windows error %lu\n",
-			(ulong) GetLastError());
-	}
-
-	/* Put to the list of events */
-	os_mutex_enter(os_sync_mutex);
-
-	UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
-
-	os_event_count++;
-
-	os_mutex_exit(os_sync_mutex);
-
-	return(event);
-}
-#endif
-
-/**************************************************************
+/**********************************************************//**
 Sets an event semaphore to the signaled state: lets waiting threads
 proceed. */
-
+UNIV_INTERN
 void
 os_event_set(
 /*=========*/
-	os_event_t	event)	/* in: event to set */
+	os_event_t	event)	/*!< in: event to set */
 {
 #ifdef __WIN__
 	ut_a(event);
@@ -251,21 +225,21 @@ os_event_set(
 #endif
 }
 
-/**************************************************************
+/**********************************************************//**
 Resets an event semaphore to the nonsignaled state. Waiting threads will
 stop to wait for the event.
 The return value should be passed to os_even_wait_low() if it is desired
 that this thread should not wait in case of an intervening call to
 os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low(). */
-
-ib_longlong
+os_event_wait_low() call. See comments for os_event_wait_low().
+@return	current signal_count. */
+UNIV_INTERN
+ib_int64_t
 os_event_reset(
 /*===========*/
-				/* out: current signal_count. */
-	os_event_t	event)	/* in: event to reset */
+	os_event_t	event)	/*!< in: event to reset */
 {
-	ib_longlong	ret = 0;
+	ib_int64_t	ret = 0;
 
 #ifdef __WIN__
 	ut_a(event);
@@ -288,13 +262,13 @@ os_event_reset(
 	return(ret);
 }
 
-/**************************************************************
+/**********************************************************//**
 Frees an event object, without acquiring the global lock. */
 static
 void
 os_event_free_internal(
 /*===================*/
-	os_event_t	event)	/* in: event to free */
+	os_event_t	event)	/*!< in: event to free */
 {
 #ifdef __WIN__
 	ut_a(event);
@@ -317,13 +291,13 @@ os_event_free_internal(
 	ut_free(event);
 }
 
-/**************************************************************
+/**********************************************************//**
 Frees an event object. */
-
+UNIV_INTERN
 void
 os_event_free(
 /*==========*/
-	os_event_t	event)	/* in: event to free */
+	os_event_t	event)	/*!< in: event to free */
 
 {
 #ifdef __WIN__
@@ -349,7 +323,7 @@ os_event_free(
 	ut_free(event);
 }
 
-/**************************************************************
+/**********************************************************//**
 Waits for an event object until it is in the signaled state. If
 srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
 waiting thread when the event becomes signaled (or immediately if the
@@ -369,12 +343,12 @@ thread C calls os_event_wait()  [infinite wait!]
 Where such a scenario is possible, to avoid infinite wait, the
 value returned by os_event_reset() should be passed in as
 reset_sig_count. */
-
+UNIV_INTERN
 void
 os_event_wait_low(
 /*==============*/
-	os_event_t	event,		/* in: event to wait */
-	ib_longlong	reset_sig_count)/* in: zero or the value
+	os_event_t	event,		/*!< in: event to wait */
+	ib_int64_t	reset_sig_count)/*!< in: zero or the value
 					returned by previous call of
 					os_event_reset(). */
 {
@@ -394,7 +368,7 @@ os_event_wait_low(
 		os_thread_exit(NULL);
 	}
 #else
-	ib_longlong	old_signal_count;
+	ib_int64_t	old_signal_count;
 
 	os_fast_mutex_lock(&(event->os_mutex));
 
@@ -428,17 +402,16 @@ os_event_wait_low(
 #endif
 }
 
-/**************************************************************
+/**********************************************************//**
 Waits for an event object until it is in the signaled state or
-a timeout is exceeded. In Unix the timeout is always infinite. */
-
+a timeout is exceeded. In Unix the timeout is always infinite.
+@return	0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
+UNIV_INTERN
 ulint
 os_event_wait_time(
 /*===============*/
-				/* out: 0 if success, OS_SYNC_TIME_EXCEEDED if
-				timeout was exceeded */
-	os_event_t	event,	/* in: event to wait */
-	ulint		time)	/* in: timeout in microseconds, or
+	os_event_t	event,	/*!< in: event to wait */
+	ulint		time)	/*!< in: timeout in microseconds, or
 				OS_SYNC_INFINITE_TIME */
 {
 #ifdef __WIN__
@@ -474,19 +447,18 @@ os_event_wait_time(
 }
 
 #ifdef __WIN__
-/**************************************************************
+/**********************************************************//**
 Waits for any event in an OS native event array. Returns if even a single
-one is signaled or becomes signaled. */
-
+one is signaled or becomes signaled.
+@return	index of the event which was signaled */
+UNIV_INTERN
 ulint
 os_event_wait_multiple(
 /*===================*/
-					/* out: index of the event
-					which was signaled */
-	ulint			n,	/* in: number of events in the
+	ulint			n,	/*!< in: number of events in the
 					array */
 	os_native_event_t*	native_event_array)
-					/* in: pointer to an array of event
+					/*!< in: pointer to an array of event
 					handles */
 {
 	DWORD	index;
@@ -498,7 +470,7 @@ os_event_wait_multiple(
 				       FALSE,	   /* Wait for any 1 event */
 				       INFINITE); /* Infinite wait time
 						  limit */
-	ut_a(index >= WAIT_OBJECT_0);	/* NOTE: Pointless comparision */
+	ut_a(index >= WAIT_OBJECT_0);	/* NOTE: Pointless comparison */
 	ut_a(index < WAIT_OBJECT_0 + n);
 
 	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
@@ -509,15 +481,15 @@ os_event_wait_multiple(
 }
 #endif
 
-/*************************************************************
+/*********************************************************//**
 Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */
-
+mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
+@return	the mutex handle */
+UNIV_INTERN
 os_mutex_t
 os_mutex_create(
 /*============*/
-				/* out: the mutex handle */
-	const char*	name)	/* in: the name of the mutex, if NULL
+	const char*	name)	/*!< in: the name of the mutex, if NULL
 				the mutex is created without a name */
 {
 #ifdef __WIN__
@@ -544,7 +516,7 @@ os_mutex_create(
 	mutex_str->count = 0;
 	mutex_str->event = os_event_create(NULL);
 
-	if (os_sync_mutex_inited) {
+	if (UNIV_LIKELY(os_sync_mutex_inited)) {
 		/* When creating os_sync_mutex itself we cannot reserve it */
 		os_mutex_enter(os_sync_mutex);
 	}
@@ -553,20 +525,20 @@ os_mutex_create(
 
 	os_mutex_count++;
 
-	if (os_sync_mutex_inited) {
+	if (UNIV_LIKELY(os_sync_mutex_inited)) {
 		os_mutex_exit(os_sync_mutex);
 	}
 
 	return(mutex_str);
 }
 
-/**************************************************************
+/**********************************************************//**
 Acquires ownership of a mutex semaphore. */
-
+UNIV_INTERN
 void
 os_mutex_enter(
 /*===========*/
-	os_mutex_t	mutex)	/* in: mutex to acquire */
+	os_mutex_t	mutex)	/*!< in: mutex to acquire */
 {
 #ifdef __WIN__
 	DWORD	err;
@@ -589,13 +561,13 @@ os_mutex_enter(
 #endif
 }
 
-/**************************************************************
+/**********************************************************//**
 Releases ownership of a mutex. */
-
+UNIV_INTERN
 void
 os_mutex_exit(
 /*==========*/
-	os_mutex_t	mutex)	/* in: mutex to release */
+	os_mutex_t	mutex)	/*!< in: mutex to release */
 {
 	ut_a(mutex);
 
@@ -609,21 +581,21 @@ os_mutex_exit(
 #endif
 }
 
-/**************************************************************
+/**********************************************************//**
 Frees a mutex object. */
-
+UNIV_INTERN
 void
 os_mutex_free(
 /*==========*/
-	os_mutex_t	mutex)	/* in: mutex to free */
+	os_mutex_t	mutex)	/*!< in: mutex to free */
 {
 	ut_a(mutex);
 
-	if (!os_sync_free_called) {
+	if (UNIV_LIKELY(!os_sync_free_called)) {
 		os_event_free_internal(mutex->event);
 	}
 
-	if (os_sync_mutex_inited) {
+	if (UNIV_LIKELY(os_sync_mutex_inited)) {
 		os_mutex_enter(os_sync_mutex);
 	}
 
@@ -631,7 +603,7 @@ os_mutex_free(
 
 	os_mutex_count--;
 
-	if (os_sync_mutex_inited) {
+	if (UNIV_LIKELY(os_sync_mutex_inited)) {
 		os_mutex_exit(os_sync_mutex);
 	}
 
@@ -646,26 +618,22 @@ os_mutex_free(
 #endif
 }
 
-/*************************************************************
+/*********************************************************//**
 Initializes an operating system fast mutex semaphore. */
-
+UNIV_INTERN
 void
 os_fast_mutex_init(
 /*===============*/
-	os_fast_mutex_t*	fast_mutex)	/* in: fast mutex */
+	os_fast_mutex_t*	fast_mutex)	/*!< in: fast mutex */
 {
 #ifdef __WIN__
 	ut_a(fast_mutex);
 
 	InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#else
-#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
-	ut_a(0 == pthread_mutex_init(fast_mutex, pthread_mutexattr_default));
 #else
 	ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST));
 #endif
-#endif
-	if (os_sync_mutex_inited) {
+	if (UNIV_LIKELY(os_sync_mutex_inited)) {
 		/* When creating os_sync_mutex itself (in Unix) we cannot
 		reserve it */
 
@@ -674,18 +642,18 @@ os_fast_mutex_init(
 
 	os_fast_mutex_count++;
 
-	if (os_sync_mutex_inited) {
+	if (UNIV_LIKELY(os_sync_mutex_inited)) {
 		os_mutex_exit(os_sync_mutex);
 	}
 }
 
-/**************************************************************
+/**********************************************************//**
 Acquires ownership of a fast mutex. */
-
+UNIV_INTERN
 void
 os_fast_mutex_lock(
 /*===============*/
-	os_fast_mutex_t*	fast_mutex)	/* in: mutex to acquire */
+	os_fast_mutex_t*	fast_mutex)	/*!< in: mutex to acquire */
 {
 #ifdef __WIN__
 	EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex);
@@ -694,13 +662,13 @@ os_fast_mutex_lock(
 #endif
 }
 
-/**************************************************************
+/**********************************************************//**
 Releases ownership of a fast mutex. */
-
+UNIV_INTERN
 void
 os_fast_mutex_unlock(
 /*=================*/
-	os_fast_mutex_t*	fast_mutex)	/* in: mutex to release */
+	os_fast_mutex_t*	fast_mutex)	/*!< in: mutex to release */
 {
 #ifdef __WIN__
 	LeaveCriticalSection(fast_mutex);
@@ -709,13 +677,13 @@ os_fast_mutex_unlock(
 #endif
 }
 
-/**************************************************************
+/**********************************************************//**
 Frees a mutex object. */
-
+UNIV_INTERN
 void
 os_fast_mutex_free(
 /*===============*/
-	os_fast_mutex_t*	fast_mutex)	/* in: mutex to free */
+	os_fast_mutex_t*	fast_mutex)	/*!< in: mutex to free */
 {
 #ifdef __WIN__
 	ut_a(fast_mutex);
@@ -726,7 +694,7 @@ os_fast_mutex_free(
 
 	ret = pthread_mutex_destroy(fast_mutex);
 
-	if (ret != 0) {
+	if (UNIV_UNLIKELY(ret != 0)) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr,
 			"  InnoDB: error: return value %lu when calling\n"
@@ -735,10 +703,10 @@ os_fast_mutex_free(
 			"InnoDB: Byte contents of the pthread mutex at %p:\n",
 			(void*) fast_mutex);
 		ut_print_buf(stderr, fast_mutex, sizeof(os_fast_mutex_t));
-		fprintf(stderr, "\n");
+		putc('\n', stderr);
 	}
 #endif
-	if (os_sync_mutex_inited) {
+	if (UNIV_LIKELY(os_sync_mutex_inited)) {
 		/* When freeing the last mutexes, we have
 		already freed os_sync_mutex */
 
@@ -747,7 +715,7 @@ os_fast_mutex_free(
 
 	os_fast_mutex_count--;
 
-	if (os_sync_mutex_inited) {
+	if (UNIV_LIKELY(os_sync_mutex_inited)) {
 		os_mutex_exit(os_sync_mutex);
 	}
 }
diff --git a/storage/innobase/os/os0thread.c b/storage/innodb_plugin/os/os0thread.c
similarity index 74%
rename from storage/innobase/os/os0thread.c
rename to storage/innodb_plugin/os/os0thread.c
index a0b1e51d359..9a2d95cb166 100644
--- a/storage/innobase/os/os0thread.c
+++ b/storage/innodb_plugin/os/os0thread.c
@@ -1,7 +1,24 @@
-/******************************************************
-The interface to the operating system thread control primitives
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file os/os0thread.c
+The interface to the operating system thread control primitives
 
 Created 9/8/1995 Heikki Tuuri
 *******************************************************/
@@ -15,18 +32,19 @@ Created 9/8/1995 Heikki Tuuri
 #include <windows.h>
 #endif
 
+#ifndef UNIV_HOTBACKUP
 #include "srv0srv.h"
 #include "os0sync.h"
 
-/*******************************************************************
-Compares two thread ids for equality. */
-
+/***************************************************************//**
+Compares two thread ids for equality.
+@return	TRUE if equal */
+UNIV_INTERN
 ibool
 os_thread_eq(
 /*=========*/
-				/* out: TRUE if equal */
-	os_thread_id_t	a,	/* in: OS thread or thread id */
-	os_thread_id_t	b)	/* in: OS thread or thread id */
+	os_thread_id_t	a,	/*!< in: OS thread or thread id */
+	os_thread_id_t	b)	/*!< in: OS thread or thread id */
 {
 #ifdef __WIN__
 	if (a == b) {
@@ -43,14 +61,15 @@ os_thread_eq(
 #endif
 }
 
-/********************************************************************
+/****************************************************************//**
 Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
-unique for the thread though! */
-
+unique for the thread though!
+@return	thread identifier as a number */
+UNIV_INTERN
 ulint
 os_thread_pf(
 /*=========*/
-	os_thread_id_t	a)
+	os_thread_id_t	a)	/*!< in: OS thread identifier */
 {
 #ifdef UNIV_HPUX10
 	/* In HP-UX-10.20 a pthread_t is a struct of 3 fields: field1, field2,
@@ -62,11 +81,12 @@ os_thread_pf(
 #endif
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Returns the thread identifier of current thread. Currently the thread
 identifier in Unix is the thread handle itself. Note that in HP-UX
-pthread_t is a struct of 3 fields. */
-
+pthread_t is a struct of 3 fields.
+@return	current thread identifier */
+UNIV_INTERN
 os_thread_id_t
 os_thread_get_curr_id(void)
 /*=======================*/
@@ -78,24 +98,24 @@ os_thread_get_curr_id(void)
 #endif
 }
 
-/********************************************************************
+/****************************************************************//**
 Creates a new thread of execution. The execution starts from
 the function given. The start function takes a void* parameter
-and returns an ulint. */
-
+and returns an ulint.
+@return	handle to the thread */
+UNIV_INTERN
 os_thread_t
 os_thread_create(
 /*=============*/
-						/* out: handle to the thread */
 #ifndef __WIN__
 	os_posix_f_t		start_f,
 #else
-	ulint (*start_f)(void*),		/* in: pointer to function
+	ulint (*start_f)(void*),		/*!< in: pointer to function
 						from which to start */
 #endif
-	void*			arg,		/* in: argument to start
+	void*			arg,		/*!< in: argument to start
 						function */
-	os_thread_id_t*		thread_id)	/* out: id of the created
+	os_thread_id_t*		thread_id)	/*!< out: id of the created
 						thread, or NULL */
 {
 #ifdef __WIN__
@@ -132,7 +152,7 @@ os_thread_create(
 	os_thread_t	pthread;
 	pthread_attr_t	attr;
 
-#if !(defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10))
+#ifndef UNIV_HPUX10
 	pthread_attr_init(&attr);
 #endif
 
@@ -166,7 +186,7 @@ os_thread_create(
 	os_thread_count++;
 	os_mutex_exit(os_sync_mutex);
 
-#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
+#ifdef UNIV_HPUX10
 	ret = pthread_create(&pthread, pthread_attr_default, start_f, arg);
 #else
 	ret = pthread_create(&pthread, &attr, start_f, arg);
@@ -177,7 +197,7 @@ os_thread_create(
 		exit(1);
 	}
 
-#if !(defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10))
+#ifndef UNIV_HPUX10
 	pthread_attr_destroy(&attr);
 #endif
 	if (srv_set_thread_priorities) {
@@ -193,13 +213,13 @@ os_thread_create(
 #endif
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Exits the current thread. */
-
+UNIV_INTERN
 void
 os_thread_exit(
 /*===========*/
-	void*	exit_value)	/* in: exit value; in Windows this void*
+	void*	exit_value)	/*!< in: exit value; in Windows this void*
 				is cast as a DWORD */
 {
 #ifdef UNIV_DEBUG_THREAD_CREATION
@@ -217,18 +237,10 @@ os_thread_exit(
 #endif
 }
 
-#ifdef HAVE_PTHREAD_JOIN
-int
-os_thread_join(
-/*===========*/
-  os_thread_id_t  thread_id)	/* in: id of the thread to join */
-{
-	return(pthread_join(thread_id, NULL));
-}
-#endif
-/*********************************************************************
-Returns handle to the current thread. */
-
+/*****************************************************************//**
+Returns handle to the current thread.
+@return	current thread handle */
+UNIV_INTERN
 os_thread_t
 os_thread_get_curr(void)
 /*====================*/
@@ -240,9 +252,9 @@ os_thread_get_curr(void)
 #endif
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Advises the os to give up remainder of the thread's time slice. */
-
+UNIV_INTERN
 void
 os_thread_yield(void)
 /*=================*/
@@ -259,14 +271,15 @@ os_thread_yield(void)
 	os_thread_sleep(0);
 #endif
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/*********************************************************************
+/*****************************************************************//**
 The thread sleeps at least the time given in microseconds. */
-
+UNIV_INTERN
 void
 os_thread_sleep(
 /*============*/
-	ulint	tm)	/* in: time in microseconds */
+	ulint	tm)	/*!< in: time in microseconds */
 {
 #ifdef __WIN__
 	Sleep((DWORD) tm / 1000);
@@ -282,14 +295,15 @@ os_thread_sleep(
 #endif
 }
 
-/**********************************************************************
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
 Sets a thread priority. */
-
+UNIV_INTERN
 void
 os_thread_set_priority(
 /*===================*/
-	os_thread_t	handle,	/* in: OS handle to the thread */
-	ulint		pri)	/* in: priority */
+	os_thread_t	handle,	/*!< in: OS handle to the thread */
+	ulint		pri)	/*!< in: priority */
 {
 #ifdef __WIN__
 	int	os_pri;
@@ -311,15 +325,15 @@ os_thread_set_priority(
 #endif
 }
 
-/**********************************************************************
-Gets a thread priority. */
-
+/******************************************************************//**
+Gets a thread priority.
+@return	priority */
+UNIV_INTERN
 ulint
 os_thread_get_priority(
 /*===================*/
-				/* out: priority */
 	os_thread_t	handle __attribute__((unused)))
-				/* in: OS handle to the thread */
+				/*!< in: OS handle to the thread */
 {
 #ifdef __WIN__
 	int	os_pri;
@@ -343,9 +357,10 @@ os_thread_get_priority(
 #endif
 }
 
-/**********************************************************************
-Gets the last operating system error code for the calling thread. */
-
+/******************************************************************//**
+Gets the last operating system error code for the calling thread.
+@return	last error on Windows, 0 otherwise */
+UNIV_INTERN
 ulint
 os_thread_get_last_error(void)
 /*==========================*/
@@ -356,3 +371,4 @@ os_thread_get_last_error(void)
 	return(0);
 #endif
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/page/page0cur.c b/storage/innodb_plugin/page/page0cur.c
new file mode 100644
index 00000000000..65f3ba67439
--- /dev/null
+++ b/storage/innodb_plugin/page/page0cur.c
@@ -0,0 +1,1987 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file page/page0cur.c
+The page cursor
+
+Created 10/4/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "page0cur.h"
+#ifdef UNIV_NONINL
+#include "page0cur.ic"
+#endif
+
+#include "page0zip.h"
+#include "mtr0log.h"
+#include "log0recv.h"
+#include "ut0ut.h"
+#ifndef UNIV_HOTBACKUP
+#include "rem0cmp.h"
+
+#ifdef PAGE_CUR_ADAPT
+# ifdef UNIV_SEARCH_PERF_STAT
+static ulint	page_cur_short_succ	= 0;
+# endif /* UNIV_SEARCH_PERF_STAT */
+
+/*******************************************************************//**
+This is a linear congruential generator PRNG. Returns a pseudo random
+number between 0 and 2^64-1 inclusive. The formula and the constants
+being used are:
+X[n+1] = (a * X[n] + c) mod m
+where:
+X[0] = ut_time_us(NULL)
+a = 1103515245 (3^5 * 5 * 7 * 129749)
+c = 12345 (3 * 5 * 823)
+m = 18446744073709551616 (2^64)
+
+@return	number between 0 and 2^64-1 */
+static
+ib_uint64_t
+page_cur_lcg_prng(void)
+/*===================*/
+{
+#define LCG_a	1103515245
+#define LCG_c	12345
+	static ib_uint64_t	lcg_current = 0;
+	static ibool		initialized = FALSE;
+
+	if (!initialized) {
+		lcg_current = (ib_uint64_t) ut_time_us(NULL);
+		initialized = TRUE;
+	}
+
+	/* no need to "% 2^64" explicitly because lcg_current is
+	64 bit and this will be done anyway */
+	lcg_current = LCG_a * lcg_current + LCG_c;
+
+	return(lcg_current);
+}
+
+/****************************************************************//**
+Tries a search shortcut based on the last insert.
+@return	TRUE on success */
+UNIV_INLINE
+ibool
+page_cur_try_search_shortcut(
+/*=========================*/
+	const buf_block_t*	block,	/*!< in: index page */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*		tuple,	/*!< in: data tuple */
+	ulint*			iup_matched_fields,
+					/*!< in/out: already matched
+					fields in upper limit record */
+	ulint*			iup_matched_bytes,
+					/*!< in/out: already matched
+					bytes in a field not yet
+					completely matched */
+	ulint*			ilow_matched_fields,
+					/*!< in/out: already matched
+					fields in lower limit record */
+	ulint*			ilow_matched_bytes,
+					/*!< in/out: already matched
+					bytes in a field not yet
+					completely matched */
+	page_cur_t*		cursor) /*!< out: page cursor */
+{
+	const rec_t*	rec;
+	const rec_t*	next_rec;
+	ulint		low_match;
+	ulint		low_bytes;
+	ulint		up_match;
+	ulint		up_bytes;
+#ifdef UNIV_SEARCH_DEBUG
+	page_cur_t	cursor2;
+#endif
+	ibool		success		= FALSE;
+	const page_t*	page		= buf_block_get_frame(block);
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	ut_ad(dtuple_check_typed(tuple));
+
+	rec = page_header_get_ptr(page, PAGE_LAST_INSERT);
+	offsets = rec_get_offsets(rec, index, offsets,
+				  dtuple_get_n_fields(tuple), &heap);
+
+	ut_ad(rec);
+	ut_ad(page_rec_is_user_rec(rec));
+
+	ut_pair_min(&low_match, &low_bytes,
+		    *ilow_matched_fields, *ilow_matched_bytes,
+		    *iup_matched_fields, *iup_matched_bytes);
+
+	up_match = low_match;
+	up_bytes = low_bytes;
+
+	if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets,
+					   &low_match, &low_bytes) < 0) {
+		goto exit_func;
+	}
+
+	next_rec = page_rec_get_next_const(rec);
+	offsets = rec_get_offsets(next_rec, index, offsets,
+				  dtuple_get_n_fields(tuple), &heap);
+
+	if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets,
+					   &up_match, &up_bytes) >= 0) {
+		goto exit_func;
+	}
+
+	page_cur_position(rec, block, cursor);
+
+#ifdef UNIV_SEARCH_DEBUG
+	page_cur_search_with_match(block, index, tuple, PAGE_CUR_DBG,
+				   iup_matched_fields,
+				   iup_matched_bytes,
+				   ilow_matched_fields,
+				   ilow_matched_bytes,
+				   &cursor2);
+	ut_a(cursor2.rec == cursor->rec);
+
+	if (!page_rec_is_supremum(next_rec)) {
+
+		ut_a(*iup_matched_fields == up_match);
+		ut_a(*iup_matched_bytes == up_bytes);
+	}
+
+	ut_a(*ilow_matched_fields == low_match);
+	ut_a(*ilow_matched_bytes == low_bytes);
+#endif
+	if (!page_rec_is_supremum(next_rec)) {
+
+		*iup_matched_fields = up_match;
+		*iup_matched_bytes = up_bytes;
+	}
+
+	*ilow_matched_fields = low_match;
+	*ilow_matched_bytes = low_bytes;
+
+#ifdef UNIV_SEARCH_PERF_STAT
+	page_cur_short_succ++;
+#endif
+	success = TRUE;
+exit_func:
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+	return(success);
+}
+
+#endif
+
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+/****************************************************************//**
+Checks if the nth field in a record is a character type field which extends
+the nth field in tuple, i.e., the field is longer or equal in length and has
+common first characters.
+@return	TRUE if rec field extends tuple field */
+static
+ibool
+page_cur_rec_field_extends(
+/*=======================*/
+	const dtuple_t*	tuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: record */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n)	/*!< in: compare nth field */
+{
+	const dtype_t*	type;
+	const dfield_t*	dfield;
+	const byte*	rec_f;
+	ulint		rec_f_len;
+
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	dfield = dtuple_get_nth_field(tuple, n);
+
+	type = dfield_get_type(dfield);
+
+	rec_f = rec_get_nth_field(rec, offsets, n, &rec_f_len);
+
+	if (type->mtype == DATA_VARCHAR
+	    || type->mtype == DATA_CHAR
+	    || type->mtype == DATA_FIXBINARY
+	    || type->mtype == DATA_BINARY
+	    || type->mtype == DATA_BLOB
+	    || type->mtype == DATA_VARMYSQL
+	    || type->mtype == DATA_MYSQL) {
+
+		if (dfield_get_len(dfield) != UNIV_SQL_NULL
+		    && rec_f_len != UNIV_SQL_NULL
+		    && rec_f_len >= dfield_get_len(dfield)
+		    && !cmp_data_data_slow(type->mtype, type->prtype,
+					   dfield_get_data(dfield),
+					   dfield_get_len(dfield),
+					   rec_f, dfield_get_len(dfield))) {
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+
+/****************************************************************//**
+Searches the right position for a page cursor. */
+UNIV_INTERN
+void
+page_cur_search_with_match(
+/*=======================*/
+	const buf_block_t*	block,	/*!< in: buffer block */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*		tuple,	/*!< in: data tuple */
+	ulint			mode,	/*!< in: PAGE_CUR_L,
+					PAGE_CUR_LE, PAGE_CUR_G, or
+					PAGE_CUR_GE */
+	ulint*			iup_matched_fields,
+					/*!< in/out: already matched
+					fields in upper limit record */
+	ulint*			iup_matched_bytes,
+					/*!< in/out: already matched
+					bytes in a field not yet
+					completely matched */
+	ulint*			ilow_matched_fields,
+					/*!< in/out: already matched
+					fields in lower limit record */
+	ulint*			ilow_matched_bytes,
+					/*!< in/out: already matched
+					bytes in a field not yet
+					completely matched */
+	page_cur_t*		cursor)	/*!< out: page cursor */
+{
+	ulint		up;
+	ulint		low;
+	ulint		mid;
+	const page_t*	page;
+	const page_dir_slot_t* slot;
+	const rec_t*	up_rec;
+	const rec_t*	low_rec;
+	const rec_t*	mid_rec;
+	ulint		up_matched_fields;
+	ulint		up_matched_bytes;
+	ulint		low_matched_fields;
+	ulint		low_matched_bytes;
+	ulint		cur_matched_fields;
+	ulint		cur_matched_bytes;
+	int		cmp;
+#ifdef UNIV_SEARCH_DEBUG
+	int		dbg_cmp;
+	ulint		dbg_matched_fields;
+	ulint		dbg_matched_bytes;
+#endif
+#ifdef UNIV_ZIP_DEBUG
+	const page_zip_des_t*	page_zip = buf_block_get_page_zip(block);
+#endif /* UNIV_ZIP_DEBUG */
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	ut_ad(block && tuple && iup_matched_fields && iup_matched_bytes
+	      && ilow_matched_fields && ilow_matched_bytes && cursor);
+	ut_ad(dtuple_validate(tuple));
+#ifdef UNIV_DEBUG
+# ifdef PAGE_CUR_DBG
+	if (mode != PAGE_CUR_DBG)
+# endif /* PAGE_CUR_DBG */
+# ifdef PAGE_CUR_LE_OR_EXTENDS
+		if (mode != PAGE_CUR_LE_OR_EXTENDS)
+# endif /* PAGE_CUR_LE_OR_EXTENDS */
+			ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
+			      || mode == PAGE_CUR_G || mode == PAGE_CUR_GE);
+#endif /* UNIV_DEBUG */
+	page = buf_block_get_frame(block);
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+	page_check_dir(page);
+
+#ifdef PAGE_CUR_ADAPT
+	if (page_is_leaf(page)
+	    && (mode == PAGE_CUR_LE)
+	    && (page_header_get_field(page, PAGE_N_DIRECTION) > 3)
+	    && (page_header_get_ptr(page, PAGE_LAST_INSERT))
+	    && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) {
+
+		if (page_cur_try_search_shortcut(
+			    block, index, tuple,
+			    iup_matched_fields, iup_matched_bytes,
+			    ilow_matched_fields, ilow_matched_bytes,
+			    cursor)) {
+			return;
+		}
+	}
+# ifdef PAGE_CUR_DBG
+	if (mode == PAGE_CUR_DBG) {
+		mode = PAGE_CUR_LE;
+	}
+# endif
+#endif
+
+	/* The following flag does not work for non-latin1 char sets because
+	cmp_full_field does not tell how many bytes matched */
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+	ut_a(mode != PAGE_CUR_LE_OR_EXTENDS);
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+
+	/* If mode PAGE_CUR_G is specified, we are trying to position the
+	cursor to answer a query of the form "tuple < X", where tuple is
+	the input parameter, and X denotes an arbitrary physical record on
+	the page. We want to position the cursor on the first X which
+	satisfies the condition. */
+
+	up_matched_fields  = *iup_matched_fields;
+	up_matched_bytes   = *iup_matched_bytes;
+	low_matched_fields = *ilow_matched_fields;
+	low_matched_bytes  = *ilow_matched_bytes;
+
+	/* Perform binary search. First the search is done through the page
+	directory, after that as a linear search in the list of records
+	owned by the upper limit directory slot. */
+
+	low = 0;
+	up = page_dir_get_n_slots(page) - 1;
+
+	/* Perform binary search until the lower and upper limit directory
+	slots come to the distance 1 of each other */
+
+	while (up - low > 1) {
+		mid = (low + up) / 2;
+		slot = page_dir_get_nth_slot(page, mid);
+		mid_rec = page_dir_slot_get_rec(slot);
+
+		ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
+			    low_matched_fields, low_matched_bytes,
+			    up_matched_fields, up_matched_bytes);
+
+		offsets = rec_get_offsets(mid_rec, index, offsets,
+					  dtuple_get_n_fields_cmp(tuple),
+					  &heap);
+
+		cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
+						&cur_matched_fields,
+						&cur_matched_bytes);
+		if (UNIV_LIKELY(cmp > 0)) {
+low_slot_match:
+			low = mid;
+			low_matched_fields = cur_matched_fields;
+			low_matched_bytes = cur_matched_bytes;
+
+		} else if (UNIV_EXPECT(cmp, -1)) {
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+			if (mode == PAGE_CUR_LE_OR_EXTENDS
+			    && page_cur_rec_field_extends(
+				    tuple, mid_rec, offsets,
+				    cur_matched_fields)) {
+
+				goto low_slot_match;
+			}
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+up_slot_match:
+			up = mid;
+			up_matched_fields = cur_matched_fields;
+			up_matched_bytes = cur_matched_bytes;
+
+		} else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+			   || mode == PAGE_CUR_LE_OR_EXTENDS
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+			   ) {
+
+			goto low_slot_match;
+		} else {
+
+			goto up_slot_match;
+		}
+	}
+
+	slot = page_dir_get_nth_slot(page, low);
+	low_rec = page_dir_slot_get_rec(slot);
+	slot = page_dir_get_nth_slot(page, up);
+	up_rec = page_dir_slot_get_rec(slot);
+
+	/* Perform linear search until the upper and lower records come to
+	distance 1 of each other. */
+
+	while (page_rec_get_next_const(low_rec) != up_rec) {
+
+		mid_rec = page_rec_get_next_const(low_rec);
+
+		ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
+			    low_matched_fields, low_matched_bytes,
+			    up_matched_fields, up_matched_bytes);
+
+		offsets = rec_get_offsets(mid_rec, index, offsets,
+					  dtuple_get_n_fields_cmp(tuple),
+					  &heap);
+
+		cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
+						&cur_matched_fields,
+						&cur_matched_bytes);
+		if (UNIV_LIKELY(cmp > 0)) {
+low_rec_match:
+			low_rec = mid_rec;
+			low_matched_fields = cur_matched_fields;
+			low_matched_bytes = cur_matched_bytes;
+
+		} else if (UNIV_EXPECT(cmp, -1)) {
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+			if (mode == PAGE_CUR_LE_OR_EXTENDS
+			    && page_cur_rec_field_extends(
+				    tuple, mid_rec, offsets,
+				    cur_matched_fields)) {
+
+				goto low_rec_match;
+			}
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+up_rec_match:
+			up_rec = mid_rec;
+			up_matched_fields = cur_matched_fields;
+			up_matched_bytes = cur_matched_bytes;
+		} else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+			   || mode == PAGE_CUR_LE_OR_EXTENDS
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+			   ) {
+
+			goto low_rec_match;
+		} else {
+
+			goto up_rec_match;
+		}
+	}
+
+#ifdef UNIV_SEARCH_DEBUG
+
+	/* Check that the lower and upper limit records have the
+	right alphabetical order compared to tuple. */
+	dbg_matched_fields = 0;
+	dbg_matched_bytes = 0;
+
+	offsets = rec_get_offsets(low_rec, index, offsets,
+				  ULINT_UNDEFINED, &heap);
+	dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, offsets,
+						 &dbg_matched_fields,
+						 &dbg_matched_bytes);
+	if (mode == PAGE_CUR_G) {
+		ut_a(dbg_cmp >= 0);
+	} else if (mode == PAGE_CUR_GE) {
+		ut_a(dbg_cmp == 1);
+	} else if (mode == PAGE_CUR_L) {
+		ut_a(dbg_cmp == 1);
+	} else if (mode == PAGE_CUR_LE) {
+		ut_a(dbg_cmp >= 0);
+	}
+
+	if (!page_rec_is_infimum(low_rec)) {
+
+		ut_a(low_matched_fields == dbg_matched_fields);
+		ut_a(low_matched_bytes == dbg_matched_bytes);
+	}
+
+	dbg_matched_fields = 0;
+	dbg_matched_bytes = 0;
+
+	offsets = rec_get_offsets(up_rec, index, offsets,
+				  ULINT_UNDEFINED, &heap);
+	dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, offsets,
+						 &dbg_matched_fields,
+						 &dbg_matched_bytes);
+	if (mode == PAGE_CUR_G) {
+		ut_a(dbg_cmp == -1);
+	} else if (mode == PAGE_CUR_GE) {
+		ut_a(dbg_cmp <= 0);
+	} else if (mode == PAGE_CUR_L) {
+		ut_a(dbg_cmp <= 0);
+	} else if (mode == PAGE_CUR_LE) {
+		ut_a(dbg_cmp == -1);
+	}
+
+	if (!page_rec_is_supremum(up_rec)) {
+
+		ut_a(up_matched_fields == dbg_matched_fields);
+		ut_a(up_matched_bytes == dbg_matched_bytes);
+	}
+#endif
+	if (mode <= PAGE_CUR_GE) {
+		page_cur_position(up_rec, block, cursor);
+	} else {
+		page_cur_position(low_rec, block, cursor);
+	}
+
+	*iup_matched_fields  = up_matched_fields;
+	*iup_matched_bytes   = up_matched_bytes;
+	*ilow_matched_fields = low_matched_fields;
+	*ilow_matched_bytes  = low_matched_bytes;
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+}
+
+/***********************************************************//**
+Positions a page cursor on a randomly chosen user record on a page. If there
+are no user records, sets the cursor on the infimum record. */
+UNIV_INTERN
+void
+page_cur_open_on_rnd_user_rec(
+/*==========================*/
+	buf_block_t*	block,	/*!< in: page */
+	page_cur_t*	cursor)	/*!< out: page cursor */
+{
+	ulint	rnd;
+	ulint	n_recs = page_get_n_recs(buf_block_get_frame(block));
+
+	page_cur_set_before_first(block, cursor);
+
+	if (UNIV_UNLIKELY(n_recs == 0)) {
+
+		return;
+	}
+
+	rnd = (ulint) (page_cur_lcg_prng() % n_recs);
+
+	do {
+		page_cur_move_to_next(cursor);
+	} while (rnd--);
+}
+
+/***********************************************************//**
+Writes the log record of a record insert on a page. */
+static
+void
+page_cur_insert_rec_write_log(
+/*==========================*/
+	rec_t*		insert_rec,	/*!< in: inserted physical record */
+	ulint		rec_size,	/*!< in: insert_rec size */
+	rec_t*		cursor_rec,	/*!< in: record the
+					cursor is pointing to */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mini-transaction handle */
+{
+	ulint	cur_rec_size;
+	ulint	extra_size;
+	ulint	cur_extra_size;
+	const byte* ins_ptr;
+	byte*	log_ptr;
+	const byte* log_end;
+	ulint	i;
+
+	ut_a(rec_size < UNIV_PAGE_SIZE);
+	ut_ad(page_align(insert_rec) == page_align(cursor_rec));
+	ut_ad(!page_rec_is_comp(insert_rec)
+	      == !dict_table_is_comp(index->table));
+
+	{
+		mem_heap_t*	heap		= NULL;
+		ulint		cur_offs_[REC_OFFS_NORMAL_SIZE];
+		ulint		ins_offs_[REC_OFFS_NORMAL_SIZE];
+
+		ulint*		cur_offs;
+		ulint*		ins_offs;
+
+		rec_offs_init(cur_offs_);
+		rec_offs_init(ins_offs_);
+
+		cur_offs = rec_get_offsets(cursor_rec, index, cur_offs_,
+					   ULINT_UNDEFINED, &heap);
+		ins_offs = rec_get_offsets(insert_rec, index, ins_offs_,
+					   ULINT_UNDEFINED, &heap);
+
+		extra_size = rec_offs_extra_size(ins_offs);
+		cur_extra_size = rec_offs_extra_size(cur_offs);
+		ut_ad(rec_size == rec_offs_size(ins_offs));
+		cur_rec_size = rec_offs_size(cur_offs);
+
+		if (UNIV_LIKELY_NULL(heap)) {
+			mem_heap_free(heap);
+		}
+	}
+
+	ins_ptr = insert_rec - extra_size;
+
+	i = 0;
+
+	if (cur_extra_size == extra_size) {
+		ulint		min_rec_size = ut_min(cur_rec_size, rec_size);
+
+		const byte*	cur_ptr = cursor_rec - cur_extra_size;
+
+		/* Find out the first byte in insert_rec which differs from
+		cursor_rec; skip the bytes in the record info */
+
+		do {
+			if (*ins_ptr == *cur_ptr) {
+				i++;
+				ins_ptr++;
+				cur_ptr++;
+			} else if ((i < extra_size)
+				   && (i >= extra_size
+				       - page_rec_get_base_extra_size
+				       (insert_rec))) {
+				i = extra_size;
+				ins_ptr = insert_rec;
+				cur_ptr = cursor_rec;
+			} else {
+				break;
+			}
+		} while (i < min_rec_size);
+	}
+
+	if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) {
+
+		if (page_rec_is_comp(insert_rec)) {
+			log_ptr = mlog_open_and_write_index(
+				mtr, insert_rec, index, MLOG_COMP_REC_INSERT,
+				2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
+			if (UNIV_UNLIKELY(!log_ptr)) {
+				/* Logging in mtr is switched off
+				during crash recovery: in that case
+				mlog_open returns NULL */
+				return;
+			}
+		} else {
+			log_ptr = mlog_open(mtr, 11
+					    + 2 + 5 + 1 + 5 + 5
+					    + MLOG_BUF_MARGIN);
+			if (UNIV_UNLIKELY(!log_ptr)) {
+				/* Logging in mtr is switched off
+				during crash recovery: in that case
+				mlog_open returns NULL */
+				return;
+			}
+
+			log_ptr = mlog_write_initial_log_record_fast(
+				insert_rec, MLOG_REC_INSERT, log_ptr, mtr);
+		}
+
+		log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
+		/* Write the cursor rec offset as a 2-byte ulint */
+		mach_write_to_2(log_ptr, page_offset(cursor_rec));
+		log_ptr += 2;
+	} else {
+		log_ptr = mlog_open(mtr, 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
+		if (!log_ptr) {
+			/* Logging in mtr is switched off during crash
+			recovery: in that case mlog_open returns NULL */
+			return;
+		}
+		log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
+	}
+
+	if (page_rec_is_comp(insert_rec)) {
+		if (UNIV_UNLIKELY
+		    (rec_get_info_and_status_bits(insert_rec, TRUE)
+		     != rec_get_info_and_status_bits(cursor_rec, TRUE))) {
+
+			goto need_extra_info;
+		}
+	} else {
+		if (UNIV_UNLIKELY
+		    (rec_get_info_and_status_bits(insert_rec, FALSE)
+		     != rec_get_info_and_status_bits(cursor_rec, FALSE))) {
+
+			goto need_extra_info;
+		}
+	}
+
+	if (extra_size != cur_extra_size || rec_size != cur_rec_size) {
+need_extra_info:
+		/* Write the record end segment length
+		and the extra info storage flag */
+		log_ptr += mach_write_compressed(log_ptr,
+						 2 * (rec_size - i) + 1);
+
+		/* Write the info bits */
+		mach_write_to_1(log_ptr,
+				rec_get_info_and_status_bits(
+					insert_rec,
+					page_rec_is_comp(insert_rec)));
+		log_ptr++;
+
+		/* Write the record origin offset */
+		log_ptr += mach_write_compressed(log_ptr, extra_size);
+
+		/* Write the mismatch index */
+		log_ptr += mach_write_compressed(log_ptr, i);
+
+		ut_a(i < UNIV_PAGE_SIZE);
+		ut_a(extra_size < UNIV_PAGE_SIZE);
+	} else {
+		/* Write the record end segment length
+		and the extra info storage flag */
+		log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i));
+	}
+
+	/* Write to the log the inserted index record end segment which
+	differs from the cursor record */
+
+	rec_size -= i;
+
+	if (log_ptr + rec_size <= log_end) {
+		memcpy(log_ptr, ins_ptr, rec_size);
+		mlog_close(mtr, log_ptr + rec_size);
+	} else {
+		mlog_close(mtr, log_ptr);
+		ut_a(rec_size < UNIV_PAGE_SIZE);
+		mlog_catenate_string(mtr, ins_ptr, rec_size);
+	}
+}
+#else /* !UNIV_HOTBACKUP */
+# define page_cur_insert_rec_write_log(ins_rec,size,cur,index,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Parses a log record of a record insert on a page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_cur_parse_insert_rec(
+/*======================*/
+	ibool		is_short,/*!< in: TRUE if short inserts */
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in: page or NULL */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr)	/*!< in: mtr or NULL */
+{
+	ulint	origin_offset;
+	ulint	end_seg_len;
+	ulint	mismatch_index;
+	page_t*	page;
+	rec_t*	cursor_rec;
+	byte	buf1[1024];
+	byte*	buf;
+	byte*	ptr2			= ptr;
+	ulint	info_and_status_bits = 0; /* remove warning */
+	page_cur_t cursor;
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	page = block ? buf_block_get_frame(block) : NULL;
+
+	if (is_short) {
+		cursor_rec = page_rec_get_prev(page_get_supremum_rec(page));
+	} else {
+		ulint	offset;
+
+		/* Read the cursor rec offset as a 2-byte ulint */
+
+		if (UNIV_UNLIKELY(end_ptr < ptr + 2)) {
+
+			return(NULL);
+		}
+
+		offset = mach_read_from_2(ptr);
+		ptr += 2;
+
+		cursor_rec = page + offset;
+
+		if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)) {
+
+			recv_sys->found_corrupt_log = TRUE;
+
+			return(NULL);
+		}
+	}
+
+	ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len);
+
+	if (ptr == NULL) {
+
+		return(NULL);
+	}
+
+	if (UNIV_UNLIKELY(end_seg_len >= UNIV_PAGE_SIZE << 1)) {
+		recv_sys->found_corrupt_log = TRUE;
+
+		return(NULL);
+	}
+
+	if (end_seg_len & 0x1UL) {
+		/* Read the info bits */
+
+		if (end_ptr < ptr + 1) {
+
+			return(NULL);
+		}
+
+		info_and_status_bits = mach_read_from_1(ptr);
+		ptr++;
+
+		ptr = mach_parse_compressed(ptr, end_ptr, &origin_offset);
+
+		if (ptr == NULL) {
+
+			return(NULL);
+		}
+
+		ut_a(origin_offset < UNIV_PAGE_SIZE);
+
+		ptr = mach_parse_compressed(ptr, end_ptr, &mismatch_index);
+
+		if (ptr == NULL) {
+
+			return(NULL);
+		}
+
+		ut_a(mismatch_index < UNIV_PAGE_SIZE);
+	}
+
+	if (UNIV_UNLIKELY(end_ptr < ptr + (end_seg_len >> 1))) {
+
+		return(NULL);
+	}
+
+	if (!block) {
+
+		return(ptr + (end_seg_len >> 1));
+	}
+
+	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+	ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page));
+
+	/* Read from the log the inserted index record end segment which
+	differs from the cursor record */
+
+	offsets = rec_get_offsets(cursor_rec, index, offsets,
+				  ULINT_UNDEFINED, &heap);
+
+	if (!(end_seg_len & 0x1UL)) {
+		info_and_status_bits = rec_get_info_and_status_bits(
+			cursor_rec, page_is_comp(page));
+		origin_offset = rec_offs_extra_size(offsets);
+		mismatch_index = rec_offs_size(offsets) - (end_seg_len >> 1);
+	}
+
+	end_seg_len >>= 1;
+
+	if (mismatch_index + end_seg_len < sizeof buf1) {
+		buf = buf1;
+	} else {
+		buf = mem_alloc(mismatch_index + end_seg_len);
+	}
+
+	/* Build the inserted record to buf */
+
+        if (UNIV_UNLIKELY(mismatch_index >= UNIV_PAGE_SIZE)) {
+		fprintf(stderr,
+			"Is short %lu, info_and_status_bits %lu, offset %lu, "
+			"o_offset %lu\n"
+			"mismatch index %lu, end_seg_len %lu\n"
+			"parsed len %lu\n",
+			(ulong) is_short, (ulong) info_and_status_bits,
+			(ulong) page_offset(cursor_rec),
+			(ulong) origin_offset,
+			(ulong) mismatch_index, (ulong) end_seg_len,
+			(ulong) (ptr - ptr2));
+
+		fputs("Dump of 300 bytes of log:\n", stderr);
+		ut_print_buf(stderr, ptr2, 300);
+		putc('\n', stderr);
+
+		buf_page_print(page, 0);
+
+		ut_error;
+	}
+
+	ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index);
+	ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
+
+	if (page_is_comp(page)) {
+		rec_set_info_and_status_bits(buf + origin_offset,
+				     info_and_status_bits);
+	} else {
+		rec_set_info_bits_old(buf + origin_offset,
+							info_and_status_bits);
+	}
+
+	page_cur_position(cursor_rec, block, &cursor);
+
+	offsets = rec_get_offsets(buf + origin_offset, index, offsets,
+				  ULINT_UNDEFINED, &heap);
+	if (UNIV_UNLIKELY(!page_cur_rec_insert(&cursor,
+					       buf + origin_offset,
+					       index, offsets, mtr))) {
+		/* The redo log record should only have been written
+		after the write was successful. */
+		ut_error;
+	}
+
+	if (buf != buf1) {
+
+		mem_free(buf);
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+
+	return(ptr + end_seg_len);
+}
+
+/***********************************************************//**
+Inserts a record next to page cursor on an uncompressed page.
+Returns pointer to inserted record if succeed, i.e., enough
+space available, NULL otherwise. The cursor stays at the same position.
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INTERN
+rec_t*
+page_cur_insert_rec_low(
+/*====================*/
+	rec_t*		current_rec,/*!< in: pointer to current record after
+				which the new record is inserted */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	const rec_t*	rec,	/*!< in: pointer to a physical record */
+	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
+{
+	byte*		insert_buf;
+	ulint		rec_size;
+	page_t*		page;		/*!< the relevant page */
+	rec_t*		last_insert;	/*!< cursor position at previous
+					insert */
+	rec_t*		free_rec;	/*!< a free record that was reused,
+					or NULL */
+	rec_t*		insert_rec;	/*!< inserted record */
+	ulint		heap_no;	/*!< heap number of the inserted
+					record */
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	page = page_align(current_rec);
+	ut_ad(dict_table_is_comp(index->table)
+	      == (ibool) !!page_is_comp(page));
+
+	ut_ad(!page_rec_is_supremum(current_rec));
+
+	/* 1. Get the size of the physical record in the page */
+	rec_size = rec_offs_size(offsets);
+
+#ifdef UNIV_DEBUG_VALGRIND
+	{
+		const void*	rec_start
+			= rec - rec_offs_extra_size(offsets);
+		ulint		extra_size
+			= rec_offs_extra_size(offsets)
+			- (rec_offs_comp(offsets)
+			   ? REC_N_NEW_EXTRA_BYTES
+			   : REC_N_OLD_EXTRA_BYTES);
+
+		/* All data bytes of the record must be valid. */
+		UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+		/* The variable-length header must be valid. */
+		UNIV_MEM_ASSERT_RW(rec_start, extra_size);
+	}
+#endif /* UNIV_DEBUG_VALGRIND */
+
+	/* 2. Try to find suitable space from page memory management */
+
+	free_rec = page_header_get_ptr(page, PAGE_FREE);
+	if (UNIV_LIKELY_NULL(free_rec)) {
+		/* Try to allocate from the head of the free list. */
+		ulint		foffsets_[REC_OFFS_NORMAL_SIZE];
+		ulint*		foffsets	= foffsets_;
+		mem_heap_t*	heap		= NULL;
+
+		rec_offs_init(foffsets_);
+
+		foffsets = rec_get_offsets(free_rec, index, foffsets,
+					ULINT_UNDEFINED, &heap);
+		if (rec_offs_size(foffsets) < rec_size) {
+			if (UNIV_LIKELY_NULL(heap)) {
+				mem_heap_free(heap);
+			}
+
+			goto use_heap;
+		}
+
+		insert_buf = free_rec - rec_offs_extra_size(foffsets);
+
+		if (page_is_comp(page)) {
+			heap_no = rec_get_heap_no_new(free_rec);
+			page_mem_alloc_free(page, NULL,
+					rec_get_next_ptr(free_rec, TRUE),
+					rec_size);
+		} else {
+			heap_no = rec_get_heap_no_old(free_rec);
+			page_mem_alloc_free(page, NULL,
+					rec_get_next_ptr(free_rec, FALSE),
+					rec_size);
+		}
+
+		if (UNIV_LIKELY_NULL(heap)) {
+			mem_heap_free(heap);
+		}
+	} else {
+use_heap:
+		free_rec = NULL;
+		insert_buf = page_mem_alloc_heap(page, NULL,
+						 rec_size, &heap_no);
+
+		if (UNIV_UNLIKELY(insert_buf == NULL)) {
+			return(NULL);
+		}
+	}
+
+	/* 3. Create the record */
+	insert_rec = rec_copy(insert_buf, rec, offsets);
+	rec_offs_make_valid(insert_rec, index, offsets);
+
+	/* 4. Insert the record in the linked list of records */
+	ut_ad(current_rec != insert_rec);
+
+	{
+		/* next record after current before the insertion */
+		rec_t*	next_rec = page_rec_get_next(current_rec);
+#ifdef UNIV_DEBUG
+		if (page_is_comp(page)) {
+			ut_ad(rec_get_status(current_rec)
+				<= REC_STATUS_INFIMUM);
+			ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
+			ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
+		}
+#endif
+		page_rec_set_next(insert_rec, next_rec);
+		page_rec_set_next(current_rec, insert_rec);
+	}
+
+	page_header_set_field(page, NULL, PAGE_N_RECS,
+			      1 + page_get_n_recs(page));
+
+	/* 5. Set the n_owned field in the inserted record to zero,
+	and set the heap_no field */
+	if (page_is_comp(page)) {
+		rec_set_n_owned_new(insert_rec, NULL, 0);
+		rec_set_heap_no_new(insert_rec, heap_no);
+	} else {
+		rec_set_n_owned_old(insert_rec, 0);
+		rec_set_heap_no_old(insert_rec, heap_no);
+	}
+
+	UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets),
+			   rec_offs_size(offsets));
+	/* 6. Update the last insertion info in page header */
+
+	last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
+	ut_ad(!last_insert || !page_is_comp(page)
+	      || rec_get_node_ptr_flag(last_insert)
+	      == rec_get_node_ptr_flag(insert_rec));
+
+	if (UNIV_UNLIKELY(last_insert == NULL)) {
+		page_header_set_field(page, NULL, PAGE_DIRECTION,
+				      PAGE_NO_DIRECTION);
+		page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
+
+	} else if ((last_insert == current_rec)
+		   && (page_header_get_field(page, PAGE_DIRECTION)
+		       != PAGE_LEFT)) {
+
+		page_header_set_field(page, NULL, PAGE_DIRECTION,
+							PAGE_RIGHT);
+		page_header_set_field(page, NULL, PAGE_N_DIRECTION,
+				      page_header_get_field(
+					      page, PAGE_N_DIRECTION) + 1);
+
+	} else if ((page_rec_get_next(insert_rec) == last_insert)
+		   && (page_header_get_field(page, PAGE_DIRECTION)
+		       != PAGE_RIGHT)) {
+
+		page_header_set_field(page, NULL, PAGE_DIRECTION,
+							PAGE_LEFT);
+		page_header_set_field(page, NULL, PAGE_N_DIRECTION,
+				      page_header_get_field(
+					      page, PAGE_N_DIRECTION) + 1);
+	} else {
+		page_header_set_field(page, NULL, PAGE_DIRECTION,
+							PAGE_NO_DIRECTION);
+		page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
+	}
+
+	page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, insert_rec);
+
+	/* 7. It remains to update the owner record. */
+	{
+		rec_t*	owner_rec	= page_rec_find_owner_rec(insert_rec);
+		ulint	n_owned;
+		if (page_is_comp(page)) {
+			n_owned = rec_get_n_owned_new(owner_rec);
+			rec_set_n_owned_new(owner_rec, NULL, n_owned + 1);
+		} else {
+			n_owned = rec_get_n_owned_old(owner_rec);
+			rec_set_n_owned_old(owner_rec, n_owned + 1);
+		}
+
+		/* 8. Now we have incremented the n_owned field of the owner
+		record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
+		we have to split the corresponding directory slot in two. */
+
+		if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
+			page_dir_split_slot(
+				page, NULL,
+				page_dir_find_owner_slot(owner_rec));
+		}
+	}
+
+	/* 9. Write log record of the insert */
+	if (UNIV_LIKELY(mtr != NULL)) {
+		page_cur_insert_rec_write_log(insert_rec, rec_size,
+					      current_rec, index, mtr);
+	}
+
+	return(insert_rec);
+}
+
+/***********************************************************//**
+Compresses or reorganizes a page after an optimistic insert.
+@return	rec if succeed, NULL otherwise */
+static
+rec_t*
+page_cur_insert_rec_zip_reorg(
+/*==========================*/
+	rec_t**		current_rec,/*!< in/out: pointer to current record after
+				which the new record is inserted */
+	buf_block_t*	block,	/*!< in: buffer block */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	rec_t*		rec,	/*!< in: inserted record */
+	page_t*		page,	/*!< in: uncompressed page */
+	page_zip_des_t*	page_zip,/*!< in: compressed page */
+	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
+{
+	ulint		pos;
+
+	/* Recompress or reorganize and recompress the page. */
+	if (UNIV_LIKELY(page_zip_compress(page_zip, page, index, mtr))) {
+		return(rec);
+	}
+
+	/* Before trying to reorganize the page,
+	store the number of preceding records on the page. */
+	pos = page_rec_get_n_recs_before(rec);
+
+	if (page_zip_reorganize(block, index, mtr)) {
+		/* The page was reorganized: Find rec by seeking to pos,
+		and update *current_rec. */
+		rec = page + PAGE_NEW_INFIMUM;
+
+		while (--pos) {
+			rec = page + rec_get_next_offs(rec, TRUE);
+		}
+
+		*current_rec = rec;
+		rec = page + rec_get_next_offs(rec, TRUE);
+
+		return(rec);
+	}
+
+	/* Out of space: restore the page */
+	if (!page_zip_decompress(page_zip, page)) {
+		ut_error; /* Memory corrupted? */
+	}
+	ut_ad(page_validate(page, index));
+	return(NULL);
+}
+
+/***********************************************************//**
+Inserts a record next to page cursor on a compressed and uncompressed
+page. Returns pointer to inserted record if succeed, i.e.,
+enough space available, NULL otherwise.
+The cursor stays at the same position.
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INTERN
+rec_t*
+page_cur_insert_rec_zip(
+/*====================*/
+	rec_t**		current_rec,/*!< in/out: pointer to current record after
+				which the new record is inserted */
+	buf_block_t*	block,	/*!< in: buffer block of *current_rec */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	const rec_t*	rec,	/*!< in: pointer to a physical record */
+	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
+{
+	byte*		insert_buf;
+	ulint		rec_size;
+	page_t*		page;		/*!< the relevant page */
+	rec_t*		last_insert;	/*!< cursor position at previous
+					insert */
+	rec_t*		free_rec;	/*!< a free record that was reused,
+					or NULL */
+	rec_t*		insert_rec;	/*!< inserted record */
+	ulint		heap_no;	/*!< heap number of the inserted
+					record */
+	page_zip_des_t*	page_zip;
+
+	page_zip = buf_block_get_page_zip(block);
+	ut_ad(page_zip);
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	page = page_align(*current_rec);
+	ut_ad(dict_table_is_comp(index->table));
+	ut_ad(page_is_comp(page));
+
+	ut_ad(!page_rec_is_supremum(*current_rec));
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+	/* 1. Get the size of the physical record in the page */
+	rec_size = rec_offs_size(offsets);
+
+#ifdef UNIV_DEBUG_VALGRIND
+	{
+		const void*	rec_start
+			= rec - rec_offs_extra_size(offsets);
+		ulint		extra_size
+			= rec_offs_extra_size(offsets)
+			- (rec_offs_comp(offsets)
+			   ? REC_N_NEW_EXTRA_BYTES
+			   : REC_N_OLD_EXTRA_BYTES);
+
+		/* All data bytes of the record must be valid. */
+		UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+		/* The variable-length header must be valid. */
+		UNIV_MEM_ASSERT_RW(rec_start, extra_size);
+	}
+#endif /* UNIV_DEBUG_VALGRIND */
+
+	/* 2. Try to find suitable space from page memory management */
+	if (!page_zip_available(page_zip, dict_index_is_clust(index),
+				rec_size, 1)) {
+
+		/* Try compressing the whole page afterwards. */
+		insert_rec = page_cur_insert_rec_low(*current_rec,
+						     index, rec, offsets,
+						     NULL);
+
+		if (UNIV_LIKELY(insert_rec != NULL)) {
+			insert_rec = page_cur_insert_rec_zip_reorg(
+				current_rec, block, index, insert_rec,
+				page, page_zip, mtr);
+		}
+
+		return(insert_rec);
+	}
+
+	free_rec = page_header_get_ptr(page, PAGE_FREE);
+	if (UNIV_LIKELY_NULL(free_rec)) {
+		/* Try to allocate from the head of the free list. */
+		lint	extra_size_diff;
+		ulint		foffsets_[REC_OFFS_NORMAL_SIZE];
+		ulint*		foffsets	= foffsets_;
+		mem_heap_t*	heap		= NULL;
+
+		rec_offs_init(foffsets_);
+
+		foffsets = rec_get_offsets(free_rec, index, foffsets,
+					ULINT_UNDEFINED, &heap);
+		if (rec_offs_size(foffsets) < rec_size) {
+too_small:
+			if (UNIV_LIKELY_NULL(heap)) {
+				mem_heap_free(heap);
+			}
+
+			goto use_heap;
+		}
+
+		insert_buf = free_rec - rec_offs_extra_size(foffsets);
+
+		/* On compressed pages, do not relocate records from
+		the free list.  If extra_size would grow, use the heap. */
+		extra_size_diff
+			= rec_offs_extra_size(offsets)
+			- rec_offs_extra_size(foffsets);
+
+		if (UNIV_UNLIKELY(extra_size_diff < 0)) {
+			/* Add an offset to the extra_size. */
+			if (rec_offs_size(foffsets)
+			    < rec_size - extra_size_diff) {
+
+				goto too_small;
+			}
+
+			insert_buf -= extra_size_diff;
+		} else if (UNIV_UNLIKELY(extra_size_diff)) {
+			/* Do not allow extra_size to grow */
+
+			goto too_small;
+		}
+
+		heap_no = rec_get_heap_no_new(free_rec);
+		page_mem_alloc_free(page, page_zip,
+				    rec_get_next_ptr(free_rec, TRUE),
+				    rec_size);
+
+		if (!page_is_leaf(page)) {
+			/* Zero out the node pointer of free_rec,
+			in case it will not be overwritten by
+			insert_rec. */
+
+			ut_ad(rec_size > REC_NODE_PTR_SIZE);
+
+			if (rec_offs_extra_size(foffsets)
+			    + rec_offs_data_size(foffsets) > rec_size) {
+
+				memset(rec_get_end(free_rec, foffsets)
+				       - REC_NODE_PTR_SIZE, 0,
+				       REC_NODE_PTR_SIZE);
+			}
+		} else if (dict_index_is_clust(index)) {
+			/* Zero out the DB_TRX_ID and DB_ROLL_PTR
+			columns of free_rec, in case it will not be
+			overwritten by insert_rec. */
+
+			ulint	trx_id_col;
+			ulint	trx_id_offs;
+			ulint	len;
+
+			trx_id_col = dict_index_get_sys_col_pos(index,
+								DATA_TRX_ID);
+			ut_ad(trx_id_col > 0);
+			ut_ad(trx_id_col != ULINT_UNDEFINED);
+
+			trx_id_offs = rec_get_nth_field_offs(foffsets,
+							     trx_id_col, &len);
+			ut_ad(len == DATA_TRX_ID_LEN);
+
+			if (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN + trx_id_offs
+			    + rec_offs_extra_size(foffsets) > rec_size) {
+				/* We will have to zero out the
+				DB_TRX_ID and DB_ROLL_PTR, because
+				they will not be fully overwritten by
+				insert_rec. */
+
+				memset(free_rec + trx_id_offs, 0,
+				       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+			}
+
+			ut_ad(free_rec + trx_id_offs + DATA_TRX_ID_LEN
+			      == rec_get_nth_field(free_rec, foffsets,
+						   trx_id_col + 1, &len));
+			ut_ad(len == DATA_ROLL_PTR_LEN);
+		}
+
+		if (UNIV_LIKELY_NULL(heap)) {
+			mem_heap_free(heap);
+		}
+	} else {
+use_heap:
+		free_rec = NULL;
+		insert_buf = page_mem_alloc_heap(page, page_zip,
+						 rec_size, &heap_no);
+
+		if (UNIV_UNLIKELY(insert_buf == NULL)) {
+			return(NULL);
+		}
+
+		page_zip_dir_add_slot(page_zip, dict_index_is_clust(index));
+	}
+
+	/* 3. Create the record */
+	insert_rec = rec_copy(insert_buf, rec, offsets);
+	rec_offs_make_valid(insert_rec, index, offsets);
+
+	/* 4. Insert the record in the linked list of records */
+	ut_ad(*current_rec != insert_rec);
+
+	{
+		/* next record after current before the insertion */
+		rec_t*	next_rec = page_rec_get_next(*current_rec);
+		ut_ad(rec_get_status(*current_rec)
+		      <= REC_STATUS_INFIMUM);
+		ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
+		ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
+
+		page_rec_set_next(insert_rec, next_rec);
+		page_rec_set_next(*current_rec, insert_rec);
+	}
+
+	page_header_set_field(page, page_zip, PAGE_N_RECS,
+			      1 + page_get_n_recs(page));
+
+	/* 5. Set the n_owned field in the inserted record to zero,
+	and set the heap_no field */
+	rec_set_n_owned_new(insert_rec, NULL, 0);
+	rec_set_heap_no_new(insert_rec, heap_no);
+
+	UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets),
+			   rec_offs_size(offsets));
+
+	page_zip_dir_insert(page_zip, *current_rec, free_rec, insert_rec);
+
+	/* 6. Update the last insertion info in page header */
+
+	last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
+	ut_ad(!last_insert
+	      || rec_get_node_ptr_flag(last_insert)
+	      == rec_get_node_ptr_flag(insert_rec));
+
+	if (UNIV_UNLIKELY(last_insert == NULL)) {
+		page_header_set_field(page, page_zip, PAGE_DIRECTION,
+							PAGE_NO_DIRECTION);
+		page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
+
+	} else if ((last_insert == *current_rec)
+		   && (page_header_get_field(page, PAGE_DIRECTION)
+		       != PAGE_LEFT)) {
+
+		page_header_set_field(page, page_zip, PAGE_DIRECTION,
+							PAGE_RIGHT);
+		page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
+				      page_header_get_field(
+					      page, PAGE_N_DIRECTION) + 1);
+
+	} else if ((page_rec_get_next(insert_rec) == last_insert)
+		   && (page_header_get_field(page, PAGE_DIRECTION)
+		       != PAGE_RIGHT)) {
+
+		page_header_set_field(page, page_zip, PAGE_DIRECTION,
+							PAGE_LEFT);
+		page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
+				      page_header_get_field(
+					      page, PAGE_N_DIRECTION) + 1);
+	} else {
+		page_header_set_field(page, page_zip, PAGE_DIRECTION,
+							PAGE_NO_DIRECTION);
+		page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
+	}
+
+	page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, insert_rec);
+
+	/* 7. It remains to update the owner record. */
+	{
+		rec_t*	owner_rec	= page_rec_find_owner_rec(insert_rec);
+		ulint	n_owned;
+
+		n_owned = rec_get_n_owned_new(owner_rec);
+		rec_set_n_owned_new(owner_rec, page_zip, n_owned + 1);
+
+		/* 8. Now we have incremented the n_owned field of the owner
+		record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
+		we have to split the corresponding directory slot in two. */
+
+		if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
+			page_dir_split_slot(
+				page, page_zip,
+				page_dir_find_owner_slot(owner_rec));
+		}
+	}
+
+	page_zip_write_rec(page_zip, insert_rec, index, offsets, 1);
+
+	/* 9. Write log record of the insert */
+	if (UNIV_LIKELY(mtr != NULL)) {
+		page_cur_insert_rec_write_log(insert_rec, rec_size,
+					      *current_rec, index, mtr);
+	}
+
+	return(insert_rec);
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Writes a log record of copying a record list end to a new created page.
+@return 4-byte field where to write the log data length, or NULL if
+logging is disabled */
+UNIV_INLINE
+byte*
+page_copy_rec_list_to_created_page_write_log(
+/*=========================================*/
+	page_t*		page,	/*!< in: index page */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	byte*	log_ptr;
+
+	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+
+	log_ptr = mlog_open_and_write_index(mtr, page, index,
+					    page_is_comp(page)
+					    ? MLOG_COMP_LIST_END_COPY_CREATED
+					    : MLOG_LIST_END_COPY_CREATED, 4);
+	if (UNIV_LIKELY(log_ptr != NULL)) {
+		mlog_close(mtr, log_ptr + 4);
+	}
+
+	return(log_ptr);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************//**
+Parses a log record of copying a record list end to a new created page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_parse_copy_rec_list_to_created_page(
+/*=====================================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in: page or NULL */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr)	/*!< in: mtr or NULL */
+{
+	byte*		rec_end;
+	ulint		log_data_len;
+	page_t*		page;
+	page_zip_des_t*	page_zip;
+
+	if (ptr + 4 > end_ptr) {
+
+		return(NULL);
+	}
+
+	log_data_len = mach_read_from_4(ptr);
+	ptr += 4;
+
+	rec_end = ptr + log_data_len;
+
+	if (rec_end > end_ptr) {
+
+		return(NULL);
+	}
+
+	if (!block) {
+
+		return(rec_end);
+	}
+
+	while (ptr < rec_end) {
+		ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
+						block, index, mtr);
+	}
+
+	ut_a(ptr == rec_end);
+
+	page = buf_block_get_frame(block);
+	page_zip = buf_block_get_page_zip(block);
+
+	page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
+	page_header_set_field(page, page_zip, PAGE_DIRECTION,
+							PAGE_NO_DIRECTION);
+	page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
+
+	return(rec_end);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Copies records from page to a newly created page, from a given record onward,
+including that record. Infimum and supremum records are not copied. */
+UNIV_INTERN
+void
+page_copy_rec_list_end_to_created_page(
+/*===================================*/
+	page_t*		new_page,	/*!< in/out: index page to copy to */
+	rec_t*		rec,		/*!< in: first record to copy */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	page_dir_slot_t* slot = 0; /* remove warning */
+	byte*	heap_top;
+	rec_t*	insert_rec = 0; /* remove warning */
+	rec_t*	prev_rec;
+	ulint	count;
+	ulint	n_recs;
+	ulint	slot_index;
+	ulint	rec_size;
+	ulint	log_mode;
+	byte*	log_ptr;
+	ulint	log_data_len;
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW);
+	ut_ad(page_align(rec) != new_page);
+	ut_ad(page_rec_is_comp(rec) == page_is_comp(new_page));
+
+	if (page_rec_is_infimum(rec)) {
+
+		rec = page_rec_get_next(rec);
+	}
+
+	if (page_rec_is_supremum(rec)) {
+
+		return;
+	}
+
+#ifdef UNIV_DEBUG
+	/* To pass the debug tests we have to set these dummy values
+	in the debug version */
+	page_dir_set_n_slots(new_page, NULL, UNIV_PAGE_SIZE / 2);
+	page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP,
+			    new_page + UNIV_PAGE_SIZE - 1);
+#endif
+
+	log_ptr = page_copy_rec_list_to_created_page_write_log(new_page,
+							       index, mtr);
+
+	log_data_len = dyn_array_get_data_size(&(mtr->log));
+
+	/* Individual inserts are logged in a shorter form */
+
+	log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);
+
+	prev_rec = page_get_infimum_rec(new_page);
+	if (page_is_comp(new_page)) {
+		heap_top = new_page + PAGE_NEW_SUPREMUM_END;
+	} else {
+		heap_top = new_page + PAGE_OLD_SUPREMUM_END;
+	}
+	count = 0;
+	slot_index = 0;
+	n_recs = 0;
+
+	do {
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		insert_rec = rec_copy(heap_top, rec, offsets);
+
+		if (page_is_comp(new_page)) {
+			rec_set_next_offs_new(prev_rec,
+					      page_offset(insert_rec));
+
+			rec_set_n_owned_new(insert_rec, NULL, 0);
+			rec_set_heap_no_new(insert_rec,
+					    PAGE_HEAP_NO_USER_LOW + n_recs);
+		} else {
+			rec_set_next_offs_old(prev_rec,
+					      page_offset(insert_rec));
+
+			rec_set_n_owned_old(insert_rec, 0);
+			rec_set_heap_no_old(insert_rec,
+					    PAGE_HEAP_NO_USER_LOW + n_recs);
+		}
+
+		count++;
+		n_recs++;
+
+		if (UNIV_UNLIKELY
+		    (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2)) {
+
+			slot_index++;
+
+			slot = page_dir_get_nth_slot(new_page, slot_index);
+
+			page_dir_slot_set_rec(slot, insert_rec);
+			page_dir_slot_set_n_owned(slot, NULL, count);
+
+			count = 0;
+		}
+
+		rec_size = rec_offs_size(offsets);
+
+		ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
+
+		heap_top += rec_size;
+
+		page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
+					      index, mtr);
+		prev_rec = insert_rec;
+		rec = page_rec_get_next(rec);
+	} while (!page_rec_is_supremum(rec));
+
+	if ((slot_index > 0) && (count + 1
+				 + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
+				 <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
+		/* We can merge the two last dir slots. This operation is
+		here to make this function imitate exactly the equivalent
+		task made using page_cur_insert_rec, which we use in database
+		recovery to reproduce the task performed by this function.
+		To be able to check the correctness of recovery, it is good
+		that it imitates exactly. */
+
+		count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;
+
+		page_dir_slot_set_n_owned(slot, NULL, 0);
+
+		slot_index--;
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+
+	log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len;
+
+	ut_a(log_data_len < 100 * UNIV_PAGE_SIZE);
+
+	if (UNIV_LIKELY(log_ptr != NULL)) {
+		mach_write_to_4(log_ptr, log_data_len);
+	}
+
+	if (page_is_comp(new_page)) {
+		rec_set_next_offs_new(insert_rec, PAGE_NEW_SUPREMUM);
+	} else {
+		rec_set_next_offs_old(insert_rec, PAGE_OLD_SUPREMUM);
+	}
+
+	slot = page_dir_get_nth_slot(new_page, 1 + slot_index);
+
+	page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page));
+	page_dir_slot_set_n_owned(slot, NULL, count + 1);
+
+	page_dir_set_n_slots(new_page, NULL, 2 + slot_index);
+	page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP, heap_top);
+	page_dir_set_n_heap(new_page, NULL, PAGE_HEAP_NO_USER_LOW + n_recs);
+	page_header_set_field(new_page, NULL, PAGE_N_RECS, n_recs);
+
+	page_header_set_ptr(new_page, NULL, PAGE_LAST_INSERT, NULL);
+	page_header_set_field(new_page, NULL, PAGE_DIRECTION,
+							PAGE_NO_DIRECTION);
+	page_header_set_field(new_page, NULL, PAGE_N_DIRECTION, 0);
+
+	/* Restore the log mode */
+
+	mtr_set_log_mode(mtr, log_mode);
+}
+
+/***********************************************************//**
+Writes log record of a record delete on a page. */
+UNIV_INLINE
+void
+page_cur_delete_rec_write_log(
+/*==========================*/
+	rec_t*		rec,	/*!< in: record to be deleted */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+{
+	byte*	log_ptr;
+
+	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+
+	log_ptr = mlog_open_and_write_index(mtr, rec, index,
+					    page_rec_is_comp(rec)
+					    ? MLOG_COMP_REC_DELETE
+					    : MLOG_REC_DELETE, 2);
+
+	if (!log_ptr) {
+		/* Logging in mtr is switched off during crash recovery:
+		in that case mlog_open returns NULL */
+		return;
+	}
+
+	/* Write the cursor rec offset as a 2-byte ulint */
+	mach_write_to_2(log_ptr, page_offset(rec));
+
+	mlog_close(mtr, log_ptr + 2);
+}
+#else /* !UNIV_HOTBACKUP */
+# define page_cur_delete_rec_write_log(rec,index,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Parses log record of a record delete on a page.
+@return	pointer to record end or NULL */
+UNIV_INTERN
+byte*
+page_cur_parse_delete_rec(
+/*======================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in: page or NULL */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr)	/*!< in: mtr or NULL */
+{
+	ulint		offset;
+	page_cur_t	cursor;
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	/* Read the cursor rec offset as a 2-byte ulint */
+	offset = mach_read_from_2(ptr);
+	ptr += 2;
+
+	ut_a(offset <= UNIV_PAGE_SIZE);
+
+	if (block) {
+		page_t*		page		= buf_block_get_frame(block);
+		mem_heap_t*	heap		= NULL;
+		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+		rec_t*		rec		= page + offset;
+		rec_offs_init(offsets_);
+
+		page_cur_position(rec, block, &cursor);
+		ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page));
+
+		page_cur_delete_rec(&cursor, index,
+				    rec_get_offsets(rec, index, offsets_,
+						    ULINT_UNDEFINED, &heap),
+				    mtr);
+		if (UNIV_LIKELY_NULL(heap)) {
+			mem_heap_free(heap);
+		}
+	}
+
+	return(ptr);
+}
+
+/***********************************************************//**
+Deletes a record at the page cursor. The cursor is moved to the next
+record after the deleted one. */
+UNIV_INTERN
+void
+page_cur_delete_rec(
+/*================*/
+	page_cur_t*	cursor,	/*!< in/out: a page cursor */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	const ulint*	offsets,/*!< in: rec_get_offsets(cursor->rec, index) */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+{
+	page_dir_slot_t* cur_dir_slot;
+	page_dir_slot_t* prev_slot;
+	page_t*		page;
+	page_zip_des_t*	page_zip;
+	rec_t*		current_rec;
+	rec_t*		prev_rec	= NULL;
+	rec_t*		next_rec;
+	ulint		cur_slot_no;
+	ulint		cur_n_owned;
+	rec_t*		rec;
+
+	ut_ad(cursor && mtr);
+
+	page = page_cur_get_page(cursor);
+	page_zip = page_cur_get_page_zip(cursor);
+
+	/* page_zip_validate() will fail here when
+	btr_cur_pessimistic_delete() invokes btr_set_min_rec_mark().
+	Then, both "page_zip" and "page" would have the min-rec-mark
+	set on the smallest user record, but "page" would additionally
+	have it set on the smallest-but-one record.  Because sloppy
+	page_zip_validate_low() only ignores min-rec-flag differences
+	in the smallest user record, it cannot be used here either. */
+
+	current_rec = cursor->rec;
+	ut_ad(rec_offs_validate(current_rec, index, offsets));
+	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+
+	/* The record must not be the supremum or infimum record. */
+	ut_ad(page_rec_is_user_rec(current_rec));
+
+	/* Save to local variables some data associated with current_rec */
+	cur_slot_no = page_dir_find_owner_slot(current_rec);
+	cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
+	cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot);
+
+	/* 0. Write the log record */
+	page_cur_delete_rec_write_log(current_rec, index, mtr);
+
+	/* 1. Reset the last insert info in the page header and increment
+	the modify clock for the frame */
+
+	page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
+
+	/* The page gets invalid for optimistic searches: increment the
+	frame modify clock */
+
+	buf_block_modify_clock_inc(page_cur_get_block(cursor));
+
+	/* 2. Find the next and the previous record. Note that the cursor is
+	left at the next record. */
+
+	ut_ad(cur_slot_no > 0);
+	prev_slot = page_dir_get_nth_slot(page, cur_slot_no - 1);
+
+	rec = (rec_t*) page_dir_slot_get_rec(prev_slot);
+
+	/* rec now points to the record of the previous directory slot. Look
+	for the immediate predecessor of current_rec in a loop. */
+
+	while(current_rec != rec) {
+		prev_rec = rec;
+		rec = page_rec_get_next(rec);
+	}
+
+	page_cur_move_to_next(cursor);
+	next_rec = cursor->rec;
+
+	/* 3. Remove the record from the linked list of records */
+
+	page_rec_set_next(prev_rec, next_rec);
+
+	/* 4. If the deleted record is pointed to by a dir slot, update the
+	record pointer in slot. In the following if-clause we assume that
+	prev_rec is owned by the same slot, i.e., PAGE_DIR_SLOT_MIN_N_OWNED
+	>= 2. */
+
+#if PAGE_DIR_SLOT_MIN_N_OWNED < 2
+# error "PAGE_DIR_SLOT_MIN_N_OWNED < 2"
+#endif
+	ut_ad(cur_n_owned > 1);
+
+	if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) {
+		page_dir_slot_set_rec(cur_dir_slot, prev_rec);
+	}
+
+	/* 5. Update the number of owned records of the slot */
+
+	page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);
+
+	/* 6. Free the memory occupied by the record */
+	page_mem_free(page, page_zip, current_rec, index, offsets);
+
+	/* 7. Now we have decremented the number of owned records of the slot.
+	If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
+	slots. */
+
+	if (UNIV_UNLIKELY(cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED)) {
+		page_dir_balance_slot(page, page_zip, cur_slot_no);
+	}
+
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+}
+
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+/*******************************************************************//**
+Print the first n numbers, generated by page_cur_lcg_prng() to make sure
+(visually) that it works properly. */
+void
+test_page_cur_lcg_prng(
+/*===================*/
+	int	n)	/*!< in: print first n numbers */
+{
+	int			i;
+	unsigned long long	rnd;
+
+	for (i = 0; i < n; i++) {
+		rnd = page_cur_lcg_prng();
+		printf("%llu\t%%2=%llu %%3=%llu %%5=%llu %%7=%llu %%11=%llu\n",
+		       rnd,
+		       rnd % 2,
+		       rnd % 3,
+		       rnd % 5,
+		       rnd % 7,
+		       rnd % 11);
+	}
+}
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innodb_plugin/page/page0page.c b/storage/innodb_plugin/page/page0page.c
new file mode 100644
index 00000000000..f056ef77bdc
--- /dev/null
+++ b/storage/innodb_plugin/page/page0page.c
@@ -0,0 +1,2608 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file page/page0page.c
+Index page routines
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#define THIS_MODULE
+#include "page0page.h"
+#ifdef UNIV_NONINL
+#include "page0page.ic"
+#endif
+#undef THIS_MODULE
+
+#include "page0cur.h"
+#include "page0zip.h"
+#include "buf0buf.h"
+#include "btr0btr.h"
+#ifndef UNIV_HOTBACKUP
+# include "srv0srv.h"
+# include "lock0lock.h"
+# include "fut0lst.h"
+# include "btr0sea.h"
+#endif /* !UNIV_HOTBACKUP */
+
+/*			THE INDEX PAGE
+			==============
+
+The index page consists of a page header which contains the page's
+id and other information. On top of it are the the index records
+in a heap linked into a one way linear list according to alphabetic order.
+
+Just below page end is an array of pointers which we call page directory,
+to about every sixth record in the list. The pointers are placed in
+the directory in the alphabetical order of the records pointed to,
+enabling us to make binary search using the array. Each slot n:o I
+in the directory points to a record, where a 4-bit field contains a count
+of those records which are in the linear list between pointer I and
+the pointer I - 1 in the directory, including the record
+pointed to by pointer I and not including the record pointed to by I - 1.
+We say that the record pointed to by slot I, or that slot I, owns
+these records. The count is always kept in the range 4 to 8, with
+the exception that it is 1 for the first slot, and 1--8 for the second slot.
+
+An essentially binary search can be performed in the list of index
+records, like we could do if we had pointer to every record in the
+page directory. The data structure is, however, more efficient when
+we are doing inserts, because most inserts are just pushed on a heap.
+Only every 8th insert requires block move in the directory pointer
+table, which itself is quite small. A record is deleted from the page
+by just taking it off the linear list and updating the number of owned
+records-field of the record which owns it, and updating the page directory,
+if necessary. A special case is the one when the record owns itself.
+Because the overhead of inserts is so small, we may also increase the
+page size from the projected default of 8 kB to 64 kB without too
+much loss of efficiency in inserts. Bigger page becomes actual
+when the disk transfer rate compared to seek and latency time rises.
+On the present system, the page size is set so that the page transfer
+time (3 ms) is 20 % of the disk random access time (15 ms).
+
+When the page is split, merged, or becomes full but contains deleted
+records, we have to reorganize the page.
+
+Assuming a page size of 8 kB, a typical index page of a secondary
+index contains 300 index entries, and the size of the page directory
+is 50 x 4 bytes = 200 bytes. */
+
+/***************************************************************//**
+Looks for the directory slot which owns the given record.
+@return	the directory slot number */
+UNIV_INTERN
+ulint
+page_dir_find_owner_slot(
+/*=====================*/
+	const rec_t*	rec)	/*!< in: the physical record */
+{
+	const page_t*			page;
+	register uint16			rec_offs_bytes;
+	register const page_dir_slot_t*	slot;
+	register const page_dir_slot_t*	first_slot;
+	register const rec_t*		r = rec;
+
+	ut_ad(page_rec_check(rec));
+
+	page = page_align(rec);
+	first_slot = page_dir_get_nth_slot(page, 0);
+	slot = page_dir_get_nth_slot(page, page_dir_get_n_slots(page) - 1);
+
+	if (page_is_comp(page)) {
+		while (rec_get_n_owned_new(r) == 0) {
+			r = rec_get_next_ptr_const(r, TRUE);
+			ut_ad(r >= page + PAGE_NEW_SUPREMUM);
+			ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
+		}
+	} else {
+		while (rec_get_n_owned_old(r) == 0) {
+			r = rec_get_next_ptr_const(r, FALSE);
+			ut_ad(r >= page + PAGE_OLD_SUPREMUM);
+			ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
+		}
+	}
+
+	rec_offs_bytes = mach_encode_2(r - page);
+
+	while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) {
+
+		if (UNIV_UNLIKELY(slot == first_slot)) {
+			fprintf(stderr,
+				"InnoDB: Probable data corruption on"
+				" page %lu\n"
+				"InnoDB: Original record ",
+				(ulong) page_get_page_no(page));
+
+			if (page_is_comp(page)) {
+				fputs("(compact record)", stderr);
+			} else {
+				rec_print_old(stderr, rec);
+			}
+
+			fputs("\n"
+			      "InnoDB: on that page.\n"
+			      "InnoDB: Cannot find the dir slot for record ",
+			      stderr);
+			if (page_is_comp(page)) {
+				fputs("(compact record)", stderr);
+			} else {
+				rec_print_old(stderr, page
+					      + mach_decode_2(rec_offs_bytes));
+			}
+			fputs("\n"
+			      "InnoDB: on that page!\n", stderr);
+
+			buf_page_print(page, 0);
+
+			ut_error;
+		}
+
+		slot += PAGE_DIR_SLOT_SIZE;
+	}
+
+	return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE);
+}
+
+/**************************************************************//**
+Used to check the consistency of a directory slot.
+@return	TRUE if succeed */
+static
+ibool
+page_dir_slot_check(
+/*================*/
+	page_dir_slot_t*	slot)	/*!< in: slot */
+{
+	page_t*	page;
+	ulint	n_slots;
+	ulint	n_owned;
+
+	ut_a(slot);
+
+	page = page_align(slot);
+
+	n_slots = page_dir_get_n_slots(page);
+
+	ut_a(slot <= page_dir_get_nth_slot(page, 0));
+	ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1));
+
+	ut_a(page_rec_check(page_dir_slot_get_rec(slot)));
+
+	if (page_is_comp(page)) {
+		n_owned = rec_get_n_owned_new(page_dir_slot_get_rec(slot));
+	} else {
+		n_owned = rec_get_n_owned_old(page_dir_slot_get_rec(slot));
+	}
+
+	if (slot == page_dir_get_nth_slot(page, 0)) {
+		ut_a(n_owned == 1);
+	} else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) {
+		ut_a(n_owned >= 1);
+		ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
+	} else {
+		ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);
+		ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
+	}
+
+	return(TRUE);
+}
+
+/*************************************************************//**
+Sets the max trx id field value. */
+UNIV_INTERN
+void
+page_set_max_trx_id(
+/*================*/
+	buf_block_t*	block,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction, or NULL */
+{
+	page_t*		page		= buf_block_get_frame(block);
+#ifndef UNIV_HOTBACKUP
+	const ibool	is_hashed	= block->is_hashed;
+
+	if (is_hashed) {
+		rw_lock_x_lock(&btr_search_latch);
+	}
+
+	ut_ad(!mtr || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+#endif /* !UNIV_HOTBACKUP */
+
+	/* It is not necessary to write this change to the redo log, as
+	during a database recovery we assume that the max trx id of every
+	page is the maximum trx id assigned before the crash. */
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
+		page_zip_write_header(page_zip,
+				      page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
+				      8, mtr);
+#ifndef UNIV_HOTBACKUP
+	} else if (mtr) {
+		mlog_write_dulint(page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
+				  trx_id, mtr);
+#endif /* !UNIV_HOTBACKUP */
+	} else {
+		mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
+	}
+
+#ifndef UNIV_HOTBACKUP
+	if (is_hashed) {
+		rw_lock_x_unlock(&btr_search_latch);
+	}
+#endif /* !UNIV_HOTBACKUP */
+}
+
+/************************************************************//**
+Allocates a block of memory from the heap of an index page.
+@return	pointer to start of allocated buffer, or NULL if allocation fails */
+UNIV_INTERN
+byte*
+page_mem_alloc_heap(
+/*================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page with enough
+				space available for inserting the record,
+				or NULL */
+	ulint		need,	/*!< in: total number of bytes needed */
+	ulint*		heap_no)/*!< out: this contains the heap number
+				of the allocated record
+				if allocation succeeds */
+{
+	byte*	block;
+	ulint	avl_space;
+
+	ut_ad(page && heap_no);
+
+	avl_space = page_get_max_insert_size(page, 1);
+
+	if (avl_space >= need) {
+		block = page_header_get_ptr(page, PAGE_HEAP_TOP);
+
+		page_header_set_ptr(page, page_zip, PAGE_HEAP_TOP,
+				    block + need);
+		*heap_no = page_dir_get_n_heap(page);
+
+		page_dir_set_n_heap(page, page_zip, 1 + *heap_no);
+
+		return(block);
+	}
+
+	return(NULL);
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Writes a log record of page creation. */
+UNIV_INLINE
+void
+page_create_write_log(
+/*==================*/
+	buf_frame_t*	frame,	/*!< in: a buffer frame where the page is
+				created */
+	mtr_t*		mtr,	/*!< in: mini-transaction handle */
+	ibool		comp)	/*!< in: TRUE=compact page format */
+{
+	mlog_write_initial_log_record(frame, comp
+				      ? MLOG_COMP_PAGE_CREATE
+				      : MLOG_PAGE_CREATE, mtr);
+}
+#else /* !UNIV_HOTBACKUP */
+# define page_create_write_log(frame,mtr,comp) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Parses a redo log record of creating a page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_parse_create(
+/*==============*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr __attribute__((unused)), /*!< in: buffer end */
+	ulint		comp,	/*!< in: nonzero=compact page format */
+	buf_block_t*	block,	/*!< in: block or NULL */
+	mtr_t*		mtr)	/*!< in: mtr or NULL */
+{
+	ut_ad(ptr && end_ptr);
+
+	/* The record is empty, except for the record initial part */
+
+	if (block) {
+		page_create(block, mtr, comp);
+	}
+
+	return(ptr);
+}
+
+/**********************************************************//**
+The index page creation function.
+@return	pointer to the page */
+static
+page_t*
+page_create_low(
+/*============*/
+	buf_block_t*	block,		/*!< in: a buffer block where the
+					page is created */
+	ulint		comp)		/*!< in: nonzero=compact page format */
+{
+	page_dir_slot_t* slot;
+	mem_heap_t*	heap;
+	dtuple_t*	tuple;
+	dfield_t*	field;
+	byte*		heap_top;
+	rec_t*		infimum_rec;
+	rec_t*		supremum_rec;
+	page_t*		page;
+	dict_index_t*	index;
+	ulint*		offsets;
+
+	ut_ad(block);
+#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
+# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
+#endif
+#if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA
+# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
+#endif
+
+	/* The infimum and supremum records use a dummy index. */
+	if (UNIV_LIKELY(comp)) {
+		index = dict_ind_compact;
+	} else {
+		index = dict_ind_redundant;
+	}
+
+	/* 1. INCREMENT MODIFY CLOCK */
+	buf_block_modify_clock_inc(block);
+
+	page = buf_block_get_frame(block);
+
+	fil_page_set_type(page, FIL_PAGE_INDEX);
+
+	heap = mem_heap_create(200);
+
+	/* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */
+
+	/* Create first a data tuple for infimum record */
+	tuple = dtuple_create(heap, 1);
+	dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM);
+	field = dtuple_get_nth_field(tuple, 0);
+
+	dfield_set_data(field, "infimum", 8);
+	dtype_set(dfield_get_type(field),
+		  DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8);
+	/* Set the corresponding physical record to its place in the page
+	record heap */
+
+	heap_top = page + PAGE_DATA;
+
+	infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
+
+	if (UNIV_LIKELY(comp)) {
+		ut_a(infimum_rec == page + PAGE_NEW_INFIMUM);
+
+		rec_set_n_owned_new(infimum_rec, NULL, 1);
+		rec_set_heap_no_new(infimum_rec, 0);
+	} else {
+		ut_a(infimum_rec == page + PAGE_OLD_INFIMUM);
+
+		rec_set_n_owned_old(infimum_rec, 1);
+		rec_set_heap_no_old(infimum_rec, 0);
+	}
+
+	offsets = rec_get_offsets(infimum_rec, index, NULL,
+				  ULINT_UNDEFINED, &heap);
+
+	heap_top = rec_get_end(infimum_rec, offsets);
+
+	/* Create then a tuple for supremum */
+
+	tuple = dtuple_create(heap, 1);
+	dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM);
+	field = dtuple_get_nth_field(tuple, 0);
+
+	dfield_set_data(field, "supremum", comp ? 8 : 9);
+	dtype_set(dfield_get_type(field),
+		  DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9);
+
+	supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
+
+	if (UNIV_LIKELY(comp)) {
+		ut_a(supremum_rec == page + PAGE_NEW_SUPREMUM);
+
+		rec_set_n_owned_new(supremum_rec, NULL, 1);
+		rec_set_heap_no_new(supremum_rec, 1);
+	} else {
+		ut_a(supremum_rec == page + PAGE_OLD_SUPREMUM);
+
+		rec_set_n_owned_old(supremum_rec, 1);
+		rec_set_heap_no_old(supremum_rec, 1);
+	}
+
+	offsets = rec_get_offsets(supremum_rec, index, offsets,
+				  ULINT_UNDEFINED, &heap);
+	heap_top = rec_get_end(supremum_rec, offsets);
+
+	ut_ad(heap_top == page
+	      + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END));
+
+	mem_heap_free(heap);
+
+	/* 4. INITIALIZE THE PAGE */
+
+	page_header_set_field(page, NULL, PAGE_N_DIR_SLOTS, 2);
+	page_header_set_ptr(page, NULL, PAGE_HEAP_TOP, heap_top);
+	page_header_set_field(page, NULL, PAGE_N_HEAP, comp
+			      ? 0x8000 | PAGE_HEAP_NO_USER_LOW
+			      : PAGE_HEAP_NO_USER_LOW);
+	page_header_set_ptr(page, NULL, PAGE_FREE, NULL);
+	page_header_set_field(page, NULL, PAGE_GARBAGE, 0);
+	page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, NULL);
+	page_header_set_field(page, NULL, PAGE_DIRECTION, PAGE_NO_DIRECTION);
+	page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
+	page_header_set_field(page, NULL, PAGE_N_RECS, 0);
+	page_set_max_trx_id(block, NULL, ut_dulint_zero, NULL);
+	memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START
+	       - page_offset(heap_top));
+
+	/* 5. SET POINTERS IN RECORDS AND DIR SLOTS */
+
+	/* Set the slots to point to infimum and supremum. */
+
+	slot = page_dir_get_nth_slot(page, 0);
+	page_dir_slot_set_rec(slot, infimum_rec);
+
+	slot = page_dir_get_nth_slot(page, 1);
+	page_dir_slot_set_rec(slot, supremum_rec);
+
+	/* Set the next pointers in infimum and supremum */
+
+	if (UNIV_LIKELY(comp)) {
+		rec_set_next_offs_new(infimum_rec, PAGE_NEW_SUPREMUM);
+		rec_set_next_offs_new(supremum_rec, 0);
+	} else {
+		rec_set_next_offs_old(infimum_rec, PAGE_OLD_SUPREMUM);
+		rec_set_next_offs_old(supremum_rec, 0);
+	}
+
+	return(page);
+}
+
+/**********************************************************//**
+Create an uncompressed B-tree index page.
+@return	pointer to the page */
+UNIV_INTERN
+page_t*
+page_create(
+/*========*/
+	buf_block_t*	block,		/*!< in: a buffer block where the
+					page is created */
+	mtr_t*		mtr,		/*!< in: mini-transaction handle */
+	ulint		comp)		/*!< in: nonzero=compact page format */
+{
+	page_create_write_log(buf_block_get_frame(block), mtr, comp);
+	return(page_create_low(block, comp));
+}
+
+/**********************************************************//**
+Create a compressed B-tree index page.
+@return	pointer to the page */
+UNIV_INTERN
+page_t*
+page_create_zip(
+/*============*/
+	buf_block_t*	block,		/*!< in/out: a buffer frame where the
+					page is created */
+	dict_index_t*	index,		/*!< in: the index of the page */
+	ulint		level,		/*!< in: the B-tree level of the page */
+	mtr_t*		mtr)		/*!< in: mini-transaction handle */
+{
+	page_t*		page;
+	page_zip_des_t*	page_zip	= buf_block_get_page_zip(block);
+
+	ut_ad(block);
+	ut_ad(page_zip);
+	ut_ad(index);
+	ut_ad(dict_table_is_comp(index->table));
+
+	page = page_create_low(block, TRUE);
+	mach_write_to_2(page + PAGE_HEADER + PAGE_LEVEL, level);
+
+	if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
+		/* The compression of a newly created page
+		should always succeed. */
+		ut_error;
+	}
+
+	return(page);
+}
+
+/*************************************************************//**
+Differs from page_copy_rec_list_end, because this function does not
+touch the lock table and max trx id on page or compress the page. */
+UNIV_INTERN
+void
+page_copy_rec_list_end_no_locks(
+/*============================*/
+	buf_block_t*	new_block,	/*!< in: index page to copy to */
+	buf_block_t*	block,		/*!< in: index page of rec */
+	rec_t*		rec,		/*!< in: record on page */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	page_t*		new_page	= buf_block_get_frame(new_block);
+	page_cur_t	cur1;
+	rec_t*		cur2;
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	page_cur_position(rec, block, &cur1);
+
+	if (page_cur_is_before_first(&cur1)) {
+
+		page_cur_move_to_next(&cur1);
+	}
+
+	ut_a((ibool)!!page_is_comp(new_page)
+	     == dict_table_is_comp(index->table));
+	ut_a(page_is_comp(new_page) == page_rec_is_comp(rec));
+	ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
+	     (page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
+
+	cur2 = page_get_infimum_rec(buf_block_get_frame(new_block));
+
+	/* Copy records from the original page to the new page */
+
+	while (!page_cur_is_after_last(&cur1)) {
+		rec_t*	cur1_rec = page_cur_get_rec(&cur1);
+		rec_t*	ins_rec;
+		offsets = rec_get_offsets(cur1_rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		ins_rec = page_cur_insert_rec_low(cur2, index,
+						  cur1_rec, offsets, mtr);
+		if (UNIV_UNLIKELY(!ins_rec)) {
+			/* Track an assertion failure reported on the mailing
+			list on June 18th, 2003 */
+
+			buf_page_print(new_page, 0);
+			buf_page_print(page_align(rec), 0);
+			ut_print_timestamp(stderr);
+
+			fprintf(stderr,
+				"InnoDB: rec offset %lu, cur1 offset %lu,"
+				" cur2 offset %lu\n",
+				(ulong) page_offset(rec),
+				(ulong) page_offset(page_cur_get_rec(&cur1)),
+				(ulong) page_offset(cur2));
+			ut_error;
+		}
+
+		page_cur_move_to_next(&cur1);
+		cur2 = ins_rec;
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Copies records from page to new_page, from a given record onward,
+including that record. Infimum and supremum records are not copied.
+The records are copied to the start of the record list on new_page.
+@return pointer to the original successor of the infimum record on
+new_page, or NULL on zip overflow (new_block will be decompressed) */
+UNIV_INTERN
+rec_t*
+page_copy_rec_list_end(
+/*===================*/
+	buf_block_t*	new_block,	/*!< in/out: index page to copy to */
+	buf_block_t*	block,		/*!< in: index page containing rec */
+	rec_t*		rec,		/*!< in: record on page */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	page_t*		new_page	= buf_block_get_frame(new_block);
+	page_zip_des_t*	new_page_zip	= buf_block_get_page_zip(new_block);
+	page_t*		page		= page_align(rec);
+	rec_t*		ret		= page_rec_get_next(
+		page_get_infimum_rec(new_page));
+	ulint		log_mode	= 0; /* remove warning */
+
+#ifdef UNIV_ZIP_DEBUG
+	if (new_page_zip) {
+		page_zip_des_t*	page_zip = buf_block_get_page_zip(block);
+		ut_a(page_zip);
+
+		/* Strict page_zip_validate() may fail here.
+		Furthermore, btr_compress() may set FIL_PAGE_PREV to
+		FIL_NULL on new_page while leaving it intact on
+		new_page_zip.  So, we cannot validate new_page_zip. */
+		ut_a(page_zip_validate_low(page_zip, page, TRUE));
+	}
+#endif /* UNIV_ZIP_DEBUG */
+	ut_ad(buf_block_get_frame(block) == page);
+	ut_ad(page_is_leaf(page) == page_is_leaf(new_page));
+	ut_ad(page_is_comp(page) == page_is_comp(new_page));
+	/* Here, "ret" may be pointing to a user record or the
+	predefined supremum record. */
+
+	if (UNIV_LIKELY_NULL(new_page_zip)) {
+		log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+	}
+
+	if (page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW) {
+		page_copy_rec_list_end_to_created_page(new_page, rec,
+						       index, mtr);
+	} else {
+		page_copy_rec_list_end_no_locks(new_block, block, rec,
+						index, mtr);
+	}
+
+	if (UNIV_LIKELY_NULL(new_page_zip)) {
+		mtr_set_log_mode(mtr, log_mode);
+
+		if (UNIV_UNLIKELY
+		    (!page_zip_compress(new_page_zip, new_page, index, mtr))) {
+			/* Before trying to reorganize the page,
+			store the number of preceding records on the page. */
+			ulint	ret_pos
+				= page_rec_get_n_recs_before(ret);
+			/* Before copying, "ret" was the successor of
+			the predefined infimum record.  It must still
+			have at least one predecessor (the predefined
+			infimum record, or a freshly copied record
+			that is smaller than "ret"). */
+			ut_a(ret_pos > 0);
+
+			if (UNIV_UNLIKELY
+			    (!page_zip_reorganize(new_block, index, mtr))) {
+
+				if (UNIV_UNLIKELY
+				    (!page_zip_decompress(new_page_zip,
+							  new_page))) {
+					ut_error;
+				}
+				ut_ad(page_validate(new_page, index));
+				return(NULL);
+			} else {
+				/* The page was reorganized:
+				Seek to ret_pos. */
+				ret = new_page + PAGE_NEW_INFIMUM;
+
+				do {
+					ret = rec_get_next_ptr(ret, TRUE);
+				} while (--ret_pos);
+			}
+		}
+	}
+
+	/* Update the lock table, MAX_TRX_ID, and possible hash index */
+
+	lock_move_rec_list_end(new_block, block, rec);
+
+	if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
+		page_update_max_trx_id(new_block, new_page_zip,
+				       page_get_max_trx_id(page), mtr);
+	}
+
+	btr_search_move_or_delete_hash_entries(new_block, block, index);
+
+	return(ret);
+}
+
+/*************************************************************//**
+Copies records from page to new_page, up to the given record,
+NOT including that record. Infimum and supremum records are not copied.
+The records are copied to the end of the record list on new_page.
+@return pointer to the original predecessor of the supremum record on
+new_page, or NULL on zip overflow (new_block will be decompressed) */
+UNIV_INTERN
+rec_t*
+page_copy_rec_list_start(
+/*=====================*/
+	buf_block_t*	new_block,	/*!< in/out: index page to copy to */
+	buf_block_t*	block,		/*!< in: index page containing rec */
+	rec_t*		rec,		/*!< in: record on page */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	page_t*		new_page	= buf_block_get_frame(new_block);
+	page_zip_des_t*	new_page_zip	= buf_block_get_page_zip(new_block);
+	page_cur_t	cur1;
+	rec_t*		cur2;
+	ulint		log_mode	= 0 /* remove warning */;
+	mem_heap_t*	heap		= NULL;
+	rec_t*		ret
+		= page_rec_get_prev(page_get_supremum_rec(new_page));
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	/* Here, "ret" may be pointing to a user record or the
+	predefined infimum record. */
+
+	if (page_rec_is_infimum(rec)) {
+
+		return(ret);
+	}
+
+	if (UNIV_LIKELY_NULL(new_page_zip)) {
+		log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+	}
+
+	page_cur_set_before_first(block, &cur1);
+	page_cur_move_to_next(&cur1);
+
+	cur2 = ret;
+
+	/* Copy records from the original page to the new page */
+
+	while (page_cur_get_rec(&cur1) != rec) {
+		rec_t*	cur1_rec = page_cur_get_rec(&cur1);
+		offsets = rec_get_offsets(cur1_rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		cur2 = page_cur_insert_rec_low(cur2, index,
+					       cur1_rec, offsets, mtr);
+		ut_a(cur2);
+
+		page_cur_move_to_next(&cur1);
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+
+	if (UNIV_LIKELY_NULL(new_page_zip)) {
+		mtr_set_log_mode(mtr, log_mode);
+
+		if (UNIV_UNLIKELY
+		    (!page_zip_compress(new_page_zip, new_page, index, mtr))) {
+			/* Before trying to reorganize the page,
+			store the number of preceding records on the page. */
+			ulint	ret_pos
+				= page_rec_get_n_recs_before(ret);
+			/* Before copying, "ret" was the predecessor
+			of the predefined supremum record.  If it was
+			the predefined infimum record, then it would
+			still be the infimum.  Thus, the assertion
+			ut_a(ret_pos > 0) would fail here. */
+
+			if (UNIV_UNLIKELY
+			    (!page_zip_reorganize(new_block, index, mtr))) {
+
+				if (UNIV_UNLIKELY
+				    (!page_zip_decompress(new_page_zip,
+							  new_page))) {
+					ut_error;
+				}
+				ut_ad(page_validate(new_page, index));
+				return(NULL);
+			} else {
+				/* The page was reorganized:
+				Seek to ret_pos. */
+				ret = new_page + PAGE_NEW_INFIMUM;
+
+				do {
+					ret = rec_get_next_ptr(ret, TRUE);
+				} while (--ret_pos);
+			}
+		}
+	}
+
+	/* Update MAX_TRX_ID, the lock table, and possible hash index */
+
+	if (dict_index_is_sec_or_ibuf(index)
+	    && page_is_leaf(page_align(rec))) {
+		page_update_max_trx_id(new_block, new_page_zip,
+				       page_get_max_trx_id(page_align(rec)),
+				       mtr);
+	}
+
+	lock_move_rec_list_start(new_block, block, rec, ret);
+
+	btr_search_move_or_delete_hash_entries(new_block, block, index);
+
+	return(ret);
+}
+
+/**********************************************************//**
+Writes a log record of a record list end or start deletion. */
+UNIV_INLINE
+void
+page_delete_rec_list_write_log(
+/*===========================*/
+	rec_t*		rec,	/*!< in: record on page */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	byte		type,	/*!< in: operation type:
+				MLOG_LIST_END_DELETE, ... */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	byte*	log_ptr;
+	ut_ad(type == MLOG_LIST_END_DELETE
+	      || type == MLOG_LIST_START_DELETE
+	      || type == MLOG_COMP_LIST_END_DELETE
+	      || type == MLOG_COMP_LIST_START_DELETE);
+
+	log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2);
+	if (log_ptr) {
+		/* Write the parameter as a 2-byte ulint */
+		mach_write_to_2(log_ptr, page_offset(rec));
+		mlog_close(mtr, log_ptr + 2);
+	}
+}
+#else /* !UNIV_HOTBACKUP */
+# define page_delete_rec_list_write_log(rec,index,type,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************//**
+Parses a log record of a record list end or start deletion.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_parse_delete_rec_list(
+/*=======================*/
+	byte		type,	/*!< in: MLOG_LIST_END_DELETE,
+				MLOG_LIST_START_DELETE,
+				MLOG_COMP_LIST_END_DELETE or
+				MLOG_COMP_LIST_START_DELETE */
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in/out: buffer block or NULL */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr)	/*!< in: mtr or NULL */
+{
+	page_t*	page;
+	ulint	offset;
+
+	ut_ad(type == MLOG_LIST_END_DELETE
+	      || type == MLOG_LIST_START_DELETE
+	      || type == MLOG_COMP_LIST_END_DELETE
+	      || type == MLOG_COMP_LIST_START_DELETE);
+
+	/* Read the record offset as a 2-byte ulint */
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	offset = mach_read_from_2(ptr);
+	ptr += 2;
+
+	if (!block) {
+
+		return(ptr);
+	}
+
+	page = buf_block_get_frame(block);
+
+	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+
+	if (type == MLOG_LIST_END_DELETE
+	    || type == MLOG_COMP_LIST_END_DELETE) {
+		page_delete_rec_list_end(page + offset, block, index,
+					 ULINT_UNDEFINED, ULINT_UNDEFINED,
+					 mtr);
+	} else {
+		page_delete_rec_list_start(page + offset, block, index, mtr);
+	}
+
+	return(ptr);
+}
+
+/*************************************************************//**
+Deletes records from a page from a given record onward, including that record.
+The infimum and supremum records are not deleted. */
+UNIV_INTERN
+void
+page_delete_rec_list_end(
+/*=====================*/
+	rec_t*		rec,	/*!< in: pointer to record on page */
+	buf_block_t*	block,	/*!< in: buffer block of the page */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	ulint		n_recs,	/*!< in: number of records to delete,
+				or ULINT_UNDEFINED if not known */
+	ulint		size,	/*!< in: the sum of the sizes of the
+				records in the end of the chain to
+				delete, or ULINT_UNDEFINED if not known */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	page_dir_slot_t*slot;
+	ulint		slot_index;
+	rec_t*		last_rec;
+	rec_t*		prev_rec;
+	ulint		n_owned;
+	page_zip_des_t*	page_zip	= buf_block_get_page_zip(block);
+	page_t*		page		= page_align(rec);
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
+	ut_ad(!page_zip || page_rec_is_comp(rec));
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+	if (page_rec_is_infimum(rec)) {
+		rec = page_rec_get_next(rec);
+	}
+
+	if (page_rec_is_supremum(rec)) {
+
+		return;
+	}
+
+	/* Reset the last insert info in the page header and increment
+	the modify clock for the frame */
+
+	page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
+
+	/* The page gets invalid for optimistic searches: increment the
+	frame modify clock */
+
+	buf_block_modify_clock_inc(block);
+
+	page_delete_rec_list_write_log(rec, index, page_is_comp(page)
+				       ? MLOG_COMP_LIST_END_DELETE
+				       : MLOG_LIST_END_DELETE, mtr);
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		ulint		log_mode;
+
+		ut_a(page_is_comp(page));
+		/* Individual deletes are not logged */
+
+		log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+
+		do {
+			page_cur_t	cur;
+			page_cur_position(rec, block, &cur);
+
+			offsets = rec_get_offsets(rec, index, offsets,
+						  ULINT_UNDEFINED, &heap);
+			rec = rec_get_next_ptr(rec, TRUE);
+#ifdef UNIV_ZIP_DEBUG
+			ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+			page_cur_delete_rec(&cur, index, offsets, mtr);
+		} while (page_offset(rec) != PAGE_NEW_SUPREMUM);
+
+		if (UNIV_LIKELY_NULL(heap)) {
+			mem_heap_free(heap);
+		}
+
+		/* Restore log mode */
+
+		mtr_set_log_mode(mtr, log_mode);
+		return;
+	}
+
+	prev_rec = page_rec_get_prev(rec);
+
+	last_rec = page_rec_get_prev(page_get_supremum_rec(page));
+
+	if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) {
+		rec_t*		rec2		= rec;
+		/* Calculate the sum of sizes and the number of records */
+		size = 0;
+		n_recs = 0;
+
+		do {
+			ulint	s;
+			offsets = rec_get_offsets(rec2, index, offsets,
+						  ULINT_UNDEFINED, &heap);
+			s = rec_offs_size(offsets);
+			ut_ad(rec2 - page + s - rec_offs_extra_size(offsets)
+			      < UNIV_PAGE_SIZE);
+			ut_ad(size + s < UNIV_PAGE_SIZE);
+			size += s;
+			n_recs++;
+
+			rec2 = page_rec_get_next(rec2);
+		} while (!page_rec_is_supremum(rec2));
+
+		if (UNIV_LIKELY_NULL(heap)) {
+			mem_heap_free(heap);
+		}
+	}
+
+	ut_ad(size < UNIV_PAGE_SIZE);
+
+	/* Update the page directory; there is no need to balance the number
+	of the records owned by the supremum record, as it is allowed to be
+	less than PAGE_DIR_SLOT_MIN_N_OWNED */
+
+	if (page_is_comp(page)) {
+		rec_t*	rec2	= rec;
+		ulint	count	= 0;
+
+		while (rec_get_n_owned_new(rec2) == 0) {
+			count++;
+
+			rec2 = rec_get_next_ptr(rec2, TRUE);
+		}
+
+		ut_ad(rec_get_n_owned_new(rec2) > count);
+
+		n_owned = rec_get_n_owned_new(rec2) - count;
+		slot_index = page_dir_find_owner_slot(rec2);
+		slot = page_dir_get_nth_slot(page, slot_index);
+	} else {
+		rec_t*	rec2	= rec;
+		ulint	count	= 0;
+
+		while (rec_get_n_owned_old(rec2) == 0) {
+			count++;
+
+			rec2 = rec_get_next_ptr(rec2, FALSE);
+		}
+
+		ut_ad(rec_get_n_owned_old(rec2) > count);
+
+		n_owned = rec_get_n_owned_old(rec2) - count;
+		slot_index = page_dir_find_owner_slot(rec2);
+		slot = page_dir_get_nth_slot(page, slot_index);
+	}
+
+	page_dir_slot_set_rec(slot, page_get_supremum_rec(page));
+	page_dir_slot_set_n_owned(slot, NULL, n_owned);
+
+	page_dir_set_n_slots(page, NULL, slot_index + 1);
+
+	/* Remove the record chain segment from the record chain */
+	page_rec_set_next(prev_rec, page_get_supremum_rec(page));
+
+	/* Catenate the deleted chain segment to the page free list */
+
+	page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE));
+	page_header_set_ptr(page, NULL, PAGE_FREE, rec);
+
+	page_header_set_field(page, NULL, PAGE_GARBAGE, size
+			      + page_header_get_field(page, PAGE_GARBAGE));
+
+	page_header_set_field(page, NULL, PAGE_N_RECS,
+			      (ulint)(page_get_n_recs(page) - n_recs));
+}
+
+/*************************************************************//**
+Deletes records from page, up to the given record, NOT including
+that record. Infimum and supremum records are not deleted. */
+UNIV_INTERN
+void
+page_delete_rec_list_start(
+/*=======================*/
+	rec_t*		rec,	/*!< in: record on page */
+	buf_block_t*	block,	/*!< in: buffer block of the page */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	page_cur_t	cur1;
+	ulint		log_mode;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	mem_heap_t*	heap		= NULL;
+	byte		type;
+
+	rec_offs_init(offsets_);
+
+	ut_ad((ibool) !!page_rec_is_comp(rec)
+	      == dict_table_is_comp(index->table));
+#ifdef UNIV_ZIP_DEBUG
+	{
+		page_zip_des_t*	page_zip= buf_block_get_page_zip(block);
+		page_t*		page	= buf_block_get_frame(block);
+
+		/* page_zip_validate() would detect a min_rec_mark mismatch
+		in btr_page_split_and_insert()
+		between btr_attach_half_pages() and insert_page = ...
+		when btr_page_get_split_rec_to_left() holds
+		(direction == FSP_DOWN). */
+		ut_a(!page_zip || page_zip_validate_low(page_zip, page, TRUE));
+	}
+#endif /* UNIV_ZIP_DEBUG */
+
+	if (page_rec_is_infimum(rec)) {
+
+		return;
+	}
+
+	if (page_rec_is_comp(rec)) {
+		type = MLOG_COMP_LIST_START_DELETE;
+	} else {
+		type = MLOG_LIST_START_DELETE;
+	}
+
+	page_delete_rec_list_write_log(rec, index, type, mtr);
+
+	page_cur_set_before_first(block, &cur1);
+	page_cur_move_to_next(&cur1);
+
+	/* Individual deletes are not logged */
+
+	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+
+	while (page_cur_get_rec(&cur1) != rec) {
+		offsets = rec_get_offsets(page_cur_get_rec(&cur1), index,
+					  offsets, ULINT_UNDEFINED, &heap);
+		page_cur_delete_rec(&cur1, index, offsets, mtr);
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+
+	/* Restore log mode */
+
+	mtr_set_log_mode(mtr, log_mode);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Moves record list end to another page. Moved records include
+split_rec.
+@return TRUE on success; FALSE on compression failure (new_block will
+be decompressed) */
+UNIV_INTERN
+ibool
+page_move_rec_list_end(
+/*===================*/
+	buf_block_t*	new_block,	/*!< in/out: index page where to move */
+	buf_block_t*	block,		/*!< in: index page from where to move */
+	rec_t*		split_rec,	/*!< in: first record to move */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	page_t*		new_page	= buf_block_get_frame(new_block);
+	ulint		old_data_size;
+	ulint		new_data_size;
+	ulint		old_n_recs;
+	ulint		new_n_recs;
+
+	old_data_size = page_get_data_size(new_page);
+	old_n_recs = page_get_n_recs(new_page);
+#ifdef UNIV_ZIP_DEBUG
+	{
+		page_zip_des_t*	new_page_zip
+			= buf_block_get_page_zip(new_block);
+		page_zip_des_t*	page_zip
+			= buf_block_get_page_zip(block);
+		ut_a(!new_page_zip == !page_zip);
+		ut_a(!new_page_zip
+		     || page_zip_validate(new_page_zip, new_page));
+		ut_a(!page_zip
+		     || page_zip_validate(page_zip, page_align(split_rec)));
+	}
+#endif /* UNIV_ZIP_DEBUG */
+
+	if (UNIV_UNLIKELY(!page_copy_rec_list_end(new_block, block,
+						  split_rec, index, mtr))) {
+		return(FALSE);
+	}
+
+	new_data_size = page_get_data_size(new_page);
+	new_n_recs = page_get_n_recs(new_page);
+
+	ut_ad(new_data_size >= old_data_size);
+
+	page_delete_rec_list_end(split_rec, block, index,
+				 new_n_recs - old_n_recs,
+				 new_data_size - old_data_size, mtr);
+
+	return(TRUE);
+}
+
+/*************************************************************//**
+Moves record list start to another page. Moved records do not include
+split_rec.
+@return	TRUE on success; FALSE on compression failure */
+UNIV_INTERN
+ibool
+page_move_rec_list_start(
+/*=====================*/
+	buf_block_t*	new_block,	/*!< in/out: index page where to move */
+	buf_block_t*	block,		/*!< in/out: page containing split_rec */
+	rec_t*		split_rec,	/*!< in: first record not to move */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	if (UNIV_UNLIKELY(!page_copy_rec_list_start(new_block, block,
+						    split_rec, index, mtr))) {
+		return(FALSE);
+	}
+
+	page_delete_rec_list_start(split_rec, block, index, mtr);
+
+	return(TRUE);
+}
+
+/***********************************************************************//**
+This is a low-level operation which is used in a database index creation
+to update the page number of a created B-tree to a data dictionary record. */
+UNIV_INTERN
+void
+page_rec_write_index_page_no(
+/*=========================*/
+	rec_t*	rec,	/*!< in: record to update */
+	ulint	i,	/*!< in: index of the field to update */
+	ulint	page_no,/*!< in: value to write */
+	mtr_t*	mtr)	/*!< in: mtr */
+{
+	byte*	data;
+	ulint	len;
+
+	data = rec_get_nth_field_old(rec, i, &len);
+
+	ut_ad(len == 4);
+
+	mlog_write_ulint(data, page_no, MLOG_4BYTES, mtr);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/**************************************************************//**
+Used to delete n slots from the directory. This function updates
+also n_owned fields in the records, so that the first slot after
+the deleted ones inherits the records of the deleted slots. */
+UNIV_INLINE
+void
+page_dir_delete_slot(
+/*=================*/
+	page_t*		page,	/*!< in/out: the index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		slot_no)/*!< in: slot to be deleted */
+{
+	page_dir_slot_t*	slot;
+	ulint			n_owned;
+	ulint			i;
+	ulint			n_slots;
+
+	ut_ad(!page_zip || page_is_comp(page));
+	ut_ad(slot_no > 0);
+	ut_ad(slot_no + 1 < page_dir_get_n_slots(page));
+
+	n_slots = page_dir_get_n_slots(page);
+
+	/* 1. Reset the n_owned fields of the slots to be
+	deleted */
+	slot = page_dir_get_nth_slot(page, slot_no);
+	n_owned = page_dir_slot_get_n_owned(slot);
+	page_dir_slot_set_n_owned(slot, page_zip, 0);
+
+	/* 2. Update the n_owned value of the first non-deleted slot */
+
+	slot = page_dir_get_nth_slot(page, slot_no + 1);
+	page_dir_slot_set_n_owned(slot, page_zip,
+				  n_owned + page_dir_slot_get_n_owned(slot));
+
+	/* 3. Destroy the slot by copying slots */
+	for (i = slot_no + 1; i < n_slots; i++) {
+		rec_t*	rec = (rec_t*)
+			page_dir_slot_get_rec(page_dir_get_nth_slot(page, i));
+		page_dir_slot_set_rec(page_dir_get_nth_slot(page, i - 1), rec);
+	}
+
+	/* 4. Zero out the last slot, which will be removed */
+	mach_write_to_2(page_dir_get_nth_slot(page, n_slots - 1), 0);
+
+	/* 5. Update the page header */
+	page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots - 1);
+}
+
+/**************************************************************//**
+Used to add n slots to the directory. Does not set the record pointers
+in the added slots or update n_owned values: this is the responsibility
+of the caller. */
+UNIV_INLINE
+void
+page_dir_add_slot(
+/*==============*/
+	page_t*		page,	/*!< in/out: the index page */
+	page_zip_des_t*	page_zip,/*!< in/out: comprssed page, or NULL */
+	ulint		start)	/*!< in: the slot above which the new slots
+				are added */
+{
+	page_dir_slot_t*	slot;
+	ulint			n_slots;
+
+	n_slots = page_dir_get_n_slots(page);
+
+	ut_ad(start < n_slots - 1);
+
+	/* Update the page header */
+	page_dir_set_n_slots(page, page_zip, n_slots + 1);
+
+	/* Move slots up */
+	slot = page_dir_get_nth_slot(page, n_slots);
+	memmove(slot, slot + PAGE_DIR_SLOT_SIZE,
+		(n_slots - 1 - start) * PAGE_DIR_SLOT_SIZE);
+}
+
+/****************************************************************//**
+Splits a directory slot which owns too many records. */
+UNIV_INTERN
+void
+page_dir_split_slot(
+/*================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be written, or NULL */
+	ulint		slot_no)/*!< in: the directory slot */
+{
+	rec_t*			rec;
+	page_dir_slot_t*	new_slot;
+	page_dir_slot_t*	prev_slot;
+	page_dir_slot_t*	slot;
+	ulint			i;
+	ulint			n_owned;
+
+	ut_ad(page);
+	ut_ad(!page_zip || page_is_comp(page));
+	ut_ad(slot_no > 0);
+
+	slot = page_dir_get_nth_slot(page, slot_no);
+
+	n_owned = page_dir_slot_get_n_owned(slot);
+	ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1);
+
+	/* 1. We loop to find a record approximately in the middle of the
+	records owned by the slot. */
+
+	prev_slot = page_dir_get_nth_slot(page, slot_no - 1);
+	rec = (rec_t*) page_dir_slot_get_rec(prev_slot);
+
+	for (i = 0; i < n_owned / 2; i++) {
+		rec = page_rec_get_next(rec);
+	}
+
+	ut_ad(n_owned / 2 >= PAGE_DIR_SLOT_MIN_N_OWNED);
+
+	/* 2. We add one directory slot immediately below the slot to be
+	split. */
+
+	page_dir_add_slot(page, page_zip, slot_no - 1);
+
+	/* The added slot is now number slot_no, and the old slot is
+	now number slot_no + 1 */
+
+	new_slot = page_dir_get_nth_slot(page, slot_no);
+	slot = page_dir_get_nth_slot(page, slot_no + 1);
+
+	/* 3. We store the appropriate values to the new slot. */
+
+	page_dir_slot_set_rec(new_slot, rec);
+	page_dir_slot_set_n_owned(new_slot, page_zip, n_owned / 2);
+
+	/* 4. Finally, we update the number of records field of the
+	original slot */
+
+	page_dir_slot_set_n_owned(slot, page_zip, n_owned - (n_owned / 2));
+}
+
+/*************************************************************//**
+Tries to balance the given directory slot with too few records with the upper
+neighbor, so that there are at least the minimum number of records owned by
+the slot; this may result in the merging of two slots. */
+UNIV_INTERN
+void
+page_dir_balance_slot(
+/*==================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		slot_no)/*!< in: the directory slot */
+{
+	page_dir_slot_t*	slot;
+	page_dir_slot_t*	up_slot;
+	ulint			n_owned;
+	ulint			up_n_owned;
+	rec_t*			old_rec;
+	rec_t*			new_rec;
+
+	ut_ad(page);
+	ut_ad(!page_zip || page_is_comp(page));
+	ut_ad(slot_no > 0);
+
+	slot = page_dir_get_nth_slot(page, slot_no);
+
+	/* The last directory slot cannot be balanced with the upper
+	neighbor, as there is none. */
+
+	if (UNIV_UNLIKELY(slot_no == page_dir_get_n_slots(page) - 1)) {
+
+		return;
+	}
+
+	up_slot = page_dir_get_nth_slot(page, slot_no + 1);
+
+	n_owned = page_dir_slot_get_n_owned(slot);
+	up_n_owned = page_dir_slot_get_n_owned(up_slot);
+
+	ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1);
+
+	/* If the upper slot has the minimum value of n_owned, we will merge
+	the two slots, therefore we assert: */
+	ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED);
+
+	if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) {
+
+		/* In this case we can just transfer one record owned
+		by the upper slot to the property of the lower slot */
+		old_rec = (rec_t*) page_dir_slot_get_rec(slot);
+
+		if (page_is_comp(page)) {
+			new_rec = rec_get_next_ptr(old_rec, TRUE);
+
+			rec_set_n_owned_new(old_rec, page_zip, 0);
+			rec_set_n_owned_new(new_rec, page_zip, n_owned + 1);
+		} else {
+			new_rec = rec_get_next_ptr(old_rec, FALSE);
+
+			rec_set_n_owned_old(old_rec, 0);
+			rec_set_n_owned_old(new_rec, n_owned + 1);
+		}
+
+		page_dir_slot_set_rec(slot, new_rec);
+
+		page_dir_slot_set_n_owned(up_slot, page_zip, up_n_owned -1);
+	} else {
+		/* In this case we may merge the two slots */
+		page_dir_delete_slot(page, page_zip, slot_no);
+	}
+}
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Returns the middle record of the record list. If there are an even number
+of records in the list, returns the first record of the upper half-list.
+@return	middle record */
+UNIV_INTERN
+rec_t*
+page_get_middle_rec(
+/*================*/
+	page_t*	page)	/*!< in: page */
+{
+	page_dir_slot_t*	slot;
+	ulint			middle;
+	ulint			i;
+	ulint			n_owned;
+	ulint			count;
+	rec_t*			rec;
+
+	/* This many records we must leave behind */
+	middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
+
+	count = 0;
+
+	for (i = 0;; i++) {
+
+		slot = page_dir_get_nth_slot(page, i);
+		n_owned = page_dir_slot_get_n_owned(slot);
+
+		if (count + n_owned > middle) {
+			break;
+		} else {
+			count += n_owned;
+		}
+	}
+
+	ut_ad(i > 0);
+	slot = page_dir_get_nth_slot(page, i - 1);
+	rec = (rec_t*) page_dir_slot_get_rec(slot);
+	rec = page_rec_get_next(rec);
+
+	/* There are now count records behind rec */
+
+	for (i = 0; i < middle - count; i++) {
+		rec = page_rec_get_next(rec);
+	}
+
+	return(rec);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/***************************************************************//**
+Returns the number of records before the given record in chain.
+The number includes infimum and supremum records.
+@return	number of records */
+UNIV_INTERN
+ulint
+page_rec_get_n_recs_before(
+/*=======================*/
+	const rec_t*	rec)	/*!< in: the physical record */
+{
+	const page_dir_slot_t*	slot;
+	const rec_t*		slot_rec;
+	const page_t*		page;
+	ulint			i;
+	lint			n	= 0;
+
+	ut_ad(page_rec_check(rec));
+
+	page = page_align(rec);
+	if (page_is_comp(page)) {
+		while (rec_get_n_owned_new(rec) == 0) {
+
+			rec = rec_get_next_ptr_const(rec, TRUE);
+			n--;
+		}
+
+		for (i = 0; ; i++) {
+			slot = page_dir_get_nth_slot(page, i);
+			slot_rec = page_dir_slot_get_rec(slot);
+
+			n += rec_get_n_owned_new(slot_rec);
+
+			if (rec == slot_rec) {
+
+				break;
+			}
+		}
+	} else {
+		while (rec_get_n_owned_old(rec) == 0) {
+
+			rec = rec_get_next_ptr_const(rec, FALSE);
+			n--;
+		}
+
+		for (i = 0; ; i++) {
+			slot = page_dir_get_nth_slot(page, i);
+			slot_rec = page_dir_slot_get_rec(slot);
+
+			n += rec_get_n_owned_old(slot_rec);
+
+			if (rec == slot_rec) {
+
+				break;
+			}
+		}
+	}
+
+	n--;
+
+	ut_ad(n >= 0);
+
+	return((ulint) n);
+}
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Prints record contents including the data relevant only in
+the index page context. */
+UNIV_INTERN
+void
+page_rec_print(
+/*===========*/
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: record descriptor */
+{
+	ut_a(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
+	rec_print_new(stderr, rec, offsets);
+	if (page_rec_is_comp(rec)) {
+		fprintf(stderr,
+			" n_owned: %lu; heap_no: %lu; next rec: %lu\n",
+			(ulong) rec_get_n_owned_new(rec),
+			(ulong) rec_get_heap_no_new(rec),
+			(ulong) rec_get_next_offs(rec, TRUE));
+	} else {
+		fprintf(stderr,
+			" n_owned: %lu; heap_no: %lu; next rec: %lu\n",
+			(ulong) rec_get_n_owned_old(rec),
+			(ulong) rec_get_heap_no_old(rec),
+			(ulong) rec_get_next_offs(rec, TRUE));
+	}
+
+	page_rec_check(rec);
+	rec_validate(rec, offsets);
+}
+
+/***************************************************************//**
+This is used to print the contents of the directory for
+debugging purposes. */
+UNIV_INTERN
+void
+page_dir_print(
+/*===========*/
+	page_t*	page,	/*!< in: index page */
+	ulint	pr_n)	/*!< in: print n first and n last entries */
+{
+	ulint			n;
+	ulint			i;
+	page_dir_slot_t*	slot;
+
+	n = page_dir_get_n_slots(page);
+
+	fprintf(stderr, "--------------------------------\n"
+		"PAGE DIRECTORY\n"
+		"Page address %p\n"
+		"Directory stack top at offs: %lu; number of slots: %lu\n",
+		page, (ulong) page_offset(page_dir_get_nth_slot(page, n - 1)),
+		(ulong) n);
+	for (i = 0; i < n; i++) {
+		slot = page_dir_get_nth_slot(page, i);
+		if ((i == pr_n) && (i < n - pr_n)) {
+			fputs("    ...   \n", stderr);
+		}
+		if ((i < pr_n) || (i >= n - pr_n)) {
+			fprintf(stderr,
+				"Contents of slot: %lu: n_owned: %lu,"
+				" rec offs: %lu\n",
+				(ulong) i,
+				(ulong) page_dir_slot_get_n_owned(slot),
+				(ulong)
+				page_offset(page_dir_slot_get_rec(slot)));
+		}
+	}
+	fprintf(stderr, "Total of %lu records\n"
+		"--------------------------------\n",
+		(ulong) (PAGE_HEAP_NO_USER_LOW + page_get_n_recs(page)));
+}
+
+/***************************************************************//**
+This is used to print the contents of the page record list for
+debugging purposes. */
+UNIV_INTERN
+void
+page_print_list(
+/*============*/
+	buf_block_t*	block,	/*!< in: index page */
+	dict_index_t*	index,	/*!< in: dictionary index of the page */
+	ulint		pr_n)	/*!< in: print n first and n last entries */
+{
+	page_t*		page		= block->frame;
+	page_cur_t	cur;
+	ulint		count;
+	ulint		n_recs;
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
+
+	fprintf(stderr,
+		"--------------------------------\n"
+		"PAGE RECORD LIST\n"
+		"Page address %p\n", page);
+
+	n_recs = page_get_n_recs(page);
+
+	page_cur_set_before_first(block, &cur);
+	count = 0;
+	for (;;) {
+		offsets = rec_get_offsets(cur.rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		page_rec_print(cur.rec, offsets);
+
+		if (count == pr_n) {
+			break;
+		}
+		if (page_cur_is_after_last(&cur)) {
+			break;
+		}
+		page_cur_move_to_next(&cur);
+		count++;
+	}
+
+	if (n_recs > 2 * pr_n) {
+		fputs(" ... \n", stderr);
+	}
+
+	while (!page_cur_is_after_last(&cur)) {
+		page_cur_move_to_next(&cur);
+
+		if (count + pr_n >= n_recs) {
+			offsets = rec_get_offsets(cur.rec, index, offsets,
+						  ULINT_UNDEFINED, &heap);
+			page_rec_print(cur.rec, offsets);
+		}
+		count++;
+	}
+
+	fprintf(stderr,
+		"Total of %lu records \n"
+		"--------------------------------\n",
+		(ulong) (count + 1));
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+}
+
+/***************************************************************//**
+Prints the info in a page header. */
+UNIV_INTERN
+void
+page_header_print(
+/*==============*/
+	const page_t*	page)
+{
+	fprintf(stderr,
+		"--------------------------------\n"
+		"PAGE HEADER INFO\n"
+		"Page address %p, n records %lu (%s)\n"
+		"n dir slots %lu, heap top %lu\n"
+		"Page n heap %lu, free %lu, garbage %lu\n"
+		"Page last insert %lu, direction %lu, n direction %lu\n",
+		page, (ulong) page_header_get_field(page, PAGE_N_RECS),
+		page_is_comp(page) ? "compact format" : "original format",
+		(ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS),
+		(ulong) page_header_get_field(page, PAGE_HEAP_TOP),
+		(ulong) page_dir_get_n_heap(page),
+		(ulong) page_header_get_field(page, PAGE_FREE),
+		(ulong) page_header_get_field(page, PAGE_GARBAGE),
+		(ulong) page_header_get_field(page, PAGE_LAST_INSERT),
+		(ulong) page_header_get_field(page, PAGE_DIRECTION),
+		(ulong) page_header_get_field(page, PAGE_N_DIRECTION));
+}
+
+/***************************************************************//**
+This is used to print the contents of the page for
+debugging purposes. */
+UNIV_INTERN
+void
+page_print(
+/*=======*/
+	buf_block_t*	block,	/*!< in: index page */
+	dict_index_t*	index,	/*!< in: dictionary index of the page */
+	ulint		dn,	/*!< in: print dn first and last entries
+				in directory */
+	ulint		rn)	/*!< in: print rn first and last records
+				in directory */
+{
+	page_t*	page = block->frame;
+
+	page_header_print(page);
+	page_dir_print(page, dn);
+	page_print_list(block, index, rn);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/***************************************************************//**
+The following is used to validate a record on a page. This function
+differs from rec_validate as it can also check the n_owned field and
+the heap_no field.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+page_rec_validate(
+/*==============*/
+	rec_t*		rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	ulint	n_owned;
+	ulint	heap_no;
+	page_t*	page;
+
+	page = page_align(rec);
+	ut_a(!page_is_comp(page) == !rec_offs_comp(offsets));
+
+	page_rec_check(rec);
+	rec_validate(rec, offsets);
+
+	if (page_rec_is_comp(rec)) {
+		n_owned = rec_get_n_owned_new(rec);
+		heap_no = rec_get_heap_no_new(rec);
+	} else {
+		n_owned = rec_get_n_owned_old(rec);
+		heap_no = rec_get_heap_no_old(rec);
+	}
+
+	if (UNIV_UNLIKELY(!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED))) {
+		fprintf(stderr,
+			"InnoDB: Dir slot of rec %lu, n owned too big %lu\n",
+			(ulong) page_offset(rec), (ulong) n_owned);
+		return(FALSE);
+	}
+
+	if (UNIV_UNLIKELY(!(heap_no < page_dir_get_n_heap(page)))) {
+		fprintf(stderr,
+			"InnoDB: Heap no of rec %lu too big %lu %lu\n",
+			(ulong) page_offset(rec), (ulong) heap_no,
+			(ulong) page_dir_get_n_heap(page));
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Checks that the first directory slot points to the infimum record and
+the last to the supremum. This function is intended to track if the
+bug fixed in 4.0.14 has caused corruption to users' databases. */
+UNIV_INTERN
+void
+page_check_dir(
+/*===========*/
+	const page_t*	page)	/*!< in: index page */
+{
+	ulint	n_slots;
+	ulint	infimum_offs;
+	ulint	supremum_offs;
+
+	n_slots = page_dir_get_n_slots(page);
+	infimum_offs = mach_read_from_2(page_dir_get_nth_slot(page, 0));
+	supremum_offs = mach_read_from_2(page_dir_get_nth_slot(page,
+							       n_slots - 1));
+
+	if (UNIV_UNLIKELY(!page_rec_is_infimum_low(infimum_offs))) {
+
+		fprintf(stderr,
+			"InnoDB: Page directory corruption:"
+			" infimum not pointed to\n");
+		buf_page_print(page, 0);
+	}
+
+	if (UNIV_UNLIKELY(!page_rec_is_supremum_low(supremum_offs))) {
+
+		fprintf(stderr,
+			"InnoDB: Page directory corruption:"
+			" supremum not pointed to\n");
+		buf_page_print(page, 0);
+	}
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/***************************************************************//**
+This function checks the consistency of an index page when we do not
+know the index. This is also resilient so that this should never crash
+even if the page is total garbage.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+page_simple_validate_old(
+/*=====================*/
+	page_t*	page)	/*!< in: old-style index page */
+{
+	page_dir_slot_t* slot;
+	ulint		slot_no;
+	ulint		n_slots;
+	rec_t*		rec;
+	byte*		rec_heap_top;
+	ulint		count;
+	ulint		own_count;
+	ibool		ret	= FALSE;
+
+	ut_a(!page_is_comp(page));
+
+	/* Check first that the record heap and the directory do not
+	overlap. */
+
+	n_slots = page_dir_get_n_slots(page);
+
+	if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
+		fprintf(stderr,
+			"InnoDB: Nonsensical number %lu of page dir slots\n",
+			(ulong) n_slots);
+
+		goto func_exit;
+	}
+
+	rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
+
+	if (UNIV_UNLIKELY(rec_heap_top
+			  > page_dir_get_nth_slot(page, n_slots - 1))) {
+
+		fprintf(stderr,
+			"InnoDB: Record heap and dir overlap on a page,"
+			" heap top %lu, dir %lu\n",
+			(ulong) page_header_get_field(page, PAGE_HEAP_TOP),
+			(ulong)
+			page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
+
+		goto func_exit;
+	}
+
+	/* Validate the record list in a loop checking also that it is
+	consistent with the page record directory. */
+
+	count = 0;
+	own_count = 1;
+	slot_no = 0;
+	slot = page_dir_get_nth_slot(page, slot_no);
+
+	rec = page_get_infimum_rec(page);
+
+	for (;;) {
+		if (UNIV_UNLIKELY(rec > rec_heap_top)) {
+			fprintf(stderr,
+				"InnoDB: Record %lu is above"
+				" rec heap top %lu\n",
+				(ulong)(rec - page),
+				(ulong)(rec_heap_top - page));
+
+			goto func_exit;
+		}
+
+		if (UNIV_UNLIKELY(rec_get_n_owned_old(rec))) {
+			/* This is a record pointed to by a dir slot */
+			if (UNIV_UNLIKELY(rec_get_n_owned_old(rec)
+					  != own_count)) {
+
+				fprintf(stderr,
+					"InnoDB: Wrong owned count %lu, %lu,"
+					" rec %lu\n",
+					(ulong) rec_get_n_owned_old(rec),
+					(ulong) own_count,
+					(ulong)(rec - page));
+
+				goto func_exit;
+			}
+
+			if (UNIV_UNLIKELY
+			    (page_dir_slot_get_rec(slot) != rec)) {
+				fprintf(stderr,
+					"InnoDB: Dir slot does not point"
+					" to right rec %lu\n",
+					(ulong)(rec - page));
+
+				goto func_exit;
+			}
+
+			own_count = 0;
+
+			if (!page_rec_is_supremum(rec)) {
+				slot_no++;
+				slot = page_dir_get_nth_slot(page, slot_no);
+			}
+		}
+
+		if (page_rec_is_supremum(rec)) {
+
+			break;
+		}
+
+		if (UNIV_UNLIKELY
+		    (rec_get_next_offs(rec, FALSE) < FIL_PAGE_DATA
+		     || rec_get_next_offs(rec, FALSE) >= UNIV_PAGE_SIZE)) {
+			fprintf(stderr,
+				"InnoDB: Next record offset"
+				" nonsensical %lu for rec %lu\n",
+				(ulong) rec_get_next_offs(rec, FALSE),
+				(ulong) (rec - page));
+
+			goto func_exit;
+		}
+
+		count++;
+
+		if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+			fprintf(stderr,
+				"InnoDB: Page record list appears"
+				" to be circular %lu\n",
+				(ulong) count);
+			goto func_exit;
+		}
+
+		rec = page_rec_get_next(rec);
+		own_count++;
+	}
+
+	if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
+		fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n");
+
+		goto func_exit;
+	}
+
+	if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
+		fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
+			(ulong) slot_no, (ulong) (n_slots - 1));
+		goto func_exit;
+	}
+
+	if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
+			  + PAGE_HEAP_NO_USER_LOW
+			  != count + 1)) {
+		fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
+			(ulong) page_header_get_field(page, PAGE_N_RECS)
+			+ PAGE_HEAP_NO_USER_LOW,
+			(ulong) (count + 1));
+
+		goto func_exit;
+	}
+
+	/* Check then the free list */
+	rec = page_header_get_ptr(page, PAGE_FREE);
+
+	while (rec != NULL) {
+		if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
+				  || rec >= page + UNIV_PAGE_SIZE)) {
+			fprintf(stderr,
+				"InnoDB: Free list record has"
+				" a nonsensical offset %lu\n",
+				(ulong) (rec - page));
+
+			goto func_exit;
+		}
+
+		if (UNIV_UNLIKELY(rec > rec_heap_top)) {
+			fprintf(stderr,
+				"InnoDB: Free list record %lu"
+				" is above rec heap top %lu\n",
+				(ulong) (rec - page),
+				(ulong) (rec_heap_top - page));
+
+			goto func_exit;
+		}
+
+		count++;
+
+		if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+			fprintf(stderr,
+				"InnoDB: Page free list appears"
+				" to be circular %lu\n",
+				(ulong) count);
+			goto func_exit;
+		}
+
+		rec = page_rec_get_next(rec);
+	}
+
+	if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
+
+		fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
+			(ulong) page_dir_get_n_heap(page),
+			(ulong) (count + 1));
+
+		goto func_exit;
+	}
+
+	ret = TRUE;
+
+func_exit:
+	return(ret);
+}
+
+/***************************************************************//**
+This function checks the consistency of an index page when we do not
+know the index. This is also resilient so that this should never crash
+even if the page is total garbage.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+page_simple_validate_new(
+/*=====================*/
+	page_t*	page)	/*!< in: new-style index page */
+{
+	page_dir_slot_t* slot;
+	ulint		slot_no;
+	ulint		n_slots;
+	rec_t*		rec;
+	byte*		rec_heap_top;
+	ulint		count;
+	ulint		own_count;
+	ibool		ret	= FALSE;
+
+	ut_a(page_is_comp(page));
+
+	/* Check first that the record heap and the directory do not
+	overlap. */
+
+	n_slots = page_dir_get_n_slots(page);
+
+	if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
+		fprintf(stderr,
+			"InnoDB: Nonsensical number %lu"
+			" of page dir slots\n", (ulong) n_slots);
+
+		goto func_exit;
+	}
+
+	rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
+
+	if (UNIV_UNLIKELY(rec_heap_top
+			  > page_dir_get_nth_slot(page, n_slots - 1))) {
+
+		fprintf(stderr,
+			"InnoDB: Record heap and dir overlap on a page,"
+			" heap top %lu, dir %lu\n",
+			(ulong) page_header_get_field(page, PAGE_HEAP_TOP),
+			(ulong)
+			page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
+
+		goto func_exit;
+	}
+
+	/* Validate the record list in a loop checking also that it is
+	consistent with the page record directory. */
+
+	count = 0;
+	own_count = 1;
+	slot_no = 0;
+	slot = page_dir_get_nth_slot(page, slot_no);
+
+	rec = page_get_infimum_rec(page);
+
+	for (;;) {
+		if (UNIV_UNLIKELY(rec > rec_heap_top)) {
+			fprintf(stderr,
+				"InnoDB: Record %lu is above rec"
+				" heap top %lu\n",
+				(ulong) page_offset(rec),
+				(ulong) page_offset(rec_heap_top));
+
+			goto func_exit;
+		}
+
+		if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
+			/* This is a record pointed to by a dir slot */
+			if (UNIV_UNLIKELY(rec_get_n_owned_new(rec)
+					  != own_count)) {
+
+				fprintf(stderr,
+					"InnoDB: Wrong owned count %lu, %lu,"
+					" rec %lu\n",
+					(ulong) rec_get_n_owned_new(rec),
+					(ulong) own_count,
+					(ulong) page_offset(rec));
+
+				goto func_exit;
+			}
+
+			if (UNIV_UNLIKELY
+			    (page_dir_slot_get_rec(slot) != rec)) {
+				fprintf(stderr,
+					"InnoDB: Dir slot does not point"
+					" to right rec %lu\n",
+					(ulong) page_offset(rec));
+
+				goto func_exit;
+			}
+
+			own_count = 0;
+
+			if (!page_rec_is_supremum(rec)) {
+				slot_no++;
+				slot = page_dir_get_nth_slot(page, slot_no);
+			}
+		}
+
+		if (page_rec_is_supremum(rec)) {
+
+			break;
+		}
+
+		if (UNIV_UNLIKELY
+		    (rec_get_next_offs(rec, TRUE) < FIL_PAGE_DATA
+		     || rec_get_next_offs(rec, TRUE) >= UNIV_PAGE_SIZE)) {
+			fprintf(stderr,
+				"InnoDB: Next record offset nonsensical %lu"
+				" for rec %lu\n",
+				(ulong) rec_get_next_offs(rec, TRUE),
+				(ulong) page_offset(rec));
+
+			goto func_exit;
+		}
+
+		count++;
+
+		if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+			fprintf(stderr,
+				"InnoDB: Page record list appears"
+				" to be circular %lu\n",
+				(ulong) count);
+			goto func_exit;
+		}
+
+		rec = page_rec_get_next(rec);
+		own_count++;
+	}
+
+	if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
+		fprintf(stderr, "InnoDB: n owned is zero"
+			" in a supremum rec\n");
+
+		goto func_exit;
+	}
+
+	if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
+		fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
+			(ulong) slot_no, (ulong) (n_slots - 1));
+		goto func_exit;
+	}
+
+	if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
+			  + PAGE_HEAP_NO_USER_LOW
+			  != count + 1)) {
+		fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
+			(ulong) page_header_get_field(page, PAGE_N_RECS)
+			+ PAGE_HEAP_NO_USER_LOW,
+			(ulong) (count + 1));
+
+		goto func_exit;
+	}
+
+	/* Check then the free list */
+	rec = page_header_get_ptr(page, PAGE_FREE);
+
+	while (rec != NULL) {
+		if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
+				  || rec >= page + UNIV_PAGE_SIZE)) {
+			fprintf(stderr,
+				"InnoDB: Free list record has"
+				" a nonsensical offset %lu\n",
+				(ulong) page_offset(rec));
+
+			goto func_exit;
+		}
+
+		if (UNIV_UNLIKELY(rec > rec_heap_top)) {
+			fprintf(stderr,
+				"InnoDB: Free list record %lu"
+				" is above rec heap top %lu\n",
+				(ulong) page_offset(rec),
+				(ulong) page_offset(rec_heap_top));
+
+			goto func_exit;
+		}
+
+		count++;
+
+		if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+			fprintf(stderr,
+				"InnoDB: Page free list appears"
+				" to be circular %lu\n",
+				(ulong) count);
+			goto func_exit;
+		}
+
+		rec = page_rec_get_next(rec);
+	}
+
+	if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
+
+		fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
+			(ulong) page_dir_get_n_heap(page),
+			(ulong) (count + 1));
+
+		goto func_exit;
+	}
+
+	ret = TRUE;
+
+func_exit:
+	return(ret);
+}
+
+/***************************************************************//**
+This function checks the consistency of an index page.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+page_validate(
+/*==========*/
+	page_t*		page,	/*!< in: index page */
+	dict_index_t*	index)	/*!< in: data dictionary index containing
+				the page record type definition */
+{
+	page_dir_slot_t*slot;
+	mem_heap_t*	heap;
+	byte*		buf;
+	ulint		count;
+	ulint		own_count;
+	ulint		rec_own_count;
+	ulint		slot_no;
+	ulint		data_size;
+	rec_t*		rec;
+	rec_t*		old_rec		= NULL;
+	ulint		offs;
+	ulint		n_slots;
+	ibool		ret		= FALSE;
+	ulint		i;
+	ulint*		offsets		= NULL;
+	ulint*		old_offsets	= NULL;
+
+	if (UNIV_UNLIKELY((ibool) !!page_is_comp(page)
+			  != dict_table_is_comp(index->table))) {
+		fputs("InnoDB: 'compact format' flag mismatch\n", stderr);
+		goto func_exit2;
+	}
+	if (page_is_comp(page)) {
+		if (UNIV_UNLIKELY(!page_simple_validate_new(page))) {
+			goto func_exit2;
+		}
+	} else {
+		if (UNIV_UNLIKELY(!page_simple_validate_old(page))) {
+			goto func_exit2;
+		}
+	}
+
+	heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
+
+	/* The following buffer is used to check that the
+	records in the page record heap do not overlap */
+
+	buf = mem_heap_zalloc(heap, UNIV_PAGE_SIZE);
+
+	/* Check first that the record heap and the directory do not
+	overlap. */
+
+	n_slots = page_dir_get_n_slots(page);
+
+	if (UNIV_UNLIKELY(!(page_header_get_ptr(page, PAGE_HEAP_TOP)
+			    <= page_dir_get_nth_slot(page, n_slots - 1)))) {
+
+		fprintf(stderr, 
+			"InnoDB: Record heap and dir overlap"
+			" on space %lu page %lu index %s, %p, %p\n",
+			(ulong) page_get_space_id(page),
+			(ulong) page_get_page_no(page), index->name,
+			page_header_get_ptr(page, PAGE_HEAP_TOP),
+			page_dir_get_nth_slot(page, n_slots - 1));
+
+		goto func_exit;
+	}
+
+	/* Validate the record list in a loop checking also that
+	it is consistent with the directory. */
+	count = 0;
+	data_size = 0;
+	own_count = 1;
+	slot_no = 0;
+	slot = page_dir_get_nth_slot(page, slot_no);
+
+	rec = page_get_infimum_rec(page);
+
+	for (;;) {
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+
+		if (page_is_comp(page) && page_rec_is_user_rec(rec)
+		    && UNIV_UNLIKELY(rec_get_node_ptr_flag(rec)
+				     == page_is_leaf(page))) {
+			fputs("InnoDB: node_ptr flag mismatch\n", stderr);
+			goto func_exit;
+		}
+
+		if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
+			goto func_exit;
+		}
+
+#ifndef UNIV_HOTBACKUP
+		/* Check that the records are in the ascending order */
+		if (UNIV_LIKELY(count >= PAGE_HEAP_NO_USER_LOW)
+		    && !page_rec_is_supremum(rec)) {
+			if (UNIV_UNLIKELY
+			    (1 != cmp_rec_rec(rec, old_rec,
+					      offsets, old_offsets, index))) {
+				fprintf(stderr, 
+					"InnoDB: Records in wrong order"
+					" on space %lu page %lu index %s\n",
+					(ulong) page_get_space_id(page),
+					(ulong) page_get_page_no(page),
+					index->name);
+				fputs("\nInnoDB: previous record ", stderr);
+				rec_print_new(stderr, old_rec, old_offsets);
+				fputs("\nInnoDB: record ", stderr);
+				rec_print_new(stderr, rec, offsets);
+				putc('\n', stderr);
+
+				goto func_exit;
+			}
+		}
+#endif /* !UNIV_HOTBACKUP */
+
+		if (page_rec_is_user_rec(rec)) {
+
+			data_size += rec_offs_size(offsets);
+		}
+
+		offs = page_offset(rec_get_start(rec, offsets));
+
+		for (i = rec_offs_size(offsets); i--; ) {
+			if (UNIV_UNLIKELY(buf[offs + i])) {
+				/* No other record may overlap this */
+
+				fputs("InnoDB: Record overlaps another\n",
+				      stderr);
+				goto func_exit;
+			}
+
+			buf[offs + i] = 1;
+		}
+
+		if (page_is_comp(page)) {
+			rec_own_count = rec_get_n_owned_new(rec);
+		} else {
+			rec_own_count = rec_get_n_owned_old(rec);
+		}
+
+		if (UNIV_UNLIKELY(rec_own_count)) {
+			/* This is a record pointed to by a dir slot */
+			if (UNIV_UNLIKELY(rec_own_count != own_count)) {
+				fprintf(stderr,
+					"InnoDB: Wrong owned count %lu, %lu\n",
+					(ulong) rec_own_count,
+					(ulong) own_count);
+				goto func_exit;
+			}
+
+			if (page_dir_slot_get_rec(slot) != rec) {
+				fputs("InnoDB: Dir slot does not"
+				      " point to right rec\n",
+				      stderr);
+				goto func_exit;
+			}
+
+			page_dir_slot_check(slot);
+
+			own_count = 0;
+			if (!page_rec_is_supremum(rec)) {
+				slot_no++;
+				slot = page_dir_get_nth_slot(page, slot_no);
+			}
+		}
+
+		if (page_rec_is_supremum(rec)) {
+			break;
+		}
+
+		count++;
+		own_count++;
+		old_rec = rec;
+		rec = page_rec_get_next(rec);
+
+		/* set old_offsets to offsets; recycle offsets */
+		{
+			ulint* offs = old_offsets;
+			old_offsets = offsets;
+			offsets = offs;
+		}
+	}
+
+	if (page_is_comp(page)) {
+		if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
+
+			goto n_owned_zero;
+		}
+	} else if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
+n_owned_zero:
+		fputs("InnoDB: n owned is zero\n", stderr);
+		goto func_exit;
+	}
+
+	if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
+		fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n",
+			(ulong) slot_no, (ulong) (n_slots - 1));
+		goto func_exit;
+	}
+
+	if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
+			  + PAGE_HEAP_NO_USER_LOW
+			  != count + 1)) {
+		fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
+			(ulong) page_header_get_field(page, PAGE_N_RECS)
+			+ PAGE_HEAP_NO_USER_LOW,
+			(ulong) (count + 1));
+		goto func_exit;
+	}
+
+	if (UNIV_UNLIKELY(data_size != page_get_data_size(page))) {
+		fprintf(stderr,
+			"InnoDB: Summed data size %lu, returned by func %lu\n",
+			(ulong) data_size, (ulong) page_get_data_size(page));
+		goto func_exit;
+	}
+
+	/* Check then the free list */
+	rec = page_header_get_ptr(page, PAGE_FREE);
+
+	while (rec != NULL) {
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
+
+			goto func_exit;
+		}
+
+		count++;
+		offs = page_offset(rec_get_start(rec, offsets));
+
+		for (i = rec_offs_size(offsets); i--; ) {
+
+			if (UNIV_UNLIKELY(buf[offs + i])) {
+				fputs("InnoDB: Record overlaps another"
+				      " in free list\n", stderr);
+				goto func_exit;
+			}
+
+			buf[offs + i] = 1;
+		}
+
+		rec = page_rec_get_next(rec);
+	}
+
+	if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
+		fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n",
+			(ulong) page_dir_get_n_heap(page),
+			(ulong) count + 1);
+		goto func_exit;
+	}
+
+	ret = TRUE;
+
+func_exit:
+	mem_heap_free(heap);
+
+	if (UNIV_UNLIKELY(ret == FALSE)) {
+func_exit2:
+		fprintf(stderr, 
+			"InnoDB: Apparent corruption"
+			" in space %lu page %lu index %s\n",
+			(ulong) page_get_space_id(page),
+			(ulong) page_get_page_no(page),
+			index->name);
+		buf_page_print(page, 0);
+	}
+
+	return(ret);
+}
+
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Looks in the page record list for a record with the given heap number.
+@return	record, NULL if not found */
+UNIV_INTERN
+const rec_t*
+page_find_rec_with_heap_no(
+/*=======================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		heap_no)/*!< in: heap number */
+{
+	const rec_t*	rec;
+
+	if (page_is_comp(page)) {
+		rec = page + PAGE_NEW_INFIMUM;
+
+		for(;;) {
+			ulint	rec_heap_no = rec_get_heap_no_new(rec);
+
+			if (rec_heap_no == heap_no) {
+
+				return(rec);
+			} else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
+
+				return(NULL);
+			}
+
+			rec = page + rec_get_next_offs(rec, TRUE);
+		}
+	} else {
+		rec = page + PAGE_OLD_INFIMUM;
+
+		for (;;) {
+			ulint	rec_heap_no = rec_get_heap_no_old(rec);
+
+			if (rec_heap_no == heap_no) {
+
+				return(rec);
+			} else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
+
+				return(NULL);
+			}
+
+			rec = page + rec_get_next_offs(rec, FALSE);
+		}
+	}
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/page/page0zip.c b/storage/innodb_plugin/page/page0zip.c
new file mode 100644
index 00000000000..92ba0ec768a
--- /dev/null
+++ b/storage/innodb_plugin/page/page0zip.c
@@ -0,0 +1,4628 @@
+/*****************************************************************************
+
+Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file page/page0zip.c
+Compressed page interface
+
+Created June 2005 by Marko Makela
+*******************************************************/
+
+#define THIS_MODULE
+#include "page0zip.h"
+#ifdef UNIV_NONINL
+# include "page0zip.ic"
+#endif
+#undef THIS_MODULE
+#include "page0page.h"
+#include "mtr0log.h"
+#include "ut0sort.h"
+#include "dict0dict.h"
+#include "btr0cur.h"
+#include "page0types.h"
+#include "log0recv.h"
+#include "zlib.h"
+#ifndef UNIV_HOTBACKUP
+# include "buf0lru.h"
+# include "btr0sea.h"
+# include "dict0boot.h"
+# include "lock0lock.h"
+#else /* !UNIV_HOTBACKUP */
+# define lock_move_reorganize_page(block, temp_block)	((void) 0)
+# define buf_LRU_stat_inc_unzip()			((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
+UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
+
+/* Please refer to ../include/page0zip.ic for a description of the
+compressed page format. */
+
+/* The infimum and supremum records are omitted from the compressed page.
+On compress, we compare that the records are there, and on uncompress we
+restore the records. */
+/** Extra bytes of an infimum record */
+static const byte infimum_extra[] = {
+	0x01,			/* info_bits=0, n_owned=1 */
+	0x00, 0x02		/* heap_no=0, status=2 */
+	/* ?, ?	*/		/* next=(first user rec, or supremum) */
+};
+/** Data bytes of an infimum record */
+static const byte infimum_data[] = {
+	0x69, 0x6e, 0x66, 0x69,
+	0x6d, 0x75, 0x6d, 0x00	/* "infimum\0" */
+};
+/** Extra bytes and data bytes of a supremum record */
+static const byte supremum_extra_data[] = {
+	/* 0x0?, */		/* info_bits=0, n_owned=1..8 */
+	0x00, 0x0b,		/* heap_no=1, status=3 */
+	0x00, 0x00,		/* next=0 */
+	0x73, 0x75, 0x70, 0x72,
+	0x65, 0x6d, 0x75, 0x6d	/* "supremum" */
+};
+
+/** Assert that a block of memory is filled with zero bytes.
+Compare at most sizeof(field_ref_zero) bytes.
+@param b	in: memory block
+@param s	in: size of the memory block, in bytes */
+#define ASSERT_ZERO(b, s) \
+	ut_ad(!memcmp(b, field_ref_zero, ut_min(s, sizeof field_ref_zero)))
+/** Assert that a BLOB pointer is filled with zero bytes.
+@param b	in: BLOB pointer */
+#define ASSERT_ZERO_BLOB(b) \
+	ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero))
+
+/* Enable some extra debugging output.  This code can be enabled
+independently of any UNIV_ debugging conditions. */
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+# include <stdarg.h>
+__attribute__((format (printf, 1, 2)))
+/**********************************************************************//**
+Report a failure to decompress or compress.
+@return	number of characters printed */
+static
+int
+page_zip_fail_func(
+/*===============*/
+	const char*	fmt,	/*!< in: printf(3) format string */
+	...)			/*!< in: arguments corresponding to fmt */
+{
+	int	res;
+	va_list	ap;
+
+	ut_print_timestamp(stderr);
+	fputs("  InnoDB: ", stderr);
+	va_start(ap, fmt);
+	res = vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	return(res);
+}
+/** Wrapper for page_zip_fail_func()
+@param fmt_args	in: printf(3) format string and arguments */
+# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
+#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+/** Dummy wrapper for page_zip_fail_func()
+@param fmt_args	ignored: printf(3) format string and arguments */
+# define page_zip_fail(fmt_args) /* empty */
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Determine the guaranteed free space on an empty page.
+@return	minimum payload size on the page */
+UNIV_INTERN
+ulint
+page_zip_empty_size(
+/*================*/
+	ulint	n_fields,	/*!< in: number of columns in the index */
+	ulint	zip_size)	/*!< in: compressed page size in bytes */
+{
+	lint	size = zip_size
+		/* subtract the page header and the longest
+		uncompressed data needed for one record */
+		- (PAGE_DATA
+		   + PAGE_ZIP_DIR_SLOT_SIZE
+		   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
+		   + 1/* encoded heap_no==2 in page_zip_write_rec() */
+		   + 1/* end of modification log */
+		   - REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
+		/* subtract the space for page_zip_fields_encode() */
+		- compressBound(2 * (n_fields + 1));
+	return(size > 0 ? (ulint) size : 0);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*************************************************************//**
+Gets the size of the compressed page trailer (the dense page directory),
+including deleted records (the free list).
+@return	length of dense page directory, in bytes */
+UNIV_INLINE
+ulint
+page_zip_dir_size(
+/*==============*/
+	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
+{
+	/* Exclude the page infimum and supremum from the record count. */
+	ulint	size = PAGE_ZIP_DIR_SLOT_SIZE
+		* (page_dir_get_n_heap(page_zip->data)
+		   - PAGE_HEAP_NO_USER_LOW);
+	return(size);
+}
+
+/*************************************************************//**
+Gets the size of the compressed page trailer (the dense page directory),
+only including user records (excluding the free list).
+@return	length of dense page directory comprising existing records, in bytes */
+UNIV_INLINE
+ulint
+page_zip_dir_user_size(
+/*===================*/
+	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
+{
+	ulint	size = PAGE_ZIP_DIR_SLOT_SIZE
+		* page_get_n_recs(page_zip->data);
+	ut_ad(size <= page_zip_dir_size(page_zip));
+	return(size);
+}
+
+/*************************************************************//**
+Find the slot of the given record in the dense page directory.
+@return	dense directory slot, or NULL if record not found */
+UNIV_INLINE
+byte*
+page_zip_dir_find_low(
+/*==================*/
+	byte*	slot,			/*!< in: start of records */
+	byte*	end,			/*!< in: end of records */
+	ulint	offset)			/*!< in: offset of user record */
+{
+	ut_ad(slot <= end);
+
+	for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
+		if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
+		    == offset) {
+			return(slot);
+		}
+	}
+
+	return(NULL);
+}
+
+/*************************************************************//**
+Find the slot of the given non-free record in the dense page directory.
+@return	dense directory slot, or NULL if record not found */
+UNIV_INLINE
+byte*
+page_zip_dir_find(
+/*==============*/
+	page_zip_des_t*	page_zip,		/*!< in: compressed page */
+	ulint		offset)			/*!< in: offset of user record */
+{
+	byte*	end	= page_zip->data + page_zip_get_size(page_zip);
+
+	ut_ad(page_zip_simple_validate(page_zip));
+
+	return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip),
+				     end,
+				     offset));
+}
+
+/*************************************************************//**
+Find the slot of the given free record in the dense page directory.
+@return	dense directory slot, or NULL if record not found */
+UNIV_INLINE
+byte*
+page_zip_dir_find_free(
+/*===================*/
+	page_zip_des_t*	page_zip,		/*!< in: compressed page */
+	ulint		offset)			/*!< in: offset of user record */
+{
+	byte*	end	= page_zip->data + page_zip_get_size(page_zip);
+
+	ut_ad(page_zip_simple_validate(page_zip));
+
+	return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
+				     end - page_zip_dir_user_size(page_zip),
+				     offset));
+}
+
+/*************************************************************//**
+Read a given slot in the dense page directory.
+@return record offset on the uncompressed page, possibly ORed with
+PAGE_ZIP_DIR_SLOT_DEL or PAGE_ZIP_DIR_SLOT_OWNED */
+UNIV_INLINE
+ulint
+page_zip_dir_get(
+/*=============*/
+	const page_zip_des_t*	page_zip,	/*!< in: compressed page */
+	ulint			slot)		/*!< in: slot
+						(0=first user record) */
+{
+	ut_ad(page_zip_simple_validate(page_zip));
+	ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
+	return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip)
+				- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Write a log record of compressing an index page. */
+static
+void
+page_zip_compress_write_log(
+/*========================*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	const page_t*		page,	/*!< in: uncompressed page */
+	dict_index_t*		index,	/*!< in: index of the B-tree node */
+	mtr_t*			mtr)	/*!< in: mini-transaction */
+{
+	byte*	log_ptr;
+	ulint	trailer_size;
+
+	ut_ad(!dict_index_is_ibuf(index));
+
+	log_ptr = mlog_open(mtr, 11 + 2 + 2);
+
+	if (!log_ptr) {
+
+		return;
+	}
+
+	/* Read the number of user records. */
+	trailer_size = page_dir_get_n_heap(page_zip->data)
+		- PAGE_HEAP_NO_USER_LOW;
+	/* Multiply by uncompressed of size stored per record */
+	if (!page_is_leaf(page)) {
+		trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
+	} else if (dict_index_is_clust(index)) {
+		trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
+			+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+	} else {
+		trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
+	}
+	/* Add the space occupied by BLOB pointers. */
+	trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
+	ut_a(page_zip->m_end > PAGE_DATA);
+#if FIL_PAGE_DATA > PAGE_DATA
+# error "FIL_PAGE_DATA > PAGE_DATA"
+#endif
+	ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));
+
+	log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
+						     MLOG_ZIP_PAGE_COMPRESS,
+						     log_ptr, mtr);
+	mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
+	log_ptr += 2;
+	mach_write_to_2(log_ptr, trailer_size);
+	log_ptr += 2;
+	mlog_close(mtr, log_ptr);
+
+	/* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
+	mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
+	mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
+	/* Write most of the page header, the compressed stream and
+	the modification log. */
+	mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
+			     page_zip->m_end - FIL_PAGE_TYPE);
+	/* Write the uncompressed trailer of the compressed page. */
+	mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
+			     - trailer_size, trailer_size);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/******************************************************//**
+Determine how many externally stored columns are contained
+in existing records with smaller heap_no than rec. */
+static
+ulint
+page_zip_get_n_prev_extern(
+/*=======================*/
+	const page_zip_des_t*	page_zip,/*!< in: dense page directory on
+					compressed page */
+	const rec_t*		rec,	/*!< in: compact physical record
+					on a B-tree leaf page */
+	dict_index_t*		index)	/*!< in: record descriptor */
+{
+	const page_t*	page	= page_align(rec);
+	ulint		n_ext	= 0;
+	ulint		i;
+	ulint		left;
+	ulint		heap_no;
+	ulint		n_recs	= page_get_n_recs(page_zip->data);
+
+	ut_ad(page_is_leaf(page));
+	ut_ad(page_is_comp(page));
+	ut_ad(dict_table_is_comp(index->table));
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(!dict_index_is_ibuf(index));
+
+	heap_no = rec_get_heap_no_new(rec);
+	ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
+	left = heap_no - PAGE_HEAP_NO_USER_LOW;
+	if (UNIV_UNLIKELY(!left)) {
+		return(0);
+	}
+
+	for (i = 0; i < n_recs; i++) {
+		const rec_t*	r	= page + (page_zip_dir_get(page_zip, i)
+						  & PAGE_ZIP_DIR_SLOT_MASK);
+
+		if (rec_get_heap_no_new(r) < heap_no) {
+			n_ext += rec_get_n_extern_new(r, index,
+						      ULINT_UNDEFINED);
+			if (!--left) {
+				break;
+			}
+		}
+	}
+
+	return(n_ext);
+}
+
+/**********************************************************************//**
+Encode the length of a fixed-length column.
+@return	buf + length of encoded val */
+static
+byte*
+page_zip_fixed_field_encode(
+/*========================*/
+	byte*	buf,	/*!< in: pointer to buffer where to write */
+	ulint	val)	/*!< in: value to write */
+{
+	ut_ad(val >= 2);
+
+	if (UNIV_LIKELY(val < 126)) {
+		/*
+		0 = nullable variable field of at most 255 bytes length;
+		1 = not null variable field of at most 255 bytes length;
+		126 = nullable variable field with maximum length >255;
+		127 = not null variable field with maximum length >255
+		*/
+		*buf++ = (byte) val;
+	} else {
+		*buf++ = (byte) (0x80 | val >> 8);
+		*buf++ = (byte) val;
+	}
+
+	return(buf);
+}
+
+/**********************************************************************//**
+Write the index information for the compressed page.
+@return	used size of buf */
+static
+ulint
+page_zip_fields_encode(
+/*===================*/
+	ulint		n,	/*!< in: number of fields to compress */
+	dict_index_t*	index,	/*!< in: index comprising at least n fields */
+	ulint		trx_id_pos,/*!< in: position of the trx_id column
+				in the index, or ULINT_UNDEFINED if
+				this is a non-leaf page */
+	byte*		buf)	/*!< out: buffer of (n + 1) * 2 bytes */
+{
+	const byte*	buf_start	= buf;
+	ulint		i;
+	ulint		col;
+	ulint		trx_id_col	= 0;
+	/* sum of lengths of preceding non-nullable fixed fields, or 0 */
+	ulint		fixed_sum	= 0;
+
+	ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);
+
+	for (i = col = 0; i < n; i++) {
+		dict_field_t*	field = dict_index_get_nth_field(index, i);
+		ulint		val;
+
+		if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
+			val = 1; /* set the "not nullable" flag */
+		} else {
+			val = 0; /* nullable field */
+		}
+
+		if (!field->fixed_len) {
+			/* variable-length field */
+			const dict_col_t*	column
+				= dict_field_get_col(field);
+
+			if (UNIV_UNLIKELY(column->len > 255)
+			    || UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
+				val |= 0x7e; /* max > 255 bytes */
+			}
+
+			if (fixed_sum) {
+				/* write out the length of any
+				preceding non-nullable fields */
+				buf = page_zip_fixed_field_encode(
+					buf, fixed_sum << 1 | 1);
+				fixed_sum = 0;
+				col++;
+			}
+
+			*buf++ = (byte) val;
+			col++;
+		} else if (val) {
+			/* fixed-length non-nullable field */
+
+			if (fixed_sum && UNIV_UNLIKELY
+			    (fixed_sum + field->fixed_len
+			     > DICT_MAX_INDEX_COL_LEN)) {
+				/* Write out the length of the
+				preceding non-nullable fields,
+				to avoid exceeding the maximum
+				length of a fixed-length column. */
+				buf = page_zip_fixed_field_encode(
+					buf, fixed_sum << 1 | 1);
+				fixed_sum = 0;
+				col++;
+			}
+
+			if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
+				if (fixed_sum) {
+					/* Write out the length of any
+					preceding non-nullable fields,
+					and start a new trx_id column. */
+					buf = page_zip_fixed_field_encode(
+						buf, fixed_sum << 1 | 1);
+					col++;
+				}
+
+				trx_id_col = col;
+				fixed_sum = field->fixed_len;
+			} else {
+				/* add to the sum */
+				fixed_sum += field->fixed_len;
+			}
+		} else {
+			/* fixed-length nullable field */
+
+			if (fixed_sum) {
+				/* write out the length of any
+				preceding non-nullable fields */
+				buf = page_zip_fixed_field_encode(
+					buf, fixed_sum << 1 | 1);
+				fixed_sum = 0;
+				col++;
+			}
+
+			buf = page_zip_fixed_field_encode(
+				buf, field->fixed_len << 1);
+			col++;
+		}
+	}
+
+	if (fixed_sum) {
+		/* Write out the lengths of last fixed-length columns. */
+		buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
+	}
+
+	if (trx_id_pos != ULINT_UNDEFINED) {
+		/* Write out the position of the trx_id column */
+		i = trx_id_col;
+	} else {
+		/* Write out the number of nullable fields */
+		i = index->n_nullable;
+	}
+
+	if (i < 128) {
+		*buf++ = (byte) i;
+	} else {
+		*buf++ = (byte) (0x80 | i >> 8);
+		*buf++ = (byte) i;
+	}
+
+	ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
+	return((ulint) (buf - buf_start));
+}
+
+/**********************************************************************//**
+Populate the dense page directory from the sparse directory. */
+static
+void
+page_zip_dir_encode(
+/*================*/
+	const page_t*	page,	/*!< in: compact page */
+	byte*		buf,	/*!< in: pointer to dense page directory[-1];
+				out: dense directory on compressed page */
+	const rec_t**	recs)	/*!< in: pointer to an array of 0, or NULL;
+				out: dense page directory sorted by ascending
+				address (and heap_no) */
+{
+	const byte*	rec;
+	ulint		status;
+	ulint		min_mark;
+	ulint		heap_no;
+	ulint		i;
+	ulint		n_heap;
+	ulint		offs;
+
+	min_mark = 0;
+
+	if (page_is_leaf(page)) {
+		status = REC_STATUS_ORDINARY;
+	} else {
+		status = REC_STATUS_NODE_PTR;
+		if (UNIV_UNLIKELY
+		    (mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
+			min_mark = REC_INFO_MIN_REC_FLAG;
+		}
+	}
+
+	n_heap = page_dir_get_n_heap(page);
+
+	/* Traverse the list of stored records in the collation order,
+	starting from the first user record. */
+
+	rec = page + PAGE_NEW_INFIMUM, TRUE;
+
+	i = 0;
+
+	for (;;) {
+		ulint	info_bits;
+		offs = rec_get_next_offs(rec, TRUE);
+		if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
+			break;
+		}
+		rec = page + offs;
+		heap_no = rec_get_heap_no_new(rec);
+		ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
+		ut_a(heap_no < n_heap);
+		ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
+		ut_a(offs >= PAGE_ZIP_START);
+#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
+# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
+#endif
+#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1
+# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1"
+#endif
+		if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
+			offs |= PAGE_ZIP_DIR_SLOT_OWNED;
+		}
+
+		info_bits = rec_get_info_bits(rec, TRUE);
+		if (UNIV_UNLIKELY(info_bits & REC_INFO_DELETED_FLAG)) {
+			info_bits &= ~REC_INFO_DELETED_FLAG;
+			offs |= PAGE_ZIP_DIR_SLOT_DEL;
+		}
+		ut_a(info_bits == min_mark);
+		/* Only the smallest user record can have
+		REC_INFO_MIN_REC_FLAG set. */
+		min_mark = 0;
+
+		mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
+
+		if (UNIV_LIKELY_NULL(recs)) {
+			/* Ensure that each heap_no occurs at most once. */
+			ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
+			/* exclude infimum and supremum */
+			recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
+		}
+
+		ut_a(rec_get_status(rec) == status);
+	}
+
+	offs = page_header_get_field(page, PAGE_FREE);
+
+	/* Traverse the free list (of deleted records). */
+	while (offs) {
+		ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
+		rec = page + offs;
+
+		heap_no = rec_get_heap_no_new(rec);
+		ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
+		ut_a(heap_no < n_heap);
+
+		ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
+		ut_a(rec_get_status(rec) == status);
+
+		mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
+
+		if (UNIV_LIKELY_NULL(recs)) {
+			/* Ensure that each heap_no occurs at most once. */
+			ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
+			/* exclude infimum and supremum */
+			recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
+		}
+
+		offs = rec_get_next_offs(rec, TRUE);
+	}
+
+	/* Ensure that each heap no occurs at least once. */
+	ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
+}
+
+/**********************************************************************//**
+Allocate memory for zlib. */
+static
+void*
+page_zip_malloc(
+/*============*/
+	void*	opaque,	/*!< in/out: memory heap */
+	uInt	items,	/*!< in: number of items to allocate */
+	uInt	size)	/*!< in: size of an item in bytes */
+{
+	return(mem_heap_alloc(opaque, items * size));
+}
+
+/**********************************************************************//**
+Deallocate memory for zlib. */
+static
+void
+page_zip_free(
+/*==========*/
+	void*	opaque __attribute__((unused)),	/*!< in: memory heap */
+	void*	address __attribute__((unused)))/*!< in: object to free */
+{
+}
+
+/**********************************************************************//**
+Configure the zlib allocator to use the given memory heap. */
+UNIV_INTERN
+void
+page_zip_set_alloc(
+/*===============*/
+	void*		stream,		/*!< in/out: zlib stream */
+	mem_heap_t*	heap)		/*!< in: memory heap to use */
+{
+	z_stream*	strm = stream;
+
+	strm->zalloc = page_zip_malloc;
+	strm->zfree = page_zip_free;
+	strm->opaque = heap;
+}
+
+#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+/** Symbol for enabling compression and decompression diagnostics */
+# define PAGE_ZIP_COMPRESS_DBG
+#endif
+
+#ifdef PAGE_ZIP_COMPRESS_DBG
+/** Set this variable in a debugger to enable
+excessive logging in page_zip_compress(). */
+UNIV_INTERN ibool	page_zip_compress_dbg;
+/** Set this variable in a debugger to enable
+binary logging of the data passed to deflate().
+When this variable is nonzero, it will act
+as a log file name generator. */
+UNIV_INTERN unsigned	page_zip_compress_log;
+
+/**********************************************************************//**
+Wrapper for deflate().  Log the operation if page_zip_compress_dbg is set.
+@return	deflate() status: Z_OK, Z_BUF_ERROR, ... */
+static
+int
+page_zip_compress_deflate(
+/*======================*/
+	FILE*		logfile,/*!< in: log file, or NULL */
+	z_streamp	strm,	/*!< in/out: compressed stream for deflate() */
+	int		flush)	/*!< in: deflate() flushing method */
+{
+	int	status;
+	if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
+		ut_print_buf(stderr, strm->next_in, strm->avail_in);
+	}
+	if (UNIV_LIKELY_NULL(logfile)) {
+		fwrite(strm->next_in, 1, strm->avail_in, logfile);
+	}
+	status = deflate(strm, flush);
+	if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
+		fprintf(stderr, " -> %d\n", status);
+	}
+	return(status);
+}
+
+/* Redefine deflate(). */
+# undef deflate
+/** Debug wrapper for the zlib compression routine deflate().
+Log the operation if page_zip_compress_dbg is set.
+@param strm	in/out: compressed stream
+@param flush	in: flushing method
+@return		deflate() status: Z_OK, Z_BUF_ERROR, ... */
+# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
+/** Declaration of the logfile parameter */
+# define FILE_LOGFILE FILE* logfile,
+/** The logfile parameter */
+# define LOGFILE logfile,
+#else /* PAGE_ZIP_COMPRESS_DBG */
+/** Empty declaration of the logfile parameter */
+# define FILE_LOGFILE
+/** Missing logfile parameter */
+# define LOGFILE
+#endif /* PAGE_ZIP_COMPRESS_DBG */
+
+/**********************************************************************//**
+Compress the records of a node pointer page.
+@return	Z_OK, or a zlib error code */
+static
+int
+page_zip_compress_node_ptrs(
+/*========================*/
+	FILE_LOGFILE
+	z_stream*	c_stream,	/*!< in/out: compressed page stream */
+	const rec_t**	recs,		/*!< in: dense page directory
+					sorted by address */
+	ulint		n_dense,	/*!< in: size of recs[] */
+	dict_index_t*	index,		/*!< in: the index of the page */
+	byte*		storage,	/*!< in: end of dense page directory */
+	mem_heap_t*	heap)		/*!< in: temporary memory heap */
+{
+	int	err	= Z_OK;
+	ulint*	offsets = NULL;
+
+	do {
+		const rec_t*	rec = *recs++;
+
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		/* Only leaf nodes may contain externally stored columns. */
+		ut_ad(!rec_offs_any_extern(offsets));
+
+		UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+		UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+				   rec_offs_extra_size(offsets));
+
+		/* Compress the extra bytes. */
+		c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
+			- c_stream->next_in;
+
+		if (c_stream->avail_in) {
+			err = deflate(c_stream, Z_NO_FLUSH);
+			if (UNIV_UNLIKELY(err != Z_OK)) {
+				break;
+			}
+		}
+		ut_ad(!c_stream->avail_in);
+
+		/* Compress the data bytes, except node_ptr. */
+		c_stream->next_in = (byte*) rec;
+		c_stream->avail_in = rec_offs_data_size(offsets)
+			- REC_NODE_PTR_SIZE;
+		ut_ad(c_stream->avail_in);
+
+		err = deflate(c_stream, Z_NO_FLUSH);
+		if (UNIV_UNLIKELY(err != Z_OK)) {
+			break;
+		}
+
+		ut_ad(!c_stream->avail_in);
+
+		memcpy(storage - REC_NODE_PTR_SIZE
+		       * (rec_get_heap_no_new(rec) - 1),
+		       c_stream->next_in, REC_NODE_PTR_SIZE);
+		c_stream->next_in += REC_NODE_PTR_SIZE;
+	} while (--n_dense);
+
+	return(err);
+}
+
+/**********************************************************************//**
+Compress the records of a leaf node of a secondary index.
+@return	Z_OK, or a zlib error code */
+static
+int
+page_zip_compress_sec(
+/*==================*/
+	FILE_LOGFILE
+	z_stream*	c_stream,	/*!< in/out: compressed page stream */
+	const rec_t**	recs,		/*!< in: dense page directory
+					sorted by address */
+	ulint		n_dense)	/*!< in: size of recs[] */
+{
+	int		err	= Z_OK;
+
+	ut_ad(n_dense > 0);
+
+	do {
+		const rec_t*	rec = *recs++;
+
+		/* Compress everything up to this record. */
+		c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
+			- c_stream->next_in;
+
+		if (UNIV_LIKELY(c_stream->avail_in)) {
+			UNIV_MEM_ASSERT_RW(c_stream->next_in,
+					   c_stream->avail_in);
+			err = deflate(c_stream, Z_NO_FLUSH);
+			if (UNIV_UNLIKELY(err != Z_OK)) {
+				break;
+			}
+		}
+
+		ut_ad(!c_stream->avail_in);
+		ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
+
+		/* Skip the REC_N_NEW_EXTRA_BYTES. */
+
+		c_stream->next_in = (byte*) rec;
+	} while (--n_dense);
+
+	return(err);
+}
+
+/**********************************************************************//**
+Compress a record of a leaf node of a clustered index that contains
+externally stored columns.
+@return	Z_OK, or a zlib error code */
+static
+int
+page_zip_compress_clust_ext(
+/*========================*/
+	FILE_LOGFILE
+	z_stream*	c_stream,	/*!< in/out: compressed page stream */
+	const rec_t*	rec,		/*!< in: record */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec) */
+	ulint		trx_id_col,	/*!< in: position of of DB_TRX_ID */
+	byte*		deleted,	/*!< in: dense directory entry pointing
+					to the head of the free list */
+	byte*		storage,	/*!< in: end of dense page directory */
+	byte**		externs,	/*!< in/out: pointer to the next
+					available BLOB pointer */
+	ulint*		n_blobs)	/*!< in/out: number of
+					externally stored columns */
+{
+	int	err;
+	ulint	i;
+
+	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+			   rec_offs_extra_size(offsets));
+
+	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+		ulint		len;
+		const byte*	src;
+
+		if (UNIV_UNLIKELY(i == trx_id_col)) {
+			ut_ad(!rec_offs_nth_extern(offsets, i));
+			/* Store trx_id and roll_ptr
+			in uncompressed form. */
+			src = rec_get_nth_field(rec, offsets, i, &len);
+			ut_ad(src + DATA_TRX_ID_LEN
+			      == rec_get_nth_field(rec, offsets,
+						   i + 1, &len));
+			ut_ad(len == DATA_ROLL_PTR_LEN);
+
+			/* Compress any preceding bytes. */
+			c_stream->avail_in
+				= src - c_stream->next_in;
+
+			if (c_stream->avail_in) {
+				err = deflate(c_stream, Z_NO_FLUSH);
+				if (UNIV_UNLIKELY(err != Z_OK)) {
+
+					return(err);
+				}
+			}
+
+			ut_ad(!c_stream->avail_in);
+			ut_ad(c_stream->next_in == src);
+
+			memcpy(storage
+			       - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
+			       * (rec_get_heap_no_new(rec) - 1),
+			       c_stream->next_in,
+			       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+			c_stream->next_in
+				+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+
+			/* Skip also roll_ptr */
+			i++;
+		} else if (rec_offs_nth_extern(offsets, i)) {
+			src = rec_get_nth_field(rec, offsets, i, &len);
+			ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
+			src += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+			c_stream->avail_in = src
+				- c_stream->next_in;
+			if (UNIV_LIKELY(c_stream->avail_in)) {
+				err = deflate(c_stream, Z_NO_FLUSH);
+				if (UNIV_UNLIKELY(err != Z_OK)) {
+
+					return(err);
+				}
+			}
+
+			ut_ad(!c_stream->avail_in);
+			ut_ad(c_stream->next_in == src);
+
+			/* Reserve space for the data at
+			the end of the space reserved for
+			the compressed data and the page
+			modification log. */
+
+			if (UNIV_UNLIKELY
+			    (c_stream->avail_out
+			     <= BTR_EXTERN_FIELD_REF_SIZE)) {
+				/* out of space */
+				return(Z_BUF_ERROR);
+			}
+
+			ut_ad(*externs == c_stream->next_out
+			      + c_stream->avail_out
+			      + 1/* end of modif. log */);
+
+			c_stream->next_in
+				+= BTR_EXTERN_FIELD_REF_SIZE;
+
+			/* Skip deleted records. */
+			if (UNIV_LIKELY_NULL
+			    (page_zip_dir_find_low(
+				    storage, deleted,
+				    page_offset(rec)))) {
+				continue;
+			}
+
+			(*n_blobs)++;
+			c_stream->avail_out
+				-= BTR_EXTERN_FIELD_REF_SIZE;
+			*externs -= BTR_EXTERN_FIELD_REF_SIZE;
+
+			/* Copy the BLOB pointer */
+			memcpy(*externs, c_stream->next_in
+			       - BTR_EXTERN_FIELD_REF_SIZE,
+			       BTR_EXTERN_FIELD_REF_SIZE);
+		}
+	}
+
+	return(Z_OK);
+}
+
+/**********************************************************************//**
+Compress the records of a leaf node of a clustered index.
+@return	Z_OK, or a zlib error code */
+static
+int
+page_zip_compress_clust(
+/*====================*/
+	FILE_LOGFILE
+	z_stream*	c_stream,	/*!< in/out: compressed page stream */
+	const rec_t**	recs,		/*!< in: dense page directory
+					sorted by address */
+	ulint		n_dense,	/*!< in: size of recs[] */
+	dict_index_t*	index,		/*!< in: the index of the page */
+	ulint*		n_blobs,	/*!< in: 0; out: number of
+					externally stored columns */
+	ulint		trx_id_col,	/*!< index of the trx_id column */
+	byte*		deleted,	/*!< in: dense directory entry pointing
+					to the head of the free list */
+	byte*		storage,	/*!< in: end of dense page directory */
+	mem_heap_t*	heap)		/*!< in: temporary memory heap */
+{
+	int	err		= Z_OK;
+	ulint*	offsets		= NULL;
+	/* BTR_EXTERN_FIELD_REF storage */
+	byte*	externs		= storage - n_dense
+		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+	ut_ad(*n_blobs == 0);
+
+	do {
+		const rec_t*	rec = *recs++;
+
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		ut_ad(rec_offs_n_fields(offsets)
+		      == dict_index_get_n_fields(index));
+		UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+		UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+				   rec_offs_extra_size(offsets));
+
+		/* Compress the extra bytes. */
+		c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
+			- c_stream->next_in;
+
+		if (c_stream->avail_in) {
+			err = deflate(c_stream, Z_NO_FLUSH);
+			if (UNIV_UNLIKELY(err != Z_OK)) {
+
+				goto func_exit;
+			}
+		}
+		ut_ad(!c_stream->avail_in);
+		ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
+
+		/* Compress the data bytes. */
+
+		c_stream->next_in = (byte*) rec;
+
+		/* Check if there are any externally stored columns.
+		For each externally stored column, store the
+		BTR_EXTERN_FIELD_REF separately. */
+		if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
+			ut_ad(dict_index_is_clust(index));
+
+			err = page_zip_compress_clust_ext(
+				LOGFILE
+				c_stream, rec, offsets, trx_id_col,
+				deleted, storage, &externs, n_blobs);
+
+			if (UNIV_UNLIKELY(err != Z_OK)) {
+
+				goto func_exit;
+			}
+		} else {
+			ulint		len;
+			const byte*	src;
+
+			/* Store trx_id and roll_ptr in uncompressed form. */
+			src = rec_get_nth_field(rec, offsets,
+						trx_id_col, &len);
+			ut_ad(src + DATA_TRX_ID_LEN
+			      == rec_get_nth_field(rec, offsets,
+						   trx_id_col + 1, &len));
+			ut_ad(len == DATA_ROLL_PTR_LEN);
+			UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+			UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+					   rec_offs_extra_size(offsets));
+
+			/* Compress any preceding bytes. */
+			c_stream->avail_in = src - c_stream->next_in;
+
+			if (c_stream->avail_in) {
+				err = deflate(c_stream, Z_NO_FLUSH);
+				if (UNIV_UNLIKELY(err != Z_OK)) {
+
+					return(err);
+				}
+			}
+
+			ut_ad(!c_stream->avail_in);
+			ut_ad(c_stream->next_in == src);
+
+			memcpy(storage
+			       - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
+			       * (rec_get_heap_no_new(rec) - 1),
+			       c_stream->next_in,
+			       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+			c_stream->next_in
+				+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+
+			/* Skip also roll_ptr */
+			ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
+		}
+
+		/* Compress the last bytes of the record. */
+		c_stream->avail_in = rec + rec_offs_data_size(offsets)
+			- c_stream->next_in;
+
+		if (c_stream->avail_in) {
+			err = deflate(c_stream, Z_NO_FLUSH);
+			if (UNIV_UNLIKELY(err != Z_OK)) {
+
+				goto func_exit;
+			}
+		}
+		ut_ad(!c_stream->avail_in);
+	} while (--n_dense);
+
+func_exit:
+	return(err);
+}
+
+/**********************************************************************//**
+Compress a page.
+@return TRUE on success, FALSE on failure; page_zip will be left
+intact on failure. */
+UNIV_INTERN
+ibool
+page_zip_compress(
+/*==============*/
+	page_zip_des_t*	page_zip,/*!< in: size; out: data, n_blobs,
+				m_start, m_end, m_nonempty */
+	const page_t*	page,	/*!< in: uncompressed page */
+	dict_index_t*	index,	/*!< in: index of the B-tree node */
+	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
+{
+	z_stream	c_stream;
+	int		err;
+	ulint		n_fields;/* number of index fields needed */
+	byte*		fields;	/*!< index field information */
+	byte*		buf;	/*!< compressed payload of the page */
+	byte*		buf_end;/* end of buf */
+	ulint		n_dense;
+	ulint		slot_size;/* amount of uncompressed bytes per record */
+	const rec_t**	recs;	/*!< dense page directory, sorted by address */
+	mem_heap_t*	heap;
+	ulint		trx_id_col;
+	ulint*		offsets	= NULL;
+	ulint		n_blobs	= 0;
+	byte*		storage;/* storage of uncompressed columns */
+	ullint		usec = ut_time_us(NULL);
+#ifdef PAGE_ZIP_COMPRESS_DBG
+	FILE*		logfile = NULL;
+#endif
+
+	ut_a(page_is_comp(page));
+	ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
+	ut_ad(page_simple_validate_new((page_t*) page));
+	ut_ad(page_zip_simple_validate(page_zip));
+	ut_ad(dict_table_is_comp(index->table));
+	ut_ad(!dict_index_is_ibuf(index));
+
+	UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+
+	/* Check the data that will be omitted. */
+	ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
+		     infimum_extra, sizeof infimum_extra));
+	ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
+		     infimum_data, sizeof infimum_data));
+	ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
+	     /* info_bits == 0, n_owned <= max */
+	     <= PAGE_DIR_SLOT_MAX_N_OWNED);
+	ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
+		     supremum_extra_data, sizeof supremum_extra_data));
+
+	if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
+		ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
+		     == PAGE_NEW_SUPREMUM);
+	}
+
+	if (page_is_leaf(page)) {
+		n_fields = dict_index_get_n_fields(index);
+	} else {
+		n_fields = dict_index_get_n_unique_in_tree(index);
+	}
+
+	/* The dense directory excludes the infimum and supremum records. */
+	n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
+#ifdef PAGE_ZIP_COMPRESS_DBG
+	if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
+		fprintf(stderr, "compress %p %p %lu %lu %lu\n",
+			(void*) page_zip, (void*) page,
+			page_is_leaf(page),
+			n_fields, n_dense);
+	}
+	if (UNIV_UNLIKELY(page_zip_compress_log)) {
+		/* Create a log file for every compression attempt. */
+		char	logfilename[9];
+		ut_snprintf(logfilename, sizeof logfilename,
+			    "%08x", page_zip_compress_log++);
+		logfile = fopen(logfilename, "wb");
+
+		if (logfile) {
+			/* Write the uncompressed page to the log. */
+			fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
+			/* Record the compressed size as zero.
+			This will be overwritten at successful exit. */
+			putc(0, logfile);
+			putc(0, logfile);
+			putc(0, logfile);
+			putc(0, logfile);
+		}
+	}
+#endif /* PAGE_ZIP_COMPRESS_DBG */
+	page_zip_stat[page_zip->ssize - 1].compressed++;
+
+	if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
+			  >= page_zip_get_size(page_zip))) {
+
+		goto err_exit;
+	}
+
+	heap = mem_heap_create(page_zip_get_size(page_zip)
+			       + n_fields * (2 + sizeof *offsets)
+			       + n_dense * ((sizeof *recs)
+					    - PAGE_ZIP_DIR_SLOT_SIZE)
+			       + UNIV_PAGE_SIZE * 4
+			       + (512 << MAX_MEM_LEVEL));
+
+	recs = mem_heap_zalloc(heap, n_dense * sizeof *recs);
+
+	fields = mem_heap_alloc(heap, (n_fields + 1) * 2);
+
+	buf = mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA);
+	buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;
+
+	/* Compress the data payload. */
+	page_zip_set_alloc(&c_stream, heap);
+
+	err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
+			   Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
+			   MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+	ut_a(err == Z_OK);
+
+	c_stream.next_out = buf;
+	/* Subtract the space reserved for uncompressed data. */
+	/* Page header and the end marker of the modification log */
+	c_stream.avail_out = buf_end - buf - 1;
+	/* Dense page directory and uncompressed columns, if any */
+	if (page_is_leaf(page)) {
+		if (dict_index_is_clust(index)) {
+			trx_id_col = dict_index_get_sys_col_pos(
+				index, DATA_TRX_ID);
+			ut_ad(trx_id_col > 0);
+			ut_ad(trx_id_col != ULINT_UNDEFINED);
+
+			slot_size = PAGE_ZIP_DIR_SLOT_SIZE
+				+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+		} else {
+			/* Signal the absence of trx_id
+			in page_zip_fields_encode() */
+			ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
+			      == ULINT_UNDEFINED);
+			trx_id_col = 0;
+			slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
+		}
+	} else {
+		slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
+		trx_id_col = ULINT_UNDEFINED;
+	}
+
+	if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
+			  + 6/* sizeof(zlib header and footer) */)) {
+		goto zlib_error;
+	}
+
+	c_stream.avail_out -= n_dense * slot_size;
+	c_stream.avail_in = page_zip_fields_encode(n_fields, index,
+						   trx_id_col, fields);
+	c_stream.next_in = fields;
+	if (UNIV_LIKELY(!trx_id_col)) {
+		trx_id_col = ULINT_UNDEFINED;
+	}
+
+	UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
+	err = deflate(&c_stream, Z_FULL_FLUSH);
+	if (err != Z_OK) {
+		goto zlib_error;
+	}
+
+	ut_ad(!c_stream.avail_in);
+
+	page_zip_dir_encode(page, buf_end, recs);
+
+	c_stream.next_in = (byte*) page + PAGE_ZIP_START;
+
+	storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
+
+	/* Compress the records in heap_no order. */
+	if (UNIV_UNLIKELY(!n_dense)) {
+	} else if (!page_is_leaf(page)) {
+		/* This is a node pointer page. */
+		err = page_zip_compress_node_ptrs(LOGFILE
+						  &c_stream, recs, n_dense,
+						  index, storage, heap);
+		if (UNIV_UNLIKELY(err != Z_OK)) {
+			goto zlib_error;
+		}
+	} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
+		/* This is a leaf page in a secondary index. */
+		err = page_zip_compress_sec(LOGFILE
+					    &c_stream, recs, n_dense);
+		if (UNIV_UNLIKELY(err != Z_OK)) {
+			goto zlib_error;
+		}
+	} else {
+		/* This is a leaf page in a clustered index. */
+		err = page_zip_compress_clust(LOGFILE
+					      &c_stream, recs, n_dense,
+					      index, &n_blobs, trx_id_col,
+					      buf_end - PAGE_ZIP_DIR_SLOT_SIZE
+					      * page_get_n_recs(page),
+					      storage, heap);
+		if (UNIV_UNLIKELY(err != Z_OK)) {
+			goto zlib_error;
+		}
+	}
+
+	/* Finish the compression. */
+	ut_ad(!c_stream.avail_in);
+	/* Compress any trailing garbage, in case the last record was
+	allocated from an originally longer space on the free list,
+	or the data of the last record from page_zip_compress_sec(). */
+	c_stream.avail_in
+		= page_header_get_field(page, PAGE_HEAP_TOP)
+		- (c_stream.next_in - page);
+	ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
+
+	UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
+	err = deflate(&c_stream, Z_FINISH);
+
+	if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
+zlib_error:
+		deflateEnd(&c_stream);
+		mem_heap_free(heap);
+err_exit:
+#ifdef PAGE_ZIP_COMPRESS_DBG
+		if (logfile) {
+			fclose(logfile);
+		}
+#endif /* PAGE_ZIP_COMPRESS_DBG */
+		page_zip_stat[page_zip->ssize - 1].compressed_usec
+			+= ut_time_us(NULL) - usec;
+		return(FALSE);
+	}
+
+	err = deflateEnd(&c_stream);
+	ut_a(err == Z_OK);
+
+	ut_ad(buf + c_stream.total_out == c_stream.next_out);
+	ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);
+
+	/* Valgrind believes that zlib does not initialize some bits
+	in the last 7 or 8 bytes of the stream.  Make Valgrind happy. */
+	UNIV_MEM_VALID(buf, c_stream.total_out);
+
+	/* Zero out the area reserved for the modification log.
+	Space for the end marker of the modification log is not
+	included in avail_out. */
+	memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);
+
+#ifdef UNIV_DEBUG
+	page_zip->m_start =
+#endif /* UNIV_DEBUG */
+		page_zip->m_end = PAGE_DATA + c_stream.total_out;
+	page_zip->m_nonempty = FALSE;
+	page_zip->n_blobs = n_blobs;
+	/* Copy those header fields that will not be written
+	in buf_flush_init_for_writing() */
+	memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
+	       FIL_PAGE_LSN - FIL_PAGE_PREV);
+	memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
+	memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
+	       PAGE_DATA - FIL_PAGE_DATA);
+	/* Copy the rest of the compressed page */
+	memcpy(page_zip->data + PAGE_DATA, buf,
+	       page_zip_get_size(page_zip) - PAGE_DATA);
+	mem_heap_free(heap);
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+	if (mtr) {
+#ifndef UNIV_HOTBACKUP
+		page_zip_compress_write_log(page_zip, page, index, mtr);
+#endif /* !UNIV_HOTBACKUP */
+	}
+
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+#ifdef PAGE_ZIP_COMPRESS_DBG
+	if (logfile) {
+		/* Record the compressed size of the block. */
+		byte sz[4];
+		mach_write_to_4(sz, c_stream.total_out);
+		fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
+		fwrite(sz, 1, sizeof sz, logfile);
+		fclose(logfile);
+	}
+#endif /* PAGE_ZIP_COMPRESS_DBG */
+	{
+		page_zip_stat_t*	zip_stat
+			= &page_zip_stat[page_zip->ssize - 1];
+		zip_stat->compressed_ok++;
+		zip_stat->compressed_usec += ut_time_us(NULL) - usec;
+	}
+
+	return(TRUE);
+}
+
+/**********************************************************************//**
+Compare two page directory entries.
+@return	positive if rec1 > rec2 */
+UNIV_INLINE
+ibool
+page_zip_dir_cmp(
+/*=============*/
+	const rec_t*	rec1,	/*!< in: rec1 */
+	const rec_t*	rec2)	/*!< in: rec2 */
+{
+	return(rec1 > rec2);
+}
+
+/**********************************************************************//**
+Sort the dense page directory by address (heap_no). */
+static
+void
+page_zip_dir_sort(
+/*==============*/
+	rec_t**	arr,	/*!< in/out: dense page directory */
+	rec_t**	aux_arr,/*!< in/out: work area */
+	ulint	low,	/*!< in: lower bound of the sorting area, inclusive */
+	ulint	high)	/*!< in: upper bound of the sorting area, exclusive */
+{
+	UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
+			      page_zip_dir_cmp);
+}
+
+/**********************************************************************//**
+Deallocate the index information initialized by page_zip_fields_decode(). */
+static
+void
+page_zip_fields_free(
+/*=================*/
+	dict_index_t*	index)	/*!< in: dummy index to be freed */
+{
+	if (index) {
+		dict_table_t*	table = index->table;
+		mem_heap_free(index->heap);
+		mutex_free(&(table->autoinc_mutex));
+		mem_heap_free(table->heap);
+	}
+}
+
+/**********************************************************************//**
+Read the index information for the compressed page.
+@return	own: dummy index describing the page, or NULL on error */
+static
+dict_index_t*
+page_zip_fields_decode(
+/*===================*/
+	const byte*	buf,	/*!< in: index information */
+	const byte*	end,	/*!< in: end of buf */
+	ulint*		trx_id_col)/*!< in: NULL for non-leaf pages;
+				for leaf pages, pointer to where to store
+				the position of the trx_id column */
+{
+	const byte*	b;
+	ulint		n;
+	ulint		i;
+	ulint		val;
+	dict_table_t*	table;
+	dict_index_t*	index;
+
+	/* Determine the number of fields. */
+	for (b = buf, n = 0; b < end; n++) {
+		if (*b++ & 0x80) {
+			b++; /* skip the second byte */
+		}
+	}
+
+	n--; /* n_nullable or trx_id */
+
+	if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
+
+		page_zip_fail(("page_zip_fields_decode: n = %lu\n",
+			       (ulong) n));
+		return(NULL);
+	}
+
+	if (UNIV_UNLIKELY(b > end)) {
+
+		page_zip_fail(("page_zip_fields_decode: %p > %p\n",
+			       (const void*) b, (const void*) end));
+		return(NULL);
+	}
+
+	table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
+				      DICT_TF_COMPACT);
+	index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
+				      DICT_HDR_SPACE, 0, n);
+	index->table = table;
+	index->n_uniq = n;
+	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
+	index->cached = TRUE;
+
+	/* Initialize the fields. */
+	for (b = buf, i = 0; i < n; i++) {
+		ulint	mtype;
+		ulint	len;
+
+		val = *b++;
+
+		if (UNIV_UNLIKELY(val & 0x80)) {
+			/* fixed length > 62 bytes */
+			val = (val & 0x7f) << 8 | *b++;
+			len = val >> 1;
+			mtype = DATA_FIXBINARY;
+		} else if (UNIV_UNLIKELY(val >= 126)) {
+			/* variable length with max > 255 bytes */
+			len = 0x7fff;
+			mtype = DATA_BINARY;
+		} else if (val <= 1) {
+			/* variable length with max <= 255 bytes */
+			len = 0;
+			mtype = DATA_BINARY;
+		} else {
+			/* fixed length < 62 bytes */
+			len = val >> 1;
+			mtype = DATA_FIXBINARY;
+		}
+
+		dict_mem_table_add_col(table, NULL, NULL, mtype,
+				       val & 1 ? DATA_NOT_NULL : 0, len);
+		dict_index_add_col(index, table,
+				   dict_table_get_nth_col(table, i), 0);
+	}
+
+	val = *b++;
+	if (UNIV_UNLIKELY(val & 0x80)) {
+		val = (val & 0x7f) << 8 | *b++;
+	}
+
+	/* Decode the position of the trx_id column. */
+	if (trx_id_col) {
+		if (!val) {
+			val = ULINT_UNDEFINED;
+		} else if (UNIV_UNLIKELY(val >= n)) {
+			page_zip_fields_free(index);
+			index = NULL;
+		} else {
+			index->type = DICT_CLUSTERED;
+		}
+
+		*trx_id_col = val;
+	} else {
+		/* Decode the number of nullable fields. */
+		if (UNIV_UNLIKELY(index->n_nullable > val)) {
+			page_zip_fields_free(index);
+			index = NULL;
+		} else {
+			index->n_nullable = val;
+		}
+	}
+
+	ut_ad(b == end);
+
+	return(index);
+}
+
+/**********************************************************************//**
+Populate the sparse page directory from the dense directory.
+@return	TRUE on success, FALSE on failure */
+static
+ibool
+page_zip_dir_decode(
+/*================*/
+	const page_zip_des_t*	page_zip,/*!< in: dense page directory on
+					compressed page */
+	page_t*			page,	/*!< in: compact page with valid header;
+					out: trailer and sparse page directory
+					filled in */
+	rec_t**			recs,	/*!< out: dense page directory sorted by
+					ascending address (and heap_no) */
+	rec_t**			recs_aux,/*!< in/out: scratch area */
+	ulint			n_dense)/*!< in: number of user records, and
+					size of recs[] and recs_aux[] */
+{
+	ulint	i;
+	ulint	n_recs;
+	byte*	slot;
+
+	n_recs = page_get_n_recs(page);
+
+	if (UNIV_UNLIKELY(n_recs > n_dense)) {
+		page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
+			       (ulong) n_recs, (ulong) n_dense));
+		return(FALSE);
+	}
+
+	/* Traverse the list of stored records in the sorting order,
+	starting from the first user record. */
+
+	slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
+	UNIV_PREFETCH_RW(slot);
+
+	/* Zero out the page trailer. */
+	memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);
+
+	mach_write_to_2(slot, PAGE_NEW_INFIMUM);
+	slot -= PAGE_DIR_SLOT_SIZE;
+	UNIV_PREFETCH_RW(slot);
+
+	/* Initialize the sparse directory and copy the dense directory. */
+	for (i = 0; i < n_recs; i++) {
+		ulint	offs = page_zip_dir_get(page_zip, i);
+
+		if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
+			mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
+			slot -= PAGE_DIR_SLOT_SIZE;
+			UNIV_PREFETCH_RW(slot);
+		}
+
+		if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
+				  < PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
+			page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
+				       (unsigned) i, (unsigned) n_recs,
+				       (ulong) offs));
+			return(FALSE);
+		}
+
+		recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
+	}
+
+	mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
+	{
+		const page_dir_slot_t*	last_slot = page_dir_get_nth_slot(
+			page, page_dir_get_n_slots(page) - 1);
+
+		if (UNIV_UNLIKELY(slot != last_slot)) {
+			page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
+				       (const void*) slot,
+				       (const void*) last_slot));
+			return(FALSE);
+		}
+	}
+
+	/* Copy the rest of the dense directory. */
+	for (; i < n_dense; i++) {
+		ulint	offs = page_zip_dir_get(page_zip, i);
+
+		if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
+			page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
+				       (unsigned) i, (unsigned) n_dense,
+				       (ulong) offs));
+			return(FALSE);
+		}
+
+		recs[i] = page + offs;
+	}
+
+	if (UNIV_LIKELY(n_dense > 1)) {
+		page_zip_dir_sort(recs, recs_aux, 0, n_dense);
+	}
+	return(TRUE);
+}
+
+/**********************************************************************//**
+Initialize the REC_N_NEW_EXTRA_BYTES of each record.
+@return	TRUE on success, FALSE on failure */
+static
+ibool
+page_zip_set_extra_bytes(
+/*=====================*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	page_t*			page,	/*!< in/out: uncompressed page */
+	ulint			info_bits)/*!< in: REC_INFO_MIN_REC_FLAG or 0 */
+{
+	ulint	n;
+	ulint	i;
+	ulint	n_owned = 1;
+	ulint	offs;
+	rec_t*	rec;
+
+	n = page_get_n_recs(page);
+	rec = page + PAGE_NEW_INFIMUM;
+
+	for (i = 0; i < n; i++) {
+		offs = page_zip_dir_get(page_zip, i);
+
+		if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_DEL)) {
+			info_bits |= REC_INFO_DELETED_FLAG;
+		}
+		if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
+			info_bits |= n_owned;
+			n_owned = 1;
+		} else {
+			n_owned++;
+		}
+		offs &= PAGE_ZIP_DIR_SLOT_MASK;
+		if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
+				  + REC_N_NEW_EXTRA_BYTES)) {
+			page_zip_fail(("page_zip_set_extra_bytes 1:"
+				       " %u %u %lx\n",
+				       (unsigned) i, (unsigned) n,
+				       (ulong) offs));
+			return(FALSE);
+		}
+
+		rec_set_next_offs_new(rec, offs);
+		rec = page + offs;
+		rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
+		info_bits = 0;
+	}
+
+	/* Set the next pointer of the last user record. */
+	rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);
+
+	/* Set n_owned of the supremum record. */
+	page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;
+
+	/* The dense directory excludes the infimum and supremum records. */
+	n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
+
+	if (i >= n) {
+		if (UNIV_LIKELY(i == n)) {
+			return(TRUE);
+		}
+
+		page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
+			       (unsigned) i, (unsigned) n));
+		return(FALSE);
+	}
+
+	offs = page_zip_dir_get(page_zip, i);
+
+	/* Set the extra bytes of deleted records on the free list. */
+	for (;;) {
+		if (UNIV_UNLIKELY(!offs)
+		    || UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
+
+			page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
+				       (ulong) offs));
+			return(FALSE);
+		}
+
+		rec = page + offs;
+		rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
+
+		if (++i == n) {
+			break;
+		}
+
+		offs = page_zip_dir_get(page_zip, i);
+		rec_set_next_offs_new(rec, offs);
+	}
+
+	/* Terminate the free list. */
+	rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
+	rec_set_next_offs_new(rec, 0);
+
+	return(TRUE);
+}
+
+/**********************************************************************//**
+Apply the modification log to a record containing externally stored
+columns.  Do not copy the fields that are stored separately.
+@return	pointer to modification log, or NULL on failure */
+static
+const byte*
+page_zip_apply_log_ext(
+/*===================*/
+	rec_t*		rec,		/*!< in/out: record */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec) */
+	ulint		trx_id_col,	/*!< in: position of of DB_TRX_ID */
+	const byte*	data,		/*!< in: modification log */
+	const byte*	end)		/*!< in: end of modification log */
+{
+	ulint	i;
+	ulint	len;
+	byte*	next_out = rec;
+
+	/* Check if there are any externally stored columns.
+	For each externally stored column, skip the
+	BTR_EXTERN_FIELD_REF. */
+
+	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+		byte*	dst;
+
+		if (UNIV_UNLIKELY(i == trx_id_col)) {
+			/* Skip trx_id and roll_ptr */
+			dst = rec_get_nth_field(rec, offsets,
+						i, &len);
+			if (UNIV_UNLIKELY(dst - next_out >= end - data)
+			    || UNIV_UNLIKELY
+			    (len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
+			    || rec_offs_nth_extern(offsets, i)) {
+				page_zip_fail(("page_zip_apply_log_ext:"
+					       " trx_id len %lu,"
+					       " %p - %p >= %p - %p\n",
+					       (ulong) len,
+					       (const void*) dst,
+					       (const void*) next_out,
+					       (const void*) end,
+					       (const void*) data));
+				return(NULL);
+			}
+
+			memcpy(next_out, data, dst - next_out);
+			data += dst - next_out;
+			next_out = dst + (DATA_TRX_ID_LEN
+					  + DATA_ROLL_PTR_LEN);
+		} else if (rec_offs_nth_extern(offsets, i)) {
+			dst = rec_get_nth_field(rec, offsets,
+						i, &len);
+			ut_ad(len
+			      >= BTR_EXTERN_FIELD_REF_SIZE);
+
+			len += dst - next_out
+				- BTR_EXTERN_FIELD_REF_SIZE;
+
+			if (UNIV_UNLIKELY(data + len >= end)) {
+				page_zip_fail(("page_zip_apply_log_ext: "
+					       "ext %p+%lu >= %p\n",
+					       (const void*) data,
+					       (ulong) len,
+					       (const void*) end));
+				return(NULL);
+			}
+
+			memcpy(next_out, data, len);
+			data += len;
+			next_out += len
+				+ BTR_EXTERN_FIELD_REF_SIZE;
+		}
+	}
+
+	/* Copy the last bytes of the record. */
+	len = rec_get_end(rec, offsets) - next_out;
+	if (UNIV_UNLIKELY(data + len >= end)) {
+		page_zip_fail(("page_zip_apply_log_ext: "
+			       "last %p+%lu >= %p\n",
+			       (const void*) data,
+			       (ulong) len,
+			       (const void*) end));
+		return(NULL);
+	}
+	memcpy(next_out, data, len);
+	data += len;
+
+	return(data);
+}
+
+/**********************************************************************//**
+Apply the modification log to an uncompressed page.
+Do not copy the fields that are stored separately.
+@return	pointer to end of modification log, or NULL on failure */
+static
+const byte*
+page_zip_apply_log(
+/*===============*/
+	const byte*	data,	/*!< in: modification log */
+	ulint		size,	/*!< in: maximum length of the log, in bytes */
+	rec_t**		recs,	/*!< in: dense page directory,
+				sorted by address (indexed by
+				heap_no - PAGE_HEAP_NO_USER_LOW) */
+	ulint		n_dense,/*!< in: size of recs[] */
+	ulint		trx_id_col,/*!< in: column number of trx_id in the index,
+				or ULINT_UNDEFINED if none */
+	ulint		heap_status,
+				/*!< in: heap_no and status bits for
+				the next record to uncompress */
+	dict_index_t*	index,	/*!< in: index of the page */
+	ulint*		offsets)/*!< in/out: work area for
+				rec_get_offsets_reverse() */
+{
+	const byte* const end = data + size;
+
+	for (;;) {
+		ulint	val;
+		rec_t*	rec;
+		ulint	len;
+		ulint	hs;
+
+		val = *data++;
+		if (UNIV_UNLIKELY(!val)) {
+			return(data - 1);
+		}
+		if (val & 0x80) {
+			val = (val & 0x7f) << 8 | *data++;
+			if (UNIV_UNLIKELY(!val)) {
+				page_zip_fail(("page_zip_apply_log:"
+					       " invalid val %x%x\n",
+					       data[-2], data[-1]));
+				return(NULL);
+			}
+		}
+		if (UNIV_UNLIKELY(data >= end)) {
+			page_zip_fail(("page_zip_apply_log: %p >= %p\n",
+				       (const void*) data,
+				       (const void*) end));
+			return(NULL);
+		}
+		if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
+			page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
+				       (ulong) val, (ulong) n_dense));
+			return(NULL);
+		}
+
+		/* Determine the heap number and status bits of the record. */
+		rec = recs[(val >> 1) - 1];
+
+		hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
+		hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);
+
+		/* This may either be an old record that is being
+		overwritten (updated in place, or allocated from
+		the free list), or a new record, with the next
+		available_heap_no. */
+		if (UNIV_UNLIKELY(hs > heap_status)) {
+			page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
+				       (ulong) hs, (ulong) heap_status));
+			return(NULL);
+		} else if (hs == heap_status) {
+			/* A new record was allocated from the heap. */
+			if (UNIV_UNLIKELY(val & 1)) {
+				/* Only existing records may be cleared. */
+				page_zip_fail(("page_zip_apply_log:"
+					       " attempting to create"
+					       " deleted rec %lu\n",
+					       (ulong) hs));
+				return(NULL);
+			}
+			heap_status += 1 << REC_HEAP_NO_SHIFT;
+		}
+
+		mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);
+
+		if (val & 1) {
+			/* Clear the data bytes of the record. */
+			mem_heap_t*	heap	= NULL;
+			ulint*		offs;
+			offs = rec_get_offsets(rec, index, offsets,
+					       ULINT_UNDEFINED, &heap);
+			memset(rec, 0, rec_offs_data_size(offs));
+
+			if (UNIV_LIKELY_NULL(heap)) {
+				mem_heap_free(heap);
+			}
+			continue;
+		}
+
+#if REC_STATUS_NODE_PTR != TRUE
+# error "REC_STATUS_NODE_PTR != TRUE"
+#endif
+		rec_get_offsets_reverse(data, index,
+					hs & REC_STATUS_NODE_PTR,
+					offsets);
+		rec_offs_make_valid(rec, index, offsets);
+
+		/* Copy the extra bytes (backwards). */
+		{
+			byte*	start	= rec_get_start(rec, offsets);
+			byte*	b	= rec - REC_N_NEW_EXTRA_BYTES;
+			while (b != start) {
+				*--b = *data++;
+			}
+		}
+
+		/* Copy the data bytes. */
+		if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
+			/* Non-leaf nodes should not contain any
+			externally stored columns. */
+			if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
+				page_zip_fail(("page_zip_apply_log: "
+					       "%lu&REC_STATUS_NODE_PTR\n",
+					       (ulong) hs));
+				return(NULL);
+			}
+
+			data = page_zip_apply_log_ext(
+				rec, offsets, trx_id_col, data, end);
+
+			if (UNIV_UNLIKELY(!data)) {
+				return(NULL);
+			}
+		} else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
+			len = rec_offs_data_size(offsets)
+				- REC_NODE_PTR_SIZE;
+			/* Copy the data bytes, except node_ptr. */
+			if (UNIV_UNLIKELY(data + len >= end)) {
+				page_zip_fail(("page_zip_apply_log: "
+					       "node_ptr %p+%lu >= %p\n",
+					       (const void*) data,
+					       (ulong) len,
+					       (const void*) end));
+				return(NULL);
+			}
+			memcpy(rec, data, len);
+			data += len;
+		} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
+			len = rec_offs_data_size(offsets);
+
+			/* Copy all data bytes of
+			a record in a secondary index. */
+			if (UNIV_UNLIKELY(data + len >= end)) {
+				page_zip_fail(("page_zip_apply_log: "
+					       "sec %p+%lu >= %p\n",
+					       (const void*) data,
+					       (ulong) len,
+					       (const void*) end));
+				return(NULL);
+			}
+
+			memcpy(rec, data, len);
+			data += len;
+		} else {
+			/* Skip DB_TRX_ID and DB_ROLL_PTR. */
+			ulint	l = rec_get_nth_field_offs(offsets,
+							   trx_id_col, &len);
+			byte*	b;
+
+			if (UNIV_UNLIKELY(data + l >= end)
+			    || UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
+						    + DATA_ROLL_PTR_LEN))) {
+				page_zip_fail(("page_zip_apply_log: "
+					       "trx_id %p+%lu >= %p\n",
+					       (const void*) data,
+					       (ulong) l,
+					       (const void*) end));
+				return(NULL);
+			}
+
+			/* Copy any preceding data bytes. */
+			memcpy(rec, data, l);
+			data += l;
+
+			/* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
+			b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+			len = rec_get_end(rec, offsets) - b;
+			if (UNIV_UNLIKELY(data + len >= end)) {
+				page_zip_fail(("page_zip_apply_log: "
+					       "clust %p+%lu >= %p\n",
+					       (const void*) data,
+					       (ulong) len,
+					       (const void*) end));
+				return(NULL);
+			}
+			memcpy(b, data, len);
+			data += len;
+		}
+	}
+}
+
+/**********************************************************************//**
+Decompress the records of a node pointer page.
+@return	TRUE on success, FALSE on failure */
+static
+ibool
+page_zip_decompress_node_ptrs(
+/*==========================*/
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
+	z_stream*	d_stream,	/*!< in/out: compressed page stream */
+	rec_t**		recs,		/*!< in: dense page directory
+					sorted by address */
+	ulint		n_dense,	/*!< in: size of recs[] */
+	dict_index_t*	index,		/*!< in: the index of the page */
+	ulint*		offsets,	/*!< in/out: temporary offsets */
+	mem_heap_t*	heap)		/*!< in: temporary memory heap */
+{
+	ulint		heap_status = REC_STATUS_NODE_PTR
+		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
+	ulint		slot;
+	const byte*	storage;
+
+	/* Subtract the space reserved for uncompressed data. */
+	d_stream->avail_in -= n_dense
+		* (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE);
+
+	/* Decompress the records in heap_no order. */
+	for (slot = 0; slot < n_dense; slot++) {
+		rec_t*	rec = recs[slot];
+
+		d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
+			- d_stream->next_out;
+
+		ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
+		      - PAGE_ZIP_START - PAGE_DIR);
+		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+		case Z_STREAM_END:
+			/* Apparently, n_dense has grown
+			since the time the page was last compressed. */
+			goto zlib_done;
+		case Z_OK:
+		case Z_BUF_ERROR:
+			if (!d_stream->avail_out) {
+				break;
+			}
+			/* fall through */
+		default:
+			page_zip_fail(("page_zip_decompress_node_ptrs:"
+				       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
+				       d_stream->msg));
+			goto zlib_error;
+		}
+
+		ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
+		/* Prepare to decompress the data bytes. */
+		d_stream->next_out = rec;
+		/* Set heap_no and the status bits. */
+		mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
+		heap_status += 1 << REC_HEAP_NO_SHIFT;
+
+		/* Read the offsets. The status bits are needed here. */
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+
+		/* Non-leaf nodes should not have any externally
+		stored columns. */
+		ut_ad(!rec_offs_any_extern(offsets));
+
+		/* Decompress the data bytes, except node_ptr. */
+		d_stream->avail_out = rec_offs_data_size(offsets)
+			- REC_NODE_PTR_SIZE;
+
+		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+		case Z_STREAM_END:
+			goto zlib_done;
+		case Z_OK:
+		case Z_BUF_ERROR:
+			if (!d_stream->avail_out) {
+				break;
+			}
+			/* fall through */
+		default:
+			page_zip_fail(("page_zip_decompress_node_ptrs:"
+				       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
+				       d_stream->msg));
+			goto zlib_error;
+		}
+
+		/* Clear the node pointer in case the record
+		will be deleted and the space will be reallocated
+		to a smaller record. */
+		memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
+		d_stream->next_out += REC_NODE_PTR_SIZE;
+
+		ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
+	}
+
+	/* Decompress any trailing garbage, in case the last record was
+	allocated from an originally longer space on the free list. */
+	d_stream->avail_out = page_header_get_field(page_zip->data,
+						    PAGE_HEAP_TOP)
+		- page_offset(d_stream->next_out);
+	if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
+			  - PAGE_ZIP_START - PAGE_DIR)) {
+
+		page_zip_fail(("page_zip_decompress_node_ptrs:"
+			       " avail_out = %u\n",
+			       d_stream->avail_out));
+		goto zlib_error;
+	}
+
+	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
+		page_zip_fail(("page_zip_decompress_node_ptrs:"
+			       " inflate(Z_FINISH)=%s\n",
+			       d_stream->msg));
+zlib_error:
+		inflateEnd(d_stream);
+		return(FALSE);
+	}
+
+	/* Note that d_stream->avail_out > 0 may hold here
+	if the modification log is nonempty. */
+
+zlib_done:
+	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
+		ut_error;
+	}
+
+	{
+		page_t*	page = page_align(d_stream->next_out);
+
+		/* Clear the unused heap space on the uncompressed page. */
+		memset(d_stream->next_out, 0,
+		       page_dir_get_nth_slot(page,
+					     page_dir_get_n_slots(page) - 1)
+		       - d_stream->next_out);
+	}
+
+#ifdef UNIV_DEBUG
+	page_zip->m_start = PAGE_DATA + d_stream->total_in;
+#endif /* UNIV_DEBUG */
+
+	/* Apply the modification log. */
+	{
+		const byte*	mod_log_ptr;
+		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
+						 d_stream->avail_in + 1,
+						 recs, n_dense,
+						 ULINT_UNDEFINED, heap_status,
+						 index, offsets);
+
+		if (UNIV_UNLIKELY(!mod_log_ptr)) {
+			return(FALSE);
+		}
+		page_zip->m_end = mod_log_ptr - page_zip->data;
+		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
+	}
+
+	if (UNIV_UNLIKELY
+	    (page_zip_get_trailer_len(page_zip,
+				      dict_index_is_clust(index), NULL)
+	     + page_zip->m_end >= page_zip_get_size(page_zip))) {
+		page_zip_fail(("page_zip_decompress_node_ptrs:"
+			       " %lu + %lu >= %lu, %lu\n",
+			       (ulong) page_zip_get_trailer_len(
+				       page_zip, dict_index_is_clust(index),
+				       NULL),
+			       (ulong) page_zip->m_end,
+			       (ulong) page_zip_get_size(page_zip),
+			       (ulong) dict_index_is_clust(index)));
+		return(FALSE);
+	}
+
+	/* Restore the uncompressed columns in heap_no order. */
+	storage	= page_zip->data + page_zip_get_size(page_zip)
+		- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
+
+	for (slot = 0; slot < n_dense; slot++) {
+		rec_t*		rec	= recs[slot];
+
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		/* Non-leaf nodes should not have any externally
+		stored columns. */
+		ut_ad(!rec_offs_any_extern(offsets));
+		storage -= REC_NODE_PTR_SIZE;
+
+		memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
+		       storage, REC_NODE_PTR_SIZE);
+	}
+
+	return(TRUE);
+}
+
+/**********************************************************************//**
+Decompress the records of a leaf node of a secondary index.
+@return	TRUE on success, FALSE on failure */
+static
+ibool
+page_zip_decompress_sec(
+/*====================*/
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
+	z_stream*	d_stream,	/*!< in/out: compressed page stream */
+	rec_t**		recs,		/*!< in: dense page directory
+					sorted by address */
+	ulint		n_dense,	/*!< in: size of recs[] */
+	dict_index_t*	index,		/*!< in: the index of the page */
+	ulint*		offsets)	/*!< in/out: temporary offsets */
+{
+	ulint	heap_status	= REC_STATUS_ORDINARY
+		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
+	ulint	slot;
+
+	ut_a(!dict_index_is_clust(index));
+
+	/* Subtract the space reserved for uncompressed data. */
+	d_stream->avail_in -= n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
+
+	for (slot = 0; slot < n_dense; slot++) {
+		rec_t*	rec = recs[slot];
+
+		/* Decompress everything up to this record. */
+		d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
+			- d_stream->next_out;
+
+		if (UNIV_LIKELY(d_stream->avail_out)) {
+			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+			case Z_STREAM_END:
+				/* Apparently, n_dense has grown
+				since the time the page was last compressed. */
+				goto zlib_done;
+			case Z_OK:
+			case Z_BUF_ERROR:
+				if (!d_stream->avail_out) {
+					break;
+				}
+				/* fall through */
+			default:
+				page_zip_fail(("page_zip_decompress_sec:"
+					       " inflate(Z_SYNC_FLUSH)=%s\n",
+					       d_stream->msg));
+				goto zlib_error;
+			}
+		}
+
+		ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
+
+		/* Skip the REC_N_NEW_EXTRA_BYTES. */
+
+		d_stream->next_out = rec;
+
+		/* Set heap_no and the status bits. */
+		mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
+		heap_status += 1 << REC_HEAP_NO_SHIFT;
+	}
+
+	/* Decompress the data of the last record and any trailing garbage,
+	in case the last record was allocated from an originally longer space
+	on the free list. */
+	d_stream->avail_out = page_header_get_field(page_zip->data,
+						    PAGE_HEAP_TOP)
+		- page_offset(d_stream->next_out);
+	if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
+			  - PAGE_ZIP_START - PAGE_DIR)) {
+
+		page_zip_fail(("page_zip_decompress_sec:"
+			       " avail_out = %u\n",
+			       d_stream->avail_out));
+		goto zlib_error;
+	}
+
+	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
+		page_zip_fail(("page_zip_decompress_sec:"
+			       " inflate(Z_FINISH)=%s\n",
+			       d_stream->msg));
+zlib_error:
+		inflateEnd(d_stream);
+		return(FALSE);
+	}
+
+	/* Note that d_stream->avail_out > 0 may hold here
+	if the modification log is nonempty. */
+
+zlib_done:
+	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
+		ut_error;
+	}
+
+	{
+		page_t*	page = page_align(d_stream->next_out);
+
+		/* Clear the unused heap space on the uncompressed page. */
+		memset(d_stream->next_out, 0,
+		       page_dir_get_nth_slot(page,
+					     page_dir_get_n_slots(page) - 1)
+		       - d_stream->next_out);
+	}
+
+#ifdef UNIV_DEBUG
+	page_zip->m_start = PAGE_DATA + d_stream->total_in;
+#endif /* UNIV_DEBUG */
+
+	/* Apply the modification log. */
+	{
+		const byte*	mod_log_ptr;
+		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
+						 d_stream->avail_in + 1,
+						 recs, n_dense,
+						 ULINT_UNDEFINED, heap_status,
+						 index, offsets);
+
+		if (UNIV_UNLIKELY(!mod_log_ptr)) {
+			return(FALSE);
+		}
+		page_zip->m_end = mod_log_ptr - page_zip->data;
+		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
+	}
+
+	if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE, NULL)
+			  + page_zip->m_end >= page_zip_get_size(page_zip))) {
+
+		page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
+			       (ulong) page_zip_get_trailer_len(
+				       page_zip, FALSE, NULL),
+			       (ulong) page_zip->m_end,
+			       (ulong) page_zip_get_size(page_zip)));
+		return(FALSE);
+	}
+
+	/* There are no uncompressed columns on leaf pages of
+	secondary indexes. */
+
+	return(TRUE);
+}
+
+/**********************************************************************//**
+Decompress a record of a leaf node of a clustered index that contains
+externally stored columns.
+@return	TRUE on success */
+static
+ibool
+page_zip_decompress_clust_ext(
+/*==========================*/
+	z_stream*	d_stream,	/*!< in/out: compressed page stream */
+	rec_t*		rec,		/*!< in/out: record */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec) */
+	ulint		trx_id_col)	/*!< in: position of of DB_TRX_ID */
+{
+	ulint	i;
+
+	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+		ulint	len;
+		byte*	dst;
+
+		if (UNIV_UNLIKELY(i == trx_id_col)) {
+			/* Skip trx_id and roll_ptr */
+			dst = rec_get_nth_field(rec, offsets, i, &len);
+			if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
+					  + DATA_ROLL_PTR_LEN)) {
+
+				page_zip_fail(("page_zip_decompress_clust_ext:"
+					       " len[%lu] = %lu\n",
+					       (ulong) i, (ulong) len));
+				return(FALSE);
+			}
+
+			if (rec_offs_nth_extern(offsets, i)) {
+
+				page_zip_fail(("page_zip_decompress_clust_ext:"
+					       " DB_TRX_ID at %lu is ext\n",
+					       (ulong) i));
+				return(FALSE);
+			}
+
+			d_stream->avail_out = dst - d_stream->next_out;
+
+			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+			case Z_STREAM_END:
+			case Z_OK:
+			case Z_BUF_ERROR:
+				if (!d_stream->avail_out) {
+					break;
+				}
+				/* fall through */
+			default:
+				page_zip_fail(("page_zip_decompress_clust_ext:"
+					       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
+					       d_stream->msg));
+				return(FALSE);
+			}
+
+			ut_ad(d_stream->next_out == dst);
+
+			/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
+			avoid uninitialized bytes in case the record
+			is affected by page_zip_apply_log(). */
+			memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+			d_stream->next_out += DATA_TRX_ID_LEN
+				+ DATA_ROLL_PTR_LEN;
+		} else if (rec_offs_nth_extern(offsets, i)) {
+			dst = rec_get_nth_field(rec, offsets, i, &len);
+			ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
+			dst += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+			d_stream->avail_out = dst - d_stream->next_out;
+			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+			case Z_STREAM_END:
+			case Z_OK:
+			case Z_BUF_ERROR:
+				if (!d_stream->avail_out) {
+					break;
+				}
+				/* fall through */
+			default:
+				page_zip_fail(("page_zip_decompress_clust_ext:"
+					       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
+					       d_stream->msg));
+				return(FALSE);
+			}
+
+			ut_ad(d_stream->next_out == dst);
+
+			/* Clear the BLOB pointer in case
+			the record will be deleted and the
+			space will not be reused.  Note that
+			the final initialization of the BLOB
+			pointers (copying from "externs"
+			or clearing) will have to take place
+			only after the page modification log
+			has been applied.  Otherwise, we
+			could end up with an uninitialized
+			BLOB pointer when a record is deleted,
+			reallocated and deleted. */
+			memset(d_stream->next_out, 0,
+			       BTR_EXTERN_FIELD_REF_SIZE);
+			d_stream->next_out
+				+= BTR_EXTERN_FIELD_REF_SIZE;
+		}
+	}
+
+	return(TRUE);
+}
+
+/**********************************************************************//**
+Compress the records of a leaf node of a clustered index.
+@return	TRUE on success, FALSE on failure */
+static
+ibool
+page_zip_decompress_clust(
+/*======================*/
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
+	z_stream*	d_stream,	/*!< in/out: compressed page stream */
+	rec_t**		recs,		/*!< in: dense page directory
+					sorted by address */
+	ulint		n_dense,	/*!< in: size of recs[] */
+	dict_index_t*	index,		/*!< in: the index of the page */
+	ulint		trx_id_col,	/*!< index of the trx_id column */
+	ulint*		offsets,	/*!< in/out: temporary offsets */
+	mem_heap_t*	heap)		/*!< in: temporary memory heap */
+{
+	int		err;
+	ulint		slot;
+	ulint		heap_status	= REC_STATUS_ORDINARY
+		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
+	const byte*	storage;
+	const byte*	externs;
+
+	ut_a(dict_index_is_clust(index));
+
+	/* Subtract the space reserved for uncompressed data. */
+	d_stream->avail_in -= n_dense * (PAGE_ZIP_DIR_SLOT_SIZE
+					 + DATA_TRX_ID_LEN
+					 + DATA_ROLL_PTR_LEN);
+
+	/* Decompress the records in heap_no order. */
+	for (slot = 0; slot < n_dense; slot++) {
+		rec_t*	rec	= recs[slot];
+
+		d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
+			- d_stream->next_out;
+
+		ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
+		      - PAGE_ZIP_START - PAGE_DIR);
+		err = inflate(d_stream, Z_SYNC_FLUSH);
+		switch (err) {
+		case Z_STREAM_END:
+			/* Apparently, n_dense has grown
+			since the time the page was last compressed. */
+			goto zlib_done;
+		case Z_OK:
+		case Z_BUF_ERROR:
+			if (UNIV_LIKELY(!d_stream->avail_out)) {
+				break;
+			}
+			/* fall through */
+		default:
+			page_zip_fail(("page_zip_decompress_clust:"
+				       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
+				       d_stream->msg));
+			goto zlib_error;
+		}
+
+		ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
+		/* Prepare to decompress the data bytes. */
+		d_stream->next_out = rec;
+		/* Set heap_no and the status bits. */
+		mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
+		heap_status += 1 << REC_HEAP_NO_SHIFT;
+
+		/* Read the offsets. The status bits are needed here. */
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+
+		/* This is a leaf page in a clustered index. */
+
+		/* Check if there are any externally stored columns.
+		For each externally stored column, restore the
+		BTR_EXTERN_FIELD_REF separately. */
+
+		if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
+			if (UNIV_UNLIKELY
+			    (!page_zip_decompress_clust_ext(
+				    d_stream, rec, offsets, trx_id_col))) {
+
+				goto zlib_error;
+			}
+		} else {
+			/* Skip trx_id and roll_ptr */
+			ulint	len;
+			byte*	dst = rec_get_nth_field(rec, offsets,
+							trx_id_col, &len);
+			if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
+					  + DATA_ROLL_PTR_LEN)) {
+
+				page_zip_fail(("page_zip_decompress_clust:"
+					       " len = %lu\n", (ulong) len));
+				goto zlib_error;
+			}
+
+			d_stream->avail_out = dst - d_stream->next_out;
+
+			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+			case Z_STREAM_END:
+			case Z_OK:
+			case Z_BUF_ERROR:
+				if (!d_stream->avail_out) {
+					break;
+				}
+				/* fall through */
+			default:
+				page_zip_fail(("page_zip_decompress_clust:"
+					       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
+					       d_stream->msg));
+				goto zlib_error;
+			}
+
+			ut_ad(d_stream->next_out == dst);
+
+			/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
+			avoid uninitialized bytes in case the record
+			is affected by page_zip_apply_log(). */
+			memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+			d_stream->next_out += DATA_TRX_ID_LEN
+				+ DATA_ROLL_PTR_LEN;
+		}
+
+		/* Decompress the last bytes of the record. */
+		d_stream->avail_out = rec_get_end(rec, offsets)
+			- d_stream->next_out;
+
+		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+		case Z_STREAM_END:
+		case Z_OK:
+		case Z_BUF_ERROR:
+			if (!d_stream->avail_out) {
+				break;
+			}
+			/* fall through */
+		default:
+			page_zip_fail(("page_zip_decompress_clust:"
+				       " 3 inflate(Z_SYNC_FLUSH)=%s\n",
+				       d_stream->msg));
+			goto zlib_error;
+		}
+	}
+
+	/* Decompress any trailing garbage, in case the last record was
+	allocated from an originally longer space on the free list. */
+	d_stream->avail_out = page_header_get_field(page_zip->data,
+						    PAGE_HEAP_TOP)
+		- page_offset(d_stream->next_out);
+	if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
+			  - PAGE_ZIP_START - PAGE_DIR)) {
+
+		page_zip_fail(("page_zip_decompress_clust:"
+			       " avail_out = %u\n",
+			       d_stream->avail_out));
+		goto zlib_error;
+	}
+
+	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
+		page_zip_fail(("page_zip_decompress_clust:"
+			       " inflate(Z_FINISH)=%s\n",
+			       d_stream->msg));
+zlib_error:
+		inflateEnd(d_stream);
+		return(FALSE);
+	}
+
+	/* Note that d_stream->avail_out > 0 may hold here
+	if the modification log is nonempty. */
+
+zlib_done:
+	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
+		ut_error;
+	}
+
+	{
+		page_t*	page = page_align(d_stream->next_out);
+
+		/* Clear the unused heap space on the uncompressed page. */
+		memset(d_stream->next_out, 0,
+		       page_dir_get_nth_slot(page,
+					     page_dir_get_n_slots(page) - 1)
+		       - d_stream->next_out);
+	}
+
+#ifdef UNIV_DEBUG
+	page_zip->m_start = PAGE_DATA + d_stream->total_in;
+#endif /* UNIV_DEBUG */
+
+	/* Apply the modification log. */
+	{
+		const byte*	mod_log_ptr;
+		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
+						 d_stream->avail_in + 1,
+						 recs, n_dense,
+						 trx_id_col, heap_status,
+						 index, offsets);
+
+		if (UNIV_UNLIKELY(!mod_log_ptr)) {
+			return(FALSE);
+		}
+		page_zip->m_end = mod_log_ptr - page_zip->data;
+		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
+	}
+
+	if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE, NULL)
+			  + page_zip->m_end >= page_zip_get_size(page_zip))) {
+
+		page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
+			       (ulong) page_zip_get_trailer_len(
+				       page_zip, TRUE, NULL),
+			       (ulong) page_zip->m_end,
+			       (ulong) page_zip_get_size(page_zip)));
+		return(FALSE);
+	}
+
+	storage = page_zip->data + page_zip_get_size(page_zip)
+		- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
+
+	externs = storage - n_dense
+		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+	/* Restore the uncompressed columns in heap_no order. */
+
+	for (slot = 0; slot < n_dense; slot++) {
+		ulint	i;
+		ulint	len;
+		byte*	dst;
+		rec_t*	rec	= recs[slot];
+		ibool	exists	= !page_zip_dir_find_free(
+			page_zip, page_offset(rec));
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+
+		dst = rec_get_nth_field(rec, offsets,
+					trx_id_col, &len);
+		ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+		storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+		memcpy(dst, storage,
+		       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+		/* Check if there are any externally stored
+		columns in this record.  For each externally
+		stored column, restore or clear the
+		BTR_EXTERN_FIELD_REF. */
+		if (!rec_offs_any_extern(offsets)) {
+			continue;
+		}
+
+		for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+			if (!rec_offs_nth_extern(offsets, i)) {
+				continue;
+			}
+			dst = rec_get_nth_field(rec, offsets, i, &len);
+
+			if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
+				page_zip_fail(("page_zip_decompress_clust:"
+					       " %lu < 20\n",
+					       (ulong) len));
+				return(FALSE);
+			}
+
+			dst += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+			if (UNIV_LIKELY(exists)) {
+				/* Existing record:
+				restore the BLOB pointer */
+				externs -= BTR_EXTERN_FIELD_REF_SIZE;
+
+				if (UNIV_UNLIKELY
+				    (externs < page_zip->data
+				     + page_zip->m_end)) {
+					page_zip_fail(("page_zip_"
+						       "decompress_clust: "
+						       "%p < %p + %lu\n",
+						       (const void*) externs,
+						       (const void*)
+						       page_zip->data,
+						       (ulong)
+						       page_zip->m_end));
+					return(FALSE);
+				}
+
+				memcpy(dst, externs,
+				       BTR_EXTERN_FIELD_REF_SIZE);
+
+				page_zip->n_blobs++;
+			} else {
+				/* Deleted record:
+				clear the BLOB pointer */
+				memset(dst, 0,
+				       BTR_EXTERN_FIELD_REF_SIZE);
+			}
+		}
+	}
+
+	return(TRUE);
+}
+
+/**********************************************************************//**
+Decompress a page.  This function should tolerate errors on the compressed
+page.  Instead of letting assertions fail, it will return FALSE if an
+inconsistency is detected.
+@return	TRUE on success, FALSE on failure */
+UNIV_INTERN
+ibool
+page_zip_decompress(
+/*================*/
+	page_zip_des_t*	page_zip,/*!< in: data, ssize;
+				out: m_start, m_end, m_nonempty, n_blobs */
+	page_t*		page)	/*!< out: uncompressed page, may be trashed */
+{
+	z_stream	d_stream;
+	dict_index_t*	index	= NULL;
+	rec_t**		recs;	/*!< dense page directory, sorted by address */
+	ulint		n_dense;/* number of user records on the page */
+	ulint		trx_id_col = ULINT_UNDEFINED;
+	mem_heap_t*	heap;
+	ulint*		offsets;
+	ullint		usec = ut_time_us(NULL);
+
+	ut_ad(page_zip_simple_validate(page_zip));
+	UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+	/* The dense directory excludes the infimum and supremum records. */
+	n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
+	if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
+			  >= page_zip_get_size(page_zip))) {
+		page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
+			       (ulong) n_dense,
+			       (ulong) page_zip_get_size(page_zip)));
+		return(FALSE);
+	}
+
+	heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
+	recs = mem_heap_alloc(heap, n_dense * (2 * sizeof *recs));
+
+#ifdef UNIV_ZIP_DEBUG
+	/* Clear the page. */
+	memset(page, 0x55, UNIV_PAGE_SIZE);
+#endif /* UNIV_ZIP_DEBUG */
+	UNIV_MEM_INVALID(page, UNIV_PAGE_SIZE);
+	/* Copy the page header. */
+	memcpy(page, page_zip->data, PAGE_DATA);
+
+	/* Copy the page directory. */
+	if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
+					       recs + n_dense, n_dense))) {
+zlib_error:
+		mem_heap_free(heap);
+		return(FALSE);
+	}
+
+	/* Copy the infimum and supremum records. */
+	memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
+	       infimum_extra, sizeof infimum_extra);
+	if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
+		rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
+				      PAGE_NEW_SUPREMUM);
+	} else {
+		rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
+				      page_zip_dir_get(page_zip, 0)
+				      & PAGE_ZIP_DIR_SLOT_MASK);
+	}
+	memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
+	memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
+	       supremum_extra_data, sizeof supremum_extra_data);
+
+	page_zip_set_alloc(&d_stream, heap);
+
+	if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
+			  != Z_OK)) {
+		ut_error;
+	}
+
+	d_stream.next_in = page_zip->data + PAGE_DATA;
+	/* Subtract the space reserved for
+	the page header and the end marker of the modification log. */
+	d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1);
+
+	d_stream.next_out = page + PAGE_ZIP_START;
+	d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
+
+	/* Decode the zlib header and the index information. */
+	if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
+
+		page_zip_fail(("page_zip_decompress:"
+			       " 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
+		goto zlib_error;
+	}
+
+	if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
+
+		page_zip_fail(("page_zip_decompress:"
+			       " 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
+		goto zlib_error;
+	}
+
+	index = page_zip_fields_decode(
+		page + PAGE_ZIP_START, d_stream.next_out,
+		page_is_leaf(page) ? &trx_id_col : NULL);
+
+	if (UNIV_UNLIKELY(!index)) {
+
+		goto zlib_error;
+	}
+
+	/* Decompress the user records. */
+	page_zip->n_blobs = 0;
+	d_stream.next_out = page + PAGE_ZIP_START;
+
+	{
+		/* Pre-allocate the offsets for rec_get_offsets_reverse(). */
+		ulint	n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
+			+ dict_index_get_n_fields(index);
+		offsets = mem_heap_alloc(heap, n * sizeof(ulint));
+		*offsets = n;
+	}
+
+	/* Decompress the records in heap_no order. */
+	if (!page_is_leaf(page)) {
+		/* This is a node pointer page. */
+		ulint	info_bits;
+
+		if (UNIV_UNLIKELY
+		    (!page_zip_decompress_node_ptrs(page_zip, &d_stream,
+						    recs, n_dense, index,
+						    offsets, heap))) {
+			goto err_exit;
+		}
+
+		info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
+			? REC_INFO_MIN_REC_FLAG : 0;
+
+		if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
+							    info_bits))) {
+			goto err_exit;
+		}
+	} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
+		/* This is a leaf page in a secondary index. */
+		if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
+							   recs, n_dense,
+							   index, offsets))) {
+			goto err_exit;
+		}
+
+		if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
+							    page, 0))) {
+err_exit:
+			page_zip_fields_free(index);
+			mem_heap_free(heap);
+			return(FALSE);
+		}
+	} else {
+		/* This is a leaf page in a clustered index. */
+		if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
+							     &d_stream, recs,
+							     n_dense, index,
+							     trx_id_col,
+							     offsets, heap))) {
+			goto err_exit;
+		}
+
+		if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
+							    page, 0))) {
+			goto err_exit;
+		}
+	}
+
+	ut_a(page_is_comp(page));
+	UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+
+	page_zip_fields_free(index);
+	mem_heap_free(heap);
+	{
+		page_zip_stat_t*	zip_stat
+			= &page_zip_stat[page_zip->ssize - 1];
+		zip_stat->decompressed++;
+		zip_stat->decompressed_usec += ut_time_us(NULL) - usec;
+	}
+
+	/* Update the stat counter for LRU policy. */
+	buf_LRU_stat_inc_unzip();
+
+	return(TRUE);
+}
+
+#ifdef UNIV_ZIP_DEBUG
+/**********************************************************************//**
+Dump a block of memory on the standard error stream. */
+static
+void
+page_zip_hexdump_func(
+/*==================*/
+	const char*	name,	/*!< in: name of the data structure */
+	const void*	buf,	/*!< in: data */
+	ulint		size)	/*!< in: length of the data, in bytes */
+{
+	const byte*	s	= buf;
+	ulint		addr;
+	const ulint	width	= 32; /* bytes per line */
+
+	fprintf(stderr, "%s:\n", name);
+
+	for (addr = 0; addr < size; addr += width) {
+		ulint	i;
+
+		fprintf(stderr, "%04lx ", (ulong) addr);
+
+		i = ut_min(width, size - addr);
+
+		while (i--) {
+			fprintf(stderr, "%02x", *s++);
+		}
+
+		putc('\n', stderr);
+	}
+}
+
+/** Dump a block of memory on the standard error stream.
+@param buf	in: data
+@param size	in: length of the data, in bytes */
+#define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
+
+/** Flag: make page_zip_validate() compare page headers only */
+UNIV_INTERN ibool	page_zip_validate_header_only = FALSE;
+
+/**********************************************************************//**
+Check that the compressed and decompressed pages match.
+@return	TRUE if valid, FALSE if not */
+UNIV_INTERN
+ibool
+page_zip_validate_low(
+/*==================*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	const page_t*		page,	/*!< in: uncompressed page */
+	ibool			sloppy)	/*!< in: FALSE=strict,
+					TRUE=ignore the MIN_REC_FLAG */
+{
+	page_zip_des_t	temp_page_zip;
+	byte*		temp_page_buf;
+	page_t*		temp_page;
+	ibool		valid;
+
+	if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
+		   FIL_PAGE_LSN - FIL_PAGE_PREV)
+	    || memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
+	    || memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
+		      PAGE_DATA - FIL_PAGE_DATA)) {
+		page_zip_fail(("page_zip_validate: page header\n"));
+		page_zip_hexdump(page_zip, sizeof *page_zip);
+		page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
+		page_zip_hexdump(page, UNIV_PAGE_SIZE);
+		return(FALSE);
+	}
+
+	ut_a(page_is_comp(page));
+
+	if (page_zip_validate_header_only) {
+		return(TRUE);
+	}
+
+	/* page_zip_decompress() expects the uncompressed page to be
+	UNIV_PAGE_SIZE aligned. */
+	temp_page_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
+	temp_page = ut_align(temp_page_buf, UNIV_PAGE_SIZE);
+
+#ifdef UNIV_DEBUG_VALGRIND
+	/* Get detailed information on the valid bits in case the
+	UNIV_MEM_ASSERT_RW() checks fail.  The v-bits of page[],
+	page_zip->data[] or page_zip could be viewed at temp_page[] or
+	temp_page_zip in a debugger when running valgrind --db-attach. */
+	VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE);
+	UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+	VALGRIND_GET_VBITS(page_zip, &temp_page_zip, sizeof temp_page_zip);
+	UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip);
+	VALGRIND_GET_VBITS(page_zip->data, temp_page,
+			   page_zip_get_size(page_zip));
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+#endif /* UNIV_DEBUG_VALGRIND */
+
+	temp_page_zip = *page_zip;
+	valid = page_zip_decompress(&temp_page_zip, temp_page);
+	if (!valid) {
+		fputs("page_zip_validate(): failed to decompress\n", stderr);
+		goto func_exit;
+	}
+	if (page_zip->n_blobs != temp_page_zip.n_blobs) {
+		page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
+			       page_zip->n_blobs, temp_page_zip.n_blobs));
+		valid = FALSE;
+	}
+#ifdef UNIV_DEBUG
+	if (page_zip->m_start != temp_page_zip.m_start) {
+		page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
+			       page_zip->m_start, temp_page_zip.m_start));
+		valid = FALSE;
+	}
+#endif /* UNIV_DEBUG */
+	if (page_zip->m_end != temp_page_zip.m_end) {
+		page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
+			       page_zip->m_end, temp_page_zip.m_end));
+		valid = FALSE;
+	}
+	if (page_zip->m_nonempty != temp_page_zip.m_nonempty) {
+		page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
+			       page_zip->m_nonempty,
+			       temp_page_zip.m_nonempty));
+		valid = FALSE;
+	}
+	if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
+		   UNIV_PAGE_SIZE - PAGE_HEADER - FIL_PAGE_DATA_END)) {
+
+		/* In crash recovery, the "minimum record" flag may be
+		set incorrectly until the mini-transaction is
+		committed.  Let us tolerate that difference when we
+		are performing a sloppy validation. */
+
+		if (sloppy) {
+			byte	info_bits_diff;
+			ulint	offset
+				= rec_get_next_offs(page + PAGE_NEW_INFIMUM,
+						    TRUE);
+			ut_a(offset >= PAGE_NEW_SUPREMUM);
+			offset -= 5 /* REC_NEW_INFO_BITS */;
+
+			info_bits_diff = page[offset] ^ temp_page[offset];
+
+			if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
+				temp_page[offset] = page[offset];
+
+				if (!memcmp(page + PAGE_HEADER,
+					    temp_page + PAGE_HEADER,
+					    UNIV_PAGE_SIZE - PAGE_HEADER
+					    - FIL_PAGE_DATA_END)) {
+
+					/* Only the minimum record flag
+					differed.  Let us ignore it. */
+					page_zip_fail(("page_zip_validate: "
+						       "min_rec_flag "
+						       "(ignored, "
+						       "%lu,%lu,0x%02lx)\n",
+						       page_get_space_id(page),
+						       page_get_page_no(page),
+						       (ulong) page[offset]));
+					goto func_exit;
+				}
+			}
+		}
+		page_zip_fail(("page_zip_validate: content\n"));
+		valid = FALSE;
+	}
+
+func_exit:
+	if (!valid) {
+		page_zip_hexdump(page_zip, sizeof *page_zip);
+		page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
+		page_zip_hexdump(page, UNIV_PAGE_SIZE);
+		page_zip_hexdump(temp_page, UNIV_PAGE_SIZE);
+	}
+	ut_free(temp_page_buf);
+	return(valid);
+}
+
+/**********************************************************************//**
+Check that the compressed and decompressed pages match.
+@return	TRUE if valid, FALSE if not */
+UNIV_INTERN
+ibool
+page_zip_validate(
+/*==============*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	const page_t*		page)	/*!< in: uncompressed page */
+{
+	return(page_zip_validate_low(page_zip, page,
+				     recv_recovery_is_on()));
+}
+#endif /* UNIV_ZIP_DEBUG */
+
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Assert that the compressed and decompressed page headers match.
+@return	TRUE */
+static
+ibool
+page_zip_header_cmp(
+/*================*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	const byte*		page)	/*!< in: uncompressed page */
+{
+	ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
+		      FIL_PAGE_LSN - FIL_PAGE_PREV));
+	ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
+		      2));
+	ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
+		      PAGE_DATA - FIL_PAGE_DATA));
+
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
+/**********************************************************************//**
+Write a record on the compressed page that contains externally stored
+columns.  The data must already have been written to the uncompressed page.
+@return	end of modification log */
+static
+byte*
+page_zip_write_rec_ext(
+/*===================*/
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
+	const page_t*	page,		/*!< in: page containing rec */
+	const byte*	rec,		/*!< in: record being written */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
+	ulint		create,		/*!< in: nonzero=insert, zero=update */
+	ulint		trx_id_col,	/*!< in: position of DB_TRX_ID */
+	ulint		heap_no,	/*!< in: heap number of rec */
+	byte*		storage,	/*!< in: end of dense page directory */
+	byte*		data)		/*!< in: end of modification log */
+{
+	const byte*	start	= rec;
+	ulint		i;
+	ulint		len;
+	byte*		externs	= storage;
+	ulint		n_ext	= rec_offs_n_extern(offsets);
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+			   rec_offs_extra_size(offsets));
+
+	externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
+		* (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW);
+
+	/* Note that this will not take into account
+	the BLOB columns of rec if create==TRUE. */
+	ut_ad(data + rec_offs_data_size(offsets)
+	      - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
+	      - n_ext * BTR_EXTERN_FIELD_REF_SIZE
+	      < externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs);
+
+	{
+		ulint	blob_no = page_zip_get_n_prev_extern(
+			page_zip, rec, index);
+		byte*	ext_end = externs - page_zip->n_blobs
+			* BTR_EXTERN_FIELD_REF_SIZE;
+		ut_ad(blob_no <= page_zip->n_blobs);
+		externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
+
+		if (create) {
+			page_zip->n_blobs += n_ext;
+			ASSERT_ZERO_BLOB(ext_end - n_ext
+					 * BTR_EXTERN_FIELD_REF_SIZE);
+			memmove(ext_end - n_ext
+				* BTR_EXTERN_FIELD_REF_SIZE,
+				ext_end,
+				externs - ext_end);
+		}
+
+		ut_a(blob_no + n_ext <= page_zip->n_blobs);
+	}
+
+	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+		const byte*	src;
+
+		if (UNIV_UNLIKELY(i == trx_id_col)) {
+			ut_ad(!rec_offs_nth_extern(offsets,
+						   i));
+			ut_ad(!rec_offs_nth_extern(offsets,
+						   i + 1));
+			/* Locate trx_id and roll_ptr. */
+			src = rec_get_nth_field(rec, offsets,
+						i, &len);
+			ut_ad(len == DATA_TRX_ID_LEN);
+			ut_ad(src + DATA_TRX_ID_LEN
+			      == rec_get_nth_field(
+				      rec, offsets,
+				      i + 1, &len));
+			ut_ad(len == DATA_ROLL_PTR_LEN);
+
+			/* Log the preceding fields. */
+			ASSERT_ZERO(data, src - start);
+			memcpy(data, start, src - start);
+			data += src - start;
+			start = src + (DATA_TRX_ID_LEN
+				       + DATA_ROLL_PTR_LEN);
+
+			/* Store trx_id and roll_ptr. */
+			memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
+			       * (heap_no - 1),
+			       src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+			i++; /* skip also roll_ptr */
+		} else if (rec_offs_nth_extern(offsets, i)) {
+			src = rec_get_nth_field(rec, offsets,
+						i, &len);
+
+			ut_ad(dict_index_is_clust(index));
+			ut_ad(len
+			      >= BTR_EXTERN_FIELD_REF_SIZE);
+			src += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+			ASSERT_ZERO(data, src - start);
+			memcpy(data, start, src - start);
+			data += src - start;
+			start = src + BTR_EXTERN_FIELD_REF_SIZE;
+
+			/* Store the BLOB pointer. */
+			externs -= BTR_EXTERN_FIELD_REF_SIZE;
+			ut_ad(data < externs);
+			memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE);
+		}
+	}
+
+	/* Log the last bytes of the record. */
+	len = rec_offs_data_size(offsets) - (start - rec);
+
+	ASSERT_ZERO(data, len);
+	memcpy(data, start, len);
+	data += len;
+
+	return(data);
+}
+
+/**********************************************************************//**
+Write an entire record on the compressed page.  The data must already
+have been written to the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_write_rec(
+/*===============*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record being written */
+	dict_index_t*	index,	/*!< in: the index the record belongs to */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint		create)	/*!< in: nonzero=insert, zero=update */
+{
+	const page_t*	page;
+	byte*		data;
+	byte*		storage;
+	ulint		heap_no;
+	byte*		slot;
+
+	ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
+	ut_ad(page_zip_simple_validate(page_zip));
+	ut_ad(page_zip_get_size(page_zip)
+	      > PAGE_DATA + page_zip_dir_size(page_zip));
+	ut_ad(rec_offs_comp(offsets));
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	ut_ad(page_zip->m_start >= PAGE_DATA);
+
+	page = page_align(rec);
+
+	ut_ad(page_zip_header_cmp(page_zip, page));
+	ut_ad(page_simple_validate_new((page_t*) page));
+
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+			   rec_offs_extra_size(offsets));
+
+	slot = page_zip_dir_find(page_zip, page_offset(rec));
+	ut_a(slot);
+	/* Copy the delete mark. */
+	if (rec_get_deleted_flag(rec, TRUE)) {
+		*slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
+	} else {
+		*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
+	}
+
+	ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
+	ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
+	      - PAGE_DIR - PAGE_DIR_SLOT_SIZE
+	      * page_dir_get_n_slots(page));
+
+	heap_no = rec_get_heap_no_new(rec);
+	ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
+	ut_ad(heap_no < page_dir_get_n_heap(page));
+
+	/* Append to the modification log. */
+	data = page_zip->data + page_zip->m_end;
+	ut_ad(!*data);
+
+	/* Identify the record by writing its heap number - 1.
+	0 is reserved to indicate the end of the modification log. */
+
+	if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
+		*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
+		ut_ad(!*data);
+	}
+	*data++ = (byte) ((heap_no - 1) << 1);
+	ut_ad(!*data);
+
+	{
+		const byte*	start	= rec - rec_offs_extra_size(offsets);
+		const byte*	b	= rec - REC_N_NEW_EXTRA_BYTES;
+
+		/* Write the extra bytes backwards, so that
+		rec_offs_extra_size() can be easily computed in
+		page_zip_apply_log() by invoking
+		rec_get_offsets_reverse(). */
+
+		while (b != start) {
+			*data++ = *--b;
+			ut_ad(!*data);
+		}
+	}
+
+	/* Write the data bytes.  Store the uncompressed bytes separately. */
+	storage = page_zip->data + page_zip_get_size(page_zip)
+		- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
+		* PAGE_ZIP_DIR_SLOT_SIZE;
+
+	if (page_is_leaf(page)) {
+		ulint		len;
+
+		if (dict_index_is_clust(index)) {
+			ulint		trx_id_col;
+
+			trx_id_col = dict_index_get_sys_col_pos(index,
+								DATA_TRX_ID);
+			ut_ad(trx_id_col != ULINT_UNDEFINED);
+
+			/* Store separately trx_id, roll_ptr and
+			the BTR_EXTERN_FIELD_REF of each BLOB column. */
+			if (rec_offs_any_extern(offsets)) {
+				data = page_zip_write_rec_ext(
+					page_zip, page,
+					rec, index, offsets, create,
+					trx_id_col, heap_no, storage, data);
+			} else {
+				/* Locate trx_id and roll_ptr. */
+				const byte*	src
+					= rec_get_nth_field(rec, offsets,
+							    trx_id_col, &len);
+				ut_ad(len == DATA_TRX_ID_LEN);
+				ut_ad(src + DATA_TRX_ID_LEN
+				      == rec_get_nth_field(
+					      rec, offsets,
+					      trx_id_col + 1, &len));
+				ut_ad(len == DATA_ROLL_PTR_LEN);
+
+				/* Log the preceding fields. */
+				ASSERT_ZERO(data, src - rec);
+				memcpy(data, rec, src - rec);
+				data += src - rec;
+
+				/* Store trx_id and roll_ptr. */
+				memcpy(storage
+				       - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
+				       * (heap_no - 1),
+				       src,
+				       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+				src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+
+				/* Log the last bytes of the record. */
+				len = rec_offs_data_size(offsets)
+					- (src - rec);
+
+				ASSERT_ZERO(data, len);
+				memcpy(data, src, len);
+				data += len;
+			}
+		} else {
+			/* Leaf page of a secondary index:
+			no externally stored columns */
+			ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
+			      == ULINT_UNDEFINED);
+			ut_ad(!rec_offs_any_extern(offsets));
+
+			/* Log the entire record. */
+			len = rec_offs_data_size(offsets);
+
+			ASSERT_ZERO(data, len);
+			memcpy(data, rec, len);
+			data += len;
+		}
+	} else {
+		/* This is a node pointer page. */
+		ulint	len;
+
+		/* Non-leaf nodes should not have any externally
+		stored columns. */
+		ut_ad(!rec_offs_any_extern(offsets));
+
+		/* Copy the data bytes, except node_ptr. */
+		len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
+		ut_ad(data + len < storage - REC_NODE_PTR_SIZE
+		      * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
+		ASSERT_ZERO(data, len);
+		memcpy(data, rec, len);
+		data += len;
+
+		/* Copy the node pointer to the uncompressed area. */
+		memcpy(storage - REC_NODE_PTR_SIZE
+		       * (heap_no - 1),
+		       rec + len,
+		       REC_NODE_PTR_SIZE);
+	}
+
+	ut_a(!*data);
+	ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
+	page_zip->m_end = data - page_zip->data;
+	page_zip->m_nonempty = TRUE;
+
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(page_zip_validate(page_zip, page_align(rec)));
+#endif /* UNIV_ZIP_DEBUG */
+}
+
+/***********************************************************//**
+Parses a log record of writing a BLOB pointer of a record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_write_blob_ptr(
+/*==========================*/
+	byte*		ptr,	/*!< in: redo log buffer */
+	byte*		end_ptr,/*!< in: redo log buffer end */
+	page_t*		page,	/*!< in/out: uncompressed page */
+	page_zip_des_t*	page_zip)/*!< in/out: compressed page */
+{
+	ulint	offset;
+	ulint	z_offset;
+
+	ut_ad(!page == !page_zip);
+
+	if (UNIV_UNLIKELY
+	    (end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) {
+
+		return(NULL);
+	}
+
+	offset = mach_read_from_2(ptr);
+	z_offset = mach_read_from_2(ptr + 2);
+
+	if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
+	    || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
+	    || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
+corrupt:
+		recv_sys->found_corrupt_log = TRUE;
+
+		return(NULL);
+	}
+
+	if (page) {
+		if (UNIV_UNLIKELY(!page_zip)
+		    || UNIV_UNLIKELY(!page_is_leaf(page))) {
+
+			goto corrupt;
+		}
+
+#ifdef UNIV_ZIP_DEBUG
+		ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+		memcpy(page + offset,
+		       ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
+		memcpy(page_zip->data + z_offset,
+		       ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
+
+#ifdef UNIV_ZIP_DEBUG
+		ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+	}
+
+	return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE));
+}
+
+/**********************************************************************//**
+Write a BLOB pointer of a record on the leaf page of a clustered index.
+The information must already have been updated on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_write_blob_ptr(
+/*====================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in/out: record whose data is being
+				written */
+	dict_index_t*	index,	/*!< in: index of the page */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint		n,	/*!< in: column index */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle,
+				or NULL if no logging is needed */
+{
+	const byte*	field;
+	byte*		externs;
+	const page_t*	page	= page_align(rec);
+	ulint		blob_no;
+	ulint		len;
+
+	ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
+	ut_ad(page_simple_validate_new((page_t*) page));
+	ut_ad(page_zip_simple_validate(page_zip));
+	ut_ad(page_zip_get_size(page_zip)
+	      > PAGE_DATA + page_zip_dir_size(page_zip));
+	ut_ad(rec_offs_comp(offsets));
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(rec_offs_any_extern(offsets));
+	ut_ad(rec_offs_nth_extern(offsets, n));
+
+	ut_ad(page_zip->m_start >= PAGE_DATA);
+	ut_ad(page_zip_header_cmp(page_zip, page));
+
+	ut_ad(page_is_leaf(page));
+	ut_ad(dict_index_is_clust(index));
+
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+			   rec_offs_extra_size(offsets));
+
+	blob_no = page_zip_get_n_prev_extern(page_zip, rec, index)
+		+ rec_get_n_extern_new(rec, index, n);
+	ut_a(blob_no < page_zip->n_blobs);
+
+	externs = page_zip->data + page_zip_get_size(page_zip)
+		- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
+		* (PAGE_ZIP_DIR_SLOT_SIZE
+		   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+	field = rec_get_nth_field(rec, offsets, n, &len);
+
+	externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
+	field += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+	memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
+
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+	if (mtr) {
+#ifndef UNIV_HOTBACKUP
+		byte*	log_ptr	= mlog_open(
+			mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
+		if (UNIV_UNLIKELY(!log_ptr)) {
+			return;
+		}
+
+		log_ptr = mlog_write_initial_log_record_fast(
+			(byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
+		mach_write_to_2(log_ptr, page_offset(field));
+		log_ptr += 2;
+		mach_write_to_2(log_ptr, externs - page_zip->data);
+		log_ptr += 2;
+		memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
+		log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
+		mlog_close(mtr, log_ptr);
+#endif /* !UNIV_HOTBACKUP */
+	}
+}
+
+/***********************************************************//**
+Parses a log record of writing the node pointer of a record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_write_node_ptr(
+/*==========================*/
+	byte*		ptr,	/*!< in: redo log buffer */
+	byte*		end_ptr,/*!< in: redo log buffer end */
+	page_t*		page,	/*!< in/out: uncompressed page */
+	page_zip_des_t*	page_zip)/*!< in/out: compressed page */
+{
+	ulint	offset;
+	ulint	z_offset;
+
+	ut_ad(!page == !page_zip);
+
+	if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
+
+		return(NULL);
+	}
+
+	offset = mach_read_from_2(ptr);
+	z_offset = mach_read_from_2(ptr + 2);
+
+	if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
+	    || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
+	    || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
+corrupt:
+		recv_sys->found_corrupt_log = TRUE;
+
+		return(NULL);
+	}
+
+	if (page) {
+		byte*	storage_end;
+		byte*	field;
+		byte*	storage;
+		ulint	heap_no;
+
+		if (UNIV_UNLIKELY(!page_zip)
+		    || UNIV_UNLIKELY(page_is_leaf(page))) {
+
+			goto corrupt;
+		}
+
+#ifdef UNIV_ZIP_DEBUG
+		ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+		field = page + offset;
+		storage = page_zip->data + z_offset;
+
+		storage_end = page_zip->data + page_zip_get_size(page_zip)
+			- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
+			* PAGE_ZIP_DIR_SLOT_SIZE;
+
+		heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
+
+		if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
+		    || UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
+		    || UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
+
+			goto corrupt;
+		}
+
+		memcpy(field, ptr + 4, REC_NODE_PTR_SIZE);
+		memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
+
+#ifdef UNIV_ZIP_DEBUG
+		ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+	}
+
+	return(ptr + (2 + 2 + REC_NODE_PTR_SIZE));
+}
+
+/**********************************************************************//**
+Write the node pointer of a record on a non-leaf compressed page. */
+UNIV_INTERN
+void
+page_zip_write_node_ptr(
+/*====================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	byte*		rec,	/*!< in/out: record */
+	ulint		size,	/*!< in: data size of rec */
+	ulint		ptr,	/*!< in: node pointer */
+	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
+{
+	byte*	field;
+	byte*	storage;
+	page_t*	page	= page_align(rec);
+
+	ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
+	ut_ad(page_simple_validate_new(page));
+	ut_ad(page_zip_simple_validate(page_zip));
+	ut_ad(page_zip_get_size(page_zip)
+	      > PAGE_DATA + page_zip_dir_size(page_zip));
+	ut_ad(page_rec_is_comp(rec));
+
+	ut_ad(page_zip->m_start >= PAGE_DATA);
+	ut_ad(page_zip_header_cmp(page_zip, page));
+
+	ut_ad(!page_is_leaf(page));
+
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+	UNIV_MEM_ASSERT_RW(rec, size);
+
+	storage = page_zip->data + page_zip_get_size(page_zip)
+		- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
+		* PAGE_ZIP_DIR_SLOT_SIZE
+		- (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
+	field = rec + size - REC_NODE_PTR_SIZE;
+
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+	ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+#if REC_NODE_PTR_SIZE != 4
+# error "REC_NODE_PTR_SIZE != 4"
+#endif
+	mach_write_to_4(field, ptr);
+	memcpy(storage, field, REC_NODE_PTR_SIZE);
+
+	if (mtr) {
+#ifndef UNIV_HOTBACKUP
+		byte*	log_ptr	= mlog_open(mtr,
+					    11 + 2 + 2 + REC_NODE_PTR_SIZE);
+		if (UNIV_UNLIKELY(!log_ptr)) {
+			return;
+		}
+
+		log_ptr = mlog_write_initial_log_record_fast(
+			field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
+		mach_write_to_2(log_ptr, page_offset(field));
+		log_ptr += 2;
+		mach_write_to_2(log_ptr, storage - page_zip->data);
+		log_ptr += 2;
+		memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
+		log_ptr += REC_NODE_PTR_SIZE;
+		mlog_close(mtr, log_ptr);
+#endif /* !UNIV_HOTBACKUP */
+	}
+}
+
+/**********************************************************************//**
+Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
+UNIV_INTERN
+void
+page_zip_write_trx_id_and_roll_ptr(
+/*===============================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	byte*		rec,	/*!< in/out: record */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint		trx_id_col,/*!< in: column number of TRX_ID in rec */
+	trx_id_t	trx_id,	/*!< in: transaction identifier */
+	roll_ptr_t	roll_ptr)/*!< in: roll_ptr */
+{
+	byte*	field;
+	byte*	storage;
+	page_t*	page	= page_align(rec);
+	ulint	len;
+
+	ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
+	ut_ad(page_simple_validate_new(page));
+	ut_ad(page_zip_simple_validate(page_zip));
+	ut_ad(page_zip_get_size(page_zip)
+	      > PAGE_DATA + page_zip_dir_size(page_zip));
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(rec_offs_comp(offsets));
+
+	ut_ad(page_zip->m_start >= PAGE_DATA);
+	ut_ad(page_zip_header_cmp(page_zip, page));
+
+	ut_ad(page_is_leaf(page));
+
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+	storage = page_zip->data + page_zip_get_size(page_zip)
+		- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
+		* PAGE_ZIP_DIR_SLOT_SIZE
+		- (rec_get_heap_no_new(rec) - 1)
+		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
+# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
+#endif
+	field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
+	ut_ad(len == DATA_TRX_ID_LEN);
+	ut_ad(field + DATA_TRX_ID_LEN
+	      == rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
+	ut_ad(len == DATA_ROLL_PTR_LEN);
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+	ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+#if DATA_TRX_ID_LEN != 6
+# error "DATA_TRX_ID_LEN != 6"
+#endif
+	mach_write_to_6(field, trx_id);
+#if DATA_ROLL_PTR_LEN != 7
+# error "DATA_ROLL_PTR_LEN != 7"
+#endif
+	mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
+	memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+			   rec_offs_extra_size(offsets));
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+}
+
+#ifdef UNIV_ZIP_DEBUG
+/** Set this variable in a debugger to disable page_zip_clear_rec().
+The only observable effect should be the compression ratio due to
+deleted records not being zeroed out.  In rare cases, there can be
+page_zip_validate() failures on the node_ptr, trx_id and roll_ptr
+columns if the space is reallocated for a smaller record. */
+UNIV_INTERN ibool	page_zip_clear_rec_disable;
+#endif /* UNIV_ZIP_DEBUG */
+
+/**********************************************************************//**
+Clear an area on the uncompressed and compressed page, if possible. */
+static
+void
+page_zip_clear_rec(
+/*===============*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	byte*		rec,	/*!< in: record to clear */
+	dict_index_t*	index,	/*!< in: index of rec */
+	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+	ulint	heap_no;
+	page_t*	page	= page_align(rec);
+	/* page_zip_validate() would fail here if a record
+	containing externally stored columns is being deleted. */
+	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
+	ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
+	ut_ad(page_zip_header_cmp(page_zip, page));
+
+	heap_no = rec_get_heap_no_new(rec);
+	ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
+
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+			   rec_offs_extra_size(offsets));
+
+	if (
+#ifdef UNIV_ZIP_DEBUG
+	    !page_zip_clear_rec_disable &&
+#endif /* UNIV_ZIP_DEBUG */
+	    page_zip->m_end
+	    + 1 + ((heap_no - 1) >= 64)/* size of the log entry */
+	    + page_zip_get_trailer_len(page_zip,
+				       dict_index_is_clust(index), NULL)
+	    < page_zip_get_size(page_zip)) {
+		byte*	data;
+
+		/* Clear only the data bytes, because the allocator and
+		the decompressor depend on the extra bytes. */
+		memset(rec, 0, rec_offs_data_size(offsets));
+
+		if (!page_is_leaf(page)) {
+			/* Clear node_ptr on the compressed page. */
+			byte*	storage	= page_zip->data
+				+ page_zip_get_size(page_zip)
+				- (page_dir_get_n_heap(page)
+				   - PAGE_HEAP_NO_USER_LOW)
+				* PAGE_ZIP_DIR_SLOT_SIZE;
+
+			memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
+			       0, REC_NODE_PTR_SIZE);
+		} else if (dict_index_is_clust(index)) {
+			/* Clear trx_id and roll_ptr on the compressed page. */
+			byte*	storage	= page_zip->data
+				+ page_zip_get_size(page_zip)
+				- (page_dir_get_n_heap(page)
+				   - PAGE_HEAP_NO_USER_LOW)
+				* PAGE_ZIP_DIR_SLOT_SIZE;
+
+			memset(storage - (heap_no - 1)
+			       * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
+			       0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+		}
+
+		/* Log that the data was zeroed out. */
+		data = page_zip->data + page_zip->m_end;
+		ut_ad(!*data);
+		if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
+			*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
+			ut_ad(!*data);
+		}
+		*data++ = (byte) ((heap_no - 1) << 1 | 1);
+		ut_ad(!*data);
+		ut_ad((ulint) (data - page_zip->data)
+		      < page_zip_get_size(page_zip));
+		page_zip->m_end = data - page_zip->data;
+		page_zip->m_nonempty = TRUE;
+	} else if (page_is_leaf(page) && dict_index_is_clust(index)) {
+		/* Do not clear the record, because there is not enough space
+		to log the operation. */
+
+		if (rec_offs_any_extern(offsets)) {
+			ulint	i;
+
+			for (i = rec_offs_n_fields(offsets); i--; ) {
+				/* Clear all BLOB pointers in order to make
+				page_zip_validate() pass. */
+				if (rec_offs_nth_extern(offsets, i)) {
+					ulint	len;
+					byte*	field = rec_get_nth_field(
+						rec, offsets, i, &len);
+					memset(field + len
+					       - BTR_EXTERN_FIELD_REF_SIZE,
+					       0, BTR_EXTERN_FIELD_REF_SIZE);
+				}
+			}
+		}
+	}
+
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+}
+
+/**********************************************************************//**
+Write the "deleted" flag of a record on a compressed page.  The flag must
+already have been written on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_rec_set_deleted(
+/*=====================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record on the uncompressed page */
+	ulint		flag)	/*!< in: the deleted flag (nonzero=TRUE) */
+{
+	byte*	slot = page_zip_dir_find(page_zip, page_offset(rec));
+	ut_a(slot);
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+	if (flag) {
+		*slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
+	} else {
+		*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
+	}
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(page_zip_validate(page_zip, page_align(rec)));
+#endif /* UNIV_ZIP_DEBUG */
+}
+
+/**********************************************************************//**
+Write the "owned" flag of a record on a compressed page.  The n_owned field
+must already have been written on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_rec_set_owned(
+/*===================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record on the uncompressed page */
+	ulint		flag)	/*!< in: the owned flag (nonzero=TRUE) */
+{
+	byte*	slot = page_zip_dir_find(page_zip, page_offset(rec));
+	ut_a(slot);
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+	if (flag) {
+		*slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
+	} else {
+		*slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
+	}
+}
+
+/**********************************************************************//**
+Insert a record to the dense page directory. */
+UNIV_INTERN
+void
+page_zip_dir_insert(
+/*================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	prev_rec,/*!< in: record after which to insert */
+	const byte*	free_rec,/*!< in: record from which rec was
+				allocated, or NULL */
+	byte*		rec)	/*!< in: record to insert */
+{
+	ulint	n_dense;
+	byte*	slot_rec;
+	byte*	slot_free;
+
+	ut_ad(prev_rec != rec);
+	ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec);
+	ut_ad(page_zip_simple_validate(page_zip));
+
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+	if (page_rec_is_infimum(prev_rec)) {
+		/* Use the first slot. */
+		slot_rec = page_zip->data + page_zip_get_size(page_zip);
+	} else {
+		byte*	end	= page_zip->data + page_zip_get_size(page_zip);
+		byte*	start	= end - page_zip_dir_user_size(page_zip);
+
+		if (UNIV_LIKELY(!free_rec)) {
+			/* PAGE_N_RECS was already incremented
+			in page_cur_insert_rec_zip(), but the
+			dense directory slot at that position
+			contains garbage.  Skip it. */
+			start += PAGE_ZIP_DIR_SLOT_SIZE;
+		}
+
+		slot_rec = page_zip_dir_find_low(start, end,
+						 page_offset(prev_rec));
+		ut_a(slot_rec);
+	}
+
+	/* Read the old n_dense (n_heap may have been incremented). */
+	n_dense = page_dir_get_n_heap(page_zip->data)
+		- (PAGE_HEAP_NO_USER_LOW + 1);
+
+	if (UNIV_LIKELY_NULL(free_rec)) {
+		/* The record was allocated from the free list.
+		Shift the dense directory only up to that slot.
+		Note that in this case, n_dense is actually
+		off by one, because page_cur_insert_rec_zip()
+		did not increment n_heap. */
+		ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
+		      + PAGE_HEAP_NO_USER_LOW);
+		ut_ad(rec >= free_rec);
+		slot_free = page_zip_dir_find(page_zip, page_offset(free_rec));
+		ut_ad(slot_free);
+		slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
+	} else {
+		/* The record was allocated from the heap.
+		Shift the entire dense directory. */
+		ut_ad(rec_get_heap_no_new(rec) == n_dense
+		      + PAGE_HEAP_NO_USER_LOW);
+
+		/* Shift to the end of the dense page directory. */
+		slot_free = page_zip->data + page_zip_get_size(page_zip)
+			- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
+	}
+
+	/* Shift the dense directory to allocate place for rec. */
+	memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
+		slot_rec - slot_free);
+
+	/* Write the entry for the inserted record.
+	The "owned" and "deleted" flags must be zero. */
+	mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec));
+}
+
+/**********************************************************************//**
+Shift the dense page directory and the array of BLOB pointers
+when a record is deleted. */
+UNIV_INTERN
+void
+page_zip_dir_delete(
+/*================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	byte*		rec,	/*!< in: record to delete */
+	dict_index_t*	index,	/*!< in: index of rec */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec) */
+	const byte*	free)	/*!< in: previous start of the free list */
+{
+	byte*	slot_rec;
+	byte*	slot_free;
+	ulint	n_ext;
+	page_t*	page	= page_align(rec);
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(rec_offs_comp(offsets));
+
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+			   rec_offs_extra_size(offsets));
+
+	slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
+
+	ut_a(slot_rec);
+
+	/* This could not be done before page_zip_dir_find(). */
+	page_header_set_field(page, page_zip, PAGE_N_RECS,
+			      (ulint)(page_get_n_recs(page) - 1));
+
+	if (UNIV_UNLIKELY(!free)) {
+		/* Make the last slot the start of the free list. */
+		slot_free = page_zip->data + page_zip_get_size(page_zip)
+			- PAGE_ZIP_DIR_SLOT_SIZE
+			* (page_dir_get_n_heap(page_zip->data)
+			   - PAGE_HEAP_NO_USER_LOW);
+	} else {
+		slot_free = page_zip_dir_find_free(page_zip,
+						   page_offset(free));
+		ut_a(slot_free < slot_rec);
+		/* Grow the free list by one slot by moving the start. */
+		slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
+	}
+
+	if (UNIV_LIKELY(slot_rec > slot_free)) {
+		memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
+			slot_free,
+			slot_rec - slot_free);
+	}
+
+	/* Write the entry for the deleted record.
+	The "owned" and "deleted" flags will be cleared. */
+	mach_write_to_2(slot_free, page_offset(rec));
+
+	if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
+		ut_ad(!rec_offs_any_extern(offsets));
+		goto skip_blobs;
+	}
+
+	n_ext = rec_offs_n_extern(offsets);
+	if (UNIV_UNLIKELY(n_ext)) {
+		/* Shift and zero fill the array of BLOB pointers. */
+		ulint	blob_no;
+		byte*	externs;
+		byte*	ext_end;
+
+		blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
+		ut_a(blob_no + n_ext <= page_zip->n_blobs);
+
+		externs = page_zip->data + page_zip_get_size(page_zip)
+			- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
+			* (PAGE_ZIP_DIR_SLOT_SIZE
+			   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+		ext_end = externs - page_zip->n_blobs
+			* BTR_EXTERN_FIELD_REF_SIZE;
+		externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
+
+		page_zip->n_blobs -= n_ext;
+		/* Shift and zero fill the array. */
+		memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
+			(page_zip->n_blobs - blob_no)
+			* BTR_EXTERN_FIELD_REF_SIZE);
+		memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
+	}
+
+skip_blobs:
+	/* The compression algorithm expects info_bits and n_owned
+	to be 0 for deleted records. */
+	rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
+
+	page_zip_clear_rec(page_zip, rec, index, offsets);
+}
+
+/**********************************************************************//**
+Add a slot to the dense page directory. */
+UNIV_INTERN
+void
+page_zip_dir_add_slot(
+/*==================*/
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
+	ulint		is_clustered)	/*!< in: nonzero for clustered index,
+					zero for others */
+{
+	ulint	n_dense;
+	byte*	dir;
+	byte*	stored;
+
+	ut_ad(page_is_comp(page_zip->data));
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+	/* Read the old n_dense (n_heap has already been incremented). */
+	n_dense = page_dir_get_n_heap(page_zip->data)
+		- (PAGE_HEAP_NO_USER_LOW + 1);
+
+	dir = page_zip->data + page_zip_get_size(page_zip)
+		- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
+
+	if (!page_is_leaf(page_zip->data)) {
+		ut_ad(!page_zip->n_blobs);
+		stored = dir - n_dense * REC_NODE_PTR_SIZE;
+	} else if (UNIV_UNLIKELY(is_clustered)) {
+		/* Move the BLOB pointer array backwards to make space for the
+		roll_ptr and trx_id columns and the dense directory slot. */
+		byte*	externs;
+
+		stored = dir - n_dense
+			* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+		externs = stored
+			- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
+		ASSERT_ZERO(externs
+			    - (PAGE_ZIP_DIR_SLOT_SIZE
+			       + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
+			    PAGE_ZIP_DIR_SLOT_SIZE
+			    + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+		memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE
+				   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
+			externs, stored - externs);
+	} else {
+		stored = dir
+			- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
+		ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
+			    PAGE_ZIP_DIR_SLOT_SIZE);
+	}
+
+	/* Move the uncompressed area backwards to make space
+	for one directory slot. */
+	memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored);
+}
+
+/***********************************************************//**
+Parses a log record of writing to the header of a page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_write_header(
+/*========================*/
+	byte*		ptr,	/*!< in: redo log buffer */
+	byte*		end_ptr,/*!< in: redo log buffer end */
+	page_t*		page,	/*!< in/out: uncompressed page */
+	page_zip_des_t*	page_zip)/*!< in/out: compressed page */
+{
+	ulint	offset;
+	ulint	len;
+
+	ut_ad(ptr && end_ptr);
+	ut_ad(!page == !page_zip);
+
+	if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
+
+		return(NULL);
+	}
+
+	offset = (ulint) *ptr++;
+	len = (ulint) *ptr++;
+
+	if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) {
+corrupt:
+		recv_sys->found_corrupt_log = TRUE;
+
+		return(NULL);
+	}
+
+	if (UNIV_UNLIKELY(end_ptr < ptr + len)) {
+
+		return(NULL);
+	}
+
+	if (page) {
+		if (UNIV_UNLIKELY(!page_zip)) {
+
+			goto corrupt;
+		}
+#ifdef UNIV_ZIP_DEBUG
+		ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+		memcpy(page + offset, ptr, len);
+		memcpy(page_zip->data + offset, ptr, len);
+
+#ifdef UNIV_ZIP_DEBUG
+		ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+	}
+
+	return(ptr + len);
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Write a log record of writing to the uncompressed header portion of a page. */
+UNIV_INTERN
+void
+page_zip_write_header_log(
+/*======================*/
+	const byte*	data,	/*!< in: data on the uncompressed page */
+	ulint		length,	/*!< in: length of the data */
+	mtr_t*		mtr)	/*!< in: mini-transaction */
+{
+	byte*	log_ptr	= mlog_open(mtr, 11 + 1 + 1);
+	ulint	offset	= page_offset(data);
+
+	ut_ad(offset < PAGE_DATA);
+	ut_ad(offset + length < PAGE_DATA);
+#if PAGE_DATA > 255
+# error "PAGE_DATA > 255"
+#endif
+	ut_ad(length < 256);
+
+	/* If no logging is requested, we may return now */
+	if (UNIV_UNLIKELY(!log_ptr)) {
+
+		return;
+	}
+
+	log_ptr = mlog_write_initial_log_record_fast(
+		(byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr);
+	*log_ptr++ = (byte) offset;
+	*log_ptr++ = (byte) length;
+	mlog_close(mtr, log_ptr);
+
+	mlog_catenate_string(mtr, data, length);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Reorganize and compress a page.  This is a low-level operation for
+compressed pages, to be used when page_zip_compress() fails.
+On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
+The function btr_page_reorganize() should be preferred whenever possible.
+IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
+non-clustered index, the caller must update the insert buffer free
+bits in the same mini-transaction in such a way that the modification
+will be redo-logged.
+@return TRUE on success, FALSE on failure; page and page_zip will be
+left intact on failure. */
+UNIV_INTERN
+ibool
+page_zip_reorganize(
+/*================*/
+	buf_block_t*	block,	/*!< in/out: page with compressed page;
+				on the compressed page, in: size;
+				out: data, n_blobs,
+				m_start, m_end, m_nonempty */
+	dict_index_t*	index,	/*!< in: index of the B-tree node */
+	mtr_t*		mtr)	/*!< in: mini-transaction */
+{
+	page_zip_des_t*	page_zip	= buf_block_get_page_zip(block);
+	page_t*		page		= buf_block_get_frame(block);
+	buf_block_t*	temp_block;
+	page_t*		temp_page;
+	ulint		log_mode;
+
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(page_is_comp(page));
+	ut_ad(!dict_index_is_ibuf(index));
+	/* Note that page_zip_validate(page_zip, page) may fail here. */
+	UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+	/* Disable logging */
+	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+
+#ifndef UNIV_HOTBACKUP
+	temp_block = buf_block_alloc(0);
+	btr_search_drop_page_hash_index(block);
+	block->check_index_page_at_flush = TRUE;
+#else /* !UNIV_HOTBACKUP */
+	ut_ad(block == back_block1);
+	temp_block = back_block2;
+#endif /* !UNIV_HOTBACKUP */
+	temp_page = temp_block->frame;
+
+	/* Copy the old page to temporary space */
+	buf_frame_copy(temp_page, page);
+
+	/* Recreate the page: note that global data on page (possible
+	segment headers, next page-field, etc.) is preserved intact */
+
+	page_create(block, mtr, TRUE);
+
+	/* Copy the records from the temporary space to the recreated page;
+	do not copy the lock bits yet */
+
+	page_copy_rec_list_end_no_locks(block, temp_block,
+					page_get_infimum_rec(temp_page),
+					index, mtr);
+
+	if (!dict_index_is_clust(index) && page_is_leaf(temp_page)) {
+		/* Copy max trx id to recreated page */
+		trx_id_t	max_trx_id = page_get_max_trx_id(temp_page);
+		page_set_max_trx_id(block, NULL, max_trx_id, NULL);
+		ut_ad(!ut_dulint_is_zero(max_trx_id));
+	}
+
+	/* Restore logging. */
+	mtr_set_log_mode(mtr, log_mode);
+
+	if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
+
+		/* Restore the old page and exit. */
+		buf_frame_copy(page, temp_page);
+
+#ifndef UNIV_HOTBACKUP
+		buf_block_free(temp_block);
+#endif /* !UNIV_HOTBACKUP */
+		return(FALSE);
+	}
+
+	lock_move_reorganize_page(block, temp_block);
+
+#ifndef UNIV_HOTBACKUP
+	buf_block_free(temp_block);
+#endif /* !UNIV_HOTBACKUP */
+	return(TRUE);
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Copy the records of a page byte for byte.  Do not copy the page header
+or trailer, except those B-tree header fields that are directly
+related to the storage of records.  Also copy PAGE_MAX_TRX_ID.
+NOTE: The caller must update the lock table and the adaptive hash index. */
+UNIV_INTERN
+void
+page_zip_copy_recs(
+/*===============*/
+	page_zip_des_t*		page_zip,	/*!< out: copy of src_zip
+						(n_blobs, m_start, m_end,
+						m_nonempty, data[0..size-1]) */
+	page_t*			page,		/*!< out: copy of src */
+	const page_zip_des_t*	src_zip,	/*!< in: compressed page */
+	const page_t*		src,		/*!< in: page */
+	dict_index_t*		index,		/*!< in: index of the B-tree */
+	mtr_t*			mtr)		/*!< in: mini-transaction */
+{
+	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, (page_t*) src, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(!dict_index_is_ibuf(index));
+#ifdef UNIV_ZIP_DEBUG
+	/* The B-tree operations that call this function may set
+	FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
+	mismatch.  A strict page_zip_validate() will be executed later
+	during the B-tree operations. */
+	ut_a(page_zip_validate_low(src_zip, src, TRUE));
+#endif /* UNIV_ZIP_DEBUG */
+	ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
+	if (UNIV_UNLIKELY(src_zip->n_blobs)) {
+		ut_a(page_is_leaf(src));
+		ut_a(dict_index_is_clust(index));
+	}
+
+	/* The PAGE_MAX_TRX_ID must be set on leaf pages of secondary
+	indexes.  It does not matter on other pages. */
+	ut_a(dict_index_is_clust(index) || !page_is_leaf(src)
+	     || !ut_dulint_is_zero(page_get_max_trx_id(src)));
+
+	UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
+	UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
+	UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
+	UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));
+
+	/* Copy those B-tree page header fields that are related to
+	the records stored in the page.  Also copy the field
+	PAGE_MAX_TRX_ID.  Skip the rest of the page header and
+	trailer.  On the compressed page, there is no trailer. */
+#if PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END
+# error "PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END"
+#endif
+	memcpy(PAGE_HEADER + page, PAGE_HEADER + src,
+	       PAGE_HEADER_PRIV_END);
+	memcpy(PAGE_DATA + page, PAGE_DATA + src,
+	       UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
+	memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data,
+	       PAGE_HEADER_PRIV_END);
+	memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
+	       page_zip_get_size(page_zip) - PAGE_DATA);
+
+	/* Copy all fields of src_zip to page_zip, except the pointer
+	to the compressed data page. */
+	{
+		page_zip_t*	data = page_zip->data;
+		memcpy(page_zip, src_zip, sizeof *page_zip);
+		page_zip->data = data;
+	}
+	ut_ad(page_zip_get_trailer_len(page_zip,
+				       dict_index_is_clust(index), NULL)
+	      + page_zip->m_end < page_zip_get_size(page_zip));
+
+	if (!page_is_leaf(src)
+	    && UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
+	    && UNIV_LIKELY(mach_read_from_4(page
+					    + FIL_PAGE_PREV) != FIL_NULL)) {
+		/* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
+		ulint	offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
+						 TRUE);
+		if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
+			rec_t*	rec = page + offs;
+			ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
+			     & REC_INFO_MIN_REC_FLAG);
+			rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG;
+		}
+	}
+
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+	page_zip_compress_write_log(page_zip, page, index, mtr);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Parses a log record of compressing an index page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_compress(
+/*====================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	page_t*		page,	/*!< out: uncompressed page */
+	page_zip_des_t*	page_zip)/*!< out: compressed page */
+{
+	ulint	size;
+	ulint	trailer_size;
+
+	ut_ad(ptr && end_ptr);
+	ut_ad(!page == !page_zip);
+
+	if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
+
+		return(NULL);
+	}
+
+	size = mach_read_from_2(ptr);
+	ptr += 2;
+	trailer_size = mach_read_from_2(ptr);
+	ptr += 2;
+
+	if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) {
+
+		return(NULL);
+	}
+
+	if (page) {
+		if (UNIV_UNLIKELY(!page_zip)
+		    || UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) {
+corrupt:
+			recv_sys->found_corrupt_log = TRUE;
+
+			return(NULL);
+		}
+
+		memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
+		memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
+		memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
+		memset(page_zip->data + FIL_PAGE_TYPE + size, 0,
+		       page_zip_get_size(page_zip) - trailer_size
+		       - (FIL_PAGE_TYPE + size));
+		memcpy(page_zip->data + page_zip_get_size(page_zip)
+		       - trailer_size, ptr + 8 + size, trailer_size);
+
+		if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page))) {
+
+			goto corrupt;
+		}
+	}
+
+	return(ptr + 8 + size + trailer_size);
+}
+
+/**********************************************************************//**
+Calculate the compressed page checksum.
+@return	page checksum */
+UNIV_INTERN
+ulint
+page_zip_calc_checksum(
+/*===================*/
+	const void*	data,	/*!< in: compressed page */
+	ulint		size)	/*!< in: size of compressed page */
+{
+	/* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
+	and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */
+
+	const Bytef*	s	= data;
+	uLong		adler;
+
+	ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+
+	adler = adler32(0L, s + FIL_PAGE_OFFSET,
+			FIL_PAGE_LSN - FIL_PAGE_OFFSET);
+	adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
+	adler = adler32(adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+			size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+
+	return((ulint) adler);
+}
diff --git a/storage/innobase/pars/lexyy.c b/storage/innodb_plugin/pars/lexyy.c
similarity index 95%
rename from storage/innobase/pars/lexyy.c
rename to storage/innodb_plugin/pars/lexyy.c
index b65de138573..37d892e51e3 100644
--- a/storage/innobase/pars/lexyy.c
+++ b/storage/innodb_plugin/pars/lexyy.c
@@ -1,7 +1,25 @@
-#include "univ.i"
-#line 2 "_flex_tmp.c"
+/*****************************************************************************
 
-#line 4 "_flex_tmp.c"
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+#include "univ.i"
+#line 2 "lexyy.c"
+
+#line 4 "lexyy.c"
 
 #define  YY_INT_ALIGNED short int
 
@@ -141,9 +159,9 @@ typedef unsigned int flex_uint32_t;
 typedef struct yy_buffer_state *YY_BUFFER_STATE;
 #endif
 
-extern int yyleng;
+static int yyleng;
 
-extern FILE *yyin, *yyout;
+static FILE *yyin, *yyout;
 
 #define EOB_ACT_CONTINUE_SCAN 0
 #define EOB_ACT_END_OF_FILE 1
@@ -265,7 +283,7 @@ static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
 /* yy_hold_char holds the character lost when yytext is formed. */
 static char yy_hold_char;
 static int yy_n_chars;		/* number of characters read into yy_ch_buf */
-int yyleng;
+static int yyleng;
 
 /* Points to current character in buffer. */
 static char *yy_c_buf_p = (char *) 0;
@@ -277,13 +295,13 @@ static int yy_start = 0;	/* start state number */
  */
 static int yy_did_buffer_switch_on_eof;
 
-void yyrestart (FILE *input_file  );
-void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer  );
-YY_BUFFER_STATE yy_create_buffer (FILE *file,int size  );
-void yy_delete_buffer (YY_BUFFER_STATE b  );
-void yy_flush_buffer (YY_BUFFER_STATE b  );
-void yypush_buffer_state (YY_BUFFER_STATE new_buffer  );
-void yypop_buffer_state (void );
+static void yyrestart (FILE *input_file  );
+__attribute__((unused)) static void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer  );
+static YY_BUFFER_STATE yy_create_buffer (FILE *file,int size  );
+static void yy_delete_buffer (YY_BUFFER_STATE b  );
+static void yy_flush_buffer (YY_BUFFER_STATE b  );
+__attribute__((unused)) static void yypush_buffer_state (YY_BUFFER_STATE new_buffer  );
+__attribute__((unused)) static void yypop_buffer_state (void );
 
 static void yyensure_buffer_stack (void );
 static void yy_load_buffer_state (void );
@@ -295,9 +313,9 @@ YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size  );
 YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str  );
 YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len  );
 
-void *yyalloc (yy_size_t  );
-void *yyrealloc (void *,yy_size_t  );
-void yyfree (void *  );
+static void *yyalloc (yy_size_t  );
+static void *yyrealloc (void *,yy_size_t  );
+static void yyfree (void *  );
 
 #define yy_new_buffer yy_create_buffer
 
@@ -330,15 +348,15 @@ void yyfree (void *  );
 
 typedef unsigned char YY_CHAR;
 
-FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
+static FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
 
 typedef int yy_state_type;
 
-extern int yylineno;
+static int yylineno;
 
-int yylineno = 1;
+static int yylineno = 1;
 
-extern char *yytext;
+static char *yytext;
 #define yytext_ptr yytext
 
 static yy_state_type yy_get_previous_state (void );
@@ -673,8 +691,8 @@ static yyconst flex_int16_t yy_chk[499] =
 static yy_state_type yy_last_accepting_state;
 static char *yy_last_accepting_cpos;
 
-extern int yy_flex_debug;
-int yy_flex_debug = 0;
+static int yy_flex_debug;
+static int yy_flex_debug = 0;
 
 /* The intent behind this definition is that it'll catch
  * any uses of REJECT which flex missed.
@@ -683,9 +701,9 @@ int yy_flex_debug = 0;
 #define yymore() yymore_used_but_not_detected
 #define YY_MORE_ADJ 0
 #define YY_RESTORE_YY_MORE_OFFSET
-char *yytext;
+static char *yytext;
 #line 1 "pars0lex.l"
-/******************************************************
+/**************************************************//**
 SQL parser lexical analyzer: input file for the GNU Flex lexer generator
 
 (c) 1997 Innobase Oy
@@ -729,13 +747,13 @@ Linux.
 static ulint	stringbuf_len_alloc = 0; /* Allocated length */
 static ulint	stringbuf_len = 0; /* Current length */
 static char*	stringbuf; /* Start of buffer */
-/* Appends a string to the buffer. */
+/** Appends a string to the buffer. */
 static
 void
 string_append(
 /*==========*/
-	const char*	str,	/* in: string to be appended */
-	ulint		len)	/* in: length of the string */
+	const char*	str,	/*!< in: string to be appended */
+	ulint		len)	/*!< in: length of the string */
 {
 	if (stringbuf == NULL) {
 		stringbuf = malloc(1);
@@ -756,7 +774,7 @@ string_append(
 
 
 
-#line 759 "_flex_tmp.c"
+#line 759 "lexyy.c"
 
 #define INITIAL 0
 #define comment 1
@@ -880,9 +898,9 @@ static int input (void );
 #ifndef YY_DECL
 #define YY_DECL_IS_OURS 1
 
-extern int yylex (void);
+UNIV_INTERN int yylex (void);
 
-#define YY_DECL int yylex (void)
+#define YY_DECL UNIV_INTERN int yylex (void)
 #endif /* !YY_DECL */
 
 /* Code executed at the beginning of each rule, after yytext and yyleng
@@ -911,7 +929,7 @@ YY_DECL
 #line 92 "pars0lex.l"
 
 
-#line 914 "_flex_tmp.c"
+#line 914 "lexyy.c"
 
 	if ( (yy_init) )
 		{
@@ -1913,7 +1931,7 @@ YY_RULE_SETUP
 #line 648 "pars0lex.l"
 YY_FATAL_ERROR( "flex scanner jammed" );
 	YY_BREAK
-#line 1916 "_flex_tmp.c"
+#line 1916 "lexyy.c"
 case YY_STATE_EOF(INITIAL):
 case YY_STATE_EOF(comment):
 case YY_STATE_EOF(quoted):
@@ -2317,7 +2335,7 @@ static int yy_get_next_buffer (void)
  * 
  * @note This function does not reset the start condition to @c INITIAL .
  */
-    void yyrestart  (FILE * input_file )
+    static void yyrestart  (FILE * input_file )
 {
     
 	if ( ! YY_CURRENT_BUFFER ){
@@ -2334,7 +2352,7 @@ static int yy_get_next_buffer (void)
  * @param new_buffer The new input buffer.
  * 
  */
-    void yy_switch_to_buffer  (YY_BUFFER_STATE  new_buffer )
+    __attribute__((unused)) static void yy_switch_to_buffer  (YY_BUFFER_STATE  new_buffer )
 {
     
 	/* TODO. We should be able to replace this entire function body
@@ -2379,7 +2397,7 @@ static void yy_load_buffer_state  (void)
  * 
  * @return the allocated buffer state.
  */
-    YY_BUFFER_STATE yy_create_buffer  (FILE * file, int  size )
+    static YY_BUFFER_STATE yy_create_buffer  (FILE * file, int  size )
 {
 	YY_BUFFER_STATE b;
     
@@ -2407,7 +2425,7 @@ static void yy_load_buffer_state  (void)
  * @param b a buffer created with yy_create_buffer()
  * 
  */
-    void yy_delete_buffer (YY_BUFFER_STATE  b )
+    static void yy_delete_buffer (YY_BUFFER_STATE  b )
 {
     
 	if ( ! b )
@@ -2454,7 +2472,7 @@ static void yy_load_buffer_state  (void)
  * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
  * 
  */
-    void yy_flush_buffer (YY_BUFFER_STATE  b )
+    static void yy_flush_buffer (YY_BUFFER_STATE  b )
 {
     	if ( ! b )
 		return;
@@ -2483,7 +2501,7 @@ static void yy_load_buffer_state  (void)
  *  @param new_buffer The new state.
  *  
  */
-void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
+__attribute__((unused)) static void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
 {
     	if (new_buffer == NULL)
 		return;
@@ -2513,7 +2531,7 @@ void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
  *  The next element becomes the new top.
  *  
  */
-void yypop_buffer_state (void)
+__attribute__((unused)) static void yypop_buffer_state (void)
 {
     	if (!YY_CURRENT_BUFFER)
 		return;
@@ -2603,7 +2621,7 @@ static void yy_fatal_error (yyconst char* msg )
 /** Get the current line number.
  * 
  */
-int yyget_lineno  (void)
+__attribute__((unused)) static int yyget_lineno  (void)
 {
         
     return yylineno;
@@ -2612,7 +2630,7 @@ int yyget_lineno  (void)
 /** Get the input stream.
  * 
  */
-FILE *yyget_in  (void)
+__attribute__((unused)) static FILE *yyget_in  (void)
 {
         return yyin;
 }
@@ -2620,7 +2638,7 @@ FILE *yyget_in  (void)
 /** Get the output stream.
  * 
  */
-FILE *yyget_out  (void)
+__attribute__((unused)) static FILE *yyget_out  (void)
 {
         return yyout;
 }
@@ -2628,7 +2646,7 @@ FILE *yyget_out  (void)
 /** Get the length of the current token.
  * 
  */
-int yyget_leng  (void)
+__attribute__((unused)) static int yyget_leng  (void)
 {
         return yyleng;
 }
@@ -2637,7 +2655,7 @@ int yyget_leng  (void)
  * 
  */
 
-char *yyget_text  (void)
+__attribute__((unused)) static char *yyget_text  (void)
 {
         return yytext;
 }
@@ -2646,7 +2664,7 @@ char *yyget_text  (void)
  * @param line_number
  * 
  */
-void yyset_lineno (int  line_number )
+__attribute__((unused)) static void yyset_lineno (int  line_number )
 {
     
     yylineno = line_number;
@@ -2658,28 +2676,28 @@ void yyset_lineno (int  line_number )
  * 
  * @see yy_switch_to_buffer
  */
-void yyset_in (FILE *  in_str )
+__attribute__((unused)) static void yyset_in (FILE *  in_str )
 {
         yyin = in_str ;
 }
 
-void yyset_out (FILE *  out_str )
+__attribute__((unused)) static void yyset_out (FILE *  out_str )
 {
         yyout = out_str ;
 }
 
-int yyget_debug  (void)
+__attribute__((unused)) static int yyget_debug  (void)
 {
         return yy_flex_debug;
 }
 
-void yyset_debug (int  bdebug )
+__attribute__((unused)) static void yyset_debug (int  bdebug )
 {
         yy_flex_debug = bdebug ;
 }
 
 /* yylex_destroy is for both reentrant and non-reentrant scanners. */
-int yylex_destroy  (void)
+__attribute__((unused)) static int yylex_destroy  (void)
 {
     
     /* Pop the buffer stack, destroying each element. */
@@ -2720,12 +2738,12 @@ static int yy_flex_strlen (yyconst char * s )
 }
 #endif
 
-void *yyalloc (yy_size_t  size )
+static void *yyalloc (yy_size_t  size )
 {
 	return (void *) malloc( size );
 }
 
-void *yyrealloc  (void * ptr, yy_size_t  size )
+static void *yyrealloc  (void * ptr, yy_size_t  size )
 {
 	/* The cast to (char *) in the following accommodates both
 	 * implementations that use char* generic pointers, and those
@@ -2737,7 +2755,7 @@ void *yyrealloc  (void * ptr, yy_size_t  size )
 	return (void *) realloc( (char *) ptr, size );
 }
 
-void yyfree (void * ptr )
+static void yyfree (void * ptr )
 {
 	free( (char *) ptr );	/* see yyrealloc() for (char *) cast */
 }
diff --git a/storage/innodb_plugin/pars/make_bison.sh b/storage/innodb_plugin/pars/make_bison.sh
new file mode 100755
index 00000000000..09bb86e3106
--- /dev/null
+++ b/storage/innodb_plugin/pars/make_bison.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+# 
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# generate parser files from bison input files.
+
+set -eu
+TMPFILE=pars0grm.tab.c
+OUTFILE=pars0grm.c
+
+bison -d pars0grm.y
+mv pars0grm.tab.h ../include/pars0grm.h
+
+sed -e '
+s/'"$TMPFILE"'/'"$OUTFILE"'/;
+s/^\(\(YYSTYPE\|int\) yy\(char\|nerrs\)\)/static \1/;
+s/\(\(YYSTYPE\|int\) yy\(lval\|parse\)\)/UNIV_INTERN \1/;
+' < "$TMPFILE" > "$OUTFILE"
+
+rm "$TMPFILE"
diff --git a/storage/innodb_plugin/pars/make_flex.sh b/storage/innodb_plugin/pars/make_flex.sh
new file mode 100755
index 00000000000..89308a6636f
--- /dev/null
+++ b/storage/innodb_plugin/pars/make_flex.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+#
+# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+# 
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# generate lexer files from flex input files.
+
+set -eu
+
+TMPFILE=_flex_tmp.c
+OUTFILE=lexyy.c
+
+flex -o $TMPFILE pars0lex.l
+
+# AIX needs its includes done in a certain order, so include "univ.i" first
+# to be sure we get it right.
+echo '#include "univ.i"' > $OUTFILE
+
+# flex assigns a pointer to an int in one place without a cast, resulting in
+# a warning on Win64.  Add the cast.  Also define some symbols as static.
+sed -e '
+s/'"$TMPFILE"'/'"$OUTFILE"'/;
+s/\(int offset = \)\((yy_c_buf_p) - (yytext_ptr)\);/\1(int)(\2);/;
+s/\(void yy\(restart\|_\(delete\|flush\)_buffer\)\)/static \1/;
+s/\(void yy_switch_to_buffer\)/__attribute__((unused)) static \1/;
+s/\(void yy\(push\|pop\)_buffer_state\)/__attribute__((unused)) static \1/;
+s/\(YY_BUFFER_STATE yy_create_buffer\)/static \1/;
+s/\(\(int\|void\) yy[gs]et_\)/__attribute__((unused)) static \1/;
+s/\(void \*\?yy\(\(re\)\?alloc\|free\)\)/static \1/;
+s/\(extern \)\?\(int yy\(leng\|lineno\|_flex_debug\)\)/static \2/;
+s/\(int yylex_destroy\)/__attribute__((unused)) static \1/;
+s/\(extern \)\?\(int yylex \)/UNIV_INTERN \2/;
+s/^\(\(FILE\|char\) *\* *yyget\)/__attribute__((unused)) static \1/;
+s/^\(extern \)\?\(\(FILE\|char\) *\* *yy\)/static \2/;
+' < $TMPFILE >> $OUTFILE
+
+rm $TMPFILE
diff --git a/storage/innobase/pars/pars0grm.c b/storage/innodb_plugin/pars/pars0grm.c
similarity index 85%
rename from storage/innobase/pars/pars0grm.c
rename to storage/innodb_plugin/pars/pars0grm.c
index 2e39b05bada..d667970735e 100644
--- a/storage/innobase/pars/pars0grm.c
+++ b/storage/innodb_plugin/pars/pars0grm.c
@@ -1,27 +1,29 @@
-/* A Bison parser, made by GNU Bison 1.875d.  */
+/*****************************************************************************
 
-/* Skeleton parser for Yacc-like parsing with Bison,
-   Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
 
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
+As a special exception, when this file is copied by Bison into a
+Bison output file, you may use that output file without restriction.
+This special exception was added by the Free Software Foundation
+in version 1.24 of Bison.
 
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
 
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 
-/* As a special exception, when this file is copied by Bison into a
-   Bison output file, you may use that output file without restriction.
-   This special exception was added by the Free Software Foundation
-   in version 1.24 of Bison.  */
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/* A Bison parser, made by GNU Bison 2.0.  */
 
 /* Written by Richard Stallman by simplifying the original so called
    ``semantic'' parser.  */
@@ -292,8 +294,8 @@ typedef int YYSTYPE;
 /* Copy the second part of user declarations.  */
 
 
-/* Line 214 of yacc.c.  */
-#line 297 "pars0grm.tab.c"
+/* Line 213 of yacc.c.  */
+#line 297 "pars0grm.c"
 
 #if ! defined (yyoverflow) || YYERROR_VERBOSE
 
@@ -308,14 +310,10 @@ typedef int YYSTYPE;
 
 # ifdef YYSTACK_USE_ALLOCA
 #  if YYSTACK_USE_ALLOCA
-#   define YYSTACK_ALLOC alloca
-#  endif
-# else
-#  if defined (alloca) || defined (_ALLOCA_H)
-#   define YYSTACK_ALLOC alloca
-#  else
 #   ifdef __GNUC__
 #    define YYSTACK_ALLOC __builtin_alloca
+#   else
+#    define YYSTACK_ALLOC alloca
 #   endif
 #  endif
 # endif
@@ -1059,20 +1057,53 @@ do								\
     }								\
 while (0)
 
+
 #define YYTERROR	1
 #define YYERRCODE	256
 
-/* YYLLOC_DEFAULT -- Compute the default location (before the actions
-   are run).  */
 
+/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
+   If N is 0, then set CURRENT to the empty location which ends
+   the previous symbol: RHS[0] (always defined).  */
+
+#define YYRHSLOC(Rhs, K) ((Rhs)[K])
 #ifndef YYLLOC_DEFAULT
-# define YYLLOC_DEFAULT(Current, Rhs, N)		\
-   ((Current).first_line   = (Rhs)[1].first_line,	\
-    (Current).first_column = (Rhs)[1].first_column,	\
-    (Current).last_line    = (Rhs)[N].last_line,	\
-    (Current).last_column  = (Rhs)[N].last_column)
+# define YYLLOC_DEFAULT(Current, Rhs, N)				\
+    do									\
+      if (N)								\
+	{								\
+	  (Current).first_line   = YYRHSLOC (Rhs, 1).first_line;	\
+	  (Current).first_column = YYRHSLOC (Rhs, 1).first_column;	\
+	  (Current).last_line    = YYRHSLOC (Rhs, N).last_line;		\
+	  (Current).last_column  = YYRHSLOC (Rhs, N).last_column;	\
+	}								\
+      else								\
+	{								\
+	  (Current).first_line   = (Current).last_line   =		\
+	    YYRHSLOC (Rhs, 0).last_line;				\
+	  (Current).first_column = (Current).last_column =		\
+	    YYRHSLOC (Rhs, 0).last_column;				\
+	}								\
+    while (0)
 #endif
 
+
+/* YY_LOCATION_PRINT -- Print the location on the stream.
+   This macro was not mandated originally: define only if we know
+   we won't break user code: when these are the locations we know.  */
+
+#ifndef YY_LOCATION_PRINT
+# if YYLTYPE_IS_TRIVIAL
+#  define YY_LOCATION_PRINT(File, Loc)			\
+     fprintf (File, "%d.%d-%d.%d",			\
+              (Loc).first_line, (Loc).first_column,	\
+              (Loc).last_line,  (Loc).last_column)
+# else
+#  define YY_LOCATION_PRINT(File, Loc) ((void) 0)
+# endif
+#endif
+
+
 /* YYLEX -- calling `yylex' with the right arguments.  */
 
 #ifdef YYLEX_PARAM
@@ -1095,19 +1126,13 @@ do {						\
     YYFPRINTF Args;				\
 } while (0)
 
-# define YYDSYMPRINT(Args)			\
-do {						\
-  if (yydebug)					\
-    yysymprint Args;				\
-} while (0)
-
-# define YYDSYMPRINTF(Title, Token, Value, Location)		\
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location)		\
 do {								\
   if (yydebug)							\
     {								\
       YYFPRINTF (stderr, "%s ", Title);				\
       yysymprint (stderr, 					\
-                  Token, Value);	\
+                  Type, Value);	\
       YYFPRINTF (stderr, "\n");					\
     }								\
 } while (0)
@@ -1174,8 +1199,7 @@ do {					\
 int yydebug;
 #else /* !YYDEBUG */
 # define YYDPRINTF(Args)
-# define YYDSYMPRINT(Args)
-# define YYDSYMPRINTF(Title, Token, Value, Location)
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
 # define YY_STACK_PRINT(Bottom, Top)
 # define YY_REDUCE_PRINT(Rule)
 #endif /* !YYDEBUG */
@@ -1193,10 +1217,6 @@ int yydebug;
    SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH)
    evaluated with infinite-precision integer arithmetic.  */
 
-#if defined (YYMAXDEPTH) && YYMAXDEPTH == 0
-# undef YYMAXDEPTH
-#endif
-
 #ifndef YYMAXDEPTH
 # define YYMAXDEPTH 10000
 #endif
@@ -1278,15 +1298,15 @@ yysymprint (yyoutput, yytype, yyvaluep)
   (void) yyvaluep;
 
   if (yytype < YYNTOKENS)
-    {
-      YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
-# ifdef YYPRINT
-      YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
-# endif
-    }
+    YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
   else
     YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
 
+
+# ifdef YYPRINT
+  if (yytype < YYNTOKENS)
+    YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
+# endif
   switch (yytype)
     {
       default:
@@ -1302,10 +1322,11 @@ yysymprint (yyoutput, yytype, yyvaluep)
 
 #if defined (__STDC__) || defined (__cplusplus)
 static void
-yydestruct (int yytype, YYSTYPE *yyvaluep)
+yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
 #else
 static void
-yydestruct (yytype, yyvaluep)
+yydestruct (yymsg, yytype, yyvaluep)
+    const char *yymsg;
     int yytype;
     YYSTYPE *yyvaluep;
 #endif
@@ -1313,6 +1334,10 @@ yydestruct (yytype, yyvaluep)
   /* Pacify ``unused variable'' warnings.  */
   (void) yyvaluep;
 
+  if (!yymsg)
+    yymsg = "Deleting";
+  YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
+
   switch (yytype)
     {
 
@@ -1326,28 +1351,28 @@ yydestruct (yytype, yyvaluep)
 
 #ifdef YYPARSE_PARAM
 # if defined (__STDC__) || defined (__cplusplus)
-int yyparse (void *YYPARSE_PARAM);
+UNIV_INTERN int yyparse (void *YYPARSE_PARAM);
 # else
-int yyparse ();
+UNIV_INTERN int yyparse ();
 # endif
 #else /* ! YYPARSE_PARAM */
 #if defined (__STDC__) || defined (__cplusplus)
-int yyparse (void);
+UNIV_INTERN int yyparse (void);
 #else
-int yyparse ();
+UNIV_INTERN int yyparse ();
 #endif
 #endif /* ! YYPARSE_PARAM */
 
 
 
-/* The lookahead symbol.  */
-int yychar;
+/* The look-ahead symbol.  */
+static int yychar;
 
-/* The semantic value of the lookahead symbol.  */
-YYSTYPE yylval;
+/* The semantic value of the look-ahead symbol.  */
+UNIV_INTERN YYSTYPE yylval;
 
 /* Number of syntax errors so far.  */
-int yynerrs;
+static int yynerrs;
 
 
 
@@ -1357,9 +1382,9 @@ int yynerrs;
 
 #ifdef YYPARSE_PARAM
 # if defined (__STDC__) || defined (__cplusplus)
-int yyparse (void *YYPARSE_PARAM)
+UNIV_INTERN int yyparse (void *YYPARSE_PARAM)
 # else
-int yyparse (YYPARSE_PARAM)
+UNIV_INTERN int yyparse (YYPARSE_PARAM)
   void *YYPARSE_PARAM;
 # endif
 #else /* ! YYPARSE_PARAM */
@@ -1379,7 +1404,7 @@ yyparse ()
   int yyresult;
   /* Number of tokens to shift before error messages enabled.  */
   int yyerrstatus;
-  /* Lookahead token as an internal (translated) token number.  */
+  /* Look-ahead token as an internal (translated) token number.  */
   int yytoken = 0;
 
   /* Three stacks and their tools:
@@ -1431,6 +1456,8 @@ yyparse ()
   yyvsp = yyvs;
 
 
+  yyvsp[0] = yylval;
+
   goto yysetstate;
 
 /*------------------------------------------------------------.
@@ -1520,18 +1547,18 @@ yyparse ()
 yybackup:
 
 /* Do appropriate processing given the current state.  */
-/* Read a lookahead token if we need one and don't already have one.  */
+/* Read a look-ahead token if we need one and don't already have one.  */
 /* yyresume: */
 
-  /* First try to decide what to do without reference to lookahead token.  */
+  /* First try to decide what to do without reference to look-ahead token.  */
 
   yyn = yypact[yystate];
   if (yyn == YYPACT_NINF)
     goto yydefault;
 
-  /* Not known => get a lookahead token if don't already have one.  */
+  /* Not known => get a look-ahead token if don't already have one.  */
 
-  /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol.  */
+  /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol.  */
   if (yychar == YYEMPTY)
     {
       YYDPRINTF ((stderr, "Reading a token: "));
@@ -1546,7 +1573,7 @@ yybackup:
   else
     {
       yytoken = YYTRANSLATE (yychar);
-      YYDSYMPRINTF ("Next token is", yytoken, &yylval, &yylloc);
+      YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
     }
 
   /* If the proper action on seeing token YYTOKEN is to reduce or to
@@ -1566,8 +1593,8 @@ yybackup:
   if (yyn == YYFINAL)
     YYACCEPT;
 
-  /* Shift the lookahead token.  */
-  YYDPRINTF ((stderr, "Shifting token %s, ", yytname[yytoken]));
+  /* Shift the look-ahead token.  */
+  YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
 
   /* Discard the token being shifted unless it is eof.  */
   if (yychar != YYEOF)
@@ -1618,277 +1645,277 @@ yyreduce:
     {
         case 25:
 #line 166 "pars0grm.y"
-    { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
     break;
 
   case 26:
 #line 168 "pars0grm.y"
-    { yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last((yyvsp[-1]), (yyvsp[0])); ;}
     break;
 
   case 27:
 #line 172 "pars0grm.y"
-    { yyval = yyvsp[0];;}
+    { (yyval) = (yyvsp[0]);;}
     break;
 
   case 28:
 #line 174 "pars0grm.y"
-    { yyval = pars_func(yyvsp[-3], yyvsp[-1]); ;}
+    { (yyval) = pars_func((yyvsp[-3]), (yyvsp[-1])); ;}
     break;
 
   case 29:
 #line 175 "pars0grm.y"
-    { yyval = yyvsp[0];;}
+    { (yyval) = (yyvsp[0]);;}
     break;
 
   case 30:
 #line 176 "pars0grm.y"
-    { yyval = yyvsp[0];;}
+    { (yyval) = (yyvsp[0]);;}
     break;
 
   case 31:
 #line 177 "pars0grm.y"
-    { yyval = yyvsp[0];;}
+    { (yyval) = (yyvsp[0]);;}
     break;
 
   case 32:
 #line 178 "pars0grm.y"
-    { yyval = yyvsp[0];;}
+    { (yyval) = (yyvsp[0]);;}
     break;
 
   case 33:
 #line 179 "pars0grm.y"
-    { yyval = yyvsp[0];;}
+    { (yyval) = (yyvsp[0]);;}
     break;
 
   case 34:
 #line 180 "pars0grm.y"
-    { yyval = yyvsp[0];;}
+    { (yyval) = (yyvsp[0]);;}
     break;
 
   case 35:
 #line 181 "pars0grm.y"
-    { yyval = yyvsp[0];;}
+    { (yyval) = (yyvsp[0]);;}
     break;
 
   case 36:
 #line 182 "pars0grm.y"
-    { yyval = pars_op('+', yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_op('+', (yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 37:
 #line 183 "pars0grm.y"
-    { yyval = pars_op('-', yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_op('-', (yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 38:
 #line 184 "pars0grm.y"
-    { yyval = pars_op('*', yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_op('*', (yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 39:
 #line 185 "pars0grm.y"
-    { yyval = pars_op('/', yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_op('/', (yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 40:
 #line 186 "pars0grm.y"
-    { yyval = pars_op('-', yyvsp[0], NULL); ;}
+    { (yyval) = pars_op('-', (yyvsp[0]), NULL); ;}
     break;
 
   case 41:
 #line 187 "pars0grm.y"
-    { yyval = yyvsp[-1]; ;}
+    { (yyval) = (yyvsp[-1]); ;}
     break;
 
   case 42:
 #line 188 "pars0grm.y"
-    { yyval = pars_op('=', yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_op('=', (yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 43:
 #line 189 "pars0grm.y"
-    { yyval = pars_op('<', yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_op('<', (yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 44:
 #line 190 "pars0grm.y"
-    { yyval = pars_op('>', yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_op('>', (yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 45:
 #line 191 "pars0grm.y"
-    { yyval = pars_op(PARS_GE_TOKEN, yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_op(PARS_GE_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 46:
 #line 192 "pars0grm.y"
-    { yyval = pars_op(PARS_LE_TOKEN, yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_op(PARS_LE_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 47:
 #line 193 "pars0grm.y"
-    { yyval = pars_op(PARS_NE_TOKEN, yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_op(PARS_NE_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 48:
 #line 194 "pars0grm.y"
-    { yyval = pars_op(PARS_AND_TOKEN, yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_op(PARS_AND_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 49:
 #line 195 "pars0grm.y"
-    { yyval = pars_op(PARS_OR_TOKEN, yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_op(PARS_OR_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 50:
 #line 196 "pars0grm.y"
-    { yyval = pars_op(PARS_NOT_TOKEN, yyvsp[0], NULL); ;}
+    { (yyval) = pars_op(PARS_NOT_TOKEN, (yyvsp[0]), NULL); ;}
     break;
 
   case 51:
 #line 198 "pars0grm.y"
-    { yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); ;}
+    { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[-2]), NULL); ;}
     break;
 
   case 52:
 #line 200 "pars0grm.y"
-    { yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); ;}
+    { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[-2]), NULL); ;}
     break;
 
   case 53:
 #line 204 "pars0grm.y"
-    { yyval = &pars_to_char_token; ;}
+    { (yyval) = &pars_to_char_token; ;}
     break;
 
   case 54:
 #line 205 "pars0grm.y"
-    { yyval = &pars_to_number_token; ;}
+    { (yyval) = &pars_to_number_token; ;}
     break;
 
   case 55:
 #line 206 "pars0grm.y"
-    { yyval = &pars_to_binary_token; ;}
+    { (yyval) = &pars_to_binary_token; ;}
     break;
 
   case 56:
 #line 208 "pars0grm.y"
-    { yyval = &pars_binary_to_number_token; ;}
+    { (yyval) = &pars_binary_to_number_token; ;}
     break;
 
   case 57:
 #line 209 "pars0grm.y"
-    { yyval = &pars_substr_token; ;}
+    { (yyval) = &pars_substr_token; ;}
     break;
 
   case 58:
 #line 210 "pars0grm.y"
-    { yyval = &pars_concat_token; ;}
+    { (yyval) = &pars_concat_token; ;}
     break;
 
   case 59:
 #line 211 "pars0grm.y"
-    { yyval = &pars_instr_token; ;}
+    { (yyval) = &pars_instr_token; ;}
     break;
 
   case 60:
 #line 212 "pars0grm.y"
-    { yyval = &pars_length_token; ;}
+    { (yyval) = &pars_length_token; ;}
     break;
 
   case 61:
 #line 213 "pars0grm.y"
-    { yyval = &pars_sysdate_token; ;}
+    { (yyval) = &pars_sysdate_token; ;}
     break;
 
   case 62:
 #line 214 "pars0grm.y"
-    { yyval = &pars_rnd_token; ;}
+    { (yyval) = &pars_rnd_token; ;}
     break;
 
   case 63:
 #line 215 "pars0grm.y"
-    { yyval = &pars_rnd_str_token; ;}
+    { (yyval) = &pars_rnd_str_token; ;}
     break;
 
   case 67:
 #line 226 "pars0grm.y"
-    { yyval = pars_stored_procedure_call(yyvsp[-4]); ;}
+    { (yyval) = pars_stored_procedure_call((yyvsp[-4])); ;}
     break;
 
   case 68:
 #line 231 "pars0grm.y"
-    { yyval = pars_procedure_call(yyvsp[-3], yyvsp[-1]); ;}
+    { (yyval) = pars_procedure_call((yyvsp[-3]), (yyvsp[-1])); ;}
     break;
 
   case 69:
 #line 235 "pars0grm.y"
-    { yyval = &pars_replstr_token; ;}
+    { (yyval) = &pars_replstr_token; ;}
     break;
 
   case 70:
 #line 236 "pars0grm.y"
-    { yyval = &pars_printf_token; ;}
+    { (yyval) = &pars_printf_token; ;}
     break;
 
   case 71:
 #line 237 "pars0grm.y"
-    { yyval = &pars_assert_token; ;}
+    { (yyval) = &pars_assert_token; ;}
     break;
 
   case 72:
 #line 241 "pars0grm.y"
-    { yyval = yyvsp[-2]; ;}
+    { (yyval) = (yyvsp[-2]); ;}
     break;
 
   case 73:
 #line 245 "pars0grm.y"
-    { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
     break;
 
   case 74:
 #line 247 "pars0grm.y"
-    { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 75:
 #line 251 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 76:
 #line 252 "pars0grm.y"
-    { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
     break;
 
   case 77:
 #line 254 "pars0grm.y"
-    { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 78:
 #line 258 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 79:
 #line 259 "pars0grm.y"
-    { yyval = que_node_list_add_last(NULL, yyvsp[0]);;}
+    { (yyval) = que_node_list_add_last(NULL, (yyvsp[0]));;}
     break;
 
   case 80:
 #line 260 "pars0grm.y"
-    { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 81:
 #line 264 "pars0grm.y"
-    { yyval = yyvsp[0]; ;}
+    { (yyval) = (yyvsp[0]); ;}
     break;
 
   case 82:
 #line 266 "pars0grm.y"
-    { yyval = pars_func(&pars_count_token,
+    { (yyval) = pars_func(&pars_count_token,
 				          que_node_list_add_last(NULL,
 					    sym_tab_add_int_lit(
 						pars_sym_tab_global, 1))); ;}
@@ -1896,74 +1923,74 @@ yyreduce:
 
   case 83:
 #line 271 "pars0grm.y"
-    { yyval = pars_func(&pars_count_token,
+    { (yyval) = pars_func(&pars_count_token,
 					    que_node_list_add_last(NULL,
 						pars_func(&pars_distinct_token,
 						     que_node_list_add_last(
-								NULL, yyvsp[-1])))); ;}
+								NULL, (yyvsp[-1]))))); ;}
     break;
 
   case 84:
 #line 277 "pars0grm.y"
-    { yyval = pars_func(&pars_sum_token,
+    { (yyval) = pars_func(&pars_sum_token,
 						que_node_list_add_last(NULL,
-									yyvsp[-1])); ;}
+									(yyvsp[-1]))); ;}
     break;
 
   case 85:
 #line 283 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 86:
 #line 284 "pars0grm.y"
-    { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
     break;
 
   case 87:
 #line 286 "pars0grm.y"
-    { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 88:
 #line 290 "pars0grm.y"
-    { yyval = pars_select_list(&pars_star_denoter,
+    { (yyval) = pars_select_list(&pars_star_denoter,
 								NULL); ;}
     break;
 
   case 89:
 #line 293 "pars0grm.y"
-    { yyval = pars_select_list(yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_select_list((yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 90:
 #line 294 "pars0grm.y"
-    { yyval = pars_select_list(yyvsp[0], NULL); ;}
+    { (yyval) = pars_select_list((yyvsp[0]), NULL); ;}
     break;
 
   case 91:
 #line 298 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 92:
 #line 299 "pars0grm.y"
-    { yyval = yyvsp[0]; ;}
+    { (yyval) = (yyvsp[0]); ;}
     break;
 
   case 93:
 #line 303 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 94:
 #line 305 "pars0grm.y"
-    { yyval = &pars_update_token; ;}
+    { (yyval) = &pars_update_token; ;}
     break;
 
   case 95:
 #line 309 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 96:
@@ -1973,375 +2000,375 @@ yyreduce:
 
   case 97:
 #line 315 "pars0grm.y"
-    { yyval = &pars_asc_token; ;}
+    { (yyval) = &pars_asc_token; ;}
     break;
 
   case 98:
 #line 316 "pars0grm.y"
-    { yyval = &pars_asc_token; ;}
+    { (yyval) = &pars_asc_token; ;}
     break;
 
   case 99:
 #line 317 "pars0grm.y"
-    { yyval = &pars_desc_token; ;}
+    { (yyval) = &pars_desc_token; ;}
     break;
 
   case 100:
 #line 321 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 101:
 #line 323 "pars0grm.y"
-    { yyval = pars_order_by(yyvsp[-1], yyvsp[0]); ;}
+    { (yyval) = pars_order_by((yyvsp[-1]), (yyvsp[0])); ;}
     break;
 
   case 102:
 #line 332 "pars0grm.y"
-    { yyval = pars_select_statement(yyvsp[-6], yyvsp[-4], yyvsp[-3],
-								yyvsp[-2], yyvsp[-1], yyvsp[0]); ;}
+    { (yyval) = pars_select_statement((yyvsp[-6]), (yyvsp[-4]), (yyvsp[-3]),
+								(yyvsp[-2]), (yyvsp[-1]), (yyvsp[0])); ;}
     break;
 
   case 103:
 #line 338 "pars0grm.y"
-    { yyval = yyvsp[0]; ;}
+    { (yyval) = (yyvsp[0]); ;}
     break;
 
   case 104:
 #line 343 "pars0grm.y"
-    { yyval = pars_insert_statement(yyvsp[-4], yyvsp[-1], NULL); ;}
+    { (yyval) = pars_insert_statement((yyvsp[-4]), (yyvsp[-1]), NULL); ;}
     break;
 
   case 105:
 #line 345 "pars0grm.y"
-    { yyval = pars_insert_statement(yyvsp[-1], NULL, yyvsp[0]); ;}
+    { (yyval) = pars_insert_statement((yyvsp[-1]), NULL, (yyvsp[0])); ;}
     break;
 
   case 106:
 #line 349 "pars0grm.y"
-    { yyval = pars_column_assignment(yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_column_assignment((yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 107:
 #line 353 "pars0grm.y"
-    { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
     break;
 
   case 108:
 #line 355 "pars0grm.y"
-    { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 109:
 #line 361 "pars0grm.y"
-    { yyval = yyvsp[0]; ;}
+    { (yyval) = (yyvsp[0]); ;}
     break;
 
   case 110:
 #line 367 "pars0grm.y"
-    { yyval = pars_update_statement_start(FALSE,
-								yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_update_statement_start(FALSE,
+								(yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 111:
 #line 373 "pars0grm.y"
-    { yyval = pars_update_statement(yyvsp[-1], NULL, yyvsp[0]); ;}
+    { (yyval) = pars_update_statement((yyvsp[-1]), NULL, (yyvsp[0])); ;}
     break;
 
   case 112:
 #line 378 "pars0grm.y"
-    { yyval = pars_update_statement(yyvsp[-1], yyvsp[0], NULL); ;}
+    { (yyval) = pars_update_statement((yyvsp[-1]), (yyvsp[0]), NULL); ;}
     break;
 
   case 113:
 #line 383 "pars0grm.y"
-    { yyval = pars_update_statement_start(TRUE,
-								yyvsp[0], NULL); ;}
+    { (yyval) = pars_update_statement_start(TRUE,
+								(yyvsp[0]), NULL); ;}
     break;
 
   case 114:
 #line 389 "pars0grm.y"
-    { yyval = pars_update_statement(yyvsp[-1], NULL, yyvsp[0]); ;}
+    { (yyval) = pars_update_statement((yyvsp[-1]), NULL, (yyvsp[0])); ;}
     break;
 
   case 115:
 #line 394 "pars0grm.y"
-    { yyval = pars_update_statement(yyvsp[-1], yyvsp[0], NULL); ;}
+    { (yyval) = pars_update_statement((yyvsp[-1]), (yyvsp[0]), NULL); ;}
     break;
 
   case 116:
 #line 399 "pars0grm.y"
-    { yyval = pars_row_printf_statement(yyvsp[0]); ;}
+    { (yyval) = pars_row_printf_statement((yyvsp[0])); ;}
     break;
 
   case 117:
 #line 404 "pars0grm.y"
-    { yyval = pars_assignment_statement(yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_assignment_statement((yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 118:
 #line 410 "pars0grm.y"
-    { yyval = pars_elsif_element(yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_elsif_element((yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 119:
 #line 414 "pars0grm.y"
-    { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
     break;
 
   case 120:
 #line 416 "pars0grm.y"
-    { yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last((yyvsp[-1]), (yyvsp[0])); ;}
     break;
 
   case 121:
 #line 420 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 122:
 #line 422 "pars0grm.y"
-    { yyval = yyvsp[0]; ;}
+    { (yyval) = (yyvsp[0]); ;}
     break;
 
   case 123:
 #line 423 "pars0grm.y"
-    { yyval = yyvsp[0]; ;}
+    { (yyval) = (yyvsp[0]); ;}
     break;
 
   case 124:
 #line 430 "pars0grm.y"
-    { yyval = pars_if_statement(yyvsp[-5], yyvsp[-3], yyvsp[-2]); ;}
+    { (yyval) = pars_if_statement((yyvsp[-5]), (yyvsp[-3]), (yyvsp[-2])); ;}
     break;
 
   case 125:
 #line 436 "pars0grm.y"
-    { yyval = pars_while_statement(yyvsp[-4], yyvsp[-2]); ;}
+    { (yyval) = pars_while_statement((yyvsp[-4]), (yyvsp[-2])); ;}
     break;
 
   case 126:
 #line 444 "pars0grm.y"
-    { yyval = pars_for_statement(yyvsp[-8], yyvsp[-6], yyvsp[-4], yyvsp[-2]); ;}
+    { (yyval) = pars_for_statement((yyvsp[-8]), (yyvsp[-6]), (yyvsp[-4]), (yyvsp[-2])); ;}
     break;
 
   case 127:
 #line 448 "pars0grm.y"
-    { yyval = pars_exit_statement(); ;}
+    { (yyval) = pars_exit_statement(); ;}
     break;
 
   case 128:
 #line 452 "pars0grm.y"
-    { yyval = pars_return_statement(); ;}
+    { (yyval) = pars_return_statement(); ;}
     break;
 
   case 129:
 #line 457 "pars0grm.y"
-    { yyval = pars_open_statement(
-						ROW_SEL_OPEN_CURSOR, yyvsp[0]); ;}
+    { (yyval) = pars_open_statement(
+						ROW_SEL_OPEN_CURSOR, (yyvsp[0])); ;}
     break;
 
   case 130:
 #line 463 "pars0grm.y"
-    { yyval = pars_open_statement(
-						ROW_SEL_CLOSE_CURSOR, yyvsp[0]); ;}
+    { (yyval) = pars_open_statement(
+						ROW_SEL_CLOSE_CURSOR, (yyvsp[0])); ;}
     break;
 
   case 131:
 #line 469 "pars0grm.y"
-    { yyval = pars_fetch_statement(yyvsp[-2], yyvsp[0], NULL); ;}
+    { (yyval) = pars_fetch_statement((yyvsp[-2]), (yyvsp[0]), NULL); ;}
     break;
 
   case 132:
 #line 471 "pars0grm.y"
-    { yyval = pars_fetch_statement(yyvsp[-2], NULL, yyvsp[0]); ;}
+    { (yyval) = pars_fetch_statement((yyvsp[-2]), NULL, (yyvsp[0])); ;}
     break;
 
   case 133:
 #line 476 "pars0grm.y"
-    { yyval = pars_column_def(yyvsp[-4], yyvsp[-3], yyvsp[-2], yyvsp[-1], yyvsp[0]); ;}
+    { (yyval) = pars_column_def((yyvsp[-4]), (yyvsp[-3]), (yyvsp[-2]), (yyvsp[-1]), (yyvsp[0])); ;}
     break;
 
   case 134:
 #line 480 "pars0grm.y"
-    { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
     break;
 
   case 135:
 #line 482 "pars0grm.y"
-    { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 136:
 #line 486 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 137:
 #line 488 "pars0grm.y"
-    { yyval = yyvsp[-1]; ;}
+    { (yyval) = (yyvsp[-1]); ;}
     break;
 
   case 138:
 #line 492 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 139:
 #line 494 "pars0grm.y"
-    { yyval = &pars_int_token;
+    { (yyval) = &pars_int_token;
 					/* pass any non-NULL pointer */ ;}
     break;
 
   case 140:
 #line 499 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 141:
 #line 501 "pars0grm.y"
-    { yyval = &pars_int_token;
+    { (yyval) = &pars_int_token;
 					/* pass any non-NULL pointer */ ;}
     break;
 
   case 142:
 #line 506 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 143:
 #line 508 "pars0grm.y"
-    { yyval = &pars_int_token;
+    { (yyval) = &pars_int_token;
 					/* pass any non-NULL pointer */ ;}
     break;
 
   case 144:
 #line 515 "pars0grm.y"
-    { yyval = pars_create_table(yyvsp[-4], yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = pars_create_table((yyvsp[-4]), (yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 145:
 #line 519 "pars0grm.y"
-    { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
     break;
 
   case 146:
 #line 521 "pars0grm.y"
-    { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 147:
 #line 525 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 148:
 #line 526 "pars0grm.y"
-    { yyval = &pars_unique_token; ;}
+    { (yyval) = &pars_unique_token; ;}
     break;
 
   case 149:
 #line 530 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 150:
 #line 531 "pars0grm.y"
-    { yyval = &pars_clustered_token; ;}
+    { (yyval) = &pars_clustered_token; ;}
     break;
 
   case 151:
 #line 539 "pars0grm.y"
-    { yyval = pars_create_index(yyvsp[-8], yyvsp[-7], yyvsp[-5], yyvsp[-3], yyvsp[-1]); ;}
+    { (yyval) = pars_create_index((yyvsp[-8]), (yyvsp[-7]), (yyvsp[-5]), (yyvsp[-3]), (yyvsp[-1])); ;}
     break;
 
   case 152:
 #line 544 "pars0grm.y"
-    { yyval = pars_commit_statement(); ;}
+    { (yyval) = pars_commit_statement(); ;}
     break;
 
   case 153:
 #line 549 "pars0grm.y"
-    { yyval = pars_rollback_statement(); ;}
+    { (yyval) = pars_rollback_statement(); ;}
     break;
 
   case 154:
 #line 553 "pars0grm.y"
-    { yyval = &pars_int_token; ;}
+    { (yyval) = &pars_int_token; ;}
     break;
 
   case 155:
 #line 554 "pars0grm.y"
-    { yyval = &pars_int_token; ;}
+    { (yyval) = &pars_int_token; ;}
     break;
 
   case 156:
 #line 555 "pars0grm.y"
-    { yyval = &pars_char_token; ;}
+    { (yyval) = &pars_char_token; ;}
     break;
 
   case 157:
 #line 556 "pars0grm.y"
-    { yyval = &pars_binary_token; ;}
+    { (yyval) = &pars_binary_token; ;}
     break;
 
   case 158:
 #line 557 "pars0grm.y"
-    { yyval = &pars_blob_token; ;}
+    { (yyval) = &pars_blob_token; ;}
     break;
 
   case 159:
 #line 562 "pars0grm.y"
-    { yyval = pars_parameter_declaration(yyvsp[-2],
-							PARS_INPUT, yyvsp[0]); ;}
+    { (yyval) = pars_parameter_declaration((yyvsp[-2]),
+							PARS_INPUT, (yyvsp[0])); ;}
     break;
 
   case 160:
 #line 565 "pars0grm.y"
-    { yyval = pars_parameter_declaration(yyvsp[-2],
-							PARS_OUTPUT, yyvsp[0]); ;}
+    { (yyval) = pars_parameter_declaration((yyvsp[-2]),
+							PARS_OUTPUT, (yyvsp[0])); ;}
     break;
 
   case 161:
 #line 570 "pars0grm.y"
-    { yyval = NULL; ;}
+    { (yyval) = NULL; ;}
     break;
 
   case 162:
 #line 571 "pars0grm.y"
-    { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
     break;
 
   case 163:
 #line 573 "pars0grm.y"
-    { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+    { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
     break;
 
   case 164:
 #line 578 "pars0grm.y"
-    { yyval = pars_variable_declaration(yyvsp[-2], yyvsp[-1]); ;}
+    { (yyval) = pars_variable_declaration((yyvsp[-2]), (yyvsp[-1])); ;}
     break;
 
   case 168:
 #line 590 "pars0grm.y"
-    { yyval = pars_cursor_declaration(yyvsp[-3], yyvsp[-1]); ;}
+    { (yyval) = pars_cursor_declaration((yyvsp[-3]), (yyvsp[-1])); ;}
     break;
 
   case 169:
 #line 595 "pars0grm.y"
-    { yyval = pars_function_declaration(yyvsp[-1]); ;}
+    { (yyval) = pars_function_declaration((yyvsp[-1])); ;}
     break;
 
   case 175:
 #line 616 "pars0grm.y"
-    { yyval = pars_procedure_definition(yyvsp[-9], yyvsp[-7],
-								yyvsp[-1]); ;}
+    { (yyval) = pars_procedure_definition((yyvsp[-9]), (yyvsp[-7]),
+								(yyvsp[-1])); ;}
     break;
 
 
     }
 
 /* Line 1010 of yacc.c.  */
-#line 2345 "pars0grm.tab.c"
+#line 2345 "pars0grm.c"
 
   yyvsp -= yylen;
   yyssp -= yylen;
@@ -2441,7 +2468,7 @@ yyerrlab:
 
   if (yyerrstatus == 3)
     {
-      /* If just tried and failed to reuse lookahead token after an
+      /* If just tried and failed to reuse look-ahead token after an
 	 error, discard it.  */
 
       if (yychar <= YYEOF)
@@ -2451,23 +2478,22 @@ yyerrlab:
 	  if (yychar == YYEOF)
 	     for (;;)
 	       {
+
 		 YYPOPSTACK;
 		 if (yyssp == yyss)
 		   YYABORT;
-		 YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp);
-		 yydestruct (yystos[*yyssp], yyvsp);
+		 yydestruct ("Error: popping",
+                             yystos[*yyssp], yyvsp);
 	       }
         }
       else
 	{
-	  YYDSYMPRINTF ("Error: discarding", yytoken, &yylval, &yylloc);
-	  yydestruct (yytoken, &yylval);
+	  yydestruct ("Error: discarding", yytoken, &yylval);
 	  yychar = YYEMPTY;
-
 	}
     }
 
-  /* Else will try to reuse lookahead token after shifting the error
+  /* Else will try to reuse look-ahead token after shifting the error
      token.  */
   goto yyerrlab1;
 
@@ -2484,7 +2510,7 @@ yyerrorlab:
      goto yyerrorlab;
 #endif
 
-  yyvsp -= yylen;
+yyvsp -= yylen;
   yyssp -= yylen;
   yystate = *yyssp;
   goto yyerrlab1;
@@ -2514,8 +2540,8 @@ yyerrlab1:
       if (yyssp == yyss)
 	YYABORT;
 
-      YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp);
-      yydestruct (yystos[yystate], yyvsp);
+
+      yydestruct ("Error: popping", yystos[yystate], yyvsp);
       YYPOPSTACK;
       yystate = *yyssp;
       YY_STACK_PRINT (yyss, yyssp);
@@ -2524,11 +2550,12 @@ yyerrlab1:
   if (yyn == YYFINAL)
     YYACCEPT;
 
-  YYDPRINTF ((stderr, "Shifting error token, "));
-
   *++yyvsp = yylval;
 
 
+  /* Shift the error token. */
+  YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
+
   yystate = yyn;
   goto yynewstate;
 
@@ -2544,6 +2571,9 @@ yyacceptlab:
 | yyabortlab -- YYABORT comes here.  |
 `-----------------------------------*/
 yyabortlab:
+  yydestruct ("Error: discarding lookahead",
+              yytoken, &yylval);
+  yychar = YYEMPTY;
   yyresult = 1;
   goto yyreturn;
 
diff --git a/storage/innobase/pars/pars0grm.y b/storage/innodb_plugin/pars/pars0grm.y
similarity index 94%
rename from storage/innobase/pars/pars0grm.y
rename to storage/innodb_plugin/pars/pars0grm.y
index a07be9975a1..14d64f1826f 100644
--- a/storage/innobase/pars/pars0grm.y
+++ b/storage/innodb_plugin/pars/pars0grm.y
@@ -1,13 +1,28 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
 /******************************************************
 SQL parser: input file for the GNU Bison parser generator
 
-(c) 1997 Innobase Oy
-
-Created 12/14/1997 Heikki Tuuri
-Published under the GPL version 2
-
 Look from pars0lex.l for instructions how to generate the C files for
 the InnoDB parser.
+
+Created 12/14/1997 Heikki Tuuri
 *******************************************************/
 
 %{
diff --git a/storage/innobase/pars/pars0lex.l b/storage/innodb_plugin/pars/pars0lex.l
similarity index 90%
rename from storage/innobase/pars/pars0lex.l
rename to storage/innodb_plugin/pars/pars0lex.l
index ad65034fab0..4abff65e98b 100644
--- a/storage/innobase/pars/pars0lex.l
+++ b/storage/innodb_plugin/pars/pars0lex.l
@@ -1,11 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
 /******************************************************
 SQL parser lexical analyzer: input file for the GNU Flex lexer generator
 
-(c) 1997 Innobase Oy
-
-Created 12/14/1997 Heikki Tuuri
-Published under the GPL version 2
-
 The InnoDB parser is frozen because MySQL takes care of SQL parsing.
 Therefore we normally keep the InnoDB parser C files as they are, and do
 not automatically generate them from pars0grm.y and pars0lex.l.
@@ -18,6 +31,8 @@ How to make the InnoDB parser and lexer C files:
 
 These instructions seem to work at least with bison-1.875d and flex-2.5.31 on
 Linux.
+
+Created 12/14/1997 Heikki Tuuri
 *******************************************************/
 
 %option nostdinit
@@ -55,13 +70,13 @@ Linux.
 static ulint	stringbuf_len_alloc = 0; /* Allocated length */
 static ulint	stringbuf_len = 0; /* Current length */
 static char*	stringbuf; /* Start of buffer */
-/* Appends a string to the buffer. */
+/** Appends a string to the buffer. */
 static
 void
 string_append(
 /*==========*/
-	const char*	str,	/* in: string to be appended */
-	ulint		len)	/* in: length of the string */
+	const char*	str,	/*!< in: string to be appended */
+	ulint		len)	/*!< in: length of the string */
 {
 	if (stringbuf == NULL) {
 		stringbuf = malloc(1);
diff --git a/storage/innobase/pars/pars0opt.c b/storage/innodb_plugin/pars/pars0opt.c
similarity index 84%
rename from storage/innobase/pars/pars0opt.c
rename to storage/innodb_plugin/pars/pars0opt.c
index 2abe6720235..2e392ba4836 100644
--- a/storage/innobase/pars/pars0opt.c
+++ b/storage/innodb_plugin/pars/pars0opt.c
@@ -1,7 +1,24 @@
-/******************************************************
-Simple SQL optimizer
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file pars/pars0opt.c
+Simple SQL optimizer
 
 Created 12/21/1997 Heikki Tuuri
 *******************************************************/
@@ -31,15 +48,14 @@ Created 12/21/1997 Heikki Tuuri
 #define OPT_SCROLL_COND	4
 
 
-/***********************************************************************
-Inverts a comparison operator. */
+/*******************************************************************//**
+Inverts a comparison operator.
+@return	the equivalent operator when the order of the arguments is switched */
 static
 int
 opt_invert_cmp_op(
 /*==============*/
-			/* out: the equivalent operator when the order of
-			the arguments is switched */
-	int	op)	/* in: operator */
+	int	op)	/*!< in: operator */
 {
 	if (op == '<') {
 		return('>');
@@ -58,18 +74,18 @@ opt_invert_cmp_op(
 	return(0);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Checks if the value of an expression can be calculated BEFORE the nth table
 in a join is accessed. If this is the case, it can possibly be used in an
-index search for the nth table. */
+index search for the nth table.
+@return	TRUE if already determined */
 static
 ibool
 opt_check_exp_determined_before(
 /*============================*/
-					/* out: TRUE if already determined */
-	que_node_t*	exp,		/* in: expression */
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		nth_table)	/* in: nth table will be accessed */
+	que_node_t*	exp,		/*!< in: expression */
+	sel_node_t*	sel_node,	/*!< in: select node */
+	ulint		nth_table)	/*!< in: nth table will be accessed */
 {
 	func_node_t*	func_node;
 	sym_node_t*	sym_node;
@@ -118,24 +134,22 @@ opt_check_exp_determined_before(
 	return(FALSE);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Looks in a comparison condition if a column value is already restricted by
-it BEFORE the nth table is accessed. */
+it BEFORE the nth table is accessed.
+@return	expression restricting the value of the column, or NULL if not known */
 static
 que_node_t*
 opt_look_for_col_in_comparison_before(
 /*==================================*/
-					/* out: expression restricting the
-					value of the column, or NULL if not
-					known */
-	ulint		cmp_type,	/* in: OPT_EQUAL, OPT_COMPARISON */
-	ulint		col_no,		/* in: column number */
-	func_node_t*	search_cond,	/* in: comparison condition */
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		nth_table,	/* in: nth table in a join (a query
+	ulint		cmp_type,	/*!< in: OPT_EQUAL, OPT_COMPARISON */
+	ulint		col_no,		/*!< in: column number */
+	func_node_t*	search_cond,	/*!< in: comparison condition */
+	sel_node_t*	sel_node,	/*!< in: select node */
+	ulint		nth_table,	/*!< in: nth table in a join (a query
 					from a single table is considered a
 					join of 1 table) */
-	ulint*		op)		/* out: comparison operator ('=',
+	ulint*		op)		/*!< out: comparison operator ('=',
 					PARS_GE_TOKEN, ... ); this is inverted
 					if the column appears on the right
 					side */
@@ -215,26 +229,24 @@ opt_look_for_col_in_comparison_before(
 	return(NULL);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Looks in a search condition if a column value is already restricted by the
 search condition BEFORE the nth table is accessed. Takes into account that
 if we will fetch in an ascending order, we cannot utilize an upper limit for
-a column value; in a descending order, respectively, a lower limit. */
+a column value; in a descending order, respectively, a lower limit.
+@return	expression restricting the value of the column, or NULL if not known */
 static
 que_node_t*
 opt_look_for_col_in_cond_before(
 /*============================*/
-					/* out: expression restricting the
-					value of the column, or NULL if not
-					known */
-	ulint		cmp_type,	/* in: OPT_EQUAL, OPT_COMPARISON */
-	ulint		col_no,		/* in: column number */
-	func_node_t*	search_cond,	/* in: search condition or NULL */
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		nth_table,	/* in: nth table in a join (a query
+	ulint		cmp_type,	/*!< in: OPT_EQUAL, OPT_COMPARISON */
+	ulint		col_no,		/*!< in: column number */
+	func_node_t*	search_cond,	/*!< in: search condition or NULL */
+	sel_node_t*	sel_node,	/*!< in: select node */
+	ulint		nth_table,	/*!< in: nth table in a join (a query
 					from a single table is considered a
 					join of 1 table) */
-	ulint*		op)		/* out: comparison operator ('=',
+	ulint*		op)		/*!< out: comparison operator ('=',
 					PARS_GE_TOKEN, ... ) */
 {
 	func_node_t*	new_cond;
@@ -293,24 +305,24 @@ opt_look_for_col_in_cond_before(
 	return(exp);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Calculates the goodness for an index according to a select node. The
 goodness is 4 times the number of first fields in index whose values we
 already know exactly in the query. If we have a comparison condition for
 an additional field, 2 point are added. If the index is unique, and we know
 all the unique fields for the index we add 1024 points. For a clustered index
-we add 1 point. */
+we add 1 point.
+@return	goodness */
 static
 ulint
 opt_calc_index_goodness(
 /*====================*/
-					/* out: goodness */
-	dict_index_t*	index,		/* in: index */
-	sel_node_t*	sel_node,	/* in: parsed select node */
-	ulint		nth_table,	/* in: nth table in a join */
-	que_node_t**	index_plan,	/* in/out: comparison expressions for
+	dict_index_t*	index,		/*!< in: index */
+	sel_node_t*	sel_node,	/*!< in: parsed select node */
+	ulint		nth_table,	/*!< in: nth table in a join */
+	que_node_t**	index_plan,	/*!< in/out: comparison expressions for
 					this index */
-	ulint*		last_op)	/* out: last comparison operator, if
+	ulint*		last_op)	/*!< out: last comparison operator, if
 					goodness > 1 */
 {
 	que_node_t*	exp;
@@ -362,14 +374,14 @@ opt_calc_index_goodness(
 	if (goodness >= 4 * dict_index_get_n_unique(index)) {
 		goodness += 1024;
 
-		if (index->type & DICT_CLUSTERED) {
+		if (dict_index_is_clust(index)) {
 
 			goodness += 1024;
 		}
 	}
 
 	/* We have to test for goodness here, as last_op may note be set */
-	if (goodness && index->type & DICT_CLUSTERED) {
+	if (goodness && dict_index_is_clust(index)) {
 
 		goodness++;
 	}
@@ -377,30 +389,29 @@ opt_calc_index_goodness(
 	return(goodness);
 }
 
-/***********************************************************************
-Calculates the number of matched fields based on an index goodness. */
+/*******************************************************************//**
+Calculates the number of matched fields based on an index goodness.
+@return	number of excatly or partially matched fields */
 UNIV_INLINE
 ulint
 opt_calc_n_fields_from_goodness(
 /*============================*/
-				/* out: number of excatly or partially matched
-				fields */
-	ulint	goodness)	/* in: goodness */
+	ulint	goodness)	/*!< in: goodness */
 {
 	return(((goodness % 1024) + 2) / 4);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Converts a comparison operator to the corresponding search mode PAGE_CUR_GE,
-... */
+...
+@return	search mode */
 UNIV_INLINE
 ulint
 opt_op_to_search_mode(
 /*==================*/
-			/* out: search mode */
-	ibool	asc,	/* in: TRUE if the rows should be fetched in an
+	ibool	asc,	/*!< in: TRUE if the rows should be fetched in an
 			ascending order */
-	ulint	op)	/* in: operator '=', PARS_GE_TOKEN, ... */
+	ulint	op)	/*!< in: operator '=', PARS_GE_TOKEN, ... */
 {
 	if (op == '=') {
 		if (asc) {
@@ -427,15 +438,15 @@ opt_op_to_search_mode(
 	return(0);
 }
 
-/***********************************************************************
-Determines if a node is an argument node of a function node. */
+/*******************************************************************//**
+Determines if a node is an argument node of a function node.
+@return	TRUE if is an argument */
 static
 ibool
 opt_is_arg(
 /*=======*/
-					/* out: TRUE if is an argument */
-	que_node_t*	arg_node,	/* in: possible argument node */
-	func_node_t*	func_node)	/* in: function node */
+	que_node_t*	arg_node,	/*!< in: possible argument node */
+	func_node_t*	func_node)	/*!< in: function node */
 {
 	que_node_t*	arg;
 
@@ -453,7 +464,7 @@ opt_is_arg(
 	return(FALSE);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Decides if the fetching of rows should be made in a descending order, and
 also checks that the chosen query plan produces a result which satisfies
 the order-by. */
@@ -461,7 +472,7 @@ static
 void
 opt_check_order_by(
 /*===============*/
-	sel_node_t*	sel_node)	/* in: select node; asserts an error
+	sel_node_t*	sel_node)	/*!< in: select node; asserts an error
 					if the plan does not agree with the
 					order-by */
 {
@@ -505,7 +516,7 @@ opt_check_order_by(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Optimizes a select. Decides which indexes to tables to use. The tables
 are accessed in the order that they were written to the FROM part in the
 select statement. */
@@ -513,9 +524,9 @@ static
 void
 opt_search_plan_for_table(
 /*======================*/
-	sel_node_t*	sel_node,	/* in: parsed select node */
-	ulint		i,		/* in: this is the ith table */
-	dict_table_t*	table)		/* in: table */
+	sel_node_t*	sel_node,	/*!< in: parsed select node */
+	ulint		i,		/*!< in: this is the ith table */
+	dict_table_t*	table)		/*!< in: table */
 {
 	plan_t*		plan;
 	dict_index_t*	index;
@@ -587,7 +598,7 @@ opt_search_plan_for_table(
 						   best_last_op);
 	}
 
-	if ((best_index->type & DICT_CLUSTERED)
+	if (dict_index_is_clust(best_index)
 	    && (plan->n_exact_match >= dict_index_get_n_unique(best_index))) {
 
 		plan->unique_search = TRUE;
@@ -601,22 +612,19 @@ opt_search_plan_for_table(
 	btr_pcur_init(&(plan->clust_pcur));
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Looks at a comparison condition and decides if it can, and need, be tested for
-a table AFTER the table has been accessed. */
+a table AFTER the table has been accessed.
+@return OPT_NOT_COND if not for this table, else OPT_END_COND,
+OPT_TEST_COND, or OPT_SCROLL_COND, where the last means that the
+condition need not be tested, except when scroll cursors are used */
 static
 ulint
 opt_classify_comparison(
 /*====================*/
-					/* out: OPT_NOT_COND if not for this
-					table, else OPT_END_COND,
-					OPT_TEST_COND, or OPT_SCROLL_COND,
-					where the last means that the
-					condition need not be tested, except
-					when scroll cursors are used */
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		i,		/* in: ith table in the join */
-	func_node_t*	cond)		/* in: comparison condition */
+	sel_node_t*	sel_node,	/*!< in: select node */
+	ulint		i,		/*!< in: ith table in the join */
+	func_node_t*	cond)		/*!< in: comparison condition */
 {
 	plan_t*	plan;
 	ulint	n_fields;
@@ -697,15 +705,15 @@ opt_classify_comparison(
 	return(OPT_TEST_COND);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Recursively looks for test conditions for a table in a join. */
 static
 void
 opt_find_test_conds(
 /*================*/
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		i,		/* in: ith table in the join */
-	func_node_t*	cond)		/* in: conjunction of search
+	sel_node_t*	sel_node,	/*!< in: select node */
+	ulint		i,		/*!< in: ith table in the join */
+	func_node_t*	cond)		/*!< in: conjunction of search
 					conditions or NULL */
 {
 	func_node_t*	new_cond;
@@ -742,7 +750,7 @@ opt_find_test_conds(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Normalizes a list of comparison conditions so that a column of the table
 appears on the left side of the comparison if possible. This is accomplished
 by switching the arguments of the operator. */
@@ -750,9 +758,9 @@ static
 void
 opt_normalize_cmp_conds(
 /*====================*/
-	func_node_t*	cond,	/* in: first in a list of comparison
+	func_node_t*	cond,	/*!< in: first in a list of comparison
 				conditions, or NULL */
-	dict_table_t*	table)	/* in: table */
+	dict_table_t*	table)	/*!< in: table */
 {
 	que_node_t*	arg1;
 	que_node_t*	arg2;
@@ -784,7 +792,7 @@ opt_normalize_cmp_conds(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Finds out the search condition conjuncts we can, and need, to test as the ith
 table in a join is accessed. The search tuple can eliminate the need to test
 some conjuncts. */
@@ -792,8 +800,8 @@ static
 void
 opt_determine_and_normalize_test_conds(
 /*===================================*/
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		i)		/* in: ith table in the join */
+	sel_node_t*	sel_node,	/*!< in: select node */
+	ulint		i)		/*!< in: ith table in the join */
 {
 	plan_t*	plan;
 
@@ -812,24 +820,24 @@ opt_determine_and_normalize_test_conds(
 	ut_a(UT_LIST_GET_LEN(plan->end_conds) >= plan->n_exact_match);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Looks for occurrences of the columns of the table in the query subgraph and
 adds them to the list of columns if an occurrence of the same column does not
 already exist in the list. If the column is already in the list, puts a value
 indirection to point to the occurrence in the column list, except if the
 column occurrence we are looking at is in the column list, in which case
 nothing is done. */
-
+UNIV_INTERN
 void
 opt_find_all_cols(
 /*==============*/
-	ibool		copy_val,	/* in: if TRUE, new found columns are
+	ibool		copy_val,	/*!< in: if TRUE, new found columns are
 					added as columns to copy */
-	dict_index_t*	index,		/* in: index of the table to use */
-	sym_node_list_t* col_list,	/* in: base node of a list where
+	dict_index_t*	index,		/*!< in: index of the table to use */
+	sym_node_list_t* col_list,	/*!< in: base node of a list where
 					to add new found columns */
-	plan_t*		plan,		/* in: plan or NULL */
-	que_node_t*	exp)		/* in: expression or condition or
+	plan_t*		plan,		/*!< in: plan or NULL */
+	que_node_t*	exp)		/*!< in: expression or condition or
 					NULL */
 {
 	func_node_t*	func_node;
@@ -906,7 +914,7 @@ opt_find_all_cols(
 
 	sym_node->field_nos[SYM_CLUST_FIELD_NO] = dict_index_get_nth_col_pos(
 		dict_table_get_first_index(index->table), sym_node->col_no);
-	if (!(index->type & DICT_CLUSTERED)) {
+	if (!dict_index_is_clust(index)) {
 
 		ut_a(plan);
 
@@ -921,7 +929,7 @@ opt_find_all_cols(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Looks for occurrences of the columns of the table in conditions which are
 not yet determined AFTER the join operation has fetched a row in the ith
 table. The values for these column must be copied to dynamic memory for
@@ -930,9 +938,9 @@ static
 void
 opt_find_copy_cols(
 /*===============*/
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		i,		/* in: ith table in the join */
-	func_node_t*	search_cond)	/* in: search condition or NULL */
+	sel_node_t*	sel_node,	/*!< in: select node */
+	ulint		i,		/*!< in: ith table in the join */
+	func_node_t*	search_cond)	/*!< in: search condition or NULL */
 {
 	func_node_t*	new_cond;
 	plan_t*		plan;
@@ -969,7 +977,7 @@ opt_find_copy_cols(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Classifies the table columns according to whether we use the column only while
 holding the latch on the page, or whether we have to copy the column value to
 dynamic memory. Puts the first occurrence of a column to either list in the
@@ -978,8 +986,8 @@ static
 void
 opt_classify_cols(
 /*==============*/
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		i)		/* in: ith table in the join */
+	sel_node_t*	sel_node,	/*!< in: select node */
+	ulint		i)		/*!< in: ith table in the join */
 {
 	plan_t*		plan;
 	que_node_t*	exp;
@@ -1013,15 +1021,15 @@ opt_classify_cols(
 			  sel_node->search_cond);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Fills in the info in plan which is used in accessing a clustered index
 record. The columns must already be classified for the plan node. */
 static
 void
 opt_clust_access(
 /*=============*/
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		n)		/* in: nth table in select */
+	sel_node_t*	sel_node,	/*!< in: select node */
+	ulint		n)		/*!< in: nth table in select */
 {
 	plan_t*		plan;
 	dict_table_t*	table;
@@ -1041,7 +1049,7 @@ opt_clust_access(
 
 	plan->no_prefetch = FALSE;
 
-	if (index->type & DICT_CLUSTERED) {
+	if (dict_index_is_clust(index)) {
 		plan->clust_map = NULL;
 		plan->clust_ref = NULL;
 
@@ -1085,15 +1093,15 @@ opt_clust_access(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Optimizes a select. Decides which indexes to tables to use. The tables
 are accessed in the order that they were written to the FROM part in the
 select statement. */
-
+UNIV_INTERN
 void
 opt_search_plan(
 /*============*/
-	sel_node_t*	sel_node)	/* in: parsed select node */
+	sel_node_t*	sel_node)	/*!< in: parsed select node */
 {
 	sym_node_t*	table_node;
 	dict_table_t*	table;
@@ -1160,13 +1168,13 @@ opt_search_plan(
 #endif
 }
 
-/************************************************************************
+/********************************************************************//**
 Prints info of a query plan. */
-
+UNIV_INTERN
 void
 opt_print_query_plan(
 /*=================*/
-	sel_node_t*	sel_node)	/* in: select node */
+	sel_node_t*	sel_node)	/*!< in: select node */
 {
 	plan_t*	plan;
 	ulint	n_fields;
diff --git a/storage/innobase/pars/pars0pars.c b/storage/innodb_plugin/pars/pars0pars.c
similarity index 70%
rename from storage/innobase/pars/pars0pars.c
rename to storage/innodb_plugin/pars/pars0pars.c
index 89f6f862995..9faf36d00a8 100644
--- a/storage/innobase/pars/pars0pars.c
+++ b/storage/innodb_plugin/pars/pars0pars.c
@@ -1,7 +1,24 @@
-/******************************************************
-SQL parser
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file pars/pars0pars.c
+SQL parser
 
 Created 11/19/1996 Heikki Tuuri
 *******************************************************/
@@ -32,114 +49,107 @@ on 1/27/1998 */
 #include "eval0eval.h"
 
 #ifdef UNIV_SQL_DEBUG
-/* If the following is set TRUE, the lexer will print the SQL string
+/** If the following is set TRUE, the lexer will print the SQL string
 as it tokenizes it */
-
-ibool	pars_print_lexed	= FALSE;
+UNIV_INTERN ibool	pars_print_lexed	= FALSE;
 #endif /* UNIV_SQL_DEBUG */
 
 /* Global variable used while parsing a single procedure or query : the code is
 NOT re-entrant */
-sym_tab_t*	pars_sym_tab_global;
+UNIV_INTERN sym_tab_t*	pars_sym_tab_global;
 
 /* Global variables used to denote certain reserved words, used in
 constructing the parsing tree */
 
-pars_res_word_t	pars_to_char_token = {PARS_TO_CHAR_TOKEN};
-pars_res_word_t	pars_to_number_token = {PARS_TO_NUMBER_TOKEN};
-pars_res_word_t	pars_to_binary_token = {PARS_TO_BINARY_TOKEN};
-pars_res_word_t	pars_binary_to_number_token = {PARS_BINARY_TO_NUMBER_TOKEN};
-pars_res_word_t	pars_substr_token = {PARS_SUBSTR_TOKEN};
-pars_res_word_t	pars_replstr_token = {PARS_REPLSTR_TOKEN};
-pars_res_word_t	pars_concat_token = {PARS_CONCAT_TOKEN};
-pars_res_word_t	pars_instr_token = {PARS_INSTR_TOKEN};
-pars_res_word_t	pars_length_token = {PARS_LENGTH_TOKEN};
-pars_res_word_t	pars_sysdate_token = {PARS_SYSDATE_TOKEN};
-pars_res_word_t	pars_printf_token = {PARS_PRINTF_TOKEN};
-pars_res_word_t	pars_assert_token = {PARS_ASSERT_TOKEN};
-pars_res_word_t	pars_rnd_token = {PARS_RND_TOKEN};
-pars_res_word_t	pars_rnd_str_token = {PARS_RND_STR_TOKEN};
-pars_res_word_t	pars_count_token = {PARS_COUNT_TOKEN};
-pars_res_word_t	pars_sum_token = {PARS_SUM_TOKEN};
-pars_res_word_t	pars_distinct_token = {PARS_DISTINCT_TOKEN};
-pars_res_word_t	pars_binary_token = {PARS_BINARY_TOKEN};
-pars_res_word_t	pars_blob_token = {PARS_BLOB_TOKEN};
-pars_res_word_t	pars_int_token = {PARS_INT_TOKEN};
-pars_res_word_t	pars_char_token = {PARS_CHAR_TOKEN};
-pars_res_word_t	pars_float_token = {PARS_FLOAT_TOKEN};
-pars_res_word_t	pars_update_token = {PARS_UPDATE_TOKEN};
-pars_res_word_t	pars_asc_token = {PARS_ASC_TOKEN};
-pars_res_word_t	pars_desc_token = {PARS_DESC_TOKEN};
-pars_res_word_t	pars_open_token = {PARS_OPEN_TOKEN};
-pars_res_word_t	pars_close_token = {PARS_CLOSE_TOKEN};
-pars_res_word_t	pars_share_token = {PARS_SHARE_TOKEN};
-pars_res_word_t	pars_unique_token = {PARS_UNIQUE_TOKEN};
-pars_res_word_t	pars_clustered_token = {PARS_CLUSTERED_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_to_char_token = {PARS_TO_CHAR_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_to_number_token = {PARS_TO_NUMBER_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_to_binary_token = {PARS_TO_BINARY_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_binary_to_number_token = {PARS_BINARY_TO_NUMBER_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_substr_token = {PARS_SUBSTR_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_replstr_token = {PARS_REPLSTR_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_concat_token = {PARS_CONCAT_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_instr_token = {PARS_INSTR_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_length_token = {PARS_LENGTH_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_sysdate_token = {PARS_SYSDATE_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_printf_token = {PARS_PRINTF_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_assert_token = {PARS_ASSERT_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_rnd_token = {PARS_RND_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_rnd_str_token = {PARS_RND_STR_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_count_token = {PARS_COUNT_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_sum_token = {PARS_SUM_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_distinct_token = {PARS_DISTINCT_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_binary_token = {PARS_BINARY_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_blob_token = {PARS_BLOB_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_int_token = {PARS_INT_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_char_token = {PARS_CHAR_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_float_token = {PARS_FLOAT_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_update_token = {PARS_UPDATE_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_asc_token = {PARS_ASC_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_desc_token = {PARS_DESC_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_open_token = {PARS_OPEN_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_close_token = {PARS_CLOSE_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_share_token = {PARS_SHARE_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_unique_token = {PARS_UNIQUE_TOKEN};
+UNIV_INTERN pars_res_word_t	pars_clustered_token = {PARS_CLUSTERED_TOKEN};
 
-/* Global variable used to denote the '*' in SELECT * FROM.. */
-#define PARS_STAR_DENOTER	12345678
-ulint	pars_star_denoter	= PARS_STAR_DENOTER;
+/** Global variable used to denote the '*' in SELECT * FROM.. */
+UNIV_INTERN ulint	pars_star_denoter	= 12345678;
 
 
-/*************************************************************************
-Determines the class of a function code. */
+/*********************************************************************//**
+Determines the class of a function code.
+@return	function class: PARS_FUNC_ARITH, ... */
 static
 ulint
 pars_func_get_class(
 /*================*/
-			/* out: function class: PARS_FUNC_ARITH, ... */
-	int	func)	/* in: function code: '=', PARS_GE_TOKEN, ... */
+	int	func)	/*!< in: function code: '=', PARS_GE_TOKEN, ... */
 {
-	if ((func == '+') || (func == '-') || (func == '*') || (func == '/')) {
-
+	switch (func) {
+	case '+': case '-': case '*': case '/':
 		return(PARS_FUNC_ARITH);
 
-	} else if ((func == '=') || (func == '<') || (func == '>')
-		   || (func == PARS_GE_TOKEN) || (func == PARS_LE_TOKEN)
-		   || (func == PARS_NE_TOKEN)) {
-
+	case '=': case '<': case '>':
+	case PARS_GE_TOKEN: case PARS_LE_TOKEN: case PARS_NE_TOKEN:
 		return(PARS_FUNC_CMP);
 
-	} else if ((func == PARS_AND_TOKEN) || (func == PARS_OR_TOKEN)
-		   || (func == PARS_NOT_TOKEN)) {
-
+	case PARS_AND_TOKEN: case PARS_OR_TOKEN: case PARS_NOT_TOKEN:
 		return(PARS_FUNC_LOGICAL);
 
-	} else if ((func == PARS_COUNT_TOKEN) || (func == PARS_SUM_TOKEN)) {
-
+	case PARS_COUNT_TOKEN: case PARS_SUM_TOKEN:
 		return(PARS_FUNC_AGGREGATE);
 
-	} else if ((func == PARS_TO_CHAR_TOKEN)
-		   || (func == PARS_TO_NUMBER_TOKEN)
-		   || (func == PARS_TO_BINARY_TOKEN)
-		   || (func == PARS_BINARY_TO_NUMBER_TOKEN)
-		   || (func == PARS_SUBSTR_TOKEN)
-		   || (func == PARS_CONCAT_TOKEN)
-		   || (func == PARS_LENGTH_TOKEN)
-		   || (func == PARS_INSTR_TOKEN)
-		   || (func == PARS_SYSDATE_TOKEN)
-		   || (func == PARS_NOTFOUND_TOKEN)
-		   || (func == PARS_PRINTF_TOKEN)
-		   || (func == PARS_ASSERT_TOKEN)
-		   || (func == PARS_RND_TOKEN)
-		   || (func == PARS_RND_STR_TOKEN)
-		   || (func == PARS_REPLSTR_TOKEN)) {
-
+	case PARS_TO_CHAR_TOKEN:
+	case PARS_TO_NUMBER_TOKEN:
+	case PARS_TO_BINARY_TOKEN:
+	case PARS_BINARY_TO_NUMBER_TOKEN:
+	case PARS_SUBSTR_TOKEN:
+	case PARS_CONCAT_TOKEN:
+	case PARS_LENGTH_TOKEN:
+	case PARS_INSTR_TOKEN:
+	case PARS_SYSDATE_TOKEN:
+	case PARS_NOTFOUND_TOKEN:
+	case PARS_PRINTF_TOKEN:
+	case PARS_ASSERT_TOKEN:
+	case PARS_RND_TOKEN:
+	case PARS_RND_STR_TOKEN:
+	case PARS_REPLSTR_TOKEN:
 		return(PARS_FUNC_PREDEFINED);
-	} else {
+
+	default:
 		return(PARS_FUNC_OTHER);
 	}
 }
 
-/*************************************************************************
-Parses an operator or predefined function expression. */
+/*********************************************************************//**
+Parses an operator or predefined function expression.
+@return	own: function node in a query tree */
 static
 func_node_t*
 pars_func_low(
 /*==========*/
-				/* out, own: function node in a query tree */
-	int		func,	/* in: function token code */
-	que_node_t*	arg)	/* in: first argument in the argument list */
+	int		func,	/*!< in: function token code */
+	que_node_t*	arg)	/*!< in: first argument in the argument list */
 {
 	func_node_t*	node;
 
@@ -160,29 +170,29 @@ pars_func_low(
 	return(node);
 }
 
-/*************************************************************************
-Parses a function expression. */
-
+/*********************************************************************//**
+Parses a function expression.
+@return	own: function node in a query tree */
+UNIV_INTERN
 func_node_t*
 pars_func(
 /*======*/
-				/* out, own: function node in a query tree */
-	que_node_t*	res_word,/* in: function name reserved word */
-	que_node_t*	arg)	/* in: first argument in the argument list */
+	que_node_t*	res_word,/*!< in: function name reserved word */
+	que_node_t*	arg)	/*!< in: first argument in the argument list */
 {
 	return(pars_func_low(((pars_res_word_t*)res_word)->code, arg));
 }
 
-/*************************************************************************
-Parses an operator expression. */
-
+/*********************************************************************//**
+Parses an operator expression.
+@return	own: function node in a query tree */
+UNIV_INTERN
 func_node_t*
 pars_op(
 /*====*/
-				/* out, own: function node in a query tree */
-	int		func,	/* in: operator token code */
-	que_node_t*	arg1,	/* in: first argument */
-	que_node_t*	arg2)	/* in: second argument or NULL for an unary
+	int		func,	/*!< in: operator token code */
+	que_node_t*	arg1,	/*!< in: first argument */
+	que_node_t*	arg2)	/*!< in: second argument or NULL for an unary
 				operator */
 {
 	que_node_list_add_last(NULL, arg1);
@@ -194,15 +204,15 @@ pars_op(
 	return(pars_func_low(func, arg1));
 }
 
-/*************************************************************************
-Parses an ORDER BY clause. Order by a single column only is supported. */
-
+/*********************************************************************//**
+Parses an ORDER BY clause. Order by a single column only is supported.
+@return	own: order-by node in a query tree */
+UNIV_INTERN
 order_node_t*
 pars_order_by(
 /*==========*/
-				/* out, own: order-by node in a query tree */
-	sym_node_t*	column,	/* in: column name */
-	pars_res_word_t* asc)	/* in: &pars_asc_token or pars_desc_token */
+	sym_node_t*	column,	/*!< in: column name */
+	pars_res_word_t* asc)	/*!< in: &pars_asc_token or pars_desc_token */
 {
 	order_node_t*	node;
 
@@ -222,28 +232,43 @@ pars_order_by(
 	return(node);
 }
 
-/*************************************************************************
+/*********************************************************************//**
+Determine if a data type is a built-in string data type of the InnoDB
+SQL parser.
+@return	TRUE if string data type */
+static
+ibool
+pars_is_string_type(
+/*================*/
+	ulint	mtype)	/*!< in: main data type */
+{
+	switch (mtype) {
+	case DATA_VARCHAR: case DATA_CHAR:
+	case DATA_FIXBINARY: case DATA_BINARY:
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*********************************************************************//**
 Resolves the data type of a function in an expression. The argument data
 types must already be resolved. */
 static
 void
 pars_resolve_func_data_type(
 /*========================*/
-	func_node_t*	node)	/* in: function node */
+	func_node_t*	node)	/*!< in: function node */
 {
 	que_node_t*	arg;
-	ulint		func;
 
 	ut_a(que_node_get_type(node) == QUE_NODE_FUNC);
 
 	arg = node->args;
 
-	func = node->func;
-
-	if ((func == PARS_SUM_TOKEN)
-	    || (func == '+') || (func == '-') || (func == '*')
-	    || (func == '/') || (func == '+')) {
-
+	switch (node->func) {
+	case PARS_SUM_TOKEN:
+	case '+': case '-': case '*': case '/':
 		/* Inherit the data type from the first argument (which must
 		not be the SQL null literal whose type is DATA_ERROR) */
 
@@ -252,15 +277,21 @@ pars_resolve_func_data_type(
 
 		ut_a(dtype_get_mtype(que_node_get_data_type(node))
 		     == DATA_INT);
-	} else if (func == PARS_COUNT_TOKEN) {
+		break;
+
+	case PARS_COUNT_TOKEN:
 		ut_a(arg);
 		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
+		break;
 
-	} else if (func == PARS_TO_CHAR_TOKEN) {
+	case PARS_TO_CHAR_TOKEN:
+	case PARS_RND_STR_TOKEN:
 		ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
 		dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
 			  DATA_ENGLISH, 0);
-	} else if (func == PARS_TO_BINARY_TOKEN) {
+		break;
+
+	case PARS_TO_BINARY_TOKEN:
 		if (dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT) {
 			dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
 				  DATA_ENGLISH, 0);
@@ -268,77 +299,63 @@ pars_resolve_func_data_type(
 			dtype_set(que_node_get_data_type(node), DATA_BINARY,
 				  0, 0);
 		}
-	} else if (func == PARS_TO_NUMBER_TOKEN) {
-		ut_a(dtype_get_mtype(que_node_get_data_type(arg))
-		     == DATA_VARCHAR);
-		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
+		break;
 
-	} else if (func == PARS_BINARY_TO_NUMBER_TOKEN) {
-		ut_a(dtype_get_mtype(que_node_get_data_type(arg))
-		     == DATA_VARCHAR);
+	case PARS_TO_NUMBER_TOKEN:
+	case PARS_BINARY_TO_NUMBER_TOKEN:
+	case PARS_LENGTH_TOKEN:
+	case PARS_INSTR_TOKEN:
+		ut_a(pars_is_string_type(que_node_get_data_type(arg)->mtype));
 		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
+		break;
 
-	} else if (func == PARS_LENGTH_TOKEN) {
-		ut_a(dtype_get_mtype(que_node_get_data_type(arg))
-		     == DATA_VARCHAR);
-		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
-
-	} else if (func == PARS_INSTR_TOKEN) {
-		ut_a(dtype_get_mtype(que_node_get_data_type(arg))
-		     == DATA_VARCHAR);
-		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
-
-	} else if (func == PARS_SYSDATE_TOKEN) {
+	case PARS_SYSDATE_TOKEN:
 		ut_a(arg == NULL);
 		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
+		break;
 
-	} else if ((func == PARS_SUBSTR_TOKEN)
-		   || (func == PARS_CONCAT_TOKEN)) {
-
-		ut_a(dtype_get_mtype(que_node_get_data_type(arg))
-		     == DATA_VARCHAR);
+	case PARS_SUBSTR_TOKEN:
+	case PARS_CONCAT_TOKEN:
+		ut_a(pars_is_string_type(que_node_get_data_type(arg)->mtype));
 		dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
 			  DATA_ENGLISH, 0);
+		break;
 
-	} else if ((func == '>') || (func == '<') || (func == '=')
-		   || (func == PARS_GE_TOKEN)
-		   || (func == PARS_LE_TOKEN)
-		   || (func == PARS_NE_TOKEN)
-		   || (func == PARS_AND_TOKEN)
-		   || (func == PARS_OR_TOKEN)
-		   || (func == PARS_NOT_TOKEN)
-		   || (func == PARS_NOTFOUND_TOKEN)) {
+	case '>': case '<': case '=':
+	case PARS_GE_TOKEN:
+	case PARS_LE_TOKEN:
+	case PARS_NE_TOKEN:
+	case PARS_AND_TOKEN:
+	case PARS_OR_TOKEN:
+	case PARS_NOT_TOKEN:
+	case PARS_NOTFOUND_TOKEN:
 
 		/* We currently have no iboolean type: use integer type */
 		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
+		break;
 
-	} else if (func == PARS_RND_TOKEN) {
+	case PARS_RND_TOKEN:
 		ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
-
 		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
+		break;
 
-	} else if (func == PARS_RND_STR_TOKEN) {
-		ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
-
-		dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
-			  DATA_ENGLISH, 0);
-	} else {
+	default:
 		ut_error;
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Resolves the meaning of variables in an expression and the data types of
 functions. It is an error if some identifier cannot be resolved here. */
 static
 void
 pars_resolve_exp_variables_and_types(
 /*=================================*/
-	sel_node_t*	select_node,	/* in: select node or NULL; if
+	sel_node_t*	select_node,	/*!< in: select node or NULL; if
 					this is not NULL then the variable
 					sym nodes are added to the
 					copy_variables list of select_node */
-	que_node_t*	exp_node)	/* in: expression */
+	que_node_t*	exp_node)	/*!< in: expression */
 {
 	func_node_t*	func_node;
 	que_node_t*	arg;
@@ -417,7 +434,7 @@ pars_resolve_exp_variables_and_types(
 			que_node_get_data_type(node));
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Resolves the meaning of variables in an expression list. It is an error if
 some identifier cannot be resolved here. Resolves also the data types of
 functions. */
@@ -425,8 +442,8 @@ static
 void
 pars_resolve_exp_list_variables_and_types(
 /*======================================*/
-	sel_node_t*	select_node,	/* in: select node or NULL */
-	que_node_t*	exp_node)	/* in: expression list first node, or
+	sel_node_t*	select_node,	/*!< in: select node or NULL */
+	que_node_t*	exp_node)	/*!< in: expression list first node, or
 					NULL */
 {
 	while (exp_node) {
@@ -436,14 +453,14 @@ pars_resolve_exp_list_variables_and_types(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Resolves the columns in an expression. */
 static
 void
 pars_resolve_exp_columns(
 /*=====================*/
-	sym_node_t*	table_node,	/* in: first node in a table list */
-	que_node_t*	exp_node)	/* in: expression */
+	sym_node_t*	table_node,	/*!< in: first node in a table list */
+	que_node_t*	exp_node)	/*!< in: expression */
 {
 	func_node_t*	func_node;
 	que_node_t*	arg;
@@ -517,14 +534,14 @@ pars_resolve_exp_columns(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Resolves the meaning of columns in an expression list. */
 static
 void
 pars_resolve_exp_list_columns(
 /*==========================*/
-	sym_node_t*	table_node,	/* in: first node in a table list */
-	que_node_t*	exp_node)	/* in: expression list first node, or
+	sym_node_t*	table_node,	/*!< in: first node in a table list */
+	que_node_t*	exp_node)	/*!< in: expression list first node, or
 					NULL */
 {
 	while (exp_node) {
@@ -534,13 +551,13 @@ pars_resolve_exp_list_columns(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Retrieves the table definition for a table name id. */
 static
 void
 pars_retrieve_table_def(
 /*====================*/
-	sym_node_t*	sym_node)	/* in: table node */
+	sym_node_t*	sym_node)	/*!< in: table node */
 {
 	const char*	table_name;
 
@@ -557,14 +574,14 @@ pars_retrieve_table_def(
 	ut_a(sym_node->table);
 }
 
-/*************************************************************************
-Retrieves the table definitions for a list of table name ids. */
+/*********************************************************************//**
+Retrieves the table definitions for a list of table name ids.
+@return	number of tables */
 static
 ulint
 pars_retrieve_table_list_defs(
 /*==========================*/
-					/* out: number of tables */
-	sym_node_t*	sym_node)	/* in: first table node in list */
+	sym_node_t*	sym_node)	/*!< in: first table node in list */
 {
 	ulint		count		= 0;
 
@@ -584,13 +601,13 @@ pars_retrieve_table_list_defs(
 	return(count);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Adds all columns to the select list if the query is SELECT * FROM ... */
 static
 void
 pars_select_all_columns(
 /*====================*/
-	sel_node_t*	select_node)	/* in: select node already containing
+	sel_node_t*	select_node)	/*!< in: select node already containing
 					the table list */
 {
 	sym_node_t*	col_node;
@@ -621,17 +638,16 @@ pars_select_all_columns(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Parses a select list; creates a query graph node for the whole SELECT
-statement. */
-
+statement.
+@return	own: select node in a query tree */
+UNIV_INTERN
 sel_node_t*
 pars_select_list(
 /*=============*/
-					/* out, own: select node in a query
-					tree */
-	que_node_t*	select_list,	/* in: select list */
-	sym_node_t*	into_list)	/* in: variables list or NULL */
+	que_node_t*	select_list,	/*!< in: select list */
+	sym_node_t*	into_list)	/*!< in: variables list or NULL */
 {
 	sel_node_t*	node;
 
@@ -645,14 +661,14 @@ pars_select_list(
 	return(node);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if the query is an aggregate query, in which case the selct list must
 contain only aggregate function items. */
 static
 void
 pars_check_aggregate(
 /*=================*/
-	sel_node_t*	select_node)	/* in: select node already containing
+	sel_node_t*	select_node)	/*!< in: select node already containing
 					the select list */
 {
 	que_node_t*	exp_node;
@@ -688,21 +704,20 @@ pars_check_aggregate(
 	}
 }
 
-/*************************************************************************
-Parses a select statement. */
-
+/*********************************************************************//**
+Parses a select statement.
+@return	own: select node in a query tree */
+UNIV_INTERN
 sel_node_t*
 pars_select_statement(
 /*==================*/
-					/* out, own: select node in a query
-					tree */
-	sel_node_t*	select_node,	/* in: select node already containing
+	sel_node_t*	select_node,	/*!< in: select node already containing
 					the select list */
-	sym_node_t*	table_list,	/* in: table list */
-	que_node_t*	search_cond,	/* in: search condition or NULL */
-	pars_res_word_t* for_update,	/* in: NULL or &pars_update_token */
-	pars_res_word_t* lock_shared,	/* in: NULL or &pars_share_token */
-	order_node_t*	order_by)	/* in: NULL or an order-by node */
+	sym_node_t*	table_list,	/*!< in: table list */
+	que_node_t*	search_cond,	/*!< in: search condition or NULL */
+	pars_res_word_t* for_update,	/*!< in: NULL or &pars_update_token */
+	pars_res_word_t* lock_shared,	/*!< in: NULL or &pars_share_token */
+	order_node_t*	order_by)	/*!< in: NULL or an order-by node */
 {
 	select_node->state = SEL_NODE_OPEN;
 
@@ -772,16 +787,16 @@ pars_select_statement(
 	return(select_node);
 }
 
-/*************************************************************************
-Parses a cursor declaration. */
-
+/*********************************************************************//**
+Parses a cursor declaration.
+@return	sym_node */
+UNIV_INTERN
 que_node_t*
 pars_cursor_declaration(
 /*====================*/
-					/* out: sym_node */
-	sym_node_t*	sym_node,	/* in: cursor id node in the symbol
+	sym_node_t*	sym_node,	/*!< in: cursor id node in the symbol
 					table */
-	sel_node_t*	select_node)	/* in: select node */
+	sel_node_t*	select_node)	/*!< in: select node */
 {
 	sym_node->resolved = TRUE;
 	sym_node->token_type = SYM_CURSOR;
@@ -793,14 +808,14 @@ pars_cursor_declaration(
 	return(sym_node);
 }
 
-/*************************************************************************
-Parses a function declaration. */
-
+/*********************************************************************//**
+Parses a function declaration.
+@return	sym_node */
+UNIV_INTERN
 que_node_t*
 pars_function_declaration(
 /*======================*/
-					/* out: sym_node */
-	sym_node_t*	sym_node)	/* in: function id node in the symbol
+	sym_node_t*	sym_node)	/*!< in: function id node in the symbol
 					table */
 {
 	sym_node->resolved = TRUE;
@@ -813,17 +828,16 @@ pars_function_declaration(
 	return(sym_node);
 }
 
-/*************************************************************************
-Parses a delete or update statement start. */
-
+/*********************************************************************//**
+Parses a delete or update statement start.
+@return	own: update node in a query tree */
+UNIV_INTERN
 upd_node_t*
 pars_update_statement_start(
 /*========================*/
-					/* out, own: update node in a query
-					tree */
-	ibool		is_delete,	/* in: TRUE if delete */
-	sym_node_t*	table_sym,	/* in: table name node */
-	col_assign_node_t* col_assign_list)/* in: column assignment list, NULL
+	ibool		is_delete,	/*!< in: TRUE if delete */
+	sym_node_t*	table_sym,	/*!< in: table name node */
+	col_assign_node_t* col_assign_list)/*!< in: column assignment list, NULL
 					if delete */
 {
 	upd_node_t*	node;
@@ -838,15 +852,15 @@ pars_update_statement_start(
 	return(node);
 }
 
-/*************************************************************************
-Parses a column assignment in an update. */
-
+/*********************************************************************//**
+Parses a column assignment in an update.
+@return	column assignment node */
+UNIV_INTERN
 col_assign_node_t*
 pars_column_assignment(
 /*===================*/
-				/* out: column assignment node */
-	sym_node_t*	column,	/* in: column to assign */
-	que_node_t*	exp)	/* in: value to assign */
+	sym_node_t*	column,	/*!< in: column to assign */
+	que_node_t*	exp)	/*!< in: value to assign */
 {
 	col_assign_node_t*	node;
 
@@ -860,13 +874,13 @@ pars_column_assignment(
 	return(node);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Processes an update node assignment list. */
 static
 void
 pars_process_assign_list(
 /*=====================*/
-	upd_node_t*	node)	/* in: update node */
+	upd_node_t*	node)	/*!< in: update node */
 {
 	col_assign_node_t*	col_assign_list;
 	sym_node_t*		table_sym;
@@ -927,7 +941,8 @@ pars_process_assign_list(
 
 		if (!dict_col_get_fixed_size(
 			    dict_index_get_nth_col(clust_index,
-						   upd_field->field_no))) {
+						   upd_field->field_no),
+			    dict_table_is_comp(node->table))) {
 			changes_field_size = 0;
 		}
 
@@ -946,18 +961,17 @@ pars_process_assign_list(
 	node->cmpl_info = changes_ord_field | changes_field_size;
 }
 
-/*************************************************************************
-Parses an update or delete statement. */
-
+/*********************************************************************//**
+Parses an update or delete statement.
+@return	own: update node in a query tree */
+UNIV_INTERN
 upd_node_t*
 pars_update_statement(
 /*==================*/
-					/* out, own: update node in a query
-					tree */
-	upd_node_t*	node,		/* in: update node */
-	sym_node_t*	cursor_sym,	/* in: pointer to a cursor entry in
+	upd_node_t*	node,		/*!< in: update node */
+	sym_node_t*	cursor_sym,	/*!< in: pointer to a cursor entry in
 					the symbol table or NULL */
-	que_node_t*	search_cond)	/* in: search condition or NULL */
+	que_node_t*	search_cond)	/*!< in: search condition or NULL */
 {
 	sym_node_t*	table_sym;
 	sel_node_t*	sel_node;
@@ -1021,7 +1035,7 @@ pars_update_statement(
 
 	plan->no_prefetch = TRUE;
 
-	if (!((plan->index)->type & DICT_CLUSTERED)) {
+	if (!dict_index_is_clust(plan->index)) {
 
 		plan->must_get_clust = TRUE;
 
@@ -1030,33 +1044,19 @@ pars_update_statement(
 		node->pcur = &(plan->pcur);
 	}
 
-	if (!node->is_delete && node->searched_update
-	    && (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE)
-	    && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
-
-		/* The select node can perform the update in-place */
-
-		ut_a(plan->asc);
-
-		node->select_will_do_update = TRUE;
-		sel_node->select_will_do_update = TRUE;
-		sel_node->latch_mode = BTR_MODIFY_LEAF;
-	}
-
 	return(node);
 }
 
-/*************************************************************************
-Parses an insert statement. */
-
+/*********************************************************************//**
+Parses an insert statement.
+@return	own: update node in a query tree */
+UNIV_INTERN
 ins_node_t*
 pars_insert_statement(
 /*==================*/
-					/* out, own: update node in a query
-					tree */
-	sym_node_t*	table_sym,	/* in: table name node */
-	que_node_t*	values_list,	/* in: value expression list or NULL */
-	sel_node_t*	select)		/* in: select condition or NULL */
+	sym_node_t*	table_sym,	/*!< in: table name node */
+	que_node_t*	values_list,	/*!< in: value expression list or NULL */
+	sel_node_t*	select)		/*!< in: select condition or NULL */
 {
 	ins_node_t*	node;
 	dtuple_t*	row;
@@ -1104,19 +1104,19 @@ pars_insert_statement(
 	return(node);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Set the type of a dfield. */
 static
 void
 pars_set_dfield_type(
 /*=================*/
-	dfield_t*		dfield,		/* in: dfield */
-	pars_res_word_t*	type,		/* in: pointer to a type
+	dfield_t*		dfield,		/*!< in: dfield */
+	pars_res_word_t*	type,		/*!< in: pointer to a type
 						token */
-	ulint			len,		/* in: length, or 0 */
-	ibool			is_unsigned,	/* in: if TRUE, column is
+	ulint			len,		/*!< in: length, or 0 */
+	ibool			is_unsigned,	/*!< in: if TRUE, column is
 						UNSIGNED. */
-	ibool			is_not_null)	/* in: if TRUE, column is
+	ibool			is_not_null)	/*!< in: if TRUE, column is
 						NOT NULL. */
 {
 	ulint flags = 0;
@@ -1154,17 +1154,16 @@ pars_set_dfield_type(
 	}
 }
 
-/*************************************************************************
-Parses a variable declaration. */
-
+/*********************************************************************//**
+Parses a variable declaration.
+@return	own: symbol table node of type SYM_VAR */
+UNIV_INTERN
 sym_node_t*
 pars_variable_declaration(
 /*======================*/
-				/* out, own: symbol table node of type
-				SYM_VAR */
-	sym_node_t*	node,	/* in: symbol table node allocated for the
+	sym_node_t*	node,	/*!< in: symbol table node allocated for the
 				id of the variable */
-	pars_res_word_t* type)	/* in: pointer to a type token */
+	pars_res_word_t* type)	/*!< in: pointer to a type token */
 {
 	node->resolved = TRUE;
 	node->token_type = SYM_VAR;
@@ -1176,19 +1175,18 @@ pars_variable_declaration(
 	return(node);
 }
 
-/*************************************************************************
-Parses a procedure parameter declaration. */
-
+/*********************************************************************//**
+Parses a procedure parameter declaration.
+@return	own: symbol table node of type SYM_VAR */
+UNIV_INTERN
 sym_node_t*
 pars_parameter_declaration(
 /*=======================*/
-				/* out, own: symbol table node of type
-				SYM_VAR */
-	sym_node_t*	node,	/* in: symbol table node allocated for the
+	sym_node_t*	node,	/*!< in: symbol table node allocated for the
 				id of the parameter */
 	ulint		param_type,
-				/* in: PARS_INPUT or PARS_OUTPUT */
-	pars_res_word_t* type)	/* in: pointer to a type token */
+				/*!< in: PARS_INPUT or PARS_OUTPUT */
+	pars_res_word_t* type)	/*!< in: pointer to a type token */
 {
 	ut_a((param_type == PARS_INPUT) || (param_type == PARS_OUTPUT));
 
@@ -1199,14 +1197,14 @@ pars_parameter_declaration(
 	return(node);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Sets the parent field in a query node list. */
 static
 void
 pars_set_parent_in_list(
 /*====================*/
-	que_node_t*	node_list,	/* in: first node in a list */
-	que_node_t*	parent)		/* in: parent value to set in all
+	que_node_t*	node_list,	/*!< in: first node in a list */
+	que_node_t*	parent)		/*!< in: parent value to set in all
 					nodes of the list */
 {
 	que_common_t*	common;
@@ -1220,15 +1218,15 @@ pars_set_parent_in_list(
 	}
 }
 
-/*************************************************************************
-Parses an elsif element. */
-
+/*********************************************************************//**
+Parses an elsif element.
+@return	elsif node */
+UNIV_INTERN
 elsif_node_t*
 pars_elsif_element(
 /*===============*/
-					/* out: elsif node */
-	que_node_t*	cond,		/* in: if-condition */
-	que_node_t*	stat_list)	/* in: statement list */
+	que_node_t*	cond,		/*!< in: if-condition */
+	que_node_t*	stat_list)	/*!< in: statement list */
 {
 	elsif_node_t*	node;
 
@@ -1245,16 +1243,16 @@ pars_elsif_element(
 	return(node);
 }
 
-/*************************************************************************
-Parses an if-statement. */
-
+/*********************************************************************//**
+Parses an if-statement.
+@return	if-statement node */
+UNIV_INTERN
 if_node_t*
 pars_if_statement(
 /*==============*/
-					/* out: if-statement node */
-	que_node_t*	cond,		/* in: if-condition */
-	que_node_t*	stat_list,	/* in: statement list */
-	que_node_t*	else_part)	/* in: else-part statement list
+	que_node_t*	cond,		/*!< in: if-condition */
+	que_node_t*	stat_list,	/*!< in: statement list */
+	que_node_t*	else_part)	/*!< in: else-part statement list
 					or elsif element list */
 {
 	if_node_t*	node;
@@ -1296,15 +1294,15 @@ pars_if_statement(
 	return(node);
 }
 
-/*************************************************************************
-Parses a while-statement. */
-
+/*********************************************************************//**
+Parses a while-statement.
+@return	while-statement node */
+UNIV_INTERN
 while_node_t*
 pars_while_statement(
 /*=================*/
-					/* out: while-statement node */
-	que_node_t*	cond,		/* in: while-condition */
-	que_node_t*	stat_list)	/* in: statement list */
+	que_node_t*	cond,		/*!< in: while-condition */
+	que_node_t*	stat_list)	/*!< in: statement list */
 {
 	while_node_t*	node;
 
@@ -1323,17 +1321,17 @@ pars_while_statement(
 	return(node);
 }
 
-/*************************************************************************
-Parses a for-loop-statement. */
-
+/*********************************************************************//**
+Parses a for-loop-statement.
+@return	for-statement node */
+UNIV_INTERN
 for_node_t*
 pars_for_statement(
 /*===============*/
-					/* out: for-statement node */
-	sym_node_t*	loop_var,	/* in: loop variable */
-	que_node_t*	loop_start_limit,/* in: loop start expression */
-	que_node_t*	loop_end_limit,	/* in: loop end expression */
-	que_node_t*	stat_list)	/* in: statement list */
+	sym_node_t*	loop_var,	/*!< in: loop variable */
+	que_node_t*	loop_start_limit,/*!< in: loop start expression */
+	que_node_t*	loop_end_limit,	/*!< in: loop end expression */
+	que_node_t*	stat_list)	/*!< in: statement list */
 {
 	for_node_t*	node;
 
@@ -1359,13 +1357,13 @@ pars_for_statement(
 	return(node);
 }
 
-/*************************************************************************
-Parses an exit statement. */
-
+/*********************************************************************//**
+Parses an exit statement.
+@return	exit statement node */
+UNIV_INTERN
 exit_node_t*
 pars_exit_statement(void)
 /*=====================*/
-					/* out: exit statement node */
 {
 	exit_node_t*	node;
 
@@ -1375,13 +1373,13 @@ pars_exit_statement(void)
 	return(node);
 }
 
-/*************************************************************************
-Parses a return-statement. */
-
+/*********************************************************************//**
+Parses a return-statement.
+@return	return-statement node */
+UNIV_INTERN
 return_node_t*
 pars_return_statement(void)
 /*=======================*/
-					/* out: return-statement node */
 {
 	return_node_t*	node;
 
@@ -1392,15 +1390,15 @@ pars_return_statement(void)
 	return(node);
 }
 
-/*************************************************************************
-Parses an assignment statement. */
-
+/*********************************************************************//**
+Parses an assignment statement.
+@return	assignment statement node */
+UNIV_INTERN
 assign_node_t*
 pars_assignment_statement(
 /*======================*/
-				/* out: assignment statement node */
-	sym_node_t*	var,	/* in: variable to assign */
-	que_node_t*	val)	/* in: value to assign */
+	sym_node_t*	var,	/*!< in: variable to assign */
+	que_node_t*	val)	/*!< in: value to assign */
 {
 	assign_node_t*	node;
 
@@ -1420,15 +1418,15 @@ pars_assignment_statement(
 	return(node);
 }
 
-/*************************************************************************
-Parses a procedure call. */
-
+/*********************************************************************//**
+Parses a procedure call.
+@return	function node */
+UNIV_INTERN
 func_node_t*
 pars_procedure_call(
 /*================*/
-				/* out: function node */
-	que_node_t*	res_word,/* in: procedure name reserved word */
-	que_node_t*	args)	/* in: argument list */
+	que_node_t*	res_word,/*!< in: procedure name reserved word */
+	que_node_t*	args)	/*!< in: argument list */
 {
 	func_node_t*	node;
 
@@ -1439,17 +1437,17 @@ pars_procedure_call(
 	return(node);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Parses a fetch statement. into_list or user_func (but not both) must be
-non-NULL. */
-
+non-NULL.
+@return	fetch statement node */
+UNIV_INTERN
 fetch_node_t*
 pars_fetch_statement(
 /*=================*/
-					/* out: fetch statement node */
-	sym_node_t*	cursor,		/* in: cursor node */
-	sym_node_t*	into_list,	/* in: variables to set, or NULL */
-	sym_node_t*	user_func)	/* in: user function name, or NULL */
+	sym_node_t*	cursor,		/*!< in: cursor node */
+	sym_node_t*	into_list,	/*!< in: variables to set, or NULL */
+	sym_node_t*	user_func)	/*!< in: user function name, or NULL */
 {
 	sym_node_t*	cursor_decl;
 	fetch_node_t*	node;
@@ -1491,16 +1489,16 @@ pars_fetch_statement(
 	return(node);
 }
 
-/*************************************************************************
-Parses an open or close cursor statement. */
-
+/*********************************************************************//**
+Parses an open or close cursor statement.
+@return	fetch statement node */
+UNIV_INTERN
 open_node_t*
 pars_open_statement(
 /*================*/
-				/* out: fetch statement node */
-	ulint		type,	/* in: ROW_SEL_OPEN_CURSOR
+	ulint		type,	/*!< in: ROW_SEL_OPEN_CURSOR
 				or ROW_SEL_CLOSE_CURSOR */
-	sym_node_t*	cursor)	/* in: cursor node */
+	sym_node_t*	cursor)	/*!< in: cursor node */
 {
 	sym_node_t*	cursor_decl;
 	open_node_t*	node;
@@ -1521,14 +1519,14 @@ pars_open_statement(
 	return(node);
 }
 
-/*************************************************************************
-Parses a row_printf-statement. */
-
+/*********************************************************************//**
+Parses a row_printf-statement.
+@return	row_printf-statement node */
+UNIV_INTERN
 row_printf_node_t*
 pars_row_printf_statement(
 /*======================*/
-					/* out: row_printf-statement node */
-	sel_node_t*	sel_node)	/* in: select node */
+	sel_node_t*	sel_node)	/*!< in: select node */
 {
 	row_printf_node_t*	node;
 
@@ -1543,9 +1541,10 @@ pars_row_printf_statement(
 	return(node);
 }
 
-/*************************************************************************
-Parses a commit statement. */
-
+/*********************************************************************//**
+Parses a commit statement.
+@return	own: commit node struct */
+UNIV_INTERN
 commit_node_t*
 pars_commit_statement(void)
 /*=======================*/
@@ -1553,9 +1552,10 @@ pars_commit_statement(void)
 	return(commit_node_create(pars_sym_tab_global->heap));
 }
 
-/*************************************************************************
-Parses a rollback statement. */
-
+/*********************************************************************//**
+Parses a rollback statement.
+@return	own: rollback node struct */
+UNIV_INTERN
 roll_node_t*
 pars_rollback_statement(void)
 /*=========================*/
@@ -1563,22 +1563,21 @@ pars_rollback_statement(void)
 	return(roll_node_create(pars_sym_tab_global->heap));
 }
 
-/*************************************************************************
-Parses a column definition at a table creation. */
-
+/*********************************************************************//**
+Parses a column definition at a table creation.
+@return	column sym table node */
+UNIV_INTERN
 sym_node_t*
 pars_column_def(
 /*============*/
-						/* out: column sym table
-						node */
-	sym_node_t*		sym_node,	/* in: column node in the
+	sym_node_t*		sym_node,	/*!< in: column node in the
 						symbol table */
-	pars_res_word_t*	type,		/* in: data type */
-	sym_node_t*		len,		/* in: length of column, or
+	pars_res_word_t*	type,		/*!< in: data type */
+	sym_node_t*		len,		/*!< in: length of column, or
 						NULL */
-	void*			is_unsigned,	/* in: if not NULL, column
+	void*			is_unsigned,	/*!< in: if not NULL, column
 						is of type UNSIGNED. */
-	void*			is_not_null)	/* in: if not NULL, column
+	void*			is_not_null)	/*!< in: if not NULL, column
 						is of type NOT NULL. */
 {
 	ulint len2;
@@ -1595,18 +1594,18 @@ pars_column_def(
 	return(sym_node);
 }
 
-/*************************************************************************
-Parses a table creation operation. */
-
+/*********************************************************************//**
+Parses a table creation operation.
+@return	table create subgraph */
+UNIV_INTERN
 tab_node_t*
 pars_create_table(
 /*==============*/
-					/* out: table create subgraph */
-	sym_node_t*	table_sym,	/* in: table name node in the symbol
+	sym_node_t*	table_sym,	/*!< in: table name node in the symbol
 					table */
-	sym_node_t*	column_defs,	/* in: list of column names */
+	sym_node_t*	column_defs,	/*!< in: list of column names */
 	void*		not_fit_in_memory __attribute__((unused)))
-					/* in: a non-NULL pointer means that
+					/*!< in: a non-NULL pointer means that
 					this is a table which in simulations
 					should be simulated as not fitting
 					in memory; thread is put to sleep
@@ -1620,7 +1619,7 @@ pars_create_table(
 	dict_table_t*	table;
 	sym_node_t*	column;
 	tab_node_t*	node;
-	dtype_t*	dtype;
+	const dtype_t*	dtype;
 	ulint		n_cols;
 
 	n_cols = que_node_list_get_len(column_defs);
@@ -1657,20 +1656,20 @@ pars_create_table(
 	return(node);
 }
 
-/*************************************************************************
-Parses an index creation operation. */
-
+/*********************************************************************//**
+Parses an index creation operation.
+@return	index create subgraph */
+UNIV_INTERN
 ind_node_t*
 pars_create_index(
 /*==============*/
-					/* out: index create subgraph */
-	pars_res_word_t* unique_def,	/* in: not NULL if a unique index */
-	pars_res_word_t* clustered_def,	/* in: not NULL if a clustered index */
-	sym_node_t*	index_sym,	/* in: index name node in the symbol
+	pars_res_word_t* unique_def,	/*!< in: not NULL if a unique index */
+	pars_res_word_t* clustered_def,	/*!< in: not NULL if a clustered index */
+	sym_node_t*	index_sym,	/*!< in: index name node in the symbol
 					table */
-	sym_node_t*	table_sym,	/* in: table name node in the symbol
+	sym_node_t*	table_sym,	/*!< in: table name node in the symbol
 					table */
-	sym_node_t*	column_list)	/* in: list of column names */
+	sym_node_t*	column_list)	/*!< in: list of column names */
 {
 	dict_index_t*	index;
 	sym_node_t*	column;
@@ -1714,17 +1713,17 @@ pars_create_index(
 	return(node);
 }
 
-/*************************************************************************
-Parses a procedure definition. */
-
+/*********************************************************************//**
+Parses a procedure definition.
+@return	query fork node */
+UNIV_INTERN
 que_fork_t*
 pars_procedure_definition(
 /*======================*/
-					/* out: query fork node */
-	sym_node_t*	sym_node,	/* in: procedure id node in the symbol
+	sym_node_t*	sym_node,	/*!< in: procedure id node in the symbol
 					table */
-	sym_node_t*	param_list,	/* in: parameter declaration list */
-	que_node_t*	stat_list)	/* in: statement list */
+	sym_node_t*	param_list,	/*!< in: parameter declaration list */
+	que_node_t*	stat_list)	/*!< in: statement list */
 {
 	proc_node_t*	node;
 	que_fork_t*	fork;
@@ -1761,32 +1760,32 @@ pars_procedure_definition(
 	return(fork);
 }
 
-/*****************************************************************
+/*************************************************************//**
 Parses a stored procedure call, when this is not within another stored
 procedure, that is, the client issues a procedure call directly.
 In MySQL/InnoDB, stored InnoDB procedures are invoked via the
-parsed procedure tree, not via InnoDB SQL, so this function is not used. */
-
+parsed procedure tree, not via InnoDB SQL, so this function is not used.
+@return	query graph */
+UNIV_INTERN
 que_fork_t*
 pars_stored_procedure_call(
 /*=======================*/
-					/* out: query graph */
 	sym_node_t*	sym_node __attribute__((unused)))
-					/* in: stored procedure name */
+					/*!< in: stored procedure name */
 {
 	ut_error;
 	return(NULL);
 }
 
-/*****************************************************************
+/*************************************************************//**
 Retrieves characters to the lexical analyzer. */
-
+UNIV_INTERN
 void
 pars_get_lex_chars(
 /*===============*/
-	char*	buf,		/* in/out: buffer where to copy */
-	int*	result,		/* out: number of characters copied or EOF */
-	int	max_size)	/* in: maximum number of characters which fit
+	char*	buf,		/*!< in/out: buffer where to copy */
+	int*	result,		/*!< out: number of characters copied or EOF */
+	int	max_size)	/*!< in: maximum number of characters which fit
 				in the buffer */
 {
 	int	len;
@@ -1826,14 +1825,14 @@ pars_get_lex_chars(
 	pars_sym_tab_global->next_char_pos += len;
 }
 
-/*****************************************************************
+/*************************************************************//**
 Called by yyparse on error. */
-
+UNIV_INTERN
 void
 yyerror(
 /*====*/
 	const char*	s __attribute__((unused)))
-				/* in: error message string */
+				/*!< in: error message string */
 {
 	ut_ad(s);
 
@@ -1842,15 +1841,15 @@ yyerror(
 	ut_error;
 }
 
-/*****************************************************************
-Parses an SQL string returning the query graph. */
-
+/*************************************************************//**
+Parses an SQL string returning the query graph.
+@return	own: the query graph */
+UNIV_INTERN
 que_t*
 pars_sql(
 /*=====*/
-				/* out, own: the query graph */
-	pars_info_t*	info,	/* in: extra information, or NULL */
-	const char*	str)	/* in: SQL string */
+	pars_info_t*	info,	/*!< in: extra information, or NULL */
+	const char*	str)	/*!< in: SQL string */
 {
 	sym_node_t*	sym_node;
 	mem_heap_t*	heap;
@@ -1891,19 +1890,19 @@ pars_sql(
 	return(graph);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Completes a query graph by adding query thread and fork nodes
 above it and prepares the graph for running. The fork created is of
-type QUE_FORK_MYSQL_INTERFACE. */
-
+type QUE_FORK_MYSQL_INTERFACE.
+@return	query thread node to run */
+UNIV_INTERN
 que_thr_t*
 pars_complete_graph_for_exec(
 /*=========================*/
-				/* out: query thread node to run */
-	que_node_t*	node,	/* in: root node for an incomplete
+	que_node_t*	node,	/*!< in: root node for an incomplete
 				query graph */
-	trx_t*		trx,	/* in: transaction handle */
-	mem_heap_t*	heap)	/* in: memory heap from which allocated */
+	trx_t*		trx,	/*!< in: transaction handle */
+	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
 {
 	que_fork_t*	fork;
 	que_thr_t*	thr;
@@ -1922,13 +1921,13 @@ pars_complete_graph_for_exec(
 	return(thr);
 }
 
-/********************************************************************
-Create parser info struct.*/
-
+/****************************************************************//**
+Create parser info struct.
+@return	own: info struct */
+UNIV_INTERN
 pars_info_t*
 pars_info_create(void)
 /*==================*/
-		/* out, own: info struct */
 {
 	pars_info_t*	info;
 	mem_heap_t*	heap;
@@ -1946,29 +1945,29 @@ pars_info_create(void)
 	return(info);
 }
 
-/********************************************************************
-Free info struct and everything it contains.*/
-
+/****************************************************************//**
+Free info struct and everything it contains. */
+UNIV_INTERN
 void
 pars_info_free(
 /*===========*/
-	pars_info_t*	info)	/* in: info struct */
+	pars_info_t*	info)	/*!< in, own: info struct */
 {
 	mem_heap_free(info->heap);
 }
 
-/********************************************************************
+/****************************************************************//**
 Add bound literal. */
-
+UNIV_INTERN
 void
 pars_info_add_literal(
 /*==================*/
-	pars_info_t*	info,		/* in: info struct */
-	const char*	name,		/* in: name */
-	const void*	address,	/* in: address */
-	ulint		length,		/* in: length of data */
-	ulint		type,		/* in: type, e.g. DATA_FIXBINARY */
-	ulint		prtype)		/* in: precise type, e.g.
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	const void*	address,	/*!< in: address */
+	ulint		length,		/*!< in: length of data */
+	ulint		type,		/*!< in: type, e.g. DATA_FIXBINARY */
+	ulint		prtype)		/*!< in: precise type, e.g.
 					DATA_UNSIGNED */
 {
 	pars_bound_lit_t*	pbl;
@@ -1990,22 +1989,22 @@ pars_info_add_literal(
 	ib_vector_push(info->bound_lits, pbl);
 }
 
-/********************************************************************
+/****************************************************************//**
 Equivalent to pars_info_add_literal(info, name, str, strlen(str),
 DATA_VARCHAR, DATA_ENGLISH). */
-
+UNIV_INTERN
 void
 pars_info_add_str_literal(
 /*======================*/
-	pars_info_t*	info,		/* in: info struct */
-	const char*	name,		/* in: name */
-	const char*	str)		/* in: string */
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	const char*	str)		/*!< in: string */
 {
 	pars_info_add_literal(info, name, str, strlen(str),
 			      DATA_VARCHAR, DATA_ENGLISH);
 }
 
-/********************************************************************
+/****************************************************************//**
 Equivalent to:
 
 char buf[4];
@@ -2014,13 +2013,13 @@ pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
 
 except that the buffer is dynamically allocated from the info struct's
 heap. */
-
+UNIV_INTERN
 void
 pars_info_add_int4_literal(
 /*=======================*/
-	pars_info_t*	info,		/* in: info struct */
-	const char*	name,		/* in: name */
-	lint		val)		/* in: value */
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	lint		val)		/*!< in: value */
 {
 	byte*	buf = mem_heap_alloc(info->heap, 4);
 
@@ -2028,7 +2027,7 @@ pars_info_add_int4_literal(
 	pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
 }
 
-/********************************************************************
+/****************************************************************//**
 Equivalent to:
 
 char buf[8];
@@ -2037,13 +2036,13 @@ pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
 
 except that the buffer is dynamically allocated from the info struct's
 heap. */
-
+UNIV_INTERN
 void
 pars_info_add_dulint_literal(
 /*=========================*/
-	pars_info_t*	info,		/* in: info struct */
-	const char*	name,		/* in: name */
-	dulint		val)		/* in: value */
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	dulint		val)		/*!< in: value */
 {
 	byte*	buf = mem_heap_alloc(info->heap, 8);
 
@@ -2052,16 +2051,16 @@ pars_info_add_dulint_literal(
 	pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
 }
 
-/********************************************************************
+/****************************************************************//**
 Add user function. */
-
+UNIV_INTERN
 void
 pars_info_add_function(
 /*===================*/
-	pars_info_t*		info,	/* in: info struct */
-	const char*		name,	/* in: function name */
-	pars_user_func_cb_t	func,	/* in: function address */
-	void*			arg)	/* in: user-supplied argument */
+	pars_info_t*		info,	/*!< in: info struct */
+	const char*		name,	/*!< in: function name */
+	pars_user_func_cb_t	func,	/*!< in: function address */
+	void*			arg)	/*!< in: user-supplied argument */
 {
 	pars_user_func_t*	puf;
 
@@ -2080,15 +2079,15 @@ pars_info_add_function(
 	ib_vector_push(info->funcs, puf);
 }
 
-/********************************************************************
+/****************************************************************//**
 Add bound id. */
-
+UNIV_INTERN
 void
 pars_info_add_id(
 /*=============*/
-	pars_info_t*	info,		/* in: info struct */
-	const char*	name,		/* in: name */
-	const char*	id)		/* in: id */
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	const char*	id)		/*!< in: id */
 {
 	pars_bound_id_t*	bid;
 
@@ -2106,16 +2105,15 @@ pars_info_add_id(
 	ib_vector_push(info->bound_ids, bid);
 }
 
-/********************************************************************
-Get user function with the given name.*/
-
+/****************************************************************//**
+Get user function with the given name.
+@return	user func, or NULL if not found */
+UNIV_INTERN
 pars_user_func_t*
 pars_info_get_user_func(
 /*====================*/
-					/* out: user func, or NULL if not
-					found */
-	pars_info_t*		info,	/* in: info struct */
-	const char*		name)	/* in: function name to find*/
+	pars_info_t*		info,	/*!< in: info struct */
+	const char*		name)	/*!< in: function name to find*/
 {
 	ulint		i;
 	ib_vector_t*	vec;
@@ -2137,16 +2135,15 @@ pars_info_get_user_func(
 	return(NULL);
 }
 
-/********************************************************************
-Get bound literal with the given name.*/
-
+/****************************************************************//**
+Get bound literal with the given name.
+@return	bound literal, or NULL if not found */
+UNIV_INTERN
 pars_bound_lit_t*
 pars_info_get_bound_lit(
 /*====================*/
-					/* out: bound literal, or NULL if
-					not found */
-	pars_info_t*		info,	/* in: info struct */
-	const char*		name)	/* in: bound literal name to find */
+	pars_info_t*		info,	/*!< in: info struct */
+	const char*		name)	/*!< in: bound literal name to find */
 {
 	ulint		i;
 	ib_vector_t*	vec;
@@ -2168,16 +2165,15 @@ pars_info_get_bound_lit(
 	return(NULL);
 }
 
-/********************************************************************
-Get bound id with the given name.*/
-
+/****************************************************************//**
+Get bound id with the given name.
+@return	bound id, or NULL if not found */
+UNIV_INTERN
 pars_bound_id_t*
 pars_info_get_bound_id(
 /*===================*/
-					/* out: bound id, or NULL if not
-					found */
-	pars_info_t*		info,	/* in: info struct */
-	const char*		name)	/* in: bound id name to find */
+	pars_info_t*		info,	/*!< in: info struct */
+	const char*		name)	/*!< in: bound id name to find */
 {
 	ulint		i;
 	ib_vector_t*	vec;
diff --git a/storage/innobase/pars/pars0sym.c b/storage/innodb_plugin/pars/pars0sym.c
similarity index 67%
rename from storage/innobase/pars/pars0sym.c
rename to storage/innodb_plugin/pars/pars0sym.c
index 2d56fff2d42..b56350116bb 100644
--- a/storage/innobase/pars/pars0sym.c
+++ b/storage/innodb_plugin/pars/pars0sym.c
@@ -1,7 +1,24 @@
-/******************************************************
-SQL parser symbol table
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file pars/pars0sym.c
+SQL parser symbol table
 
 Created 12/15/1997 Heikki Tuuri
 *******************************************************/
@@ -21,14 +38,14 @@ Created 12/15/1997 Heikki Tuuri
 #include "eval0eval.h"
 #include "row0sel.h"
 
-/**********************************************************************
-Creates a symbol table for a single stored procedure or query. */
-
+/******************************************************************//**
+Creates a symbol table for a single stored procedure or query.
+@return	own: symbol table */
+UNIV_INTERN
 sym_tab_t*
 sym_tab_create(
 /*===========*/
-				/* out, own: symbol table */
-	mem_heap_t*	heap)	/* in: memory heap where to create */
+	mem_heap_t*	heap)	/*!< in: memory heap where to create */
 {
 	sym_tab_t*	sym_tab;
 
@@ -42,15 +59,15 @@ sym_tab_create(
 	return(sym_tab);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Frees the memory allocated dynamically AFTER parsing phase for variables
 etc. in the symbol table. Does not free the mem heap where the table was
 originally created. Frees also SQL explicit cursor definitions. */
-
+UNIV_INTERN
 void
 sym_tab_free_private(
 /*=================*/
-	sym_tab_t*	sym_tab)	/* in, own: symbol table */
+	sym_tab_t*	sym_tab)	/*!< in, own: symbol table */
 {
 	sym_node_t*	sym;
 	func_node_t*	func;
@@ -80,15 +97,15 @@ sym_tab_free_private(
 	}
 }
 
-/**********************************************************************
-Adds an integer literal to a symbol table. */
-
+/******************************************************************//**
+Adds an integer literal to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
 sym_node_t*
 sym_tab_add_int_lit(
 /*================*/
-					/* out: symbol table node */
-	sym_tab_t*	sym_tab,	/* in: symbol table */
-	ulint		val)		/* in: integer value */
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	ulint		val)		/*!< in: integer value */
 {
 	sym_node_t*	node;
 	byte*		data;
@@ -102,7 +119,7 @@ sym_tab_add_int_lit(
 
 	node->indirection = NULL;
 
-	dtype_set(&(node->common.val.type), DATA_INT, 0, 4);
+	dtype_set(dfield_get_type(&node->common.val), DATA_INT, 0, 4);
 
 	data = mem_heap_alloc(sym_tab->heap, 4);
 	mach_write_to_4(data, val);
@@ -120,17 +137,17 @@ sym_tab_add_int_lit(
 	return(node);
 }
 
-/**********************************************************************
-Adds a string literal to a symbol table. */
-
+/******************************************************************//**
+Adds a string literal to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
 sym_node_t*
 sym_tab_add_str_lit(
 /*================*/
-					/* out: symbol table node */
-	sym_tab_t*	sym_tab,	/* in: symbol table */
-	byte*		str,		/* in: string with no quotes around
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	byte*		str,		/*!< in: string with no quotes around
 					it */
-	ulint		len)		/* in: string length */
+	ulint		len)		/*!< in: string length */
 {
 	sym_node_t*	node;
 	byte*		data;
@@ -144,7 +161,8 @@ sym_tab_add_str_lit(
 
 	node->indirection = NULL;
 
-	dtype_set(&(node->common.val.type), DATA_VARCHAR, DATA_ENGLISH, 0);
+	dtype_set(dfield_get_type(&node->common.val),
+		  DATA_VARCHAR, DATA_ENGLISH, 0);
 
 	if (len) {
 		data = mem_heap_alloc(sym_tab->heap, len);
@@ -166,16 +184,16 @@ sym_tab_add_str_lit(
 	return(node);
 }
 
-/**********************************************************************
-Add a bound literal to a symbol table. */
-
+/******************************************************************//**
+Add a bound literal to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
 sym_node_t*
 sym_tab_add_bound_lit(
 /*==================*/
-					/* out: symbol table node */
-	sym_tab_t*	sym_tab,	/* in: symbol table */
-	const char*	name,		/* in: name of bound literal */
-	ulint*		lit_type)	/* out: type of literal (PARS_*_LIT) */
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	const char*	name,		/*!< in: name of bound literal */
+	ulint*		lit_type)	/*!< out: type of literal (PARS_*_LIT) */
 {
 	sym_node_t*		node;
 	pars_bound_lit_t*	blit;
@@ -226,7 +244,8 @@ sym_tab_add_bound_lit(
 		ut_error;
 	}
 
-	dtype_set(&(node->common.val.type), blit->type, blit->prtype, len);
+	dtype_set(dfield_get_type(&node->common.val),
+		  blit->type, blit->prtype, len);
 
 	dfield_set_data(&(node->common.val), blit->address, blit->length);
 
@@ -241,14 +260,14 @@ sym_tab_add_bound_lit(
 	return(node);
 }
 
-/**********************************************************************
-Adds an SQL null literal to a symbol table. */
-
+/******************************************************************//**
+Adds an SQL null literal to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
 sym_node_t*
 sym_tab_add_null_lit(
 /*=================*/
-					/* out: symbol table node */
-	sym_tab_t*	sym_tab)	/* in: symbol table */
+	sym_tab_t*	sym_tab)	/*!< in: symbol table */
 {
 	sym_node_t*	node;
 
@@ -261,9 +280,9 @@ sym_tab_add_null_lit(
 
 	node->indirection = NULL;
 
-	node->common.val.type.mtype = DATA_ERROR;
+	dfield_get_type(&node->common.val)->mtype = DATA_ERROR;
 
-	dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL);
+	dfield_set_null(&node->common.val);
 
 	node->common.val_buf_size = 0;
 	node->prefetch_buf = NULL;
@@ -276,16 +295,16 @@ sym_tab_add_null_lit(
 	return(node);
 }
 
-/**********************************************************************
-Adds an identifier to a symbol table. */
-
+/******************************************************************//**
+Adds an identifier to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
 sym_node_t*
 sym_tab_add_id(
 /*===========*/
-					/* out: symbol table node */
-	sym_tab_t*	sym_tab,	/* in: symbol table */
-	byte*		name,		/* in: identifier name */
-	ulint		len)		/* in: identifier length */
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	byte*		name,		/*!< in: identifier name */
+	ulint		len)		/*!< in: identifier length */
 {
 	sym_node_t*	node;
 
@@ -301,7 +320,7 @@ sym_tab_add_id(
 
 	UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
 
-	dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL);
+	dfield_set_null(&node->common.val);
 
 	node->common.val_buf_size = 0;
 	node->prefetch_buf = NULL;
@@ -312,15 +331,15 @@ sym_tab_add_id(
 	return(node);
 }
 
-/**********************************************************************
-Add a bound identifier to a symbol table. */
-
+/******************************************************************//**
+Add a bound identifier to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
 sym_node_t*
 sym_tab_add_bound_id(
 /*===========*/
-					/* out: symbol table node */
-	sym_tab_t*	sym_tab,	/* in: symbol table */
-	const char*	name)		/* in: name of bound id */
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	const char*	name)		/*!< in: name of bound id */
 {
 	sym_node_t*		node;
 	pars_bound_id_t*	bid;
@@ -340,7 +359,7 @@ sym_tab_add_bound_id(
 
 	UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
 
-	dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL);
+	dfield_set_null(&node->common.val);
 
 	node->common.val_buf_size = 0;
 	node->prefetch_buf = NULL;
diff --git a/storage/innodb_plugin/plug.in b/storage/innodb_plugin/plug.in
new file mode 100644
index 00000000000..6daa6c5daed
--- /dev/null
+++ b/storage/innodb_plugin/plug.in
@@ -0,0 +1,156 @@
+#
+# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+# 
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+MYSQL_STORAGE_ENGINE(innodb_plugin,, [InnoDB Storage Engine],
+        [Transactional Tables using InnoDB], [max,max-no-ndb])
+MYSQL_PLUGIN_DIRECTORY(innodb_plugin, [storage/innodb_plugin])
+MYSQL_PLUGIN_DYNAMIC(innodb_plugin,  [ha_innodb_plugin.la])
+MYSQL_PLUGIN_ACTIONS(innodb_plugin,  [
+  AC_CHECK_HEADERS(sched.h)
+  AC_CHECK_SIZEOF(int, 4)
+  AC_CHECK_SIZEOF(long, 4)
+  AC_CHECK_SIZEOF(void*, 4)
+  AC_CHECK_FUNCS(sched_yield fdatasync localtime_r)
+  AC_C_BIGENDIAN
+  case "$target_os" in
+	lin*)
+		CFLAGS="$CFLAGS -DUNIV_LINUX";;
+	hpux10*)
+		CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX -DUNIV_HPUX10";;
+	hp*)
+		CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX";;
+	aix*)
+		CFLAGS="$CFLAGS -DUNIV_AIX";;
+	irix*|osf*|sysv5uw7*|openbsd*)
+		CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
+	*solaris*|*SunOS*)
+		# Begin Solaris atomic function checks
+		AC_CHECK_FUNCS(atomic_cas_ulong atomic_cas_32 \
+			       atomic_cas_64 atomic_add_long,
+			AC_DEFINE(
+				[HAVE_SOLARIS_ATOMICS],
+				[1],
+				[Define to 1 if Solaris supports \
+				 atomic functions.]))
+		### End Solaris atomic function checks
+
+		CFLAGS="$CFLAGS -DUNIV_SOLARIS";;
+  esac
+  INNODB_DYNAMIC_CFLAGS="-DMYSQL_DYNAMIC_PLUGIN"
+  case "$target_cpu---$target_os" in
+	x86_64---*)
+		# The AMD64 ABI forbids absolute addresses in shared libraries
+		;;
+	*---solaris*|*---SunOS*)
+		# Shared objects must be linked from PIC code on Solaris.
+		;;
+	*86---)
+		# Use absolute addresses on IA-32
+		INNODB_DYNAMIC_CFLAGS="$INNODB_DYNAMIC_CFLAGS -prefer-non-pic"
+		;;
+  esac
+  AC_SUBST(INNODB_DYNAMIC_CFLAGS)
+  AC_MSG_CHECKING(whether pthread_t can be used by GCC atomic builtins)
+  AC_TRY_RUN(
+    [
+      #include <pthread.h>
+      #include <string.h>
+
+      int main(int argc, char** argv) {
+        pthread_t       x1;
+        pthread_t       x2;
+        pthread_t       x3;
+
+	memset(&x1, 0x0, sizeof(x1));
+	memset(&x2, 0x0, sizeof(x2));
+	memset(&x3, 0x0, sizeof(x3));
+
+        __sync_bool_compare_and_swap(&x1, x2, x3);
+
+        return(0);
+      }
+    ],
+    [
+      AC_DEFINE([HAVE_ATOMIC_PTHREAD_T], [1],
+                [pthread_t can be used by GCC atomic builtins])
+      AC_MSG_RESULT(yes)
+    ],
+    [
+      AC_MSG_RESULT(no)
+    ]
+    )
+
+  # Try using solaris atomics on SunOS if GCC atomics are not available
+  AC_CHECK_DECLS(
+    [HAVE_ATOMIC_PTHREAD_T],
+    [
+      AC_MSG_NOTICE(no need to check pthread_t size)
+    ],
+    [
+      AC_CHECK_DECLS(
+        [HAVE_SOLARIS_ATOMICS],
+        [
+          AC_MSG_CHECKING(checking if pthread_t size is integral)
+          AC_TRY_RUN(
+            [
+              #include <pthread.h>
+              int main()
+              {
+                pthread_t x = 0;
+                return(0);
+              }
+            ],
+            [
+              AC_DEFINE([HAVE_ATOMIC_PTHREAD_T], [1],
+                [pthread_t can be used by solaris atomics])
+              AC_MSG_RESULT(yes)
+              # size of pthread_t is needed for typed solaris atomics
+              AC_CHECK_SIZEOF([pthread_t], [], [#include <pthread.h>])
+            ],
+            [
+              AC_MSG_RESULT(no)
+            ])
+        ])
+    ])
+  # Check for x86 PAUSE instruction
+  AC_MSG_CHECKING(for x86 PAUSE instruction)
+  # We have to actually try running the test program, because of a bug
+  # in Solaris on x86_64, where it wrongly reports that PAUSE is not
+  # supported when trying to run an application. See
+  # http://bugs.opensolaris.org/bugdatabase/printableBug.do?bug_id=6478684
+  # We use ib_ prefix to avoid collisoins if this code is added to
+  # mysql's configure.in.
+  AC_TRY_RUN(
+    [
+      int main() {
+        __asm__ __volatile__ ("pause");
+        return(0);
+      }
+    ],
+    [
+      AC_DEFINE([IB_HAVE_PAUSE_INSTRUCTION], [1], [Does x86 PAUSE instruction exist])
+      AC_MSG_RESULT(yes)
+    ],
+    [
+      AC_MSG_RESULT(no)
+    ],
+    [
+      AC_MSG_RESULT(no)
+    ]
+  )
+  ])
+
+# vim: set ft=config:
diff --git a/storage/innobase/que/que0que.c b/storage/innodb_plugin/que/que0que.c
similarity index 87%
rename from storage/innobase/que/que0que.c
rename to storage/innodb_plugin/que/que0que.c
index bf83f28f04e..54b1e7535fa 100644
--- a/storage/innobase/que/que0que.c
+++ b/storage/innodb_plugin/que/que0que.c
@@ -1,7 +1,24 @@
-/******************************************************
-Query graph
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file que/que0que.c
+Query graph
 
 Created 5/27/1996 Heikki Tuuri
 *******************************************************/
@@ -31,11 +48,11 @@ Created 5/27/1996 Heikki Tuuri
 #define QUE_ROUND_ROBIN_LIMIT	(64 * 256 * 256 * 256)
 #define QUE_MAX_LOOPS_WITHOUT_CHECK	16
 
+#ifdef UNIV_DEBUG
 /* If the following flag is set TRUE, the module will print trace info
 of SQL execution in the UNIV_SQL_DEBUG version */
-ibool	que_trace_on		= FALSE;
-
-ibool	que_always_false	= FALSE;
+UNIV_INTERN ibool	que_trace_on		= FALSE;
+#endif /* UNIV_DEBUG */
 
 /* Short introduction to query graphs
    ==================================
@@ -107,7 +124,7 @@ When the execution of the graph completes, it is like returning
 from a subprocedure: the query thread which requested the operation
 starts running again. */
 
-/**************************************************************************
+/**********************************************************************//**
 Moves a thread from another state to the QUE_THR_RUNNING state. Increments
 the n_active_thrs counters of the query graph and transaction.
 ***NOTE***: This is the only function in which such a transition is allowed
@@ -116,35 +133,35 @@ static
 void
 que_thr_move_to_run_state(
 /*======================*/
-	que_thr_t*	thr);	/* in: an query thread */
+	que_thr_t*	thr);	/*!< in: an query thread */
 
-/***************************************************************************
+/***********************************************************************//**
 Adds a query graph to the session's list of graphs. */
-
+UNIV_INTERN
 void
 que_graph_publish(
 /*==============*/
-	que_t*	graph,	/* in: graph */
-	sess_t*	sess)	/* in: session */
+	que_t*	graph,	/*!< in: graph */
+	sess_t*	sess)	/*!< in: session */
 {
 	ut_ad(mutex_own(&kernel_mutex));
 
 	UT_LIST_ADD_LAST(graphs, sess->graphs, graph);
 }
 
-/***************************************************************************
-Creates a query graph fork node. */
-
+/***********************************************************************//**
+Creates a query graph fork node.
+@return	own: fork node */
+UNIV_INTERN
 que_fork_t*
 que_fork_create(
 /*============*/
-					/* out, own: fork node */
-	que_t*		graph,		/* in: graph, if NULL then this
+	que_t*		graph,		/*!< in: graph, if NULL then this
 					fork node is assumed to be the
 					graph root */
-	que_node_t*	parent,		/* in: parent node */
-	ulint		fork_type,	/* in: fork type */
-	mem_heap_t*	heap)		/* in: memory heap where created */
+	que_node_t*	parent,		/*!< in: parent node */
+	ulint		fork_type,	/*!< in: fork type */
+	mem_heap_t*	heap)		/*!< in: memory heap where created */
 {
 	que_fork_t*	fork;
 
@@ -178,15 +195,15 @@ que_fork_create(
 	return(fork);
 }
 
-/***************************************************************************
-Creates a query graph thread node. */
-
+/***********************************************************************//**
+Creates a query graph thread node.
+@return	own: query thread node */
+UNIV_INTERN
 que_thr_t*
 que_thr_create(
 /*===========*/
-				/* out, own: query thread node */
-	que_fork_t*	parent,	/* in: parent node, i.e., a fork node */
-	mem_heap_t*	heap)	/* in: memory heap where created */
+	que_fork_t*	parent,	/*!< in: parent node, i.e., a fork node */
+	mem_heap_t*	heap)	/*!< in: memory heap where created */
 {
 	que_thr_t*	thr;
 
@@ -214,20 +231,20 @@ que_thr_create(
 	return(thr);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Moves a suspended query thread to the QUE_THR_RUNNING state and may release
 a single worker thread to execute it. This function should be used to end
 the wait state of a query thread waiting for a lock or a stored procedure
 completion. */
-
+UNIV_INTERN
 void
 que_thr_end_wait(
 /*=============*/
-	que_thr_t*	thr,		/* in: query thread in the
+	que_thr_t*	thr,		/*!< in: query thread in the
 					QUE_THR_LOCK_WAIT,
 					or QUE_THR_PROCEDURE_WAIT, or
 					QUE_THR_SIG_REPLY_WAIT state */
-	que_thr_t**	next_thr)	/* in/out: next query thread to run;
+	que_thr_t**	next_thr)	/*!< in/out: next query thread to run;
 					if the value which is passed in is
 					a pointer to a NULL pointer, then the
 					calling function can start running
@@ -262,13 +279,13 @@ que_thr_end_wait(
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Same as que_thr_end_wait, but no parameter next_thr available. */
-
+UNIV_INTERN
 void
 que_thr_end_wait_no_next_thr(
 /*=========================*/
-	que_thr_t*	thr)	/* in: query thread in the QUE_THR_LOCK_WAIT,
+	que_thr_t*	thr)	/*!< in: query thread in the QUE_THR_LOCK_WAIT,
 				or QUE_THR_PROCEDURE_WAIT, or
 				QUE_THR_SIG_REPLY_WAIT state */
 {
@@ -299,13 +316,13 @@ que_thr_end_wait_no_next_thr(
 	/* srv_que_task_enqueue_low(thr); */
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Inits a query thread for a command. */
 UNIV_INLINE
 void
 que_thr_init_command(
 /*=================*/
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	thr->run_node = thr;
 	thr->prev_node = thr->common.parent;
@@ -313,20 +330,19 @@ que_thr_init_command(
 	que_thr_move_to_run_state(thr);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Starts execution of a command in a query fork. Picks a query thread which
 is not in the QUE_THR_RUNNING state and moves it to that state. If none
 can be chosen, a situation which may arise in parallelized fetches, NULL
-is returned. */
-
+is returned.
+@return a query thread of the graph moved to QUE_THR_RUNNING state, or
+NULL; the query thread should be executed by que_run_threads by the
+caller */
+UNIV_INTERN
 que_thr_t*
 que_fork_start_command(
 /*===================*/
-				/* out: a query thread of the graph moved to
-				QUE_THR_RUNNING state, or NULL; the query
-				thread should be executed by que_run_threads
-				by the caller */
-	que_fork_t*	fork)	/* in: a query fork */
+	que_fork_t*	fork)	/*!< in: a query fork */
 {
 	que_thr_t*	thr;
 	que_thr_t*	suspended_thr = NULL;
@@ -336,6 +352,9 @@ que_fork_start_command(
 
 	fork->last_sel_node = NULL;
 
+	suspended_thr = NULL;
+	completed_thr = NULL;
+
 	/* Choose the query thread to run: usually there is just one thread,
 	but in a parallelized select, which necessarily is non-scrollable,
 	there may be several to choose from */
@@ -399,16 +418,16 @@ que_fork_start_command(
 	return(thr);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 After signal handling is finished, returns control to a query graph error
 handling routine. (Currently, just returns the control to the root of the
 graph so that the graph can communicate an error message to the client.) */
-
+UNIV_INTERN
 void
 que_fork_error_handle(
 /*==================*/
-	trx_t*	trx __attribute__((unused)),	/* in: trx */
-	que_t*	fork)	/* in: query graph which was run before signal
+	trx_t*	trx __attribute__((unused)),	/*!< in: trx */
+	que_t*	fork)	/*!< in: query graph which was run before signal
 			handling started, NULL not allowed */
 {
 	que_thr_t*	thr;
@@ -440,16 +459,16 @@ que_fork_error_handle(
 	srv_que_task_enqueue_low(thr);
 }
 
-/********************************************************************
-Tests if all the query threads in the same fork have a given state. */
+/****************************************************************//**
+Tests if all the query threads in the same fork have a given state.
+@return TRUE if all the query threads in the same fork were in the
+given state */
 UNIV_INLINE
 ibool
 que_fork_all_thrs_in_state(
 /*=======================*/
-				/* out: TRUE if all the query threads in the
-				same fork were in the given state */
-	que_fork_t*	fork,	/* in: query fork */
-	ulint		state)	/* in: state */
+	que_fork_t*	fork,	/*!< in: query fork */
+	ulint		state)	/*!< in: state */
 {
 	que_thr_t*	thr_node;
 
@@ -467,13 +486,13 @@ que_fork_all_thrs_in_state(
 	return(TRUE);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Calls que_graph_free_recursive for statements in a statement list. */
 static
 void
 que_graph_free_stat_list(
 /*=====================*/
-	que_node_t*	node)	/* in: first query graph node in the list */
+	que_node_t*	node)	/*!< in: first query graph node in the list */
 {
 	while (node) {
 		que_graph_free_recursive(node);
@@ -482,14 +501,14 @@ que_graph_free_stat_list(
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Frees a query graph, but not the heap where it was created. Does not free
 explicit cursor declarations, they are freed in que_graph_free. */
-
+UNIV_INTERN
 void
 que_graph_free_recursive(
 /*=====================*/
-	que_node_t*	node)	/* in: query graph node */
+	que_node_t*	node)	/*!< in: query graph node */
 {
 	que_fork_t*	fork;
 	que_thr_t*	thr;
@@ -646,13 +665,13 @@ que_graph_free_recursive(
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Frees a query graph. */
-
+UNIV_INTERN
 void
 que_graph_free(
 /*===========*/
-	que_t*	graph)	/* in: query graph; we assume that the memory
+	que_t*	graph)	/*!< in: query graph; we assume that the memory
 			heap where this graph was created is private
 			to this graph: if not, then use
 			que_graph_free_recursive and free the heap
@@ -677,46 +696,14 @@ que_graph_free(
 	mem_heap_free(graph->heap);
 }
 
-/**************************************************************************
-Checks if the query graph is in a state where it should be freed, and
-frees it in that case. If the session is in a state where it should be
-closed, also this is done. */
-
-ibool
-que_graph_try_free(
-/*===============*/
-			/* out: TRUE if freed */
-	que_t*	graph)	/* in: query graph */
-{
-	sess_t*	sess;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	sess = (graph->trx)->sess;
-
-	if ((graph->state == QUE_FORK_BEING_FREED)
-	    && (graph->n_active_thrs == 0)) {
-
-		UT_LIST_REMOVE(graphs, sess->graphs, graph);
-		que_graph_free(graph);
-
-		sess_try_close(sess);
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/********************************************************************
-Performs an execution step on a thr node. */
+/****************************************************************//**
+Performs an execution step on a thr node.
+@return	query thread to run next, or NULL if none */
 static
 que_thr_t*
 que_thr_node_step(
 /*==============*/
-				/* out: query thread to run next, or NULL
-				if none */
-	que_thr_t*	thr)	/* in: query thread where run_node must
+	que_thr_t*	thr)	/*!< in: query thread where run_node must
 				be the thread node itself */
 {
 	ut_ad(thr->run_node == thr);
@@ -748,7 +735,7 @@ que_thr_node_step(
 	return(NULL);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Moves a thread from another state to the QUE_THR_RUNNING state. Increments
 the n_active_thrs counters of the query graph and transaction if thr was
 not active.
@@ -758,7 +745,7 @@ static
 void
 que_thr_move_to_run_state(
 /*======================*/
-	que_thr_t*	thr)	/* in: an query thread */
+	que_thr_t*	thr)	/*!< in: an query thread */
 {
 	trx_t*	trx;
 
@@ -781,7 +768,7 @@ que_thr_move_to_run_state(
 	thr->state = QUE_THR_RUNNING;
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Decrements the query thread reference counts in the query graph and the
 transaction. May start signal handling, e.g., a rollback.
 *** NOTE ***:
@@ -793,8 +780,8 @@ static
 void
 que_thr_dec_refer_count(
 /*====================*/
-	que_thr_t*	thr,		/* in: query thread */
-	que_thr_t**	next_thr)	/* in/out: next query thread to run;
+	que_thr_t*	thr,		/*!< in: query thread */
+	que_thr_t**	next_thr)	/*!< in/out: next query thread to run;
 					if the value which is passed in is
 					a pointer to a NULL pointer, then the
 					calling function can start running
@@ -802,13 +789,11 @@ que_thr_dec_refer_count(
 {
 	que_fork_t*	fork;
 	trx_t*		trx;
-	sess_t*		sess;
 	ulint		fork_type;
 	ibool		stopped;
 
 	fork = thr->common.parent;
 	trx = thr_get_trx(thr);
-	sess = trx->sess;
 
 	mutex_enter(&kernel_mutex);
 
@@ -835,7 +820,7 @@ que_thr_dec_refer_count(
 
 				*next_thr = thr;
 			} else {
-				ut_a(0);
+				ut_error;
 				srv_que_task_enqueue_low(thr);
 			}
 
@@ -866,7 +851,8 @@ que_thr_dec_refer_count(
 
 	if (que_fork_all_thrs_in_state(fork, QUE_THR_COMPLETED)) {
 
-		if (fork_type == QUE_FORK_ROLLBACK) {
+		switch (fork_type) {
+		case QUE_FORK_ROLLBACK:
 			/* This is really the undo graph used in rollback,
 			no roll_node in this graph */
 
@@ -874,18 +860,17 @@ que_thr_dec_refer_count(
 			ut_ad(trx->handling_signals == TRUE);
 
 			trx_finish_rollback_off_kernel(fork, trx, next_thr);
+			break;
 
-		} else if (fork_type == QUE_FORK_PURGE) {
+		case QUE_FORK_PURGE:
+		case QUE_FORK_RECOVERY:
+		case QUE_FORK_MYSQL_INTERFACE:
 
 			/* Do nothing */
-		} else if (fork_type == QUE_FORK_RECOVERY) {
+			break;
 
-			/* Do nothing */
-		} else if (fork_type == QUE_FORK_MYSQL_INTERFACE) {
-
-			/* Do nothing */
-		} else {
-			ut_error;	/* not used in MySQL */
+		default:
+			ut_error;	/*!< not used in MySQL */
 		}
 	}
 
@@ -906,16 +891,16 @@ que_thr_dec_refer_count(
 	mutex_exit(&kernel_mutex);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Stops a query thread if graph or trx is in a state requiring it. The
 conditions are tested in the order (1) graph, (2) trx. The kernel mutex has
-to be reserved. */
-
+to be reserved.
+@return	TRUE if stopped */
+UNIV_INTERN
 ibool
 que_thr_stop(
 /*=========*/
-				/* out: TRUE if stopped */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	trx_t*	trx;
 	que_t*	graph;
@@ -953,16 +938,16 @@ que_thr_stop(
 	return(ret);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
 query thread is stopped and made inactive, except in the case where
 it was put to the lock wait state in lock0lock.c, but the lock has already
 been granted or the transaction chosen as a victim in deadlock resolution. */
-
+UNIV_INTERN
 void
 que_thr_stop_for_mysql(
 /*===================*/
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	trx_t*	trx;
 
@@ -1000,16 +985,16 @@ que_thr_stop_for_mysql(
 	mutex_exit(&kernel_mutex);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Moves a thread from another state to the QUE_THR_RUNNING state. Increments
 the n_active_thrs counters of the query graph and transaction if thr was
 not active. */
-
+UNIV_INTERN
 void
 que_thr_move_to_run_state_for_mysql(
 /*================================*/
-	que_thr_t*	thr,	/* in: an query thread */
-	trx_t*		trx)	/* in: transaction */
+	que_thr_t*	thr,	/*!< in: an query thread */
+	trx_t*		trx)	/*!< in: transaction */
 {
 	if (thr->magic_n != QUE_THR_MAGIC_N) {
 		fprintf(stderr,
@@ -1033,15 +1018,15 @@ que_thr_move_to_run_state_for_mysql(
 	thr->state = QUE_THR_RUNNING;
 }
 
-/**************************************************************************
+/**********************************************************************//**
 A patch for MySQL used to 'stop' a dummy query thread used in MySQL
 select, when there is no error or lock wait. */
-
+UNIV_INTERN
 void
 que_thr_stop_for_mysql_no_error(
 /*============================*/
-	que_thr_t*	thr,	/* in: query thread */
-	trx_t*		trx)	/* in: transaction */
+	que_thr_t*	thr,	/*!< in: query thread */
+	trx_t*		trx)	/*!< in: transaction */
 {
 	ut_ad(thr->state == QUE_THR_RUNNING);
 	ut_ad(thr->is_active == TRUE);
@@ -1066,15 +1051,15 @@ que_thr_stop_for_mysql_no_error(
 	trx->n_active_thrs--;
 }
 
-/********************************************************************
+/****************************************************************//**
 Get the first containing loop node (e.g. while_node_t or for_node_t) for the
-given node, or NULL if the node is not within a loop. */
-
+given node, or NULL if the node is not within a loop.
+@return	containing loop node, or NULL. */
+UNIV_INTERN
 que_node_t*
 que_node_get_containing_loop_node(
 /*==============================*/
-				/* out: containing loop node, or NULL. */
-	que_node_t*	node)	/* in: node */
+	que_node_t*	node)	/*!< in: node */
 {
 	ut_ad(node);
 
@@ -1097,13 +1082,13 @@ que_node_get_containing_loop_node(
 	return(node);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Prints info of an SQL query graph node. */
-
+UNIV_INTERN
 void
 que_node_print_info(
 /*================*/
-	que_node_t*	node)	/* in: query graph node */
+	que_node_t*	node)	/*!< in: query graph node */
 {
 	ulint		type;
 	const char*	str;
@@ -1160,16 +1145,15 @@ que_node_print_info(
 		(ulong) type, str, (void*) node);
 }
 
-/**************************************************************************
-Performs an execution step on a query thread. */
+/**********************************************************************//**
+Performs an execution step on a query thread.
+@return query thread to run next: it may differ from the input
+parameter if, e.g., a subprocedure call is made */
 UNIV_INLINE
 que_thr_t*
 que_thr_step(
 /*=========*/
-				/* out: query thread to run next: it may
-				differ from the input parameter if, e.g., a
-				subprocedure call is made */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	que_node_t*	node;
 	que_thr_t*	old_thr;
@@ -1283,13 +1267,13 @@ que_thr_step(
 	return(thr);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Run a query thread until it finishes or encounters e.g. a lock wait. */
 static
 void
 que_run_threads_low(
 /*================*/
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	que_thr_t*	next_thr;
 	ulint		cumul_resource;
@@ -1343,12 +1327,13 @@ loop:
 	goto loop;
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Run a query thread. Handles lock waits. */
+UNIV_INTERN
 void
 que_run_threads(
 /*============*/
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 loop:
 	ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS);
@@ -1397,19 +1382,19 @@ loop:
 	mutex_exit(&kernel_mutex);
 }
 
-/*************************************************************************
-Evaluate the given SQL. */
-
+/*********************************************************************//**
+Evaluate the given SQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 ulint
 que_eval_sql(
 /*=========*/
-				/* out: error code or DB_SUCCESS */
-	pars_info_t*	info,	/* in: info struct, or NULL */
-	const char*	sql,	/* in: SQL string */
+	pars_info_t*	info,	/*!< in: info struct, or NULL */
+	const char*	sql,	/*!< in: SQL string */
 	ibool		reserve_dict_mutex,
-				/* in: if TRUE, acquire/release
+				/*!< in: if TRUE, acquire/release
 				dict_sys->mutex around call to pars_sql. */
-	trx_t*		trx)	/* in: trx */
+	trx_t*		trx)	/*!< in: trx */
 {
 	que_thr_t*	thr;
 	que_t*		graph;
diff --git a/storage/innobase/read/read0read.c b/storage/innodb_plugin/read/read0read.c
similarity index 83%
rename from storage/innobase/read/read0read.c
rename to storage/innodb_plugin/read/read0read.c
index 4068cf4fa69..85adae4ddff 100644
--- a/storage/innobase/read/read0read.c
+++ b/storage/innodb_plugin/read/read0read.c
@@ -1,7 +1,24 @@
-/******************************************************
-Cursor read
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file read/read0read.c
+Cursor read
 
 Created 2/16/1997 Heikki Tuuri
 *******************************************************/
@@ -120,39 +137,40 @@ TODO: proof this
 
 */
 
-/*************************************************************************
-Creates a read view object. */
+/*********************************************************************//**
+Creates a read view object.
+@return	own: read view struct */
 UNIV_INLINE
 read_view_t*
 read_view_create_low(
 /*=================*/
-				/* out, own: read view struct */
-	ulint		n,	/* in: number of cells in the trx_ids array */
-	mem_heap_t*	heap)	/* in: memory heap from which allocated */
+	ulint		n,	/*!< in: number of cells in the trx_ids array */
+	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
 {
 	read_view_t*	view;
 
 	view = mem_heap_alloc(heap, sizeof(read_view_t));
 
 	view->n_trx_ids = n;
-	view->trx_ids = mem_heap_alloc(heap, n * sizeof(dulint));
+	view->trx_ids = mem_heap_alloc(heap, n * sizeof *view->trx_ids);
 
 	return(view);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Makes a copy of the oldest existing read view, with the exception that also
 the creating trx of the oldest view is set as not visible in the 'copied'
 view. Opens a new view if no views currently exist. The view must be closed
-with ..._close. This is used in purge. */
-
+with ..._close. This is used in purge.
+@return	own: read view struct */
+UNIV_INTERN
 read_view_t*
 read_view_oldest_copy_or_open_new(
 /*==============================*/
-					/* out, own: read view struct */
-	dulint		cr_trx_id,	/* in: trx_id of creating
-					transaction, or (0, 0) used in purge*/
-	mem_heap_t*	heap)		/* in: memory heap from which
+	trx_id_t	cr_trx_id,	/*!< in: trx_id of creating
+					transaction, or ut_dulint_zero
+					used in purge */
+	mem_heap_t*	heap)		/*!< in: memory heap from which
 					allocated */
 {
 	read_view_t*	old_view;
@@ -173,8 +191,7 @@ read_view_oldest_copy_or_open_new(
 
 	n = old_view->n_trx_ids;
 
-	if (ut_dulint_cmp(old_view->creator_trx_id,
-			  ut_dulint_create(0,0)) != 0) {
+	if (!ut_dulint_is_zero(old_view->creator_trx_id)) {
 		n++;
 	} else {
 		needs_insert = FALSE;
@@ -226,18 +243,18 @@ read_view_oldest_copy_or_open_new(
 	return(view_copy);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view. */
-
+point in time are seen in the view.
+@return	own: read view struct */
+UNIV_INTERN
 read_view_t*
 read_view_open_now(
 /*===============*/
-					/* out, own: read view struct */
-	dulint		cr_trx_id,	/* in: trx_id of creating
-					transaction, or (0, 0) used in
-					purge */
-	mem_heap_t*	heap)		/* in: memory heap from which
+	trx_id_t	cr_trx_id,	/*!< in: trx_id of creating
+					transaction, or ut_dulint_zero
+					used in purge */
+	mem_heap_t*	heap)		/*!< in: memory heap from which
 					allocated */
 {
 	read_view_t*	view;
@@ -250,7 +267,7 @@ read_view_open_now(
 
 	view->creator_trx_id = cr_trx_id;
 	view->type = VIEW_NORMAL;
-	view->undo_no = ut_dulint_create(0, 0);
+	view->undo_no = ut_dulint_zero;
 
 	/* No future transactions should be visible in the view */
 
@@ -301,27 +318,27 @@ read_view_open_now(
 	return(view);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Closes a read view. */
-
+UNIV_INTERN
 void
 read_view_close(
 /*============*/
-	read_view_t*	view)	/* in: read view */
+	read_view_t*	view)	/*!< in: read view */
 {
 	ut_ad(mutex_own(&kernel_mutex));
 
 	UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Closes a consistent read view for MySQL. This function is called at an SQL
 statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
-
+UNIV_INTERN
 void
 read_view_close_for_mysql(
 /*======================*/
-	trx_t*	trx)	/* in: trx which has a read view */
+	trx_t*	trx)	/*!< in: trx which has a read view */
 {
 	ut_a(trx->global_read_view);
 
@@ -337,13 +354,13 @@ read_view_close_for_mysql(
 	mutex_exit(&kernel_mutex);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Prints a read view to stderr. */
-
+UNIV_INTERN
 void
 read_view_print(
 /*============*/
-	read_view_t*	view)	/* in: read view */
+	const read_view_t*	view)	/*!< in: read view */
 {
 	ulint	n_ids;
 	ulint	i;
@@ -361,37 +378,33 @@ read_view_print(
 		(ulong) ut_dulint_get_high(view->low_limit_no),
 		(ulong) ut_dulint_get_low(view->low_limit_no));
 
-	fprintf(stderr, "Read view up limit trx id %lu %lu\n",
-		(ulong) ut_dulint_get_high(view->up_limit_id),
-		(ulong) ut_dulint_get_low(view->up_limit_id));
+	fprintf(stderr, "Read view up limit trx id " TRX_ID_FMT "\n",
+		TRX_ID_PREP_PRINTF(view->up_limit_id));
 
-	fprintf(stderr, "Read view low limit trx id %lu %lu\n",
-		(ulong) ut_dulint_get_high(view->low_limit_id),
-		(ulong) ut_dulint_get_low(view->low_limit_id));
+	fprintf(stderr, "Read view low limit trx id " TRX_ID_FMT "\n",
+		TRX_ID_PREP_PRINTF(view->low_limit_id));
 
 	fprintf(stderr, "Read view individually stored trx ids:\n");
 
 	n_ids = view->n_trx_ids;
 
 	for (i = 0; i < n_ids; i++) {
-		fprintf(stderr, "Read view trx id %lu %lu\n",
-			(ulong) ut_dulint_get_high(
-				read_view_get_nth_trx_id(view, i)),
-			(ulong) ut_dulint_get_low(
+		fprintf(stderr, "Read view trx id " TRX_ID_FMT "\n",
+			TRX_ID_PREP_PRINTF(
 				read_view_get_nth_trx_id(view, i)));
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Create a high-granularity consistent cursor view for mysql to be used
 in cursors. In this consistent read view modifications done by the
 creating transaction after the cursor is created or future transactions
 are not visible. */
-
+UNIV_INTERN
 cursor_view_t*
 read_cursor_view_create_for_mysql(
 /*==============================*/
-	trx_t*	cr_trx)	/* in: trx where cursor view is created */
+	trx_t*	cr_trx)	/*!< in: trx where cursor view is created */
 {
 	cursor_view_t*	curview;
 	read_view_t*	view;
@@ -474,15 +487,15 @@ read_cursor_view_create_for_mysql(
 	return(curview);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Close a given consistent cursor view for mysql and restore global read view
 back to a transaction read view. */
-
+UNIV_INTERN
 void
 read_cursor_view_close_for_mysql(
 /*=============================*/
-	trx_t*		trx,	/* in: trx */
-	cursor_view_t*	curview)/* in: cursor view to be closed */
+	trx_t*		trx,	/*!< in: trx */
+	cursor_view_t*	curview)/*!< in: cursor view to be closed */
 {
 	ut_a(curview);
 	ut_a(curview->read_view);
@@ -502,16 +515,16 @@ read_cursor_view_close_for_mysql(
 	mem_heap_free(curview->heap);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 This function sets a given consistent cursor view to a transaction
 read view if given consistent cursor view is not NULL. Otherwise, function
 restores a global read view to a transaction read view. */
-
+UNIV_INTERN
 void
 read_cursor_set_for_mysql(
 /*======================*/
-	trx_t*		trx,	/* in: transaction where cursor is set */
-	cursor_view_t*	curview)/* in: consistent cursor view to be set */
+	trx_t*		trx,	/*!< in: transaction where cursor is set */
+	cursor_view_t*	curview)/*!< in: consistent cursor view to be set */
 {
 	ut_a(trx);
 
diff --git a/storage/innobase/rem/rem0cmp.c b/storage/innodb_plugin/rem/rem0cmp.c
similarity index 64%
rename from storage/innobase/rem/rem0cmp.c
rename to storage/innodb_plugin/rem/rem0cmp.c
index ca0ec663548..b707f2116d6 100644
--- a/storage/innobase/rem/rem0cmp.c
+++ b/storage/innodb_plugin/rem/rem0cmp.c
@@ -1,7 +1,24 @@
-/***********************************************************************
-Comparison services for records
+/*****************************************************************************
 
-(c) 1994-1996 Innobase Oy
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file rem/rem0cmp.c
+Comparison services for records
 
 Created 7/1/1994 Heikki Tuuri
 ************************************************************************/
@@ -34,75 +51,70 @@ where two records disagree only in the way that one
 has more fields than the other. */
 
 #ifdef UNIV_DEBUG
-/*****************************************************************
+/*************************************************************//**
 Used in debug checking of cmp_dtuple_... .
 This function is used to compare a data tuple to a physical record. If
 dtuple has n fields then rec must have either m >= n fields, or it must
-differ from dtuple in some of the m fields rec has. */
+differ from dtuple in some of the m fields rec has.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared */
 static
 int
 cmp_debug_dtuple_rec_with_match(
 /*============================*/
-				/* out: 1, 0, -1, if dtuple is greater, equal,
-				less than rec, respectively, when only the
-				common first fields are compared */
-	dtuple_t*	dtuple,	/* in: data tuple */
-	rec_t*		rec,	/* in: physical record which differs from
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record which differs from
 				dtuple in some of the common fields, or which
 				has an equal number or more fields than
 				dtuple */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint*		matched_fields);/* in/out: number of already
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint*		matched_fields);/*!< in/out: number of already
 				completely  matched fields; when function
 				returns, contains the value for current
 				comparison */
 #endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************
+/*************************************************************//**
 This function is used to compare two data fields for which the data type
 is such that we must use MySQL code to compare them. The prototype here
-must be a copy of the the one in ha_innobase.cc! */
+must be a copy of the the one in ha_innobase.cc!
+@return	1, 0, -1, if a is greater, equal, less than b, respectively */
 extern
 int
 innobase_mysql_cmp(
 /*===============*/
-					/* out: 1, 0, -1, if a is greater,
-					equal, less than b, respectively */
-	int		mysql_type,	/* in: MySQL type */
-	uint		charset_number,	/* in: number of the charset */
-	unsigned char*	a,		/* in: data field */
-	unsigned int	a_length,	/* in: data field length,
+	int		mysql_type,	/*!< in: MySQL type */
+	uint		charset_number,	/*!< in: number of the charset */
+	const unsigned char* a,		/*!< in: data field */
+	unsigned int	a_length,	/*!< in: data field length,
 					not UNIV_SQL_NULL */
-	unsigned char*	b,		/* in: data field */
-	unsigned int	b_length);	/* in: data field length,
+	const unsigned char* b,		/*!< in: data field */
+	unsigned int	b_length);	/*!< in: data field length,
 					not UNIV_SQL_NULL */
-#endif /* !UNIV_HOTBACKUP */
-/*************************************************************************
+/*********************************************************************//**
 Transforms the character code so that it is ordered appropriately for the
 language. This is only used for the latin1 char set. MySQL does the
-comparisons for other char sets. */
+comparisons for other char sets.
+@return	collation order position */
 UNIV_INLINE
 ulint
 cmp_collate(
 /*========*/
-			/* out: collation order position */
-	ulint	code)	/* in: code of a character stored in database record */
+	ulint	code)	/*!< in: code of a character stored in database record */
 {
 	return((ulint) srv_latin1_ordering[code]);
 }
 
-/*****************************************************************
-Returns TRUE if two columns are equal for comparison purposes. */
-
+/*************************************************************//**
+Returns TRUE if two columns are equal for comparison purposes.
+@return	TRUE if the columns are considered equal in comparisons */
+UNIV_INTERN
 ibool
 cmp_cols_are_equal(
 /*===============*/
-					/* out: TRUE if the columns are
-					considered equal in comparisons */
-	const dict_col_t*	col1,	/* in: column 1 */
-	const dict_col_t*	col2,	/* in: column 2 */
+	const dict_col_t*	col1,	/*!< in: column 1 */
+	const dict_col_t*	col2,	/*!< in: column 2 */
 	ibool			check_charsets)
-					/* in: whether to check charsets */
+					/*!< in: whether to check charsets */
 {
 	if (dtype_is_non_binary_string_type(col1->mtype, col1->prtype)
 	    && dtype_is_non_binary_string_type(col2->mtype, col2->prtype)) {
@@ -145,23 +157,21 @@ cmp_cols_are_equal(
 	return(col1->mtype != DATA_INT || col1->len == col2->len);
 }
 
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************
+/*************************************************************//**
 Innobase uses this function to compare two data fields for which the data type
-is such that we must compare whole fields or call MySQL to do the comparison */
+is such that we must compare whole fields or call MySQL to do the comparison
+@return	1, 0, -1, if a is greater, equal, less than b, respectively */
 static
 int
 cmp_whole_field(
 /*============*/
-					/* out: 1, 0, -1, if a is greater,
-					equal, less than b, respectively */
-	ulint		mtype,		/* in: main type */
-	ulint		prtype,		/* in: precise type */
-	unsigned char*	a,		/* in: data field */
-	unsigned int	a_length,	/* in: data field length,
+	ulint		mtype,		/*!< in: main type */
+	ulint		prtype,		/*!< in: precise type */
+	const byte*	a,		/*!< in: data field */
+	unsigned int	a_length,	/*!< in: data field length,
 					not UNIV_SQL_NULL */
-	unsigned char*	b,		/* in: data field */
-	unsigned int	b_length)	/* in: data field length,
+	const byte*	b,		/*!< in: data field */
+	unsigned int	b_length)	/*!< in: data field length,
 					not UNIV_SQL_NULL */
 {
 	float		f_1;
@@ -272,27 +282,24 @@ cmp_whole_field(
 
 	return(0);
 }
-#endif /* !UNIV_HOTBACKUP */
 
-/*****************************************************************
+/*************************************************************//**
 This function is used to compare two data fields for which we know the
-data type. */
-
+data type.
+@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INTERN
 int
 cmp_data_data_slow(
 /*===============*/
-				/* out: 1, 0, -1, if data1 is greater, equal,
-				less than data2, respectively */
-	ulint		mtype,	/* in: main type */
-	ulint		prtype,	/* in: precise type */
-	byte*		data1,	/* in: data field (== a pointer to a memory
+	ulint		mtype,	/*!< in: main type */
+	ulint		prtype,	/*!< in: precise type */
+	const byte*	data1,	/*!< in: data field (== a pointer to a memory
 				buffer) */
-	ulint		len1,	/* in: data field length or UNIV_SQL_NULL */
-	byte*		data2,	/* in: data field (== a pointer to a memory
+	ulint		len1,	/*!< in: data field length or UNIV_SQL_NULL */
+	const byte*	data2,	/*!< in: data field (== a pointer to a memory
 				buffer) */
-	ulint		len2)	/* in: data field length or UNIV_SQL_NULL */
+	ulint		len2)	/*!< in: data field length or UNIV_SQL_NULL */
 {
-#ifndef UNIV_HOTBACKUP
 	ulint	data1_byte;
 	ulint	data2_byte;
 	ulint	cur_bytes;
@@ -385,57 +392,48 @@ next_byte:
 		data1++;
 		data2++;
 	}
-#else /* !UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds.  Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-#endif /* !UNIV_HOTBACKUP */
 
 	return(0);		/* Not reached */
 }
 
-/*****************************************************************
+/*************************************************************//**
 This function is used to compare a data tuple to a physical record.
 Only dtuple->n_fields_cmp first fields are taken into account for
 the the data tuple! If we denote by n = n_fields_cmp, then rec must
 have either m >= n fields, or it must differ from dtuple in some of
 the m fields rec has. If rec has an externally stored field we do not
 compare it but return with value 0 if such a comparison should be
-made. */
-
+made.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared, or until
+the first externally stored field in rec */
+UNIV_INTERN
 int
 cmp_dtuple_rec_with_match(
 /*======================*/
-				/* out: 1, 0, -1, if dtuple is greater, equal,
-				less than rec, respectively, when only the
-				common first fields are compared, or
-				until the first externally stored field in
-				rec */
-	dtuple_t*	dtuple,	/* in: data tuple */
-	rec_t*		rec,	/* in: physical record which differs from
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record which differs from
 				dtuple in some of the common fields, or which
 				has an equal number or more fields than
 				dtuple */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint*		matched_fields, /* in/out: number of already completely
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint*		matched_fields, /*!< in/out: number of already completely
 				matched fields; when function returns,
 				contains the value for current comparison */
-	ulint*		matched_bytes) /* in/out: number of already matched
+	ulint*		matched_bytes) /*!< in/out: number of already matched
 				bytes within the first field not completely
 				matched; when function returns, contains the
 				value for current comparison */
 {
-#ifndef UNIV_HOTBACKUP
-	dfield_t*	dtuple_field;	/* current field in logical record */
+	const dfield_t*	dtuple_field;	/* current field in logical record */
 	ulint		dtuple_f_len;	/* the length of the current field
 					in the logical record */
-	byte*		dtuple_b_ptr;	/* pointer to the current byte in
+	const byte*	dtuple_b_ptr;	/* pointer to the current byte in
 					logical field data */
 	ulint		dtuple_byte;	/* value of current byte to be compared
 					in dtuple*/
 	ulint		rec_f_len;	/* length of current field in rec */
-	byte*		rec_b_ptr;	/* pointer to the current byte in
+	const byte*	rec_b_ptr;	/* pointer to the current byte in
 					rec field */
 	ulint		rec_byte;	/* value of current byte to be
 					compared in rec */
@@ -459,10 +457,10 @@ cmp_dtuple_rec_with_match(
 						     rec_offs_comp(offsets));
 		ulint	tup_info = dtuple_get_info_bits(dtuple);
 
-		if (rec_info & REC_INFO_MIN_REC_FLAG) {
+		if (UNIV_UNLIKELY(rec_info & REC_INFO_MIN_REC_FLAG)) {
 			ret = !(tup_info & REC_INFO_MIN_REC_FLAG);
 			goto order_resolved;
-		} else if (tup_info & REC_INFO_MIN_REC_FLAG) {
+		} else if (UNIV_UNLIKELY(tup_info & REC_INFO_MIN_REC_FLAG)) {
 			ret = -1;
 			goto order_resolved;
 		}
@@ -598,7 +596,7 @@ cmp_dtuple_rec_with_match(
 			}
 
 			ret = (int) (dtuple_byte - rec_byte);
-			if (UNIV_UNLIKELY(ret)) {
+			if (UNIV_LIKELY(ret)) {
 				if (ret < 0) {
 					ret = -1;
 					goto order_resolved;
@@ -634,27 +632,19 @@ order_resolved:
 	*matched_bytes = cur_bytes;
 
 	return(ret);
-#else /* !UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds.  Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-	return(0);
-#endif /* !UNIV_HOTBACKUP */
 }
 
-/******************************************************************
-Compares a data tuple to a physical record. */
-
+/**************************************************************//**
+Compares a data tuple to a physical record.
+@see cmp_dtuple_rec_with_match
+@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */
+UNIV_INTERN
 int
 cmp_dtuple_rec(
 /*===========*/
-				/* out: 1, 0, -1, if dtuple is greater, equal,
-				less than rec, respectively; see the comments
-				for cmp_dtuple_rec_with_match */
-	dtuple_t*	dtuple,	/* in: data tuple */
-	rec_t*		rec,	/* in: physical record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
 	ulint	matched_fields	= 0;
 	ulint	matched_bytes	= 0;
@@ -664,17 +654,17 @@ cmp_dtuple_rec(
 					 &matched_fields, &matched_bytes));
 }
 
-/******************************************************************
+/**************************************************************//**
 Checks if a dtuple is a prefix of a record. The last field in dtuple
-is allowed to be a prefix of the corresponding field in the record. */
-
+is allowed to be a prefix of the corresponding field in the record.
+@return	TRUE if prefix */
+UNIV_INTERN
 ibool
 cmp_dtuple_is_prefix_of_rec(
 /*========================*/
-				/* out: TRUE if prefix */
-	dtuple_t*	dtuple,	/* in: data tuple */
-	rec_t*		rec,	/* in: physical record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
 	ulint	n_fields;
 	ulint	matched_fields	= 0;
@@ -704,47 +694,195 @@ cmp_dtuple_is_prefix_of_rec(
 	return(FALSE);
 }
 
-/*****************************************************************
+/*************************************************************//**
+Compare two physical records that contain the same number of columns,
+none of which are stored externally.
+@return	1, 0, -1 if rec1 is greater, equal, less, respectively, than rec2 */
+UNIV_INTERN
+int
+cmp_rec_rec_simple(
+/*===============*/
+	const rec_t*		rec1,	/*!< in: physical record */
+	const rec_t*		rec2,	/*!< in: physical record */
+	const ulint*		offsets1,/*!< in: rec_get_offsets(rec1, ...) */
+	const ulint*		offsets2,/*!< in: rec_get_offsets(rec2, ...) */
+	const dict_index_t*	index)	/*!< in: data dictionary index */
+{
+	ulint		rec1_f_len;	/*!< length of current field in rec1 */
+	const byte*	rec1_b_ptr;	/*!< pointer to the current byte
+					in rec1 field */
+	ulint		rec1_byte;	/*!< value of current byte to be
+					compared in rec1 */
+	ulint		rec2_f_len;	/*!< length of current field in rec2 */
+	const byte*	rec2_b_ptr;	/*!< pointer to the current byte
+					in rec2 field */
+	ulint		rec2_byte;	/*!< value of current byte to be
+					compared in rec2 */
+	ulint		cur_field;	/*!< current field number */
+	ulint		n_uniq;
+
+	n_uniq = dict_index_get_n_unique(index);
+	ut_ad(rec_offs_n_fields(offsets1) >= n_uniq);
+	ut_ad(rec_offs_n_fields(offsets2) >= n_uniq);
+
+	ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
+
+	for (cur_field = 0; cur_field < n_uniq; cur_field++) {
+
+		ulint	cur_bytes;
+		ulint	mtype;
+		ulint	prtype;
+
+		{
+			const dict_col_t*	col
+				= dict_index_get_nth_col(index, cur_field);
+
+			mtype = col->mtype;
+			prtype = col->prtype;
+		}
+
+		ut_ad(!rec_offs_nth_extern(offsets1, cur_field));
+		ut_ad(!rec_offs_nth_extern(offsets2, cur_field));
+
+		rec1_b_ptr = rec_get_nth_field(rec1, offsets1,
+					       cur_field, &rec1_f_len);
+		rec2_b_ptr = rec_get_nth_field(rec2, offsets2,
+					       cur_field, &rec2_f_len);
+
+		if (rec1_f_len == UNIV_SQL_NULL
+		    || rec2_f_len == UNIV_SQL_NULL) {
+
+			if (rec1_f_len == rec2_f_len) {
+
+				goto next_field;
+
+			} else if (rec2_f_len == UNIV_SQL_NULL) {
+
+				/* We define the SQL null to be the
+				smallest possible value of a field
+				in the alphabetical order */
+
+				return(1);
+			} else {
+				return(-1);
+			}
+		}
+
+		if (mtype >= DATA_FLOAT
+		    || (mtype == DATA_BLOB
+			&& 0 == (prtype & DATA_BINARY_TYPE)
+			&& dtype_get_charset_coll(prtype)
+			!= DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
+			int ret = cmp_whole_field(mtype, prtype,
+						  rec1_b_ptr,
+						  (unsigned) rec1_f_len,
+						  rec2_b_ptr,
+						  (unsigned) rec2_f_len);
+			if (ret) {
+				return(ret);
+			}
+
+			goto next_field;
+		}
+
+		/* Compare the fields */
+		for (cur_bytes = 0;; cur_bytes++, rec1_b_ptr++, rec2_b_ptr++) {
+			if (rec2_f_len <= cur_bytes) {
+
+				if (rec1_f_len <= cur_bytes) {
+
+					goto next_field;
+				}
+
+				rec2_byte = dtype_get_pad_char(mtype, prtype);
+
+				if (rec2_byte == ULINT_UNDEFINED) {
+					return(1);
+				}
+			} else {
+				rec2_byte = *rec2_b_ptr;
+			}
+
+			if (rec1_f_len <= cur_bytes) {
+				rec1_byte = dtype_get_pad_char(mtype, prtype);
+
+				if (rec1_byte == ULINT_UNDEFINED) {
+					return(-1);
+				}
+			} else {
+				rec1_byte = *rec1_b_ptr;
+			}
+
+			if (rec1_byte == rec2_byte) {
+				/* If the bytes are equal, they will remain
+				such even after the collation transformation
+				below */
+
+				continue;
+			}
+
+			if (mtype <= DATA_CHAR
+			    || (mtype == DATA_BLOB
+				&& !(prtype & DATA_BINARY_TYPE))) {
+
+				rec1_byte = cmp_collate(rec1_byte);
+				rec2_byte = cmp_collate(rec2_byte);
+			}
+
+			if (rec1_byte < rec2_byte) {
+				return(-1);
+			} else if (rec1_byte > rec2_byte) {
+				return(1);
+			}
+		}
+next_field:
+		continue;
+	}
+
+	/* If we ran out of fields, rec1 was equal to rec2. */
+	return(0);
+}
+
+/*************************************************************//**
 This function is used to compare two physical records. Only the common
 first fields are compared, and if an externally stored field is
-encountered, then 0 is returned. */
-
+encountered, then 0 is returned.
+@return 1, 0, -1 if rec1 is greater, equal, less, respectively */
+UNIV_INTERN
 int
 cmp_rec_rec_with_match(
 /*===================*/
-				/* out: 1, 0 , -1 if rec1 is greater, equal,
-				less, respectively, than rec2; only the common
-				first fields are compared */
-	rec_t*		rec1,	/* in: physical record */
-	rec_t*		rec2,	/* in: physical record */
-	const ulint*	offsets1,/* in: rec_get_offsets(rec1, index) */
-	const ulint*	offsets2,/* in: rec_get_offsets(rec2, index) */
-	dict_index_t*	index,	/* in: data dictionary index */
-	ulint*		matched_fields, /* in/out: number of already completely
+	const rec_t*	rec1,	/*!< in: physical record */
+	const rec_t*	rec2,	/*!< in: physical record */
+	const ulint*	offsets1,/*!< in: rec_get_offsets(rec1, index) */
+	const ulint*	offsets2,/*!< in: rec_get_offsets(rec2, index) */
+	dict_index_t*	index,	/*!< in: data dictionary index */
+	ulint*		matched_fields, /*!< in/out: number of already completely
 				matched fields; when the function returns,
 				contains the value the for current
 				comparison */
-	ulint*		matched_bytes) /* in/out: number of already matched
+	ulint*		matched_bytes) /*!< in/out: number of already matched
 				bytes within the first field not completely
 				matched; when the function returns, contains
 				the value for the current comparison */
 {
-#ifndef UNIV_HOTBACKUP
-	ulint	rec1_n_fields;	/* the number of fields in rec */
-	ulint	rec1_f_len;	/* length of current field in rec */
-	byte*	rec1_b_ptr;	/* pointer to the current byte in rec field */
-	ulint	rec1_byte;	/* value of current byte to be compared in
-				rec */
-	ulint	rec2_n_fields;	/* the number of fields in rec */
-	ulint	rec2_f_len;	/* length of current field in rec */
-	byte*	rec2_b_ptr;	/* pointer to the current byte in rec field */
-	ulint	rec2_byte;	/* value of current byte to be compared in
-				rec */
-	ulint	cur_field;	/* current field number */
-	ulint	cur_bytes;	/* number of already matched bytes in current
-				field */
-	int	ret = 3333;	/* return value */
-	ulint	comp;
+	ulint		rec1_n_fields;	/* the number of fields in rec */
+	ulint		rec1_f_len;	/* length of current field in rec */
+	const byte*	rec1_b_ptr;	/* pointer to the current byte
+					in rec field */
+	ulint		rec1_byte;	/* value of current byte to be
+					compared in rec */
+	ulint		rec2_n_fields;	/* the number of fields in rec */
+	ulint		rec2_f_len;	/* length of current field in rec */
+	const byte*	rec2_b_ptr;	/* pointer to the current byte
+					in rec field */
+	ulint		rec2_byte;	/* value of current byte to be
+					compared in rec */
+	ulint		cur_field;	/* current field number */
+	ulint		cur_bytes;	/* number of already matched
+					bytes in current field */
+	int		ret = 0;	/* return value */
+	ulint		comp;
 
 	ut_ad(rec1 && rec2 && index);
 	ut_ad(rec_offs_validate(rec1, index, offsets1));
@@ -786,20 +924,19 @@ cmp_rec_rec_with_match(
 			if (cur_field == 0) {
 				/* Test if rec is the predefined minimum
 				record */
-				if (rec_get_info_bits(rec1, comp)
-				    & REC_INFO_MIN_REC_FLAG) {
+				if (UNIV_UNLIKELY(rec_get_info_bits(rec1, comp)
+						  & REC_INFO_MIN_REC_FLAG)) {
 
-					if (rec_get_info_bits(rec2, comp)
-					    & REC_INFO_MIN_REC_FLAG) {
-						ret = 0;
-					} else {
+					if (!(rec_get_info_bits(rec2, comp)
+					      & REC_INFO_MIN_REC_FLAG)) {
 						ret = -1;
 					}
 
 					goto order_resolved;
 
-				} else if (rec_get_info_bits(rec2, comp)
-					   & REC_INFO_MIN_REC_FLAG) {
+				} else if (UNIV_UNLIKELY
+					   (rec_get_info_bits(rec2, comp)
+					    & REC_INFO_MIN_REC_FLAG)) {
 
 					ret = 1;
 
@@ -812,8 +949,6 @@ cmp_rec_rec_with_match(
 				/* We do not compare to an externally
 				stored field */
 
-				ret = 0;
-
 				goto order_resolved;
 			}
 
@@ -933,8 +1068,9 @@ next_field:
 
 	ut_ad(cur_bytes == 0);
 
-	ret = 0;	/* If we ran out of fields, rec1 was equal to rec2 up
-			to the common fields */
+	/* If we ran out of fields, rec1 was equal to rec2 up
+	to the common fields */
+	ut_ad(ret == 0);
 order_resolved:
 
 	ut_ad((ret >= - 1) && (ret <= 1));
@@ -943,47 +1079,39 @@ order_resolved:
 	*matched_bytes = cur_bytes;
 
 	return(ret);
-#else /* !UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds.  Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-	return(0);
-#endif /* !UNIV_HOTBACKUP */
 }
 
 #ifdef UNIV_DEBUG
-/*****************************************************************
+/*************************************************************//**
 Used in debug checking of cmp_dtuple_... .
 This function is used to compare a data tuple to a physical record. If
 dtuple has n fields then rec must have either m >= n fields, or it must
 differ from dtuple in some of the m fields rec has. If encounters an
-externally stored field, returns 0. */
+externally stored field, returns 0.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared */
 static
 int
 cmp_debug_dtuple_rec_with_match(
 /*============================*/
-				/* out: 1, 0, -1, if dtuple is greater, equal,
-				less than rec, respectively, when only the
-				common first fields are compared */
-	dtuple_t*	dtuple,	/* in: data tuple */
-	rec_t*		rec,	/* in: physical record which differs from
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record which differs from
 				dtuple in some of the common fields, or which
 				has an equal number or more fields than
 				dtuple */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint*		matched_fields) /* in/out: number of already
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint*		matched_fields) /*!< in/out: number of already
 				completely matched fields; when function
 				returns, contains the value for current
 				comparison */
 {
-	dfield_t*	dtuple_field;	/* current field in logical record */
+	const dfield_t*	dtuple_field;	/* current field in logical record */
 	ulint		dtuple_f_len;	/* the length of the current field
 					in the logical record */
-	byte*		dtuple_f_data;	/* pointer to the current logical
+	const byte*	dtuple_f_data;	/* pointer to the current logical
 					field data */
 	ulint		rec_f_len;	/* length of current field in rec */
-	byte*		rec_f_data;	/* pointer to the current rec field */
+	const byte*	rec_f_data;	/* pointer to the current rec field */
 	int		ret = 3333;	/* return value */
 	ulint		cur_field;	/* current field number */
 
@@ -997,8 +1125,9 @@ cmp_debug_dtuple_rec_with_match(
 	cur_field = *matched_fields;
 
 	if (cur_field == 0) {
-		if (rec_get_info_bits(rec, rec_offs_comp(offsets))
-		    & REC_INFO_MIN_REC_FLAG) {
+		if (UNIV_UNLIKELY
+		    (rec_get_info_bits(rec, rec_offs_comp(offsets))
+		     & REC_INFO_MIN_REC_FLAG)) {
 
 			ret = !(dtuple_get_info_bits(dtuple)
 				& REC_INFO_MIN_REC_FLAG);
@@ -1006,7 +1135,8 @@ cmp_debug_dtuple_rec_with_match(
 			goto order_resolved;
 		}
 
-		if (dtuple_get_info_bits(dtuple) & REC_INFO_MIN_REC_FLAG) {
+		if (UNIV_UNLIKELY
+		    (dtuple_get_info_bits(dtuple) & REC_INFO_MIN_REC_FLAG)) {
 			ret = -1;
 
 			goto order_resolved;
diff --git a/storage/innobase/rem/rem0rec.c b/storage/innodb_plugin/rem/rem0rec.c
similarity index 58%
rename from storage/innobase/rem/rem0rec.c
rename to storage/innodb_plugin/rem/rem0rec.c
index 64f8e2d319c..1c8b3fd8c1e 100644
--- a/storage/innobase/rem/rem0rec.c
+++ b/storage/innodb_plugin/rem/rem0rec.c
@@ -1,7 +1,24 @@
-/************************************************************************
-Record manager
+/*****************************************************************************
 
-(c) 1994-2001 Innobase Oy
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file rem/rem0rec.c
+Record manager
 
 Created 5/30/1994 Heikki Tuuri
 *************************************************************************/
@@ -124,26 +141,204 @@ end of some field (containing also <FIELD-END>).
 A record is a complete-field prefix of another record, if
 the corresponding canonical strings have the same property. */
 
-ulint	rec_dummy;	/* this is used to fool compiler in
-			rec_validate */
+/* this is used to fool compiler in rec_validate */
+UNIV_INTERN ulint	rec_dummy;
 
-/*******************************************************************
-Validates the consistency of an old-style physical record. */
+/***************************************************************//**
+Validates the consistency of an old-style physical record.
+@return	TRUE if ok */
 static
 ibool
 rec_validate_old(
 /*=============*/
-			/* out: TRUE if ok */
-	rec_t*	rec);	/* in: physical record */
+	const rec_t*	rec);	/*!< in: physical record */
 
-/**********************************************************
+/******************************************************//**
+Determine how many of the first n columns in a compact
+physical record are stored externally.
+@return	number of externally stored columns */
+UNIV_INTERN
+ulint
+rec_get_n_extern_new(
+/*=================*/
+	const rec_t*	rec,	/*!< in: compact physical record */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	ulint		n)	/*!< in: number of columns to scan */
+{
+	const byte*	nulls;
+	const byte*	lens;
+	dict_field_t*	field;
+	ulint		null_mask;
+	ulint		n_extern;
+	ulint		i;
+
+	ut_ad(dict_table_is_comp(index->table));
+	ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
+	ut_ad(n == ULINT_UNDEFINED || n <= dict_index_get_n_fields(index));
+
+	if (n == ULINT_UNDEFINED) {
+		n = dict_index_get_n_fields(index);
+	}
+
+	nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
+	lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
+	null_mask = 1;
+	n_extern = 0;
+	i = 0;
+
+	/* read the lengths of fields 0..n */
+	do {
+		ulint	len;
+
+		field = dict_index_get_nth_field(index, i);
+		if (!(dict_field_get_col(field)->prtype & DATA_NOT_NULL)) {
+			/* nullable field => read the null flag */
+
+			if (UNIV_UNLIKELY(!(byte) null_mask)) {
+				nulls--;
+				null_mask = 1;
+			}
+
+			if (*nulls & null_mask) {
+				null_mask <<= 1;
+				/* No length is stored for NULL fields. */
+				continue;
+			}
+			null_mask <<= 1;
+		}
+
+		if (UNIV_UNLIKELY(!field->fixed_len)) {
+			/* Variable-length field: read the length */
+			const dict_col_t*	col
+				= dict_field_get_col(field);
+			len = *lens--;
+			if (UNIV_UNLIKELY(col->len > 255)
+			    || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
+				if (len & 0x80) {
+					/* 1exxxxxxx xxxxxxxx */
+					if (len & 0x40) {
+						n_extern++;
+					}
+					lens--;
+				}
+			}
+		}
+	} while (++i < n);
+
+	return(n_extern);
+}
+
+/******************************************************//**
+Determine the offset to each field in a leaf-page record
+in ROW_FORMAT=COMPACT.  This is a special case of
+rec_init_offsets() and rec_get_offsets_func(). */
+UNIV_INTERN
+void
+rec_init_offsets_comp_ordinary(
+/*===========================*/
+	const rec_t*		rec,	/*!< in: physical record in
+					ROW_FORMAT=COMPACT */
+	ulint			extra,	/*!< in: number of bytes to reserve
+					between the record header and
+					the data payload
+					(usually REC_N_NEW_EXTRA_BYTES) */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*			offsets)/*!< in/out: array of offsets;
+					in: n=rec_offs_n_fields(offsets) */
+{
+	ulint		i		= 0;
+	ulint		offs		= 0;
+	ulint		any_ext		= 0;
+	const byte*	nulls		= rec - (extra + 1);
+	const byte*	lens		= nulls
+		- UT_BITS_IN_BYTES(index->n_nullable);
+	dict_field_t*	field;
+	ulint		null_mask	= 1;
+
+#ifdef UNIV_DEBUG
+	/* We cannot invoke rec_offs_make_valid() here, because it can hold
+	that extra != REC_N_NEW_EXTRA_BYTES.  Similarly, rec_offs_validate()
+	will fail in that case, because it invokes rec_get_status(). */
+	offsets[2] = (ulint) rec;
+	offsets[3] = (ulint) index;
+#endif /* UNIV_DEBUG */
+
+	/* read the lengths of fields 0..n */
+	do {
+		ulint	len;
+
+		field = dict_index_get_nth_field(index, i);
+		if (!(dict_field_get_col(field)->prtype
+		      & DATA_NOT_NULL)) {
+			/* nullable field => read the null flag */
+
+			if (UNIV_UNLIKELY(!(byte) null_mask)) {
+				nulls--;
+				null_mask = 1;
+			}
+
+			if (*nulls & null_mask) {
+				null_mask <<= 1;
+				/* No length is stored for NULL fields.
+				We do not advance offs, and we set
+				the length to zero and enable the
+				SQL NULL flag in offsets[]. */
+				len = offs | REC_OFFS_SQL_NULL;
+				goto resolved;
+			}
+			null_mask <<= 1;
+		}
+
+		if (UNIV_UNLIKELY(!field->fixed_len)) {
+			/* Variable-length field: read the length */
+			const dict_col_t*	col
+				= dict_field_get_col(field);
+			len = *lens--;
+			if (UNIV_UNLIKELY(col->len > 255)
+			    || UNIV_UNLIKELY(col->mtype
+					     == DATA_BLOB)) {
+				if (len & 0x80) {
+					/* 1exxxxxxx xxxxxxxx */
+					len <<= 8;
+					len |= *lens--;
+
+					offs += len & 0x3fff;
+					if (UNIV_UNLIKELY(len
+							  & 0x4000)) {
+						ut_ad(dict_index_is_clust
+						      (index));
+						any_ext = REC_OFFS_EXTERNAL;
+						len = offs
+							| REC_OFFS_EXTERNAL;
+					} else {
+						len = offs;
+					}
+
+					goto resolved;
+				}
+			}
+
+			len = offs += len;
+		} else {
+			len = offs += field->fixed_len;
+		}
+resolved:
+		rec_offs_base(offsets)[i + 1] = len;
+	} while (++i < rec_offs_n_fields(offsets));
+
+	*rec_offs_base(offsets)
+		= (rec - (lens + 1)) | REC_OFFS_COMPACT | any_ext;
+}
+
+/******************************************************//**
 The following function determines the offsets to each field in the
 record.	 The offsets are written to a previously allocated array of
 ulint, where rec_offs_n_fields(offsets) has been initialized to the
 number of fields in the record.	 The rest of the array will be
 initialized by this function.  rec_offs_base(offsets)[0] will be set
 to the extra size (if REC_OFFS_COMPACT is set, the record is in the
-new format), and rec_offs_base(offsets)[1..n_fields] will be set to
+new format; if REC_OFFS_EXTERNAL is set, the record contains externally
+stored columns), and rec_offs_base(offsets)[1..n_fields] will be set to
 offsets past the end of fields 0..n_fields, or to the beginning of
 fields 1..n_fields+1.  When the high-order bit of the offset at [i+1]
 is set (REC_OFFS_SQL_NULL), the field i is NULL.  When the second
@@ -153,10 +348,10 @@ static
 void
 rec_init_offsets(
 /*=============*/
-	rec_t*		rec,	/* in: physical record */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint*		offsets)/* in/out: array of offsets;
-				in: n=rec_offs_n_fields(offsets) */
+	const rec_t*		rec,	/*!< in: physical record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*			offsets)/*!< in/out: array of offsets;
+					in: n=rec_offs_n_fields(offsets) */
 {
 	ulint	i	= 0;
 	ulint	offs;
@@ -184,7 +379,10 @@ rec_init_offsets(
 				= dict_index_get_n_unique_in_tree(index);
 			break;
 		case REC_STATUS_ORDINARY:
-			break;
+			rec_init_offsets_comp_ordinary(rec,
+						       REC_N_NEW_EXTRA_BYTES,
+						       index, offsets);
+			return;
 		}
 
 		nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
@@ -232,17 +430,17 @@ rec_init_offsets(
 						     == DATA_BLOB)) {
 					if (len & 0x80) {
 						/* 1exxxxxxx xxxxxxxx */
+
 						len <<= 8;
 						len |= *lens--;
 
+						/* B-tree node pointers
+						must not contain externally
+						stored columns.  Thus
+						the "e" flag must be 0. */
+						ut_a(!(len & 0x4000));
 						offs += len & 0x3fff;
-						if (UNIV_UNLIKELY(len
-								  & 0x4000)) {
-							len = offs
-								| REC_OFFS_EXTERNAL;
-						} else {
-							len = offs;
-						}
+						len = offs;
 
 						goto resolved;
 					}
@@ -286,6 +484,7 @@ resolved:
 				if (offs & REC_2BYTE_EXTERN_MASK) {
 					offs &= ~REC_2BYTE_EXTERN_MASK;
 					offs |= REC_OFFS_EXTERNAL;
+					*rec_offs_base(offsets) |= REC_OFFS_EXTERNAL;
 				}
 				rec_offs_base(offsets)[1 + i] = offs;
 			} while (++i < rec_offs_n_fields(offsets));
@@ -293,24 +492,26 @@ resolved:
 	}
 }
 
-/**********************************************************
+/******************************************************//**
 The following function determines the offsets to each field
-in the record.	It can reuse a previously returned array. */
-
+in the record.	It can reuse a previously returned array.
+@return	the new offsets */
+UNIV_INTERN
 ulint*
 rec_get_offsets_func(
 /*=================*/
-				/* out: the new offsets */
-	rec_t*		rec,	/* in: physical record */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint*		offsets,/* in/out: array consisting of offsets[0]
-				allocated elements, or an array from
-				rec_get_offsets(), or NULL */
-	ulint		n_fields,/* in: maximum number of initialized fields
-				(ULINT_UNDEFINED if all fields) */
-	mem_heap_t**	heap,	/* in/out: memory heap */
-	const char*	file,	/* in: file name where called */
-	ulint		line)	/* in: line number where called */
+	const rec_t*		rec,	/*!< in: physical record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*			offsets,/*!< in/out: array consisting of
+					offsets[0] allocated elements,
+					or an array from rec_get_offsets(),
+					or NULL */
+	ulint			n_fields,/*!< in: maximum number of
+					initialized fields
+					 (ULINT_UNDEFINED if all fields) */
+	mem_heap_t**		heap,	/*!< in/out: memory heap */
+	const char*		file,	/*!< in: file name where called */
+	ulint			line)	/*!< in: line number where called */
 {
 	ulint	n;
 	ulint	size;
@@ -349,9 +550,9 @@ rec_get_offsets_func(
 
 	if (UNIV_UNLIKELY(!offsets)
 	    || UNIV_UNLIKELY(rec_offs_get_n_alloc(offsets) < size)) {
-		if (!*heap) {
+		if (UNIV_UNLIKELY(!*heap)) {
 			*heap = mem_heap_create_func(size * sizeof(ulint),
-						     NULL, MEM_HEAP_DYNAMIC,
+						     MEM_HEAP_DYNAMIC,
 						     file, line);
 		}
 		offsets = mem_heap_alloc(*heap, size * sizeof(ulint));
@@ -363,18 +564,133 @@ rec_get_offsets_func(
 	return(offsets);
 }
 
-/****************************************************************
-The following function is used to get a pointer to the nth
-data field in an old-style record. */
+/******************************************************//**
+The following function determines the offsets to each field
+in the record.  It can reuse a previously allocated array. */
+UNIV_INTERN
+void
+rec_get_offsets_reverse(
+/*====================*/
+	const byte*		extra,	/*!< in: the extra bytes of a
+					compact record in reverse order,
+					excluding the fixed-size
+					REC_N_NEW_EXTRA_BYTES */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint			node_ptr,/*!< in: nonzero=node pointer,
+					0=leaf node */
+	ulint*			offsets)/*!< in/out: array consisting of
+					offsets[0] allocated elements */
+{
+	ulint		n;
+	ulint		i;
+	ulint		offs;
+	ulint		any_ext;
+	const byte*	nulls;
+	const byte*	lens;
+	dict_field_t*	field;
+	ulint		null_mask;
+	ulint		n_node_ptr_field;
 
-byte*
-rec_get_nth_field_old(
-/*==================*/
-			/* out: pointer to the field */
-	rec_t*	rec,	/* in: record */
-	ulint	n,	/* in: index of the field */
-	ulint*	len)	/* out: length of the field; UNIV_SQL_NULL if SQL
-			null */
+	ut_ad(extra);
+	ut_ad(index);
+	ut_ad(offsets);
+	ut_ad(dict_table_is_comp(index->table));
+
+	if (UNIV_UNLIKELY(node_ptr)) {
+		n_node_ptr_field = dict_index_get_n_unique_in_tree(index);
+		n = n_node_ptr_field + 1;
+	} else {
+		n_node_ptr_field = ULINT_UNDEFINED;
+		n = dict_index_get_n_fields(index);
+	}
+
+	ut_a(rec_offs_get_n_alloc(offsets) >= n + (1 + REC_OFFS_HEADER_SIZE));
+	rec_offs_set_n_fields(offsets, n);
+
+	nulls = extra;
+	lens = nulls + UT_BITS_IN_BYTES(index->n_nullable);
+	i = offs = 0;
+	null_mask = 1;
+	any_ext = 0;
+
+	/* read the lengths of fields 0..n */
+	do {
+		ulint	len;
+		if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
+			len = offs += 4;
+			goto resolved;
+		}
+
+		field = dict_index_get_nth_field(index, i);
+		if (!(dict_field_get_col(field)->prtype & DATA_NOT_NULL)) {
+			/* nullable field => read the null flag */
+
+			if (UNIV_UNLIKELY(!(byte) null_mask)) {
+				nulls++;
+				null_mask = 1;
+			}
+
+			if (*nulls & null_mask) {
+				null_mask <<= 1;
+				/* No length is stored for NULL fields.
+				We do not advance offs, and we set
+				the length to zero and enable the
+				SQL NULL flag in offsets[]. */
+				len = offs | REC_OFFS_SQL_NULL;
+				goto resolved;
+			}
+			null_mask <<= 1;
+		}
+
+		if (UNIV_UNLIKELY(!field->fixed_len)) {
+			/* Variable-length field: read the length */
+			const dict_col_t*	col
+				= dict_field_get_col(field);
+			len = *lens++;
+			if (UNIV_UNLIKELY(col->len > 255)
+			    || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
+				if (len & 0x80) {
+					/* 1exxxxxxx xxxxxxxx */
+					len <<= 8;
+					len |= *lens++;
+
+					offs += len & 0x3fff;
+					if (UNIV_UNLIKELY(len & 0x4000)) {
+						any_ext = REC_OFFS_EXTERNAL;
+						len = offs | REC_OFFS_EXTERNAL;
+					} else {
+						len = offs;
+					}
+
+					goto resolved;
+				}
+			}
+
+			len = offs += len;
+		} else {
+			len = offs += field->fixed_len;
+		}
+resolved:
+		rec_offs_base(offsets)[i + 1] = len;
+	} while (++i < rec_offs_n_fields(offsets));
+
+	ut_ad(lens >= extra);
+	*rec_offs_base(offsets) = (lens - extra + REC_N_NEW_EXTRA_BYTES)
+		| REC_OFFS_COMPACT | any_ext;
+}
+
+/************************************************************//**
+The following function is used to get the offset to the nth
+data field in an old-style record.
+@return	offset to the field */
+UNIV_INTERN
+ulint
+rec_get_nth_field_offs_old(
+/*=======================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n,	/*!< in: index of the field */
+	ulint*		len)	/*!< out: length of the field;
+				UNIV_SQL_NULL if SQL null */
 {
 	ulint	os;
 	ulint	next_os;
@@ -382,13 +698,13 @@ rec_get_nth_field_old(
 	ut_ad(rec && len);
 	ut_ad(n < rec_get_n_fields_old(rec));
 
-	if (n > REC_MAX_N_FIELDS) {
+	if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
 		fprintf(stderr, "Error: trying to access field %lu in rec\n",
 			(ulong) n);
 		ut_error;
 	}
 
-	if (rec == NULL) {
+	if (UNIV_UNLIKELY(rec == NULL)) {
 		fputs("Error: rec is NULL pointer\n", stderr);
 		ut_error;
 	}
@@ -401,7 +717,7 @@ rec_get_nth_field_old(
 		if (next_os & REC_1BYTE_SQL_NULL_MASK) {
 			*len = UNIV_SQL_NULL;
 
-			return(rec + os);
+			return(os);
 		}
 
 		next_os = next_os & ~REC_1BYTE_SQL_NULL_MASK;
@@ -413,7 +729,7 @@ rec_get_nth_field_old(
 		if (next_os & REC_2BYTE_SQL_NULL_MASK) {
 			*len = UNIV_SQL_NULL;
 
-			return(rec + os);
+			return(os);
 		}
 
 		next_os = next_os & ~(REC_2BYTE_SQL_NULL_MASK
@@ -424,62 +740,50 @@ rec_get_nth_field_old(
 
 	ut_ad(*len < UNIV_PAGE_SIZE);
 
-	return(rec + os);
+	return(os);
 }
 
-/**************************************************************
-The following function returns the size of a data tuple when converted to
-a new-style physical record. */
-
+/**********************************************************//**
+Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
+@return	total size */
+UNIV_INTERN
 ulint
-rec_get_converted_size_new(
-/*=======================*/
-				/* out: size */
-	dict_index_t*	index,	/* in: record descriptor */
-	dtuple_t*	dtuple)	/* in: data tuple */
+rec_get_converted_size_comp_prefix(
+/*===============================*/
+	const dict_index_t*	index,	/*!< in: record descriptor;
+					dict_table_is_comp() is
+					assumed to hold, even if
+					it does not */
+	const dfield_t*		fields,	/*!< in: array of data fields */
+	ulint			n_fields,/*!< in: number of data fields */
+	ulint*			extra)	/*!< out: extra size */
 {
-	ulint		size		= REC_N_NEW_EXTRA_BYTES
-		+ UT_BITS_IN_BYTES(index->n_nullable);
-	ulint		i;
-	ulint		n_fields;
-	ut_ad(index && dtuple);
-	ut_ad(dict_table_is_comp(index->table));
+	ulint	extra_size;
+	ulint	data_size;
+	ulint	i;
+	ut_ad(index);
+	ut_ad(fields);
+	ut_ad(n_fields > 0);
+	ut_ad(n_fields <= dict_index_get_n_fields(index));
 
-	switch (dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) {
-	case REC_STATUS_ORDINARY:
-		n_fields = dict_index_get_n_fields(index);
-		ut_ad(n_fields == dtuple_get_n_fields(dtuple));
-		break;
-	case REC_STATUS_NODE_PTR:
-		n_fields = dict_index_get_n_unique_in_tree(index);
-		ut_ad(n_fields + 1 == dtuple_get_n_fields(dtuple));
-		ut_ad(dtuple_get_nth_field(dtuple, n_fields)->len == 4);
-		size += 4; /* child page number */
-		break;
-	case REC_STATUS_INFIMUM:
-	case REC_STATUS_SUPREMUM:
-		/* infimum or supremum record, 8 data bytes */
-		return(REC_N_NEW_EXTRA_BYTES + 8);
-	default:
-		ut_error;
-		return(ULINT_UNDEFINED);
-	}
+	extra_size = REC_N_NEW_EXTRA_BYTES
+		+ UT_BITS_IN_BYTES(index->n_nullable);
+	data_size = 0;
 
 	/* read the lengths of fields 0..n */
 	for (i = 0; i < n_fields; i++) {
-		dict_field_t*		field;
+		const dict_field_t*	field;
 		ulint			len;
 		const dict_col_t*	col;
 
 		field = dict_index_get_nth_field(index, i);
-		len = dtuple_get_nth_field(dtuple, i)->len;
+		len = dfield_get_len(&fields[i]);
 		col = dict_field_get_col(field);
 
-		ut_ad(dict_col_type_assert_equal(
-			      col, dfield_get_type(dtuple_get_nth_field(
-							   dtuple, i))));
+		ut_ad(dict_col_type_assert_equal(col,
+						 dfield_get_type(&fields[i])));
 
-		if (len == UNIV_SQL_NULL) {
+		if (dfield_is_null(&fields[i])) {
 			/* No length is stored for NULL fields. */
 			ut_ad(!(col->prtype & DATA_NOT_NULL));
 			continue;
@@ -492,31 +796,85 @@ rec_get_converted_size_new(
 			/* dict_index_add_col() should guarantee this */
 			ut_ad(!field->prefix_len
 			      || field->fixed_len == field->prefix_len);
+		} else if (dfield_is_ext(&fields[i])) {
+			extra_size += 2;
 		} else if (len < 128
 			   || (col->len < 256 && col->mtype != DATA_BLOB)) {
-			size++;
+			extra_size++;
 		} else {
 			/* For variable-length columns, we look up the
 			maximum length from the column itself.  If this
 			is a prefix index column shorter than 256 bytes,
 			this will waste one byte. */
-			size += 2;
+			extra_size += 2;
 		}
-		size += len;
+		data_size += len;
 	}
 
-	return(size);
+	if (UNIV_LIKELY_NULL(extra)) {
+		*extra = extra_size;
+	}
+
+	return(extra_size + data_size);
 }
 
-/***************************************************************
-Sets the value of the ith field SQL null bit of an old-style record. */
+/**********************************************************//**
+Determines the size of a data tuple in ROW_FORMAT=COMPACT.
+@return	total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_comp(
+/*========================*/
+	const dict_index_t*	index,	/*!< in: record descriptor;
+					dict_table_is_comp() is
+					assumed to hold, even if
+					it does not */
+	ulint			status,	/*!< in: status bits of the record */
+	const dfield_t*		fields,	/*!< in: array of data fields */
+	ulint			n_fields,/*!< in: number of data fields */
+	ulint*			extra)	/*!< out: extra size */
+{
+	ulint	size;
+	ut_ad(index);
+	ut_ad(fields);
+	ut_ad(n_fields > 0);
 
+	switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
+	case REC_STATUS_ORDINARY:
+		ut_ad(n_fields == dict_index_get_n_fields(index));
+		size = 0;
+		break;
+	case REC_STATUS_NODE_PTR:
+		n_fields--;
+		ut_ad(n_fields == dict_index_get_n_unique_in_tree(index));
+		ut_ad(dfield_get_len(&fields[n_fields]) == REC_NODE_PTR_SIZE);
+		size = REC_NODE_PTR_SIZE; /* child page number */
+		break;
+	case REC_STATUS_INFIMUM:
+	case REC_STATUS_SUPREMUM:
+		/* infimum or supremum record, 8 data bytes */
+		if (UNIV_LIKELY_NULL(extra)) {
+			*extra = REC_N_NEW_EXTRA_BYTES;
+		}
+		return(REC_N_NEW_EXTRA_BYTES + 8);
+	default:
+		ut_error;
+		return(ULINT_UNDEFINED);
+	}
+
+	return(size + rec_get_converted_size_comp_prefix(index, fields,
+							 n_fields, extra));
+}
+
+/***********************************************************//**
+Sets the value of the ith field SQL null bit of an old-style record. */
+UNIV_INTERN
 void
 rec_set_nth_field_null_bit(
 /*=======================*/
-	rec_t*	rec,	/* in: record */
-	ulint	i,	/* in: ith field */
-	ibool	val)	/* in: value to set */
+	rec_t*	rec,	/*!< in: record */
+	ulint	i,	/*!< in: ith field */
+	ibool	val)	/*!< in: value to set */
 {
 	ulint	info;
 
@@ -546,169 +904,15 @@ rec_set_nth_field_null_bit(
 	rec_2_set_field_end_info(rec, i, info);
 }
 
-/***************************************************************
-Sets the value of the ith field extern storage bit of an old-style record. */
-
-void
-rec_set_nth_field_extern_bit_old(
-/*=============================*/
-	rec_t*	rec,	/* in: old-style record */
-	ulint	i,	/* in: ith field */
-	ibool	val,	/* in: value to set */
-	mtr_t*	mtr)	/* in: mtr holding an X-latch to the page where
-			rec is, or NULL; in the NULL case we do not
-			write to log about the change */
-{
-	ulint	info;
-
-	ut_a(!rec_get_1byte_offs_flag(rec));
-	ut_a(i < rec_get_n_fields_old(rec));
-
-	info = rec_2_get_field_end_info(rec, i);
-
-	if (val) {
-		info = info | REC_2BYTE_EXTERN_MASK;
-	} else {
-		info = info & ~REC_2BYTE_EXTERN_MASK;
-	}
-
-	if (mtr) {
-		mlog_write_ulint(rec - REC_N_OLD_EXTRA_BYTES - 2 * (i + 1),
-				 info, MLOG_2BYTES, mtr);
-	} else {
-		rec_2_set_field_end_info(rec, i, info);
-	}
-}
-
-/***************************************************************
-Sets the value of the ith field extern storage bit of a new-style record. */
-
-void
-rec_set_nth_field_extern_bit_new(
-/*=============================*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint		ith,	/* in: ith field */
-	ibool		val,	/* in: value to set */
-	mtr_t*		mtr)	/* in: mtr holding an X-latch to the page
-				where rec is, or NULL; in the NULL case
-				we do not write to log about the change */
-{
-	byte*		nulls	= rec - (REC_N_NEW_EXTRA_BYTES + 1);
-	byte*		lens	= nulls - UT_BITS_IN_BYTES(index->n_nullable);
-	ulint		i;
-	ulint		n_fields;
-	ulint		null_mask	= 1;
-	ut_ad(rec && index);
-	ut_ad(dict_table_is_comp(index->table));
-	ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
-
-	n_fields = dict_index_get_n_fields(index);
-
-	ut_ad(ith < n_fields);
-
-	/* read the lengths of fields 0..n */
-	for (i = 0; i < n_fields; i++) {
-		const dict_field_t*	field;
-		const dict_col_t*	col;
-
-		field = dict_index_get_nth_field(index, i);
-		col = dict_field_get_col(field);
-
-		if (!(col->prtype & DATA_NOT_NULL)) {
-			if (UNIV_UNLIKELY(!(byte) null_mask)) {
-				nulls--;
-				null_mask = 1;
-			}
-
-			if (*nulls & null_mask) {
-				null_mask <<= 1;
-				/* NULL fields cannot be external. */
-				ut_ad(i != ith);
-				continue;
-			}
-
-			null_mask <<= 1;
-		}
-		if (field->fixed_len) {
-			/* fixed-length fields cannot be external
-			(Fixed-length fields longer than
-			DICT_MAX_INDEX_COL_LEN will be treated as
-			variable-length ones in dict_index_add_col().) */
-			ut_ad(i != ith);
-			continue;
-		}
-		lens--;
-		if (col->len > 255 || col->mtype == DATA_BLOB) {
-			ulint	len = lens[1];
-			if (len & 0x80) { /* 1exxxxxx: 2-byte length */
-				if (i == ith) {
-					if (!val == !(len & 0x40)) {
-						return; /* no change */
-					}
-					/* toggle the extern bit */
-					len ^= 0x40;
-					if (mtr) {
-						mlog_write_ulint(lens + 1,
-								 len,
-								 MLOG_1BYTE,
-								 mtr);
-					} else {
-						lens[1] = (byte) len;
-					}
-					return;
-				}
-				lens--;
-			} else {
-				/* short fields cannot be external */
-				ut_ad(i != ith);
-			}
-		} else {
-			/* short fields cannot be external */
-			ut_ad(i != ith);
-		}
-	}
-}
-
-/***************************************************************
-Sets TRUE the extern storage bits of fields mentioned in an array. */
-
-void
-rec_set_field_extern_bits(
-/*======================*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: record descriptor */
-	const ulint*	vec,	/* in: array of field numbers */
-	ulint		n_fields,/* in: number of fields numbers */
-	mtr_t*		mtr)	/* in: mtr holding an X-latch to the
-				page where rec is, or NULL;
-				in the NULL case we do not write
-				to log about the change */
-{
-	ulint	i;
-
-	if (dict_table_is_comp(index->table)) {
-		for (i = 0; i < n_fields; i++) {
-			rec_set_nth_field_extern_bit_new(rec, index, vec[i],
-							 TRUE, mtr);
-		}
-	} else {
-		for (i = 0; i < n_fields; i++) {
-			rec_set_nth_field_extern_bit_old(rec, vec[i],
-							 TRUE, mtr);
-		}
-	}
-}
-
-/***************************************************************
+/***********************************************************//**
 Sets an old-style record field to SQL null.
 The physical size of the field is not changed. */
-
+UNIV_INTERN
 void
 rec_set_nth_field_sql_null(
 /*=======================*/
-	rec_t*	rec,	/* in: record */
-	ulint	n)	/* in: index of the field */
+	rec_t*	rec,	/*!< in: record */
+	ulint	n)	/*!< in: index of the field */
 {
 	ulint	offset;
 
@@ -719,25 +923,24 @@ rec_set_nth_field_sql_null(
 	rec_set_nth_field_null_bit(rec, n, TRUE);
 }
 
-/*************************************************************
+/*********************************************************//**
 Builds an old-style physical record out of a data tuple and
-stores it beginning from the start of the given buffer. */
+stores it beginning from the start of the given buffer.
+@return	pointer to the origin of physical record */
 static
 rec_t*
 rec_convert_dtuple_to_rec_old(
 /*==========================*/
-			/* out: pointer to the origin of
-			physical record */
-	byte*	buf,	/* in: start address of the physical record */
-	dtuple_t* dtuple)/* in: data tuple */
+	byte*		buf,	/*!< in: start address of the physical record */
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	ulint		n_ext)	/*!< in: number of externally stored columns */
 {
-	dfield_t*	field;
+	const dfield_t*	field;
 	ulint		n_fields;
 	ulint		data_size;
 	rec_t*		rec;
 	ulint		end_offset;
 	ulint		ored_offset;
-	byte*		data;
 	ulint		len;
 	ulint		i;
 
@@ -746,13 +949,13 @@ rec_convert_dtuple_to_rec_old(
 	ut_ad(dtuple_check_typed(dtuple));
 
 	n_fields = dtuple_get_n_fields(dtuple);
-	data_size = dtuple_get_data_size(dtuple);
+	data_size = dtuple_get_data_size(dtuple, 0);
 
 	ut_ad(n_fields > 0);
 
 	/* Calculate the offset of the origin in the physical record */
 
-	rec = buf + rec_get_converted_extra_size(data_size, n_fields);
+	rec = buf + rec_get_converted_extra_size(data_size, n_fields, n_ext);
 #ifdef UNIV_DEBUG
 	/* Suppress Valgrind warnings of ut_ad()
 	in mach_write_to_1(), mach_write_to_2() et al. */
@@ -762,14 +965,14 @@ rec_convert_dtuple_to_rec_old(
 	rec_set_n_fields_old(rec, n_fields);
 
 	/* Set the info bits of the record */
-	rec_set_info_bits(rec, FALSE,
-			  dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK);
+	rec_set_info_bits_old(rec, dtuple_get_info_bits(dtuple)
+			      & REC_INFO_BITS_MASK);
 
 	/* Store the data and the offsets */
 
 	end_offset = 0;
 
-	if (data_size <= REC_1BYTE_OFFS_LIMIT) {
+	if (!n_ext && data_size <= REC_1BYTE_OFFS_LIMIT) {
 
 		rec_set_1byte_offs_flag(rec, TRUE);
 
@@ -777,12 +980,9 @@ rec_convert_dtuple_to_rec_old(
 
 			field = dtuple_get_nth_field(dtuple, i);
 
-			data = dfield_get_data(field);
-			len = dfield_get_len(field);
-
-			if (len == UNIV_SQL_NULL) {
+			if (dfield_is_null(field)) {
 				len = dtype_get_sql_null_size(
-					dfield_get_type(field));
+					dfield_get_type(field), 0);
 				data_write_sql_null(rec + end_offset, len);
 
 				end_offset += len;
@@ -790,7 +990,10 @@ rec_convert_dtuple_to_rec_old(
 					| REC_1BYTE_SQL_NULL_MASK;
 			} else {
 				/* If the data is not SQL null, store it */
-				ut_memcpy(rec + end_offset, data, len);
+				len = dfield_get_len(field);
+
+				memcpy(rec + end_offset,
+				       dfield_get_data(field), len);
 
 				end_offset += len;
 				ored_offset = end_offset;
@@ -805,12 +1008,9 @@ rec_convert_dtuple_to_rec_old(
 
 			field = dtuple_get_nth_field(dtuple, i);
 
-			data = dfield_get_data(field);
-			len = dfield_get_len(field);
-
-			if (len == UNIV_SQL_NULL) {
+			if (dfield_is_null(field)) {
 				len = dtype_get_sql_null_size(
-					dfield_get_type(field));
+					dfield_get_type(field), 0);
 				data_write_sql_null(rec + end_offset, len);
 
 				end_offset += len;
@@ -818,10 +1018,17 @@ rec_convert_dtuple_to_rec_old(
 					| REC_2BYTE_SQL_NULL_MASK;
 			} else {
 				/* If the data is not SQL null, store it */
-				ut_memcpy(rec + end_offset, data, len);
+				len = dfield_get_len(field);
+
+				memcpy(rec + end_offset,
+				       dfield_get_data(field), len);
 
 				end_offset += len;
 				ored_offset = end_offset;
+
+				if (dfield_is_ext(field)) {
+					ored_offset |= REC_2BYTE_EXTERN_MASK;
+				}
 			}
 
 			rec_2_set_field_end_info(rec, i, ored_offset);
@@ -831,22 +1038,24 @@ rec_convert_dtuple_to_rec_old(
 	return(rec);
 }
 
-/*************************************************************
-Builds a new-style physical record out of a data tuple and
-stores it beginning from the start of the given buffer. */
-static
-rec_t*
-rec_convert_dtuple_to_rec_new(
-/*==========================*/
-				/* out: pointer to the origin
-				of physical record */
-	byte*		buf,	/* in: start address of the physical record */
-	dict_index_t*	index,	/* in: record descriptor */
-	dtuple_t*	dtuple)	/* in: data tuple */
+/*********************************************************//**
+Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
+UNIV_INTERN
+void
+rec_convert_dtuple_to_rec_comp(
+/*===========================*/
+	rec_t*			rec,	/*!< in: origin of record */
+	ulint			extra,	/*!< in: number of bytes to
+					reserve between the record
+					header and the data payload
+					(normally REC_N_NEW_EXTRA_BYTES) */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint			status,	/*!< in: status bits of the record */
+	const dfield_t*		fields,	/*!< in: array of data fields */
+	ulint			n_fields)/*!< in: number of data fields */
 {
-	dfield_t*	field;
-	dtype_t*	type;
-	rec_t*		rec		= buf + REC_N_NEW_EXTRA_BYTES;
+	const dfield_t*	field;
+	const dtype_t*	type;
 	byte*		end;
 	byte*		nulls;
 	byte*		lens;
@@ -855,18 +1064,10 @@ rec_convert_dtuple_to_rec_new(
 	ulint		n_node_ptr_field;
 	ulint		fixed_len;
 	ulint		null_mask	= 1;
-	const ulint	n_fields	= dtuple_get_n_fields(dtuple);
-	const ulint	status		= dtuple_get_info_bits(dtuple)
-		& REC_NEW_STATUS_MASK;
-	ut_ad(dict_table_is_comp(index->table));
+	ut_ad(extra == 0 || dict_table_is_comp(index->table));
+	ut_ad(extra == 0 || extra == REC_N_NEW_EXTRA_BYTES);
 	ut_ad(n_fields > 0);
 
-	/* Try to ensure that the memset() between the for() loops
-	completes fast.	 The address is not exact, but UNIV_PREFETCH
-	should never generate a memory fault. */
-	UNIV_PREFETCH_RW(rec - REC_N_NEW_EXTRA_BYTES - n_fields);
-	UNIV_PREFETCH_RW(rec);
-
 	switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
 	case REC_STATUS_ORDINARY:
 		ut_ad(n_fields <= dict_index_get_n_fields(index));
@@ -880,73 +1081,21 @@ rec_convert_dtuple_to_rec_new(
 	case REC_STATUS_SUPREMUM:
 		ut_ad(n_fields == 1);
 		n_node_ptr_field = ULINT_UNDEFINED;
-		goto init;
+		break;
 	default:
-		ut_a(0);
-		return(0);
+		ut_error;
+		return;
 	}
 
-	/* Calculate the offset of the origin in the physical record.
-	We must loop over all fields to do this. */
-	rec += UT_BITS_IN_BYTES(index->n_nullable);
-
-	for (i = 0; i < n_fields; i++) {
-		if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
-#ifdef UNIV_DEBUG
-			field = dtuple_get_nth_field(dtuple, i);
-			type = dfield_get_type(field);
-			ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
-			ut_ad(dfield_get_len(field) == 4);
-#endif /* UNIV_DEBUG */
-			goto init;
-		}
-		field = dtuple_get_nth_field(dtuple, i);
-		type = dfield_get_type(field);
-		len = dfield_get_len(field);
-		fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
-
-		ut_ad(dict_col_type_assert_equal(
-			      dict_field_get_col(dict_index_get_nth_field(
-							 index, i)),
-			      dfield_get_type(field)));
-
-		if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
-			if (len == UNIV_SQL_NULL)
-				continue;
-		}
-		/* only nullable fields can be null */
-		ut_ad(len != UNIV_SQL_NULL);
-		if (fixed_len) {
-			ut_ad(len == fixed_len);
-		} else {
-			ut_ad(len <= dtype_get_len(type)
-			      || dtype_get_mtype(type) == DATA_BLOB);
-			rec++;
-			if (len >= 128
-			    && (dtype_get_len(type) >= 256
-				|| dtype_get_mtype(type) == DATA_BLOB)) {
-				rec++;
-			}
-		}
-	}
-
-init:
 	end = rec;
-	nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
+	nulls = rec - (extra + 1);
 	lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
 	/* clear the SQL-null flags */
-	memset (lens + 1, 0, nulls - lens);
-
-	/* Set the info bits of the record */
-	rec_set_status(rec, status);
-
-	rec_set_info_bits(rec, TRUE,
-			  dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK);
+	memset(lens + 1, 0, nulls - lens);
 
 	/* Store the data and the offsets */
 
-	for (i = 0; i < n_fields; i++) {
-		field = dtuple_get_nth_field(dtuple, i);
+	for (i = 0, field = fields; i < n_fields; i++, field++) {
 		type = dfield_get_type(field);
 		len = dfield_get_len(field);
 
@@ -954,9 +1103,9 @@ init:
 			ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
 			ut_ad(len == 4);
 			memcpy(end, dfield_get_data(field), len);
+			end += 4;
 			break;
 		}
-		fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
 
 		if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
 			/* nullable field */
@@ -970,7 +1119,7 @@ init:
 			ut_ad(*nulls < null_mask);
 
 			/* set the null flag if necessary */
-			if (len == UNIV_SQL_NULL) {
+			if (dfield_is_null(field)) {
 				*nulls |= null_mask;
 				null_mask <<= 1;
 				continue;
@@ -979,9 +1128,18 @@ init:
 			null_mask <<= 1;
 		}
 		/* only nullable fields can be null */
-		ut_ad(len != UNIV_SQL_NULL);
+		ut_ad(!dfield_is_null(field));
+
+		fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
+
 		if (fixed_len) {
 			ut_ad(len == fixed_len);
+			ut_ad(!dfield_is_ext(field));
+		} else if (dfield_is_ext(field)) {
+			ut_ad(len <= REC_MAX_INDEX_COL_LEN
+			      + BTR_EXTERN_FIELD_REF_SIZE);
+			*lens-- = (byte) (len >> 8) | 0xc0;
+			*lens-- = (byte) len;
 		} else {
 			ut_ad(len <= dtype_get_len(type)
 			      || dtype_get_mtype(type) == DATA_BLOB);
@@ -991,7 +1149,6 @@ init:
 
 				*lens-- = (byte) len;
 			} else {
-				/* the extern bits will be set later */
 				ut_ad(len < 16384);
 				*lens-- = (byte) (len >> 8) | 0x80;
 				*lens-- = (byte) len;
@@ -1001,23 +1158,55 @@ init:
 		memcpy(end, dfield_get_data(field), len);
 		end += len;
 	}
+}
+
+/*********************************************************//**
+Builds a new-style physical record out of a data tuple and
+stores it beginning from the start of the given buffer.
+@return	pointer to the origin of physical record */
+static
+rec_t*
+rec_convert_dtuple_to_rec_new(
+/*==========================*/
+	byte*			buf,	/*!< in: start address of
+					the physical record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*		dtuple)	/*!< in: data tuple */
+{
+	ulint	extra_size;
+	ulint	status;
+	rec_t*	rec;
+
+	status = dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK;
+	rec_get_converted_size_comp(index, status,
+				    dtuple->fields, dtuple->n_fields,
+				    &extra_size);
+	rec = buf + extra_size;
+
+	rec_convert_dtuple_to_rec_comp(
+		rec, REC_N_NEW_EXTRA_BYTES, index, status,
+		dtuple->fields, dtuple->n_fields);
+
+	/* Set the info bits of the record */
+	rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
 
 	return(rec);
 }
 
-/*************************************************************
+/*********************************************************//**
 Builds a physical record out of a data tuple and
-stores it beginning from the start of the given buffer. */
-
+stores it beginning from the start of the given buffer.
+@return	pointer to the origin of physical record */
+UNIV_INTERN
 rec_t*
 rec_convert_dtuple_to_rec(
 /*======================*/
-					/* out: pointer to the origin
-					of physical record */
-	byte*		buf,		/* in: start address of the
+	byte*			buf,	/*!< in: start address of the
 					physical record */
-	dict_index_t*	index,		/* in: record descriptor */
-	dtuple_t*	dtuple)		/* in: data tuple */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*		dtuple,	/*!< in: data tuple */
+	ulint			n_ext)	/*!< in: number of
+					externally stored columns */
 {
 	rec_t*	rec;
 
@@ -1028,7 +1217,7 @@ rec_convert_dtuple_to_rec(
 	if (dict_table_is_comp(index->table)) {
 		rec = rec_convert_dtuple_to_rec_new(buf, index, dtuple);
 	} else {
-		rec = rec_convert_dtuple_to_rec_old(buf, dtuple);
+		rec = rec_convert_dtuple_to_rec_old(buf, dtuple, n_ext);
 	}
 
 #ifdef UNIV_DEBUG
@@ -1036,7 +1225,7 @@ rec_convert_dtuple_to_rec(
 		mem_heap_t*	heap	= NULL;
 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 		const ulint*	offsets;
-		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+		rec_offs_init(offsets_);
 
 		offsets = rec_get_offsets(rec, index,
 					  offsets_, ULINT_UNDEFINED, &heap);
@@ -1049,27 +1238,24 @@ rec_convert_dtuple_to_rec(
 	return(rec);
 }
 
-/******************************************************************
+/**************************************************************//**
 Copies the first n fields of a physical record to a data tuple. The fields
 are copied to the memory heap. */
-
+UNIV_INTERN
 void
 rec_copy_prefix_to_dtuple(
 /*======================*/
-	dtuple_t*	tuple,		/* in: data tuple */
-	rec_t*		rec,		/* in: physical record */
-	dict_index_t*	index,		/* in: record descriptor */
-	ulint		n_fields,	/* in: number of fields to copy */
-	mem_heap_t*	heap)		/* in: memory heap */
+	dtuple_t*		tuple,		/*!< out: data tuple */
+	const rec_t*		rec,		/*!< in: physical record */
+	const dict_index_t*	index,		/*!< in: record descriptor */
+	ulint			n_fields,	/*!< in: number of fields
+						to copy */
+	mem_heap_t*		heap)		/*!< in: memory heap */
 {
-	dfield_t*	field;
-	byte*		data;
-	ulint		len;
-	byte*		buf = NULL;
-	ulint		i;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets	= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	ulint	i;
+	ulint	offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*	offsets	= offsets_;
+	rec_offs_init(offsets_);
 
 	offsets = rec_get_offsets(rec, index, offsets, n_fields, &heap);
 
@@ -1080,34 +1266,37 @@ rec_copy_prefix_to_dtuple(
 				     rec, dict_table_is_comp(index->table)));
 
 	for (i = 0; i < n_fields; i++) {
+		dfield_t*	field;
+		const byte*	data;
+		ulint		len;
 
 		field = dtuple_get_nth_field(tuple, i);
 		data = rec_get_nth_field(rec, offsets, i, &len);
 
 		if (len != UNIV_SQL_NULL) {
-			buf = mem_heap_alloc(heap, len);
-
-			ut_memcpy(buf, data, len);
+			dfield_set_data(field,
+					mem_heap_dup(heap, data, len), len);
+			ut_ad(!rec_offs_nth_extern(offsets, i));
+		} else {
+			dfield_set_null(field);
 		}
-
-		dfield_set_data(field, buf, len);
 	}
 }
 
-/******************************************************************
+/**************************************************************//**
 Copies the first n fields of an old-style physical record
-to a new physical record in a buffer. */
+to a new physical record in a buffer.
+@return	own: copied record */
 static
 rec_t*
 rec_copy_prefix_to_buf_old(
 /*=======================*/
-				/* out, own: copied record */
-	rec_t*	rec,		/* in: physical record */
-	ulint	n_fields,	/* in: number of fields to copy */
-	ulint	area_end,	/* in: end of the prefix data */
-	byte**	buf,		/* in/out: memory buffer for the copied prefix,
-				or NULL */
-	ulint*	buf_size)	/* in/out: buffer size */
+	const rec_t*	rec,		/*!< in: physical record */
+	ulint		n_fields,	/*!< in: number of fields to copy */
+	ulint		area_end,	/*!< in: end of the prefix data */
+	byte**		buf,		/*!< in/out: memory buffer for
+					the copied prefix, or NULL */
+	ulint*		buf_size)	/*!< in/out: buffer size */
 {
 	rec_t*	copy_rec;
 	ulint	area_start;
@@ -1126,8 +1315,7 @@ rec_copy_prefix_to_buf_old(
 			mem_free(*buf);
 		}
 
-		*buf = mem_alloc(prefix_len);
-		*buf_size = prefix_len;
+		*buf = mem_alloc2(prefix_len, buf_size);
 	}
 
 	ut_memcpy(*buf, rec - area_start, prefix_len);
@@ -1139,23 +1327,25 @@ rec_copy_prefix_to_buf_old(
 	return(copy_rec);
 }
 
-/******************************************************************
+/**************************************************************//**
 Copies the first n fields of a physical record to a new physical record in
-a buffer. */
-
+a buffer.
+@return	own: copied record */
+UNIV_INTERN
 rec_t*
 rec_copy_prefix_to_buf(
 /*===================*/
-					/* out, own: copied record */
-	rec_t*		rec,		/* in: physical record */
-	dict_index_t*	index,		/* in: record descriptor */
-	ulint		n_fields,	/* in: number of fields to copy */
-	byte**		buf,		/* in/out: memory buffer
-					for the copied prefix, or NULL */
-	ulint*		buf_size)	/* in/out: buffer size */
+	const rec_t*		rec,		/*!< in: physical record */
+	const dict_index_t*	index,		/*!< in: record descriptor */
+	ulint			n_fields,	/*!< in: number of fields
+						to copy */
+	byte**			buf,		/*!< in/out: memory buffer
+						for the copied prefix,
+						or NULL */
+	ulint*			buf_size)	/*!< in/out: buffer size */
 {
-	byte*		nulls;
-	byte*		lens;
+	const byte*	nulls;
+	const byte*	lens;
 	ulint		i;
 	ulint		prefix_len;
 	ulint		null_mask;
@@ -1244,8 +1434,7 @@ rec_copy_prefix_to_buf(
 			mem_free(*buf);
 		}
 
-		*buf = mem_alloc(prefix_len);
-		*buf_size = prefix_len;
+		*buf = mem_alloc2(prefix_len, buf_size);
 	}
 
 	memcpy(*buf, lens + 1, prefix_len);
@@ -1253,21 +1442,21 @@ rec_copy_prefix_to_buf(
 	return(*buf + (rec - (lens + 1)));
 }
 
-/*******************************************************************
-Validates the consistency of an old-style physical record. */
+/***************************************************************//**
+Validates the consistency of an old-style physical record.
+@return	TRUE if ok */
 static
 ibool
 rec_validate_old(
 /*=============*/
-			/* out: TRUE if ok */
-	rec_t*	rec)	/* in: physical record */
+	const rec_t*	rec)	/*!< in: physical record */
 {
-	byte*	data;
-	ulint	len;
-	ulint	n_fields;
-	ulint	len_sum		= 0;
-	ulint	sum		= 0;
-	ulint	i;
+	const byte*	data;
+	ulint		len;
+	ulint		n_fields;
+	ulint		len_sum		= 0;
+	ulint		sum		= 0;
+	ulint		i;
 
 	ut_a(rec);
 	n_fields = rec_get_n_fields_old(rec);
@@ -1313,15 +1502,15 @@ rec_validate_old(
 	return(TRUE);
 }
 
-/*******************************************************************
-Validates the consistency of a physical record. */
-
+/***************************************************************//**
+Validates the consistency of a physical record.
+@return	TRUE if ok */
+UNIV_INTERN
 ibool
 rec_validate(
 /*=========*/
-				/* out: TRUE if ok */
-	rec_t*		rec,	/* in: physical record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
 	const byte*	data;
 	ulint		len;
@@ -1363,11 +1552,11 @@ rec_validate(
 		}
 	}
 
-	if (len_sum != (ulint)(rec_get_end(rec, offsets) - rec)) {
+	if (len_sum != rec_offs_data_size(offsets)) {
 		fprintf(stderr,
 			"InnoDB: Error: record len should be %lu, len %lu\n",
 			(ulong) len_sum,
-			(ulong) (rec_get_end(rec, offsets) - rec));
+			(ulong) rec_offs_data_size(offsets));
 		return(FALSE);
 	}
 
@@ -1380,14 +1569,14 @@ rec_validate(
 	return(TRUE);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Prints an old-style physical record. */
-
+UNIV_INTERN
 void
 rec_print_old(
 /*==========*/
-	FILE*		file,	/* in: file where to print */
-	rec_t*		rec)	/* in: physical record */
+	FILE*		file,	/*!< in: file where to print */
+	const rec_t*	rec)	/*!< in: physical record */
 {
 	const byte*	data;
 	ulint		len;
@@ -1417,49 +1606,38 @@ rec_print_old(
 			} else {
 				ut_print_buf(file, data, 30);
 
-				fputs("...(truncated)", file);
+				fprintf(file, " (total %lu bytes)",
+					(ulong) len);
 			}
 		} else {
 			fprintf(file, " SQL NULL, size %lu ",
 				rec_get_nth_field_size(rec, i));
 		}
-		putc(';', file);
-	}
 
-	putc('\n', file);
+		putc(';', file);
+		putc('\n', file);
+	}
 
 	rec_validate_old(rec);
 }
 
-/*******************************************************************
-Prints a physical record. */
-
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Prints a physical record in ROW_FORMAT=COMPACT.  Ignores the
+record header. */
+UNIV_INTERN
 void
-rec_print_new(
-/*==========*/
-	FILE*		file,	/* in: file where to print */
-	rec_t*		rec,	/* in: physical record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+rec_print_comp(
+/*===========*/
+	FILE*		file,	/*!< in: file where to print */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
 {
-	const byte*	data;
-	ulint		len;
-	ulint		i;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-
-	if (!rec_offs_comp(offsets)) {
-		rec_print_old(file, rec);
-		return;
-	}
-
-	ut_ad(rec);
-
-	fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
-		" compact format; info bits %lu\n",
-		(ulong) rec_offs_n_fields(offsets),
-		(ulong) rec_get_info_bits(rec, TRUE));
+	ulint	i;
 
 	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+		const byte*	data;
+		ulint		len;
 
 		data = rec_get_nth_field(rec, offsets, i, &len);
 
@@ -1472,28 +1650,54 @@ rec_print_new(
 			} else {
 				ut_print_buf(file, data, 30);
 
-				fputs("...(truncated)", file);
+				fprintf(file, " (total %lu bytes)",
+					(ulong) len);
 			}
 		} else {
 			fputs(" SQL NULL", file);
 		}
 		putc(';', file);
+		putc('\n', file);
+	}
+}
+
+/***************************************************************//**
+Prints a physical record. */
+UNIV_INTERN
+void
+rec_print_new(
+/*==========*/
+	FILE*		file,	/*!< in: file where to print */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	ut_ad(rec);
+	ut_ad(offsets);
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+	if (!rec_offs_comp(offsets)) {
+		rec_print_old(file, rec);
+		return;
 	}
 
-	putc('\n', file);
+	fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
+		" compact format; info bits %lu\n",
+		(ulong) rec_offs_n_fields(offsets),
+		(ulong) rec_get_info_bits(rec, TRUE));
 
+	rec_print_comp(file, rec, offsets);
 	rec_validate(rec, offsets);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Prints a physical record. */
-
+UNIV_INTERN
 void
 rec_print(
 /*======*/
-	FILE*		file,	/* in: file where to print */
-	rec_t*		rec,	/* in: physical record */
-	dict_index_t*	index)	/* in: record descriptor */
+	FILE*		file,	/*!< in: file where to print */
+	const rec_t*	rec,	/*!< in: physical record */
+	dict_index_t*	index)	/*!< in: record descriptor */
 {
 	ut_ad(index);
 
@@ -1503,7 +1707,7 @@ rec_print(
 	} else {
 		mem_heap_t*	heap	= NULL;
 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+		rec_offs_init(offsets_);
 
 		rec_print_new(file, rec,
 			      rec_get_offsets(rec, index, offsets_,
@@ -1513,3 +1717,4 @@ rec_print(
 		}
 	}
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/row/row0ext.c b/storage/innodb_plugin/row/row0ext.c
new file mode 100644
index 00000000000..7320f5b1dca
--- /dev/null
+++ b/storage/innodb_plugin/row/row0ext.c
@@ -0,0 +1,115 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0ext.c
+Caching of externally stored column prefixes
+
+Created September 2006 Marko Makela
+*******************************************************/
+
+#include "row0ext.h"
+
+#ifdef UNIV_NONINL
+#include "row0ext.ic"
+#endif
+
+#include "btr0cur.h"
+
+/********************************************************************//**
+Fills the column prefix cache of an externally stored column. */
+static
+void
+row_ext_cache_fill(
+/*===============*/
+	row_ext_t*	ext,	/*!< in/out: column prefix cache */
+	ulint		i,	/*!< in: index of ext->ext[] */
+	ulint		zip_size,/*!< compressed page size in bytes, or 0 */
+	const dfield_t*	dfield)	/*!< in: data field */
+{
+	const byte*	field	= dfield_get_data(dfield);
+	ulint		f_len	= dfield_get_len(dfield);
+	byte*		buf	= ext->buf + i * REC_MAX_INDEX_COL_LEN;
+
+	ut_ad(i < ext->n_ext);
+	ut_ad(dfield_is_ext(dfield));
+	ut_a(f_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+	if (UNIV_UNLIKELY(!memcmp(field_ref_zero,
+				  field + f_len - BTR_EXTERN_FIELD_REF_SIZE,
+				  BTR_EXTERN_FIELD_REF_SIZE))) {
+		/* The BLOB pointer is not set: we cannot fetch it */
+		ext->len[i] = 0;
+	} else {
+		/* Fetch at most REC_MAX_INDEX_COL_LEN of the column.
+		The column should be non-empty.  However,
+		trx_rollback_or_clean_all_recovered() may try to
+		access a half-deleted BLOB if the server previously
+		crashed during the execution of
+		btr_free_externally_stored_field(). */
+		ext->len[i] = btr_copy_externally_stored_field_prefix(
+			buf, REC_MAX_INDEX_COL_LEN, zip_size, field, f_len);
+	}
+}
+
+/********************************************************************//**
+Creates a cache of column prefixes of externally stored columns.
+@return	own: column prefix cache */
+UNIV_INTERN
+row_ext_t*
+row_ext_create(
+/*===========*/
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	const ulint*	ext,	/*!< in: col_no's of externally stored columns
+				in the InnoDB table object, as reported by
+				dict_col_get_no(); NOT relative to the records
+				in the clustered index */
+	const dtuple_t*	tuple,	/*!< in: data tuple containing the field
+				references of the externally stored
+				columns; must be indexed by col_no;
+				the clustered index record must be
+				covered by a lock or a page latch
+				to prevent deletion (rollback or purge). */
+	ulint		zip_size,/*!< compressed page size in bytes, or 0 */
+	mem_heap_t*	heap)	/*!< in: heap where created */
+{
+	ulint		i;
+	row_ext_t*	ret = mem_heap_alloc(heap, (sizeof *ret)
+					     + (n_ext - 1) * sizeof ret->len);
+
+	ut_ad(ut_is_2pow(zip_size));
+	ut_ad(zip_size <= UNIV_PAGE_SIZE);
+
+	ret->n_ext = n_ext;
+	ret->ext = ext;
+	ret->buf = mem_heap_alloc(heap, n_ext * REC_MAX_INDEX_COL_LEN);
+#ifdef UNIV_DEBUG
+	memset(ret->buf, 0xaa, n_ext * REC_MAX_INDEX_COL_LEN);
+	UNIV_MEM_ALLOC(ret->buf, n_ext * REC_MAX_INDEX_COL_LEN);
+#endif
+
+	/* Fetch the BLOB prefixes */
+	for (i = 0; i < n_ext; i++) {
+		const dfield_t*	dfield;
+
+		dfield = dtuple_get_nth_field(tuple, ext[i]);
+		row_ext_cache_fill(ret, i, zip_size, dfield);
+	}
+
+	return(ret);
+}
diff --git a/storage/innobase/row/row0ins.c b/storage/innodb_plugin/row/row0ins.c
similarity index 78%
rename from storage/innobase/row/row0ins.c
rename to storage/innodb_plugin/row/row0ins.c
index ad14b927170..930c9ec1fc7 100644
--- a/storage/innobase/row/row0ins.c
+++ b/storage/innodb_plugin/row/row0ins.c
@@ -1,7 +1,24 @@
-/******************************************************
-Insert into a table
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0ins.c
+Insert into a table
 
 Created 4/20/1996 Heikki Tuuri
 *******************************************************/
@@ -12,6 +29,7 @@ Created 4/20/1996 Heikki Tuuri
 #include "row0ins.ic"
 #endif
 
+#include "ha_prototypes.h"
 #include "dict0dict.h"
 #include "dict0boot.h"
 #include "trx0undo.h"
@@ -34,33 +52,16 @@ Created 4/20/1996 Heikki Tuuri
 #define	ROW_INS_NEXT	2
 
 
-/*********************************************************************
-This prototype is copied from /mysql/sql/ha_innodb.cc.
-Invalidates the MySQL query cache for the table.
-NOTE that the exact prototype of this function has to be in
-/innobase/row/row0ins.c! */
-extern
-void
-innobase_invalidate_query_cache(
-/*============================*/
-	trx_t*	trx,		/* in: transaction which modifies the table */
-	char*	full_name,	/* in: concatenation of database name, null
-				char '\0', table name, null char'\0';
-				NOTE that in Windows this is always
-				in LOWER CASE! */
-	ulint	full_name_len);	/* in: full name length where also the null
-				chars count */
-
-/*************************************************************************
-Creates an insert node struct. */
-
+/*********************************************************************//**
+Creates an insert node struct.
+@return	own: insert node struct */
+UNIV_INTERN
 ins_node_t*
 ins_node_create(
 /*============*/
-					/* out, own: insert node struct */
-	ulint		ins_type,	/* in: INS_VALUES, ... */
-	dict_table_t*	table,		/* in: table where to insert */
-	mem_heap_t*	heap)		/* in: mem heap where created */
+	ulint		ins_type,	/*!< in: INS_VALUES, ... */
+	dict_table_t*	table,		/*!< in: table where to insert */
+	mem_heap_t*	heap)		/*!< in: mem heap where created */
 {
 	ins_node_t*	node;
 
@@ -86,13 +87,13 @@ ins_node_create(
 	return(node);
 }
 
-/***************************************************************
+/***********************************************************//**
 Creates an entry template for each index of a table. */
-static
+UNIV_INTERN
 void
 ins_node_create_entry_list(
 /*=======================*/
-	ins_node_t*	node)	/* in: row insert node */
+	ins_node_t*	node)	/*!< in: row insert node */
 {
 	dict_index_t*	index;
 	dtuple_t*	entry;
@@ -104,7 +105,7 @@ ins_node_create_entry_list(
 	index = dict_table_get_first_index(node->table);
 
 	while (index != NULL) {
-		entry = row_build_index_entry(node->row, index,
+		entry = row_build_index_entry(node->row, NULL, index,
 					      node->entry_sys_heap);
 		UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry);
 
@@ -112,13 +113,13 @@ ins_node_create_entry_list(
 	}
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Adds system field buffers to a row. */
 static
 void
 row_ins_alloc_sys_fields(
 /*=====================*/
-	ins_node_t*	node)	/* in: insert node */
+	ins_node_t*	node)	/*!< in: insert node */
 {
 	dtuple_t*		row;
 	dict_table_t*		table;
@@ -167,16 +168,16 @@ row_ins_alloc_sys_fields(
 	dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Sets a new row to insert for an INS_DIRECT node. This function is only used
 if we have constructed the row separately, which is a rare case; this
 function is quite slow. */
-
+UNIV_INTERN
 void
 ins_node_set_new_row(
 /*=================*/
-	ins_node_t*	node,	/* in: insert node */
-	dtuple_t*	row)	/* in: new row (or first row) for the node */
+	ins_node_t*	node,	/*!< in: insert node */
+	dtuple_t*	row)	/*!< in: new row (or first row) for the node */
 {
 	node->state = INS_NODE_SET_IX_LOCK;
 	node->index = NULL;
@@ -200,22 +201,23 @@ ins_node_set_new_row(
 	node->trx_id = ut_dulint_zero;
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Does an insert operation by updating a delete-marked existing record
 in the index. This situation can occur if the delete-marked record is
-kept in the index for consistent reads. */
+kept in the index for consistent reads.
+@return	DB_SUCCESS or error code */
 static
 ulint
 row_ins_sec_index_entry_by_modify(
 /*==============================*/
-				/* out: DB_SUCCESS or error code */
-	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
 				depending on whether mtr holds just a leaf
 				latch or also a tree latch */
-	btr_cur_t*	cursor,	/* in: B-tree cursor */
-	dtuple_t*	entry,	/* in: index entry to insert */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr)	/* in: mtr */
+	btr_cur_t*	cursor,	/*!< in: B-tree cursor */
+	const dtuple_t*	entry,	/*!< in: index entry to insert */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr)	/*!< in: mtr; must be committed before
+				latching any further pages */
 {
 	big_rec_t*	dummy_big_rec;
 	mem_heap_t*	heap;
@@ -225,7 +227,7 @@ row_ins_sec_index_entry_by_modify(
 
 	rec = btr_cur_get_rec(cursor);
 
-	ut_ad((cursor->index->type & DICT_CLUSTERED) == 0);
+	ut_ad(!dict_index_is_clust(cursor->index));
 	ut_ad(rec_get_deleted_flag(rec,
 				   dict_table_is_comp(cursor->index->table)));
 
@@ -244,7 +246,10 @@ row_ins_sec_index_entry_by_modify(
 
 		err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor,
 						update, 0, thr, mtr);
-		if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
+		switch (err) {
+		case DB_OVERFLOW:
+		case DB_UNDERFLOW:
+		case DB_ZIP_OVERFLOW:
 			err = DB_FAIL;
 		}
 	} else {
@@ -257,8 +262,9 @@ row_ins_sec_index_entry_by_modify(
 		}
 
 		err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor,
-						 &dummy_big_rec, update,
+						 &heap, &dummy_big_rec, update,
 						 0, thr, mtr);
+		ut_ad(!dummy_big_rec);
 	}
 func_exit:
 	mem_heap_free(heap);
@@ -266,35 +272,33 @@ func_exit:
 	return(err);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Does an insert operation by delete unmarking and updating a delete marked
 existing record in the index. This situation can occur if the delete marked
-record is kept in the index for consistent reads. */
+record is kept in the index for consistent reads.
+@return	DB_SUCCESS, DB_FAIL, or error code */
 static
 ulint
 row_ins_clust_index_entry_by_modify(
 /*================================*/
-				/* out: DB_SUCCESS, DB_FAIL, or error code */
-	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
 				depending on whether mtr holds just a leaf
 				latch or also a tree latch */
-	btr_cur_t*	cursor,	/* in: B-tree cursor */
-	big_rec_t**	big_rec,/* out: possible big rec vector of fields
+	btr_cur_t*	cursor,	/*!< in: B-tree cursor */
+	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
+	big_rec_t**	big_rec,/*!< out: possible big rec vector of fields
 				which have to be stored externally by the
 				caller */
-	dtuple_t*	entry,	/* in: index entry to insert */
-	ulint*		ext_vec,/* in: array containing field numbers of
-				externally stored fields in entry, or NULL */
-	ulint		n_ext_vec,/* in: number of fields in ext_vec */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr)	/* in: mtr */
+	const dtuple_t*	entry,	/*!< in: index entry to insert */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr)	/*!< in: mtr; must be committed before
+				latching any further pages */
 {
-	mem_heap_t*	heap;
 	rec_t*		rec;
 	upd_t*		update;
 	ulint		err;
 
-	ut_ad(cursor->index->type & DICT_CLUSTERED);
+	ut_ad(dict_index_is_clust(cursor->index));
 
 	*big_rec = NULL;
 
@@ -303,51 +307,53 @@ row_ins_clust_index_entry_by_modify(
 	ut_ad(rec_get_deleted_flag(rec,
 				   dict_table_is_comp(cursor->index->table)));
 
-	heap = mem_heap_create(1024);
+	if (!*heap) {
+		*heap = mem_heap_create(1024);
+	}
 
 	/* Build an update vector containing all the fields to be modified;
 	NOTE that this vector may NOT contain system columns trx_id or
 	roll_ptr */
 
-	update = row_upd_build_difference_binary(cursor->index, entry, ext_vec,
-						 n_ext_vec, rec,
-						 thr_get_trx(thr), heap);
+	update = row_upd_build_difference_binary(cursor->index, entry, rec,
+						 thr_get_trx(thr), *heap);
 	if (mode == BTR_MODIFY_LEAF) {
 		/* Try optimistic updating of the record, keeping changes
 		within the page */
 
 		err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
 						mtr);
-		if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
+		switch (err) {
+		case DB_OVERFLOW:
+		case DB_UNDERFLOW:
+		case DB_ZIP_OVERFLOW:
 			err = DB_FAIL;
 		}
 	} else {
 		ut_a(mode == BTR_MODIFY_TREE);
 		if (buf_LRU_buf_pool_running_out()) {
 
-			err = DB_LOCK_TABLE_FULL;
+			return(DB_LOCK_TABLE_FULL);
 
-			goto func_exit;
 		}
-		err = btr_cur_pessimistic_update(0, cursor, big_rec, update,
+		err = btr_cur_pessimistic_update(0, cursor,
+						 heap, big_rec, update,
 						 0, thr, mtr);
 	}
-func_exit:
-	mem_heap_free(heap);
 
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Returns TRUE if in a cascaded update/delete an ancestor node of node
-updates (not DELETE, but UPDATE) table. */
+updates (not DELETE, but UPDATE) table.
+@return	TRUE if an ancestor updates table */
 static
 ibool
 row_ins_cascade_ancestor_updates_table(
 /*===================================*/
-				/* out: TRUE if an ancestor updates table */
-	que_node_t*	node,	/* in: node in a query graph */
-	dict_table_t*	table)	/* in: table */
+	que_node_t*	node,	/*!< in: node in a query graph */
+	dict_table_t*	table)	/*!< in: table */
 {
 	que_node_t*	parent;
 	upd_node_t*	upd_node;
@@ -371,15 +377,15 @@ row_ins_cascade_ancestor_updates_table(
 	return(FALSE);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Returns the number of ancestor UPDATE or DELETE nodes of a
-cascaded update/delete node. */
+cascaded update/delete node.
+@return	number of ancestors */
 static
 ulint
 row_ins_cascade_n_ancestors(
 /*========================*/
-				/* out: number of ancestors */
-	que_node_t*	node)	/* in: node in a query graph */
+	que_node_t*	node)	/*!< in: node in a query graph */
 {
 	que_node_t*	parent;
 	ulint		n_ancestors = 0;
@@ -397,26 +403,22 @@ row_ins_cascade_n_ancestors(
 	return(n_ancestors);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Calculates the update vector node->cascade->update for a child table in
-a cascaded update. */
+a cascaded update.
+@return number of fields in the calculated update vector; the value
+can also be 0 if no foreign key fields changed; the returned value is
+ULINT_UNDEFINED if the column type in the child table is too short to
+fit the new value in the parent table: that means the update fails */
 static
 ulint
 row_ins_cascade_calc_update_vec(
 /*============================*/
-					/* out: number of fields in the
-					calculated update vector; the value
-					can also be 0 if no foreign key
-					fields changed; the returned value
-					is ULINT_UNDEFINED if the column
-					type in the child table is too short
-					to fit the new value in the parent
-					table: that means the update fails */
-	upd_node_t*	node,		/* in: update node of the parent
+	upd_node_t*	node,		/*!< in: update node of the parent
 					table */
-	dict_foreign_t*	foreign,	/* in: foreign key constraint whose
+	dict_foreign_t*	foreign,	/*!< in: foreign key constraint whose
 					type is != 0 */
-	mem_heap_t*	heap)		/* in: memory heap to use as
+	mem_heap_t*	heap)		/*!< in: memory heap to use as
 					temporary storage */
 {
 	upd_node_t*	cascade		= node->cascade_node;
@@ -469,6 +471,7 @@ row_ins_cascade_calc_update_vec(
 
 				ulint			min_size;
 				const dict_col_t*	col;
+				ulint			ufield_len;
 
 				col = dict_index_get_nth_col(index, i);
 
@@ -484,11 +487,15 @@ row_ins_cascade_calc_update_vec(
 				ufield->exp = NULL;
 
 				ufield->new_val = parent_ufield->new_val;
+				ufield_len = dfield_get_len(&ufield->new_val);
+
+				/* Clear the "external storage" flag */
+				dfield_set_len(&ufield->new_val, ufield_len);
 
 				/* Do not allow a NOT NULL column to be
 				updated as NULL */
 
-				if (ufield->new_val.len == UNIV_SQL_NULL
+				if (dfield_is_null(&ufield->new_val)
 				    && (col->prtype & DATA_NOT_NULL)) {
 
 					return(ULINT_UNDEFINED);
@@ -497,14 +504,14 @@ row_ins_cascade_calc_update_vec(
 				/* If the new value would not fit in the
 				column, do not allow the update */
 
-				if (ufield->new_val.len != UNIV_SQL_NULL
+				if (!dfield_is_null(&ufield->new_val)
 				    && dtype_get_at_most_n_mbchars(
 					col->prtype,
 					col->mbminlen, col->mbmaxlen,
 					col->len,
-					ufield->new_val.len,
-					ufield->new_val.data)
-				    < ufield->new_val.len) {
+					ufield_len,
+					dfield_get_data(&ufield->new_val))
+				    < ufield_len) {
 
 					return(ULINT_UNDEFINED);
 				}
@@ -516,28 +523,31 @@ row_ins_cascade_calc_update_vec(
 
 				min_size = dict_col_get_min_size(col);
 
-				if (min_size
-				    && ufield->new_val.len != UNIV_SQL_NULL
-				    && ufield->new_val.len < min_size) {
+				/* Because UNIV_SQL_NULL (the marker
+				of SQL NULL values) exceeds all possible
+				values of min_size, the test below will
+				not hold for SQL NULL columns. */
+
+				if (min_size > ufield_len) {
 
 					char*		pad_start;
 					const char*	pad_end;
-					ufield->new_val.data = mem_heap_alloc(
-						heap, min_size);
-					pad_start = ((char*) ufield
-						     ->new_val.data)
-						+ ufield->new_val.len;
-					pad_end = ((char*) ufield
-						   ->new_val.data)
-						+ min_size;
-					ufield->new_val.len = min_size;
-					ut_memcpy(ufield->new_val.data,
-						  parent_ufield->new_val.data,
-						  parent_ufield->new_val.len);
+					char*		padded_data
+						= mem_heap_alloc(
+							heap, min_size);
+					pad_start = padded_data + ufield_len;
+					pad_end = padded_data + min_size;
+
+					memcpy(padded_data,
+					       dfield_get_data(&ufield
+							       ->new_val),
+					       dfield_get_len(&ufield
+							      ->new_val));
 
 					switch (UNIV_EXPECT(col->mbminlen,1)) {
 					default:
 						ut_error;
+						return(ULINT_UNDEFINED);
 					case 1:
 						if (UNIV_UNLIKELY
 						    (dtype_get_charset_coll(
@@ -554,8 +564,7 @@ row_ins_cascade_calc_update_vec(
 						break;
 					case 2:
 						/* space=0x0020 */
-						ut_a(!(ufield->new_val.len
-						       % 2));
+						ut_a(!(ufield_len % 2));
 						ut_a(!(min_size % 2));
 						do {
 							*pad_start++ = 0x00;
@@ -563,9 +572,10 @@ row_ins_cascade_calc_update_vec(
 						} while (pad_start < pad_end);
 						break;
 					}
-				}
 
-				ufield->extern_storage = FALSE;
+					dfield_set_data(&ufield->new_val,
+							padded_data, min_size);
+				}
 
 				n_fields_updated++;
 			}
@@ -577,15 +587,15 @@ row_ins_cascade_calc_update_vec(
 	return(n_fields_updated);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Set detailed error message associated with foreign key errors for
 the given transaction. */
 static
 void
 row_ins_set_detailed(
 /*=================*/
-	trx_t*		trx,		/* in: transaction */
-	dict_foreign_t*	foreign)	/* in: foreign key constraint */
+	trx_t*		trx,		/*!< in: transaction */
+	dict_foreign_t*	foreign)	/*!< in: foreign key constraint */
 {
 	mutex_enter(&srv_misc_tmpfile_mutex);
 	rewind(srv_misc_tmpfile);
@@ -603,21 +613,21 @@ row_ins_set_detailed(
 	mutex_exit(&srv_misc_tmpfile_mutex);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Reports a foreign key error associated with an update or a delete of a
 parent table index entry. */
 static
 void
 row_ins_foreign_report_err(
 /*=======================*/
-	const char*	errstr,		/* in: error string from the viewpoint
+	const char*	errstr,		/*!< in: error string from the viewpoint
 					of the parent table */
-	que_thr_t*	thr,		/* in: query thread whose run_node
+	que_thr_t*	thr,		/*!< in: query thread whose run_node
 					is an update node */
-	dict_foreign_t*	foreign,	/* in: foreign key constraint */
-	rec_t*		rec,		/* in: a matching index record in the
+	dict_foreign_t*	foreign,	/*!< in: foreign key constraint */
+	const rec_t*	rec,		/*!< in: a matching index record in the
 					child table */
-	dtuple_t*	entry)		/* in: index entry in the parent
+	const dtuple_t*	entry)		/*!< in: index entry in the parent
 					table */
 {
 	FILE*	ef	= dict_foreign_err_file;
@@ -659,7 +669,7 @@ row_ins_foreign_report_err(
 	mutex_exit(&dict_foreign_err_mutex);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Reports a foreign key error to dict_foreign_err_file when we are trying
 to add an index entry to a child table. Note that the adding may be the result
 of an update, too. */
@@ -667,12 +677,12 @@ static
 void
 row_ins_foreign_report_add_err(
 /*===========================*/
-	trx_t*		trx,		/* in: transaction */
-	dict_foreign_t*	foreign,	/* in: foreign key constraint */
-	rec_t*		rec,		/* in: a record in the parent table:
+	trx_t*		trx,		/*!< in: transaction */
+	dict_foreign_t*	foreign,	/*!< in: foreign key constraint */
+	const rec_t*	rec,		/*!< in: a record in the parent table:
 					it does not match entry because we
 					have an error! */
-	dtuple_t*	entry)		/* in: index entry to insert in the
+	const dtuple_t*	entry)		/*!< in: index entry to insert in the
 					child table */
 {
 	FILE*	ef	= dict_foreign_err_file;
@@ -693,6 +703,8 @@ row_ins_foreign_report_add_err(
 	ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
 	if (entry) {
 		fputs(" tuple:\n", ef);
+		/* TODO: DB_TRX_ID and DB_ROLL_PTR may be uninitialized.
+		It would be better to only display the user columns. */
 		dtuple_print(ef, entry);
 	}
 	fputs("\nBut in parent table ", ef);
@@ -704,7 +716,7 @@ row_ins_foreign_report_add_err(
 		/* If the cursor ended on a supremum record, it is better
 		to report the previous record in the error message, so that
 		the user gets a more descriptive error message. */
-		rec = page_rec_get_prev(rec);
+		rec = page_rec_get_prev_const(rec);
 	}
 
 	if (rec) {
@@ -715,15 +727,15 @@ row_ins_foreign_report_add_err(
 	mutex_exit(&dict_foreign_err_mutex);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Invalidate the query cache for the given table. */
 static
 void
 row_ins_invalidate_query_cache(
 /*===========================*/
-	que_thr_t*	thr,		/* in: query thread whose run_node
+	que_thr_t*	thr,		/*!< in: query thread whose run_node
 					is an update node */
-	const char*	name)		/* in: table name prefixed with
+	const char*	name)		/*!< in: table name prefixed with
 					database name and a '/' character */
 {
 	char*	buf;
@@ -736,32 +748,28 @@ row_ins_invalidate_query_cache(
 	ut_a(ptr);
 	*ptr = '\0';
 
-	/* We call a function in ha_innodb.cc */
-#ifndef UNIV_HOTBACKUP
 	innobase_invalidate_query_cache(thr_get_trx(thr), buf, len);
-#endif
 	mem_free(buf);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Perform referential actions or checks when a parent row is deleted or updated
 and the constraint had an ON DELETE or ON UPDATE condition which was not
-RESTRICT. */
+RESTRICT.
+@return	DB_SUCCESS, DB_LOCK_WAIT, or error code */
 static
 ulint
 row_ins_foreign_check_on_constraint(
 /*================================*/
-					/* out: DB_SUCCESS, DB_LOCK_WAIT,
-					or error code */
-	que_thr_t*	thr,		/* in: query thread whose run_node
+	que_thr_t*	thr,		/*!< in: query thread whose run_node
 					is an update node */
-	dict_foreign_t*	foreign,	/* in: foreign key constraint whose
+	dict_foreign_t*	foreign,	/*!< in: foreign key constraint whose
 					type is != 0 */
-	btr_pcur_t*	pcur,		/* in: cursor placed on a matching
+	btr_pcur_t*	pcur,		/*!< in: cursor placed on a matching
 					index record in the child table */
-	dtuple_t*	entry,		/* in: index entry in the parent
+	dtuple_t*	entry,		/*!< in: index entry in the parent
 					table */
-	mtr_t*		mtr)		/* in: mtr holding the latch of pcur
+	mtr_t*		mtr)		/*!< in: mtr holding the latch of pcur
 					page */
 {
 	upd_node_t*	node;
@@ -771,8 +779,9 @@ row_ins_foreign_check_on_constraint(
 	dict_index_t*	clust_index;
 	dtuple_t*	ref;
 	mem_heap_t*	upd_vec_heap	= NULL;
-	rec_t*		rec;
-	rec_t*		clust_rec;
+	const rec_t*	rec;
+	const rec_t*	clust_rec;
+	const buf_block_t* clust_block;
 	upd_t*		update;
 	ulint		n_to_update;
 	ulint		err;
@@ -899,12 +908,13 @@ row_ins_foreign_check_on_constraint(
 
 	rec = btr_pcur_get_rec(pcur);
 
-	if (index->type & DICT_CLUSTERED) {
+	if (dict_index_is_clust(index)) {
 		/* pcur is already positioned in the clustered index of
 		the child table */
 
 		clust_index = index;
 		clust_rec = rec;
+		clust_block = btr_pcur_get_block(pcur);
 	} else {
 		/* We have to look for the record in the clustered index
 		in the child table */
@@ -920,6 +930,7 @@ row_ins_foreign_check_on_constraint(
 					   cascade->pcur, 0, mtr);
 
 		clust_rec = btr_pcur_get_rec(cascade->pcur);
+		clust_block = btr_pcur_get_block(cascade->pcur);
 
 		if (!page_rec_is_user_rec(clust_rec)
 		    || btr_pcur_get_low_match(cascade->pcur)
@@ -955,8 +966,8 @@ row_ins_foreign_check_on_constraint(
 		gap if the search criterion was not unique */
 
 		err = lock_clust_rec_read_check_and_lock_alt(
-			0, clust_rec, clust_index, LOCK_X, LOCK_REC_NOT_GAP,
-			thr);
+			0, clust_block, clust_rec, clust_index,
+			LOCK_X, LOCK_REC_NOT_GAP, thr);
 	}
 
 	if (err != DB_SUCCESS) {
@@ -987,14 +998,14 @@ row_ins_foreign_check_on_constraint(
 		update->n_fields = foreign->n_fields;
 
 		for (i = 0; i < foreign->n_fields; i++) {
-			(update->fields + i)->field_no
-				= dict_table_get_nth_col_pos(
-					table,
-					dict_index_get_nth_col_no(index, i));
-			(update->fields + i)->exp = NULL;
-			(update->fields + i)->new_val.len = UNIV_SQL_NULL;
-			(update->fields + i)->new_val.data = NULL;
-			(update->fields + i)->extern_storage = FALSE;
+			upd_field_t*	ufield = &update->fields[i];
+
+			ufield->field_no = dict_table_get_nth_col_pos(
+				table,
+				dict_index_get_nth_col_no(index, i));
+			ufield->orig_len = 0;
+			ufield->exp = NULL;
+			dfield_set_null(&ufield->new_val);
 		}
 	}
 
@@ -1107,95 +1118,92 @@ nonstandard_exit_func:
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Sets a shared lock on a record. Used in locking possible duplicate key
-records and also in checking foreign key constraints. */
+records and also in checking foreign key constraints.
+@return	DB_SUCCESS or error code */
 static
 ulint
 row_ins_set_shared_rec_lock(
 /*========================*/
-				/* out: DB_SUCCESS or error code */
-	ulint		type,	/* in: LOCK_ORDINARY, LOCK_GAP, or
-				LOCK_REC_NOT_GAP type lock */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	que_thr_t*	thr)	/* in: query thread */
+	ulint			type,	/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOCK_REC_NOT_GAP type lock */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: record */
+	dict_index_t*		index,	/*!< in: index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	que_thr_t*		thr)	/*!< in: query thread */
 {
 	ulint	err;
 
 	ut_ad(rec_offs_validate(rec, index, offsets));
 
-	if (index->type & DICT_CLUSTERED) {
+	if (dict_index_is_clust(index)) {
 		err = lock_clust_rec_read_check_and_lock(
-			0, rec, index, offsets, LOCK_S, type, thr);
+			0, block, rec, index, offsets, LOCK_S, type, thr);
 	} else {
 		err = lock_sec_rec_read_check_and_lock(
-			0, rec, index, offsets, LOCK_S, type, thr);
+			0, block, rec, index, offsets, LOCK_S, type, thr);
 	}
 
 	return(err);
 }
 
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
+/*********************************************************************//**
 Sets a exclusive lock on a record. Used in locking possible duplicate key
-records */
+records
+@return	DB_SUCCESS or error code */
 static
 ulint
 row_ins_set_exclusive_rec_lock(
 /*===========================*/
-				/* out: DB_SUCCESS or error code */
-	ulint		type,	/* in: LOCK_ORDINARY, LOCK_GAP, or
-				LOCK_REC_NOT_GAP type lock */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	que_thr_t*	thr)	/* in: query thread */
+	ulint			type,	/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOCK_REC_NOT_GAP type lock */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: record */
+	dict_index_t*		index,	/*!< in: index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	que_thr_t*		thr)	/*!< in: query thread */
 {
 	ulint	err;
 
 	ut_ad(rec_offs_validate(rec, index, offsets));
 
-	if (index->type & DICT_CLUSTERED) {
+	if (dict_index_is_clust(index)) {
 		err = lock_clust_rec_read_check_and_lock(
-			0, rec, index, offsets, LOCK_X, type, thr);
+			0, block, rec, index, offsets, LOCK_X, type, thr);
 	} else {
 		err = lock_sec_rec_read_check_and_lock(
-			0, rec, index, offsets, LOCK_X, type, thr);
+			0, block, rec, index, offsets, LOCK_X, type, thr);
 	}
 
 	return(err);
 }
-#endif /* !UNIV_HOTBACKUP */
 
-/*******************************************************************
+/***************************************************************//**
 Checks if foreign key constraint fails for an index entry. Sets shared locks
 which lock either the success or the failure of the constraint. NOTE that
-the caller must have a shared latch on dict_operation_lock. */
-
+the caller must have a shared latch on dict_operation_lock.
+@return	DB_SUCCESS, DB_NO_REFERENCED_ROW, or DB_ROW_IS_REFERENCED */
+UNIV_INTERN
 ulint
 row_ins_check_foreign_constraint(
 /*=============================*/
-				/* out: DB_SUCCESS,
-				DB_NO_REFERENCED_ROW,
-				or DB_ROW_IS_REFERENCED */
-	ibool		check_ref,/* in: TRUE if we want to check that
+	ibool		check_ref,/*!< in: TRUE if we want to check that
 				the referenced table is ok, FALSE if we
 				want to to check the foreign key table */
-	dict_foreign_t*	foreign,/* in: foreign constraint; NOTE that the
+	dict_foreign_t*	foreign,/*!< in: foreign constraint; NOTE that the
 				tables mentioned in it must be in the
 				dictionary cache if they exist at all */
-	dict_table_t*	table,	/* in: if check_ref is TRUE, then the foreign
+	dict_table_t*	table,	/*!< in: if check_ref is TRUE, then the foreign
 				table, else the referenced table */
-	dtuple_t*	entry,	/* in: index entry for index */
-	que_thr_t*	thr)	/* in: query thread */
+	dtuple_t*	entry,	/*!< in: index entry for index */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	upd_node_t*	upd_node;
 	dict_table_t*	check_table;
 	dict_index_t*	check_index;
 	ulint		n_fields_cmp;
-	rec_t*		rec;
 	btr_pcur_t	pcur;
 	ibool		moved;
 	int		cmp;
@@ -1206,7 +1214,7 @@ row_ins_check_foreign_constraint(
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 run_again:
 #ifdef UNIV_SYNC_DEBUG
@@ -1329,7 +1337,8 @@ run_again:
 	/* Scan index records and check if there is a matching record */
 
 	for (;;) {
-		rec = btr_pcur_get_rec(&pcur);
+		const rec_t*		rec = btr_pcur_get_rec(&pcur);
+		const buf_block_t*	block = btr_pcur_get_block(&pcur);
 
 		if (page_rec_is_infimum(rec)) {
 
@@ -1341,8 +1350,9 @@ run_again:
 
 		if (page_rec_is_supremum(rec)) {
 
-			err = row_ins_set_shared_rec_lock(
-				LOCK_ORDINARY, rec, check_index, offsets, thr);
+			err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, block,
+							  rec, check_index,
+							  offsets, thr);
 			if (err != DB_SUCCESS) {
 
 				break;
@@ -1357,8 +1367,8 @@ run_again:
 			if (rec_get_deleted_flag(rec,
 						 rec_offs_comp(offsets))) {
 				err = row_ins_set_shared_rec_lock(
-					LOCK_ORDINARY, rec, check_index,
-					offsets, thr);
+					LOCK_ORDINARY, block,
+					rec, check_index, offsets, thr);
 				if (err != DB_SUCCESS) {
 
 					break;
@@ -1369,8 +1379,8 @@ run_again:
 				into gaps */
 
 				err = row_ins_set_shared_rec_lock(
-					LOCK_REC_NOT_GAP, rec, check_index,
-					offsets, thr);
+					LOCK_REC_NOT_GAP, block,
+					rec, check_index, offsets, thr);
 
 				if (err != DB_SUCCESS) {
 
@@ -1408,6 +1418,11 @@ run_again:
 
 						break;
 					}
+
+					/* row_ins_foreign_check_on_constraint
+					may have repositioned pcur on a
+					different block */
+					block = btr_pcur_get_block(&pcur);
 				} else {
 					row_ins_foreign_report_err(
 						"Trying to delete or update",
@@ -1421,7 +1436,8 @@ run_again:
 
 		if (cmp < 0) {
 			err = row_ins_set_shared_rec_lock(
-				LOCK_GAP, rec, check_index, offsets, thr);
+				LOCK_GAP, block,
+				rec, check_index, offsets, thr);
 			if (err != DB_SUCCESS) {
 
 				break;
@@ -1486,21 +1502,21 @@ exit_func:
 	return(err);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Checks if foreign key constraints fail for an index entry. If index
 is not mentioned in any constraint, this function does nothing,
 Otherwise does searches to the indexes of referenced tables and
 sets shared locks which lock either the success or the failure of
-a constraint. */
+a constraint.
+@return	DB_SUCCESS or error code */
 static
 ulint
 row_ins_check_foreign_constraints(
 /*==============================*/
-				/* out: DB_SUCCESS or error code */
-	dict_table_t*	table,	/* in: table */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry for index */
-	que_thr_t*	thr)	/* in: query thread */
+	dict_table_t*	table,	/*!< in: table */
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry,	/*!< in: index entry for index */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	dict_foreign_t*	foreign;
 	ulint		err;
@@ -1568,21 +1584,20 @@ row_ins_check_foreign_constraints(
 	return(DB_SUCCESS);
 }
 
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************
+/***************************************************************//**
 Checks if a unique key violation to rec would occur at the index entry
-insert. */
+insert.
+@return	TRUE if error */
 static
 ibool
 row_ins_dupl_error_with_rec(
 /*========================*/
-				/* out: TRUE if error */
-	rec_t*		rec,	/* in: user record; NOTE that we assume
+	const rec_t*	rec,	/*!< in: user record; NOTE that we assume
 				that the caller already has a record lock on
 				the record! */
-	dtuple_t*	entry,	/* in: entry to insert */
-	dict_index_t*	index,	/* in: index */
-	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
+	const dtuple_t*	entry,	/*!< in: entry to insert */
+	dict_index_t*	index,	/*!< in: index */
+	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
 {
 	ulint	matched_fields;
 	ulint	matched_bytes;
@@ -1607,7 +1622,7 @@ row_ins_dupl_error_with_rec(
 	/* In a unique secondary index we allow equal key values if they
 	contain SQL NULLs */
 
-	if (!(index->type & DICT_CLUSTERED)) {
+	if (!dict_index_is_clust(index)) {
 
 		for (i = 0; i < n_unique; i++) {
 			if (UNIV_SQL_NULL == dfield_get_len(
@@ -1620,37 +1635,32 @@ row_ins_dupl_error_with_rec(
 
 	return(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
 }
-#endif /* !UNIV_HOTBACKUP */
 
-/*******************************************************************
+/***************************************************************//**
 Scans a unique non-clustered index at a given index entry to determine
 whether a uniqueness violation has occurred for the key value of the entry.
-Set shared locks on possible duplicate records. */
+Set shared locks on possible duplicate records.
+@return	DB_SUCCESS, DB_DUPLICATE_KEY, or DB_LOCK_WAIT */
 static
 ulint
 row_ins_scan_sec_index_for_duplicate(
 /*=================================*/
-				/* out: DB_SUCCESS, DB_DUPLICATE_KEY, or
-				DB_LOCK_WAIT */
-	dict_index_t*	index,	/* in: non-clustered unique index */
-	dtuple_t*	entry,	/* in: index entry */
-	que_thr_t*	thr)	/* in: query thread */
+	dict_index_t*	index,	/*!< in: non-clustered unique index */
+	dtuple_t*	entry,	/*!< in: index entry */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
-#ifndef UNIV_HOTBACKUP
 	ulint		n_unique;
 	ulint		i;
 	int		cmp;
 	ulint		n_fields_cmp;
-	rec_t*		rec;
 	btr_pcur_t	pcur;
 	ulint		err		= DB_SUCCESS;
-	ibool		moved;
 	unsigned	allow_duplicates;
 	mtr_t		mtr;
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	n_unique = dict_index_get_n_unique(index);
 
@@ -1680,12 +1690,13 @@ row_ins_scan_sec_index_for_duplicate(
 
 	/* Scan index records and check if there is a duplicate */
 
-	for (;;) {
-		rec = btr_pcur_get_rec(&pcur);
+	do {
+		const rec_t*		rec	= btr_pcur_get_rec(&pcur);
+		const buf_block_t*	block	= btr_pcur_get_block(&pcur);
 
 		if (page_rec_is_infimum(rec)) {
 
-			goto next_rec;
+			continue;
 		}
 
 		offsets = rec_get_offsets(rec, index, offsets,
@@ -1699,11 +1710,13 @@ row_ins_scan_sec_index_for_duplicate(
 			INSERT ON DUPLICATE KEY UPDATE). */
 
 			err = row_ins_set_exclusive_rec_lock(
-				LOCK_ORDINARY, rec, index, offsets, thr);
+				LOCK_ORDINARY, block,
+				rec, index, offsets, thr);
 		} else {
 
 			err = row_ins_set_shared_rec_lock(
-				LOCK_ORDINARY, rec, index, offsets, thr);
+				LOCK_ORDINARY, block,
+				rec, index, offsets, thr);
 		}
 
 		if (err != DB_SUCCESS) {
@@ -1713,7 +1726,7 @@ row_ins_scan_sec_index_for_duplicate(
 
 		if (page_rec_is_supremum(rec)) {
 
-			goto next_rec;
+			continue;
 		}
 
 		cmp = cmp_dtuple_rec(entry, rec, offsets);
@@ -1734,13 +1747,7 @@ row_ins_scan_sec_index_for_duplicate(
 		}
 
 		ut_a(cmp == 0);
-next_rec:
-		moved = btr_pcur_move_to_next(&pcur, &mtr);
-
-		if (!moved) {
-			break;
-		}
-	}
+	} while (btr_pcur_move_to_next(&pcur, &mtr));
 
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
@@ -1751,33 +1758,24 @@ next_rec:
 	dtuple_set_n_fields_cmp(entry, n_fields_cmp);
 
 	return(err);
-#else /* UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds.  Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-	return(DB_FAIL);
-#endif /* UNIV_HOTBACKUP */
 }
 
-/*******************************************************************
+/***************************************************************//**
 Checks if a unique key violation error would occur at an index entry
 insert. Sets shared locks on possible duplicate records. Works only
-for a clustered index! */
+for a clustered index!
+@return DB_SUCCESS if no error, DB_DUPLICATE_KEY if error,
+DB_LOCK_WAIT if we have to wait for a lock on a possible duplicate
+record */
 static
 ulint
 row_ins_duplicate_error_in_clust(
 /*=============================*/
-				/* out: DB_SUCCESS if no error,
-				DB_DUPLICATE_KEY if error, DB_LOCK_WAIT if we
-				have to wait for a lock on a possible
-				duplicate record */
-	btr_cur_t*	cursor,	/* in: B-tree cursor */
-	dtuple_t*	entry,	/* in: entry to insert */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr)	/* in: mtr */
+	btr_cur_t*	cursor,	/*!< in: B-tree cursor */
+	dtuple_t*	entry,	/*!< in: entry to insert */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
-#ifndef UNIV_HOTBACKUP
 	ulint	err;
 	rec_t*	rec;
 	ulint	n_unique;
@@ -1785,12 +1783,12 @@ row_ins_duplicate_error_in_clust(
 	mem_heap_t*heap		= NULL;
 	ulint	offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*	offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	UT_NOT_USED(mtr);
 
-	ut_a(cursor->index->type & DICT_CLUSTERED);
-	ut_ad(cursor->index->type & DICT_UNIQUE);
+	ut_a(dict_index_is_clust(cursor->index));
+	ut_ad(dict_index_is_unique(cursor->index));
 
 	/* NOTE: For unique non-clustered indexes there may be any number
 	of delete marked records with the same value for the non-clustered
@@ -1828,12 +1826,14 @@ row_ins_duplicate_error_in_clust(
 				INSERT ON DUPLICATE KEY UPDATE). */
 
 				err = row_ins_set_exclusive_rec_lock(
-					LOCK_REC_NOT_GAP, rec,
-					cursor->index, offsets, thr);
+					LOCK_REC_NOT_GAP,
+					btr_cur_get_block(cursor),
+					rec, cursor->index, offsets, thr);
 			} else {
 
 				err = row_ins_set_shared_rec_lock(
-					LOCK_REC_NOT_GAP, rec,
+					LOCK_REC_NOT_GAP,
+					btr_cur_get_block(cursor), rec,
 					cursor->index, offsets, thr);
 			}
 
@@ -1866,13 +1866,15 @@ row_ins_duplicate_error_in_clust(
 				INSERT ON DUPLICATE KEY UPDATE). */
 
 				err = row_ins_set_exclusive_rec_lock(
-					LOCK_REC_NOT_GAP, rec,
-					cursor->index, offsets, thr);
+					LOCK_REC_NOT_GAP,
+					btr_cur_get_block(cursor),
+					rec, cursor->index, offsets, thr);
 			} else {
 
 				err = row_ins_set_shared_rec_lock(
-					LOCK_REC_NOT_GAP, rec,
-					cursor->index, offsets, thr);
+					LOCK_REC_NOT_GAP,
+					btr_cur_get_block(cursor),
+					rec, cursor->index, offsets, thr);
 			}
 
 			if (err != DB_SUCCESS) {
@@ -1887,7 +1889,7 @@ row_ins_duplicate_error_in_clust(
 			}
 		}
 
-		ut_a(!(cursor->index->type & DICT_CLUSTERED));
+		ut_a(!dict_index_is_clust(cursor->index));
 		/* This should never happen */
 	}
 
@@ -1897,31 +1899,22 @@ func_exit:
 		mem_heap_free(heap);
 	}
 	return(err);
-#else /* UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds.  Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-	return(DB_FAIL);
-#endif /* UNIV_HOTBACKUP */
 }
 
-/*******************************************************************
+/***************************************************************//**
 Checks if an index entry has long enough common prefix with an existing
 record so that the intended insert of the entry must be changed to a modify of
 the existing record. In the case of a clustered index, the prefix must be
 n_unique fields long, and in the case of a secondary index, all fields must be
-equal. */
+equal.
+@return 0 if no update, ROW_INS_PREV if previous should be updated;
+currently we do the search so that only the low_match record can match
+enough to the search tuple, not the next record */
 UNIV_INLINE
 ulint
 row_ins_must_modify(
 /*================*/
-				/* out: 0 if no update, ROW_INS_PREV if
-				previous should be updated; currently we
-				do the search so that only the low_match
-				record can match enough to the search tuple,
-				not the next record */
-	btr_cur_t*	cursor)	/* in: B-tree cursor */
+	btr_cur_t*	cursor)	/*!< in: B-tree cursor */
 {
 	ulint	enough_match;
 	rec_t*	rec;
@@ -1948,7 +1941,7 @@ row_ins_must_modify(
 	return(0);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Tries to insert an index entry to an index. If the index is clustered
 and a record with the same unique key is found, the other record is
 necessarily marked deleted by a committed transaction, or a unique key
@@ -1956,22 +1949,20 @@ violation error occurs. The delete marked record is then updated to an
 existing record, and we must write an undo log record on the delete
 marked record. If the index is secondary, and a record with exactly the
 same fields is found, the other record is necessarily marked deleted.
-It is then unmarked. Otherwise, the entry is just inserted to the index. */
-
+It is then unmarked. Otherwise, the entry is just inserted to the index.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL if pessimistic retry needed,
+or error code */
+static
 ulint
 row_ins_index_entry_low(
 /*====================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
-				if pessimistic retry needed, or error code */
-	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
 				depending on whether we wish optimistic or
 				pessimistic descent down the index tree */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry to insert */
-	ulint*		ext_vec,/* in: array containing field numbers of
-				externally stored fields in entry, or NULL */
-	ulint		n_ext_vec,/* in: number of fields in ext_vec */
-	que_thr_t*	thr)	/* in: query thread */
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry,	/*!< in: index entry to insert */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	btr_cur_t	cursor;
 	ulint		ignore_sec_unique	= 0;
@@ -1983,9 +1974,6 @@ row_ins_index_entry_low(
 	big_rec_t*	big_rec			= NULL;
 	mtr_t		mtr;
 	mem_heap_t*	heap			= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets			= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
 
 	log_free_check();
 
@@ -2020,19 +2008,18 @@ row_ins_index_entry_low(
 		rec_t*	first_rec = page_rec_get_next(
 			page_get_infimum_rec(page));
 
-		if (UNIV_LIKELY(first_rec != page_get_supremum_rec(page))) {
-			ut_a(rec_get_n_fields(first_rec, index)
-			     == dtuple_get_n_fields(entry));
-		}
+		ut_ad(page_rec_is_supremum(first_rec)
+		      || rec_get_n_fields(first_rec, index)
+		      == dtuple_get_n_fields(entry));
 	}
 #endif
 
 	n_unique = dict_index_get_n_unique(index);
 
-	if (index->type & DICT_UNIQUE && (cursor.up_match >= n_unique
-					  || cursor.low_match >= n_unique)) {
+	if (dict_index_is_unique(index) && (cursor.up_match >= n_unique
+					    || cursor.low_match >= n_unique)) {
 
-		if (index->type & DICT_CLUSTERED) {
+		if (dict_index_is_clust(index)) {
 			/* Note that the following may return also
 			DB_LOCK_WAIT */
 
@@ -2076,23 +2063,24 @@ row_ins_index_entry_low(
 		if (modify == ROW_INS_NEXT) {
 			rec = page_rec_get_next(btr_cur_get_rec(&cursor));
 
-			btr_cur_position(index, rec, &cursor);
+			btr_cur_position(index, rec,
+					 btr_cur_get_block(&cursor),&cursor);
 		}
 
-		if (index->type & DICT_CLUSTERED) {
+		if (dict_index_is_clust(index)) {
 			err = row_ins_clust_index_entry_by_modify(
-				mode, &cursor, &big_rec, entry,
-				ext_vec, n_ext_vec, thr, &mtr);
+				mode, &cursor, &heap, &big_rec, entry,
+				thr, &mtr);
 		} else {
+			ut_ad(!n_ext);
 			err = row_ins_sec_index_entry_by_modify(
 				mode, &cursor, entry, thr, &mtr);
 		}
-
 	} else {
 		if (mode == BTR_MODIFY_LEAF) {
 			err = btr_cur_optimistic_insert(
 				0, &cursor, entry, &insert_rec, &big_rec,
-				thr, &mtr);
+				n_ext, thr, &mtr);
 		} else {
 			ut_a(mode == BTR_MODIFY_TREE);
 			if (buf_LRU_buf_pool_running_out()) {
@@ -2103,33 +2091,27 @@ row_ins_index_entry_low(
 			}
 			err = btr_cur_pessimistic_insert(
 				0, &cursor, entry, &insert_rec, &big_rec,
-				thr, &mtr);
-		}
-
-		if (err == DB_SUCCESS) {
-			if (ext_vec) {
-				rec_set_field_extern_bits(insert_rec, index,
-							  ext_vec, n_ext_vec,
-							  &mtr);
-			}
+				n_ext, thr, &mtr);
 		}
 	}
 
 function_exit:
 	mtr_commit(&mtr);
 
-	if (big_rec) {
-		rec_t*		rec;
+	if (UNIV_LIKELY_NULL(big_rec)) {
+		rec_t*	rec;
+		ulint*	offsets;
 		mtr_start(&mtr);
 
 		btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
 					    BTR_MODIFY_TREE, &cursor, 0, &mtr);
 		rec = btr_cur_get_rec(&cursor);
-		offsets = rec_get_offsets(rec, index, offsets,
+		offsets = rec_get_offsets(rec, index, NULL,
 					  ULINT_UNDEFINED, &heap);
 
-		err = btr_store_big_rec_extern_fields(index, rec,
-						      offsets, big_rec, &mtr);
+		err = btr_store_big_rec_extern_fields(
+			index, btr_cur_get_block(&cursor),
+			rec, offsets, big_rec, &mtr);
 
 		if (modify) {
 			dtuple_big_rec_free(big_rec);
@@ -2146,27 +2128,25 @@ function_exit:
 	return(err);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Inserts an index entry to index. Tries first optimistic, then pessimistic
 descent down the tree. If the entry matches enough to a delete marked record,
 performs the insert by updating or delete unmarking the delete marked
-record. */
-
+record.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
 ulint
 row_ins_index_entry(
 /*================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DUPLICATE_KEY, or some other error code */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry to insert */
-	ulint*		ext_vec,/* in: array containing field numbers of
-				externally stored fields in entry, or NULL */
-	ulint		n_ext_vec,/* in: number of fields in ext_vec */
-	que_thr_t*	thr)	/* in: query thread */
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry,	/*!< in: index entry to insert */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	ibool		foreign,/*!< in: TRUE=check foreign key constraints */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	ulint	err;
 
-	if (UT_LIST_GET_FIRST(index->table->foreign_list)) {
+	if (foreign && UT_LIST_GET_FIRST(index->table->foreign_list)) {
 		err = row_ins_check_foreign_constraints(index->table, index,
 							entry, thr);
 		if (err != DB_SUCCESS) {
@@ -2178,7 +2158,7 @@ row_ins_index_entry(
 	/* Try first optimistic descent to the B-tree */
 
 	err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
-				      ext_vec, n_ext_vec, thr);
+				      n_ext, thr);
 	if (err != DB_FAIL) {
 
 		return(err);
@@ -2187,36 +2167,38 @@ row_ins_index_entry(
 	/* Try then pessimistic descent to the B-tree */
 
 	err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry,
-				      ext_vec, n_ext_vec, thr);
+				      n_ext, thr);
 	return(err);
 }
 
-/***************************************************************
+/***********************************************************//**
 Sets the values of the dtuple fields in entry from the values of appropriate
 columns in row. */
 static
 void
 row_ins_index_entry_set_vals(
 /*=========================*/
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry to make */
-	dtuple_t*	row)	/* in: row */
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry,	/*!< in: index entry to make */
+	const dtuple_t*	row)	/*!< in: row */
 {
-	dict_field_t*	ind_field;
-	dfield_t*	field;
-	dfield_t*	row_field;
-	ulint		n_fields;
-	ulint		i;
+	ulint	n_fields;
+	ulint	i;
 
 	ut_ad(entry && row);
 
 	n_fields = dtuple_get_n_fields(entry);
 
 	for (i = 0; i < n_fields; i++) {
+		dict_field_t*	ind_field;
+		dfield_t*	field;
+		const dfield_t*	row_field;
+		ulint		len;
+
 		field = dtuple_get_nth_field(entry, i);
 		ind_field = dict_index_get_nth_field(index, i);
-
 		row_field = dtuple_get_nth_field(row, ind_field->col->ind);
+		len = dfield_get_len(row_field);
 
 		/* Check column prefix indexes */
 		if (ind_field->prefix_len > 0
@@ -2225,28 +2207,32 @@ row_ins_index_entry_set_vals(
 			const	dict_col_t*	col
 				= dict_field_get_col(ind_field);
 
-			field->len = dtype_get_at_most_n_mbchars(
+			len = dtype_get_at_most_n_mbchars(
 				col->prtype, col->mbminlen, col->mbmaxlen,
 				ind_field->prefix_len,
-				row_field->len, row_field->data);
-		} else {
-			field->len = row_field->len;
+				len, dfield_get_data(row_field));
+
+			ut_ad(!dfield_is_ext(row_field));
 		}
 
-		field->data = row_field->data;
+		dfield_set_data(field, dfield_get_data(row_field), len);
+		if (dfield_is_ext(row_field)) {
+			ut_ad(dict_index_is_clust(index));
+			dfield_set_ext(field);
+		}
 	}
 }
 
-/***************************************************************
-Inserts a single index entry to the table. */
+/***********************************************************//**
+Inserts a single index entry to the table.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
 static
 ulint
 row_ins_index_entry_step(
 /*=====================*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, else error code or DB_LOCK_WAIT */
-	ins_node_t*	node,	/* in: row insert node */
-	que_thr_t*	thr)	/* in: query thread */
+	ins_node_t*	node,	/*!< in: row insert node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	ulint	err;
 
@@ -2256,24 +2242,24 @@ row_ins_index_entry_step(
 
 	ut_ad(dtuple_check_typed(node->entry));
 
-	err = row_ins_index_entry(node->index, node->entry, NULL, 0, thr);
+	err = row_ins_index_entry(node->index, node->entry, 0, TRUE, thr);
 
 	return(err);
 }
 
-/***************************************************************
+/***********************************************************//**
 Allocates a row id for row and inits the node->index field. */
 UNIV_INLINE
 void
 row_ins_alloc_row_id_step(
 /*======================*/
-	ins_node_t*	node)	/* in: row insert node */
+	ins_node_t*	node)	/*!< in: row insert node */
 {
 	dulint	row_id;
 
 	ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
 
-	if (dict_table_get_first_index(node->table)->type & DICT_UNIQUE) {
+	if (dict_index_is_unique(dict_table_get_first_index(node->table))) {
 
 		/* No row id is stored if the clustered index is unique */
 
@@ -2287,13 +2273,13 @@ row_ins_alloc_row_id_step(
 	dict_sys_write_row_id(node->row_id_buf, row_id);
 }
 
-/***************************************************************
+/***********************************************************//**
 Gets a row to insert from the values list. */
 UNIV_INLINE
 void
 row_ins_get_row_from_values(
 /*========================*/
-	ins_node_t*	node)	/* in: row insert node */
+	ins_node_t*	node)	/*!< in: row insert node */
 {
 	que_node_t*	list_node;
 	dfield_t*	dfield;
@@ -2320,13 +2306,13 @@ row_ins_get_row_from_values(
 	}
 }
 
-/***************************************************************
+/***********************************************************//**
 Gets a row to insert from the select list. */
 UNIV_INLINE
 void
 row_ins_get_row_from_select(
 /*========================*/
-	ins_node_t*	node)	/* in: row insert node */
+	ins_node_t*	node)	/*!< in: row insert node */
 {
 	que_node_t*	list_node;
 	dfield_t*	dfield;
@@ -2351,16 +2337,16 @@ row_ins_get_row_from_select(
 	}
 }
 
-/***************************************************************
-Inserts a row to a table. */
-
+/***********************************************************//**
+Inserts a row to a table.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
+static
 ulint
 row_ins(
 /*====*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, else error code or DB_LOCK_WAIT */
-	ins_node_t*	node,	/* in: row insert node */
-	que_thr_t*	thr)	/* in: query thread */
+	ins_node_t*	node,	/*!< in: row insert node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	ulint	err;
 
@@ -2406,15 +2392,15 @@ row_ins(
 	return(DB_SUCCESS);
 }
 
-/***************************************************************
+/***********************************************************//**
 Inserts a row to a table. This is a high-level function used in SQL execution
-graphs. */
-
+graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 row_ins_step(
 /*=========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	ins_node_t*	node;
 	que_node_t*	parent;
diff --git a/storage/innodb_plugin/row/row0merge.c b/storage/innodb_plugin/row/row0merge.c
new file mode 100644
index 00000000000..05a45dc647c
--- /dev/null
+++ b/storage/innodb_plugin/row/row0merge.c
@@ -0,0 +1,2364 @@
+/*****************************************************************************
+
+Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0merge.c
+New index creation routines using a merge sort
+
+Created 12/4/2005 Jan Lindstrom
+Completed by Sunny Bains and Marko Makela
+*******************************************************/
+
+#include "row0merge.h"
+#include "row0ext.h"
+#include "row0row.h"
+#include "row0upd.h"
+#include "row0ins.h"
+#include "row0sel.h"
+#include "dict0dict.h"
+#include "dict0mem.h"
+#include "dict0boot.h"
+#include "dict0crea.h"
+#include "dict0load.h"
+#include "btr0btr.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0roll.h"
+#include "trx0undo.h"
+#include "trx0purge.h"
+#include "trx0rec.h"
+#include "que0que.h"
+#include "rem0cmp.h"
+#include "read0read.h"
+#include "os0file.h"
+#include "lock0lock.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "que0que.h"
+#include "pars0pars.h"
+#include "mem0mem.h"
+#include "log0log.h"
+#include "ut0sort.h"
+#include "handler0alter.h"
+
+#ifdef UNIV_DEBUG
+/** Set these in order ot enable debug printout. */
+/* @{ */
+static ibool	row_merge_print_cmp;
+static ibool	row_merge_print_read;
+static ibool	row_merge_print_write;
+/* @} */
+#endif /* UNIV_DEBUG */
+
+/** @brief Block size for I/O operations in merge sort.
+
+The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty()
+rounded to a power of 2.
+
+When not creating a PRIMARY KEY that contains column prefixes, this
+can be set as small as UNIV_PAGE_SIZE / 2.  See the comment above
+ut_ad(data_size < sizeof(row_merge_block_t)). */
+typedef byte	row_merge_block_t[1048576];
+
+/** @brief Secondary buffer for I/O operations of merge records.
+
+This buffer is used for writing or reading a record that spans two
+row_merge_block_t.  Thus, it must be able to hold one merge record,
+whose maximum size is the same as the minimum size of
+row_merge_block_t. */
+typedef byte	mrec_buf_t[UNIV_PAGE_SIZE];
+
+/** @brief Merge record in row_merge_block_t.
+
+The format is the same as a record in ROW_FORMAT=COMPACT with the
+exception that the REC_N_NEW_EXTRA_BYTES are omitted. */
+typedef byte	mrec_t;
+
+/** Buffer for sorting in main memory. */
+struct row_merge_buf_struct {
+	mem_heap_t*	heap;		/*!< memory heap where allocated */
+	dict_index_t*	index;		/*!< the index the tuples belong to */
+	ulint		total_size;	/*!< total amount of data bytes */
+	ulint		n_tuples;	/*!< number of data tuples */
+	ulint		max_tuples;	/*!< maximum number of data tuples */
+	const dfield_t**tuples;		/*!< array of pointers to
+					arrays of fields that form
+					the data tuples */
+	const dfield_t**tmp_tuples;	/*!< temporary copy of tuples,
+					for sorting */
+};
+
+/** Buffer for sorting in main memory. */
+typedef struct row_merge_buf_struct row_merge_buf_t;
+
+/** Information about temporary files used in merge sort */
+struct merge_file_struct {
+	int	fd;		/*!< file descriptor */
+	ulint	offset;		/*!< file offset */
+};
+
+/** Information about temporary files used in merge sort */
+typedef struct merge_file_struct merge_file_t;
+
+#ifdef UNIV_DEBUG
+/******************************************************//**
+Display a merge tuple. */
+static
+void
+row_merge_tuple_print(
+/*==================*/
+	FILE*		f,	/*!< in: output stream */
+	const dfield_t*	entry,	/*!< in: tuple to print */
+	ulint		n_fields)/*!< in: number of fields in the tuple */
+{
+	ulint	j;
+
+	for (j = 0; j < n_fields; j++) {
+		const dfield_t*	field = &entry[j];
+
+		if (dfield_is_null(field)) {
+			fputs("\n NULL;", f);
+		} else {
+			ulint	field_len	= dfield_get_len(field);
+			ulint	len		= ut_min(field_len, 20);
+			if (dfield_is_ext(field)) {
+				fputs("\nE", f);
+			} else {
+				fputs("\n ", f);
+			}
+			ut_print_buf(f, dfield_get_data(field), len);
+			if (len != field_len) {
+				fprintf(f, " (total %lu bytes)", field_len);
+			}
+		}
+	}
+	putc('\n', f);
+}
+#endif /* UNIV_DEBUG */
+
+/******************************************************//**
+Allocate a sort buffer.
+@return	own: sort buffer */
+static
+row_merge_buf_t*
+row_merge_buf_create_low(
+/*=====================*/
+	mem_heap_t*	heap,		/*!< in: heap where allocated */
+	dict_index_t*	index,		/*!< in: secondary index */
+	ulint		max_tuples,	/*!< in: maximum number of data tuples */
+	ulint		buf_size)	/*!< in: size of the buffer, in bytes */
+{
+	row_merge_buf_t*	buf;
+
+	ut_ad(max_tuples > 0);
+	ut_ad(max_tuples <= sizeof(row_merge_block_t));
+	ut_ad(max_tuples < buf_size);
+
+	buf = mem_heap_zalloc(heap, buf_size);
+	buf->heap = heap;
+	buf->index = index;
+	buf->max_tuples = max_tuples;
+	buf->tuples = mem_heap_alloc(heap,
+				     2 * max_tuples * sizeof *buf->tuples);
+	buf->tmp_tuples = buf->tuples + max_tuples;
+
+	return(buf);
+}
+
+/******************************************************//**
+Allocate a sort buffer.
+@return	own: sort buffer */
+static
+row_merge_buf_t*
+row_merge_buf_create(
+/*=================*/
+	dict_index_t*	index)	/*!< in: secondary index */
+{
+	row_merge_buf_t*	buf;
+	ulint			max_tuples;
+	ulint			buf_size;
+	mem_heap_t*		heap;
+
+	max_tuples = sizeof(row_merge_block_t)
+		/ ut_max(1, dict_index_get_min_size(index));
+
+	buf_size = (sizeof *buf) + (max_tuples - 1) * sizeof *buf->tuples;
+
+	heap = mem_heap_create(buf_size + sizeof(row_merge_block_t));
+
+	buf = row_merge_buf_create_low(heap, index, max_tuples, buf_size);
+
+	return(buf);
+}
+
+/******************************************************//**
+Empty a sort buffer.
+@return	sort buffer */
+static
+row_merge_buf_t*
+row_merge_buf_empty(
+/*================*/
+	row_merge_buf_t*	buf)	/*!< in,own: sort buffer */
+{
+	ulint		buf_size;
+	ulint		max_tuples	= buf->max_tuples;
+	mem_heap_t*	heap		= buf->heap;
+	dict_index_t*	index		= buf->index;
+
+	buf_size = (sizeof *buf) + (max_tuples - 1) * sizeof *buf->tuples;
+
+	mem_heap_empty(heap);
+
+	return(row_merge_buf_create_low(heap, index, max_tuples, buf_size));
+}
+
+/******************************************************//**
+Deallocate a sort buffer. */
+static
+void
+row_merge_buf_free(
+/*===============*/
+	row_merge_buf_t*	buf)	/*!< in,own: sort buffer, to be freed */
+{
+	mem_heap_free(buf->heap);
+}
+
+/******************************************************//**
+Insert a data tuple into a sort buffer.
+@return	TRUE if added, FALSE if out of space */
+static
+ibool
+row_merge_buf_add(
+/*==============*/
+	row_merge_buf_t*	buf,	/*!< in/out: sort buffer */
+	const dtuple_t*		row,	/*!< in: row in clustered index */
+	const row_ext_t*	ext)	/*!< in: cache of externally stored
+					column prefixes, or NULL */
+{
+	ulint			i;
+	ulint			n_fields;
+	ulint			data_size;
+	ulint			extra_size;
+	const dict_index_t*	index;
+	dfield_t*		entry;
+	dfield_t*		field;
+
+	if (buf->n_tuples >= buf->max_tuples) {
+		return(FALSE);
+	}
+
+	UNIV_PREFETCH_R(row->fields);
+
+	index = buf->index;
+
+	n_fields = dict_index_get_n_fields(index);
+
+	entry = mem_heap_alloc(buf->heap, n_fields * sizeof *entry);
+	buf->tuples[buf->n_tuples] = entry;
+	field = entry;
+
+	data_size = 0;
+	extra_size = UT_BITS_IN_BYTES(index->n_nullable);
+
+	for (i = 0; i < n_fields; i++, field++) {
+		const dict_field_t*	ifield;
+		const dict_col_t*	col;
+		ulint			col_no;
+		const dfield_t*		row_field;
+		ulint			len;
+
+		ifield = dict_index_get_nth_field(index, i);
+		col = ifield->col;
+		col_no = dict_col_get_no(col);
+		row_field = dtuple_get_nth_field(row, col_no);
+		dfield_copy(field, row_field);
+		len = dfield_get_len(field);
+
+		if (dfield_is_null(field)) {
+			ut_ad(!(col->prtype & DATA_NOT_NULL));
+			continue;
+		} else if (UNIV_LIKELY(!ext)) {
+		} else if (dict_index_is_clust(index)) {
+			/* Flag externally stored fields. */
+			const byte*	buf = row_ext_lookup(ext, col_no,
+							     &len);
+			if (UNIV_LIKELY_NULL(buf)) {
+				ut_a(buf != field_ref_zero);
+				if (i < dict_index_get_n_unique(index)) {
+					dfield_set_data(field, buf, len);
+				} else {
+					dfield_set_ext(field);
+					len = dfield_get_len(field);
+				}
+			}
+		} else {
+			const byte*	buf = row_ext_lookup(ext, col_no,
+							     &len);
+			if (UNIV_LIKELY_NULL(buf)) {
+				ut_a(buf != field_ref_zero);
+				dfield_set_data(field, buf, len);
+			}
+		}
+
+		/* If a column prefix index, take only the prefix */
+
+		if (ifield->prefix_len) {
+			len = dtype_get_at_most_n_mbchars(
+				col->prtype,
+				col->mbminlen, col->mbmaxlen,
+				ifield->prefix_len,
+				len, dfield_get_data(field));
+			dfield_set_len(field, len);
+		}
+
+		ut_ad(len <= col->len || col->mtype == DATA_BLOB);
+
+		if (ifield->fixed_len) {
+			ut_ad(len == ifield->fixed_len);
+			ut_ad(!dfield_is_ext(field));
+		} else if (dfield_is_ext(field)) {
+			extra_size += 2;
+		} else if (len < 128
+			   || (col->len < 256 && col->mtype != DATA_BLOB)) {
+			extra_size++;
+		} else {
+			/* For variable-length columns, we look up the
+			maximum length from the column itself.  If this
+			is a prefix index column shorter than 256 bytes,
+			this will waste one byte. */
+			extra_size += 2;
+		}
+		data_size += len;
+	}
+
+#ifdef UNIV_DEBUG
+	{
+		ulint	size;
+		ulint	extra;
+
+		size = rec_get_converted_size_comp(index,
+						   REC_STATUS_ORDINARY,
+						   entry, n_fields, &extra);
+
+		ut_ad(data_size + extra_size + REC_N_NEW_EXTRA_BYTES == size);
+		ut_ad(extra_size + REC_N_NEW_EXTRA_BYTES == extra);
+	}
+#endif /* UNIV_DEBUG */
+
+	/* Add to the total size of the record in row_merge_block_t
+	the encoded length of extra_size and the extra bytes (extra_size).
+	See row_merge_buf_write() for the variable-length encoding
+	of extra_size. */
+	data_size += (extra_size + 1) + ((extra_size + 1) >= 0x80);
+
+	/* The following assertion may fail if row_merge_block_t is
+	declared very small and a PRIMARY KEY is being created with
+	many prefix columns.  In that case, the record may exceed the
+	page_zip_rec_needs_ext() limit.  However, no further columns
+	will be moved to external storage until the record is inserted
+	to the clustered index B-tree. */
+	ut_ad(data_size < sizeof(row_merge_block_t));
+
+	/* Reserve one byte for the end marker of row_merge_block_t. */
+	if (buf->total_size + data_size >= sizeof(row_merge_block_t) - 1) {
+		return(FALSE);
+	}
+
+	buf->total_size += data_size;
+	buf->n_tuples++;
+
+	field = entry;
+
+	/* Copy the data fields. */
+
+	do {
+		dfield_dup(field++, buf->heap);
+	} while (--n_fields);
+
+	return(TRUE);
+}
+
+/** Structure for reporting duplicate records. */
+struct row_merge_dup_struct {
+	const dict_index_t*	index;		/*!< index being sorted */
+	TABLE*			table;		/*!< MySQL table object */
+	ulint			n_dup;		/*!< number of duplicates */
+};
+
+/** Structure for reporting duplicate records. */
+typedef struct row_merge_dup_struct row_merge_dup_t;
+
+/*************************************************************//**
+Report a duplicate key. */
+static
+void
+row_merge_dup_report(
+/*=================*/
+	row_merge_dup_t*	dup,	/*!< in/out: for reporting duplicates */
+	const dfield_t*		entry)	/*!< in: duplicate index entry */
+{
+	mrec_buf_t 		buf;
+	const dtuple_t*		tuple;
+	dtuple_t		tuple_store;
+	const rec_t*		rec;
+	const dict_index_t*	index	= dup->index;
+	ulint			n_fields= dict_index_get_n_fields(index);
+	mem_heap_t*		heap	= NULL;
+	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*			offsets;
+	ulint			n_ext;
+
+	if (dup->n_dup++) {
+		/* Only report the first duplicate record,
+		but count all duplicate records. */
+		return;
+	}
+
+	rec_offs_init(offsets_);
+
+	/* Convert the tuple to a record and then to MySQL format. */
+
+	tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
+	n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;
+
+	rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext);
+	offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED,
+				  &heap);
+
+	innobase_rec_to_mysql(dup->table, rec, index, offsets);
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+}
+
+/*************************************************************//**
+Compare two tuples.
+@return	1, 0, -1 if a is greater, equal, less, respectively, than b */
+static
+int
+row_merge_tuple_cmp(
+/*================*/
+	ulint			n_field,/*!< in: number of fields */
+	const dfield_t*		a,	/*!< in: first tuple to be compared */
+	const dfield_t*		b,	/*!< in: second tuple to be compared */
+	row_merge_dup_t*	dup)	/*!< in/out: for reporting duplicates */
+{
+	int		cmp;
+	const dfield_t*	field	= a;
+
+	/* Compare the fields of the tuples until a difference is
+	found or we run out of fields to compare.  If !cmp at the
+	end, the tuples are equal. */
+	do {
+		cmp = cmp_dfield_dfield(a++, b++);
+	} while (!cmp && --n_field);
+
+	if (UNIV_UNLIKELY(!cmp) && UNIV_LIKELY_NULL(dup)) {
+		/* Report a duplicate value error if the tuples are
+		logically equal.  NULL columns are logically inequal,
+		although they are equal in the sorting order.  Find
+		out if any of the fields are NULL. */
+		for (b = field; b != a; b++) {
+			if (dfield_is_null(b)) {
+
+				goto func_exit;
+			}
+		}
+
+		row_merge_dup_report(dup, field);
+	}
+
+func_exit:
+	return(cmp);
+}
+
+/** Wrapper for row_merge_tuple_sort() to inject some more context to
+UT_SORT_FUNCTION_BODY().
+@param a	array of tuples that being sorted
+@param b	aux (work area), same size as tuples[]
+@param c	lower bound of the sorting area, inclusive
+@param d	upper bound of the sorting area, inclusive */
+#define row_merge_tuple_sort_ctx(a,b,c,d) \
+	row_merge_tuple_sort(n_field, dup, a, b, c, d)
+/** Wrapper for row_merge_tuple_cmp() to inject some more context to
+UT_SORT_FUNCTION_BODY().
+@param a	first tuple to be compared
+@param b	second tuple to be compared
+@return	1, 0, -1 if a is greater, equal, less, respectively, than b */
+#define row_merge_tuple_cmp_ctx(a,b) row_merge_tuple_cmp(n_field, a, b, dup)
+
+/**********************************************************************//**
+Merge sort the tuple buffer in main memory. */
+static
+void
+row_merge_tuple_sort(
+/*=================*/
+	ulint			n_field,/*!< in: number of fields */
+	row_merge_dup_t*	dup,	/*!< in/out: for reporting duplicates */
+	const dfield_t**	tuples,	/*!< in/out: tuples */
+	const dfield_t**	aux,	/*!< in/out: work area */
+	ulint			low,	/*!< in: lower bound of the
+					sorting area, inclusive */
+	ulint			high)	/*!< in: upper bound of the
+					sorting area, exclusive */
+{
+	UT_SORT_FUNCTION_BODY(row_merge_tuple_sort_ctx,
+			      tuples, aux, low, high, row_merge_tuple_cmp_ctx);
+}
+
+/******************************************************//**
+Sort a buffer. */
+static
+void
+row_merge_buf_sort(
+/*===============*/
+	row_merge_buf_t*	buf,	/*!< in/out: sort buffer */
+	row_merge_dup_t*	dup)	/*!< in/out: for reporting duplicates */
+{
+	row_merge_tuple_sort(dict_index_get_n_unique(buf->index), dup,
+			     buf->tuples, buf->tmp_tuples, 0, buf->n_tuples);
+}
+
+/******************************************************//**
+Write a buffer to a block. */
+static
+void
+row_merge_buf_write(
+/*================*/
+	const row_merge_buf_t*	buf,	/*!< in: sorted buffer */
+#ifdef UNIV_DEBUG
+	const merge_file_t*	of,	/*!< in: output file */
+#endif /* UNIV_DEBUG */
+	row_merge_block_t*	block)	/*!< out: buffer for writing to file */
+#ifndef UNIV_DEBUG
+# define row_merge_buf_write(buf, of, block) row_merge_buf_write(buf, block)
+#endif /* !UNIV_DEBUG */
+{
+	const dict_index_t*	index	= buf->index;
+	ulint			n_fields= dict_index_get_n_fields(index);
+	byte*			b	= &(*block)[0];
+
+	ulint		i;
+
+	for (i = 0; i < buf->n_tuples; i++) {
+		ulint		size;
+		ulint		extra_size;
+		const dfield_t*	entry		= buf->tuples[i];
+
+		size = rec_get_converted_size_comp(index,
+						   REC_STATUS_ORDINARY,
+						   entry, n_fields,
+						   &extra_size);
+		ut_ad(size > extra_size);
+		ut_ad(extra_size >= REC_N_NEW_EXTRA_BYTES);
+		extra_size -= REC_N_NEW_EXTRA_BYTES;
+		size -= REC_N_NEW_EXTRA_BYTES;
+
+		/* Encode extra_size + 1 */
+		if (extra_size + 1 < 0x80) {
+			*b++ = (byte) (extra_size + 1);
+		} else {
+			ut_ad((extra_size + 1) < 0x8000);
+			*b++ = (byte) (0x80 | ((extra_size + 1) >> 8));
+			*b++ = (byte) (extra_size + 1);
+		}
+
+		ut_ad(b + size < block[1]);
+
+		rec_convert_dtuple_to_rec_comp(b + extra_size, 0, index,
+					       REC_STATUS_ORDINARY,
+					       entry, n_fields);
+
+		b += size;
+
+#ifdef UNIV_DEBUG
+		if (row_merge_print_write) {
+			fprintf(stderr, "row_merge_buf_write %p,%d,%lu %lu",
+				(void*) b, of->fd, (ulong) of->offset,
+				(ulong) i);
+			row_merge_tuple_print(stderr, entry, n_fields);
+		}
+#endif /* UNIV_DEBUG */
+	}
+
+	/* Write an "end-of-chunk" marker. */
+	ut_a(b < block[1]);
+	ut_a(b == block[0] + buf->total_size);
+	*b++ = 0;
+#ifdef UNIV_DEBUG_VALGRIND
+	/* The rest of the block is uninitialized.  Initialize it
+	to avoid bogus warnings. */
+	memset(b, 0xff, block[1] - b);
+#endif /* UNIV_DEBUG_VALGRIND */
+#ifdef UNIV_DEBUG
+	if (row_merge_print_write) {
+		fprintf(stderr, "row_merge_buf_write %p,%d,%lu EOF\n",
+			(void*) b, of->fd, (ulong) of->offset);
+	}
+#endif /* UNIV_DEBUG */
+}
+
+/******************************************************//**
+Create a memory heap and allocate space for row_merge_rec_offsets().
+@return	memory heap */
+static
+mem_heap_t*
+row_merge_heap_create(
+/*==================*/
+	const dict_index_t*	index,		/*!< in: record descriptor */
+	ulint**			offsets1,	/*!< out: offsets */
+	ulint**			offsets2)	/*!< out: offsets */
+{
+	ulint		i	= 1 + REC_OFFS_HEADER_SIZE
+		+ dict_index_get_n_fields(index);
+	mem_heap_t*	heap	= mem_heap_create(2 * i * sizeof *offsets1);
+
+	*offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1);
+	*offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2);
+
+	(*offsets1)[0] = (*offsets2)[0] = i;
+	(*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);
+
+	return(heap);
+}
+
+/**********************************************************************//**
+Search an index object by name and column names.  If several indexes match,
+return the index with the max id.
+@return	matching index, NULL if not found */
+static
+dict_index_t*
+row_merge_dict_table_get_index(
+/*===========================*/
+	dict_table_t*		table,		/*!< in: table */
+	const merge_index_def_t*index_def)	/*!< in: index definition */
+{
+	ulint		i;
+	dict_index_t*	index;
+	const char**	column_names;
+
+	column_names = mem_alloc(index_def->n_fields * sizeof *column_names);
+
+	for (i = 0; i < index_def->n_fields; ++i) {
+		column_names[i] = index_def->fields[i].field_name;
+	}
+
+	index = dict_table_get_index_by_max_id(
+		table, index_def->name, column_names, index_def->n_fields);
+
+	mem_free((void*) column_names);
+
+	return(index);
+}
+
+/********************************************************************//**
+Read a merge block from the file system.
+@return	TRUE if request was successful, FALSE if fail */
+static
+ibool
+row_merge_read(
+/*===========*/
+	int			fd,	/*!< in: file descriptor */
+	ulint			offset,	/*!< in: offset where to read */
+	row_merge_block_t*	buf)	/*!< out: data */
+{
+	ib_uint64_t	ofs = ((ib_uint64_t) offset) * sizeof *buf;
+	ibool		success;
+
+	success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf,
+						 (ulint) (ofs & 0xFFFFFFFF),
+						 (ulint) (ofs >> 32),
+						 sizeof *buf);
+	if (UNIV_UNLIKELY(!success)) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			"  InnoDB: failed to read merge block at %llu\n", ofs);
+	}
+
+	return(UNIV_LIKELY(success));
+}
+
+/********************************************************************//**
+Read a merge block from the file system.
+@return	TRUE if request was successful, FALSE if fail */
+static
+ibool
+row_merge_write(
+/*============*/
+	int		fd,	/*!< in: file descriptor */
+	ulint		offset,	/*!< in: offset where to write */
+	const void*	buf)	/*!< in: data */
+{
+	ib_uint64_t	ofs = ((ib_uint64_t) offset)
+		* sizeof(row_merge_block_t);
+
+	return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
+					 (ulint) (ofs & 0xFFFFFFFF),
+					 (ulint) (ofs >> 32),
+					 sizeof(row_merge_block_t))));
+}
+
+/********************************************************************//**
+Read a merge record.
+@return	pointer to next record, or NULL on I/O error or end of list */
+static
+const byte*
+row_merge_read_rec(
+/*===============*/
+	row_merge_block_t*	block,	/*!< in/out: file buffer */
+	mrec_buf_t*		buf,	/*!< in/out: secondary buffer */
+	const byte*		b,	/*!< in: pointer to record */
+	const dict_index_t*	index,	/*!< in: index of the record */
+	int			fd,	/*!< in: file descriptor */
+	ulint*			foffs,	/*!< in/out: file offset */
+	const mrec_t**		mrec,	/*!< out: pointer to merge record,
+					or NULL on end of list
+					(non-NULL on I/O error) */
+	ulint*			offsets)/*!< out: offsets of mrec */
+{
+	ulint	extra_size;
+	ulint	data_size;
+	ulint	avail_size;
+
+	ut_ad(block);
+	ut_ad(buf);
+	ut_ad(b >= block[0]);
+	ut_ad(b < block[1]);
+	ut_ad(index);
+	ut_ad(foffs);
+	ut_ad(mrec);
+	ut_ad(offsets);
+
+	ut_ad(*offsets == 1 + REC_OFFS_HEADER_SIZE
+	      + dict_index_get_n_fields(index));
+
+	extra_size = *b++;
+
+	if (UNIV_UNLIKELY(!extra_size)) {
+		/* End of list */
+		*mrec = NULL;
+#ifdef UNIV_DEBUG
+		if (row_merge_print_read) {
+			fprintf(stderr, "row_merge_read %p,%p,%d,%lu EOF\n",
+				(const void*) b, (const void*) block,
+				fd, (ulong) *foffs);
+		}
+#endif /* UNIV_DEBUG */
+		return(NULL);
+	}
+
+	if (extra_size >= 0x80) {
+		/* Read another byte of extra_size. */
+
+		if (UNIV_UNLIKELY(b >= block[1])) {
+			if (!row_merge_read(fd, ++(*foffs), block)) {
+err_exit:
+				/* Signal I/O error. */
+				*mrec = b;
+				return(NULL);
+			}
+
+			/* Wrap around to the beginning of the buffer. */
+			b = block[0];
+		}
+
+		extra_size = (extra_size & 0x7f) << 8;
+		extra_size |= *b++;
+	}
+
+	/* Normalize extra_size.  Above, value 0 signals "end of list". */
+	extra_size--;
+
+	/* Read the extra bytes. */
+
+	if (UNIV_UNLIKELY(b + extra_size >= block[1])) {
+		/* The record spans two blocks.  Copy the entire record
+		to the auxiliary buffer and handle this as a special
+		case. */
+
+		avail_size = block[1] - b;
+
+		memcpy(*buf, b, avail_size);
+
+		if (!row_merge_read(fd, ++(*foffs), block)) {
+
+			goto err_exit;
+		}
+
+		/* Wrap around to the beginning of the buffer. */
+		b = block[0];
+
+		/* Copy the record. */
+		memcpy(*buf + avail_size, b, extra_size - avail_size);
+		b += extra_size - avail_size;
+
+		*mrec = *buf + extra_size;
+
+		rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
+
+		data_size = rec_offs_data_size(offsets);
+
+		/* These overflows should be impossible given that
+		records are much smaller than either buffer, and
+		the record starts near the beginning of each buffer. */
+		ut_a(extra_size + data_size < sizeof *buf);
+		ut_a(b + data_size < block[1]);
+
+		/* Copy the data bytes. */
+		memcpy(*buf + extra_size, b, data_size);
+		b += data_size;
+
+		goto func_exit;
+	}
+
+	*mrec = b + extra_size;
+
+	rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
+
+	data_size = rec_offs_data_size(offsets);
+	ut_ad(extra_size + data_size < sizeof *buf);
+
+	b += extra_size + data_size;
+
+	if (UNIV_LIKELY(b < block[1])) {
+		/* The record fits entirely in the block.
+		This is the normal case. */
+		goto func_exit;
+	}
+
+	/* The record spans two blocks.  Copy it to buf. */
+
+	b -= extra_size + data_size;
+	avail_size = block[1] - b;
+	memcpy(*buf, b, avail_size);
+	*mrec = *buf + extra_size;
+#ifdef UNIV_DEBUG
+	/* We cannot invoke rec_offs_make_valid() here, because there
+	are no REC_N_NEW_EXTRA_BYTES between extra_size and data_size.
+	Similarly, rec_offs_validate() would fail, because it invokes
+	rec_get_status(). */
+	offsets[2] = (ulint) *mrec;
+	offsets[3] = (ulint) index;
+#endif /* UNIV_DEBUG */
+
+	if (!row_merge_read(fd, ++(*foffs), block)) {
+
+		goto err_exit;
+	}
+
+	/* Wrap around to the beginning of the buffer. */
+	b = block[0];
+
+	/* Copy the rest of the record. */
+	memcpy(*buf + avail_size, b, extra_size + data_size - avail_size);
+	b += extra_size + data_size - avail_size;
+
+func_exit:
+#ifdef UNIV_DEBUG
+	if (row_merge_print_read) {
+		fprintf(stderr, "row_merge_read %p,%p,%d,%lu ",
+			(const void*) b, (const void*) block,
+			fd, (ulong) *foffs);
+		rec_print_comp(stderr, *mrec, offsets);
+		putc('\n', stderr);
+	}
+#endif /* UNIV_DEBUG */
+
+	return(b);
+}
+
+/********************************************************************//**
+Write a merge record. */
+static
+void
+row_merge_write_rec_low(
+/*====================*/
+	byte*		b,	/*!< out: buffer */
+	ulint		e,	/*!< in: encoded extra_size */
+#ifdef UNIV_DEBUG
+	ulint		size,	/*!< in: total size to write */
+	int		fd,	/*!< in: file descriptor */
+	ulint		foffs,	/*!< in: file offset */
+#endif /* UNIV_DEBUG */
+	const mrec_t*	mrec,	/*!< in: record to write */
+	const ulint*	offsets)/*!< in: offsets of mrec */
+#ifndef UNIV_DEBUG
+# define row_merge_write_rec_low(b, e, size, fd, foffs, mrec, offsets)	\
+	row_merge_write_rec_low(b, e, mrec, offsets)
+#endif /* !UNIV_DEBUG */
+{
+#ifdef UNIV_DEBUG
+	const byte* const end = b + size;
+	ut_ad(e == rec_offs_extra_size(offsets) + 1);
+
+	if (row_merge_print_write) {
+		fprintf(stderr, "row_merge_write %p,%d,%lu ",
+			(void*) b, fd, (ulong) foffs);
+		rec_print_comp(stderr, mrec, offsets);
+		putc('\n', stderr);
+	}
+#endif /* UNIV_DEBUG */
+
+	if (e < 0x80) {
+		*b++ = (byte) e;
+	} else {
+		*b++ = (byte) (0x80 | (e >> 8));
+		*b++ = (byte) e;
+	}
+
+	memcpy(b, mrec - rec_offs_extra_size(offsets), rec_offs_size(offsets));
+	ut_ad(b + rec_offs_size(offsets) == end);
+}
+
+/********************************************************************//**
+Write a merge record.
+@return	pointer to end of block, or NULL on error */
+static
+byte*
+row_merge_write_rec(
+/*================*/
+	row_merge_block_t*	block,	/*!< in/out: file buffer */
+	mrec_buf_t*		buf,	/*!< in/out: secondary buffer */
+	byte*			b,	/*!< in: pointer to end of block */
+	int			fd,	/*!< in: file descriptor */
+	ulint*			foffs,	/*!< in/out: file offset */
+	const mrec_t*		mrec,	/*!< in: record to write */
+	const ulint*		offsets)/*!< in: offsets of mrec */
+{
+	ulint	extra_size;
+	ulint	size;
+	ulint	avail_size;
+
+	ut_ad(block);
+	ut_ad(buf);
+	ut_ad(b >= block[0]);
+	ut_ad(b < block[1]);
+	ut_ad(mrec);
+	ut_ad(foffs);
+	ut_ad(mrec < block[0] || mrec > block[1]);
+	ut_ad(mrec < buf[0] || mrec > buf[1]);
+
+	/* Normalize extra_size.  Value 0 signals "end of list". */
+	extra_size = rec_offs_extra_size(offsets) + 1;
+
+	size = extra_size + (extra_size >= 0x80)
+		+ rec_offs_data_size(offsets);
+
+	if (UNIV_UNLIKELY(b + size >= block[1])) {
+		/* The record spans two blocks.
+		Copy it to the temporary buffer first. */
+		avail_size = block[1] - b;
+
+		row_merge_write_rec_low(buf[0],
+					extra_size, size, fd, *foffs,
+					mrec, offsets);
+
+		/* Copy the head of the temporary buffer, write
+		the completed block, and copy the tail of the
+		record to the head of the new block. */
+		memcpy(b, buf[0], avail_size);
+
+		if (!row_merge_write(fd, (*foffs)++, block)) {
+			return(NULL);
+		}
+
+		UNIV_MEM_INVALID(block[0], sizeof block[0]);
+
+		/* Copy the rest. */
+		b = block[0];
+		memcpy(b, buf[0] + avail_size, size - avail_size);
+		b += size - avail_size;
+	} else {
+		row_merge_write_rec_low(b, extra_size, size, fd, *foffs,
+					mrec, offsets);
+		b += size;
+	}
+
+	return(b);
+}
+
+/********************************************************************//**
+Write an end-of-list marker.
+@return	pointer to end of block, or NULL on error */
+static
+byte*
+row_merge_write_eof(
+/*================*/
+	row_merge_block_t*	block,	/*!< in/out: file buffer */
+	byte*			b,	/*!< in: pointer to end of block */
+	int			fd,	/*!< in: file descriptor */
+	ulint*			foffs)	/*!< in/out: file offset */
+{
+	ut_ad(block);
+	ut_ad(b >= block[0]);
+	ut_ad(b < block[1]);
+	ut_ad(foffs);
+#ifdef UNIV_DEBUG
+	if (row_merge_print_write) {
+		fprintf(stderr, "row_merge_write %p,%p,%d,%lu EOF\n",
+			(void*) b, (void*) block, fd, (ulong) *foffs);
+	}
+#endif /* UNIV_DEBUG */
+
+	*b++ = 0;
+	UNIV_MEM_ASSERT_RW(block[0], b - block[0]);
+	UNIV_MEM_ASSERT_W(block[0], sizeof block[0]);
+#ifdef UNIV_DEBUG_VALGRIND
+	/* The rest of the block is uninitialized.  Initialize it
+	to avoid bogus warnings. */
+	memset(b, 0xff, block[1] - b);
+#endif /* UNIV_DEBUG_VALGRIND */
+
+	if (!row_merge_write(fd, (*foffs)++, block)) {
+		return(NULL);
+	}
+
+	UNIV_MEM_INVALID(block[0], sizeof block[0]);
+	return(block[0]);
+}
+
+/*************************************************************//**
+Compare two merge records.
+@return	1, 0, -1 if mrec1 is greater, equal, less, respectively, than mrec2 */
+static
+int
+row_merge_cmp(
+/*==========*/
+	const mrec_t*		mrec1,		/*!< in: first merge
+						record to be compared */
+	const mrec_t*		mrec2,		/*!< in: second merge
+						record to be compared */
+	const ulint*		offsets1,	/*!< in: first record offsets */
+	const ulint*		offsets2,	/*!< in: second record offsets */
+	const dict_index_t*	index)		/*!< in: index */
+{
+	int	cmp;
+
+	cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index);
+
+#ifdef UNIV_DEBUG
+	if (row_merge_print_cmp) {
+		fputs("row_merge_cmp1 ", stderr);
+		rec_print_comp(stderr, mrec1, offsets1);
+		fputs("\nrow_merge_cmp2 ", stderr);
+		rec_print_comp(stderr, mrec2, offsets2);
+		fprintf(stderr, "\nrow_merge_cmp=%d\n", cmp);
+	}
+#endif /* UNIV_DEBUG */
+
+	return(cmp);
+}
+
+/********************************************************************//**
+Reads clustered index of the table and create temporary files
+containing the index entries for the indexes to be built.
+@return	DB_SUCCESS or error */
+static
+ulint
+row_merge_read_clustered_index(
+/*===========================*/
+	trx_t*			trx,	/*!< in: transaction */
+	TABLE*			table,	/*!< in/out: MySQL table object,
+					for reporting erroneous records */
+	const dict_table_t*	old_table,/*!< in: table where rows are
+					read from */
+	const dict_table_t*	new_table,/*!< in: table where indexes are
+					created; identical to old_table
+					unless creating a PRIMARY KEY */
+	dict_index_t**		index,	/*!< in: indexes to be created */
+	merge_file_t*		files,	/*!< in: temporary files */
+	ulint			n_index,/*!< in: number of indexes to create */
+	row_merge_block_t*	block)	/*!< in/out: file buffer */
+{
+	dict_index_t*		clust_index;	/* Clustered index */
+	mem_heap_t*		row_heap;	/* Heap memory to create
+						clustered index records */
+	row_merge_buf_t**	merge_buf;	/* Temporary list for records*/
+	btr_pcur_t		pcur;		/* Persistent cursor on the
+						clustered index */
+	mtr_t			mtr;		/* Mini transaction */
+	ulint			err = DB_SUCCESS;/* Return code */
+	ulint			i;
+	ulint			n_nonnull = 0;	/* number of columns
+						changed to NOT NULL */
+	ulint*			nonnull = NULL;	/* NOT NULL columns */
+
+	trx->op_info = "reading clustered index";
+
+	ut_ad(trx);
+	ut_ad(old_table);
+	ut_ad(new_table);
+	ut_ad(index);
+	ut_ad(files);
+
+	/* Create and initialize memory for record buffers */
+
+	merge_buf = mem_alloc(n_index * sizeof *merge_buf);
+
+	for (i = 0; i < n_index; i++) {
+		merge_buf[i] = row_merge_buf_create(index[i]);
+	}
+
+	mtr_start(&mtr);
+
+	/* Find the clustered index and create a persistent cursor
+	based on that. */
+
+	clust_index = dict_table_get_first_index(old_table);
+
+	btr_pcur_open_at_index_side(
+		TRUE, clust_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+
+	if (UNIV_UNLIKELY(old_table != new_table)) {
+		ulint	n_cols = dict_table_get_n_cols(old_table);
+
+		/* A primary key will be created.  Identify the
+		columns that were flagged NOT NULL in the new table,
+		so that we can quickly check that the records in the
+		(old) clustered index do not violate the added NOT
+		NULL constraints. */
+
+		ut_a(n_cols == dict_table_get_n_cols(new_table));
+
+		nonnull = mem_alloc(n_cols * sizeof *nonnull);
+
+		for (i = 0; i < n_cols; i++) {
+			if (dict_table_get_nth_col(old_table, i)->prtype
+			    & DATA_NOT_NULL) {
+
+				continue;
+			}
+
+			if (dict_table_get_nth_col(new_table, i)->prtype
+			    & DATA_NOT_NULL) {
+
+				nonnull[n_nonnull++] = i;
+			}
+		}
+
+		if (!n_nonnull) {
+			mem_free(nonnull);
+			nonnull = NULL;
+		}
+	}
+
+	row_heap = mem_heap_create(sizeof(mrec_buf_t));
+
+	/* Scan the clustered index. */
+	for (;;) {
+		const rec_t*	rec;
+		ulint*		offsets;
+		dtuple_t*	row		= NULL;
+		row_ext_t*	ext;
+		ibool		has_next	= TRUE;
+
+		btr_pcur_move_to_next_on_page(&pcur);
+
+		/* When switching pages, commit the mini-transaction
+		in order to release the latch on the old page. */
+
+		if (btr_pcur_is_after_last_on_page(&pcur)) {
+			btr_pcur_store_position(&pcur, &mtr);
+			mtr_commit(&mtr);
+			mtr_start(&mtr);
+			btr_pcur_restore_position(BTR_SEARCH_LEAF,
+						  &pcur, &mtr);
+			has_next = btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+		}
+
+		if (UNIV_LIKELY(has_next)) {
+			rec = btr_pcur_get_rec(&pcur);
+			offsets = rec_get_offsets(rec, clust_index, NULL,
+						  ULINT_UNDEFINED, &row_heap);
+
+			/* Skip delete marked records. */
+			if (rec_get_deleted_flag(
+				    rec, dict_table_is_comp(old_table))) {
+				continue;
+			}
+
+			srv_n_rows_inserted++;
+
+			/* Build a row based on the clustered index. */
+
+			row = row_build(ROW_COPY_POINTERS, clust_index,
+					rec, offsets,
+					new_table, &ext, row_heap);
+
+			if (UNIV_LIKELY_NULL(nonnull)) {
+				for (i = 0; i < n_nonnull; i++) {
+					dfield_t*	field
+						= &row->fields[nonnull[i]];
+					dtype_t*	field_type
+						= dfield_get_type(field);
+
+					ut_a(!(field_type->prtype
+					       & DATA_NOT_NULL));
+
+					if (dfield_is_null(field)) {
+						err = DB_PRIMARY_KEY_IS_NULL;
+						i = 0;
+						goto err_exit;
+					}
+
+					field_type->prtype |= DATA_NOT_NULL;
+				}
+			}
+		}
+
+		/* Build all entries for all the indexes to be created
+		in a single scan of the clustered index. */
+
+		for (i = 0; i < n_index; i++) {
+			row_merge_buf_t*	buf	= merge_buf[i];
+			merge_file_t*		file	= &files[i];
+			const dict_index_t*	index	= buf->index;
+
+			if (UNIV_LIKELY
+			    (row && row_merge_buf_add(buf, row, ext))) {
+				continue;
+			}
+
+			/* The buffer must be sufficiently large
+			to hold at least one record. */
+			ut_ad(buf->n_tuples || !has_next);
+
+			/* We have enough data tuples to form a block.
+			Sort them and write to disk. */
+
+			if (buf->n_tuples) {
+				if (dict_index_is_unique(index)) {
+					row_merge_dup_t	dup;
+					dup.index = buf->index;
+					dup.table = table;
+					dup.n_dup = 0;
+
+					row_merge_buf_sort(buf, &dup);
+
+					if (dup.n_dup) {
+						err = DB_DUPLICATE_KEY;
+err_exit:
+						trx->error_key_num = i;
+						goto func_exit;
+					}
+				} else {
+					row_merge_buf_sort(buf, NULL);
+				}
+			}
+
+			row_merge_buf_write(buf, file, block);
+
+			if (!row_merge_write(file->fd, file->offset++,
+					     block)) {
+				err = DB_OUT_OF_FILE_SPACE;
+				goto err_exit;
+			}
+
+			UNIV_MEM_INVALID(block[0], sizeof block[0]);
+			merge_buf[i] = row_merge_buf_empty(buf);
+
+			/* Try writing the record again, now that
+			the buffer has been written out and emptied. */
+
+			if (UNIV_UNLIKELY
+			    (row && !row_merge_buf_add(buf, row, ext))) {
+				/* An empty buffer should have enough
+				room for at least one record. */
+				ut_error;
+			}
+		}
+
+		mem_heap_empty(row_heap);
+
+		if (UNIV_UNLIKELY(!has_next)) {
+			goto func_exit;
+		}
+	}
+
+func_exit:
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+	mem_heap_free(row_heap);
+
+	if (UNIV_LIKELY_NULL(nonnull)) {
+		mem_free(nonnull);
+	}
+
+	for (i = 0; i < n_index; i++) {
+		row_merge_buf_free(merge_buf[i]);
+	}
+
+	mem_free(merge_buf);
+
+	trx->op_info = "";
+
+	return(err);
+}
+
+/** Write a record via buffer 2 and read the next record to buffer N.
+@param N	number of the buffer (0 or 1)
+@param AT_END	statement to execute at end of input */
+#define ROW_MERGE_WRITE_GET_NEXT(N, AT_END)				\
+	do {								\
+		b2 = row_merge_write_rec(&block[2], &buf[2], b2,	\
+					 of->fd, &of->offset,		\
+					 mrec##N, offsets##N);		\
+		if (UNIV_UNLIKELY(!b2)) {				\
+			goto corrupt;					\
+		}							\
+		b##N = row_merge_read_rec(&block[N], &buf[N],		\
+					  b##N, index,			\
+					  file->fd, foffs##N,		\
+					  &mrec##N, offsets##N);	\
+		if (UNIV_UNLIKELY(!b##N)) {				\
+			if (mrec##N) {					\
+				goto corrupt;				\
+			}						\
+			AT_END;						\
+		}							\
+	} while (0)
+
+/*************************************************************//**
+Merge two blocks of linked lists on disk and write a bigger block.
+@return	DB_SUCCESS or error code */
+static
+ulint
+row_merge_blocks(
+/*=============*/
+	const dict_index_t*	index,	/*!< in: index being created */
+	merge_file_t*		file,	/*!< in/out: file containing
+					index entries */
+	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
+	ulint*			foffs0,	/*!< in/out: offset of first
+					source list in the file */
+	ulint*			foffs1,	/*!< in/out: offset of second
+					source list in the file */
+	merge_file_t*		of,	/*!< in/out: output file */
+	TABLE*			table)	/*!< in/out: MySQL table, for
+					reporting erroneous key value
+					if applicable */
+{
+	mem_heap_t*	heap;	/*!< memory heap for offsets0, offsets1 */
+
+	mrec_buf_t	buf[3];	/*!< buffer for handling split mrec in block[] */
+	const byte*	b0;	/*!< pointer to block[0] */
+	const byte*	b1;	/*!< pointer to block[1] */
+	byte*		b2;	/*!< pointer to block[2] */
+	const mrec_t*	mrec0;	/*!< merge rec, points to block[0] or buf[0] */
+	const mrec_t*	mrec1;	/*!< merge rec, points to block[1] or buf[1] */
+	ulint*		offsets0;/* offsets of mrec0 */
+	ulint*		offsets1;/* offsets of mrec1 */
+
+	heap = row_merge_heap_create(index, &offsets0, &offsets1);
+
+	/* Write a record and read the next record.  Split the output
+	file in two halves, which can be merged on the following pass. */
+
+	if (!row_merge_read(file->fd, *foffs0, &block[0])
+	    || !row_merge_read(file->fd, *foffs1, &block[1])) {
+corrupt:
+		mem_heap_free(heap);
+		return(DB_CORRUPTION);
+	}
+
+	b0 = block[0];
+	b1 = block[1];
+	b2 = block[2];
+
+	b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
+				foffs0, &mrec0, offsets0);
+	b1 = row_merge_read_rec(&block[1], &buf[1], b1, index, file->fd,
+				foffs1, &mrec1, offsets1);
+	if (UNIV_UNLIKELY(!b0 && mrec0)
+	    || UNIV_UNLIKELY(!b1 && mrec1)) {
+
+		goto corrupt;
+	}
+
+	while (mrec0 && mrec1) {
+		switch (row_merge_cmp(mrec0, mrec1,
+				      offsets0, offsets1, index)) {
+		case 0:
+			if (UNIV_UNLIKELY
+			    (dict_index_is_unique(index))) {
+				innobase_rec_to_mysql(table, mrec0,
+						      index, offsets0);
+				mem_heap_free(heap);
+				return(DB_DUPLICATE_KEY);
+			}
+			/* fall through */
+		case -1:
+			ROW_MERGE_WRITE_GET_NEXT(0, goto merged);
+			break;
+		case 1:
+			ROW_MERGE_WRITE_GET_NEXT(1, goto merged);
+			break;
+		default:
+			ut_error;
+		}
+
+	}
+
+merged:
+	if (mrec0) {
+		/* append all mrec0 to output */
+		for (;;) {
+			ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
+		}
+	}
+done0:
+	if (mrec1) {
+		/* append all mrec1 to output */
+		for (;;) {
+			ROW_MERGE_WRITE_GET_NEXT(1, goto done1);
+		}
+	}
+done1:
+
+	mem_heap_free(heap);
+	b2 = row_merge_write_eof(&block[2], b2, of->fd, &of->offset);
+	return(b2 ? DB_SUCCESS : DB_CORRUPTION);
+}
+
+/*************************************************************//**
+Merge disk files.
+@return	DB_SUCCESS or error code */
+static
+ulint
+row_merge(
+/*======*/
+	const dict_index_t*	index,	/*!< in: index being created */
+	merge_file_t*		file,	/*!< in/out: file containing
+					index entries */
+	ulint			half,	/*!< in: half the file */
+	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
+	int*			tmpfd,	/*!< in/out: temporary file handle */
+	TABLE*			table)	/*!< in/out: MySQL table, for
+					reporting erroneous key value
+					if applicable */
+{
+	ulint		foffs0;	/*!< first input offset */
+	ulint		foffs1;	/*!< second input offset */
+	ulint		error;	/*!< error code */
+	merge_file_t	of;	/*!< output file */
+
+	UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]);
+	ut_ad(half > 0);
+
+	of.fd = *tmpfd;
+	of.offset = 0;
+
+	/* Merge blocks to the output file. */
+	foffs0 = 0;
+	foffs1 = half;
+
+	for (; foffs0 < half && foffs1 < file->offset; foffs0++, foffs1++) {
+		error = row_merge_blocks(index, file, block,
+					 &foffs0, &foffs1, &of, table);
+
+		if (error != DB_SUCCESS) {
+			return(error);
+		}
+	}
+
+	/* Copy the last block, if there is one. */
+	while (foffs0 < half) {
+		if (!row_merge_read(file->fd, foffs0++, block)
+		    || !row_merge_write(of.fd, of.offset++, block)) {
+			return(DB_CORRUPTION);
+		}
+	}
+	while (foffs1 < file->offset) {
+		if (!row_merge_read(file->fd, foffs1++, block)
+		    || !row_merge_write(of.fd, of.offset++, block)) {
+			return(DB_CORRUPTION);
+		}
+	}
+
+	/* Swap file descriptors for the next pass. */
+	*tmpfd = file->fd;
+	*file = of;
+
+	UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]);
+
+	return(DB_SUCCESS);
+}
+
+/*************************************************************//**
+Merge disk files.
+@return	DB_SUCCESS or error code */
+static
+ulint
+row_merge_sort(
+/*===========*/
+	const dict_index_t*	index,	/*!< in: index being created */
+	merge_file_t*		file,	/*!< in/out: file containing
+					index entries */
+	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
+	int*			tmpfd,	/*!< in/out: temporary file handle */
+	TABLE*			table)	/*!< in/out: MySQL table, for
+					reporting erroneous key value
+					if applicable */
+{
+	ulint	blksz;	/*!< block size */
+
+	for (blksz = 1; blksz < file->offset; blksz *= 2) {
+		ulint	half;
+		ulint	error;
+
+		ut_ad(ut_is_2pow(blksz));
+		half = ut_2pow_round((file->offset + (blksz - 1)) / 2, blksz);
+		error = row_merge(index, file, half, block, tmpfd, table);
+
+		if (error != DB_SUCCESS) {
+			return(error);
+		}
+	}
+
+	return(DB_SUCCESS);
+}
+
+/*************************************************************//**
+Copy externally stored columns to the data tuple. */
+static
+void
+row_merge_copy_blobs(
+/*=================*/
+	const mrec_t*	mrec,	/*!< in: merge record */
+	const ulint*	offsets,/*!< in: offsets of mrec */
+	ulint		zip_size,/*!< in: compressed page size in bytes, or 0 */
+	dtuple_t*	tuple,	/*!< in/out: data tuple */
+	mem_heap_t*	heap)	/*!< in/out: memory heap */
+{
+	ulint	i;
+	ulint	n_fields = dtuple_get_n_fields(tuple);
+
+	for (i = 0; i < n_fields; i++) {
+		ulint		len;
+		const void*	data;
+		dfield_t*	field = dtuple_get_nth_field(tuple, i);
+
+		if (!dfield_is_ext(field)) {
+			continue;
+		}
+
+		ut_ad(!dfield_is_null(field));
+
+		/* The table is locked during index creation.
+		Therefore, externally stored columns cannot possibly
+		be freed between the time the BLOB pointers are read
+		(row_merge_read_clustered_index()) and dereferenced
+		(below). */
+		data = btr_rec_copy_externally_stored_field(
+			mrec, offsets, zip_size, i, &len, heap);
+
+		dfield_set_data(field, data, len);
+	}
+}
+
+/********************************************************************//**
+Read sorted file containing index data tuples and insert these data
+tuples to the index
+@return	DB_SUCCESS or error number */
+static
+ulint
+row_merge_insert_index_tuples(
+/*==========================*/
+	trx_t*			trx,	/*!< in: transaction */
+	dict_index_t*		index,	/*!< in: index */
+	dict_table_t*		table,	/*!< in: new table */
+	ulint			zip_size,/*!< in: compressed page size of
+					 the old table, or 0 if uncompressed */
+	int			fd,	/*!< in: file descriptor */
+	row_merge_block_t*	block)	/*!< in/out: file buffer */
+{
+	mrec_buf_t		buf;
+	const byte*		b;
+	que_thr_t*		thr;
+	ins_node_t*		node;
+	mem_heap_t*		tuple_heap;
+	mem_heap_t*		graph_heap;
+	ulint			error = DB_SUCCESS;
+	ulint			foffs = 0;
+	ulint*			offsets;
+
+	ut_ad(trx);
+	ut_ad(index);
+	ut_ad(table);
+
+	/* We use the insert query graph as the dummy graph
+	needed in the row module call */
+
+	trx->op_info = "inserting index entries";
+
+	graph_heap = mem_heap_create(500);
+	node = ins_node_create(INS_DIRECT, table, graph_heap);
+
+	thr = pars_complete_graph_for_exec(node, trx, graph_heap);
+
+	que_thr_move_to_run_state_for_mysql(thr, trx);
+
+	tuple_heap = mem_heap_create(1000);
+
+	{
+		ulint i	= 1 + REC_OFFS_HEADER_SIZE
+			+ dict_index_get_n_fields(index);
+		offsets = mem_heap_alloc(graph_heap, i * sizeof *offsets);
+		offsets[0] = i;
+		offsets[1] = dict_index_get_n_fields(index);
+	}
+
+	b = *block;
+
+	if (!row_merge_read(fd, foffs, block)) {
+		error = DB_CORRUPTION;
+	} else {
+		for (;;) {
+			const mrec_t*	mrec;
+			dtuple_t*	dtuple;
+			ulint		n_ext;
+
+			b = row_merge_read_rec(block, &buf, b, index,
+					       fd, &foffs, &mrec, offsets);
+			if (UNIV_UNLIKELY(!b)) {
+				/* End of list, or I/O error */
+				if (mrec) {
+					error = DB_CORRUPTION;
+				}
+				break;
+			}
+
+			dtuple = row_rec_to_index_entry_low(
+				mrec, index, offsets, &n_ext, tuple_heap);
+
+			if (UNIV_UNLIKELY(n_ext)) {
+				row_merge_copy_blobs(mrec, offsets, zip_size,
+						     dtuple, tuple_heap);
+			}
+
+			node->row = dtuple;
+			node->table = table;
+			node->trx_id = trx->id;
+
+			ut_ad(dtuple_validate(dtuple));
+
+			do {
+				thr->run_node = thr;
+				thr->prev_node = thr->common.parent;
+
+				error = row_ins_index_entry(index, dtuple,
+							    0, FALSE, thr);
+
+				if (UNIV_LIKELY(error == DB_SUCCESS)) {
+
+					goto next_rec;
+				}
+
+				thr->lock_state = QUE_THR_LOCK_ROW;
+				trx->error_state = error;
+				que_thr_stop_for_mysql(thr);
+				thr->lock_state = QUE_THR_LOCK_NOLOCK;
+			} while (row_mysql_handle_errors(&error, trx,
+							 thr, NULL));
+
+			goto err_exit;
+next_rec:
+			mem_heap_empty(tuple_heap);
+		}
+	}
+
+	que_thr_stop_for_mysql_no_error(thr, trx);
+err_exit:
+	que_graph_free(thr->graph);
+
+	trx->op_info = "";
+
+	mem_heap_free(tuple_heap);
+
+	return(error);
+}
+
+/*********************************************************************//**
+Sets an exclusive lock on a table, for the duration of creating indexes.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+ulint
+row_merge_lock_table(
+/*=================*/
+	trx_t*		trx,		/*!< in/out: transaction */
+	dict_table_t*	table,		/*!< in: table to lock */
+	enum lock_mode	mode)		/*!< in: LOCK_X or LOCK_S */
+{
+	mem_heap_t*	heap;
+	que_thr_t*	thr;
+	ulint		err;
+	sel_node_t*	node;
+
+	ut_ad(trx);
+	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+	ut_ad(mode == LOCK_X || mode == LOCK_S);
+
+	heap = mem_heap_create(512);
+
+	trx->op_info = "setting table lock for creating or dropping index";
+
+	node = sel_node_create(heap);
+	thr = pars_complete_graph_for_exec(node, trx, heap);
+	thr->graph->state = QUE_FORK_ACTIVE;
+
+	/* We use the select query graph as the dummy graph needed
+	in the lock module call */
+
+	thr = que_fork_get_first_thr(que_node_get_parent(thr));
+	que_thr_move_to_run_state_for_mysql(thr, trx);
+
+run_again:
+	thr->run_node = thr;
+	thr->prev_node = thr->common.parent;
+
+	err = lock_table(0, table, mode, thr);
+
+	trx->error_state = err;
+
+	if (UNIV_LIKELY(err == DB_SUCCESS)) {
+		que_thr_stop_for_mysql_no_error(thr, trx);
+	} else {
+		que_thr_stop_for_mysql(thr);
+
+		if (err != DB_QUE_THR_SUSPENDED) {
+			ibool	was_lock_wait;
+
+			was_lock_wait = row_mysql_handle_errors(
+				&err, trx, thr, NULL);
+
+			if (was_lock_wait) {
+				goto run_again;
+			}
+		} else {
+			que_thr_t*	run_thr;
+			que_node_t*	parent;
+
+			parent = que_node_get_parent(thr);
+			run_thr = que_fork_start_command(parent);
+
+			ut_a(run_thr == thr);
+
+			/* There was a lock wait but the thread was not
+			in a ready to run or running state. */
+			trx->error_state = DB_LOCK_WAIT;
+
+			goto run_again;
+		}
+	}
+
+	que_graph_free(thr->graph);
+	trx->op_info = "";
+
+	return(err);
+}
+
+/*********************************************************************//**
+Drop an index from the InnoDB system tables.  The data dictionary must
+have been locked exclusively by the caller, because the transaction
+will not be committed. */
+UNIV_INTERN
+void
+row_merge_drop_index(
+/*=================*/
+	dict_index_t*	index,	/*!< in: index to be removed */
+	dict_table_t*	table,	/*!< in: table */
+	trx_t*		trx)	/*!< in: transaction handle */
+{
+	ulint		err;
+	pars_info_t*	info = pars_info_create();
+
+	/* We use the private SQL parser of Innobase to generate the
+	query graphs needed in deleting the dictionary data from system
+	tables in Innobase. Deleting a row from SYS_INDEXES table also
+	frees the file segments of the B-tree associated with the index. */
+
+	static const char str1[] =
+		"PROCEDURE DROP_INDEX_PROC () IS\n"
+		"BEGIN\n"
+		"DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
+		"DELETE FROM SYS_INDEXES WHERE ID = :indexid\n"
+		"		AND TABLE_ID = :tableid;\n"
+		"END;\n";
+
+	ut_ad(index && table && trx);
+
+	pars_info_add_dulint_literal(info, "indexid", index->id);
+	pars_info_add_dulint_literal(info, "tableid", table->id);
+
+	trx_start_if_not_started(trx);
+	trx->op_info = "dropping index";
+
+	ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+
+	err = que_eval_sql(info, str1, FALSE, trx);
+
+	ut_a(err == DB_SUCCESS);
+
+	/* Replace this index with another equivalent index for all
+	foreign key constraints on this table where this index is used */
+
+	dict_table_replace_index_in_foreign_list(table, index);
+	dict_index_remove_from_cache(table, index);
+
+	trx->op_info = "";
+}
+
+/*********************************************************************//**
+Drop those indexes which were created before an error occurred when
+building an index.  The data dictionary must have been locked
+exclusively by the caller, because the transaction will not be
+committed. */
+UNIV_INTERN
+void
+row_merge_drop_indexes(
+/*===================*/
+	trx_t*		trx,		/*!< in: transaction */
+	dict_table_t*	table,		/*!< in: table containing the indexes */
+	dict_index_t**	index,		/*!< in: indexes to drop */
+	ulint		num_created)	/*!< in: number of elements in index[] */
+{
+	ulint	key_num;
+
+	for (key_num = 0; key_num < num_created; key_num++) {
+		row_merge_drop_index(index[key_num], table, trx);
+	}
+}
+
+/*********************************************************************//**
+Drop all partially created indexes during crash recovery. */
+UNIV_INTERN
+void
+row_merge_drop_temp_indexes(void)
+/*=============================*/
+{
+	trx_t*		trx;
+	ulint		err;
+
+	/* We use the private SQL parser of Innobase to generate the
+	query graphs needed in deleting the dictionary data from system
+	tables in Innobase. Deleting a row from SYS_INDEXES table also
+	frees the file segments of the B-tree associated with the index. */
+	static const char drop_temp_indexes[] =
+		"PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
+		"indexid CHAR;\n"
+		"DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n"
+		"WHERE SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "';\n"
+		"BEGIN\n"
+		"\tOPEN c;\n"
+		"\tWHILE 1=1 LOOP\n"
+		"\t\tFETCH c INTO indexid;\n"
+		"\t\tIF (SQL % NOTFOUND) THEN\n"
+		"\t\t\tEXIT;\n"
+		"\t\tEND IF;\n"
+		"\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n"
+		"\t\tDELETE FROM SYS_INDEXES WHERE ID = indexid;\n"
+		"\tEND LOOP;\n"
+		"\tCLOSE c;\n"
+		"\tCOMMIT WORK;\n"
+		"END;\n";
+
+	trx = trx_allocate_for_background();
+	trx->op_info = "dropping partially created indexes";
+	row_mysql_lock_data_dictionary(trx);
+
+	/* Incomplete transactions may be holding some locks on the
+	data dictionary tables.  However, they should never have been
+	able to lock the records corresponding to the partially
+	created indexes that we are attempting to delete, because the
+	table was locked when the indexes were being created.  We will
+	drop the partially created indexes before the rollback of
+	incomplete transactions is initiated.  Thus, this should not
+	interfere with the incomplete transactions. */
+	trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
+	err = que_eval_sql(NULL, drop_temp_indexes, FALSE, trx);
+	ut_a(err == DB_SUCCESS);
+
+	row_mysql_unlock_data_dictionary(trx);
+	trx_free_for_background(trx);
+}
+
+/*********************************************************************//**
+Create a merge file. */
+static
+void
+row_merge_file_create(
+/*==================*/
+	merge_file_t*	merge_file)	/*!< out: merge file structure */
+{
+	merge_file->fd = innobase_mysql_tmpfile();
+	merge_file->offset = 0;
+}
+
+/*********************************************************************//**
+Destroy a merge file. */
+static
+void
+row_merge_file_destroy(
+/*===================*/
+	merge_file_t*	merge_file)	/*!< out: merge file structure */
+{
+	if (merge_file->fd != -1) {
+		close(merge_file->fd);
+		merge_file->fd = -1;
+	}
+}
+
+/*********************************************************************//**
+Determine the precise type of a column that is added to a tem
+if a column must be constrained NOT NULL.
+@return	col->prtype, possibly ORed with DATA_NOT_NULL */
+UNIV_INLINE
+ulint
+row_merge_col_prtype(
+/*=================*/
+	const dict_col_t*	col,		/*!< in: column */
+	const char*		col_name,	/*!< in: name of the column */
+	const merge_index_def_t*index_def)	/*!< in: the index definition
+						of the primary key */
+{
+	ulint	prtype = col->prtype;
+	ulint	i;
+
+	ut_ad(index_def->ind_type & DICT_CLUSTERED);
+
+	if (prtype & DATA_NOT_NULL) {
+
+		return(prtype);
+	}
+
+	/* All columns that are included
+	in the PRIMARY KEY must be NOT NULL. */
+
+	for (i = 0; i < index_def->n_fields; i++) {
+		if (!strcmp(col_name, index_def->fields[i].field_name)) {
+			return(prtype | DATA_NOT_NULL);
+		}
+	}
+
+	return(prtype);
+}
+
+/*********************************************************************//**
+Create a temporary table for creating a primary key, using the definition
+of an existing table.
+@return	table, or NULL on error */
+UNIV_INTERN
+dict_table_t*
+row_merge_create_temporary_table(
+/*=============================*/
+	const char*		table_name,	/*!< in: new table name */
+	const merge_index_def_t*index_def,	/*!< in: the index definition
+						of the primary key */
+	const dict_table_t*	table,		/*!< in: old table definition */
+	trx_t*			trx)		/*!< in/out: transaction
+						(sets error_state) */
+{
+	ulint		i;
+	dict_table_t*	new_table = NULL;
+	ulint		n_cols = dict_table_get_n_user_cols(table);
+	ulint		error;
+	mem_heap_t*	heap = mem_heap_create(1000);
+
+	ut_ad(table_name);
+	ut_ad(index_def);
+	ut_ad(table);
+	ut_ad(mutex_own(&dict_sys->mutex));
+
+	new_table = dict_mem_table_create(table_name, 0, n_cols, table->flags);
+
+	for (i = 0; i < n_cols; i++) {
+		const dict_col_t*	col;
+		const char*		col_name;
+
+		col = dict_table_get_nth_col(table, i);
+		col_name = dict_table_get_col_name(table, i);
+
+		dict_mem_table_add_col(new_table, heap, col_name, col->mtype,
+				       row_merge_col_prtype(col, col_name,
+							    index_def),
+				       col->len);
+	}
+
+	error = row_create_table_for_mysql(new_table, trx);
+	mem_heap_free(heap);
+
+	if (error != DB_SUCCESS) {
+		trx->error_state = error;
+		new_table = NULL;
+	}
+
+	return(new_table);
+}
+
+/*********************************************************************//**
+Rename the temporary indexes in the dictionary to permanent ones.  The
+data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed.
+@return	DB_SUCCESS if all OK */
+UNIV_INTERN
+ulint
+row_merge_rename_indexes(
+/*=====================*/
+	trx_t*		trx,		/*!< in/out: transaction */
+	dict_table_t*	table)		/*!< in/out: table with new indexes */
+{
+	ulint		err = DB_SUCCESS;
+	pars_info_t*	info = pars_info_create();
+
+	/* We use the private SQL parser of Innobase to generate the
+	query graphs needed in renaming indexes. */
+
+	static const char rename_indexes[] =
+		"PROCEDURE RENAME_INDEXES_PROC () IS\n"
+		"BEGIN\n"
+		"UPDATE SYS_INDEXES SET NAME=SUBSTR(NAME,1,LENGTH(NAME)-1)\n"
+		"WHERE TABLE_ID = :tableid AND SUBSTR(NAME,0,1)='"
+		TEMP_INDEX_PREFIX_STR "';\n"
+		"END;\n";
+
+	ut_ad(table);
+	ut_ad(trx);
+	ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+
+	trx->op_info = "renaming indexes";
+
+	pars_info_add_dulint_literal(info, "tableid", table->id);
+
+	err = que_eval_sql(info, rename_indexes, FALSE, trx);
+
+	if (err == DB_SUCCESS) {
+		dict_index_t*	index = dict_table_get_first_index(table);
+		do {
+			if (*index->name == TEMP_INDEX_PREFIX) {
+				index->name++;
+			}
+			index = dict_table_get_next_index(index);
+		} while (index);
+	}
+
+	trx->op_info = "";
+
+	return(err);
+}
+
+/*********************************************************************//**
+Rename the tables in the data dictionary.  The data dictionary must
+have been locked exclusively by the caller, because the transaction
+will not be committed.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+ulint
+row_merge_rename_tables(
+/*====================*/
+	dict_table_t*	old_table,	/*!< in/out: old table, renamed to
+					tmp_name */
+	dict_table_t*	new_table,	/*!< in/out: new table, renamed to
+					old_table->name */
+	const char*	tmp_name,	/*!< in: new name for old_table */
+	trx_t*		trx)		/*!< in: transaction handle */
+{
+	ulint		err	= DB_ERROR;
+	pars_info_t*	info;
+	const char*	old_name= old_table->name;
+
+	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+	ut_ad(old_table != new_table);
+	ut_ad(mutex_own(&dict_sys->mutex));
+
+	ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+
+	trx->op_info = "renaming tables";
+
+	/* We use the private SQL parser of Innobase to generate the query
+	graphs needed in updating the dictionary data in system tables. */
+
+	info = pars_info_create();
+
+	pars_info_add_str_literal(info, "new_name", new_table->name);
+	pars_info_add_str_literal(info, "old_name", old_name);
+	pars_info_add_str_literal(info, "tmp_name", tmp_name);
+
+	err = que_eval_sql(info,
+			   "PROCEDURE RENAME_TABLES () IS\n"
+			   "BEGIN\n"
+			   "UPDATE SYS_TABLES SET NAME = :tmp_name\n"
+			   " WHERE NAME = :old_name;\n"
+			   "UPDATE SYS_TABLES SET NAME = :old_name\n"
+			   " WHERE NAME = :new_name;\n"
+			   "END;\n", FALSE, trx);
+
+	if (err != DB_SUCCESS) {
+
+		goto err_exit;
+	}
+
+	/* The following calls will also rename the .ibd data files if
+	the tables are stored in a single-table tablespace */
+
+	if (!dict_table_rename_in_cache(old_table, tmp_name, FALSE)
+	    || !dict_table_rename_in_cache(new_table, old_name, FALSE)) {
+
+		err = DB_ERROR;
+		goto err_exit;
+	}
+
+	err = dict_load_foreigns(old_name, TRUE);
+
+	if (err != DB_SUCCESS) {
+err_exit:
+		trx->error_state = DB_SUCCESS;
+		trx_general_rollback_for_mysql(trx, FALSE, NULL);
+		trx->error_state = DB_SUCCESS;
+	}
+
+	trx->op_info = "";
+
+	return(err);
+}
+
+/*********************************************************************//**
+Create and execute a query graph for creating an index.
+@return	DB_SUCCESS or error code */
+static
+ulint
+row_merge_create_index_graph(
+/*=========================*/
+	trx_t*		trx,		/*!< in: trx */
+	dict_table_t*	table,		/*!< in: table */
+	dict_index_t*	index)		/*!< in: index */
+{
+	ind_node_t*	node;		/*!< Index creation node */
+	mem_heap_t*	heap;		/*!< Memory heap */
+	que_thr_t*	thr;		/*!< Query thread */
+	ulint		err;
+
+	ut_ad(trx);
+	ut_ad(table);
+	ut_ad(index);
+
+	heap = mem_heap_create(512);
+
+	index->table = table;
+	node = ind_create_graph_create(index, heap);
+	thr = pars_complete_graph_for_exec(node, trx, heap);
+
+	ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
+
+	que_run_threads(thr);
+
+	err = trx->error_state;
+
+	que_graph_free((que_t*) que_node_get_parent(thr));
+
+	return(err);
+}
+
+/*********************************************************************//**
+Create the index and load in to the dictionary.
+@return	index, or NULL on error */
+UNIV_INTERN
+dict_index_t*
+row_merge_create_index(
+/*===================*/
+	trx_t*			trx,	/*!< in/out: trx (sets error_state) */
+	dict_table_t*		table,	/*!< in: the index is on this table */
+	const merge_index_def_t*index_def)
+					/*!< in: the index definition */
+{
+	dict_index_t*	index;
+	ulint		err;
+	ulint		n_fields = index_def->n_fields;
+	ulint		i;
+
+	/* Create the index prototype, using the passed in def, this is not
+	a persistent operation. We pass 0 as the space id, and determine at
+	a lower level the space id where to store the table. */
+
+	index = dict_mem_index_create(table->name, index_def->name,
+				      0, index_def->ind_type, n_fields);
+
+	ut_a(index);
+
+	for (i = 0; i < n_fields; i++) {
+		merge_index_field_t*	ifield = &index_def->fields[i];
+
+		dict_mem_index_add_field(index, ifield->field_name,
+					 ifield->prefix_len);
+	}
+
+	/* Add the index to SYS_INDEXES, using the index prototype. */
+	err = row_merge_create_index_graph(trx, table, index);
+
+	if (err == DB_SUCCESS) {
+
+		index = row_merge_dict_table_get_index(
+			table, index_def);
+
+		ut_a(index);
+
+		/* Note the id of the transaction that created this
+		index, we use it to restrict readers from accessing
+		this index, to ensure read consistency. */
+		index->trx_id = (ib_uint64_t)
+			ut_conv_dulint_to_longlong(trx->id);
+	} else {
+		index = NULL;
+	}
+
+	return(index);
+}
+
+/*********************************************************************//**
+Check if a transaction can use an index. */
+UNIV_INTERN
+ibool
+row_merge_is_index_usable(
+/*======================*/
+	const trx_t*		trx,	/*!< in: transaction */
+	const dict_index_t*	index)	/*!< in: index to check */
+{
+	return(!trx->read_view || read_view_sees_trx_id(
+		       trx->read_view,
+		       ut_dulint_create((ulint) (index->trx_id >> 32),
+					(ulint) index->trx_id & 0xFFFFFFFF)));
+}
+
+/*********************************************************************//**
+Drop the old table.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+row_merge_drop_table(
+/*=================*/
+	trx_t*		trx,		/*!< in: transaction */
+	dict_table_t*	table)		/*!< in: table to drop */
+{
+	/* There must be no open transactions on the table. */
+	ut_a(table->n_mysql_handles_opened == 0);
+
+	return(row_drop_table_for_mysql(table->name, trx, FALSE));
+}
+
+/*********************************************************************//**
+Build indexes on a table by reading a clustered index,
+creating a temporary file containing index entries, merge sorting
+these index entries and inserting sorted index entries to indexes.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+row_merge_build_indexes(
+/*====================*/
+	trx_t*		trx,		/*!< in: transaction */
+	dict_table_t*	old_table,	/*!< in: table where rows are
+					read from */
+	dict_table_t*	new_table,	/*!< in: table where indexes are
+					created; identical to old_table
+					unless creating a PRIMARY KEY */
+	dict_index_t**	indexes,	/*!< in: indexes to be created */
+	ulint		n_indexes,	/*!< in: size of indexes[] */
+	TABLE*		table)		/*!< in/out: MySQL table, for
+					reporting erroneous key value
+					if applicable */
+{
+	merge_file_t*		merge_files;
+	row_merge_block_t*	block;
+	ulint			block_size;
+	ulint			i;
+	ulint			error;
+	int			tmpfd;
+
+	ut_ad(trx);
+	ut_ad(old_table);
+	ut_ad(new_table);
+	ut_ad(indexes);
+	ut_ad(n_indexes);
+
+	trx_start_if_not_started(trx);
+
+	/* Allocate memory for merge file data structure and initialize
+	fields */
+
+	merge_files = mem_alloc(n_indexes * sizeof *merge_files);
+	block_size = 3 * sizeof *block;
+	block = os_mem_alloc_large(&block_size);
+
+	for (i = 0; i < n_indexes; i++) {
+
+		row_merge_file_create(&merge_files[i]);
+	}
+
+	tmpfd = innobase_mysql_tmpfile();
+
+	/* Reset the MySQL row buffer that is used when reporting
+	duplicate keys. */
+	innobase_rec_reset(table);
+
+	/* Read clustered index of the table and create files for
+	secondary index entries for merge sort */
+
+	error = row_merge_read_clustered_index(
+		trx, table, old_table, new_table, indexes,
+		merge_files, n_indexes, block);
+
+	if (error != DB_SUCCESS) {
+
+		goto func_exit;
+	}
+
+	/* Now we have files containing index entries ready for
+	sorting and inserting. */
+
+	for (i = 0; i < n_indexes; i++) {
+		error = row_merge_sort(indexes[i], &merge_files[i],
+				       block, &tmpfd, table);
+
+		if (error == DB_SUCCESS) {
+			error = row_merge_insert_index_tuples(
+				trx, indexes[i], new_table,
+				dict_table_zip_size(old_table),
+				merge_files[i].fd, block);
+		}
+
+		/* Close the temporary file to free up space. */
+		row_merge_file_destroy(&merge_files[i]);
+
+		if (error != DB_SUCCESS) {
+			trx->error_key_num = i;
+			goto func_exit;
+		}
+	}
+
+func_exit:
+	close(tmpfd);
+
+	for (i = 0; i < n_indexes; i++) {
+		row_merge_file_destroy(&merge_files[i]);
+	}
+
+	mem_free(merge_files);
+	os_mem_free_large(block, block_size);
+
+	return(error);
+}
diff --git a/storage/innobase/row/row0mysql.c b/storage/innodb_plugin/row/row0mysql.c
similarity index 81%
rename from storage/innobase/row/row0mysql.c
rename to storage/innodb_plugin/row/row0mysql.c
index 2d9ed4fc944..b345bb59624 100644
--- a/storage/innobase/row/row0mysql.c
+++ b/storage/innodb_plugin/row/row0mysql.c
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0mysql.c
 Interface between Innobase row operations and MySQL.
 Contains also create table and other data dictionary operations.
 
-(c) 2000 Innobase Oy
-
 Created 9/17/2000 Heikki Tuuri
 *******************************************************/
 
@@ -14,6 +31,7 @@ Created 9/17/2000 Heikki Tuuri
 #endif
 
 #include "row0ins.h"
+#include "row0merge.h"
 #include "row0sel.h"
 #include "row0upd.h"
 #include "row0row.h"
@@ -25,6 +43,8 @@ Created 9/17/2000 Heikki Tuuri
 #include "dict0boot.h"
 #include "trx0roll.h"
 #include "trx0purge.h"
+#include "trx0rec.h"
+#include "trx0undo.h"
 #include "lock0lock.h"
 #include "rem0cmp.h"
 #include "log0log.h"
@@ -32,45 +52,54 @@ Created 9/17/2000 Heikki Tuuri
 #include "fil0fil.h"
 #include "ibuf0ibuf.h"
 
-/* A dummy variable used to fool the compiler */
-ibool	row_mysql_identically_false	= FALSE;
+/** Provide optional 4.x backwards compatibility for 5.0 and above */
+UNIV_INTERN ibool	row_rollback_on_timeout	= FALSE;
 
-/* Provide optional 4.x backwards compatibility for 5.0 and above */
-ibool	row_rollback_on_timeout	= FALSE;
-
-/* List of tables we should drop in background. ALTER TABLE in MySQL requires
-that the table handler can drop the table in background when there are no
-queries to it any more. Protected by the kernel mutex. */
+/** Chain node of the list of tables to drop in the background. */
 typedef struct row_mysql_drop_struct	row_mysql_drop_t;
+
+/** Chain node of the list of tables to drop in the background. */
 struct row_mysql_drop_struct{
-	char*				table_name;
-	UT_LIST_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
+	char*				table_name;	/*!< table name */
+	UT_LIST_NODE_T(row_mysql_drop_t)row_mysql_drop_list;
+							/*!< list chain node */
 };
 
-UT_LIST_BASE_NODE_T(row_mysql_drop_t)	row_mysql_drop_list;
-ibool	row_mysql_drop_list_inited	= FALSE;
+/** @brief List of tables we should drop in background.
 
-/* Magic table names for invoking various monitor threads */
+ALTER TABLE in MySQL requires that the table handler can drop the
+table in background when there are no queries to it any
+more.  Protected by kernel_mutex. */
+static UT_LIST_BASE_NODE_T(row_mysql_drop_t)	row_mysql_drop_list;
+/** Flag: has row_mysql_drop_list been initialized? */
+static ibool	row_mysql_drop_list_inited	= FALSE;
+
+/** Magic table names for invoking various monitor threads */
+/* @{ */
 static const char S_innodb_monitor[] = "innodb_monitor";
 static const char S_innodb_lock_monitor[] = "innodb_lock_monitor";
 static const char S_innodb_tablespace_monitor[] = "innodb_tablespace_monitor";
 static const char S_innodb_table_monitor[] = "innodb_table_monitor";
 static const char S_innodb_mem_validate[] = "innodb_mem_validate";
+/* @} */
 
-/* Evaluates to true if str1 equals str2_onstack, used for comparing
-the above strings. */
+/** Evaluates to true if str1 equals str2_onstack, used for comparing
+the magic table names.
+@param str1		in: string to compare
+@param str1_len 	in: length of str1, in bytes, including terminating NUL
+@param str2_onstack	in: char[] array containing a NUL terminated string
+@return			TRUE if str1 equals str2_onstack */
 #define STR_EQ(str1, str1_len, str2_onstack) \
 	((str1_len) == sizeof(str2_onstack) \
 	 && memcmp(str1, str2_onstack, sizeof(str2_onstack)) == 0)
 
-/***********************************************************************
-Determine if the given name is a name reserved for MySQL system tables. */
+/*******************************************************************//**
+Determine if the given name is a name reserved for MySQL system tables.
+@return	TRUE if name is a MySQL system table name */
 static
 ibool
 row_mysql_is_system_table(
 /*======================*/
-				 /* out: TRUE if name is a MySQL
-				 system table name */
 	const char*	name)
 {
 	if (strncmp(name, "mysql/", 6) != 0) {
@@ -83,7 +112,20 @@ row_mysql_is_system_table(
 	       || 0 == strcmp(name + 6, "db"));
 }
 
-/***********************************************************************
+/*********************************************************************//**
+If a table is not yet in the drop list, adds the table to the list of tables
+which the master thread drops in background. We need this on Unix because in
+ALTER TABLE MySQL may call drop table even if the table has running queries on
+it. Also, if there are running foreign key checks on the table, we drop the
+table lazily.
+@return	TRUE if the table was not yet in the drop list, and was added there */
+static
+ibool
+row_add_table_to_background_drop_list(
+/*==================================*/
+	const char*	name);	/*!< in: table name */
+
+/*******************************************************************//**
 Delays an INSERT, DELETE or UPDATE operation if the purge is lagging. */
 static
 void
@@ -95,31 +137,31 @@ row_mysql_delay_if_needed(void)
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Frees the blob heap in prebuilt when no longer needed. */
-
+UNIV_INTERN
 void
 row_mysql_prebuilt_free_blob_heap(
 /*==============================*/
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct of a
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct of a
 					ha_innobase:: table handle */
 {
 	mem_heap_free(prebuilt->blob_heap);
 	prebuilt->blob_heap = NULL;
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
-format. */
-
+format.
+@return pointer to the data, we skip the 1 or 2 bytes at the start
+that are used to store the len */
+UNIV_INTERN
 byte*
 row_mysql_store_true_var_len(
 /*=========================*/
-			/* out: pointer to the data, we skip the 1 or 2 bytes
-			at the start that are used to store the len */
-	byte*	dest,	/* in: where to store */
-	ulint	len,	/* in: length, must fit in two bytes */
-	ulint	lenlen)	/* in: storage length of len: either 1 or 2 bytes */
+	byte*	dest,	/*!< in: where to store */
+	ulint	len,	/*!< in: length, must fit in two bytes */
+	ulint	lenlen)	/*!< in: storage length of len: either 1 or 2 bytes */
 {
 	if (lenlen == 2) {
 		ut_a(len < 256 * 256);
@@ -137,18 +179,19 @@ row_mysql_store_true_var_len(
 	return(dest + 1);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
-returns a pointer to the data. */
-
-byte*
+returns a pointer to the data.
+@return pointer to the data, we skip the 1 or 2 bytes at the start
+that are used to store the len */
+UNIV_INTERN
+const byte*
 row_mysql_read_true_varchar(
 /*========================*/
-			/* out: pointer to the data, we skip the 1 or 2 bytes
-			at the start that are used to store the len */
-	ulint*	len,	/* out: variable-length field length */
-	byte*	field,	/* in: field in the MySQL format */
-	ulint	lenlen)	/* in: storage length of len: either 1 or 2 bytes */
+	ulint*		len,	/*!< out: variable-length field length */
+	const byte*	field,	/*!< in: field in the MySQL format */
+	ulint		lenlen)	/*!< in: storage length of len: either 1
+				or 2 bytes */
 {
 	if (lenlen == 2) {
 		*len = mach_read_from_2_little_endian(field);
@@ -163,20 +206,20 @@ row_mysql_read_true_varchar(
 	return(field + 1);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Stores a reference to a BLOB in the MySQL format. */
-
+UNIV_INTERN
 void
 row_mysql_store_blob_ref(
 /*=====================*/
-	byte*	dest,		/* in: where to store */
-	ulint	col_len,	/* in: dest buffer size: determines into
+	byte*		dest,	/*!< in: where to store */
+	ulint		col_len,/*!< in: dest buffer size: determines into
 				how many bytes the BLOB length is stored,
 				the space for the length may vary from 1
 				to 4 bytes */
-	byte*	data,		/* in: BLOB data; if the value to store
+	const void*	data,	/*!< in: BLOB data; if the value to store
 				is SQL NULL this should be NULL pointer */
-	ulint	len)		/* in: BLOB length; if the value to store
+	ulint		len)	/*!< in: BLOB length; if the value to store
 				is SQL NULL this should be 0; remember
 				also to set the NULL bit in the MySQL record
 				header! */
@@ -197,68 +240,68 @@ row_mysql_store_blob_ref(
 
 	mach_write_to_n_little_endian(dest, col_len - 8, len);
 
-	ut_memcpy(dest + col_len - 8, &data, sizeof(byte*));
+	memcpy(dest + col_len - 8, &data, sizeof data);
 }
 
-/***********************************************************************
-Reads a reference to a BLOB in the MySQL format. */
-
-byte*
+/*******************************************************************//**
+Reads a reference to a BLOB in the MySQL format.
+@return	pointer to BLOB data */
+UNIV_INTERN
+const byte*
 row_mysql_read_blob_ref(
 /*====================*/
-				/* out: pointer to BLOB data */
-	ulint*	len,		/* out: BLOB length */
-	byte*	ref,		/* in: BLOB reference in the MySQL format */
-	ulint	col_len)	/* in: BLOB reference length (not BLOB
-				length) */
+	ulint*		len,		/*!< out: BLOB length */
+	const byte*	ref,		/*!< in: BLOB reference in the
+					MySQL format */
+	ulint		col_len)	/*!< in: BLOB reference length
+					(not BLOB length) */
 {
 	byte*	data;
 
 	*len = mach_read_from_n_little_endian(ref, col_len - 8);
 
-	ut_memcpy(&data, ref + col_len - 8, sizeof(byte*));
+	memcpy(&data, ref + col_len - 8, sizeof data);
 
 	return(data);
 }
 
-/******************************************************************
+/**************************************************************//**
 Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
 The counterpart of this function is row_sel_field_store_in_mysql_format() in
-row0sel.c. */
-
+row0sel.c.
+@return	up to which byte we used buf in the conversion */
+UNIV_INTERN
 byte*
 row_mysql_store_col_in_innobase_format(
 /*===================================*/
-					/* out: up to which byte we used
-					buf in the conversion */
-	dfield_t*	dfield,		/* in/out: dfield where dtype
+	dfield_t*	dfield,		/*!< in/out: dfield where dtype
 					information must be already set when
 					this function is called! */
-	byte*		buf,		/* in/out: buffer for a converted
+	byte*		buf,		/*!< in/out: buffer for a converted
 					integer value; this must be at least
 					col_len long then! */
-	ibool		row_format_col,	/* TRUE if the mysql_data is from
+	ibool		row_format_col,	/*!< TRUE if the mysql_data is from
 					a MySQL row, FALSE if from a MySQL
 					key value;
 					in MySQL, a true VARCHAR storage
 					format differs in a row and in a
 					key value: in a key value the length
 					is always stored in 2 bytes! */
-	byte*		mysql_data,	/* in: MySQL column value, not
+	const byte*	mysql_data,	/*!< in: MySQL column value, not
 					SQL NULL; NOTE that dfield may also
 					get a pointer to mysql_data,
 					therefore do not discard this as long
 					as dfield is used! */
-	ulint		col_len,	/* in: MySQL column length; NOTE that
+	ulint		col_len,	/*!< in: MySQL column length; NOTE that
 					this is the storage length of the
 					column in the MySQL format row, not
 					necessarily the length of the actual
 					payload data; if the column is a true
 					VARCHAR then this is irrelevant */
-	ulint		comp)		/* in: nonzero=compact format */
+	ulint		comp)		/*!< in: nonzero=compact format */
 {
-	byte*		ptr	= mysql_data;
-	dtype_t*	dtype;
+	const byte*	ptr	= mysql_data;
+	const dtype_t*	dtype;
 	ulint		type;
 	ulint		lenlen;
 
@@ -271,12 +314,12 @@ row_mysql_store_col_in_innobase_format(
 		sign bit negated if the data is a signed integer. In MySQL,
 		integers are stored in a little-endian format. */
 
-		ptr = buf + col_len;
+		byte*	p = buf + col_len;
 
 		for (;;) {
-			ptr--;
-			*ptr = *mysql_data;
-			if (ptr == buf) {
+			p--;
+			*p = *mysql_data;
+			if (p == buf) {
 				break;
 			}
 			mysql_data++;
@@ -284,9 +327,10 @@ row_mysql_store_col_in_innobase_format(
 
 		if (!(dtype->prtype & DATA_UNSIGNED)) {
 
-			*ptr = (byte) (*ptr ^ 128);
+			*buf ^= 128;
 		}
 
+		ptr = buf;
 		buf += col_len;
 	} else if ((type == DATA_VARCHAR
 		    || type == DATA_VARMYSQL
@@ -382,7 +426,7 @@ row_mysql_store_col_in_innobase_format(
 	return(buf);
 }
 
-/******************************************************************
+/**************************************************************//**
 Convert a row in the MySQL format to a row in the Innobase format. Note that
 the function to convert a MySQL format key value to an InnoDB dtuple is
 row_sel_convert_mysql_key_to_innobase() in row0sel.c. */
@@ -390,12 +434,12 @@ static
 void
 row_mysql_convert_row_to_innobase(
 /*==============================*/
-	dtuple_t*	row,		/* in/out: Innobase row where the
+	dtuple_t*	row,		/*!< in/out: Innobase row where the
 					field type information is already
 					copied there! */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct where template
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct where template
 					must be of type ROW_MYSQL_WHOLE_ROW */
-	byte*		mysql_rec)	/* in: row in the MySQL format;
+	byte*		mysql_rec)	/*!< in: row in the MySQL format;
 					NOTE: do not discard as long as
 					row is used, as row may contain
 					pointers to this record! */
@@ -420,7 +464,7 @@ row_mysql_convert_row_to_innobase(
 
 				/* It is SQL NULL */
 
-				dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
+				dfield_set_null(dfield);
 
 				goto next_column;
 			}
@@ -438,23 +482,22 @@ next_column:
 	}
 }
 
-/********************************************************************
-Handles user errors and lock waits detected by the database engine. */
-
+/****************************************************************//**
+Handles user errors and lock waits detected by the database engine.
+@return TRUE if it was a lock wait and we should continue running the
+query thread */
+UNIV_INTERN
 ibool
 row_mysql_handle_errors(
 /*====================*/
-				/* out: TRUE if it was a lock wait and
-				we should continue running the query thread */
-	ulint*		new_err,/* out: possible new error encountered in
+	ulint*		new_err,/*!< out: possible new error encountered in
 				lock wait, or if no new error, the value
 				of trx->error_state at the entry of this
 				function */
-	trx_t*		trx,	/* in: transaction */
-	que_thr_t*	thr,	/* in: query thread */
-	trx_savept_t*	savept)	/* in: savepoint or NULL */
+	trx_t*		trx,	/*!< in: transaction */
+	que_thr_t*	thr,	/*!< in: query thread */
+	trx_savept_t*	savept)	/*!< in: savepoint or NULL */
 {
-#ifndef UNIV_HOTBACKUP
 	ulint	err;
 
 handle_new_error:
@@ -464,15 +507,21 @@ handle_new_error:
 
 	trx->error_state = DB_SUCCESS;
 
-	if ((err == DB_DUPLICATE_KEY)
-	    || (err == DB_FOREIGN_DUPLICATE_KEY)) {
-		if (savept) {
-			/* Roll back the latest, possibly incomplete
-			insertion or update */
-
-			trx_general_rollback_for_mysql(trx, TRUE, savept);
+	switch (err) {
+	case DB_LOCK_WAIT_TIMEOUT:
+		if (row_rollback_on_timeout) {
+			trx_general_rollback_for_mysql(trx, FALSE, NULL);
+			break;
 		}
-	} else if (err == DB_TOO_BIG_RECORD) {
+		/* fall through */
+	case DB_DUPLICATE_KEY:
+	case DB_FOREIGN_DUPLICATE_KEY:
+	case DB_TOO_BIG_RECORD:
+	case DB_ROW_IS_REFERENCED:
+	case DB_NO_REFERENCED_ROW:
+	case DB_CANNOT_ADD_CONSTRAINT:
+	case DB_TOO_MANY_CONCURRENT_TRXS:
+	case DB_OUT_OF_FILE_SPACE:
 		if (savept) {
 			/* Roll back the latest, possibly incomplete
 			insertion or update */
@@ -480,19 +529,8 @@ handle_new_error:
 			trx_general_rollback_for_mysql(trx, TRUE, savept);
 		}
 		/* MySQL will roll back the latest SQL statement */
-	} else if (err == DB_ROW_IS_REFERENCED
-		   || err == DB_NO_REFERENCED_ROW
-		   || err == DB_CANNOT_ADD_CONSTRAINT
-		   || err == DB_TOO_MANY_CONCURRENT_TRXS) {
-		if (savept) {
-			/* Roll back the latest, possibly incomplete
-			insertion or update */
-
-			trx_general_rollback_for_mysql(trx, TRUE, savept);
-		}
-		/* MySQL will roll back the latest SQL statement */
-	} else if (err == DB_LOCK_WAIT) {
-
+		break;
+	case DB_LOCK_WAIT:
 		srv_suspend_mysql_thread(thr);
 
 		if (trx->error_state != DB_SUCCESS) {
@@ -505,31 +543,15 @@ handle_new_error:
 
 		return(TRUE);
 
-	} else if (err == DB_DEADLOCK
-		   || err == DB_LOCK_TABLE_FULL
-		   || (err == DB_LOCK_WAIT_TIMEOUT
-		       && row_rollback_on_timeout)) {
+	case DB_DEADLOCK:
+	case DB_LOCK_TABLE_FULL:
 		/* Roll back the whole transaction; this resolution was added
 		to version 3.23.43 */
 
 		trx_general_rollback_for_mysql(trx, FALSE, NULL);
+		break;
 
-	} else if (err == DB_OUT_OF_FILE_SPACE
-		   || err == DB_LOCK_WAIT_TIMEOUT) {
-
-		ut_ad(!(err == DB_LOCK_WAIT_TIMEOUT
-		        && row_rollback_on_timeout));
-
-		if (savept) {
-			/* Roll back the latest, possibly incomplete
-			insertion or update */
-
-			trx_general_rollback_for_mysql(trx, TRUE, savept);
-		}
-		/* MySQL will roll back the latest SQL statement */
-
-	} else if (err == DB_MUST_GET_MORE_FILE_SPACE) {
-
+	case DB_MUST_GET_MORE_FILE_SPACE:
 		fputs("InnoDB: The database cannot continue"
 		      " operation because of\n"
 		      "InnoDB: lack of space. You must add"
@@ -537,8 +559,8 @@ handle_new_error:
 		      "InnoDB: my.cnf and restart the database.\n", stderr);
 
 		exit(1);
-	} else if (err == DB_CORRUPTION) {
 
+	case DB_CORRUPTION:
 		fputs("InnoDB: We detected index corruption"
 		      " in an InnoDB type table.\n"
 		      "InnoDB: You have to dump + drop + reimport"
@@ -550,11 +572,10 @@ handle_new_error:
 		      "InnoDB: If the mysqld server crashes"
 		      " after the startup or when\n"
 		      "InnoDB: you dump the tables, look at\n"
-		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-		      "forcing-recovery.html"
+		      "InnoDB: " REFMAN "forcing-recovery.html"
 		      " for help.\n", stderr);
-
-	} else {
+		break;
+	default:
 		fprintf(stderr, "InnoDB: unknown error code %lu\n",
 			(ulong) err);
 		ut_error;
@@ -569,62 +590,34 @@ handle_new_error:
 	trx->error_state = DB_SUCCESS;
 
 	return(FALSE);
-#else /* UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds.  Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-	return(FALSE);
-#endif /* UNIV_HOTBACKUP */
 }
 
-/************************************************************************
-Create a prebuilt struct for a MySQL table handle. */
-
+/********************************************************************//**
+Create a prebuilt struct for a MySQL table handle.
+@return	own: a prebuilt struct */
+UNIV_INTERN
 row_prebuilt_t*
 row_create_prebuilt(
 /*================*/
-				/* out, own: a prebuilt struct */
-	dict_table_t*	table)	/* in: Innobase table handle */
+	dict_table_t*	table)	/*!< in: Innobase table handle */
 {
 	row_prebuilt_t*	prebuilt;
 	mem_heap_t*	heap;
 	dict_index_t*	clust_index;
 	dtuple_t*	ref;
 	ulint		ref_len;
-	ulint		i;
 
-	heap = mem_heap_create(128);
+	heap = mem_heap_create(sizeof *prebuilt + 128);
 
-	prebuilt = mem_heap_alloc(heap, sizeof(row_prebuilt_t));
+	prebuilt = mem_heap_zalloc(heap, sizeof *prebuilt);
 
 	prebuilt->magic_n = ROW_PREBUILT_ALLOCATED;
 	prebuilt->magic_n2 = ROW_PREBUILT_ALLOCATED;
 
 	prebuilt->table = table;
 
-	prebuilt->trx = NULL;
-
 	prebuilt->sql_stat_start = TRUE;
-
-	prebuilt->mysql_has_locked = FALSE;
-
-	prebuilt->index = NULL;
-
-	prebuilt->used_in_HANDLER = FALSE;
-
-	prebuilt->n_template = 0;
-	prebuilt->mysql_template = NULL;
-
 	prebuilt->heap = heap;
-	prebuilt->ins_node = NULL;
-
-	prebuilt->ins_upd_rec_buff = NULL;
-	prebuilt->default_rec = NULL;
-
-	prebuilt->upd_node = NULL;
-	prebuilt->ins_graph = NULL;
-	prebuilt->upd_graph = NULL;
 
 	prebuilt->pcur = btr_pcur_create_for_mysql();
 	prebuilt->clust_pcur = btr_pcur_create_for_mysql();
@@ -632,10 +625,6 @@ row_create_prebuilt(
 	prebuilt->select_lock_type = LOCK_NONE;
 	prebuilt->stored_select_lock_type = 99999999;
 
-	prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
-
-	prebuilt->sel_graph = NULL;
-
 	prebuilt->search_tuple = dtuple_create(
 		heap, 2 * dict_table_get_n_cols(table));
 
@@ -652,16 +641,6 @@ row_create_prebuilt(
 
 	prebuilt->clust_ref = ref;
 
-	for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
-		prebuilt->fetch_cache[i] = NULL;
-	}
-
-	prebuilt->n_fetch_cached = 0;
-
-	prebuilt->blob_heap = NULL;
-
-	prebuilt->old_vers_heap = NULL;
-
 	prebuilt->autoinc_error = 0;
 	prebuilt->autoinc_offset = 0;
 
@@ -674,22 +653,25 @@ row_create_prebuilt(
 	return(prebuilt);
 }
 
-/************************************************************************
+/********************************************************************//**
 Free a prebuilt struct for a MySQL table handle. */
-
+UNIV_INTERN
 void
 row_prebuilt_free(
 /*==============*/
-	row_prebuilt_t*	prebuilt)	/* in, own: prebuilt struct */
+	row_prebuilt_t*	prebuilt,	/*!< in, own: prebuilt struct */
+	ibool		dict_locked)	/*!< in: TRUE=data dictionary locked */
 {
 	ulint	i;
 
-	if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED
-	    || prebuilt->magic_n2 != ROW_PREBUILT_ALLOCATED) {
+	if (UNIV_UNLIKELY
+	    (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED
+	     || prebuilt->magic_n2 != ROW_PREBUILT_ALLOCATED)) {
+
 		fprintf(stderr,
 			"InnoDB: Error: trying to free a corrupt\n"
 			"InnoDB: table handle. Magic n %lu,"
-			" magic n2 %lu, table name",
+			" magic n2 %lu, table name ",
 			(ulong) prebuilt->magic_n,
 			(ulong) prebuilt->magic_n2);
 		ut_print_name(stderr, NULL, TRUE, prebuilt->table->name);
@@ -751,22 +733,21 @@ row_prebuilt_free(
 		}
 	}
 
-	dict_table_decrement_handle_count(prebuilt->table);
+	dict_table_decrement_handle_count(prebuilt->table, dict_locked);
 
 	mem_heap_free(prebuilt->heap);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Updates the transaction pointers in query graphs stored in the prebuilt
 struct. */
-
+UNIV_INTERN
 void
 row_update_prebuilt_trx(
 /*====================*/
-					/* out: prebuilt dtuple */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct in MySQL
-					handle */
-	trx_t*		trx)		/* in: transaction handle */
+	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt struct
+					in MySQL handle */
+	trx_t*		trx)		/*!< in: transaction handle */
 {
 	if (trx->magic_n != TRX_MAGIC_N) {
 		fprintf(stderr,
@@ -782,9 +763,9 @@ row_update_prebuilt_trx(
 	if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
 		fprintf(stderr,
 			"InnoDB: Error: trying to use a corrupt\n"
-			"InnoDB: table handle. Magic n %lu, table name",
+			"InnoDB: table handle. Magic n %lu, table name ",
 			(ulong) prebuilt->magic_n);
-		ut_print_name(stderr, NULL, TRUE, prebuilt->table->name);
+		ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
 		putc('\n', stderr);
 
 		mem_analyze_corruption(prebuilt);
@@ -807,23 +788,21 @@ row_update_prebuilt_trx(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Gets pointer to a prebuilt dtuple used in insertions. If the insert graph
 has not yet been built in the prebuilt struct, then this function first
-builds it. */
+builds it.
+@return	prebuilt dtuple; the column type information is also set in it */
 static
 dtuple_t*
 row_get_prebuilt_insert_row(
 /*========================*/
-					/* out: prebuilt dtuple; the column
-					type information is also set in it */
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
 					handle */
 {
 	ins_node_t*	node;
 	dtuple_t*	row;
 	dict_table_t*	table	= prebuilt->table;
-	ulint		i;
 
 	ut_ad(prebuilt && table && prebuilt->trx);
 
@@ -846,14 +825,6 @@ row_get_prebuilt_insert_row(
 
 		dict_table_copy_types(row, table);
 
-		/* We init the value of every field to the SQL NULL to avoid
-		a debug assertion from failing */
-
-		for (i = 0; i < dtuple_get_n_fields(row); i++) {
-
-			dtuple_get_nth_field(row, i)->len = UNIV_SQL_NULL;
-		}
-
 		ins_node_set_new_row(node, row);
 
 		prebuilt->ins_graph = que_node_get_parent(
@@ -866,14 +837,14 @@ row_get_prebuilt_insert_row(
 	return(prebuilt->ins_node->row);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Updates the table modification counter and calculates new estimates
 for table and index statistics if necessary. */
 UNIV_INLINE
 void
 row_update_statistics_if_needed(
 /*============================*/
-	dict_table_t*	table)	/* in: table */
+	dict_table_t*	table)	/*!< in: table */
 {
 	ulint	counter;
 
@@ -888,52 +859,55 @@ row_update_statistics_if_needed(
 	a counter table which is very small and updated very often. */
 
 	if (counter > 2000000000
-	    || ((ib_longlong)counter > 16 + table->stat_n_rows / 16)) {
+	    || ((ib_int64_t)counter > 16 + table->stat_n_rows / 16)) {
 
 		dict_update_statistics(table);
 	}
 }
 
-/*************************************************************************
-Unlocks an AUTO_INC type lock possibly reserved by trx. */
-
+/*********************************************************************//**
+Unlocks AUTO_INC type locks that were possibly reserved by a trx. */
+UNIV_INTERN
 void
 row_unlock_table_autoinc_for_mysql(
 /*===============================*/
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in/out: transaction */
 {
-	if (!trx->auto_inc_lock) {
+	mutex_enter(&kernel_mutex);
 
-		return;
-	}
+	lock_release_autoinc_locks(trx);
 
-	lock_table_unlock_auto_inc(trx);
+	mutex_exit(&kernel_mutex);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
 AUTO_INC lock gives exclusive access to the auto-inc counter of the
 table. The lock is reserved only for the duration of an SQL statement.
 It is not compatible with another AUTO_INC or exclusive lock on the
-table. */
-
+table.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_lock_table_autoinc_for_mysql(
 /*=============================*/
-					/* out: error code or DB_SUCCESS */
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in the MySQL
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in the MySQL
 					table handle */
 {
-	trx_t*		trx		= prebuilt->trx;
-	ins_node_t*	node		= prebuilt->ins_node;
-	que_thr_t*	thr;
-	ulint		err;
-	ibool		was_lock_wait;
+	trx_t*			trx	= prebuilt->trx;
+	ins_node_t*		node	= prebuilt->ins_node;
+	const dict_table_t*	table	= prebuilt->table;
+	que_thr_t*		thr;
+	ulint			err;
+	ibool			was_lock_wait;
 
 	ut_ad(trx);
 	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
 
-	if (trx->auto_inc_lock) {
+	/* If we already hold an AUTOINC lock on the table then do nothing.
+        Note: We peek at the value of the current owner without acquiring
+	the kernel mutex. **/
+	if (trx == table->autoinc_trx) {
 
 		return(DB_SUCCESS);
 	}
@@ -986,20 +960,20 @@ run_again:
 	return((int) err);
 }
 
-/*************************************************************************
-Sets a table lock on the table mentioned in prebuilt. */
-
+/*********************************************************************//**
+Sets a table lock on the table mentioned in prebuilt.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_lock_table_for_mysql(
 /*=====================*/
-					/* out: error code or DB_SUCCESS */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct in the MySQL
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct in the MySQL
 					table handle */
-	dict_table_t*	table,		/* in: table to lock, or NULL
+	dict_table_t*	table,		/*!< in: table to lock, or NULL
 					if prebuilt->table should be
 					locked as
 					prebuilt->select_lock_type */
-	ulint		mode)		/* in: lock mode of table
+	ulint		mode)		/*!< in: lock mode of table
 					(ignored if table==NULL) */
 {
 	trx_t*		trx		= prebuilt->trx;
@@ -1063,15 +1037,15 @@ run_again:
 	return((int) err);
 }
 
-/*************************************************************************
-Does an insert for MySQL. */
-
+/*********************************************************************//**
+Does an insert for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_insert_for_mysql(
 /*=================*/
-					/* out: error code or DB_SUCCESS */
-	byte*		mysql_rec,	/* in: row in the MySQL format */
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
+	byte*		mysql_rec,	/*!< in: row in the MySQL format */
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
 					handle */
 {
 	trx_savept_t	savept;
@@ -1095,20 +1069,18 @@ row_insert_for_mysql(
 			"InnoDB: the MySQL datadir, or have you"
 			" used DISCARD TABLESPACE?\n"
 			"InnoDB: Look from\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"innodb-troubleshooting.html\n"
+			"InnoDB: " REFMAN "innodb-troubleshooting.html\n"
 			"InnoDB: how you can resolve the problem.\n",
 			prebuilt->table->name);
 		return(DB_ERROR);
 	}
 
-	if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
+	if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
 		fprintf(stderr,
 			"InnoDB: Error: trying to free a corrupt\n"
-			"InnoDB: table handle. Magic n %lu, table name",
+			"InnoDB: table handle. Magic n %lu, table name ",
 			(ulong) prebuilt->magic_n);
-		ut_print_name(stderr, prebuilt->trx, TRUE,
-			      prebuilt->table->name);
+		ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
 		putc('\n', stderr);
 
 		mem_analyze_corruption(prebuilt);
@@ -1116,7 +1088,7 @@ row_insert_for_mysql(
 		ut_error;
 	}
 
-	if (srv_created_new_raw || srv_force_recovery) {
+	if (UNIV_UNLIKELY(srv_created_new_raw || srv_force_recovery)) {
 		fputs("InnoDB: A new raw disk partition was initialized or\n"
 		      "InnoDB: innodb_force_recovery is on: we do not allow\n"
 		      "InnoDB: database modifications by the user. Shut down\n"
@@ -1197,13 +1169,13 @@ run_again:
 	return((int) err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Builds a dummy query graph used in selects. */
-
+UNIV_INTERN
 void
 row_prebuild_sel_graph(
 /*===================*/
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
 					handle */
 {
 	sel_node_t*	node;
@@ -1223,16 +1195,16 @@ row_prebuild_sel_graph(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Creates an query graph node of 'update' type to be used in the MySQL
-interface. */
-
+interface.
+@return	own: update node */
+UNIV_INTERN
 upd_node_t*
 row_create_update_node_for_mysql(
 /*=============================*/
-				/* out, own: update node */
-	dict_table_t*	table,	/* in: table to update */
-	mem_heap_t*	heap)	/* in: mem heap from which allocated */
+	dict_table_t*	table,	/*!< in: table to update */
+	mem_heap_t*	heap)	/*!< in: mem heap from which allocated */
 {
 	upd_node_t*	node;
 
@@ -1241,7 +1213,6 @@ row_create_update_node_for_mysql(
 	node->in_mysql_interface = TRUE;
 	node->is_delete = FALSE;
 	node->searched_update = FALSE;
-	node->select_will_do_update = FALSE;
 	node->select = NULL;
 	node->pcur = btr_pcur_create_for_mysql();
 	node->table = table;
@@ -1260,16 +1231,16 @@ row_create_update_node_for_mysql(
 	return(node);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Gets pointer to a prebuilt update vector used in updates. If the update
 graph has not yet been built in the prebuilt struct, then this function
-first builds it. */
-
+first builds it.
+@return	prebuilt update vector */
+UNIV_INTERN
 upd_t*
 row_get_prebuilt_update_vector(
 /*===========================*/
-					/* out: prebuilt update vector */
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
 					handle */
 {
 	dict_table_t*	table	= prebuilt->table;
@@ -1296,16 +1267,16 @@ row_get_prebuilt_update_vector(
 	return(prebuilt->upd_node->update);
 }
 
-/*************************************************************************
-Does an update or delete of a row for MySQL. */
-
+/*********************************************************************//**
+Does an update or delete of a row for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_update_for_mysql(
 /*=================*/
-					/* out: error code or DB_SUCCESS */
-	byte*		mysql_rec,	/* in: the row to be updated, in
+	byte*		mysql_rec,	/*!< in: the row to be updated, in
 					the MySQL format */
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
 					handle */
 {
 	trx_savept_t	savept;
@@ -1333,20 +1304,18 @@ row_update_for_mysql(
 			"InnoDB: the MySQL datadir, or have you"
 			" used DISCARD TABLESPACE?\n"
 			"InnoDB: Look from\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"innodb-troubleshooting.html\n"
+			"InnoDB: " REFMAN "innodb-troubleshooting.html\n"
 			"InnoDB: how you can resolve the problem.\n",
 			prebuilt->table->name);
 		return(DB_ERROR);
 	}
 
-	if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
+	if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
 		fprintf(stderr,
 			"InnoDB: Error: trying to free a corrupt\n"
-			"InnoDB: table handle. Magic n %lu, table name",
+			"InnoDB: table handle. Magic n %lu, table name ",
 			(ulong) prebuilt->magic_n);
-		ut_print_name(stderr, prebuilt->trx, TRUE,
-			      prebuilt->table->name);
+		ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
 		putc('\n', stderr);
 
 		mem_analyze_corruption(prebuilt);
@@ -1354,7 +1323,7 @@ row_update_for_mysql(
 		ut_error;
 	}
 
-	if (srv_created_new_raw || srv_force_recovery) {
+	if (UNIV_UNLIKELY(srv_created_new_raw || srv_force_recovery)) {
 		fputs("InnoDB: A new raw disk partition was initialized or\n"
 		      "InnoDB: innodb_force_recovery is on: we do not allow\n"
 		      "InnoDB: database modifications by the user. Shut down\n"
@@ -1453,7 +1422,7 @@ run_again:
 	return((int) err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 This can only be used when srv_locks_unsafe_for_binlog is TRUE or
 this session is using a READ COMMITTED isolation level. Before
 calling this function we must use trx_reset_new_rec_lock_info() and
@@ -1463,15 +1432,15 @@ and also under prebuilt->clust_pcur. Currently, this is only used and tested
 in the case of an UPDATE or a DELETE statement, where the row lock is of the
 LOCK_X type.
 Thus, this implements a 'mini-rollback' that releases the latest record
-locks we set. */
-
+locks we set.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_unlock_for_mysql(
 /*=================*/
-					/* out: error code or DB_SUCCESS */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct in MySQL
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct in MySQL
 					handle */
-	ibool		has_latches_on_recs)/* TRUE if called so that we have
+	ibool		has_latches_on_recs)/*!< TRUE if called so that we have
 					the latches on the records under pcur
 					and clust_pcur, and we do not need to
 					reposition the cursors. */
@@ -1500,9 +1469,9 @@ row_unlock_for_mysql(
 
 	if (prebuilt->new_rec_locks >= 1) {
 
-		rec_t*		rec;
+		const rec_t*	rec;
 		dict_index_t*	index;
-		dulint		rec_trx_id;
+		trx_id_t	rec_trx_id;
 		mtr_t		mtr;
 
 		mtr_start(&mtr);
@@ -1529,9 +1498,14 @@ row_unlock_for_mysql(
 			index = btr_pcur_get_btr_cur(clust_pcur)->index;
 		}
 
+		if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
+			/* This is not a clustered index record.  We
+			do not know how to unlock the record. */
+			goto no_unlock;
+		}
+
 		/* If the record has been modified by this
 		transaction, do not unlock it. */
-		ut_a(index->type & DICT_CLUSTERED);
 
 		if (index->trx_id_offset) {
 			rec_trx_id = trx_read_trx_id(rec
@@ -1541,7 +1515,7 @@ row_unlock_for_mysql(
 			ulint	offsets_[REC_OFFS_NORMAL_SIZE];
 			ulint*	offsets				= offsets_;
 
-			*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+			rec_offs_init(offsets_);
 			offsets = rec_get_offsets(rec, index, offsets,
 						  ULINT_UNDEFINED, &heap);
 
@@ -1558,17 +1532,20 @@ row_unlock_for_mysql(
 			rec = btr_pcur_get_rec(pcur);
 			index = btr_pcur_get_btr_cur(pcur)->index;
 
-			lock_rec_unlock(trx, rec, prebuilt->select_lock_type);
+			lock_rec_unlock(trx, btr_pcur_get_block(pcur),
+					rec, prebuilt->select_lock_type);
 
 			if (prebuilt->new_rec_locks >= 2) {
 				rec = btr_pcur_get_rec(clust_pcur);
 				index = btr_pcur_get_btr_cur(clust_pcur)->index;
 
-				lock_rec_unlock(trx, rec,
+				lock_rec_unlock(trx,
+						btr_pcur_get_block(clust_pcur),
+						rec,
 						prebuilt->select_lock_type);
 			}
 		}
-
+no_unlock:
 		mtr_commit(&mtr);
 	}
 
@@ -1577,17 +1554,17 @@ row_unlock_for_mysql(
 	return(DB_SUCCESS);
 }
 
-/**************************************************************************
-Does a cascaded delete or set null in a foreign key operation. */
-
+/**********************************************************************//**
+Does a cascaded delete or set null in a foreign key operation.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 ulint
 row_update_cascade_for_mysql(
 /*=========================*/
-				/* out: error code or DB_SUCCESS */
-	que_thr_t*	thr,	/* in: query thread */
-	upd_node_t*	node,	/* in: update node used in the cascade
+	que_thr_t*	thr,	/*!< in: query thread */
+	upd_node_t*	node,	/*!< in: update node used in the cascade
 				or set null operation */
-	dict_table_t*	table)	/* in: table where we do the operation */
+	dict_table_t*	table)	/*!< in: table where we do the operation */
 {
 	ulint	err;
 	trx_t*	trx;
@@ -1646,14 +1623,15 @@ run_again:
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if a table is such that we automatically created a clustered
-index on it (on row id). */
-
+index on it (on row id).
+@return	TRUE if the clustered index was generated automatically */
+UNIV_INTERN
 ibool
 row_table_got_default_clust_index(
 /*==============================*/
-	dict_table_t*	table)
+	const dict_table_t*	table)	/*!< in: table */
 {
 	const dict_index_t*	clust_index;
 
@@ -1662,17 +1640,18 @@ row_table_got_default_clust_index(
 	return(dict_index_get_nth_col(clust_index, 0)->mtype == DATA_SYS);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Calculates the key number used inside MySQL for an Innobase index. We have
-to take into account if we generated a default clustered index for the table */
-
+to take into account if we generated a default clustered index for the table
+@return	the key number used inside MySQL */
+UNIV_INTERN
 ulint
 row_get_mysql_key_number_for_index(
 /*===============================*/
-	dict_index_t*	index)
+	const dict_index_t*	index)	/*!< in: index */
 {
-	dict_index_t*	ind;
-	ulint		i;
+	const dict_index_t*	ind;
+	ulint			i;
 
 	ut_a(index);
 
@@ -1692,29 +1671,31 @@ row_get_mysql_key_number_for_index(
 	return(i);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Locks the data dictionary in shared mode from modifications, for performing
 foreign key check, rollback, or other operation invisible to MySQL. */
-
+UNIV_INTERN
 void
-row_mysql_freeze_data_dictionary(
-/*=============================*/
-	trx_t*	trx)	/* in: transaction */
+row_mysql_freeze_data_dictionary_func(
+/*==================================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	const char*	file,	/*!< in: file name */
+	ulint		line)	/*!< in: line number */
 {
 	ut_a(trx->dict_operation_lock_mode == 0);
 
-	rw_lock_s_lock(&dict_operation_lock);
+	rw_lock_s_lock_func(&dict_operation_lock, 0, file, line);
 
 	trx->dict_operation_lock_mode = RW_S_LATCH;
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Unlocks the data dictionary shared lock. */
-
+UNIV_INTERN
 void
 row_mysql_unfreeze_data_dictionary(
 /*===============================*/
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in/out: transaction */
 {
 	ut_a(trx->dict_operation_lock_mode == RW_S_LATCH);
 
@@ -1723,14 +1704,16 @@ row_mysql_unfreeze_data_dictionary(
 	trx->dict_operation_lock_mode = 0;
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Locks the data dictionary exclusively for performing a table create or other
 data dictionary modification operation. */
-
+UNIV_INTERN
 void
-row_mysql_lock_data_dictionary(
-/*===========================*/
-	trx_t*	trx)	/* in: transaction */
+row_mysql_lock_data_dictionary_func(
+/*================================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	const char*	file,	/*!< in: file name */
+	ulint		line)	/*!< in: line number */
 {
 	ut_a(trx->dict_operation_lock_mode == 0
 	     || trx->dict_operation_lock_mode == RW_X_LATCH);
@@ -1738,19 +1721,19 @@ row_mysql_lock_data_dictionary(
 	/* Serialize data dictionary operations with dictionary mutex:
 	no deadlocks or lock waits can occur then in these operations */
 
-	rw_lock_x_lock(&dict_operation_lock);
+	rw_lock_x_lock_func(&dict_operation_lock, 0, file, line);
 	trx->dict_operation_lock_mode = RW_X_LATCH;
 
 	mutex_enter(&(dict_sys->mutex));
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Unlocks the data dictionary exclusive lock. */
-
+UNIV_INTERN
 void
 row_mysql_unlock_data_dictionary(
 /*=============================*/
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in/out: transaction */
 {
 	ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
 
@@ -1763,19 +1746,20 @@ row_mysql_unlock_data_dictionary(
 	trx->dict_operation_lock_mode = 0;
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Creates a table for MySQL. If the name of the table ends in
 one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
 "innodb_table_monitor", then this will also start the printing of monitor
 output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate(). */
-
+InnoDB will try to invoke mem_validate().
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_create_table_for_mysql(
 /*=======================*/
-				/* out: error code or DB_SUCCESS */
-	dict_table_t*	table,	/* in: table definition */
-	trx_t*		trx)	/* in: transaction handle */
+	dict_table_t*	table,	/*!< in, own: table definition
+				(will be freed) */
+	trx_t*		trx)	/*!< in: transaction handle */
 {
 	tab_node_t*	node;
 	mem_heap_t*	heap;
@@ -1798,7 +1782,7 @@ row_create_table_for_mysql(
 		      " by the user.\n"
 		      "InnoDB: Shut down mysqld and edit my.cnf so that newraw"
 		      " is replaced with raw.\n", stderr);
-
+err_exit:
 		dict_mem_table_free(table);
 		trx_commit_for_mysql(trx);
 
@@ -1815,11 +1799,7 @@ row_create_table_for_mysql(
 			"InnoDB: MySQL system tables must be"
 			" of the MyISAM type!\n",
 			table->name);
-
-		dict_mem_table_free(table);
-		trx_commit_for_mysql(trx);
-
-		return(DB_ERROR);
+		goto err_exit;
 	}
 
 	/* Check that no reserved column names are used. */
@@ -1827,10 +1807,7 @@ row_create_table_for_mysql(
 		if (dict_col_name_is_reserved(
 			    dict_table_get_col_name(table, i))) {
 
-			dict_mem_table_free(table);
-			trx_commit_for_mysql(trx);
-
-			return(DB_ERROR);
+			goto err_exit;
 		}
 	}
 
@@ -1895,7 +1872,7 @@ row_create_table_for_mysql(
 
 	heap = mem_heap_create(512);
 
-	trx->dict_operation = TRUE;
+	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
 
 	node = tab_create_graph_create(table, heap);
 
@@ -1906,60 +1883,56 @@ row_create_table_for_mysql(
 
 	err = trx->error_state;
 
-	if (err != DB_SUCCESS) {
-		/* We have special error handling here */
-
+	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
 		trx->error_state = DB_SUCCESS;
-
 		trx_general_rollback_for_mysql(trx, FALSE, NULL);
+	}
 
-		if (err == DB_OUT_OF_FILE_SPACE) {
-			ut_print_timestamp(stderr);
+	switch (err) {
+	case DB_OUT_OF_FILE_SPACE:
+		ut_print_timestamp(stderr);
+		fputs("  InnoDB: Warning: cannot create table ",
+		      stderr);
+		ut_print_name(stderr, trx, TRUE, table->name);
+		fputs(" because tablespace full\n", stderr);
 
-			fputs("  InnoDB: Warning: cannot create table ",
-			      stderr);
-			ut_print_name(stderr, trx, TRUE, table->name);
-			fputs(" because tablespace full\n", stderr);
+		if (dict_table_get_low(table->name)) {
 
-			if (dict_table_get_low(table->name)) {
-
-				row_drop_table_for_mysql(table->name, trx,
-							 FALSE);
-			}
-
-		} else if (err == DB_DUPLICATE_KEY) {
-			ut_print_timestamp(stderr);
-
-			fputs("  InnoDB: Error: table ", stderr);
-			ut_print_name(stderr, trx, TRUE, table->name);
-			fputs(" already exists in InnoDB internal\n"
-			      "InnoDB: data dictionary. Have you deleted"
-			      " the .frm file\n"
-			      "InnoDB: and not used DROP TABLE?"
-			      " Have you used DROP DATABASE\n"
-			      "InnoDB: for InnoDB tables in"
-			      " MySQL version <= 3.23.43?\n"
-			      "InnoDB: See the Restrictions section"
-			      " of the InnoDB manual.\n"
-			      "InnoDB: You can drop the orphaned table"
-			      " inside InnoDB by\n"
-			      "InnoDB: creating an InnoDB table with"
-			      " the same name in another\n"
-			      "InnoDB: database and copying the .frm file"
-			      " to the current database.\n"
-			      "InnoDB: Then MySQL thinks the table exists,"
-			      " and DROP TABLE will\n"
-			      "InnoDB: succeed.\n"
-			      "InnoDB: You can look for further help from\n"
-			      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			      "innodb-troubleshooting.html\n",
-			      stderr);
+			row_drop_table_for_mysql(table->name, trx, FALSE);
+			trx_commit_for_mysql(trx);
 		}
+		break;
+
+	case DB_DUPLICATE_KEY:
+		ut_print_timestamp(stderr);
+		fputs("  InnoDB: Error: table ", stderr);
+		ut_print_name(stderr, trx, TRUE, table->name);
+		fputs(" already exists in InnoDB internal\n"
+		      "InnoDB: data dictionary. Have you deleted"
+		      " the .frm file\n"
+		      "InnoDB: and not used DROP TABLE?"
+		      " Have you used DROP DATABASE\n"
+		      "InnoDB: for InnoDB tables in"
+		      " MySQL version <= 3.23.43?\n"
+		      "InnoDB: See the Restrictions section"
+		      " of the InnoDB manual.\n"
+		      "InnoDB: You can drop the orphaned table"
+		      " inside InnoDB by\n"
+		      "InnoDB: creating an InnoDB table with"
+		      " the same name in another\n"
+		      "InnoDB: database and copying the .frm file"
+		      " to the current database.\n"
+		      "InnoDB: Then MySQL thinks the table exists,"
+		      " and DROP TABLE will\n"
+		      "InnoDB: succeed.\n"
+		      "InnoDB: You can look for further help from\n"
+		      "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
+		      stderr);
 
 		/* We may also get err == DB_ERROR if the .ibd file for the
 		table already exists */
 
-		trx->error_state = DB_SUCCESS;
+		break;
 	}
 
 	que_graph_free((que_t*) que_node_get_parent(thr));
@@ -1969,18 +1942,19 @@ row_create_table_for_mysql(
 	return((int) err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Does an index creation operation for MySQL. TODO: currently failure
 to create an index results in dropping the whole table! This is no problem
-currently as all indexes must be created at the same time as the table. */
-
+currently as all indexes must be created at the same time as the table.
+@return	error number or DB_SUCCESS */
+UNIV_INTERN
 int
 row_create_index_for_mysql(
 /*=======================*/
-					/* out: error number or DB_SUCCESS */
-	dict_index_t*	index,		/* in: index definition */
-	trx_t*		trx,		/* in: transaction handle */
-	const ulint*	field_lengths)	/* in: if not NULL, must contain
+	dict_index_t*	index,		/*!< in, own: index definition
+					(will be freed) */
+	trx_t*		trx,		/*!< in: transaction handle */
+	const ulint*	field_lengths)	/*!< in: if not NULL, must contain
 					dict_index_get_n_fields(index)
 					actual field lengths for the
 					index columns, which are
@@ -1991,7 +1965,7 @@ row_create_index_for_mysql(
 	mem_heap_t*	heap;
 	que_thr_t*	thr;
 	ulint		err;
-	ulint		i, j;
+	ulint		i;
 	ulint		len;
 	char*		table_name;
 
@@ -2015,11 +1989,12 @@ row_create_index_for_mysql(
 	safer not to allow them. */
 
 	for (i = 0; i < dict_index_get_n_fields(index); i++) {
+		ulint		j;
+
 		for (j = 0; j < i; j++) {
 			if (0 == ut_strcmp(
 				    dict_index_get_nth_field(index, j)->name,
 				    dict_index_get_nth_field(index, i)->name)) {
-
 				ut_print_timestamp(stderr);
 
 				fputs("  InnoDB: Error: column ", stderr);
@@ -2056,7 +2031,7 @@ row_create_index_for_mysql(
 
 	heap = mem_heap_create(512);
 
-	trx->dict_operation = TRUE;
+	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
 
 	/* Note that the space id where we store the index is inherited from
 	the table in dict_build_index_def_step() in dict0crea.c. */
@@ -2082,6 +2057,8 @@ error_handling:
 
 		row_drop_table_for_mysql(table_name, trx, FALSE);
 
+		trx_commit_for_mysql(trx);
+
 		trx->error_state = DB_SUCCESS;
 	}
 
@@ -2092,29 +2069,29 @@ error_handling:
 	return((int) err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Scans a table create SQL string and adds to the data dictionary
 the foreign key constraints declared in the string. This function
 should be called after the indexes for a table have been created.
 Each foreign key constraint must be accompanied with indexes in
 bot participating tables. The indexes are allowed to contain more
 fields than mentioned in the constraint. Check also that foreign key
-constraints which reference this table are ok. */
-
+constraints which reference this table are ok.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_table_add_foreign_constraints(
 /*==============================*/
-					/* out: error code or DB_SUCCESS */
-	trx_t*		trx,		/* in: transaction */
-	const char*	sql_string,	/* in: table create statement where
+	trx_t*		trx,		/*!< in: transaction */
+	const char*	sql_string,	/*!< in: table create statement where
 					foreign keys are declared like:
 				FOREIGN KEY (a, b) REFERENCES table2(c, d),
 					table2 can be written also with the
 					database name before it: test.table2 */
-	const char*	name,		/* in: table full name in the
+	const char*	name,		/*!< in: table full name in the
 					normalized form
 					database_name/table_name */
-	ibool		reject_fks)	/* in: if TRUE, fail with error
+	ibool		reject_fks)	/*!< in: if TRUE, fail with error
 					code DB_CANNOT_ADD_CONSTRAINT if
 					any foreign keys are found. */
 {
@@ -2130,11 +2107,10 @@ row_table_add_foreign_constraints(
 
 	trx_start_if_not_started(trx);
 
-	trx->dict_operation = TRUE;
+	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
 
 	err = dict_create_foreign_constraints(trx, sql_string, name,
 					      reject_fks);
-
 	if (err == DB_SUCCESS) {
 		/* Check that also referencing constraints are ok */
 		err = dict_load_foreigns(name, TRUE);
@@ -2149,25 +2125,27 @@ row_table_add_foreign_constraints(
 
 		row_drop_table_for_mysql(name, trx, FALSE);
 
+		trx_commit_for_mysql(trx);
+
 		trx->error_state = DB_SUCCESS;
 	}
 
 	return((int) err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Drops a table for MySQL as a background operation. MySQL relies on Unix
 in ALTER TABLE to the fact that the table handler does not remove the
 table before all handles to it has been removed. Furhermore, the MySQL's
 call to drop table must be non-blocking. Therefore we do the drop table
 as a background operation, which is taken care of by the master thread
-in srv0srv.c. */
+in srv0srv.c.
+@return	error code or DB_SUCCESS */
 static
 int
 row_drop_table_for_mysql_in_background(
 /*===================================*/
-				/* out: error code or DB_SUCCESS */
-	const char*	name)	/* in: table name */
+	const char*	name)	/*!< in: table name */
 {
 	ulint	error;
 	trx_t*	trx;
@@ -2201,16 +2179,15 @@ row_drop_table_for_mysql_in_background(
 	return((int) error);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 The master thread in srv0srv.c calls this regularly to drop tables which
 we must drop in background after queries to them have ended. Such lazy
-dropping of tables is needed in ALTER TABLE on Unix. */
-
+dropping of tables is needed in ALTER TABLE on Unix.
+@return	how many tables dropped + remaining tables in list */
+UNIV_INTERN
 ulint
 row_drop_tables_for_mysql_in_background(void)
 /*=========================================*/
-					/* out: how many tables dropped
-					+ remaining tables in list */
 {
 	row_mysql_drop_t*	drop;
 	dict_table_t*		table;
@@ -2264,9 +2241,9 @@ already_dropped:
 	UT_LIST_REMOVE(row_mysql_drop_list, row_mysql_drop_list, drop);
 
 	ut_print_timestamp(stderr);
-	fprintf(stderr,
-		"  InnoDB: Dropped table %s in background drop queue.\n",
-		drop->table_name);
+	fputs("  InnoDB: Dropped table ", stderr);
+	ut_print_name(stderr, NULL, TRUE, drop->table_name);
+	fputs(" in background drop queue.\n", stderr);
 
 	mem_free(drop->table_name);
 
@@ -2277,14 +2254,14 @@ already_dropped:
 	goto loop;
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Get the background drop list length. NOTE: the caller must own the kernel
-mutex! */
-
+mutex!
+@return	how many tables in list */
+UNIV_INTERN
 ulint
 row_get_background_drop_list_len_low(void)
 /*======================================*/
-					/* out: how many tables in list */
 {
 	ut_ad(mutex_own(&kernel_mutex));
 
@@ -2297,19 +2274,18 @@ row_get_background_drop_list_len_low(void)
 	return(UT_LIST_GET_LEN(row_mysql_drop_list));
 }
 
-/*************************************************************************
+/*********************************************************************//**
 If a table is not yet in the drop list, adds the table to the list of tables
 which the master thread drops in background. We need this on Unix because in
 ALTER TABLE MySQL may call drop table even if the table has running queries on
 it. Also, if there are running foreign key checks on the table, we drop the
-table lazily. */
+table lazily.
+@return	TRUE if the table was not yet in the drop list, and was added there */
 static
 ibool
 row_add_table_to_background_drop_list(
 /*==================================*/
-				/* out: TRUE if the table was not yet in the
-				drop list, and was added there */
-	dict_table_t*	table)	/* in: table */
+	const char*	name)	/*!< in: table name */
 {
 	row_mysql_drop_t*	drop;
 
@@ -2325,7 +2301,7 @@ row_add_table_to_background_drop_list(
 	drop = UT_LIST_GET_FIRST(row_mysql_drop_list);
 
 	while (drop != NULL) {
-		if (strcmp(drop->table_name, table->name) == 0) {
+		if (strcmp(drop->table_name, name) == 0) {
 			/* Already in the list */
 
 			mutex_exit(&kernel_mutex);
@@ -2338,7 +2314,7 @@ row_add_table_to_background_drop_list(
 
 	drop = mem_alloc(sizeof(row_mysql_drop_t));
 
-	drop->table_name = mem_strdup(table->name);
+	drop->table_name = mem_strdup(name);
 
 	UT_LIST_ADD_LAST(row_mysql_drop_list, row_mysql_drop_list, drop);
 
@@ -2351,18 +2327,17 @@ row_add_table_to_background_drop_list(
 	return(TRUE);
 }
 
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
+/*********************************************************************//**
 Discards the tablespace of a table which stored in an .ibd file. Discarding
 means that this function deletes the .ibd file and assigns a new table id for
-the table. Also the flag table->ibd_file_missing is set TRUE. */
-
+the table. Also the flag table->ibd_file_missing is set TRUE.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_discard_tablespace_for_mysql(
 /*=============================*/
-				/* out: error code or DB_SUCCESS */
-	const char*	name,	/* in: table name */
-	trx_t*		trx)	/* in: transaction handle */
+	const char*	name,	/*!< in: table name */
+	trx_t*		trx)	/*!< in: transaction handle */
 {
 	dict_foreign_t*	foreign;
 	dulint		new_id;
@@ -2469,10 +2444,10 @@ row_discard_tablespace_for_mysql(
 		ut_print_timestamp(ef);
 
 		fputs("  Cannot DISCARD table ", ef);
-		ut_print_name(ef, trx, TRUE, name);
+		ut_print_name(stderr, trx, TRUE, name);
 		fputs("\n"
 		      "because it is referenced by ", ef);
-		ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
+		ut_print_name(stderr, trx, TRUE, foreign->foreign_table_name);
 		putc('\n', ef);
 		mutex_exit(&dict_foreign_err_mutex);
 
@@ -2544,20 +2519,20 @@ funct_exit:
 	return((int) err);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Imports a tablespace. The space id in the .ibd file must match the space id
-of the table in the data dictionary. */
-
+of the table in the data dictionary.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_import_tablespace_for_mysql(
 /*============================*/
-				/* out: error code or DB_SUCCESS */
-	const char*	name,	/* in: table name */
-	trx_t*		trx)	/* in: transaction handle */
+	const char*	name,	/*!< in: table name */
+	trx_t*		trx)	/*!< in: transaction handle */
 {
 	dict_table_t*	table;
 	ibool		success;
-	dulint		current_lsn;
+	ib_uint64_t	current_lsn;
 	ulint		err		= DB_SUCCESS;
 
 	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
@@ -2650,8 +2625,10 @@ row_import_tablespace_for_mysql(
 
 	ibuf_delete_for_discarded_space(table->space);
 
-	success = fil_open_single_table_tablespace(TRUE, table->space,
-						   table->name);
+	success = fil_open_single_table_tablespace(
+		TRUE, table->space,
+		table->flags == DICT_TF_COMPACT ? 0 : table->flags,
+		table->name);
 	if (success) {
 		table->ibd_file_missing = FALSE;
 		table->tablespace_discarded = FALSE;
@@ -2680,15 +2657,15 @@ funct_exit:
 	return((int) err);
 }
 
-/*************************************************************************
-Truncates a table for MySQL. */
-
+/*********************************************************************//**
+Truncates a table for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_truncate_table_for_mysql(
 /*=========================*/
-				/* out: error code or DB_SUCCESS */
-	dict_table_t*	table,	/* in: table handle */
-	trx_t*		trx)	/* in: transaction handle */
+	dict_table_t*	table,	/*!< in: table handle */
+	trx_t*		trx)	/*!< in: transaction handle */
 {
 	dict_foreign_t*	foreign;
 	ulint		err;
@@ -2700,6 +2677,7 @@ row_truncate_table_for_mysql(
 	btr_pcur_t	pcur;
 	mtr_t		mtr;
 	dulint		new_id;
+	ulint		recreate_space = 0;
 	pars_info_t*	info = NULL;
 
 	/* How do we prevent crashes caused by ongoing operations on
@@ -2724,17 +2702,23 @@ row_truncate_table_for_mysql(
 	reallocated, the allocator will remove the ibuf entries for
 	it.
 
-	TODO: when we truncate *.ibd files (analogous to DISCARD
-	TABLESPACE), we will have to remove we remove all entries for
-	the table in the insert buffer tree!
+	When we truncate *.ibd files by recreating them (analogous to
+	DISCARD TABLESPACE), we remove all entries for the table in the
+	insert buffer tree.  This is not strictly necessary, because
+	in 6) we will assign a new tablespace identifier, but we can
+	free up some space in the system tablespace.
 
 	4) Linear readahead and random readahead: we use the same
-	method as in 3) to discard ongoing operations. (This will only
-	be relevant for TRUNCATE TABLE by DISCARD TABLESPACE.)
+	method as in 3) to discard ongoing operations. (This is only
+	relevant for TRUNCATE TABLE by DISCARD TABLESPACE.)
 
 	5) FOREIGN KEY operations: if
 	table->n_foreign_key_checks_running > 0, we do not allow the
-	TRUNCATE. We also reserve the data dictionary latch. */
+	TRUNCATE. We also reserve the data dictionary latch.
+
+	6) Crash recovery: To prevent the application of pre-truncation
+	redo log records on the truncated tablespace, we will assign
+	a new tablespace identifier to the truncated tablespace. */
 
 	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
 	ut_ad(table);
@@ -2822,6 +2806,51 @@ row_truncate_table_for_mysql(
 
 	trx->table_id = table->id;
 
+	if (table->space && !table->dir_path_of_temp_table) {
+		/* Discard and create the single-table tablespace. */
+		ulint	space	= table->space;
+		ulint	flags	= fil_space_get_flags(space);
+
+		if (flags != ULINT_UNDEFINED
+		    && fil_discard_tablespace(space)) {
+
+			dict_index_t*	index;
+
+			space = 0;
+
+			if (fil_create_new_single_table_tablespace(
+				    &space, table->name, FALSE, flags,
+				    FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+					"  InnoDB: TRUNCATE TABLE %s failed to"
+					" create a new tablespace\n",
+					table->name);
+				table->ibd_file_missing = 1;
+				err = DB_ERROR;
+				goto funct_exit;
+			}
+
+			recreate_space = space;
+
+			/* Replace the space_id in the data dictionary cache.
+			The persisent data dictionary (SYS_TABLES.SPACE
+			and SYS_INDEXES.SPACE) are updated later in this
+			function. */
+			table->space = space;
+			index = dict_table_get_first_index(table);
+			do {
+				index->space = space;
+				index = dict_table_get_next_index(index);
+			} while (index);
+
+			mtr_start(&mtr);
+			fsp_header_init(space,
+					FIL_IBD_FILE_INITIAL_SIZE, &mtr);
+			mtr_commit(&mtr);
+		}
+	}
+
 	/* scan SYS_INDEXES for all indexes of the table */
 	heap = mem_heap_create(800);
 
@@ -2844,7 +2873,7 @@ row_truncate_table_for_mysql(
 		ulint		len;
 		ulint		root_page_no;
 
-		if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+		if (!btr_pcur_is_on_user_rec(&pcur)) {
 			/* The end of SYS_INDEXES has been reached. */
 			break;
 		}
@@ -2866,7 +2895,8 @@ row_truncate_table_for_mysql(
 
 		/* This call may commit and restart mtr
 		and reposition pcur. */
-		root_page_no = dict_truncate_index_tree(table, &pcur, &mtr);
+		root_page_no = dict_truncate_index_tree(table, recreate_space,
+							&pcur, &mtr);
 
 		rec = btr_pcur_get_rec(&pcur);
 
@@ -2898,17 +2928,20 @@ next_rec:
 
 	info = pars_info_create();
 
+	pars_info_add_int4_literal(info, "space", (lint) table->space);
 	pars_info_add_dulint_literal(info, "old_id", table->id);
 	pars_info_add_dulint_literal(info, "new_id", new_id);
 
 	err = que_eval_sql(info,
 			   "PROCEDURE RENUMBER_TABLESPACE_PROC () IS\n"
 			   "BEGIN\n"
-			   "UPDATE SYS_TABLES SET ID = :new_id\n"
+			   "UPDATE SYS_TABLES"
+			   " SET ID = :new_id, SPACE = :space\n"
 			   " WHERE ID = :old_id;\n"
 			   "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
 			   " WHERE TABLE_ID = :old_id;\n"
-			   "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
+			   "UPDATE SYS_INDEXES"
+			   " SET TABLE_ID = :new_id, SPACE = :space\n"
 			   " WHERE TABLE_ID = :old_id;\n"
 			   "COMMIT WORK;\n"
 			   "END;\n"
@@ -2949,21 +2982,22 @@ funct_exit:
 
 	return((int) err);
 }
-#endif /* !UNIV_HOTBACKUP */
 
-/*************************************************************************
-Drops a table for MySQL. If the name of the dropped table ends in
+/*********************************************************************//**
+Drops a table for MySQL.  If the name of the dropped table ends in
 one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
 "innodb_table_monitor", then this will also stop the printing of monitor
-output by the master thread. */
-
+output by the master thread.  If the data dictionary was not already locked
+by the transaction, the transaction will be committed.  Otherwise, the
+data dictionary will remain locked.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_drop_table_for_mysql(
 /*=====================*/
-				/* out: error code or DB_SUCCESS */
-	const char*	name,	/* in: table name */
-	trx_t*		trx,	/* in: transaction handle */
-	ibool		drop_db)/* in: TRUE=dropping whole database */
+	const char*	name,	/*!< in: table name */
+	trx_t*		trx,	/*!< in: transaction handle */
+	ibool		drop_db)/*!< in: TRUE=dropping whole database */
 {
 	dict_foreign_t*	foreign;
 	dict_table_t*	table;
@@ -3058,8 +3092,7 @@ row_drop_table_for_mysql(
 		      "InnoDB: MySQL database directory"
 		      " from another database?\n"
 		      "InnoDB: You can look for further help from\n"
-		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-		      "innodb-troubleshooting.html\n",
+		      "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
 		      stderr);
 		goto funct_exit;
 	}
@@ -3106,7 +3139,7 @@ check_next_foreign:
 	if (table->n_mysql_handles_opened > 0) {
 		ibool	added;
 
-		added = row_add_table_to_background_drop_list(table);
+		added = row_add_table_to_background_drop_list(table->name);
 
 		if (added) {
 			ut_print_timestamp(stderr);
@@ -3122,7 +3155,6 @@ check_next_foreign:
 
 			/* We return DB_SUCCESS to MySQL though the drop will
 			happen lazily later */
-
 			err = DB_SUCCESS;
 		} else {
 			/* The table is already in the background drop list */
@@ -3140,15 +3172,16 @@ check_next_foreign:
 
 	if (table->n_foreign_key_checks_running > 0) {
 
-		ibool	added;
+		const char*	table_name = table->name;
+		ibool		added;
 
-		added = row_add_table_to_background_drop_list(table);
+		added = row_add_table_to_background_drop_list(table_name);
 
 		if (added) {
 			ut_print_timestamp(stderr);
 			fputs("  InnoDB: You are trying to drop table ",
 			      stderr);
-			ut_print_name(stderr, trx, TRUE, table->name);
+			ut_print_name(stderr, trx, TRUE, table_name);
 			fputs("\n"
 			      "InnoDB: though there is a"
 			      " foreign key check running on it.\n"
@@ -3171,7 +3204,7 @@ check_next_foreign:
 	/* Remove all locks there are on the table or its records */
 	lock_remove_all_on_table(table, TRUE);
 
-	trx->dict_operation = TRUE;
+	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
 	trx->table_id = table->id;
 
 	/* We use the private SQL parser of Innobase to generate the
@@ -3196,7 +3229,6 @@ check_next_foreign:
 			   "WHERE NAME = :table_name\n"
 			   "LOCK IN SHARE MODE;\n"
 			   "IF (SQL % NOTFOUND) THEN\n"
-			   "       COMMIT WORK;\n"
 			   "       RETURN;\n"
 			   "END IF;\n"
 			   "found := 1;\n"
@@ -3249,7 +3281,6 @@ check_next_foreign:
 			   "WHERE TABLE_ID = table_id;\n"
 			   "DELETE FROM SYS_TABLES\n"
 			   "WHERE ID = table_id;\n"
-			   "COMMIT WORK;\n"
 			   "END;\n"
 			   , FALSE, trx);
 
@@ -3333,31 +3364,29 @@ check_next_foreign:
 	}
 funct_exit:
 
-	trx_commit_for_mysql(trx);
-
 	if (locked_dictionary) {
+		trx_commit_for_mysql(trx);
+
 		row_mysql_unlock_data_dictionary(trx);
 	}
 
 	trx->op_info = "";
 
-#ifndef UNIV_HOTBACKUP
 	srv_wake_master_thread();
-#endif /* !UNIV_HOTBACKUP */
 
 	return((int) err);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Drop all foreign keys in a database, see Bug#18942.
-Called at the end of row_drop_database_for_mysql(). */
+Called at the end of row_drop_database_for_mysql().
+@return	error code or DB_SUCCESS */
 static
 ulint
 drop_all_foreign_keys_in_db(
 /*========================*/
-				/* out: error code or DB_SUCCESS */
-	const char*	name,	/* in: database name which ends to '/' */
-	trx_t*		trx)	/* in: transaction handle */
+	const char*	name,	/*!< in: database name which ends to '/' */
+	trx_t*		trx)	/*!< in: transaction handle */
 {
 	pars_info_t*	pinfo;
 	ulint		err;
@@ -3368,7 +3397,7 @@ drop_all_foreign_keys_in_db(
 
 	pars_info_add_str_literal(pinfo, "dbname", name);
 
-/* true if for_name is not prefixed with dbname */
+/** true if for_name is not prefixed with dbname */
 #define TABLE_NOT_IN_THIS_DB \
 "SUBSTR(for_name, 0, LENGTH(:dbname)) <> :dbname"
 
@@ -3408,15 +3437,15 @@ drop_all_foreign_keys_in_db(
 	return(err);
 }
 
-/*************************************************************************
-Drops a database for MySQL. */
-
+/*********************************************************************//**
+Drops a database for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 row_drop_database_for_mysql(
 /*========================*/
-				/* out: error code or DB_SUCCESS */
-	const char*	name,	/* in: database name which ends to '/' */
-	trx_t*		trx)	/* in: transaction handle */
+	const char*	name,	/*!< in: database name which ends to '/' */
+	trx_t*		trx)	/*!< in: transaction handle */
 {
 	dict_table_t* table;
 	char*	table_name;
@@ -3464,6 +3493,7 @@ loop:
 		}
 
 		err = row_drop_table_for_mysql(table_name, trx, TRUE);
+		trx_commit_for_mysql(trx);
 
 		if (err != DB_SUCCESS) {
 			fputs("InnoDB: DROP DATABASE ", stderr);
@@ -3501,30 +3531,30 @@ loop:
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if a table name contains the string "/#sql" which denotes temporary
-tables in MySQL. */
+tables in MySQL.
+@return	TRUE if temporary table */
 static
 ibool
 row_is_mysql_tmp_table_name(
 /*========================*/
-				/* out: TRUE if temporary table */
-	const char*	name)	/* in: table name in the form
+	const char*	name)	/*!< in: table name in the form
 				'database/tablename' */
 {
 	return(strstr(name, "/#sql") != NULL);
 	/* return(strstr(name, "/@0023sql") != NULL); */
 }
 
-/********************************************************************
-Delete a single constraint. */
+/****************************************************************//**
+Delete a single constraint.
+@return	error code or DB_SUCCESS */
 static
 int
 row_delete_constraint_low(
 /*======================*/
-					/* out: error code or DB_SUCCESS */
-	const char*	id,		/* in: constraint id */
-	trx_t*		trx)		/* in: transaction handle */
+	const char*	id,		/*!< in: constraint id */
+	trx_t*		trx)		/*!< in: transaction handle */
 {
 	pars_info_t*	info = pars_info_create();
 
@@ -3539,18 +3569,18 @@ row_delete_constraint_low(
 			    , FALSE, trx));
 }
 
-/********************************************************************
-Delete a single constraint. */
+/****************************************************************//**
+Delete a single constraint.
+@return	error code or DB_SUCCESS */
 static
 int
 row_delete_constraint(
 /*==================*/
-					/* out: error code or DB_SUCCESS */
-	const char*	id,		/* in: constraint id */
-	const char*	database_name,	/* in: database name, with the
+	const char*	id,		/*!< in: constraint id */
+	const char*	database_name,	/*!< in: database name, with the
 					trailing '/' */
-	mem_heap_t*	heap,		/* in: memory heap */
-	trx_t*		trx)		/* in: transaction handle */
+	mem_heap_t*	heap,		/*!< in: memory heap */
+	trx_t*		trx)		/*!< in: transaction handle */
 {
 	ulint		err;
 
@@ -3572,19 +3602,20 @@ row_delete_constraint(
 	return((int) err);
 }
 
-/*************************************************************************
-Renames a table for MySQL. */
-
-int
+/*********************************************************************//**
+Renames a table for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+ulint
 row_rename_table_for_mysql(
 /*=======================*/
-					/* out: error code or DB_SUCCESS */
-	const char*	old_name,	/* in: old table name */
-	const char*	new_name,	/* in: new table name */
-	trx_t*		trx)		/* in: transaction handle */
+	const char*	old_name,	/*!< in: old table name */
+	const char*	new_name,	/*!< in: new table name */
+	trx_t*		trx,		/*!< in: transaction handle */
+	ibool		commit)		/*!< in: if TRUE then commit trx */
 {
 	dict_table_t*	table;
-	ulint		err;
+	ulint		err			= DB_ERROR;
 	mem_heap_t*	heap			= NULL;
 	const char**	constraints_to_drop	= NULL;
 	ulint		n_constraints_to_drop	= 0;
@@ -3604,11 +3635,8 @@ row_rename_table_for_mysql(
 		      "InnoDB: with raw, and innodb_force_... is removed.\n",
 		      stderr);
 
-		trx_commit_for_mysql(trx);
-		return(DB_ERROR);
-	}
-
-	if (row_mysql_is_system_table(new_name)) {
+		goto funct_exit;
+	} else if (row_mysql_is_system_table(new_name)) {
 
 		fprintf(stderr,
 			"InnoDB: Error: trying to create a MySQL"
@@ -3617,8 +3645,7 @@ row_rename_table_for_mysql(
 			" of the MyISAM type!\n",
 			new_name);
 
-		trx_commit_for_mysql(trx);
-		return(DB_ERROR);
+		goto funct_exit;
 	}
 
 	trx->op_info = "renaming table";
@@ -3627,11 +3654,6 @@ row_rename_table_for_mysql(
 	old_is_tmp = row_is_mysql_tmp_table_name(old_name);
 	new_is_tmp = row_is_mysql_tmp_table_name(new_name);
 
-	/* Serialize data dictionary operations with dictionary mutex:
-	no deadlocks can occur then in these operations */
-
-	row_mysql_lock_data_dictionary(trx);
-
 	table = dict_table_get_low(old_name);
 
 	if (!table) {
@@ -3648,13 +3670,10 @@ row_rename_table_for_mysql(
 		      "InnoDB: MySQL database directory"
 		      " from another database?\n"
 		      "InnoDB: You can look for further help from\n"
-		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-		      "innodb-troubleshooting.html\n",
+		      "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
 		      stderr);
 		goto funct_exit;
-	}
-
-	if (table->ibd_file_missing) {
+	} else if (table->ibd_file_missing) {
 		err = DB_TABLE_NOT_FOUND;
 		ut_print_timestamp(stderr);
 
@@ -3663,13 +3682,10 @@ row_rename_table_for_mysql(
 		fputs(" does not have an .ibd file"
 		      " in the database directory.\n"
 		      "InnoDB: You can look for further help from\n"
-		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-		      "innodb-troubleshooting.html\n",
+		      "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
 		      stderr);
 		goto funct_exit;
-	}
-
-	if (new_is_tmp) {
+	} else if (new_is_tmp) {
 		/* MySQL is doing an ALTER TABLE command and it renames the
 		original table to a temporary table name. We want to preserve
 		the original foreign key constraint definitions despite the
@@ -3689,9 +3705,7 @@ row_rename_table_for_mysql(
 	}
 
 	/* We use the private SQL parser of Innobase to generate the query
-	graphs needed in deleting the dictionary data from system tables in
-	Innobase. Deleting a row from SYS_INDEXES table also frees the file
-	segments of the B-tree associated with the index. */
+	graphs needed in updating the dictionary data from system tables. */
 
 	info = pars_info_create();
 
@@ -3709,9 +3723,7 @@ row_rename_table_for_mysql(
 	if (err != DB_SUCCESS) {
 
 		goto end;
-	}
-
-	if (!new_is_tmp) {
+	} else if (!new_is_tmp) {
 		/* Rename all constraints. */
 
 		info = pars_info_create();
@@ -3822,8 +3834,7 @@ end:
 			      "InnoDB: Have you deleted the .frm file"
 			      " and not used DROP TABLE?\n"
 			      "InnoDB: You can look for further help from\n"
-			      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			      "innodb-troubleshooting.html\n"
+			      "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
 			      "InnoDB: If table ", stderr);
 			ut_print_name(stderr, trx, TRUE, new_name);
 			fputs(" is a temporary table #sql..., then"
@@ -3849,22 +3860,11 @@ end:
 		/* The following call will also rename the .ibd data file if
 		the table is stored in a single-table tablespace */
 
-		ibool	success = dict_table_rename_in_cache(table, new_name,
-							     !new_is_tmp);
-
-		if (!success) {
+		if (!dict_table_rename_in_cache(table, new_name,
+						!new_is_tmp)) {
 			trx->error_state = DB_SUCCESS;
 			trx_general_rollback_for_mysql(trx, FALSE, NULL);
 			trx->error_state = DB_SUCCESS;
-			ut_print_timestamp(stderr);
-			fputs("  InnoDB: Error in table rename,"
-			      " cannot rename ", stderr);
-			ut_print_name(stderr, trx, TRUE, old_name);
-			fputs(" to ", stderr);
-			ut_print_name(stderr, trx, TRUE, new_name);
-			putc('\n', stderr);
-			err = DB_ERROR;
-
 			goto funct_exit;
 		}
 
@@ -3872,7 +3872,7 @@ end:
 		an ALTER, not in a RENAME. */
 
 		err = dict_load_foreigns(
-			new_name, old_is_tmp ? trx->check_foreigns : TRUE);
+			new_name, !old_is_tmp || trx->check_foreigns);
 
 		if (err != DB_SUCCESS) {
 			ut_print_timestamp(stderr);
@@ -3909,8 +3909,10 @@ end:
 	}
 
 funct_exit:
-	trx_commit_for_mysql(trx);
-	row_mysql_unlock_data_dictionary(trx);
+
+	if (commit) {
+		trx_commit_for_mysql(trx);
+	}
 
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
@@ -3918,21 +3920,21 @@ funct_exit:
 
 	trx->op_info = "";
 
-	return((int) err);
+	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks that the index contains entries in an ascending order, unique
 constraint is not broken, and calculates the number of index entries
-in the read view of the current transaction. */
+in the read view of the current transaction.
+@return	TRUE if ok */
 static
 ibool
 row_scan_and_check_index(
 /*=====================*/
-					/* out: TRUE if ok */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct in MySQL */
-	dict_index_t*	index,		/* in: index */
-	ulint*		n_rows)		/* out: number of entries seen in the
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct in MySQL */
+	dict_index_t*	index,		/*!< in: index */
+	ulint*		n_rows)		/*!< out: number of entries seen in the
 					current consistent read */
 {
 	dtuple_t*	prev_entry	= NULL;
@@ -3947,12 +3949,21 @@ row_scan_and_check_index(
 	ulint		i;
 	ulint		cnt;
 	mem_heap_t*	heap		= NULL;
+	ulint		n_ext;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	ulint*		offsets;
+	rec_offs_init(offsets_);
 
 	*n_rows = 0;
 
+	if (!row_merge_is_index_usable(prebuilt->trx, index)) {
+		/* A newly created index may lack some delete-marked
+		records that may exist in the read view of
+		prebuilt->trx.  Thus, such indexes must not be
+		accessed by consistent read. */
+		return(is_ok);
+	}
+
 	buf = mem_alloc(UNIV_PAGE_SIZE);
 	heap = mem_heap_create(100);
 
@@ -3960,6 +3971,8 @@ row_scan_and_check_index(
 	in scanning the index entries */
 
 	prebuilt->index = index;
+	/* row_merge_is_index_usable() was already checked above. */
+	prebuilt->index_usable = TRUE;
 	prebuilt->sql_stat_start = TRUE;
 	prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
 	prebuilt->n_template = 0;
@@ -3979,7 +3992,17 @@ loop:
 		}
 		cnt = 1000;
 	}
-	if (ret != DB_SUCCESS) {
+
+	switch (ret) {
+	case DB_SUCCESS:
+		break;
+	default:
+		ut_print_timestamp(stderr);
+		fputs("  InnoDB: Warning: CHECK TABLE on ", stderr);
+		dict_index_name_print(stderr, prebuilt->trx, index);
+		fprintf(stderr, " returned %lu\n", ret);
+		/* fall through (this error is ignored by CHECK TABLE) */
+	case DB_END_OF_INDEX:
 func_exit:
 		mem_free(buf);
 		mem_heap_free(heap);
@@ -3995,12 +4018,13 @@ func_exit:
 
 	rec = buf + mach_read_from_4(buf);
 
+	offsets = rec_get_offsets(rec, index, offsets_,
+				  ULINT_UNDEFINED, &heap);
+
 	if (prev_entry != NULL) {
 		matched_fields = 0;
 		matched_bytes = 0;
 
-		offsets = rec_get_offsets(rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
 		cmp = cmp_dtuple_rec_with_match(prev_entry, rec, offsets,
 						&matched_fields,
 						&matched_bytes);
@@ -4033,7 +4057,7 @@ not_ok:
 			rec_print_new(stderr, rec, offsets);
 			putc('\n', stderr);
 			is_ok = FALSE;
-		} else if ((index->type & DICT_UNIQUE)
+		} else if (dict_index_is_unique(index)
 			   && !contains_null
 			   && matched_fields
 			   >= dict_index_get_n_ordering_defined_by_user(
@@ -4044,24 +4068,44 @@ not_ok:
 		}
 	}
 
-	mem_heap_empty(heap);
-	offsets = offsets_;
+	{
+		mem_heap_t*	tmp_heap = NULL;
 
-	prev_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
+		/* Empty the heap on each round.  But preserve offsets[]
+		for the row_rec_to_index_entry() call, by copying them
+		into a separate memory heap when needed. */
+		if (UNIV_UNLIKELY(offsets != offsets_)) {
+			ulint	size = rec_offs_get_n_alloc(offsets)
+				* sizeof *offsets;
+
+			tmp_heap = mem_heap_create(size);
+			offsets = mem_heap_dup(tmp_heap, offsets, size);
+		}
+
+		mem_heap_empty(heap);
+
+		prev_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec,
+						    index, offsets,
+						    &n_ext, heap);
+
+		if (UNIV_LIKELY_NULL(tmp_heap)) {
+			mem_heap_free(tmp_heap);
+		}
+	}
 
 	ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT);
 
 	goto loop;
 }
 
-/*************************************************************************
-Checks a table for corruption. */
-
+/*********************************************************************//**
+Checks a table for corruption.
+@return	DB_ERROR or DB_SUCCESS */
+UNIV_INTERN
 ulint
 row_check_table_for_mysql(
 /*======================*/
-					/* out: DB_ERROR or DB_SUCCESS */
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
 					handle */
 {
 	dict_table_t*	table		= prebuilt->table;
@@ -4071,7 +4115,7 @@ row_check_table_for_mysql(
 	ulint		ret		= DB_SUCCESS;
 	ulint		old_isolation_level;
 
-	if (prebuilt->table->ibd_file_missing) {
+	if (table->ibd_file_missing) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr, "  InnoDB: Error:\n"
 			"InnoDB: MySQL is trying to use a table handle"
@@ -4082,10 +4126,9 @@ row_check_table_for_mysql(
 			"InnoDB: the MySQL datadir, or have you"
 			" used DISCARD TABLESPACE?\n"
 			"InnoDB: Look from\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"innodb-troubleshooting.html\n"
+			"InnoDB: " REFMAN "innodb-troubleshooting.html\n"
 			"InnoDB: how you can resolve the problem.\n",
-			prebuilt->table->name);
+			table->name);
 		return(DB_ERROR);
 	}
 
@@ -4115,8 +4158,7 @@ row_check_table_for_mysql(
 		if (!btr_validate_index(index, prebuilt->trx)) {
 			ret = DB_ERROR;
 		} else {
-			if (!row_scan_and_check_index(prebuilt,
-						      index, &n_rows)) {
+			if (!row_scan_and_check_index(prebuilt,index, &n_rows)){
 				ret = DB_ERROR;
 			}
 
@@ -4168,14 +4210,14 @@ row_check_table_for_mysql(
 	return(ret);
 }
 
-/*************************************************************************
-Determines if a table is a magic monitor table. */
-
+/*********************************************************************//**
+Determines if a table is a magic monitor table.
+@return	TRUE if monitor table */
+UNIV_INTERN
 ibool
 row_is_magic_monitor_table(
 /*=======================*/
-					/* out: TRUE if monitor table */
-	const char*	table_name)	/* in: name of the table, in the
+	const char*	table_name)	/*!< in: name of the table, in the
 					form database/table_name */
 {
 	const char*	name; /* table_name without database/ */
diff --git a/storage/innobase/row/row0purge.c b/storage/innodb_plugin/row/row0purge.c
similarity index 68%
rename from storage/innobase/row/row0purge.c
rename to storage/innodb_plugin/row/row0purge.c
index 1fef47da13f..500ebe571ab 100644
--- a/storage/innobase/row/row0purge.c
+++ b/storage/innodb_plugin/row/row0purge.c
@@ -1,7 +1,24 @@
-/******************************************************
-Purge obsolete records
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0purge.c
+Purge obsolete records
 
 Created 3/14/1997 Heikki Tuuri
 *******************************************************/
@@ -27,15 +44,15 @@ Created 3/14/1997 Heikki Tuuri
 #include "row0mysql.h"
 #include "log0log.h"
 
-/************************************************************************
-Creates a purge node to a query graph. */
-
+/********************************************************************//**
+Creates a purge node to a query graph.
+@return	own: purge node */
+UNIV_INTERN
 purge_node_t*
 row_purge_node_create(
 /*==================*/
-				/* out, own: purge node */
-	que_thr_t*	parent,	/* in: parent node, i.e., a thr node */
-	mem_heap_t*	heap)	/* in: memory heap where created */
+	que_thr_t*	parent,	/*!< in: parent node, i.e., a thr node */
+	mem_heap_t*	heap)	/*!< in: memory heap where created */
 {
 	purge_node_t*	node;
 
@@ -51,17 +68,17 @@ row_purge_node_create(
 	return(node);
 }
 
-/***************************************************************
+/***********************************************************//**
 Repositions the pcur in the purge node on the clustered index record,
-if found. */
+if found.
+@return	TRUE if the record was found */
 static
 ibool
 row_purge_reposition_pcur(
 /*======================*/
-				/* out: TRUE if the record was found */
-	ulint		mode,	/* in: latching mode */
-	purge_node_t*	node,	/* in: row purge node */
-	mtr_t*		mtr)	/* in: mtr */
+	ulint		mode,	/*!< in: latching mode */
+	purge_node_t*	node,	/*!< in: row purge node */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ibool	found;
 
@@ -82,16 +99,16 @@ row_purge_reposition_pcur(
 	return(found);
 }
 
-/***************************************************************
-Removes a delete marked clustered index record if possible. */
+/***********************************************************//**
+Removes a delete marked clustered index record if possible.
+@return TRUE if success, or if not found, or if modified after the
+delete marking */
 static
 ibool
 row_purge_remove_clust_if_poss_low(
 /*===============================*/
-				/* out: TRUE if success, or if not found, or
-				if modified after the delete marking */
-	purge_node_t*	node,	/* in: row purge node */
-	ulint		mode)	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+	purge_node_t*	node,	/*!< in: row purge node */
+	ulint		mode)	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
 {
 	dict_index_t*	index;
 	btr_pcur_t*	pcur;
@@ -102,7 +119,7 @@ row_purge_remove_clust_if_poss_low(
 	rec_t*		rec;
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	index = dict_table_get_first_index(node->table);
 
@@ -144,7 +161,8 @@ row_purge_remove_clust_if_poss_low(
 		success = btr_cur_optimistic_delete(btr_cur, &mtr);
 	} else {
 		ut_ad(mode == BTR_MODIFY_TREE);
-		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, &mtr);
+		btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+					   RB_NONE, &mtr);
 
 		if (err == DB_SUCCESS) {
 			success = TRUE;
@@ -160,14 +178,14 @@ row_purge_remove_clust_if_poss_low(
 	return(success);
 }
 
-/***************************************************************
+/***********************************************************//**
 Removes a clustered index record if it has not been modified after the delete
 marking. */
 static
 void
 row_purge_remove_clust_if_poss(
 /*===========================*/
-	purge_node_t*	node)	/* in: row purge node */
+	purge_node_t*	node)	/*!< in: row purge node */
 {
 	ibool	success;
 	ulint	n_tries	= 0;
@@ -196,17 +214,17 @@ retry:
 	ut_a(success);
 }
 
-/***************************************************************
-Removes a secondary index entry if possible. */
+/***********************************************************//**
+Removes a secondary index entry if possible.
+@return	TRUE if success or if not found */
 static
 ibool
 row_purge_remove_sec_if_poss_low(
 /*=============================*/
-				/* out: TRUE if success or if not found */
-	purge_node_t*	node,	/* in: row purge node */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry */
-	ulint		mode)	/* in: latch mode BTR_MODIFY_LEAF or
+	purge_node_t*	node,	/*!< in: row purge node */
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	entry,	/*!< in: index entry */
+	ulint		mode)	/*!< in: latch mode BTR_MODIFY_LEAF or
 				BTR_MODIFY_TREE */
 {
 	btr_pcur_t	pcur;
@@ -216,7 +234,7 @@ row_purge_remove_sec_if_poss_low(
 	ibool		found;
 	ulint		err;
 	mtr_t		mtr;
-	mtr_t*		mtr_vers;
+	mtr_t		mtr_vers;
 
 	log_free_check();
 	mtr_start(&mtr);
@@ -224,10 +242,18 @@ row_purge_remove_sec_if_poss_low(
 	found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
 
 	if (!found) {
-		/* Not found */
+		/* Not found.  This is a legitimate condition.  In a
+		rollback, InnoDB will remove secondary recs that would
+		be purged anyway.  Then the actual purge will not find
+		the secondary index record.  Also, the purge itself is
+		eager: if it comes to consider a secondary index
+		record, and notices it does not need to exist in the
+		index, it will remove it.  Then if/when the purge
+		comes to consider the secondary index record a second
+		time, it will not exist any more in the index. */
 
 		/* fputs("PURGE:........sec entry not found\n", stderr); */
-		/* dtuple_print(entry); */
+		/* dtuple_print(stderr, entry); */
 
 		btr_pcur_close(&pcur);
 		mtr_commit(&mtr);
@@ -241,21 +267,17 @@ row_purge_remove_sec_if_poss_low(
 	which cannot be purged yet, requires its existence. If some requires,
 	we should do nothing. */
 
-	mtr_vers = mem_alloc(sizeof(mtr_t));
+	mtr_start(&mtr_vers);
 
-	mtr_start(mtr_vers);
-
-	success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, mtr_vers);
+	success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr_vers);
 
 	if (success) {
 		old_has = row_vers_old_has_index_entry(
 			TRUE, btr_pcur_get_rec(&(node->pcur)),
-			mtr_vers, index, entry);
+			&mtr_vers, index, entry);
 	}
 
-	btr_pcur_commit_specify_mtr(&(node->pcur), mtr_vers);
-
-	mem_free(mtr_vers);
+	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
 
 	if (!success || !old_has) {
 		/* Remove the index record */
@@ -265,14 +287,9 @@ row_purge_remove_sec_if_poss_low(
 		} else {
 			ut_ad(mode == BTR_MODIFY_TREE);
 			btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
-						   FALSE, &mtr);
-			if (err == DB_SUCCESS) {
-				success = TRUE;
-			} else if (err == DB_OUT_OF_FILE_SPACE) {
-				success = FALSE;
-			} else {
-				ut_error;
-			}
+						   RB_NONE, &mtr);
+			success = err == DB_SUCCESS;
+			ut_a(success || err == DB_OUT_OF_FILE_SPACE);
 		}
 	}
 
@@ -282,15 +299,15 @@ row_purge_remove_sec_if_poss_low(
 	return(success);
 }
 
-/***************************************************************
+/***********************************************************//**
 Removes a secondary index entry if possible. */
 UNIV_INLINE
 void
 row_purge_remove_sec_if_poss(
 /*=========================*/
-	purge_node_t*	node,	/* in: row purge node */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry)	/* in: index entry */
+	purge_node_t*	node,	/*!< in: row purge node */
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry)	/*!< in: index entry */
 {
 	ibool	success;
 	ulint	n_tries		= 0;
@@ -322,13 +339,13 @@ retry:
 	ut_a(success);
 }
 
-/***************************************************************
+/***********************************************************//**
 Purges a delete marking of a record. */
 static
 void
 row_purge_del_mark(
 /*===============*/
-	purge_node_t*	node)	/* in: row purge node */
+	purge_node_t*	node)	/*!< in: row purge node */
 {
 	mem_heap_t*	heap;
 	dtuple_t*	entry;
@@ -342,8 +359,8 @@ row_purge_del_mark(
 		index = node->index;
 
 		/* Build the index entry */
-		entry = row_build_index_entry(node->row, index, heap);
-
+		entry = row_build_index_entry(node->row, NULL, index, heap);
+		ut_a(entry);
 		row_purge_remove_sec_if_poss(node, index, entry);
 
 		node->index = dict_table_get_next_index(node->index);
@@ -354,26 +371,22 @@ row_purge_del_mark(
 	row_purge_remove_clust_if_poss(node);
 }
 
-/***************************************************************
+/***********************************************************//**
 Purges an update of an existing record. Also purges an update of a delete
 marked record if that record contained an externally stored field. */
 static
 void
 row_purge_upd_exist_or_extern(
 /*==========================*/
-	purge_node_t*	node)	/* in: row purge node */
+	purge_node_t*	node)	/*!< in: row purge node */
 {
 	mem_heap_t*	heap;
 	dtuple_t*	entry;
 	dict_index_t*	index;
-	upd_field_t*	ufield;
 	ibool		is_insert;
 	ulint		rseg_id;
 	ulint		page_no;
 	ulint		offset;
-	ulint		internal_offset;
-	byte*		data_field;
-	ulint		data_field_len;
 	ulint		i;
 	mtr_t		mtr;
 
@@ -392,8 +405,9 @@ row_purge_upd_exist_or_extern(
 		if (row_upd_changes_ord_field_binary(NULL, node->index,
 						     node->update)) {
 			/* Build the older version of the index entry */
-			entry = row_build_index_entry(node->row, index, heap);
-
+			entry = row_build_index_entry(node->row, NULL,
+						      index, heap);
+			ut_a(entry);
 			row_purge_remove_sec_if_poss(node, index, entry);
 		}
 
@@ -406,16 +420,23 @@ skip_secondaries:
 	/* Free possible externally stored fields */
 	for (i = 0; i < upd_get_n_fields(node->update); i++) {
 
-		ufield = upd_get_nth_field(node->update, i);
+		const upd_field_t*	ufield
+			= upd_get_nth_field(node->update, i);
+
+		if (dfield_is_ext(&ufield->new_val)) {
+			buf_block_t*	block;
+			ulint		internal_offset;
+			byte*		data_field;
 
-		if (ufield->extern_storage) {
 			/* We use the fact that new_val points to
 			node->undo_rec and get thus the offset of
-			dfield data inside the unod record. Then we
+			dfield data inside the undo record. Then we
 			can calculate from node->roll_ptr the file
 			address of the new_val data */
 
-			internal_offset = ((byte*)ufield->new_val.data)
+			internal_offset
+				= ((const byte*)
+				   dfield_get_data(&ufield->new_val))
 				- node->undo_rec;
 
 			ut_a(internal_offset < UNIV_PAGE_SIZE);
@@ -446,46 +467,45 @@ skip_secondaries:
 			/* We assume in purge of externally stored fields
 			that the space id of the undo log record is 0! */
 
-			data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr)
+			block = buf_page_get(0, 0, page_no, RW_X_LATCH, &mtr);
+			buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
+
+			data_field = buf_block_get_frame(block)
 				+ offset + internal_offset;
 
-#ifdef UNIV_SYNC_DEBUG
-			buf_page_dbg_add_level(buf_frame_align(data_field),
-					       SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
-			data_field_len = ufield->new_val.len;
-
-			btr_free_externally_stored_field(index, data_field,
-							 data_field_len,
-							 FALSE, &mtr);
+			ut_a(dfield_get_len(&ufield->new_val)
+			     >= BTR_EXTERN_FIELD_REF_SIZE);
+			btr_free_externally_stored_field(
+				index,
+				data_field + dfield_get_len(&ufield->new_val)
+				- BTR_EXTERN_FIELD_REF_SIZE,
+				NULL, NULL, NULL, 0, RB_NONE, &mtr);
 			mtr_commit(&mtr);
 		}
 	}
 }
 
-/***************************************************************
-Parses the row reference and other info in a modify undo log record. */
+/***********************************************************//**
+Parses the row reference and other info in a modify undo log record.
+@return TRUE if purge operation required: NOTE that then the CALLER
+must unfreeze data dictionary! */
 static
 ibool
 row_purge_parse_undo_rec(
 /*=====================*/
-				/* out: TRUE if purge operation required:
-				NOTE that then the CALLER must unfreeze
-				data dictionary! */
-	purge_node_t*	node,	/* in: row undo node */
+	purge_node_t*	node,	/*!< in: row undo node */
 	ibool*		updated_extern,
-				/* out: TRUE if an externally stored field
+				/*!< out: TRUE if an externally stored field
 				was updated */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	dict_index_t*	clust_index;
 	byte*		ptr;
 	trx_t*		trx;
-	dulint		undo_no;
+	undo_no_t	undo_no;
 	dulint		table_id;
-	dulint		trx_id;
-	dulint		roll_ptr;
+	trx_id_t	trx_id;
+	roll_ptr_t	roll_ptr;
 	ulint		info_bits;
 	ulint		type;
 	ulint		cmpl_info;
@@ -528,9 +548,8 @@ row_purge_parse_undo_rec(
 
 	if (node->table == NULL) {
 		/* The table has been dropped: no need to do purge */
-
+err_exit:
 		row_mysql_unfreeze_data_dictionary(trx);
-
 		return(FALSE);
 	}
 
@@ -539,9 +558,7 @@ row_purge_parse_undo_rec(
 
 		node->table = NULL;
 
-		row_mysql_unfreeze_data_dictionary(trx);
-
-		return(FALSE);
+		goto err_exit;
 	}
 
 	clust_index = dict_table_get_first_index(node->table);
@@ -549,9 +566,7 @@ row_purge_parse_undo_rec(
 	if (clust_index == NULL) {
 		/* The table was corrupt in the data dictionary */
 
-		row_mysql_unfreeze_data_dictionary(trx);
-
-		return(FALSE);
+		goto err_exit;
 	}
 
 	ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
@@ -564,30 +579,31 @@ row_purge_parse_undo_rec(
 	/* Read to the partial row the fields that occur in indexes */
 
 	if (!(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
-		ptr = trx_undo_rec_get_partial_row(ptr, clust_index,
-						   &(node->row), node->heap);
+		ptr = trx_undo_rec_get_partial_row(
+			ptr, clust_index, &node->row,
+			type == TRX_UNDO_UPD_DEL_REC,
+			node->heap);
 	}
 
 	return(TRUE);
 }
 
-/***************************************************************
+/***********************************************************//**
 Fetches an undo log record and does the purge for the recorded operation.
 If none left, or the current purge completed, returns the control to the
-parent node, which is always a query thread node. */
+parent node, which is always a query thread node.
+@return	DB_SUCCESS if operation successfully completed, else error code */
 static
 ulint
 row_purge(
 /*======*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, else error code */
-	purge_node_t*	node,	/* in: row purge node */
-	que_thr_t*	thr)	/* in: query thread */
+	purge_node_t*	node,	/*!< in: row purge node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
-	dulint	roll_ptr;
-	ibool	purge_needed;
-	ibool	updated_extern;
-	trx_t*	trx;
+	roll_ptr_t	roll_ptr;
+	ibool		purge_needed;
+	ibool		updated_extern;
+	trx_t*		trx;
 
 	ut_ad(node && thr);
 
@@ -646,15 +662,15 @@ row_purge(
 	return(DB_SUCCESS);
 }
 
-/***************************************************************
+/***********************************************************//**
 Does the purge operation for a single undo log record. This is a high-level
-function used in an SQL execution graph. */
-
+function used in an SQL execution graph.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 row_purge_step(
 /*===========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	purge_node_t*	node;
 	ulint		err;
diff --git a/storage/innodb_plugin/row/row0row.c b/storage/innodb_plugin/row/row0row.c
new file mode 100644
index 00000000000..128ac3ba3e8
--- /dev/null
+++ b/storage/innodb_plugin/row/row0row.c
@@ -0,0 +1,1168 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0row.c
+General row routines
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#include "row0row.h"
+
+#ifdef UNIV_NONINL
+#include "row0row.ic"
+#endif
+
+#include "data0type.h"
+#include "dict0dict.h"
+#include "btr0btr.h"
+#include "ha_prototypes.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0roll.h"
+#include "trx0undo.h"
+#include "trx0purge.h"
+#include "trx0rec.h"
+#include "que0que.h"
+#include "row0ext.h"
+#include "row0upd.h"
+#include "rem0cmp.h"
+#include "read0read.h"
+#include "ut0mem.h"
+
+/*********************************************************************//**
+Gets the offset of trx id field, in bytes relative to the origin of
+a clustered index record.
+@return	offset of DATA_TRX_ID */
+UNIV_INTERN
+ulint
+row_get_trx_id_offset(
+/*==================*/
+	const rec_t*	rec __attribute__((unused)),
+				/*!< in: record */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+	ulint	pos;
+	ulint	offset;
+	ulint	len;
+
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+
+	offset = rec_get_nth_field_offs(offsets, pos, &len);
+
+	ut_ad(len == DATA_TRX_ID_LEN);
+
+	return(offset);
+}
+
+/*****************************************************************//**
+When an insert or purge to a table is performed, this function builds
+the entry to be inserted into or purged from an index on the table.
+@return index entry which should be inserted or purged, or NULL if the
+externally stored columns in the clustered index record are
+unavailable and ext != NULL */
+UNIV_INTERN
+dtuple_t*
+row_build_index_entry(
+/*==================*/
+	const dtuple_t*	row,	/*!< in: row which should be
+				inserted or purged */
+	row_ext_t*	ext,	/*!< in: externally stored column prefixes,
+				or NULL */
+	dict_index_t*	index,	/*!< in: index on the table */
+	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
+				the index entry is allocated */
+{
+	dtuple_t*	entry;
+	ulint		entry_len;
+	ulint		i;
+
+	ut_ad(row && index && heap);
+	ut_ad(dtuple_check_typed(row));
+
+	entry_len = dict_index_get_n_fields(index);
+	entry = dtuple_create(heap, entry_len);
+
+	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
+		dtuple_set_n_fields_cmp(entry, entry_len);
+		/* There may only be externally stored columns
+		in a clustered index B-tree of a user table. */
+		ut_a(!ext);
+	} else {
+		dtuple_set_n_fields_cmp(
+			entry, dict_index_get_n_unique_in_tree(index));
+	}
+
+	for (i = 0; i < entry_len; i++) {
+		const dict_field_t*	ind_field
+			= dict_index_get_nth_field(index, i);
+		const dict_col_t*	col
+			= ind_field->col;
+		ulint			col_no
+			= dict_col_get_no(col);
+		dfield_t*		dfield
+			= dtuple_get_nth_field(entry, i);
+		const dfield_t*		dfield2
+			= dtuple_get_nth_field(row, col_no);
+		ulint			len
+			= dfield_get_len(dfield2);
+
+		dfield_copy(dfield, dfield2);
+
+		if (dfield_is_null(dfield) || ind_field->prefix_len == 0) {
+			continue;
+		}
+
+		/* If a column prefix index, take only the prefix.
+		Prefix-indexed columns may be externally stored. */
+		ut_ad(col->ord_part);
+
+		if (UNIV_LIKELY_NULL(ext)) {
+			/* See if the column is stored externally. */
+			const byte*	buf = row_ext_lookup(ext, col_no,
+							     &len);
+			if (UNIV_LIKELY_NULL(buf)) {
+				if (UNIV_UNLIKELY(buf == field_ref_zero)) {
+					return(NULL);
+				}
+				dfield_set_data(dfield, buf, len);
+			}
+		} else if (dfield_is_ext(dfield)) {
+			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+			len -= BTR_EXTERN_FIELD_REF_SIZE;
+			ut_a(ind_field->prefix_len <= len
+			     || dict_index_is_clust(index));
+		}
+
+		len = dtype_get_at_most_n_mbchars(
+			col->prtype, col->mbminlen, col->mbmaxlen,
+			ind_field->prefix_len, len, dfield_get_data(dfield));
+		dfield_set_len(dfield, len);
+	}
+
+	ut_ad(dtuple_check_typed(entry));
+
+	return(entry);
+}
+
+/*******************************************************************//**
+An inverse function to row_build_index_entry. Builds a row from a
+record in a clustered index.
+@return	own: row built; see the NOTE below! */
+UNIV_INTERN
+dtuple_t*
+row_build(
+/*======*/
+	ulint			type,	/*!< in: ROW_COPY_POINTERS or
+					ROW_COPY_DATA; the latter
+					copies also the data fields to
+					heap while the first only
+					places pointers to data fields
+					on the index page, and thus is
+					more efficient */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const rec_t*		rec,	/*!< in: record in the clustered
+					index; NOTE: in the case
+					ROW_COPY_POINTERS the data
+					fields in the row will point
+					directly into this record,
+					therefore, the buffer page of
+					this record must be at least
+					s-latched and the latch held
+					as long as the row dtuple is used! */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec,index)
+					or NULL, in which case this function
+					will invoke rec_get_offsets() */
+	const dict_table_t*	col_table,
+					/*!< in: table, to check which
+					externally stored columns
+					occur in the ordering columns
+					of an index, or NULL if
+					index->table should be
+					consulted instead */
+	row_ext_t**		ext,	/*!< out, own: cache of
+					externally stored column
+					prefixes, or NULL */
+	mem_heap_t*		heap)	/*!< in: memory heap from which
+					the memory needed is allocated */
+{
+	dtuple_t*		row;
+	const dict_table_t*	table;
+	ulint			n_fields;
+	ulint			n_ext_cols;
+	ulint*			ext_cols	= NULL; /* remove warning */
+	ulint			len;
+	ulint			row_len;
+	byte*			buf;
+	ulint			i;
+	ulint			j;
+	mem_heap_t*		tmp_heap	= NULL;
+	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
+	rec_offs_init(offsets_);
+
+	ut_ad(index && rec && heap);
+	ut_ad(dict_index_is_clust(index));
+
+	if (!offsets) {
+		offsets = rec_get_offsets(rec, index, offsets_,
+					  ULINT_UNDEFINED, &tmp_heap);
+	} else {
+		ut_ad(rec_offs_validate(rec, index, offsets));
+	}
+
+	if (type != ROW_COPY_POINTERS) {
+		/* Take a copy of rec to heap */
+		buf = mem_heap_alloc(heap, rec_offs_size(offsets));
+		rec = rec_copy(buf, rec, offsets);
+		/* Avoid a debug assertion in rec_offs_validate(). */
+		rec_offs_make_valid(rec, index, (ulint*) offsets);
+	}
+
+	table = index->table;
+	row_len = dict_table_get_n_cols(table);
+
+	row = dtuple_create(heap, row_len);
+
+	dict_table_copy_types(row, table);
+
+	dtuple_set_info_bits(row, rec_get_info_bits(
+				     rec, dict_table_is_comp(table)));
+
+	n_fields = rec_offs_n_fields(offsets);
+	n_ext_cols = rec_offs_n_extern(offsets);
+	if (n_ext_cols) {
+		ext_cols = mem_heap_alloc(heap, n_ext_cols * sizeof *ext_cols);
+	}
+
+	for (i = j = 0; i < n_fields; i++) {
+		dict_field_t*		ind_field
+			= dict_index_get_nth_field(index, i);
+		const dict_col_t*	col
+			= dict_field_get_col(ind_field);
+		ulint			col_no
+			= dict_col_get_no(col);
+		dfield_t*		dfield
+			= dtuple_get_nth_field(row, col_no);
+
+		if (ind_field->prefix_len == 0) {
+
+			const byte*	field = rec_get_nth_field(
+				rec, offsets, i, &len);
+
+			dfield_set_data(dfield, field, len);
+		}
+
+		if (rec_offs_nth_extern(offsets, i)) {
+			dfield_set_ext(dfield);
+
+			if (UNIV_LIKELY_NULL(col_table)) {
+				ut_a(col_no
+				     < dict_table_get_n_cols(col_table));
+				col = dict_table_get_nth_col(
+					col_table, col_no);
+			}
+
+			if (col->ord_part) {
+				/* We will have to fetch prefixes of
+				externally stored columns that are
+				referenced by column prefixes. */
+				ext_cols[j++] = col_no;
+			}
+		}
+	}
+
+	ut_ad(dtuple_check_typed(row));
+
+	if (j) {
+		*ext = row_ext_create(j, ext_cols, row,
+				      dict_table_zip_size(index->table),
+				      heap);
+	} else {
+		*ext = NULL;
+	}
+
+	if (tmp_heap) {
+		mem_heap_free(tmp_heap);
+	}
+
+	return(row);
+}
+
+/*******************************************************************//**
+Converts an index record to a typed data tuple.
+@return index entry built; does not set info_bits, and the data fields
+in the entry will point directly to rec */
+UNIV_INTERN
+dtuple_t*
+row_rec_to_index_entry_low(
+/*=======================*/
+	const rec_t*		rec,	/*!< in: record in the index */
+	const dict_index_t*	index,	/*!< in: index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint*			n_ext,	/*!< out: number of externally
+					stored columns */
+	mem_heap_t*		heap)	/*!< in: memory heap from which
+					the memory needed is allocated */
+{
+	dtuple_t*	entry;
+	dfield_t*	dfield;
+	ulint		i;
+	const byte*	field;
+	ulint		len;
+	ulint		rec_len;
+
+	ut_ad(rec && heap && index);
+	/* Because this function may be invoked by row0merge.c
+	on a record whose header is in different format, the check
+	rec_offs_validate(rec, index, offsets) must be avoided here. */
+	ut_ad(n_ext);
+	*n_ext = 0;
+
+	rec_len = rec_offs_n_fields(offsets);
+
+	entry = dtuple_create(heap, rec_len);
+
+	dtuple_set_n_fields_cmp(entry,
+				dict_index_get_n_unique_in_tree(index));
+	ut_ad(rec_len == dict_index_get_n_fields(index));
+
+	dict_index_copy_types(entry, index, rec_len);
+
+	for (i = 0; i < rec_len; i++) {
+
+		dfield = dtuple_get_nth_field(entry, i);
+		field = rec_get_nth_field(rec, offsets, i, &len);
+
+		dfield_set_data(dfield, field, len);
+
+		if (rec_offs_nth_extern(offsets, i)) {
+			dfield_set_ext(dfield);
+			(*n_ext)++;
+		}
+	}
+
+	ut_ad(dtuple_check_typed(entry));
+
+	return(entry);
+}
+
+/*******************************************************************//**
+Converts an index record to a typed data tuple. NOTE that externally
+stored (often big) fields are NOT copied to heap.
+@return	own: index entry built; see the NOTE below! */
+UNIV_INTERN
+dtuple_t*
+row_rec_to_index_entry(
+/*===================*/
+	ulint			type,	/*!< in: ROW_COPY_DATA, or
+					ROW_COPY_POINTERS: the former
+					copies also the data fields to
+					heap as the latter only places
+					pointers to data fields on the
+					index page */
+	const rec_t*		rec,	/*!< in: record in the index;
+					NOTE: in the case
+					ROW_COPY_POINTERS the data
+					fields in the row will point
+					directly into this record,
+					therefore, the buffer page of
+					this record must be at least
+					s-latched and the latch held
+					as long as the dtuple is used! */
+	const dict_index_t*	index,	/*!< in: index */
+	ulint*			offsets,/*!< in/out: rec_get_offsets(rec) */
+	ulint*			n_ext,	/*!< out: number of externally
+					stored columns */
+	mem_heap_t*		heap)	/*!< in: memory heap from which
+					the memory needed is allocated */
+{
+	dtuple_t*	entry;
+	byte*		buf;
+
+	ut_ad(rec && heap && index);
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	if (type == ROW_COPY_DATA) {
+		/* Take a copy of rec to heap */
+		buf = mem_heap_alloc(heap, rec_offs_size(offsets));
+		rec = rec_copy(buf, rec, offsets);
+		/* Avoid a debug assertion in rec_offs_validate(). */
+		rec_offs_make_valid(rec, index, offsets);
+	}
+
+	entry = row_rec_to_index_entry_low(rec, index, offsets, n_ext, heap);
+
+	dtuple_set_info_bits(entry,
+			     rec_get_info_bits(rec, rec_offs_comp(offsets)));
+
+	return(entry);
+}
+
+/*******************************************************************//**
+Builds from a secondary index record a row reference with which we can
+search the clustered index record.
+@return	own: row reference built; see the NOTE below! */
+UNIV_INTERN
+dtuple_t*
+row_build_row_ref(
+/*==============*/
+	ulint		type,	/*!< in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+				the former copies also the data fields to
+				heap, whereas the latter only places pointers
+				to data fields on the index page */
+	dict_index_t*	index,	/*!< in: secondary index */
+	const rec_t*	rec,	/*!< in: record in the index;
+				NOTE: in the case ROW_COPY_POINTERS
+				the data fields in the row will point
+				directly into this record, therefore,
+				the buffer page of this record must be
+				at least s-latched and the latch held
+				as long as the row reference is used! */
+	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
+				needed is allocated */
+{
+	dict_table_t*	table;
+	dict_index_t*	clust_index;
+	dfield_t*	dfield;
+	dtuple_t*	ref;
+	const byte*	field;
+	ulint		len;
+	ulint		ref_len;
+	ulint		pos;
+	byte*		buf;
+	ulint		clust_col_prefix_len;
+	ulint		i;
+	mem_heap_t*	tmp_heap	= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	ut_ad(index && rec && heap);
+	ut_ad(!dict_index_is_clust(index));
+
+	offsets = rec_get_offsets(rec, index, offsets,
+				  ULINT_UNDEFINED, &tmp_heap);
+	/* Secondary indexes must not contain externally stored columns. */
+	ut_ad(!rec_offs_any_extern(offsets));
+
+	if (type == ROW_COPY_DATA) {
+		/* Take a copy of rec to heap */
+
+		buf = mem_heap_alloc(heap, rec_offs_size(offsets));
+
+		rec = rec_copy(buf, rec, offsets);
+		/* Avoid a debug assertion in rec_offs_validate(). */
+		rec_offs_make_valid(rec, index, offsets);
+	}
+
+	table = index->table;
+
+	clust_index = dict_table_get_first_index(table);
+
+	ref_len = dict_index_get_n_unique(clust_index);
+
+	ref = dtuple_create(heap, ref_len);
+
+	dict_index_copy_types(ref, clust_index, ref_len);
+
+	for (i = 0; i < ref_len; i++) {
+		dfield = dtuple_get_nth_field(ref, i);
+
+		pos = dict_index_get_nth_field_pos(index, clust_index, i);
+
+		ut_a(pos != ULINT_UNDEFINED);
+
+		field = rec_get_nth_field(rec, offsets, pos, &len);
+
+		dfield_set_data(dfield, field, len);
+
+		/* If the primary key contains a column prefix, then the
+		secondary index may contain a longer prefix of the same
+		column, or the full column, and we must adjust the length
+		accordingly. */
+
+		clust_col_prefix_len = dict_index_get_nth_field(
+			clust_index, i)->prefix_len;
+
+		if (clust_col_prefix_len > 0) {
+			if (len != UNIV_SQL_NULL) {
+
+				const dtype_t*	dtype
+					= dfield_get_type(dfield);
+
+				dfield_set_len(dfield,
+					       dtype_get_at_most_n_mbchars(
+						       dtype->prtype,
+						       dtype->mbminlen,
+						       dtype->mbmaxlen,
+						       clust_col_prefix_len,
+						       len, (char*) field));
+			}
+		}
+	}
+
+	ut_ad(dtuple_check_typed(ref));
+	if (tmp_heap) {
+		mem_heap_free(tmp_heap);
+	}
+
+	return(ref);
+}
+
+/*******************************************************************//**
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+UNIV_INTERN
+void
+row_build_row_ref_in_tuple(
+/*=======================*/
+	dtuple_t*		ref,	/*!< in/out: row reference built;
+					see the NOTE below! */
+	const rec_t*		rec,	/*!< in: record in the index;
+					NOTE: the data fields in ref
+					will point directly into this
+					record, therefore, the buffer
+					page of this record must be at
+					least s-latched and the latch
+					held as long as the row
+					reference is used! */
+	const dict_index_t*	index,	/*!< in: secondary index */
+	ulint*			offsets,/*!< in: rec_get_offsets(rec, index)
+					or NULL */
+	trx_t*			trx)	/*!< in: transaction */
+{
+	const dict_index_t*	clust_index;
+	dfield_t*		dfield;
+	const byte*		field;
+	ulint			len;
+	ulint			ref_len;
+	ulint			pos;
+	ulint			clust_col_prefix_len;
+	ulint			i;
+	mem_heap_t*		heap		= NULL;
+	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
+	rec_offs_init(offsets_);
+
+	ut_a(ref);
+	ut_a(index);
+	ut_a(rec);
+	ut_ad(!dict_index_is_clust(index));
+
+	if (UNIV_UNLIKELY(!index->table)) {
+		fputs("InnoDB: table ", stderr);
+notfound:
+		ut_print_name(stderr, trx, TRUE, index->table_name);
+		fputs(" for index ", stderr);
+		ut_print_name(stderr, trx, FALSE, index->name);
+		fputs(" not found\n", stderr);
+		ut_error;
+	}
+
+	clust_index = dict_table_get_first_index(index->table);
+
+	if (UNIV_UNLIKELY(!clust_index)) {
+		fputs("InnoDB: clust index for table ", stderr);
+		goto notfound;
+	}
+
+	if (!offsets) {
+		offsets = rec_get_offsets(rec, index, offsets_,
+					  ULINT_UNDEFINED, &heap);
+	} else {
+		ut_ad(rec_offs_validate(rec, index, offsets));
+	}
+
+	/* Secondary indexes must not contain externally stored columns. */
+	ut_ad(!rec_offs_any_extern(offsets));
+	ref_len = dict_index_get_n_unique(clust_index);
+
+	ut_ad(ref_len == dtuple_get_n_fields(ref));
+
+	dict_index_copy_types(ref, clust_index, ref_len);
+
+	for (i = 0; i < ref_len; i++) {
+		dfield = dtuple_get_nth_field(ref, i);
+
+		pos = dict_index_get_nth_field_pos(index, clust_index, i);
+
+		ut_a(pos != ULINT_UNDEFINED);
+
+		field = rec_get_nth_field(rec, offsets, pos, &len);
+
+		dfield_set_data(dfield, field, len);
+
+		/* If the primary key contains a column prefix, then the
+		secondary index may contain a longer prefix of the same
+		column, or the full column, and we must adjust the length
+		accordingly. */
+
+		clust_col_prefix_len = dict_index_get_nth_field(
+			clust_index, i)->prefix_len;
+
+		if (clust_col_prefix_len > 0) {
+			if (len != UNIV_SQL_NULL) {
+
+				const dtype_t*	dtype
+					= dfield_get_type(dfield);
+
+				dfield_set_len(dfield,
+					       dtype_get_at_most_n_mbchars(
+						       dtype->prtype,
+						       dtype->mbminlen,
+						       dtype->mbmaxlen,
+						       clust_col_prefix_len,
+						       len, (char*) field));
+			}
+		}
+	}
+
+	ut_ad(dtuple_check_typed(ref));
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+}
+
+/***************************************************************//**
+Searches the clustered index record for a row, if we have the row reference.
+@return	TRUE if found */
+UNIV_INTERN
+ibool
+row_search_on_row_ref(
+/*==================*/
+	btr_pcur_t*		pcur,	/*!< out: persistent cursor, which must
+					be closed by the caller */
+	ulint			mode,	/*!< in: BTR_MODIFY_LEAF, ... */
+	const dict_table_t*	table,	/*!< in: table */
+	const dtuple_t*		ref,	/*!< in: row reference */
+	mtr_t*			mtr)	/*!< in/out: mtr */
+{
+	ulint		low_match;
+	rec_t*		rec;
+	dict_index_t*	index;
+
+	ut_ad(dtuple_check_typed(ref));
+
+	index = dict_table_get_first_index(table);
+
+	ut_a(dtuple_get_n_fields(ref) == dict_index_get_n_unique(index));
+
+	btr_pcur_open(index, ref, PAGE_CUR_LE, mode, pcur, mtr);
+
+	low_match = btr_pcur_get_low_match(pcur);
+
+	rec = btr_pcur_get_rec(pcur);
+
+	if (page_rec_is_infimum(rec)) {
+
+		return(FALSE);
+	}
+
+	if (low_match != dtuple_get_n_fields(ref)) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/*********************************************************************//**
+Fetches the clustered index record for a secondary index record. The latches
+on the secondary index record are preserved.
+@return	record or NULL, if no record found */
+UNIV_INTERN
+rec_t*
+row_get_clust_rec(
+/*==============*/
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF, ... */
+	const rec_t*	rec,	/*!< in: record in a secondary index */
+	dict_index_t*	index,	/*!< in: secondary index */
+	dict_index_t**	clust_index,/*!< out: clustered index */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	mem_heap_t*	heap;
+	dtuple_t*	ref;
+	dict_table_t*	table;
+	btr_pcur_t	pcur;
+	ibool		found;
+	rec_t*		clust_rec;
+
+	ut_ad(!dict_index_is_clust(index));
+
+	table = index->table;
+
+	heap = mem_heap_create(256);
+
+	ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec, heap);
+
+	found = row_search_on_row_ref(&pcur, mode, table, ref, mtr);
+
+	clust_rec = found ? btr_pcur_get_rec(&pcur) : NULL;
+
+	mem_heap_free(heap);
+
+	btr_pcur_close(&pcur);
+
+	*clust_index = dict_table_get_first_index(table);
+
+	return(clust_rec);
+}
+
+/***************************************************************//**
+Searches an index record.
+@return	TRUE if found */
+UNIV_INTERN
+ibool
+row_search_index_entry(
+/*===================*/
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	entry,	/*!< in: index entry */
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF, ... */
+	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor, which must
+				be closed by the caller */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ulint	n_fields;
+	ulint	low_match;
+	rec_t*	rec;
+
+	ut_ad(dtuple_check_typed(entry));
+
+	btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr);
+	low_match = btr_pcur_get_low_match(pcur);
+
+	rec = btr_pcur_get_rec(pcur);
+
+	n_fields = dtuple_get_n_fields(entry);
+
+	return(!page_rec_is_infimum(rec) && low_match == n_fields);
+}
+
+#include <my_sys.h>
+
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) that is of
+type DATA_INT using "prtype" and writes the result to "buf".
+If the data is in unknown format, then nothing is written to "buf",
+0 is returned and "format_in_hex" is set to TRUE, otherwise
+"format_in_hex" is left untouched.
+Not more than "buf_size" bytes are written to "buf".
+The result is always '\0'-terminated (provided buf_size > 0) and the
+number of bytes that were written to "buf" is returned (including the
+terminating '\0').
+@return	number of bytes that were written */
+static
+ulint
+row_raw_format_int(
+/*===============*/
+	const char*	data,		/*!< in: raw data */
+	ulint		data_len,	/*!< in: raw data length
+					in bytes */
+	ulint		prtype,		/*!< in: precise type */
+	char*		buf,		/*!< out: output buffer */
+	ulint		buf_size,	/*!< in: output buffer size
+					in bytes */
+	ibool*		format_in_hex)	/*!< out: should the data be
+					formated in hex */
+{
+	ulint	ret;
+
+	if (data_len <= sizeof(ullint)) {
+
+		ullint		value;
+		ibool		unsigned_type = prtype & DATA_UNSIGNED;
+
+		value = mach_read_int_type((const byte*) data,
+					   data_len, unsigned_type);
+
+		if (unsigned_type) {
+
+			ret = ut_snprintf(buf, buf_size, "%llu",
+					  value) + 1;
+		} else {
+
+			ret = ut_snprintf(buf, buf_size, "%lld",
+					  (long long) value) + 1;
+		}
+
+	} else {
+
+		*format_in_hex = TRUE;
+		ret = 0;
+	}
+
+	return(ut_min(ret, buf_size));
+}
+
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) that is of
+type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "prtype" and writes the
+result to "buf".
+If the data is in binary format, then nothing is written to "buf",
+0 is returned and "format_in_hex" is set to TRUE, otherwise
+"format_in_hex" is left untouched.
+Not more than "buf_size" bytes are written to "buf".
+The result is always '\0'-terminated (provided buf_size > 0) and the
+number of bytes that were written to "buf" is returned (including the
+terminating '\0').
+@return	number of bytes that were written */
+static
+ulint
+row_raw_format_str(
+/*===============*/
+	const char*	data,		/*!< in: raw data */
+	ulint		data_len,	/*!< in: raw data length
+					in bytes */
+	ulint		prtype,		/*!< in: precise type */
+	char*		buf,		/*!< out: output buffer */
+	ulint		buf_size,	/*!< in: output buffer size
+					in bytes */
+	ibool*		format_in_hex)	/*!< out: should the data be
+					formated in hex */
+{
+	ulint	charset_coll;
+
+	if (buf_size == 0) {
+
+		return(0);
+	}
+
+	/* we assume system_charset_info is UTF-8 */
+
+	charset_coll = dtype_get_charset_coll(prtype);
+
+	if (UNIV_LIKELY(dtype_is_utf8(prtype))) {
+
+		return(ut_str_sql_format(data, data_len, buf, buf_size));
+	}
+	/* else */
+
+	if (charset_coll == DATA_MYSQL_BINARY_CHARSET_COLL) {
+
+		*format_in_hex = TRUE;
+		return(0);
+	}
+	/* else */
+
+	return(innobase_raw_format(data, data_len, charset_coll,
+					  buf, buf_size));
+}
+
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) using
+"dict_field" and writes the result to "buf".
+Not more than "buf_size" bytes are written to "buf".
+The result is always NUL-terminated (provided buf_size is positive) and the
+number of bytes that were written to "buf" is returned (including the
+terminating NUL).
+@return	number of bytes that were written */
+UNIV_INTERN
+ulint
+row_raw_format(
+/*===========*/
+	const char*		data,		/*!< in: raw data */
+	ulint			data_len,	/*!< in: raw data length
+						in bytes */
+	const dict_field_t*	dict_field,	/*!< in: index field */
+	char*			buf,		/*!< out: output buffer */
+	ulint			buf_size)	/*!< in: output buffer size
+						in bytes */
+{
+	ulint	mtype;
+	ulint	prtype;
+	ulint	ret;
+	ibool	format_in_hex;
+
+	if (buf_size == 0) {
+
+		return(0);
+	}
+
+	if (data_len == UNIV_SQL_NULL) {
+
+		ret = ut_snprintf((char*) buf, buf_size, "NULL") + 1;
+
+		return(ut_min(ret, buf_size));
+	}
+
+	mtype = dict_field->col->mtype;
+	prtype = dict_field->col->prtype;
+
+	format_in_hex = FALSE;
+
+	switch (mtype) {
+	case DATA_INT:
+
+		ret = row_raw_format_int(data, data_len, prtype,
+					 buf, buf_size, &format_in_hex);
+		break;
+	case DATA_CHAR:
+	case DATA_VARCHAR:
+	case DATA_MYSQL:
+	case DATA_VARMYSQL:
+
+		ret = row_raw_format_str(data, data_len, prtype,
+					 buf, buf_size, &format_in_hex);
+		break;
+	/* XXX support more data types */
+	default:
+
+		format_in_hex = TRUE;
+	}
+
+	if (format_in_hex) {
+
+		if (UNIV_LIKELY(buf_size > 2)) {
+
+			memcpy(buf, "0x", 2);
+			buf += 2;
+			buf_size -= 2;
+			ret = 2 + ut_raw_to_hex(data, data_len,
+						buf, buf_size);
+		} else {
+
+			buf[0] = '\0';
+			ret = 1;
+		}
+	}
+
+	return(ret);
+}
+
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+#include "ut0dbg.h"
+
+void
+test_row_raw_format_int()
+{
+	ulint	ret;
+	char	buf[128];
+	ibool	format_in_hex;
+
+#define CALL_AND_TEST(data, data_len, prtype, buf, buf_size,\
+		      ret_expected, buf_expected, format_in_hex_expected)\
+	do {\
+		ibool	ok = TRUE;\
+		ulint	i;\
+		memset(buf, 'x', 10);\
+		buf[10] = '\0';\
+		format_in_hex = FALSE;\
+		fprintf(stderr, "TESTING \"\\x");\
+		for (i = 0; i < data_len; i++) {\
+			fprintf(stderr, "%02hhX", data[i]);\
+		}\
+		fprintf(stderr, "\", %lu, %lu, %lu\n",\
+                        (ulint) data_len, (ulint) prtype,\
+			(ulint) buf_size);\
+		ret = row_raw_format_int(data, data_len, prtype,\
+					 buf, buf_size, &format_in_hex);\
+		if (ret != ret_expected) {\
+			fprintf(stderr, "expected ret %lu, got %lu\n",\
+				(ulint) ret_expected, ret);\
+			ok = FALSE;\
+                }\
+                if (strcmp((char*) buf, buf_expected) != 0) {\
+                        fprintf(stderr, "expected buf \"%s\", got \"%s\"\n",\
+                                buf_expected, buf);\
+                        ok = FALSE;\
+                }\
+                if (format_in_hex != format_in_hex_expected) {\
+                        fprintf(stderr, "expected format_in_hex %d, got %d\n",\
+                                (int) format_in_hex_expected,\
+				(int) format_in_hex);\
+                        ok = FALSE;\
+                }\
+                if (ok) {\
+                        fprintf(stderr, "OK: %lu, \"%s\" %d\n\n",\
+                                (ulint) ret, buf, (int) format_in_hex);\
+                } else {\
+                        return;\
+                }\
+        } while (0)
+
+#if 1
+	/* min values for signed 1-8 byte integers */
+
+	CALL_AND_TEST("\x00", 1, 0,
+		      buf, sizeof(buf), 5, "-128", 0);
+
+	CALL_AND_TEST("\x00\x00", 2, 0,
+		      buf, sizeof(buf), 7, "-32768", 0);
+
+	CALL_AND_TEST("\x00\x00\x00", 3, 0,
+		      buf, sizeof(buf), 9, "-8388608", 0);
+
+	CALL_AND_TEST("\x00\x00\x00\x00", 4, 0,
+		      buf, sizeof(buf), 12, "-2147483648", 0);
+
+	CALL_AND_TEST("\x00\x00\x00\x00\x00", 5, 0,
+		      buf, sizeof(buf), 14, "-549755813888", 0);
+
+	CALL_AND_TEST("\x00\x00\x00\x00\x00\x00", 6, 0,
+		      buf, sizeof(buf), 17, "-140737488355328", 0);
+
+	CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00", 7, 0,
+		      buf, sizeof(buf), 19, "-36028797018963968", 0);
+
+	CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00\x00", 8, 0,
+		      buf, sizeof(buf), 21, "-9223372036854775808", 0);
+
+	/* min values for unsigned 1-8 byte integers */
+
+	CALL_AND_TEST("\x00", 1, DATA_UNSIGNED,
+		      buf, sizeof(buf), 2, "0", 0);
+
+	CALL_AND_TEST("\x00\x00", 2, DATA_UNSIGNED,
+		      buf, sizeof(buf), 2, "0", 0);
+
+	CALL_AND_TEST("\x00\x00\x00", 3, DATA_UNSIGNED,
+		      buf, sizeof(buf), 2, "0", 0);
+
+	CALL_AND_TEST("\x00\x00\x00\x00", 4, DATA_UNSIGNED,
+		      buf, sizeof(buf), 2, "0", 0);
+
+	CALL_AND_TEST("\x00\x00\x00\x00\x00", 5, DATA_UNSIGNED,
+		      buf, sizeof(buf), 2, "0", 0);
+
+	CALL_AND_TEST("\x00\x00\x00\x00\x00\x00", 6, DATA_UNSIGNED,
+		      buf, sizeof(buf), 2, "0", 0);
+
+	CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00", 7, DATA_UNSIGNED,
+		      buf, sizeof(buf), 2, "0", 0);
+
+	CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00\x00", 8, DATA_UNSIGNED,
+		      buf, sizeof(buf), 2, "0", 0);
+
+	/* max values for signed 1-8 byte integers */
+
+	CALL_AND_TEST("\xFF", 1, 0,
+		      buf, sizeof(buf), 4, "127", 0);
+
+	CALL_AND_TEST("\xFF\xFF", 2, 0,
+		      buf, sizeof(buf), 6, "32767", 0);
+
+	CALL_AND_TEST("\xFF\xFF\xFF", 3, 0,
+		      buf, sizeof(buf), 8, "8388607", 0);
+
+	CALL_AND_TEST("\xFF\xFF\xFF\xFF", 4, 0,
+		      buf, sizeof(buf), 11, "2147483647", 0);
+
+	CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF", 5, 0,
+		      buf, sizeof(buf), 13, "549755813887", 0);
+
+	CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF", 6, 0,
+		      buf, sizeof(buf), 16, "140737488355327", 0);
+
+	CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 7, 0,
+		      buf, sizeof(buf), 18, "36028797018963967", 0);
+
+	CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8, 0,
+		      buf, sizeof(buf), 20, "9223372036854775807", 0);
+
+	/* max values for unsigned 1-8 byte integers */
+
+	CALL_AND_TEST("\xFF", 1, DATA_UNSIGNED,
+		      buf, sizeof(buf), 4, "255", 0);
+
+	CALL_AND_TEST("\xFF\xFF", 2, DATA_UNSIGNED,
+		      buf, sizeof(buf), 6, "65535", 0);
+
+	CALL_AND_TEST("\xFF\xFF\xFF", 3, DATA_UNSIGNED,
+		      buf, sizeof(buf), 9, "16777215", 0);
+
+	CALL_AND_TEST("\xFF\xFF\xFF\xFF", 4, DATA_UNSIGNED,
+		      buf, sizeof(buf), 11, "4294967295", 0);
+
+	CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF", 5, DATA_UNSIGNED,
+		      buf, sizeof(buf), 14, "1099511627775", 0);
+
+	CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF", 6, DATA_UNSIGNED,
+		      buf, sizeof(buf), 16, "281474976710655", 0);
+
+	CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 7, DATA_UNSIGNED,
+		      buf, sizeof(buf), 18, "72057594037927935", 0);
+
+	CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8, DATA_UNSIGNED,
+		      buf, sizeof(buf), 21, "18446744073709551615", 0);
+
+	/* some random values */
+
+	CALL_AND_TEST("\x52", 1, 0,
+		      buf, sizeof(buf), 4, "-46", 0);
+
+	CALL_AND_TEST("\x0E", 1, DATA_UNSIGNED,
+		      buf, sizeof(buf), 3, "14", 0);
+
+	CALL_AND_TEST("\x62\xCE", 2, 0,
+		      buf, sizeof(buf), 6, "-7474", 0);
+
+	CALL_AND_TEST("\x29\xD6", 2, DATA_UNSIGNED,
+		      buf, sizeof(buf), 6, "10710", 0);
+
+	CALL_AND_TEST("\x7F\xFF\x90", 3, 0,
+		      buf, sizeof(buf), 5, "-112", 0);
+
+	CALL_AND_TEST("\x00\xA1\x16", 3, DATA_UNSIGNED,
+		      buf, sizeof(buf), 6, "41238", 0);
+
+	CALL_AND_TEST("\x7F\xFF\xFF\xF7", 4, 0,
+		      buf, sizeof(buf), 3, "-9", 0);
+
+	CALL_AND_TEST("\x00\x00\x00\x5C", 4, DATA_UNSIGNED,
+		      buf, sizeof(buf), 3, "92", 0);
+
+	CALL_AND_TEST("\x7F\xFF\xFF\xFF\xFF\xFF\xDC\x63", 8, 0,
+		      buf, sizeof(buf), 6, "-9117", 0);
+
+	CALL_AND_TEST("\x00\x00\x00\x00\x00\x01\x64\x62", 8, DATA_UNSIGNED,
+		      buf, sizeof(buf), 6, "91234", 0);
+#endif
+
+	/* speed test */
+
+	speedo_t	speedo;
+	ulint		i;
+
+	speedo_reset(&speedo);
+
+	for (i = 0; i < 1000000; i++) {
+		row_raw_format_int("\x23", 1,
+				   0, buf, sizeof(buf),
+				   &format_in_hex);
+		row_raw_format_int("\x23", 1,
+				   DATA_UNSIGNED, buf, sizeof(buf),
+				   &format_in_hex);
+
+		row_raw_format_int("\x00\x00\x00\x00\x00\x01\x64\x62", 8,
+				   0, buf, sizeof(buf),
+				   &format_in_hex);
+		row_raw_format_int("\x00\x00\x00\x00\x00\x01\x64\x62", 8,
+				   DATA_UNSIGNED, buf, sizeof(buf),
+				   &format_in_hex);
+	}
+
+	speedo_show(&speedo);
+}
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innobase/row/row0sel.c b/storage/innodb_plugin/row/row0sel.c
similarity index 83%
rename from storage/innobase/row/row0sel.c
rename to storage/innodb_plugin/row/row0sel.c
index 29efb2861b7..3ef9726588e 100644
--- a/storage/innobase/row/row0sel.c
+++ b/storage/innodb_plugin/row/row0sel.c
@@ -1,7 +1,31 @@
-/*******************************************************
-Select
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/***************************************************//**
+@file row/row0sel.c
+Select
 
 Created 12/19/1997 Heikki Tuuri
 *******************************************************/
@@ -51,29 +75,79 @@ to que_run_threads: this is to allow canceling runaway queries */
 #define	SEL_EXHAUSTED	1
 #define SEL_RETRY	2
 
-/************************************************************************
+/********************************************************************//**
+Returns TRUE if the user-defined column in a secondary index record
+is alphabetically the same as the corresponding BLOB column in the clustered
+index record.
+NOTE: the comparison is NOT done as a binary comparison, but character
+fields are compared with collation!
+@return	TRUE if the columns are equal */
+static
+ibool
+row_sel_sec_rec_is_for_blob(
+/*========================*/
+	ulint		mtype,		/*!< in: main type */
+	ulint		prtype,		/*!< in: precise type */
+	ulint		mbminlen,	/*!< in: minimum length of a
+					multi-byte character */
+	ulint		mbmaxlen,	/*!< in: maximum length of a
+					multi-byte character */
+	const byte*	clust_field,	/*!< in: the locally stored part of
+					the clustered index column, including
+					the BLOB pointer; the clustered
+					index record must be covered by
+					a lock or a page latch to protect it
+					against deletion (rollback or purge) */
+	ulint		clust_len,	/*!< in: length of clust_field */
+	const byte*	sec_field,	/*!< in: column in secondary index */
+	ulint		sec_len,	/*!< in: length of sec_field */
+	ulint		zip_size)	/*!< in: compressed page size, or 0 */
+{
+	ulint	len;
+	byte	buf[DICT_MAX_INDEX_COL_LEN];
+
+	len = btr_copy_externally_stored_field_prefix(buf, sizeof buf,
+						      zip_size,
+						      clust_field, clust_len);
+
+	if (UNIV_UNLIKELY(len == 0)) {
+		/* The BLOB was being deleted as the server crashed.
+		There should not be any secondary index records
+		referring to this clustered index record, because
+		btr_free_externally_stored_field() is called after all
+		secondary index entries of the row have been purged. */
+		return(FALSE);
+	}
+
+	len = dtype_get_at_most_n_mbchars(prtype, mbminlen, mbmaxlen,
+					  sec_len, len, (const char*) buf);
+
+	return(!cmp_data_data(mtype, prtype, buf, len, sec_field, sec_len));
+}
+
+/********************************************************************//**
 Returns TRUE if the user-defined column values in a secondary index record
 are alphabetically the same as the corresponding columns in the clustered
 index record.
 NOTE: the comparison is NOT done as a binary comparison, but character
-fields are compared with collation! */
+fields are compared with collation!
+@return TRUE if the secondary record is equal to the corresponding
+fields in the clustered record, when compared with collation */
 static
 ibool
 row_sel_sec_rec_is_for_clust_rec(
 /*=============================*/
-					/* out: TRUE if the secondary
-					record is equal to the corresponding
-					fields in the clustered record,
-					when compared with collation */
-	rec_t*		sec_rec,	/* in: secondary index record */
-	dict_index_t*	sec_index,	/* in: secondary index */
-	rec_t*		clust_rec,	/* in: clustered index record */
-	dict_index_t*	clust_index)	/* in: clustered index */
+	const rec_t*	sec_rec,	/*!< in: secondary index record */
+	dict_index_t*	sec_index,	/*!< in: secondary index */
+	const rec_t*	clust_rec,	/*!< in: clustered index record;
+					must be protected by a lock or
+					a page latch against deletion
+					in rollback or purge */
+	dict_index_t*	clust_index)	/*!< in: clustered index */
 {
-	byte*		sec_field;
+	const byte*	sec_field;
 	ulint		sec_len;
-	byte*		clust_field;
-	ulint		clust_len;
+	const byte*	clust_field;
 	ulint		n;
 	ulint		i;
 	mem_heap_t*	heap		= NULL;
@@ -83,8 +157,18 @@ row_sel_sec_rec_is_for_clust_rec(
 	ulint*		sec_offs	= sec_offsets_;
 	ibool		is_equal	= TRUE;
 
-	*clust_offsets_ = (sizeof clust_offsets_) / sizeof *clust_offsets_;
-	*sec_offsets_ = (sizeof sec_offsets_) / sizeof *sec_offsets_;
+	rec_offs_init(clust_offsets_);
+	rec_offs_init(sec_offsets_);
+
+	if (rec_get_deleted_flag(clust_rec,
+				 dict_table_is_comp(clust_index->table))) {
+
+		/* The clustered index record is delete-marked;
+		it is not visible in the read view.  Besides,
+		if there are any externally stored columns,
+		some of them may have already been purged. */
+		return(FALSE);
+	}
 
 	clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offs,
 				     ULINT_UNDEFINED, &heap);
@@ -96,26 +180,50 @@ row_sel_sec_rec_is_for_clust_rec(
 	for (i = 0; i < n; i++) {
 		const dict_field_t*	ifield;
 		const dict_col_t*	col;
+		ulint			clust_pos;
+		ulint			clust_len;
+		ulint			len;
 
 		ifield = dict_index_get_nth_field(sec_index, i);
 		col = dict_field_get_col(ifield);
+		clust_pos = dict_col_get_clust_pos(col, clust_index);
 
 		clust_field = rec_get_nth_field(
-			clust_rec, clust_offs,
-			dict_col_get_clust_pos(col, clust_index), &clust_len);
+			clust_rec, clust_offs, clust_pos, &clust_len);
 		sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len);
 
-		if (ifield->prefix_len > 0 && clust_len != UNIV_SQL_NULL) {
+		len = clust_len;
 
-			clust_len = dtype_get_at_most_n_mbchars(
+		if (ifield->prefix_len > 0 && len != UNIV_SQL_NULL) {
+
+			if (rec_offs_nth_extern(clust_offs, clust_pos)) {
+				len -= BTR_EXTERN_FIELD_REF_SIZE;
+			}
+
+			len = dtype_get_at_most_n_mbchars(
 				col->prtype, col->mbminlen, col->mbmaxlen,
-				ifield->prefix_len,
-				clust_len, (char*) clust_field);
+				ifield->prefix_len, len, (char*) clust_field);
+
+			if (rec_offs_nth_extern(clust_offs, clust_pos)
+			    && len < sec_len) {
+				if (!row_sel_sec_rec_is_for_blob(
+					    col->mtype, col->prtype,
+					    col->mbminlen, col->mbmaxlen,
+					    clust_field, clust_len,
+					    sec_field, sec_len,
+					    dict_table_zip_size(
+						    clust_index->table))) {
+					goto inequal;
+				}
+
+				continue;
+			}
 		}
 
 		if (0 != cmp_data_data(col->mtype, col->prtype,
-				       clust_field, clust_len,
+				       clust_field, len,
 				       sec_field, sec_len)) {
+inequal:
 			is_equal = FALSE;
 			goto func_exit;
 		}
@@ -128,14 +236,14 @@ func_exit:
 	return(is_equal);
 }
 
-/*************************************************************************
-Creates a select node struct. */
-
+/*********************************************************************//**
+Creates a select node struct.
+@return	own: select node struct */
+UNIV_INTERN
 sel_node_t*
 sel_node_create(
 /*============*/
-				/* out, own: select node struct */
-	mem_heap_t*	heap)	/* in: memory heap where created */
+	mem_heap_t*	heap)	/*!< in: memory heap where created */
 {
 	sel_node_t*	node;
 
@@ -143,22 +251,19 @@ sel_node_create(
 	node->common.type = QUE_NODE_SELECT;
 	node->state = SEL_NODE_OPEN;
 
-	node->select_will_do_update = FALSE;
-	node->latch_mode = BTR_SEARCH_LEAF;
-
 	node->plans = NULL;
 
 	return(node);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Frees the memory private to a select node when a query graph is freed,
 does not free the heap where the node was originally created. */
-
+UNIV_INTERN
 void
 sel_node_free_private(
 /*==================*/
-	sel_node_t*	node)	/* in: select node struct */
+	sel_node_t*	node)	/*!< in: select node struct */
 {
 	ulint	i;
 	plan_t*	plan;
@@ -177,14 +282,14 @@ sel_node_free_private(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Evaluates the values in a select list. If there are aggregate functions,
 their argument value is added to the aggregate total. */
 UNIV_INLINE
 void
 sel_eval_select_list(
 /*=================*/
-	sel_node_t*	node)	/* in: select node */
+	sel_node_t*	node)	/*!< in: select node */
 {
 	que_node_t*	exp;
 
@@ -197,15 +302,15 @@ sel_eval_select_list(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Assigns the values in the select list to the possible into-variables in
 SELECT ... INTO ... */
 UNIV_INLINE
 void
 sel_assign_into_var_values(
 /*=======================*/
-	sym_node_t*	var,	/* in: first variable in a list of variables */
-	sel_node_t*	node)	/* in: select node */
+	sym_node_t*	var,	/*!< in: first variable in a list of variables */
+	sel_node_t*	node)	/*!< in: select node */
 {
 	que_node_t*	exp;
 
@@ -226,14 +331,14 @@ sel_assign_into_var_values(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Resets the aggregate value totals in the select list of an aggregate type
 query. */
 UNIV_INLINE
 void
 sel_reset_aggregate_vals(
 /*=====================*/
-	sel_node_t*	node)	/* in: select node */
+	sel_node_t*	node)	/*!< in: select node */
 {
 	func_node_t*	func_node;
 
@@ -250,13 +355,13 @@ sel_reset_aggregate_vals(
 	node->aggregate_already_fetched = FALSE;
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Copies the input variable values when an explicit cursor is opened. */
 UNIV_INLINE
 void
 row_sel_copy_input_variable_vals(
 /*=============================*/
-	sel_node_t*	node)	/* in: select node */
+	sel_node_t*	node)	/*!< in: select node */
 {
 	sym_node_t*	var;
 
@@ -271,28 +376,28 @@ row_sel_copy_input_variable_vals(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Fetches the column values from a record. */
 static
 void
 row_sel_fetch_columns(
 /*==================*/
-	dict_index_t*	index,	/* in: record index */
-	rec_t*		rec,	/* in: record in a clustered or non-clustered
-				index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	sym_node_t*	column)	/* in: first column in a column list, or
+	dict_index_t*	index,	/*!< in: record index */
+	const rec_t*	rec,	/*!< in: record in a clustered or non-clustered
+				index; must be protected by a page latch */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	sym_node_t*	column)	/*!< in: first column in a column list, or
 				NULL */
 {
 	dfield_t*	val;
 	ulint		index_type;
 	ulint		field_no;
-	byte*		data;
+	const byte*	data;
 	ulint		len;
 
 	ut_ad(rec_offs_validate(rec, index, offsets));
 
-	if (index->type & DICT_CLUSTERED) {
+	if (dict_index_is_clust(index)) {
 		index_type = SYM_CLUST_FIELD_NO;
 	} else {
 		index_type = SYM_SEC_FIELD_NO;
@@ -315,7 +420,9 @@ row_sel_fetch_columns(
 				heap = mem_heap_create(1);
 
 				data = btr_rec_copy_externally_stored_field(
-					rec, offsets, field_no, &len, heap);
+					rec, offsets,
+					dict_table_zip_size(index->table),
+					field_no, &len, heap);
 
 				ut_a(len != UNIV_SQL_NULL);
 
@@ -324,6 +431,10 @@ row_sel_fetch_columns(
 				data = rec_get_nth_field(rec, offsets,
 							 field_no, &len);
 
+				if (len == UNIV_SQL_NULL) {
+					len = UNIV_SQL_NULL;
+				}
+
 				needs_copy = column->copy_val;
 			}
 
@@ -344,13 +455,13 @@ row_sel_fetch_columns(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Allocates a prefetch buffer for a column when prefetch is first time done. */
 static
 void
 sel_col_prefetch_buf_alloc(
 /*=======================*/
-	sym_node_t*	column)	/* in: symbol table node for a column */
+	sym_node_t*	column)	/*!< in: symbol table node for a column */
 {
 	sel_buf_t*	sel_buf;
 	ulint		i;
@@ -368,14 +479,14 @@ sel_col_prefetch_buf_alloc(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Frees a prefetch buffer for a column, including the dynamically allocated
 memory for data stored there. */
-
+UNIV_INTERN
 void
 sel_col_prefetch_buf_free(
 /*======================*/
-	sel_buf_t*	prefetch_buf)	/* in, own: prefetch buffer */
+	sel_buf_t*	prefetch_buf)	/*!< in, own: prefetch buffer */
 {
 	sel_buf_t*	sel_buf;
 	ulint		i;
@@ -390,14 +501,14 @@ sel_col_prefetch_buf_free(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Pops the column values for a prefetched, cached row from the column prefetch
 buffers and places them to the val fields in the column nodes. */
 static
 void
 sel_pop_prefetched_row(
 /*===================*/
-	plan_t*	plan)	/* in: plan node for a table */
+	plan_t*	plan)	/*!< in: plan node for a table */
 {
 	sym_node_t*	column;
 	sel_buf_t*	sel_buf;
@@ -419,13 +530,13 @@ sel_pop_prefetched_row(
 
 			ut_ad(!column->prefetch_buf);
 			ut_ad(que_node_get_val_buf_size(column) == 0);
-#ifdef UNIV_DEBUG
-			dfield_set_data(val, NULL, 0);
-#endif
+			ut_d(dfield_set_null(val));
+
 			goto next_col;
 		}
 
 		ut_ad(column->prefetch_buf);
+		ut_ad(!dfield_is_ext(val));
 
 		sel_buf = column->prefetch_buf + plan->first_prefetched;
 
@@ -452,14 +563,14 @@ next_col:
 	plan->first_prefetched++;
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Pushes the column values for a prefetched, cached row to the column prefetch
 buffers from the val fields in the column nodes. */
 UNIV_INLINE
 void
 sel_push_prefetched_row(
 /*====================*/
-	plan_t*	plan)	/* in: plan node for a table */
+	plan_t*	plan)	/*!< in: plan node for a table */
 {
 	sym_node_t*	column;
 	sel_buf_t*	sel_buf;
@@ -524,26 +635,26 @@ next_col:
 	}
 }
 
-/*************************************************************************
-Builds a previous version of a clustered index record for a consistent read */
+/*********************************************************************//**
+Builds a previous version of a clustered index record for a consistent read
+@return	DB_SUCCESS or error code */
 static
 ulint
 row_sel_build_prev_vers(
 /*====================*/
-					/* out: DB_SUCCESS or error code */
-	read_view_t*	read_view,	/* in: read view */
-	dict_index_t*	index,		/* in: plan node for table */
-	rec_t*		rec,		/* in: record in a clustered index */
-	ulint**		offsets,	/* in/out: offsets returned by
+	read_view_t*	read_view,	/*!< in: read view */
+	dict_index_t*	index,		/*!< in: plan node for table */
+	rec_t*		rec,		/*!< in: record in a clustered index */
+	ulint**		offsets,	/*!< in/out: offsets returned by
 					rec_get_offsets(rec, plan->index) */
-	mem_heap_t**	offset_heap,	/* in/out: memory heap from which
+	mem_heap_t**	offset_heap,	/*!< in/out: memory heap from which
 					the offsets are allocated */
-	mem_heap_t**    old_vers_heap,  /* out: old version heap to use */
-	rec_t**		old_vers,	/* out: old version, or NULL if the
+	mem_heap_t**    old_vers_heap,  /*!< out: old version heap to use */
+	rec_t**		old_vers,	/*!< out: old version, or NULL if the
 					record does not exist in the view:
 					i.e., it was freshly inserted
 					afterwards */
-	mtr_t*		mtr)		/* in: mtr */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ulint	err;
 
@@ -559,26 +670,26 @@ row_sel_build_prev_vers(
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Builds the last committed version of a clustered index record for a
-semi-consistent read. */
+semi-consistent read.
+@return	DB_SUCCESS or error code */
 static
 ulint
 row_sel_build_committed_vers_for_mysql(
 /*===================================*/
-					/* out: DB_SUCCESS or error code */
-	dict_index_t*	clust_index,	/* in: clustered index */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
-	rec_t*		rec,		/* in: record in a clustered index */
-	ulint**		offsets,	/* in/out: offsets returned by
+	dict_index_t*	clust_index,	/*!< in: clustered index */
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
+	const rec_t*	rec,		/*!< in: record in a clustered index */
+	ulint**		offsets,	/*!< in/out: offsets returned by
 					rec_get_offsets(rec, clust_index) */
-	mem_heap_t**	offset_heap,	/* in/out: memory heap from which
+	mem_heap_t**	offset_heap,	/*!< in/out: memory heap from which
 					the offsets are allocated */
-	rec_t**		old_vers,	/* out: old version, or NULL if the
+	const rec_t**	old_vers,	/*!< out: old version, or NULL if the
 					record does not exist in the view:
 					i.e., it was freshly inserted
 					afterwards */
-	mtr_t*		mtr)		/* in: mtr */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ulint	err;
 
@@ -594,15 +705,15 @@ row_sel_build_committed_vers_for_mysql(
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Tests the conditions which determine when the index segment we are searching
-through has been exhausted. */
+through has been exhausted.
+@return	TRUE if row passed the tests */
 UNIV_INLINE
 ibool
 row_sel_test_end_conds(
 /*===================*/
-			/* out: TRUE if row passed the tests */
-	plan_t*	plan)	/* in: plan for the table; the column values must
+	plan_t*	plan)	/*!< in: plan for the table; the column values must
 			already have been retrieved and the right sides of
 			comparisons evaluated */
 {
@@ -632,14 +743,14 @@ row_sel_test_end_conds(
 	return(TRUE);
 }
 
-/*************************************************************************
-Tests the other conditions. */
+/*********************************************************************//**
+Tests the other conditions.
+@return	TRUE if row passed the tests */
 UNIV_INLINE
 ibool
 row_sel_test_other_conds(
 /*=====================*/
-			/* out: TRUE if row passed the tests */
-	plan_t*	plan)	/* in: plan for the table; the column values must
+	plan_t*	plan)	/*!< in: plan for the table; the column values must
 			already have been retrieved */
 {
 	func_node_t*	cond;
@@ -660,23 +771,23 @@ row_sel_test_other_conds(
 	return(TRUE);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Retrieves the clustered index record corresponding to a record in a
-non-clustered index. Does the necessary locking. */
+non-clustered index. Does the necessary locking.
+@return	DB_SUCCESS or error code */
 static
 ulint
 row_sel_get_clust_rec(
 /*==================*/
-				/* out: DB_SUCCESS or error code */
-	sel_node_t*	node,	/* in: select_node */
-	plan_t*		plan,	/* in: plan node for table */
-	rec_t*		rec,	/* in: record in a non-clustered index */
-	que_thr_t*	thr,	/* in: query thread */
-	rec_t**		out_rec,/* out: clustered record or an old version of
+	sel_node_t*	node,	/*!< in: select_node */
+	plan_t*		plan,	/*!< in: plan node for table */
+	rec_t*		rec,	/*!< in: record in a non-clustered index */
+	que_thr_t*	thr,	/*!< in: query thread */
+	rec_t**		out_rec,/*!< out: clustered record or an old version of
 				it, NULL if the old version did not exist
 				in the read view, i.e., it was a fresh
 				inserted version */
-	mtr_t*		mtr)	/* in: mtr used to get access to the
+	mtr_t*		mtr)	/*!< in: mtr used to get access to the
 				non-clustered record; the same mtr is used to
 				access the clustered index */
 {
@@ -687,7 +798,7 @@ row_sel_get_clust_rec(
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	*out_rec = NULL;
 
@@ -700,7 +811,7 @@ row_sel_get_clust_rec(
 	index = dict_table_get_first_index(plan->table);
 
 	btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE,
-				   node->latch_mode, &(plan->clust_pcur),
+				   BTR_SEARCH_LEAF, &plan->clust_pcur,
 				   0, mtr);
 
 	clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));
@@ -751,7 +862,8 @@ row_sel_get_clust_rec(
 		}
 
 		err = lock_clust_rec_read_check_and_lock(
-			0, clust_rec, index, offsets,
+			0, btr_pcur_get_block(&plan->clust_pcur),
+			clust_rec, index, offsets,
 			node->row_lock_mode, lock_type, thr);
 
 		if (err != DB_SUCCESS) {
@@ -806,7 +918,10 @@ row_sel_get_clust_rec(
 		}
 	}
 
-	/* Fetch the columns needed in test conditions */
+	/* Fetch the columns needed in test conditions.  The clustered
+	index record is protected by a page latch that was acquired
+	when plan->clust_pcur was positioned.  The latch will not be
+	released until mtr_commit(mtr). */
 
 	row_sel_fetch_columns(index, clust_rec, offsets,
 			      UT_LIST_GET_FIRST(plan->columns));
@@ -820,20 +935,21 @@ err_exit:
 	return(err);
 }
 
-/*************************************************************************
-Sets a lock on a record. */
+/*********************************************************************//**
+Sets a lock on a record.
+@return	DB_SUCCESS or error code */
 UNIV_INLINE
 ulint
 sel_set_rec_lock(
 /*=============*/
-				/* out: DB_SUCCESS or error code */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	ulint		mode,	/* in: lock mode */
-	ulint		type,	/* in: LOCK_ORDINARY, LOCK_GAP, or
-				LOC_REC_NOT_GAP */
-	que_thr_t*	thr)	/* in: query thread */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: record */
+	dict_index_t*		index,	/*!< in: index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint			mode,	/*!< in: lock mode */
+	ulint			type,	/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOC_REC_NOT_GAP */
+	que_thr_t*		thr)	/*!< in: query thread */
 {
 	trx_t*	trx;
 	ulint	err;
@@ -847,30 +963,29 @@ sel_set_rec_lock(
 		}
 	}
 
-	if (index->type & DICT_CLUSTERED) {
+	if (dict_index_is_clust(index)) {
 		err = lock_clust_rec_read_check_and_lock(
-			0, rec, index, offsets, mode, type, thr);
+			0, block, rec, index, offsets, mode, type, thr);
 	} else {
 		err = lock_sec_rec_read_check_and_lock(
-			0, rec, index, offsets, mode, type, thr);
+			0, block, rec, index, offsets, mode, type, thr);
 	}
 
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Opens a pcur to a table index. */
 static
 void
 row_sel_open_pcur(
 /*==============*/
-	sel_node_t*	node,		/* in: select node */
-	plan_t*		plan,		/* in: table plan */
+	plan_t*		plan,		/*!< in: table plan */
 	ibool		search_latch_locked,
-					/* in: TRUE if the thread currently
+					/*!< in: TRUE if the thread currently
 					has the search latch locked in
 					s-mode */
-	mtr_t*		mtr)		/* in: mtr */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	dict_index_t*	index;
 	func_node_t*	cond;
@@ -917,13 +1032,13 @@ row_sel_open_pcur(
 		/* Open pcur to the index */
 
 		btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
-					   node->latch_mode, &(plan->pcur),
+					   BTR_SEARCH_LEAF, &plan->pcur,
 					   has_search_latch, mtr);
 	} else {
 		/* Open the cursor to the start or the end of the index
 		(FALSE: no init) */
 
-		btr_pcur_open_at_index_side(plan->asc, index, node->latch_mode,
+		btr_pcur_open_at_index_side(plan->asc, index, BTR_SEARCH_LEAF,
 					    &(plan->pcur), FALSE, mtr);
 	}
 
@@ -934,20 +1049,18 @@ row_sel_open_pcur(
 	plan->pcur_is_open = TRUE;
 }
 
-/*************************************************************************
-Restores a stored pcur position to a table index. */
+/*********************************************************************//**
+Restores a stored pcur position to a table index.
+@return TRUE if the cursor should be moved to the next record after we
+return from this function (moved to the previous, in the case of a
+descending cursor) without processing again the current cursor
+record */
 static
 ibool
 row_sel_restore_pcur_pos(
 /*=====================*/
-				/* out: TRUE if the cursor should be moved to
-				the next record after we return from this
-				function (moved to the previous, in the case
-				of a descending cursor) without processing
-				again the current cursor record */
-	sel_node_t*	node,	/* in: select node */
-	plan_t*		plan,	/* in: table plan */
-	mtr_t*		mtr)	/* in: mtr */
+	plan_t*		plan,	/*!< in: table plan */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ibool	equal_position;
 	ulint	relative_position;
@@ -956,7 +1069,7 @@ row_sel_restore_pcur_pos(
 
 	relative_position = btr_pcur_get_rel_pos(&(plan->pcur));
 
-	equal_position = btr_pcur_restore_position(node->latch_mode,
+	equal_position = btr_pcur_restore_position(BTR_SEARCH_LEAF,
 						   &(plan->pcur), mtr);
 
 	/* If the cursor is traveling upwards, and relative_position is
@@ -1031,13 +1144,13 @@ row_sel_restore_pcur_pos(
 	return(TRUE);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Resets a plan cursor to a closed state. */
 UNIV_INLINE
 void
 plan_reset_cursor(
 /*==============*/
-	plan_t*	plan)	/* in: plan */
+	plan_t*	plan)	/*!< in: plan */
 {
 	plan->pcur_is_open = FALSE;
 	plan->cursor_at_end = FALSE;
@@ -1045,18 +1158,18 @@ plan_reset_cursor(
 	plan->n_rows_prefetched = 0;
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Tries to do a shortcut to fetch a clustered index record with a unique key,
-using the hash index if possible (not always). */
+using the hash index if possible (not always).
+@return	SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
 static
 ulint
 row_sel_try_search_shortcut(
 /*========================*/
-				/* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
-	sel_node_t*	node,	/* in: select node for a consistent read */
-	plan_t*		plan,	/* in: plan for a unique search in clustered
+	sel_node_t*	node,	/*!< in: select node for a consistent read */
+	plan_t*		plan,	/*!< in: plan for a unique search in clustered
 				index */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	dict_index_t*	index;
 	rec_t*		rec;
@@ -1064,7 +1177,7 @@ row_sel_try_search_shortcut(
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
 	ulint		ret;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	index = plan->index;
 
@@ -1075,7 +1188,7 @@ row_sel_try_search_shortcut(
 	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
 #endif /* UNIV_SYNC_DEBUG */
 
-	row_sel_open_pcur(node, plan, TRUE, mtr);
+	row_sel_open_pcur(plan, TRUE, mtr);
 
 	rec = btr_pcur_get_rec(&(plan->pcur));
 
@@ -1100,22 +1213,19 @@ row_sel_try_search_shortcut(
 
 	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
 
-	if (index->type & DICT_CLUSTERED) {
+	if (dict_index_is_clust(index)) {
 		if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
 						   node->read_view)) {
 			ret = SEL_RETRY;
 			goto func_exit;
 		}
-	} else if (!lock_sec_rec_cons_read_sees(rec, index, node->read_view)) {
+	} else if (!lock_sec_rec_cons_read_sees(rec, node->read_view)) {
 
 		ret = SEL_RETRY;
 		goto func_exit;
 	}
 
-	/* Test deleted flag. Fetch the columns needed in test conditions. */
-
-	row_sel_fetch_columns(index, rec, offsets,
-			      UT_LIST_GET_FIRST(plan->columns));
+	/* Test the deleted flag. */
 
 	if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) {
 
@@ -1123,6 +1233,14 @@ row_sel_try_search_shortcut(
 		goto func_exit;
 	}
 
+	/* Fetch the columns needed in test conditions.  The index
+	record is protected by a page latch that was acquired when
+	plan->pcur was positioned.  The latch will not be released
+	until mtr_commit(mtr). */
+
+	row_sel_fetch_columns(index, rec, offsets,
+			      UT_LIST_GET_FIRST(plan->columns));
+
 	/* Test the rest of search conditions */
 
 	if (!row_sel_test_other_conds(plan)) {
@@ -1131,7 +1249,7 @@ row_sel_try_search_shortcut(
 		goto func_exit;
 	}
 
-	ut_ad(plan->pcur.latch_mode == node->latch_mode);
+	ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
 
 	plan->n_rows_fetched++;
 	ret = SEL_FOUND;
@@ -1142,15 +1260,15 @@ func_exit:
 	return(ret);
 }
 
-/*************************************************************************
-Performs a select step. */
+/*********************************************************************//**
+Performs a select step.
+@return	DB_SUCCESS or error code */
 static
 ulint
 row_sel(
 /*====*/
-				/* out: DB_SUCCESS or error code */
-	sel_node_t*	node,	/* in: select node */
-	que_thr_t*	thr)	/* in: query thread */
+	sel_node_t*	node,	/*!< in: select node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	dict_index_t*	index;
 	plan_t*		plan;
@@ -1171,13 +1289,6 @@ row_sel(
 	ulint		cost_counter			= 0;
 	ibool		cursor_just_opened;
 	ibool		must_go_to_next;
-	ibool		leaf_contains_updates		= FALSE;
-	/* TRUE if select_will_do_update is
-	TRUE and the current clustered index
-	leaf page has been updated during
-	the current mtr: mtr must be committed
-	at the same time as the leaf x-latch
-	is released */
 	ibool		mtr_has_extra_clust_latch	= FALSE;
 	/* TRUE if the search was made using
 	a non-clustered index, and we had to
@@ -1190,7 +1301,7 @@ row_sel(
 	mem_heap_t*	heap				= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets				= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	ut_ad(thr->run_node == node);
 
@@ -1216,7 +1327,6 @@ table_loop:
 	node->fetch_table changes, and after adding a row to aggregate totals
 	and, of course, when this function is called. */
 
-	ut_ad(leaf_contains_updates == FALSE);
 	ut_ad(mtr_has_extra_clust_latch == FALSE);
 
 	plan = sel_node_get_nth_plan(node, node->fetch_table);
@@ -1291,7 +1401,7 @@ table_loop:
 		/* Evaluate the expressions to build the search tuple and
 		open the cursor */
 
-		row_sel_open_pcur(node, plan, search_latch_locked, &mtr);
+		row_sel_open_pcur(plan, search_latch_locked, &mtr);
 
 		cursor_just_opened = TRUE;
 
@@ -1300,7 +1410,7 @@ table_loop:
 	} else {
 		/* Restore pcur position to the index */
 
-		must_go_to_next = row_sel_restore_pcur_pos(node, plan, &mtr);
+		must_go_to_next = row_sel_restore_pcur_pos(plan, &mtr);
 
 		cursor_just_opened = FALSE;
 
@@ -1370,7 +1480,8 @@ rec_loop:
 				lock_type = LOCK_ORDINARY;
 			}
 
-			err = sel_set_rec_lock(next_rec, index, offsets,
+			err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
+					       next_rec, index, offsets,
 					       node->row_lock_mode,
 					       lock_type, thr);
 
@@ -1426,7 +1537,8 @@ skip_lock:
 			lock_type = LOCK_ORDINARY;
 		}
 
-		err = sel_set_rec_lock(rec, index, offsets,
+		err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
+				       rec, index, offsets,
 				       node->row_lock_mode, lock_type, thr);
 
 		if (err != DB_SUCCESS) {
@@ -1489,7 +1601,7 @@ skip_lock:
 		/* This is a non-locking consistent read: if necessary, fetch
 		a previous version of the record */
 
-		if (index->type & DICT_CLUSTERED) {
+		if (dict_index_is_clust(index)) {
 
 			if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
 							   node->read_view)) {
@@ -1508,6 +1620,16 @@ skip_lock:
 					offsets = rec_get_offsets(
 						rec, index, offsets,
 						ULINT_UNDEFINED, &heap);
+
+					/* Fetch the columns needed in
+					test conditions. The clustered
+					index record is protected by a
+					page latch that was acquired
+					by row_sel_open_pcur() or
+					row_sel_restore_pcur_pos().
+					The latch will not be released
+					until mtr_commit(mtr). */
+
 					row_sel_fetch_columns(
 						index, rec, offsets,
 						UT_LIST_GET_FIRST(
@@ -1523,7 +1645,7 @@ skip_lock:
 
 				rec = old_vers;
 			}
-		} else if (!lock_sec_rec_cons_read_sees(rec, index,
+		} else if (!lock_sec_rec_cons_read_sees(rec,
 							node->read_view)) {
 			cons_read_requires_clust_rec = TRUE;
 		}
@@ -1531,7 +1653,10 @@ skip_lock:
 
 	/* PHASE 4: Test search end conditions and deleted flag */
 
-	/* Fetch the columns needed in test conditions */
+	/* Fetch the columns needed in test conditions.  The record is
+	protected by a page latch that was acquired by
+	row_sel_open_pcur() or row_sel_restore_pcur_pos().  The latch
+	will not be released until mtr_commit(mtr). */
 
 	row_sel_fetch_columns(index, rec, offsets,
 			      UT_LIST_GET_FIRST(plan->columns));
@@ -1624,29 +1749,7 @@ skip_lock:
 
 	plan->n_rows_fetched++;
 
-	ut_ad(plan->pcur.latch_mode == node->latch_mode);
-
-	if (node->select_will_do_update) {
-		/* This is a searched update and we can do the update in-place,
-		saving CPU time */
-
-		row_upd_in_place_in_select(node, thr, &mtr);
-
-		leaf_contains_updates = TRUE;
-
-		/* When the database is in the online backup mode, the number
-		of log records for a single mtr should be small: increment the
-		cost counter to ensure it */
-
-		cost_counter += 1 + (SEL_COST_LIMIT / 8);
-
-		if (plan->unique_search) {
-
-			goto table_exhausted;
-		}
-
-		goto next_rec;
-	}
+	ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
 
 	if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT)
 	    || plan->unique_search || plan->no_prefetch
@@ -1681,19 +1784,6 @@ next_rec:
 		goto commit_mtr_for_a_while;
 	}
 
-	if (leaf_contains_updates
-	    && btr_pcur_is_after_last_on_page(&(plan->pcur), &mtr)) {
-
-		/* We must commit &mtr if we are moving to a different page,
-		because we have done updates to the x-latched leaf page, and
-		the latch would be released in btr_pcur_move_to_next, without
-		&mtr getting committed there */
-
-		ut_ad(node->asc);
-
-		goto commit_mtr_for_a_while;
-	}
-
 	if (node->asc) {
 		moved = btr_pcur_move_to_next(&(plan->pcur), &mtr);
 	} else {
@@ -1715,7 +1805,7 @@ next_table:
 	/* We found a record which satisfies the conditions: we can move to
 	the next table or return a row in the result set */
 
-	ut_ad(btr_pcur_is_on_user_rec(&(plan->pcur), &mtr));
+	ut_ad(btr_pcur_is_on_user_rec(&plan->pcur));
 
 	if (plan->unique_search && !node->can_get_updated) {
 
@@ -1730,7 +1820,6 @@ next_table:
 
 	mtr_commit(&mtr);
 
-	leaf_contains_updates = FALSE;
 	mtr_has_extra_clust_latch = FALSE;
 
 next_table_no_mtr:
@@ -1750,10 +1839,6 @@ next_table_no_mtr:
 
 		thr->run_node = que_node_get_parent(node);
 
-		if (search_latch_locked) {
-			rw_lock_s_unlock(&btr_search_latch);
-		}
-
 		err = DB_SUCCESS;
 		goto func_exit;
 	}
@@ -1775,7 +1860,6 @@ table_exhausted:
 
 	mtr_commit(&mtr);
 
-	leaf_contains_updates = FALSE;
 	mtr_has_extra_clust_latch = FALSE;
 
 	if (plan->n_rows_prefetched > 0) {
@@ -1797,20 +1881,10 @@ table_exhausted_no_mtr:
 			sel_assign_into_var_values(node->into_list, node);
 
 			thr->run_node = que_node_get_parent(node);
+		} else {
+			node->state = SEL_NODE_NO_MORE_ROWS;
 
-			if (search_latch_locked) {
-				rw_lock_s_unlock(&btr_search_latch);
-			}
-
-			goto func_exit;
-		}
-
-		node->state = SEL_NODE_NO_MORE_ROWS;
-
-		thr->run_node = que_node_get_parent(node);
-
-		if (search_latch_locked) {
-			rw_lock_s_unlock(&btr_search_latch);
+			thr->run_node = que_node_get_parent(node);
 		}
 
 		goto func_exit;
@@ -1854,7 +1928,6 @@ commit_mtr_for_a_while:
 
 	mtr_commit(&mtr);
 
-	leaf_contains_updates = FALSE;
 	mtr_has_extra_clust_latch = FALSE;
 
 #ifdef UNIV_SYNC_DEBUG
@@ -1866,8 +1939,7 @@ commit_mtr_for_a_while:
 lock_wait_or_error:
 	/* See the note at stop_for_a_while: the same holds for this case */
 
-	ut_ad(!btr_pcur_is_before_first_on_page(&(plan->pcur), &mtr)
-	      || !node->asc);
+	ut_ad(!btr_pcur_is_before_first_on_page(&plan->pcur) || !node->asc);
 	ut_ad(!search_latch_locked);
 
 	plan->stored_cursor_rec_processed = FALSE;
@@ -1880,21 +1952,24 @@ lock_wait_or_error:
 #endif /* UNIV_SYNC_DEBUG */
 
 func_exit:
+	if (search_latch_locked) {
+		rw_lock_s_unlock(&btr_search_latch);
+	}
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}
 	return(err);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Performs a select step. This is a high-level function used in SQL execution
-graphs. */
-
+graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 row_sel_step(
 /*=========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	ulint		i_lock_mode;
 	sym_node_t*	table_node;
@@ -1988,14 +2063,14 @@ row_sel_step(
 	return(thr);
 }
 
-/**************************************************************************
-Performs a fetch for a cursor. */
-
+/**********************************************************************//**
+Performs a fetch for a cursor.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 fetch_step(
 /*=======*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	sel_node_t*	sel_node;
 	fetch_node_t*	node;
@@ -2051,15 +2126,15 @@ fetch_step(
 	return(thr);
 }
 
-/********************************************************************
-Sample callback function for fetch that prints each row.*/
-
+/****************************************************************//**
+Sample callback function for fetch that prints each row.
+@return	always returns non-NULL */
+UNIV_INTERN
 void*
 row_fetch_print(
 /*============*/
-				/* out: always returns non-NULL */
-	void*	row,		/* in:  sel_node_t* */
-	void*	user_arg)	/* in:  not used */
+	void*	row,		/*!< in:  sel_node_t* */
+	void*	user_arg)	/*!< in:  not used */
 {
 	sel_node_t*	node = row;
 	que_node_t*	exp;
@@ -2073,22 +2148,21 @@ row_fetch_print(
 
 	while (exp) {
 		dfield_t*	dfield = que_node_get_val(exp);
-		dtype_t*	type = dfield_get_type(dfield);
+		const dtype_t*	type = dfield_get_type(dfield);
 
 		fprintf(stderr, " column %lu:\n", (ulong)i);
 
 		dtype_print(type);
-		fprintf(stderr, "\n");
+		putc('\n', stderr);
 
 		if (dfield_get_len(dfield) != UNIV_SQL_NULL) {
 			ut_print_buf(stderr, dfield_get_data(dfield),
 				     dfield_get_len(dfield));
+			putc('\n', stderr);
 		} else {
-			fprintf(stderr, " <NULL>;");
+			fputs(" <NULL>;\n", stderr);
 		}
 
-		fprintf(stderr, "\n");
-
 		exp = que_node_get_next(exp);
 		i++;
 	}
@@ -2096,24 +2170,24 @@ row_fetch_print(
 	return((void*)42);
 }
 
-/********************************************************************
+/****************************************************************//**
 Callback function for fetch that stores an unsigned 4 byte integer to the
 location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length
-= 4. */
-
+= 4.
+@return	always returns NULL */
+UNIV_INTERN
 void*
 row_fetch_store_uint4(
 /*==================*/
-				/* out: always returns NULL */
-	void*	row,		/* in:  sel_node_t* */
-	void*	user_arg)	/* in:  data pointer */
+	void*	row,		/*!< in:  sel_node_t* */
+	void*	user_arg)	/*!< in:  data pointer */
 {
 	sel_node_t*	node = row;
 	ib_uint32_t*	val = user_arg;
 	ulint		tmp;
 
 	dfield_t*	dfield = que_node_get_val(node->select_list);
-	dtype_t*	type = dfield_get_type(dfield);
+	const dtype_t*	type = dfield_get_type(dfield);
 	ulint		len = dfield_get_len(dfield);
 
 	ut_a(dtype_get_mtype(type) == DATA_INT);
@@ -2126,14 +2200,14 @@ row_fetch_store_uint4(
 	return(NULL);
 }
 
-/***************************************************************
-Prints a row in a select result. */
-
+/***********************************************************//**
+Prints a row in a select result.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 row_printf_step(
 /*============*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	row_printf_node_t*	node;
 	sel_node_t*		sel_node;
@@ -2189,40 +2263,39 @@ row_printf_step(
 	return(thr);
 }
 
-/********************************************************************
+/****************************************************************//**
 Converts a key value stored in MySQL format to an Innobase dtuple. The last
 field of the key value may be just a prefix of a fixed length field: hence
 the parameter key_len. But currently we do not allow search keys where the
 last field is only a prefix of the full key field len and print a warning if
 such appears. A counterpart of this function is
 ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
-
+UNIV_INTERN
 void
 row_sel_convert_mysql_key_to_innobase(
 /*==================================*/
-	dtuple_t*	tuple,		/* in: tuple where to build;
+	dtuple_t*	tuple,		/*!< in/out: tuple where to build;
 					NOTE: we assume that the type info
 					in the tuple is already according
 					to index! */
-	byte*		buf,		/* in: buffer to use in field
+	byte*		buf,		/*!< in: buffer to use in field
 					conversions */
-	ulint		buf_len,	/* in: buffer length */
-	dict_index_t*	index,		/* in: index of the key value */
-	byte*		key_ptr,	/* in: MySQL key value */
-	ulint		key_len,	/* in: MySQL key value length */
-	trx_t*		trx)		/* in: transaction */
+	ulint		buf_len,	/*!< in: buffer length */
+	dict_index_t*	index,		/*!< in: index of the key value */
+	const byte*	key_ptr,	/*!< in: MySQL key value */
+	ulint		key_len,	/*!< in: MySQL key value length */
+	trx_t*		trx)		/*!< in: transaction */
 {
 	byte*		original_buf	= buf;
-	byte*		original_key_ptr = key_ptr;
+	const byte*	original_key_ptr = key_ptr;
 	dict_field_t*	field;
 	dfield_t*	dfield;
 	ulint		data_offset;
 	ulint		data_len;
 	ulint		data_field_len;
 	ibool		is_null;
-	byte*		key_end;
+	const byte*	key_end;
 	ulint		n_fields = 0;
-	ulint		type;
 
 	/* For documentation of the key value storage format in MySQL, see
 	ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
@@ -2236,7 +2309,7 @@ row_sel_convert_mysql_key_to_innobase(
 	dfield = dtuple_get_nth_field(tuple, 0);
 	field = dict_index_get_nth_field(index, 0);
 
-	if (dfield_get_type(dfield)->mtype == DATA_SYS) {
+	if (UNIV_UNLIKELY(dfield_get_type(dfield)->mtype == DATA_SYS)) {
 		/* A special case: we are looking for a position in the
 		generated clustered index which InnoDB automatically added
 		to a table with no primary key: the first and the only
@@ -2254,7 +2327,8 @@ row_sel_convert_mysql_key_to_innobase(
 
 	while (key_ptr < key_end) {
 
-		ut_a(field->col->mtype == dfield_get_type(dfield)->mtype);
+		ulint	type = dfield_get_type(dfield)->mtype;
+		ut_a(field->col->mtype == type);
 
 		data_offset = 0;
 		is_null = FALSE;
@@ -2266,14 +2340,12 @@ row_sel_convert_mysql_key_to_innobase(
 			data_offset = 1;
 
 			if (*key_ptr != 0) {
-				dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
+				dfield_set_null(dfield);
 
 				is_null = TRUE;
 			}
 		}
 
-		type = dfield_get_type(dfield)->mtype;
-
 		/* Calculate data length and data field total length */
 
 		if (type == DATA_BLOB) {
@@ -2319,9 +2391,10 @@ row_sel_convert_mysql_key_to_innobase(
 			data_field_len = data_offset + data_len;
 		}
 
-		if (dtype_get_mysql_type(dfield_get_type(dfield))
-		    == DATA_MYSQL_TRUE_VARCHAR
-		    && dfield_get_type(dfield)->mtype != DATA_INT) {
+		if (UNIV_UNLIKELY
+		    (dtype_get_mysql_type(dfield_get_type(dfield))
+		     == DATA_MYSQL_TRUE_VARCHAR)
+		    && UNIV_LIKELY(type != DATA_INT)) {
 			/* In a MySQL key value format, a true VARCHAR is
 			always preceded by 2 bytes of a length field.
 			dfield_get_type(dfield)->len returns the maximum
@@ -2337,7 +2410,7 @@ row_sel_convert_mysql_key_to_innobase(
 
 		/* Storing may use at most data_len bytes of buf */
 
-		if (!is_null) {
+		if (UNIV_LIKELY(!is_null)) {
 			row_mysql_store_col_in_innobase_format(
 				dfield, buf,
 				FALSE, /* MySQL key value format col */
@@ -2348,7 +2421,7 @@ row_sel_convert_mysql_key_to_innobase(
 
 		key_ptr += data_field_len;
 
-		if (key_ptr > key_end) {
+		if (UNIV_UNLIKELY(key_ptr > key_end)) {
 			/* The last field in key was not a complete key field
 			but a prefix of it.
 
@@ -2372,10 +2445,12 @@ row_sel_convert_mysql_key_to_innobase(
 				(ulong) (key_ptr - key_end));
 			fflush(stderr);
 			ut_print_buf(stderr, original_key_ptr, key_len);
-			fprintf(stderr, "\n");
+			putc('\n', stderr);
 
 			if (!is_null) {
-				dfield->len -= (ulint)(key_ptr - key_end);
+				ulint	len = dfield_get_len(dfield);
+				dfield_set_len(dfield, len
+					       - (ulint) (key_ptr - key_end));
 			}
 		}
 
@@ -2392,20 +2467,20 @@ row_sel_convert_mysql_key_to_innobase(
 	dtuple_set_n_fields(tuple, n_fields);
 }
 
-/******************************************************************
+/**************************************************************//**
 Stores the row id to the prebuilt struct. */
 static
 void
 row_sel_store_row_id_to_prebuilt(
 /*=============================*/
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt */
-	rec_t*		index_rec,	/* in: record */
-	dict_index_t*	index,		/* in: index of the record */
-	const ulint*	offsets)	/* in: rec_get_offsets
-					(index_rec, index) */
+	row_prebuilt_t*		prebuilt,	/*!< in/out: prebuilt */
+	const rec_t*		index_rec,	/*!< in: record */
+	const dict_index_t*	index,		/*!< in: index of the record */
+	const ulint*		offsets)	/*!< in: rec_get_offsets
+						(index_rec, index) */
 {
-	byte*	data;
-	ulint	len;
+	const byte*	data;
+	ulint		len;
 
 	ut_ad(rec_offs_validate(index_rec, index, offsets));
 
@@ -2413,7 +2488,7 @@ row_sel_store_row_id_to_prebuilt(
 		index_rec, offsets,
 		dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len);
 
-	if (len != DATA_ROW_ID_LEN) {
+	if (UNIV_UNLIKELY(len != DATA_ROW_ID_LEN)) {
 		fprintf(stderr,
 			"InnoDB: Error: Row id field is"
 			" wrong length %lu in ", (ulong) len);
@@ -2430,22 +2505,26 @@ row_sel_store_row_id_to_prebuilt(
 	ut_memcpy(prebuilt->row_id, data, len);
 }
 
-/******************************************************************
+/**************************************************************//**
 Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
 function is row_mysql_store_col_in_innobase_format() in row0mysql.c. */
 static
 void
 row_sel_field_store_in_mysql_format(
 /*================================*/
-	byte*	dest,	/* in/out: buffer where to store; NOTE that BLOBs
-			are not in themselves stored here: the caller must
-			allocate and copy the BLOB into buffer before, and pass
-			the pointer to the BLOB in 'data' */
-	const mysql_row_templ_t* templ,	/* in: MySQL column template.
-			Its following fields are referenced:
-			type, is_unsigned, mysql_col_len, mbminlen, mbmaxlen */
-	byte*	data,	/* in: data to store */
-	ulint	len)	/* in: length of the data */
+	byte*		dest,	/*!< in/out: buffer where to store; NOTE
+				that BLOBs are not in themselves
+				stored here: the caller must allocate
+				and copy the BLOB into buffer before,
+				and pass the pointer to the BLOB in
+				'data' */
+	const mysql_row_templ_t* templ,
+				/*!< in: MySQL column template.
+				Its following fields are referenced:
+				type, is_unsigned, mysql_col_len,
+				mbminlen, mbmaxlen */
+	const byte*	data,	/*!< in: data to store */
+	ulint		len)	/*!< in: length of the data */
 {
 	byte*	ptr;
 	byte*	field_end;
@@ -2453,7 +2532,8 @@ row_sel_field_store_in_mysql_format(
 
 	ut_ad(len != UNIV_SQL_NULL);
 
-	if (templ->type == DATA_INT) {
+	switch (templ->type) {
+	case DATA_INT:
 		/* Convert integer data from Innobase to a little-endian
 		format, sign bit restored to normal */
 
@@ -2473,10 +2553,11 @@ row_sel_field_store_in_mysql_format(
 		}
 
 		ut_ad(templ->mysql_col_len == len);
-	} else if (templ->type == DATA_VARCHAR
-		   || templ->type == DATA_VARMYSQL
-		   || templ->type == DATA_BINARY) {
+		break;
 
+	case DATA_VARCHAR:
+	case DATA_VARMYSQL:
+	case DATA_BINARY:
 		field_end = dest + templ->mysql_col_len;
 
 		if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
@@ -2527,13 +2608,17 @@ row_sel_field_store_in_mysql_format(
 
 			memset(pad_ptr, 0x20, field_end - pad_ptr);
 		}
-	} else if (templ->type == DATA_BLOB) {
+		break;
+
+	case DATA_BLOB:
 		/* Store a pointer to the BLOB buffer to dest: the BLOB was
 		already copied to the buffer in row_sel_store_mysql_rec */
 
 		row_mysql_store_blob_ref(dest, templ->mysql_col_len, data,
 					 len);
-	} else if (templ->type == DATA_MYSQL) {
+		break;
+
+	case DATA_MYSQL:
 		memcpy(dest, data, len);
 
 		ut_ad(templ->mysql_col_len >= len);
@@ -2554,45 +2639,51 @@ row_sel_field_store_in_mysql_format(
 
 			memset(dest + len, 0x20, templ->mysql_col_len - len);
 		}
-	} else {
-		ut_ad(templ->type == DATA_CHAR
-		      || templ->type == DATA_FIXBINARY
-		      /*|| templ->type == DATA_SYS_CHILD
-		      || templ->type == DATA_SYS*/
-		      || templ->type == DATA_FLOAT
-		      || templ->type == DATA_DOUBLE
-		      || templ->type == DATA_DECIMAL);
-		ut_ad(templ->mysql_col_len == len);
+		break;
 
+	default:
+#ifdef UNIV_DEBUG
+	case DATA_SYS_CHILD:
+	case DATA_SYS:
+		/* These column types should never be shipped to MySQL. */
+		ut_ad(0);
+
+	case DATA_CHAR:
+	case DATA_FIXBINARY:
+	case DATA_FLOAT:
+	case DATA_DOUBLE:
+	case DATA_DECIMAL:
+		/* Above are the valid column types for MySQL data. */
+#endif /* UNIV_DEBUG */
+		ut_ad(templ->mysql_col_len == len);
 		memcpy(dest, data, len);
 	}
 }
 
-/******************************************************************
+/**************************************************************//**
 Convert a row in the Innobase format to a row in the MySQL format.
 Note that the template in prebuilt may advise us to copy only a few
 columns to mysql_rec, other columns are left blank. All columns may not
-be needed in the query. */
+be needed in the query.
+@return TRUE if success, FALSE if could not allocate memory for a BLOB
+(though we may also assert in that case) */
 static
 ibool
 row_sel_store_mysql_rec(
 /*====================*/
-					/* out: TRUE if success, FALSE if
-					could not allocate memory for a BLOB
-					(though we may also assert in that
-					case) */
-	byte*		mysql_rec,	/* out: row in the MySQL format */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
-	rec_t*		rec,		/* in: Innobase record in the index
+	byte*		mysql_rec,	/*!< out: row in the MySQL format */
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
+	const rec_t*	rec,		/*!< in: Innobase record in the index
 					which was described in prebuilt's
-					template */
-	const ulint*	offsets)	/* in: array returned by
+					template; must be protected by
+					a page latch */
+	const ulint*	offsets)	/*!< in: array returned by
 					rec_get_offsets() */
 {
 	mysql_row_templ_t*	templ;
 	mem_heap_t*		extern_field_heap	= NULL;
 	mem_heap_t*		heap;
-	byte*			data;
+	const byte*		data;
 	ulint			len;
 	ulint			i;
 
@@ -2636,8 +2727,9 @@ row_sel_store_mysql_rec(
 			causes an assert */
 
 			data = btr_rec_copy_externally_stored_field(
-				rec, offsets, templ->rec_field_no,
-				&len, heap);
+				rec, offsets,
+				dict_table_zip_size(prebuilt->table),
+				templ->rec_field_no, &len, heap);
 
 			ut_a(len != UNIV_SQL_NULL);
 		} else {
@@ -2690,7 +2782,8 @@ row_sel_store_mysql_rec(
 			mysql_rec[templ->mysql_null_byte_offset]
 				|= (byte) templ->mysql_null_bit_mask;
 			memcpy(mysql_rec + templ->mysql_col_offset,
-			       prebuilt->default_rec + templ->mysql_col_offset,
+			       (const byte*) prebuilt->default_rec
+			       + templ->mysql_col_offset,
 			       templ->mysql_col_len);
 		}
 	}
@@ -2698,26 +2791,26 @@ row_sel_store_mysql_rec(
 	return(TRUE);
 }
 
-/*************************************************************************
-Builds a previous version of a clustered index record for a consistent read */
+/*********************************************************************//**
+Builds a previous version of a clustered index record for a consistent read
+@return	DB_SUCCESS or error code */
 static
 ulint
 row_sel_build_prev_vers_for_mysql(
 /*==============================*/
-					/* out: DB_SUCCESS or error code */
-	read_view_t*	read_view,	/* in: read view */
-	dict_index_t*	clust_index,	/* in: clustered index */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
-	rec_t*		rec,		/* in: record in a clustered index */
-	ulint**		offsets,	/* in/out: offsets returned by
+	read_view_t*	read_view,	/*!< in: read view */
+	dict_index_t*	clust_index,	/*!< in: clustered index */
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
+	const rec_t*	rec,		/*!< in: record in a clustered index */
+	ulint**		offsets,	/*!< in/out: offsets returned by
 					rec_get_offsets(rec, clust_index) */
-	mem_heap_t**	offset_heap,	/* in/out: memory heap from which
+	mem_heap_t**	offset_heap,	/*!< in/out: memory heap from which
 					the offsets are allocated */
-	rec_t**		old_vers,	/* out: old version, or NULL if the
+	rec_t**		old_vers,	/*!< out: old version, or NULL if the
 					record does not exist in the view:
 					i.e., it was freshly inserted
 					afterwards */
-	mtr_t*		mtr)		/* in: mtr */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ulint	err;
 
@@ -2733,36 +2826,38 @@ row_sel_build_prev_vers_for_mysql(
 	return(err);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Retrieves the clustered index record corresponding to a record in a
 non-clustered index. Does the necessary locking. Used in the MySQL
-interface. */
+interface.
+@return	DB_SUCCESS or error code */
 static
 ulint
 row_sel_get_clust_rec_for_mysql(
 /*============================*/
-				/* out: DB_SUCCESS or error code */
-	row_prebuilt_t*	prebuilt,/* in: prebuilt struct in the handle */
-	dict_index_t*	sec_index,/* in: secondary index where rec resides */
-	rec_t*		rec,	/* in: record in a non-clustered index; if
+	row_prebuilt_t*	prebuilt,/*!< in: prebuilt struct in the handle */
+	dict_index_t*	sec_index,/*!< in: secondary index where rec resides */
+	const rec_t*	rec,	/*!< in: record in a non-clustered index; if
 				this is a locking read, then rec is not
 				allowed to be delete-marked, and that would
 				not make sense either */
-	que_thr_t*	thr,	/* in: query thread */
-	rec_t**		out_rec,/* out: clustered record or an old version of
+	que_thr_t*	thr,	/*!< in: query thread */
+	const rec_t**	out_rec,/*!< out: clustered record or an old version of
 				it, NULL if the old version did not exist
 				in the read view, i.e., it was a fresh
 				inserted version */
-	ulint**		offsets,/* out: offsets returned by
+	ulint**		offsets,/*!< in: offsets returned by
+				rec_get_offsets(rec, sec_index);
+				out: offsets returned by
 				rec_get_offsets(out_rec, clust_index) */
-	mem_heap_t**	offset_heap,/* in/out: memory heap from which
+	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
 				the offsets are allocated */
-	mtr_t*		mtr)	/* in: mtr used to get access to the
+	mtr_t*		mtr)	/*!< in: mtr used to get access to the
 				non-clustered record; the same mtr is used to
 				access the clustered index */
 {
 	dict_index_t*	clust_index;
-	rec_t*		clust_rec;
+	const rec_t*	clust_rec;
 	rec_t*		old_vers;
 	ulint		err;
 	trx_t*		trx;
@@ -2770,7 +2865,8 @@ row_sel_get_clust_rec_for_mysql(
 	*out_rec = NULL;
 	trx = thr_get_trx(thr);
 
-	row_build_row_ref_in_tuple(prebuilt->clust_ref, sec_index, rec, trx);
+	row_build_row_ref_in_tuple(prebuilt->clust_ref, rec,
+				   sec_index, *offsets, trx);
 
 	clust_index = dict_table_get_first_index(sec_index->table);
 
@@ -2834,7 +2930,8 @@ row_sel_get_clust_rec_for_mysql(
 		we set a LOCK_REC_NOT_GAP type lock */
 
 		err = lock_clust_rec_read_check_and_lock(
-			0, clust_rec, clust_index, *offsets,
+			0, btr_pcur_get_block(prebuilt->clust_pcur),
+			clust_rec, clust_index, *offsets,
 			prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr);
 		if (err != DB_SUCCESS) {
 
@@ -2861,7 +2958,7 @@ row_sel_get_clust_rec_for_mysql(
 				clust_rec, offsets, offset_heap, &old_vers,
 				mtr);
 
-			if (err != DB_SUCCESS) {
+			if (err != DB_SUCCESS || old_vers == NULL) {
 
 				goto err_exit;
 			}
@@ -2882,15 +2979,15 @@ row_sel_get_clust_rec_for_mysql(
 		visit through secondary index records that would not really
 		exist in our snapshot. */
 
-		if (clust_rec && (old_vers || rec_get_deleted_flag(
-					  rec,
-					  dict_table_is_comp(
-						  sec_index->table)))
+		if (clust_rec
+		    && (old_vers
+			|| rec_get_deleted_flag(rec, dict_table_is_comp(
+							sec_index->table)))
 		    && !row_sel_sec_rec_is_for_clust_rec(
 			    rec, sec_index, clust_rec, clust_index)) {
 			clust_rec = NULL;
-		} else {
 #ifdef UNIV_SEARCH_DEBUG
+		} else {
 			ut_a(clust_rec == NULL
 			     || row_sel_sec_rec_is_for_clust_rec(
 				     rec, sec_index, clust_rec, clust_index));
@@ -2913,29 +3010,27 @@ err_exit:
 	return(err);
 }
 
-/************************************************************************
+/********************************************************************//**
 Restores cursor position after it has been stored. We have to take into
 account that the record cursor was positioned on may have been deleted.
-Then we may have to move the cursor one step up or down. */
+Then we may have to move the cursor one step up or down.
+@return TRUE if we may need to process the record the cursor is now
+positioned on (i.e. we should not go to the next record yet) */
 static
 ibool
 sel_restore_position_for_mysql(
 /*===========================*/
-					/* out: TRUE if we may need to
-					process the record the cursor is
-					now positioned on (i.e. we should
-					not go to the next record yet) */
-	ibool*		same_user_rec,	/* out: TRUE if we were able to restore
+	ibool*		same_user_rec,	/*!< out: TRUE if we were able to restore
 					the cursor on a user record with the
 					same ordering prefix in in the
 					B-tree index */
-	ulint		latch_mode,	/* in: latch mode wished in
+	ulint		latch_mode,	/*!< in: latch mode wished in
 					restoration */
-	btr_pcur_t*	pcur,		/* in: cursor whose position
+	btr_pcur_t*	pcur,		/*!< in: cursor whose position
 					has been stored */
-	ibool		moves_up,	/* in: TRUE if the cursor moves up
+	ibool		moves_up,	/*!< in: TRUE if the cursor moves up
 					in the index */
-	mtr_t*		mtr)		/* in: mtr; CAUTION: may commit
+	mtr_t*		mtr)		/*!< in: mtr; CAUTION: may commit
 					mtr temporarily! */
 {
 	ibool	success;
@@ -2966,7 +3061,7 @@ sel_restore_position_for_mysql(
 			return(TRUE);
 		}
 
-		if (btr_pcur_is_on_user_rec(pcur, mtr)) {
+		if (btr_pcur_is_on_user_rec(pcur)) {
 			btr_pcur_move_to_prev(pcur, mtr);
 		}
 
@@ -2976,22 +3071,22 @@ sel_restore_position_for_mysql(
 	ut_ad(relative_position == BTR_PCUR_BEFORE
 	      || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE);
 
-	if (moves_up && btr_pcur_is_on_user_rec(pcur, mtr)) {
+	if (moves_up && btr_pcur_is_on_user_rec(pcur)) {
 		btr_pcur_move_to_next(pcur, mtr);
 	}
 
 	return(TRUE);
 }
 
-/************************************************************************
+/********************************************************************//**
 Pops a cached row for MySQL from the fetch cache. */
 UNIV_INLINE
 void
 row_sel_pop_cached_row_for_mysql(
 /*=============================*/
-	byte*		buf,		/* in/out: buffer where to copy the
+	byte*		buf,		/*!< in/out: buffer where to copy the
 					row */
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct */
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct */
 {
 	ulint			i;
 	mysql_row_templ_t*	templ;
@@ -3033,15 +3128,16 @@ row_sel_pop_cached_row_for_mysql(
 	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Pushes a row for MySQL to the fetch cache. */
 UNIV_INLINE
 void
 row_sel_push_cache_row_for_mysql(
 /*=============================*/
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
-	rec_t*		rec,		/* in: record to push */
-	const ulint*	offsets)	/* in: rec_get_offsets() */
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
+	const rec_t*	rec,		/*!< in: record to push; must
+					be protected by a page latch */
+	const ulint*	offsets)	/*!< in: rec_get_offsets() */
 {
 	byte*	buf;
 	ulint	i;
@@ -3081,29 +3177,29 @@ row_sel_push_cache_row_for_mysql(
 	prebuilt->n_fetch_cached++;
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Tries to do a shortcut to fetch a clustered index record with a unique key,
 using the hash index if possible (not always). We assume that the search
 mode is PAGE_CUR_GE, it is a consistent read, there is a read view in trx,
-btr search latch has been locked in S-mode. */
+btr search latch has been locked in S-mode.
+@return	SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
 static
 ulint
 row_sel_try_search_shortcut_for_mysql(
 /*==================================*/
-				/* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
-	rec_t**		out_rec,/* out: record if found */
-	row_prebuilt_t*	prebuilt,/* in: prebuilt struct */
-	ulint**		offsets,/* in/out: for rec_get_offsets(*out_rec) */
-	mem_heap_t**	heap,	/* in/out: heap for rec_get_offsets() */
-	mtr_t*		mtr)	/* in: started mtr */
+	const rec_t**	out_rec,/*!< out: record if found */
+	row_prebuilt_t*	prebuilt,/*!< in: prebuilt struct */
+	ulint**		offsets,/*!< in/out: for rec_get_offsets(*out_rec) */
+	mem_heap_t**	heap,	/*!< in/out: heap for rec_get_offsets() */
+	mtr_t*		mtr)	/*!< in: started mtr */
 {
 	dict_index_t*	index		= prebuilt->index;
-	dtuple_t*	search_tuple	= prebuilt->search_tuple;
+	const dtuple_t*	search_tuple	= prebuilt->search_tuple;
 	btr_pcur_t*	pcur		= prebuilt->pcur;
 	trx_t*		trx		= prebuilt->trx;
-	rec_t*		rec;
+	const rec_t*	rec;
 
-	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(dict_index_is_clust(index));
 	ut_ad(!prebuilt->templ_contains_blob);
 
 	btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
@@ -3152,34 +3248,31 @@ row_sel_try_search_shortcut_for_mysql(
 	return(SEL_FOUND);
 }
 
-/************************************************************************
+/********************************************************************//**
 Searches for rows in the database. This is used in the interface to
 MySQL. This function opens a cursor, and also implements fetch next
 and fetch prev. NOTE that if we do a search with a full key value
 from a unique index (ROW_SEL_EXACT), then we will not store the cursor
-position and fetch next or fetch prev must not be tried to the cursor! */
-
+position and fetch next or fetch prev must not be tried to the cursor!
+@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
+DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
+UNIV_INTERN
 ulint
 row_search_for_mysql(
 /*=================*/
-					/* out: DB_SUCCESS,
-					DB_RECORD_NOT_FOUND,
-					DB_END_OF_INDEX, DB_DEADLOCK,
-					DB_LOCK_TABLE_FULL, DB_CORRUPTION,
-					or DB_TOO_BIG_RECORD */
-	byte*		buf,		/* in/out: buffer for the fetched
+	byte*		buf,		/*!< in/out: buffer for the fetched
 					row in the MySQL format */
-	ulint		mode,		/* in: search mode PAGE_CUR_L, ... */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct for the
+	ulint		mode,		/*!< in: search mode PAGE_CUR_L, ... */
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct for the
 					table handle; this contains the info
 					of search_tuple, index; if search
 					tuple contains 0 fields then we
 					position the cursor at the start or
 					the end of the index, depending on
 					'mode' */
-	ulint		match_mode,	/* in: 0 or ROW_SEL_EXACT or
+	ulint		match_mode,	/*!< in: 0 or ROW_SEL_EXACT or
 					ROW_SEL_EXACT_PREFIX */
-	ulint		direction)	/* in: 0 or ROW_SEL_NEXT or
+	ulint		direction)	/*!< in: 0 or ROW_SEL_NEXT or
 					ROW_SEL_PREV; NOTE: if this is != 0,
 					then prebuilt must have a pcur
 					with stored position! In opening of a
@@ -3187,14 +3280,14 @@ row_search_for_mysql(
 {
 	dict_index_t*	index		= prebuilt->index;
 	ibool		comp		= dict_table_is_comp(index->table);
-	dtuple_t*	search_tuple	= prebuilt->search_tuple;
+	const dtuple_t*	search_tuple	= prebuilt->search_tuple;
 	btr_pcur_t*	pcur		= prebuilt->pcur;
 	trx_t*		trx		= prebuilt->trx;
 	dict_index_t*	clust_index;
 	que_thr_t*	thr;
-	rec_t*		rec;
-	rec_t*		result_rec;
-	rec_t*		clust_rec;
+	const rec_t*	rec;
+	const rec_t*	result_rec;
+	const rec_t*	clust_rec;
 	ulint		err				= DB_SUCCESS;
 	ibool		unique_search			= FALSE;
 	ibool		unique_search_from_clust_index	= FALSE;
@@ -3217,7 +3310,7 @@ row_search_for_mysql(
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets				= offsets_;
 
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	ut_ad(index && pcur && search_tuple);
 	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
@@ -3233,14 +3326,18 @@ row_search_for_mysql(
 			"InnoDB: the MySQL datadir, or have you used"
 			" DISCARD TABLESPACE?\n"
 			"InnoDB: Look from\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"innodb-troubleshooting.html\n"
+			"InnoDB: " REFMAN "innodb-troubleshooting.html\n"
 			"InnoDB: how you can resolve the problem.\n",
 			prebuilt->table->name);
 
 		return(DB_ERROR);
 	}
 
+	if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
+
+		return(DB_MISSING_HISTORY);
+	}
+
 	if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
 		fprintf(stderr,
 			"InnoDB: Error: trying to free a corrupt\n"
@@ -3381,10 +3478,10 @@ row_search_for_mysql(
 	locks when locking delete-marked records. */
 
 	if (match_mode == ROW_SEL_EXACT
-	    && index->type & DICT_UNIQUE
+	    && dict_index_is_unique(index)
 	    && dtuple_get_n_fields(search_tuple)
 	    == dict_index_get_n_unique(index)
-	    && (index->type & DICT_CLUSTERED
+	    && (dict_index_is_clust(index)
 		|| !dtuple_contains_null(search_tuple))) {
 
 		/* Note above that a UNIQUE secondary index can contain many
@@ -3421,7 +3518,7 @@ row_search_for_mysql(
 
 	if (UNIV_UNLIKELY(direction == 0)
 	    && unique_search
-	    && index->type & DICT_CLUSTERED
+	    && dict_index_is_clust(index)
 	    && !prebuilt->templ_contains_blob
 	    && !prebuilt->used_in_HANDLER
 	    && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
@@ -3461,6 +3558,12 @@ row_search_for_mysql(
 				ut_a(0 == cmp_dtuple_rec(search_tuple,
 							 rec, offsets));
 #endif
+				/* At this point, rec is protected by
+				a page latch that was acquired by
+				row_sel_try_search_shortcut_for_mysql().
+				The latch will not be released until
+				mtr_commit(&mtr). */
+
 				if (!row_sel_store_mysql_rec(buf, prebuilt,
 							     rec, offsets)) {
 					err = DB_TOO_BIG_RECORD;
@@ -3477,19 +3580,8 @@ row_search_for_mysql(
 
 				srv_n_rows_read++;
 
-				if (trx->search_latch_timeout > 0
-				    && trx->has_search_latch) {
-
-					trx->search_latch_timeout--;
-
-					rw_lock_s_unlock(&btr_search_latch);
-					trx->has_search_latch = FALSE;
-				}
-
-				/* NOTE that we do NOT store the cursor
-				position */
 				err = DB_SUCCESS;
-				goto func_exit;
+				goto release_search_latch_if_needed;
 
 			case SEL_EXHAUSTED:
 				mtr_commit(&mtr);
@@ -3497,6 +3589,8 @@ row_search_for_mysql(
 				/* ut_print_name(stderr, index->name);
 				fputs(" record not found 2\n", stderr); */
 
+				err = DB_RECORD_NOT_FOUND;
+release_search_latch_if_needed:
 				if (trx->search_latch_timeout > 0
 				    && trx->has_search_latch) {
 
@@ -3508,9 +3602,13 @@ row_search_for_mysql(
 
 				/* NOTE that we do NOT store the cursor
 				position */
-
-				err = DB_RECORD_NOT_FOUND;
 				goto func_exit;
+
+			case SEL_RETRY:
+				break;
+
+			default:
+				ut_ad(0);
 			}
 shortcut_fails_too_big_rec:
 			mtr_commit(&mtr);
@@ -3601,12 +3699,12 @@ shortcut_fails_too_big_rec:
 
 			/* Try to place a gap lock on the next index record
 			to prevent phantoms in ORDER BY ... DESC queries */
+			const rec_t*	next = page_rec_get_next_const(rec);
 
-			offsets = rec_get_offsets(page_rec_get_next(rec),
-						  index, offsets,
+			offsets = rec_get_offsets(next, index, offsets,
 						  ULINT_UNDEFINED, &heap);
-			err = sel_set_rec_lock(page_rec_get_next(rec),
-					       index, offsets,
+			err = sel_set_rec_lock(btr_pcur_get_block(pcur),
+					       next, index, offsets,
 					       prebuilt->select_lock_type,
 					       LOCK_GAP, thr);
 
@@ -3674,7 +3772,7 @@ rec_loop:
 	fputs("Using ", stderr);
 	dict_index_name_print(stderr, index);
 	fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt,
-	buf_frame_get_page_no(buf_frame_align(rec)));
+	page_get_page_no(page_align(rec)));
 	rec_print(rec);
 	*/
 #endif /* UNIV_SEARCH_DEBUG */
@@ -3704,7 +3802,8 @@ rec_loop:
 
 			offsets = rec_get_offsets(rec, index, offsets,
 						  ULINT_UNDEFINED, &heap);
-			err = sel_set_rec_lock(rec, index, offsets,
+			err = sel_set_rec_lock(btr_pcur_get_block(pcur),
+					       rec, index, offsets,
 					       prebuilt->select_lock_type,
 					       LOCK_ORDINARY, thr);
 
@@ -3742,22 +3841,20 @@ rec_loop:
 wrong_offs:
 		if (srv_force_recovery == 0 || moves_up == FALSE) {
 			ut_print_timestamp(stderr);
-			buf_page_print(buf_frame_align(rec));
+			buf_page_print(page_align(rec), 0);
 			fprintf(stderr,
-				"\nInnoDB: rec address %p, first"
-				" buffer frame %p\n"
-				"InnoDB: buffer pool high end %p,"
+				"\nInnoDB: rec address %p,"
 				" buf block fix count %lu\n",
-				(void*) rec, (void*) buf_pool->frame_zero,
-				(void*) buf_pool->high_end,
-				(ulong)buf_block_align(rec)->buf_fix_count);
+				(void*) rec, (ulong)
+				btr_cur_get_block(btr_pcur_get_btr_cur(pcur))
+				->page.buf_fix_count);
 			fprintf(stderr,
 				"InnoDB: Index corruption: rec offs %lu"
 				" next offs %lu, page no %lu,\n"
 				"InnoDB: ",
 				(ulong) page_offset(rec),
 				(ulong) next_offs,
-				(ulong) buf_frame_get_page_no(rec));
+				(ulong) page_get_page_no(page_align(rec)));
 			dict_index_name_print(stderr, trx, index);
 			fputs(". Run CHECK TABLE. You may need to\n"
 			      "InnoDB: restore from a backup, or"
@@ -3777,7 +3874,7 @@ wrong_offs:
 				"InnoDB: ",
 				(ulong) page_offset(rec),
 				(ulong) next_offs,
-				(ulong) buf_frame_get_page_no(rec));
+				(ulong) page_get_page_no(page_align(rec)));
 			dict_index_name_print(stderr, trx, index);
 			fputs(". We try to skip the rest of the page.\n",
 			      stderr);
@@ -3802,7 +3899,7 @@ wrong_offs:
 				"InnoDB: ",
 				(ulong) page_offset(rec),
 				(ulong) next_offs,
-				(ulong) buf_frame_get_page_no(rec));
+				(ulong) page_get_page_no(page_align(rec)));
 			dict_index_name_print(stderr, trx, index);
 			fputs(". We try to skip the record.\n",
 			      stderr);
@@ -3836,6 +3933,7 @@ wrong_offs:
 				using a READ COMMITTED isolation level. */
 
 				err = sel_set_rec_lock(
+					btr_pcur_get_block(pcur),
 					rec, index, offsets,
 					prebuilt->select_lock_type, LOCK_GAP,
 					thr);
@@ -3871,6 +3969,7 @@ wrong_offs:
 				using a READ COMMITTED isolation level. */
 
 				err = sel_set_rec_lock(
+					btr_pcur_get_block(pcur),
 					rec, index, offsets,
 					prebuilt->select_lock_type, LOCK_GAP,
 					thr);
@@ -3939,12 +4038,13 @@ no_gap_lock:
 			lock_type = LOCK_REC_NOT_GAP;
 		}
 
-		err = sel_set_rec_lock(rec, index, offsets,
+		err = sel_set_rec_lock(btr_pcur_get_block(pcur),
+				       rec, index, offsets,
 				       prebuilt->select_lock_type,
 				       lock_type, thr);
 
 		switch (err) {
-			rec_t*	old_vers;
+			const rec_t*	old_vers;
 		case DB_SUCCESS:
 			if (srv_locks_unsafe_for_binlog
 			    || trx->isolation_level == TRX_ISO_READ_COMMITTED) {
@@ -4056,8 +4156,7 @@ no_gap_lock:
 
 				rec = old_vers;
 			}
-		} else if (!lock_sec_rec_cons_read_sees(rec, index,
-							trx->read_view)) {
+		} else if (!lock_sec_rec_cons_read_sees(rec, trx->read_view)) {
 			/* We are looking into a non-clustered index,
 			and to get the right version of the record we
 			have to look also into the clustered index: this
@@ -4197,6 +4296,10 @@ requires_clust_rec:
 				result_rec != rec ? clust_index : index,
 				offsets));
 
+	/* At this point, the clustered index record is protected
+	by a page latch that was acquired when pcur was positioned.
+	The latch will not be released until mtr_commit(&mtr). */
+
 	if ((match_mode == ROW_SEL_EXACT
 	     || prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD)
 	    && prebuilt->select_lock_type == LOCK_NONE
@@ -4446,17 +4549,16 @@ func_exit:
 	return(err);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Checks if MySQL at the moment is allowed for this table to retrieve a
-consistent read result, or store it to the query cache. */
-
+consistent read result, or store it to the query cache.
+@return	TRUE if storing or retrieving from the query cache is permitted */
+UNIV_INTERN
 ibool
 row_search_check_if_query_cache_permitted(
 /*======================================*/
-					/* out: TRUE if storing or retrieving
-					from the query cache is permitted */
-	trx_t*		trx,		/* in: transaction object */
-	const char*	norm_name)	/* in: concatenation of database name,
+	trx_t*		trx,		/*!< in: transaction object */
+	const char*	norm_name)	/*!< in: concatenation of database name,
 					'/' char, table name */
 {
 	dict_table_t*	table;
@@ -4503,63 +4605,58 @@ row_search_check_if_query_cache_permitted(
 	return(ret);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Read the AUTOINC column from the current row. If the value is less than
-0 and the type is not unsigned then we reset the value to 0. */
+0 and the type is not unsigned then we reset the value to 0.
+@return	value read from the column */
 static
-ib_ulonglong
+ib_uint64_t
 row_search_autoinc_read_column(
 /*===========================*/
-					/* out: value read from the column */
-	dict_index_t*	index,		/* in: index to read from */
-	const rec_t*	rec,		/* in: current rec */
-	ulint		col_no,		/* in: column number */
-	ibool		unsigned_type)	/* in: signed or unsigned flag */
+	dict_index_t*	index,		/*!< in: index to read from */
+	const rec_t*	rec,		/*!< in: current rec */
+	ulint		col_no,		/*!< in: column number */
+	ibool		unsigned_type)	/*!< in: signed or unsigned flag */
 {
 	ulint		len;
 	const byte*	data;
-	ib_ulonglong	value;
+	ib_uint64_t	value;
 	mem_heap_t*	heap = NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets	= offsets_;
 
-	*offsets_ = sizeof offsets_ / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
-	/* TODO: We have to cast away the const of rec for now.  This needs
-	to be fixed later.*/
-	offsets = rec_get_offsets(
-		(rec_t*) rec, index, offsets, ULINT_UNDEFINED, &heap);
+	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
 
-	/* TODO: We have to cast away the const of rec for now.  This needs
-	to be fixed later.*/
-	data = rec_get_nth_field((rec_t*)rec, offsets, col_no, &len);
+	data = rec_get_nth_field(rec, offsets, col_no, &len);
 
 	ut_a(len != UNIV_SQL_NULL);
 	ut_a(len <= sizeof value);
 
+	/* we assume AUTOINC value cannot be negative */
 	value = mach_read_int_type(data, len, unsigned_type);
 
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}
 
-	/* We assume that the autoinc counter can't be negative. */
-	if (!unsigned_type && (ib_longlong) value < 0) {
+	if (!unsigned_type && (ib_int64_t) value < 0) {
 		value = 0;
 	}
 
 	return(value);
 }
 
-/***********************************************************************
-Get the last row. */
+/*******************************************************************//**
+Get the last row.
+@return	current rec or NULL */
 static
 const rec_t*
 row_search_autoinc_get_rec(
 /*=======================*/
-					/* out: current rec or NULL */
-	btr_pcur_t*	pcur,		/* in: the current cursor */
-	mtr_t*		mtr)		/* in: mini transaction */
+	btr_pcur_t*	pcur,		/*!< in: the current cursor */
+	mtr_t*		mtr)		/*!< in: mini transaction */
 {
 	do {
 		const rec_t* rec = btr_pcur_get_rec(pcur);
@@ -4572,18 +4669,17 @@ row_search_autoinc_get_rec(
 	return(NULL);
 }
 
-/***********************************************************************
-Read the max AUTOINC value from an index. */
-
+/*******************************************************************//**
+Read the max AUTOINC value from an index.
+@return DB_SUCCESS if all OK else error code, DB_RECORD_NOT_FOUND if
+column name can't be found in index */
+UNIV_INTERN
 ulint
 row_search_max_autoinc(
 /*===================*/
-					/* out: DB_SUCCESS if all OK else
-					error code, DB_RECORD_NOT_FOUND if
-					column name can't be found in index */
-	dict_index_t*	index,		/* in: index to search */
-	const char*	col_name,	/* in: name of autoinc column */
-	ib_ulonglong*	value)		/* out: AUTOINC value read */
+	dict_index_t*	index,		/*!< in: index to search */
+	const char*	col_name,	/*!< in: name of autoinc column */
+	ib_uint64_t*	value)		/*!< out: AUTOINC value read */
 {
 	ulint		i;
 	ulint		n_cols;
diff --git a/storage/innobase/row/row0uins.c b/storage/innodb_plugin/row/row0uins.c
similarity index 55%
rename from storage/innobase/row/row0uins.c
rename to storage/innodb_plugin/row/row0uins.c
index ce9ab792204..9f9c814f1a5 100644
--- a/storage/innobase/row/row0uins.c
+++ b/storage/innodb_plugin/row/row0uins.c
@@ -1,7 +1,24 @@
-/******************************************************
-Fresh insert undo
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0uins.c
+Fresh insert undo
 
 Created 2/25/1997 Heikki Tuuri
 *******************************************************/
@@ -29,15 +46,15 @@ Created 2/25/1997 Heikki Tuuri
 #include "ibuf0ibuf.h"
 #include "log0log.h"
 
-/*******************************************************************
+/***************************************************************//**
 Removes a clustered index record. The pcur in node was positioned on the
-record, now it is detached. */
+record, now it is detached.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 static
 ulint
 row_undo_ins_remove_clust_rec(
 /*==========================*/
-				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-	undo_node_t*	node)	/* in: undo node */
+	undo_node_t*	node)	/*!< in: undo node */
 {
 	btr_cur_t*	btr_cur;
 	ibool		success;
@@ -52,6 +69,7 @@ row_undo_ins_remove_clust_rec(
 	ut_a(success);
 
 	if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {
+		ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
 
 		/* Drop the index tree associated with the row in
 		SYS_INDEXES table: */
@@ -86,7 +104,10 @@ retry:
 					    &(node->pcur), &mtr);
 	ut_a(success);
 
-	btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr);
+	btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+				   trx_is_recv(node->trx)
+				   ? RB_RECOVERY
+				   : RB_NORMAL, &mtr);
 
 	/* The delete operation may fail if we have little
 	file space left: TODO: easiest to crash the database
@@ -111,19 +132,18 @@ retry:
 	return(err);
 }
 
-/*******************************************************************
-Removes a secondary index entry if found. */
+/***************************************************************//**
+Removes a secondary index entry if found.
+@return	DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
 static
 ulint
 row_undo_ins_remove_sec_low(
 /*========================*/
-				/* out: DB_SUCCESS, DB_FAIL, or
-				DB_OUT_OF_FILE_SPACE */
-	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
 				depending on whether we wish optimistic or
 				pessimistic descent down the index tree */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry)	/* in: index entry to remove */
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry)	/*!< in: index entry to remove */
 {
 	btr_pcur_t	pcur;
 	btr_cur_t*	btr_cur;
@@ -159,7 +179,14 @@ row_undo_ins_remove_sec_low(
 	} else {
 		ut_ad(mode == BTR_MODIFY_TREE);
 
-		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr);
+		/* No need to distinguish RB_RECOVERY here, because we
+		are deleting a secondary index record: the distinction
+		between RB_NORMAL and RB_RECOVERY only matters when
+		deleting a record that contains externally stored
+		columns. */
+		ut_ad(!dict_index_is_clust(index));
+		btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+					   RB_NORMAL, &mtr);
 	}
 
 	btr_pcur_close(&pcur);
@@ -168,16 +195,16 @@ row_undo_ins_remove_sec_low(
 	return(err);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Removes a secondary index entry from the index if found. Tries first
-optimistic, then pessimistic descent down the tree. */
+optimistic, then pessimistic descent down the tree.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 static
 ulint
 row_undo_ins_remove_sec(
 /*====================*/
-				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry)	/* in: index entry to insert */
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry)	/*!< in: index entry to insert */
 {
 	ulint	err;
 	ulint	n_tries	= 0;
@@ -211,17 +238,17 @@ retry:
 	return(err);
 }
 
-/***************************************************************
+/***********************************************************//**
 Parses the row reference and other info in a fresh insert undo record. */
 static
 void
 row_undo_ins_parse_undo_rec(
 /*========================*/
-	undo_node_t*	node)	/* in: row undo node */
+	undo_node_t*	node)	/*!< in/out: row undo node */
 {
 	dict_index_t*	clust_index;
 	byte*		ptr;
-	dulint		undo_no;
+	undo_no_t	undo_no;
 	dulint		table_id;
 	ulint		type;
 	ulint		dummy;
@@ -234,75 +261,90 @@ row_undo_ins_parse_undo_rec(
 	ut_ad(type == TRX_UNDO_INSERT_REC);
 	node->rec_type = type;
 
+	node->update = NULL;
 	node->table = dict_table_get_on_id(table_id, node->trx);
 
-	if (node->table == NULL) {
-
-		return;
-	}
-
-	if (node->table->ibd_file_missing) {
-		/* We skip undo operations to missing .ibd files */
+	/* Skip the UNDO if we can't find the table or the .ibd file. */
+	if (UNIV_UNLIKELY(node->table == NULL)) {
+	} else if (UNIV_UNLIKELY(node->table->ibd_file_missing)) {
 		node->table = NULL;
+	} else {
+		clust_index = dict_table_get_first_index(node->table);
 
-		return;
+		if (clust_index != NULL) {
+			ptr = trx_undo_rec_get_row_ref(
+				ptr, clust_index, &node->ref, node->heap);
+		} else {
+			ut_print_timestamp(stderr);
+			fprintf(stderr, "  InnoDB: table ");
+			ut_print_name(stderr, node->trx, TRUE,
+				      node->table->name);
+			fprintf(stderr, " has no indexes, "
+				"ignoring the table\n");
+
+			node->table = NULL;
+		}
 	}
-
-	clust_index = dict_table_get_first_index(node->table);
-
-	ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
-				       node->heap);
 }
 
-/***************************************************************
+/***********************************************************//**
 Undoes a fresh insert of a row to a table. A fresh insert means that
 the same clustered index unique key did not have any record, even delete
-marked, at the time of the insert. */
-
+marked, at the time of the insert.  InnoDB is eager in a rollback:
+if it figures out that an index record will be removed in the purge
+anyway, it will remove it in the rollback.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+UNIV_INTERN
 ulint
 row_undo_ins(
 /*=========*/
-				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-	undo_node_t*	node)	/* in: row undo node */
+	undo_node_t*	node)	/*!< in: row undo node */
 {
-	dtuple_t*	entry;
-	ibool		found;
-	ulint		err;
-
 	ut_ad(node);
 	ut_ad(node->state == UNDO_NODE_INSERT);
 
 	row_undo_ins_parse_undo_rec(node);
 
-	if (node->table == NULL) {
-		found = FALSE;
-	} else {
-		found = row_undo_search_clust_to_pcur(node);
-	}
-
-	if (!found) {
+	if (!node->table || !row_undo_search_clust_to_pcur(node)) {
 		trx_undo_rec_release(node->trx, node->undo_no);
 
 		return(DB_SUCCESS);
 	}
 
+	/* Iterate over all the indexes and undo the insert.*/
+
+	/* Skip the clustered index (the first index) */
 	node->index = dict_table_get_next_index(
 		dict_table_get_first_index(node->table));
 
 	while (node->index != NULL) {
-		entry = row_build_index_entry(node->row, node->index,
-					      node->heap);
-		err = row_undo_ins_remove_sec(node->index, entry);
+		dtuple_t*	entry;
+		ulint		err;
 
-		if (err != DB_SUCCESS) {
+		entry = row_build_index_entry(node->row, node->ext,
+					      node->index, node->heap);
+		if (UNIV_UNLIKELY(!entry)) {
+			/* The database must have crashed after
+			inserting a clustered index record but before
+			writing all the externally stored columns of
+			that record.  Because secondary index entries
+			are inserted after the clustered index record,
+			we may assume that the secondary index record
+			does not exist.  However, this situation may
+			only occur during the rollback of incomplete
+			transactions. */
+			ut_a(trx_is_recv(node->trx));
+		} else {
+			err = row_undo_ins_remove_sec(node->index, entry);
 
-			return(err);
+			if (err != DB_SUCCESS) {
+
+				return(err);
+			}
 		}
 
 		node->index = dict_table_get_next_index(node->index);
 	}
 
-	err = row_undo_ins_remove_clust_rec(node);
-
-	return(err);
+	return(row_undo_ins_remove_clust_rec(node));
 }
diff --git a/storage/innobase/row/row0umod.c b/storage/innodb_plugin/row/row0umod.c
similarity index 68%
rename from storage/innobase/row/row0umod.c
rename to storage/innodb_plugin/row/row0umod.c
index 68139da116e..6be475d8c78 100644
--- a/storage/innobase/row/row0umod.c
+++ b/storage/innodb_plugin/row/row0umod.c
@@ -1,7 +1,24 @@
-/******************************************************
-Undo modify of a row
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0umod.c
+Undo modify of a row
 
 Created 2/27/1997 Heikki Tuuri
 *******************************************************/
@@ -41,18 +58,17 @@ delete marked clustered index record was delete unmarked and possibly also
 some of its fields were changed. Now, it is possible that the delete marked
 version has become obsolete at the time the undo is started. */
 
-/***************************************************************
+/***********************************************************//**
 Checks if also the previous version of the clustered index record was
 modified or inserted by the same transaction, and its undo number is such
-that it should be undone in the same rollback. */
+that it should be undone in the same rollback.
+@return	TRUE if also previous modify or insert of this row should be undone */
 UNIV_INLINE
 ibool
 row_undo_mod_undo_also_prev_vers(
 /*=============================*/
-				/* out: TRUE if also previous modify or
-				insert of this row should be undone */
-	undo_node_t*	node,	/* in: row undo node */
-	dulint*		undo_no)/* out: the undo number */
+	undo_node_t*	node,	/*!< in: row undo node */
+	undo_no_t*	undo_no)/*!< out: the undo number */
 {
 	trx_undo_rec_t*	undo_rec;
 	trx_t*		trx;
@@ -72,20 +88,19 @@ row_undo_mod_undo_also_prev_vers(
 	return(ut_dulint_cmp(trx->roll_limit, *undo_no) <= 0);
 }
 
-/***************************************************************
-Undoes a modify in a clustered index record. */
+/***********************************************************//**
+Undoes a modify in a clustered index record.
+@return	DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
 static
 ulint
 row_undo_mod_clust_low(
 /*===================*/
-				/* out: DB_SUCCESS, DB_FAIL, or error code:
-				we may run out of file space */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr,	/* in: mtr */
-	ulint		mode)	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr,	/*!< in: mtr; must be committed before
+				latching any further pages */
+	ulint		mode)	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
 {
-	big_rec_t*	dummy_big_rec;
 	btr_pcur_t*	pcur;
 	btr_cur_t*	btr_cur;
 	ulint		err;
@@ -106,31 +121,38 @@ row_undo_mod_clust_low(
 						btr_cur, node->update,
 						node->cmpl_info, thr, mtr);
 	} else {
+		mem_heap_t*	heap		= NULL;
+		big_rec_t*	dummy_big_rec;
+
 		ut_ad(mode == BTR_MODIFY_TREE);
 
 		err = btr_cur_pessimistic_update(
 			BTR_NO_LOCKING_FLAG
 			| BTR_NO_UNDO_LOG_FLAG
 			| BTR_KEEP_SYS_FLAG,
-			btr_cur, &dummy_big_rec, node->update,
+			btr_cur, &heap, &dummy_big_rec, node->update,
 			node->cmpl_info, thr, mtr);
+
+		ut_a(!dummy_big_rec);
+		if (UNIV_LIKELY_NULL(heap)) {
+			mem_heap_free(heap);
+		}
 	}
 
 	return(err);
 }
 
-/***************************************************************
-Removes a clustered index record after undo if possible. */
+/***********************************************************//**
+Removes a clustered index record after undo if possible.
+@return	DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
 static
 ulint
 row_undo_mod_remove_clust_low(
 /*==========================*/
-				/* out: DB_SUCCESS, DB_FAIL, or error code:
-				we may run out of file space */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr __attribute__((unused)), /* in: query thread */
-	mtr_t*		mtr,	/* in: mtr */
-	ulint		mode)	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr __attribute__((unused)), /*!< in: query thread */
+	mtr_t*		mtr,	/*!< in: mtr */
+	ulint		mode)	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
 {
 	btr_pcur_t*	pcur;
 	btr_cur_t*	btr_cur;
@@ -170,9 +192,9 @@ row_undo_mod_remove_clust_low(
 
 		/* Note that since this operation is analogous to purge,
 		we can free also inherited externally stored fields:
-		hence the last FALSE in the call below */
+		hence the RB_NONE in the call below */
 
-		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, mtr);
+		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, RB_NONE, mtr);
 
 		/* The delete operation may fail if we have little
 		file space left: TODO: easiest to crash the database
@@ -182,24 +204,23 @@ row_undo_mod_remove_clust_low(
 	return(err);
 }
 
-/***************************************************************
+/***********************************************************//**
 Undoes a modify in a clustered index record. Sets also the node state for the
-next round of undo. */
+next round of undo.
+@return	DB_SUCCESS or error code: we may run out of file space */
 static
 ulint
 row_undo_mod_clust(
 /*===============*/
-				/* out: DB_SUCCESS or error code: we may run
-				out of file space */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr)	/* in: query thread */
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	btr_pcur_t*	pcur;
 	mtr_t		mtr;
 	ulint		err;
 	ibool		success;
 	ibool		more_vers;
-	dulint		new_undo_no;
+	undo_no_t	new_undo_no;
 
 	ut_ad(node && thr);
 
@@ -272,19 +293,18 @@ row_undo_mod_clust(
 	return(err);
 }
 
-/***************************************************************
-Delete marks or removes a secondary index entry if found. */
+/***********************************************************//**
+Delete marks or removes a secondary index entry if found.
+@return	DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
 static
 ulint
 row_undo_mod_del_mark_or_remove_sec_low(
 /*====================================*/
-				/* out: DB_SUCCESS, DB_FAIL, or
-				DB_OUT_OF_FILE_SPACE */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr,	/* in: query thread */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry */
-	ulint		mode)	/* in: latch mode BTR_MODIFY_LEAF or
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr,	/*!< in: query thread */
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry,	/*!< in: index entry */
+	ulint		mode)	/*!< in: latch mode BTR_MODIFY_LEAF or
 				BTR_MODIFY_TREE */
 {
 	ibool		found;
@@ -304,7 +324,15 @@ row_undo_mod_del_mark_or_remove_sec_low(
 	btr_cur = btr_pcur_get_btr_cur(&pcur);
 
 	if (!found) {
-		/* Not found */
+		/* In crash recovery, the secondary index record may
+		be missing if the UPDATE did not have time to insert
+		the secondary index records before the crash.  When we
+		are undoing that UPDATE in crash recovery, the record
+		may be missing.
+
+		In normal processing, if an update ends in a deadlock
+		before it has inserted all updated secondary index
+		records, then the undo will not find those records. */
 
 		btr_pcur_close(&pcur);
 		mtr_commit(&mtr);
@@ -342,8 +370,14 @@ row_undo_mod_del_mark_or_remove_sec_low(
 		} else {
 			ut_ad(mode == BTR_MODIFY_TREE);
 
+			/* No need to distinguish RB_RECOVERY here, because we
+			are deleting a secondary index record: the distinction
+			between RB_NORMAL and RB_RECOVERY only matters when
+			deleting a record that contains externally stored
+			columns. */
+			ut_ad(!dict_index_is_clust(index));
 			btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
-						   TRUE, &mtr);
+						   RB_NORMAL, &mtr);
 
 			/* The delete operation may fail if we have little
 			file space left: TODO: easiest to crash the database
@@ -358,23 +392,23 @@ row_undo_mod_del_mark_or_remove_sec_low(
 	return(err);
 }
 
-/***************************************************************
+/***********************************************************//**
 Delete marks or removes a secondary index entry if found.
 NOTE that if we updated the fields of a delete-marked secondary index record
 so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot
 return to the original values because we do not know them. But this should
 not cause problems because in row0sel.c, in queries we always retrieve the
 clustered index record or an earlier version of it, if the secondary index
-record through which we do the search is delete-marked. */
+record through which we do the search is delete-marked.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 static
 ulint
 row_undo_mod_del_mark_or_remove_sec(
 /*================================*/
-				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr,	/* in: query thread */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry)	/* in: index entry */
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr,	/*!< in: query thread */
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry)	/*!< in: index entry */
 {
 	ulint	err;
 
@@ -390,38 +424,41 @@ row_undo_mod_del_mark_or_remove_sec(
 	return(err);
 }
 
-/***************************************************************
+/***********************************************************//**
 Delete unmarks a secondary index entry which must be found. It might not be
 delete-marked at the moment, but it does not harm to unmark it anyway. We also
 need to update the fields of the secondary index record if we updated its
-fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. */
+fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
+@return	DB_FAIL or DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 static
 ulint
 row_undo_mod_del_unmark_sec_and_undo_update(
 /*========================================*/
-				/* out: DB_FAIL or DB_SUCCESS or
-				DB_OUT_OF_FILE_SPACE */
-	ulint		mode,	/* in: search mode: BTR_MODIFY_LEAF or
+	ulint		mode,	/*!< in: search mode: BTR_MODIFY_LEAF or
 				BTR_MODIFY_TREE */
-	que_thr_t*	thr,	/* in: query thread */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry)	/* in: index entry */
+	que_thr_t*	thr,	/*!< in: query thread */
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry)	/*!< in: index entry */
 {
 	mem_heap_t*	heap;
 	btr_pcur_t	pcur;
 	upd_t*		update;
 	ulint		err		= DB_SUCCESS;
-	ibool		found;
 	big_rec_t*	dummy_big_rec;
 	mtr_t		mtr;
 	trx_t*		trx		= thr_get_trx(thr);
 
+	/* Ignore indexes that are being created. */
+	if (UNIV_UNLIKELY(*index->name == TEMP_INDEX_PREFIX)) {
+
+		return(DB_SUCCESS);
+	}
+
 	log_free_check();
 	mtr_start(&mtr);
 
-	found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
-
-	if (!found) {
+	if (UNIV_UNLIKELY(!row_search_index_entry(index, entry,
+						  mode, &pcur, &mtr))) {
 		fputs("InnoDB: error in sec index entry del undo in\n"
 		      "InnoDB: ", stderr);
 		dict_index_name_print(stderr, trx, index);
@@ -457,15 +494,19 @@ row_undo_mod_del_unmark_sec_and_undo_update(
 			err = btr_cur_optimistic_update(
 				BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
 				btr_cur, update, 0, thr, &mtr);
-			if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
+			switch (err) {
+			case DB_OVERFLOW:
+			case DB_UNDERFLOW:
+			case DB_ZIP_OVERFLOW:
 				err = DB_FAIL;
 			}
 		} else {
 			ut_a(mode == BTR_MODIFY_TREE);
 			err = btr_cur_pessimistic_update(
 				BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
-				btr_cur, &dummy_big_rec,
+				btr_cur, &heap, &dummy_big_rec,
 				update, 0, thr, &mtr);
+			ut_a(!dummy_big_rec);
 		}
 
 		mem_heap_free(heap);
@@ -477,54 +518,68 @@ row_undo_mod_del_unmark_sec_and_undo_update(
 	return(err);
 }
 
-/***************************************************************
-Undoes a modify in secondary indexes when undo record type is UPD_DEL. */
+/***********************************************************//**
+Undoes a modify in secondary indexes when undo record type is UPD_DEL.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 static
 ulint
 row_undo_mod_upd_del_sec(
 /*=====================*/
-				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr)	/* in: query thread */
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	mem_heap_t*	heap;
 	dtuple_t*	entry;
 	dict_index_t*	index;
-	ulint		err;
+	ulint		err	= DB_SUCCESS;
 
 	heap = mem_heap_create(1024);
 
 	while (node->index != NULL) {
 		index = node->index;
 
-		entry = row_build_index_entry(node->row, index, heap);
+		entry = row_build_index_entry(node->row, node->ext,
+					      index, heap);
+		if (UNIV_UNLIKELY(!entry)) {
+			/* The database must have crashed after
+			inserting a clustered index record but before
+			writing all the externally stored columns of
+			that record.  Because secondary index entries
+			are inserted after the clustered index record,
+			we may assume that the secondary index record
+			does not exist.  However, this situation may
+			only occur during the rollback of incomplete
+			transactions. */
+			ut_a(trx_is_recv(thr_get_trx(thr)));
+		} else {
+			err = row_undo_mod_del_mark_or_remove_sec(
+				node, thr, index, entry);
 
-		err = row_undo_mod_del_mark_or_remove_sec(node, thr, index,
-							  entry);
-		if (err != DB_SUCCESS) {
+			if (err != DB_SUCCESS) {
 
-			mem_heap_free(heap);
-
-			return(err);
+				break;
+			}
 		}
 
+		mem_heap_empty(heap);
+
 		node->index = dict_table_get_next_index(node->index);
 	}
 
 	mem_heap_free(heap);
 
-	return(DB_SUCCESS);
+	return(err);
 }
 
-/***************************************************************
-Undoes a modify in secondary indexes when undo record type is DEL_MARK. */
+/***********************************************************//**
+Undoes a modify in secondary indexes when undo record type is DEL_MARK.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 static
 ulint
 row_undo_mod_del_mark_sec(
 /*======================*/
-				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr)	/* in: query thread */
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	mem_heap_t*	heap;
 	dtuple_t*	entry;
@@ -536,8 +591,9 @@ row_undo_mod_del_mark_sec(
 	while (node->index != NULL) {
 		index = node->index;
 
-		entry = row_build_index_entry(node->row, index, heap);
-
+		entry = row_build_index_entry(node->row, node->ext,
+					      index, heap);
+		ut_a(entry);
 		err = row_undo_mod_del_unmark_sec_and_undo_update(
 			BTR_MODIFY_LEAF, thr, index, entry);
 		if (err == DB_FAIL) {
@@ -560,15 +616,15 @@ row_undo_mod_del_mark_sec(
 	return(DB_SUCCESS);
 }
 
-/***************************************************************
-Undoes a modify in secondary indexes when undo record type is UPD_EXIST. */
+/***********************************************************//**
+Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 static
 ulint
 row_undo_mod_upd_exist_sec(
 /*=======================*/
-				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr)	/* in: query thread */
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	mem_heap_t*	heap;
 	dtuple_t*	entry;
@@ -590,8 +646,9 @@ row_undo_mod_upd_exist_sec(
 						     node->update)) {
 
 			/* Build the newest version of the index entry */
-			entry = row_build_index_entry(node->row, index, heap);
-
+			entry = row_build_index_entry(node->row, node->ext,
+						      index, heap);
+			ut_a(entry);
 			/* NOTE that if we updated the fields of a
 			delete-marked secondary index record so that
 			alphabetically they stayed the same, e.g.,
@@ -617,9 +674,12 @@ row_undo_mod_upd_exist_sec(
 			the secondary index record if we updated its fields
 			but alphabetically they stayed the same, e.g.,
 			'abc' -> 'aBc'. */
+			mem_heap_empty(heap);
+			entry = row_build_index_entry(node->undo_row,
+						      node->undo_ext,
+						      index, heap);
+			ut_a(entry);
 
-			row_upd_index_replace_new_col_vals(entry, index,
-							   node->update, NULL);
 			err = row_undo_mod_del_unmark_sec_and_undo_update(
 				BTR_MODIFY_LEAF, thr, index, entry);
 			if (err == DB_FAIL) {
@@ -642,21 +702,21 @@ row_undo_mod_upd_exist_sec(
 	return(DB_SUCCESS);
 }
 
-/***************************************************************
+/***********************************************************//**
 Parses the row reference and other info in a modify undo log record. */
 static
 void
 row_undo_mod_parse_undo_rec(
 /*========================*/
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr)	/* in: query thread */
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	dict_index_t*	clust_index;
 	byte*		ptr;
-	dulint		undo_no;
+	undo_no_t	undo_no;
 	dulint		table_id;
-	dulint		trx_id;
-	dulint		roll_ptr;
+	trx_id_t	trx_id;
+	roll_ptr_t	roll_ptr;
 	ulint		info_bits;
 	ulint		type;
 	ulint		cmpl_info;
@@ -702,17 +762,16 @@ row_undo_mod_parse_undo_rec(
 	node->cmpl_info = cmpl_info;
 }
 
-/***************************************************************
-Undoes a modify operation on a row of a table. */
-
+/***********************************************************//**
+Undoes a modify operation on a row of a table.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
 ulint
 row_undo_mod(
 /*=========*/
-				/* out: DB_SUCCESS or error code */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr)	/* in: query thread */
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
-	ibool	found;
 	ulint	err;
 
 	ut_ad(node && thr);
@@ -720,13 +779,7 @@ row_undo_mod(
 
 	row_undo_mod_parse_undo_rec(node, thr);
 
-	if (node->table == NULL) {
-		found = FALSE;
-	} else {
-		found = row_undo_search_clust_to_pcur(node);
-	}
-
-	if (!found) {
+	if (!node->table || !row_undo_search_clust_to_pcur(node)) {
 		/* It is already undone, or will be undone by another query
 		thread, or table was dropped */
 
diff --git a/storage/innobase/row/row0undo.c b/storage/innodb_plugin/row/row0undo.c
similarity index 80%
rename from storage/innobase/row/row0undo.c
rename to storage/innodb_plugin/row/row0undo.c
index f03f84ed1b0..3d739c9689a 100644
--- a/storage/innobase/row/row0undo.c
+++ b/storage/innodb_plugin/row/row0undo.c
@@ -1,7 +1,24 @@
-/******************************************************
-Row undo
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0undo.c
+Row undo
 
 Created 1/8/1997 Heikki Tuuri
 *******************************************************/
@@ -24,6 +41,7 @@ Created 1/8/1997 Heikki Tuuri
 #include "row0row.h"
 #include "row0uins.h"
 #include "row0umod.h"
+#include "row0upd.h"
 #include "row0mysql.h"
 #include "srv0srv.h"
 
@@ -102,16 +120,16 @@ doing the purge. Similarly, during a rollback, a record can be removed
 if the stored roll ptr in the undo log points to a trx already (being) purged,
 or if the roll ptr is NULL, i.e., it was a fresh insert. */
 
-/************************************************************************
-Creates a row undo node to a query graph. */
-
+/********************************************************************//**
+Creates a row undo node to a query graph.
+@return	own: undo node */
+UNIV_INTERN
 undo_node_t*
 row_undo_node_create(
 /*=================*/
-				/* out, own: undo node */
-	trx_t*		trx,	/* in: transaction */
-	que_thr_t*	parent,	/* in: parent node, i.e., a thr node */
-	mem_heap_t*	heap)	/* in: memory heap where created */
+	trx_t*		trx,	/*!< in: transaction */
+	que_thr_t*	parent,	/*!< in: parent node, i.e., a thr node */
+	mem_heap_t*	heap)	/*!< in: memory heap where created */
 {
 	undo_node_t*	undo;
 
@@ -132,19 +150,18 @@ row_undo_node_create(
 	return(undo);
 }
 
-/***************************************************************
+/***********************************************************//**
 Looks for the clustered index record when node has the row reference.
 The pcur in node is used in the search. If found, stores the row to node,
 and stores the position of pcur, and detaches it. The pcur must be closed
-by the caller in any case. */
-
+by the caller in any case.
+@return TRUE if found; NOTE the node->pcur must be closed by the
+caller, regardless of the return value */
+UNIV_INTERN
 ibool
 row_undo_search_clust_to_pcur(
 /*==========================*/
-				/* out: TRUE if found; NOTE the node->pcur
-				must be closed by the caller, regardless of
-				the return value */
-	undo_node_t*	node)	/* in: row undo node */
+	undo_node_t*	node)	/*!< in: row undo node */
 {
 	dict_index_t*	clust_index;
 	ibool		found;
@@ -154,7 +171,7 @@ row_undo_search_clust_to_pcur(
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	mtr_start(&mtr);
 
@@ -183,7 +200,16 @@ row_undo_search_clust_to_pcur(
 		ret = FALSE;
 	} else {
 		node->row = row_build(ROW_COPY_DATA, clust_index, rec,
-				      offsets, node->heap);
+				      offsets, NULL, &node->ext, node->heap);
+		if (node->update) {
+			node->undo_row = dtuple_copy(node->row, node->heap);
+			row_upd_replace(node->undo_row, &node->undo_ext,
+					clust_index, node->update, node->heap);
+		} else {
+			node->undo_row = NULL;
+			node->undo_ext = NULL;
+		}
+
 		btr_pcur_store_position(&(node->pcur), &mtr);
 
 		ret = TRUE;
@@ -197,23 +223,22 @@ row_undo_search_clust_to_pcur(
 	return(ret);
 }
 
-/***************************************************************
+/***********************************************************//**
 Fetches an undo log record and does the undo for the recorded operation.
 If none left, or a partial rollback completed, returns control to the
-parent node, which is always a query thread node. */
+parent node, which is always a query thread node.
+@return	DB_SUCCESS if operation successfully completed, else error code */
 static
 ulint
 row_undo(
 /*=====*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, else error code */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr)	/* in: query thread */
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
-	ulint	err;
-	trx_t*	trx;
-	dulint	roll_ptr;
-	ibool	locked_data_dict;
+	ulint		err;
+	trx_t*		trx;
+	roll_ptr_t	roll_ptr;
+	ibool		locked_data_dict;
 
 	ut_ad(node && thr);
 
@@ -300,15 +325,15 @@ row_undo(
 	return(err);
 }
 
-/***************************************************************
+/***********************************************************//**
 Undoes a row operation in a table. This is a high-level function used
-in SQL execution graphs. */
-
+in SQL execution graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 row_undo_step(
 /*==========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	ulint		err;
 	undo_node_t*	node;
diff --git a/storage/innobase/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c
similarity index 60%
rename from storage/innobase/row/row0upd.c
rename to storage/innodb_plugin/row/row0upd.c
index c91cc449b96..58dfd43ead9 100644
--- a/storage/innobase/row/row0upd.c
+++ b/storage/innodb_plugin/row/row0upd.c
@@ -1,7 +1,24 @@
-/******************************************************
-Update of a row
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0upd.c
+Update of a row
 
 Created 12/27/1996 Heikki Tuuri
 *******************************************************/
@@ -13,13 +30,16 @@ Created 12/27/1996 Heikki Tuuri
 #endif
 
 #include "dict0dict.h"
+#include "trx0undo.h"
+#include "rem0rec.h"
+#ifndef UNIV_HOTBACKUP
 #include "dict0boot.h"
 #include "dict0crea.h"
 #include "mach0data.h"
-#include "trx0undo.h"
 #include "btr0btr.h"
 #include "btr0cur.h"
 #include "que0que.h"
+#include "row0ext.h"
 #include "row0ins.h"
 #include "row0sel.h"
 #include "row0row.h"
@@ -72,40 +92,41 @@ the x-latch freed? The most efficient way for performing a
 searched delete is obviously to keep the x-latch for several
 steps of query graph execution. */
 
-/***************************************************************
+/***********************************************************//**
 Checks if an update vector changes some of the first ordering fields of an
 index record. This is only used in foreign key checks and we can assume
-that index does not contain column prefixes. */
+that index does not contain column prefixes.
+@return	TRUE if changes */
 static
 ibool
 row_upd_changes_first_fields_binary(
 /*================================*/
-				/* out: TRUE if changes */
-	dtuple_t*	entry,	/* in: old value of index entry */
-	dict_index_t*	index,	/* in: index of entry */
-	upd_t*		update,	/* in: update vector for the row */
-	ulint		n);	/* in: how many first fields to check */
+	dtuple_t*	entry,	/*!< in: old value of index entry */
+	dict_index_t*	index,	/*!< in: index of entry */
+	const upd_t*	update,	/*!< in: update vector for the row */
+	ulint		n);	/*!< in: how many first fields to check */
 
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if index currently is mentioned as a referenced index in a foreign
-key constraint. */
+key constraint.
+
+NOTE that since we do not hold dict_operation_lock when leaving the
+function, it may be that the referencing table has been dropped when
+we leave this function: this function is only for heuristic use!
+
+@return TRUE if referenced */
 static
 ibool
 row_upd_index_is_referenced(
 /*========================*/
-				/* out: TRUE if referenced; NOTE that since
-				we do not hold dict_operation_lock
-				when leaving the function, it may be that
-				the referencing table has been dropped when
-				we leave this function: this function is only
-				for heuristic use! */
-	dict_index_t*	index,	/* in: index */
-	trx_t*		trx)	/* in: transaction */
+	dict_index_t*	index,	/*!< in: index */
+	trx_t*		trx)	/*!< in: transaction */
 {
 	dict_table_t*	table		= index->table;
 	dict_foreign_t*	foreign;
 	ibool		froze_data_dict	= FALSE;
+	ibool		is_referenced	= FALSE;
 
 	if (!UT_LIST_GET_FIRST(table->referenced_list)) {
 
@@ -122,45 +143,48 @@ row_upd_index_is_referenced(
 	while (foreign) {
 		if (foreign->referenced_index == index) {
 
-			if (froze_data_dict) {
-				row_mysql_unfreeze_data_dictionary(trx);
-			}
-
-			return(TRUE);
+			is_referenced = TRUE;
+			goto func_exit;
 		}
 
 		foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
 	}
 
+func_exit:
 	if (froze_data_dict) {
 		row_mysql_unfreeze_data_dictionary(trx);
 	}
 
-	return(FALSE);
+	return(is_referenced);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Checks if possible foreign key constraints hold after a delete of the record
-under pcur. NOTE that this function will temporarily commit mtr and lose the
-pcur position! */
+under pcur.
+
+NOTE that this function will temporarily commit mtr and lose the
+pcur position!
+
+@return	DB_SUCCESS or an error code */
 static
 ulint
 row_upd_check_references_constraints(
 /*=================================*/
-				/* out: DB_SUCCESS or an error code */
-	upd_node_t*	node,	/* in: row update node */
-	btr_pcur_t*	pcur,	/* in: cursor positioned on a record; NOTE: the
+	upd_node_t*	node,	/*!< in: row update node */
+	btr_pcur_t*	pcur,	/*!< in: cursor positioned on a record; NOTE: the
 				cursor position is lost in this function! */
-	dict_table_t*	table,	/* in: table in question */
-	dict_index_t*	index,	/* in: index of the cursor */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr)	/* in: mtr */
+	dict_table_t*	table,	/*!< in: table in question */
+	dict_index_t*	index,	/*!< in: index of the cursor */
+	ulint*		offsets,/*!< in/out: rec_get_offsets(pcur.rec, index) */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	dict_foreign_t*	foreign;
 	mem_heap_t*	heap;
 	dtuple_t*	entry;
 	trx_t*		trx;
-	rec_t*		rec;
+	const rec_t*	rec;
+	ulint		n_ext;
 	ulint		err;
 	ibool		got_s_lock	= FALSE;
 
@@ -172,10 +196,12 @@ row_upd_check_references_constraints(
 	trx = thr_get_trx(thr);
 
 	rec = btr_pcur_get_rec(pcur);
+	ut_ad(rec_offs_validate(rec, index, offsets));
 
 	heap = mem_heap_create(500);
 
-	entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
+	entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
+				       &n_ext, heap);
 
 	mtr_commit(mtr);
 
@@ -236,37 +262,34 @@ row_upd_check_references_constraints(
 			}
 
 			if (err != DB_SUCCESS) {
-				if (got_s_lock) {
-					row_mysql_unfreeze_data_dictionary(
-						trx);
-				}
 
-				mem_heap_free(heap);
-
-				return(err);
+				goto func_exit;
 			}
 		}
 
 		foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
 	}
 
+	err = DB_SUCCESS;
+
+func_exit:
 	if (got_s_lock) {
 		row_mysql_unfreeze_data_dictionary(trx);
 	}
 
 	mem_heap_free(heap);
 
-	return(DB_SUCCESS);
+	return(err);
 }
 
-/*************************************************************************
-Creates an update node for a query graph. */
-
+/*********************************************************************//**
+Creates an update node for a query graph.
+@return	own: update node */
+UNIV_INTERN
 upd_node_t*
 upd_node_create(
 /*============*/
-				/* out, own: update node */
-	mem_heap_t*	heap)	/* in: mem heap where created */
+	mem_heap_t*	heap)	/*!< in: mem heap where created */
 {
 	upd_node_t*	node;
 
@@ -274,11 +297,12 @@ upd_node_create(
 	node->common.type = QUE_NODE_UPDATE;
 
 	node->state = UPD_NODE_UPDATE_CLUSTERED;
-	node->select_will_do_update = FALSE;
 	node->in_mysql_interface = FALSE;
 
 	node->row = NULL;
-	node->ext_vec = NULL;
+	node->ext = NULL;
+	node->upd_row = NULL;
+	node->upd_ext = NULL;
 	node->index = NULL;
 	node->update = NULL;
 
@@ -295,51 +319,61 @@ upd_node_create(
 
 	return(node);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/*************************************************************************
+/*********************************************************************//**
 Updates the trx id and roll ptr field in a clustered index record in database
 recovery. */
-
+UNIV_INTERN
 void
 row_upd_rec_sys_fields_in_recovery(
 /*===============================*/
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		pos,	/* in: TRX_ID position in rec */
-	dulint		trx_id,	/* in: transaction id */
-	dulint		roll_ptr)/* in: roll ptr of the undo log record */
+	rec_t*		rec,	/*!< in/out: record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		pos,	/*!< in: TRX_ID position in rec */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	roll_ptr_t	roll_ptr)/*!< in: roll ptr of the undo log record */
 {
-	byte*	field;
-	ulint	len;
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
 
-	field = rec_get_nth_field(rec, offsets, pos, &len);
-	ut_ad(len == DATA_TRX_ID_LEN);
-	trx_write_trx_id(field, trx_id);
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		page_zip_write_trx_id_and_roll_ptr(
+			page_zip, rec, offsets, pos, trx_id, roll_ptr);
+	} else {
+		byte*	field;
+		ulint	len;
 
-	field = rec_get_nth_field(rec, offsets, pos + 1, &len);
-	ut_ad(len == DATA_ROLL_PTR_LEN);
-	trx_write_roll_ptr(field, roll_ptr);
+		field = rec_get_nth_field(rec, offsets, pos, &len);
+		ut_ad(len == DATA_TRX_ID_LEN);
+#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
+# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
+#endif
+		trx_write_trx_id(field, trx_id);
+		trx_write_roll_ptr(field + DATA_TRX_ID_LEN, roll_ptr);
+	}
 }
 
-/*************************************************************************
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
 Sets the trx id or roll ptr field of a clustered index entry. */
-
+UNIV_INTERN
 void
 row_upd_index_entry_sys_field(
 /*==========================*/
-	dtuple_t*	entry,	/* in: index entry, where the memory buffers
+	const dtuple_t*	entry,	/*!< in: index entry, where the memory buffers
 				for sys fields are already allocated:
 				the function just copies the new values to
 				them */
-	dict_index_t*	index,	/* in: clustered index */
-	ulint		type,	/* in: DATA_TRX_ID or DATA_ROLL_PTR */
-	dulint		val)	/* in: value to write */
+	dict_index_t*	index,	/*!< in: clustered index */
+	ulint		type,	/*!< in: DATA_TRX_ID or DATA_ROLL_PTR */
+	dulint		val)	/*!< in: value to write */
 {
 	dfield_t*	dfield;
 	byte*		field;
 	ulint		pos;
 
-	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(dict_index_is_clust(index));
 
 	pos = dict_index_get_sys_col_pos(index, type);
 
@@ -354,26 +388,25 @@ row_upd_index_entry_sys_field(
 	}
 }
 
-/***************************************************************
+/***********************************************************//**
 Returns TRUE if row update changes size of some field in index or if some
-field to be updated is stored externally in rec or update. */
-
+field to be updated is stored externally in rec or update.
+@return TRUE if the update changes the size of some field in index or
+the field is external in rec or update */
+UNIV_INTERN
 ibool
 row_upd_changes_field_size_or_external(
 /*===================================*/
-				/* out: TRUE if the update changes the size of
-				some field in index or the field is external
-				in rec or update */
-	dict_index_t*	index,	/* in: index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	upd_t*		update)	/* in: update vector */
+	dict_index_t*	index,	/*!< in: index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	const upd_t*	update)	/*!< in: update vector */
 {
-	upd_field_t*	upd_field;
-	dfield_t*	new_val;
-	ulint		old_len;
-	ulint		new_len;
-	ulint		n_fields;
-	ulint		i;
+	const upd_field_t*	upd_field;
+	const dfield_t*		new_val;
+	ulint			old_len;
+	ulint			new_len;
+	ulint			n_fields;
+	ulint			i;
 
 	ut_ad(rec_offs_validate(NULL, index, offsets));
 	n_fields = upd_get_n_fields(update);
@@ -382,9 +415,9 @@ row_upd_changes_field_size_or_external(
 		upd_field = upd_get_nth_field(update, i);
 
 		new_val = &(upd_field->new_val);
-		new_len = new_val->len;
+		new_len = dfield_get_len(new_val);
 
-		if (new_len == UNIV_SQL_NULL && !rec_offs_comp(offsets)) {
+		if (dfield_is_null(new_val) && !rec_offs_comp(offsets)) {
 			/* A bug fixed on Dec 31st, 2004: we looked at the
 			SQL NULL size from the wrong field! We may backport
 			this fix also to 4.0. The merge to 5.0 will be made
@@ -392,7 +425,8 @@ row_upd_changes_field_size_or_external(
 
 			new_len = dict_col_get_sql_null_size(
 				dict_index_get_nth_col(index,
-						       upd_field->field_no));
+						       upd_field->field_no),
+				0);
 		}
 
 		old_len = rec_offs_nth_size(offsets, upd_field->field_no);
@@ -410,17 +444,8 @@ row_upd_changes_field_size_or_external(
 			old_len = UNIV_SQL_NULL;
 		}
 
-		if (old_len != new_len) {
-
-			return(TRUE);
-		}
-
-		if (rec_offs_nth_extern(offsets, upd_field->field_no)) {
-
-			return(TRUE);
-		}
-
-		if (upd_field->extern_storage) {
+		if (dfield_is_ext(new_val) || old_len != new_len
+		    || rec_offs_nth_extern(offsets, upd_field->field_no)) {
 
 			return(TRUE);
 		}
@@ -428,56 +453,70 @@ row_upd_changes_field_size_or_external(
 
 	return(FALSE);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***************************************************************
+/***********************************************************//**
 Replaces the new column values stored in the update vector to the record
-given. No field size changes are allowed. This function is used only for
-a clustered index */
-
+given. No field size changes are allowed. */
+UNIV_INTERN
 void
 row_upd_rec_in_place(
 /*=================*/
-	rec_t*		rec,	/* in/out: record where replaced */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	upd_t*		update)	/* in: update vector */
+	rec_t*		rec,	/*!< in/out: record where replaced */
+	dict_index_t*	index,	/*!< in: the index the record belongs to */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	const upd_t*	update,	/*!< in: update vector */
+	page_zip_des_t*	page_zip)/*!< in: compressed page with enough space
+				available, or NULL */
 {
-	upd_field_t*	upd_field;
-	dfield_t*	new_val;
-	ulint		n_fields;
-	ulint		i;
+	const upd_field_t*	upd_field;
+	const dfield_t*		new_val;
+	ulint			n_fields;
+	ulint			i;
 
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(rec_offs_validate(rec, index, offsets));
 
-	rec_set_info_bits(rec, rec_offs_comp(offsets), update->info_bits);
+	if (rec_offs_comp(offsets)) {
+		rec_set_info_bits_new(rec, update->info_bits);
+	} else {
+		rec_set_info_bits_old(rec, update->info_bits);
+	}
 
 	n_fields = upd_get_n_fields(update);
 
 	for (i = 0; i < n_fields; i++) {
 		upd_field = upd_get_nth_field(update, i);
 		new_val = &(upd_field->new_val);
+		ut_ad(!dfield_is_ext(new_val) ==
+		      !rec_offs_nth_extern(offsets, upd_field->field_no));
 
 		rec_set_nth_field(rec, offsets, upd_field->field_no,
 				  dfield_get_data(new_val),
 				  dfield_get_len(new_val));
 	}
+
+	if (UNIV_LIKELY_NULL(page_zip)) {
+		page_zip_write_rec(page_zip, rec, index, offsets, 0);
+	}
 }
 
-/*************************************************************************
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
 Writes into the redo log the values of trx id and roll ptr and enough info
-to determine their positions within a clustered index record. */
-
+to determine their positions within a clustered index record.
+@return	new pointer to mlog */
+UNIV_INTERN
 byte*
 row_upd_write_sys_vals_to_log(
 /*==========================*/
-				/* out: new pointer to mlog */
-	dict_index_t*	index,	/* in: clustered index */
-	trx_t*		trx,	/* in: transaction */
-	dulint		roll_ptr,/* in: roll ptr of the undo log record */
-	byte*		log_ptr,/* pointer to a buffer of size > 20 opened
+	dict_index_t*	index,	/*!< in: clustered index */
+	trx_t*		trx,	/*!< in: transaction */
+	roll_ptr_t	roll_ptr,/*!< in: roll ptr of the undo log record */
+	byte*		log_ptr,/*!< pointer to a buffer of size > 20 opened
 				in mlog */
-	mtr_t*		mtr __attribute__((unused))) /* in: mtr */
+	mtr_t*		mtr __attribute__((unused))) /*!< in: mtr */
 {
-	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(dict_index_is_clust(index));
 	ut_ad(mtr);
 
 	log_ptr += mach_write_compressed(log_ptr,
@@ -491,19 +530,20 @@ row_upd_write_sys_vals_to_log(
 
 	return(log_ptr);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/*************************************************************************
-Parses the log data of system field values. */
-
+/*********************************************************************//**
+Parses the log data of system field values.
+@return	log data end or NULL */
+UNIV_INTERN
 byte*
 row_upd_parse_sys_vals(
 /*===================*/
-			/* out: log data end or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	ulint*	pos,	/* out: TRX_ID position in record */
-	dulint*	trx_id,	/* out: trx id */
-	dulint*	roll_ptr)/* out: roll ptr */
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	ulint*		pos,	/*!< out: TRX_ID position in record */
+	trx_id_t*	trx_id,	/*!< out: trx id */
+	roll_ptr_t*	roll_ptr)/*!< out: roll ptr */
 {
 	ptr = mach_parse_compressed(ptr, end_ptr, pos);
 
@@ -525,24 +565,26 @@ row_upd_parse_sys_vals(
 	return(ptr);
 }
 
-/***************************************************************
+#ifndef UNIV_HOTBACKUP
+/***********************************************************//**
 Writes to the redo log the new values of the fields occurring in the index. */
-
+UNIV_INTERN
 void
 row_upd_index_write_log(
 /*====================*/
-	upd_t*	update,	/* in: update vector */
-	byte*	log_ptr,/* in: pointer to mlog buffer: must contain at least
-			MLOG_BUF_MARGIN bytes of free space; the buffer is
-			closed within this function */
-	mtr_t*	mtr)	/* in: mtr into whose log to write */
+	const upd_t*	update,	/*!< in: update vector */
+	byte*		log_ptr,/*!< in: pointer to mlog buffer: must
+				contain at least MLOG_BUF_MARGIN bytes
+				of free space; the buffer is closed
+				within this function */
+	mtr_t*		mtr)	/*!< in: mtr into whose log to write */
 {
-	upd_field_t*	upd_field;
-	dfield_t*	new_val;
-	ulint		len;
-	ulint		n_fields;
-	byte*		buf_end;
-	ulint		i;
+	const upd_field_t*	upd_field;
+	const dfield_t*		new_val;
+	ulint			len;
+	ulint			n_fields;
+	byte*			buf_end;
+	ulint			i;
 
 	n_fields = upd_get_n_fields(update);
 
@@ -569,20 +611,22 @@ row_upd_index_write_log(
 
 		new_val = &(upd_field->new_val);
 
-		len = new_val->len;
+		len = dfield_get_len(new_val);
 
 		log_ptr += mach_write_compressed(log_ptr, upd_field->field_no);
 		log_ptr += mach_write_compressed(log_ptr, len);
 
 		if (len != UNIV_SQL_NULL) {
 			if (log_ptr + len < buf_end) {
-				ut_memcpy(log_ptr, new_val->data, len);
+				memcpy(log_ptr, dfield_get_data(new_val), len);
 
 				log_ptr += len;
 			} else {
 				mlog_close(mtr, log_ptr);
 
-				mlog_catenate_string(mtr, new_val->data, len);
+				mlog_catenate_string(mtr,
+						     dfield_get_data(new_val),
+						     len);
 
 				log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
 				buf_end = log_ptr + MLOG_BUF_MARGIN;
@@ -592,26 +636,26 @@ row_upd_index_write_log(
 
 	mlog_close(mtr, log_ptr);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/*************************************************************************
-Parses the log data written by row_upd_index_write_log. */
-
+/*********************************************************************//**
+Parses the log data written by row_upd_index_write_log.
+@return	log data end or NULL */
+UNIV_INTERN
 byte*
 row_upd_index_parse(
 /*================*/
-				/* out: log data end or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	mem_heap_t*	heap,	/* in: memory heap where update vector is
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	mem_heap_t*	heap,	/*!< in: memory heap where update vector is
 				built */
-	upd_t**		update_out)/* out: update vector */
+	upd_t**		update_out)/*!< out: update vector */
 {
 	upd_t*		update;
 	upd_field_t*	upd_field;
 	dfield_t*	new_val;
 	ulint		len;
 	ulint		n_fields;
-	byte*		buf;
 	ulint		info_bits;
 	ulint		i;
 
@@ -633,16 +677,19 @@ row_upd_index_parse(
 	update->info_bits = info_bits;
 
 	for (i = 0; i < n_fields; i++) {
+		ulint	field_no;
 		upd_field = upd_get_nth_field(update, i);
 		new_val = &(upd_field->new_val);
 
-		ptr = mach_parse_compressed(ptr, end_ptr,
-					    &(upd_field->field_no));
+		ptr = mach_parse_compressed(ptr, end_ptr, &field_no);
+
 		if (ptr == NULL) {
 
 			return(NULL);
 		}
 
+		upd_field->field_no = field_no;
+
 		ptr = mach_parse_compressed(ptr, end_ptr, &len);
 
 		if (ptr == NULL) {
@@ -650,21 +697,18 @@ row_upd_index_parse(
 			return(NULL);
 		}
 
-		new_val->len = len;
-
 		if (len != UNIV_SQL_NULL) {
 
 			if (end_ptr < ptr + len) {
 
 				return(NULL);
-			} else {
-				buf = mem_heap_alloc(heap, len);
-				ut_memcpy(buf, ptr, len);
-
-				ptr += len;
-
-				new_val->data = buf;
 			}
+
+			dfield_set_data(new_val,
+					mem_heap_dup(heap, ptr, len), len);
+			ptr += len;
+		} else {
+			dfield_set_null(new_val);
 		}
 	}
 
@@ -673,63 +717,35 @@ row_upd_index_parse(
 	return(ptr);
 }
 
-/*******************************************************************
-Returns TRUE if ext_vec contains i. */
-static
-ibool
-upd_ext_vec_contains(
-/*=================*/
-				/* out: TRUE if i is in ext_vec */
-	ulint*	ext_vec,	/* in: array of indexes or NULL */
-	ulint	n_ext_vec,	/* in: number of numbers in ext_vec */
-	ulint	i)		/* in: a number */
-{
-	ulint	j;
-
-	if (ext_vec == NULL) {
-
-		return(FALSE);
-	}
-
-	for (j = 0; j < n_ext_vec; j++) {
-		if (ext_vec[j] == i) {
-
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-/*******************************************************************
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
 Builds an update vector from those fields which in a secondary index entry
 differ from a record that has the equal ordering fields. NOTE: we compare
-the fields as binary strings! */
-
+the fields as binary strings!
+@return	own: update vector of differing fields */
+UNIV_INTERN
 upd_t*
 row_upd_build_sec_rec_difference_binary(
 /*====================================*/
-				/* out, own: update vector of differing
-				fields */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: entry to insert */
-	rec_t*		rec,	/* in: secondary index record */
-	trx_t*		trx,	/* in: transaction */
-	mem_heap_t*	heap)	/* in: memory heap from which allocated */
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	entry,	/*!< in: entry to insert */
+	const rec_t*	rec,	/*!< in: secondary index record */
+	trx_t*		trx,	/*!< in: transaction */
+	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
 {
 	upd_field_t*	upd_field;
-	dfield_t*	dfield;
-	byte*		data;
+	const dfield_t*	dfield;
+	const byte*	data;
 	ulint		len;
 	upd_t*		update;
 	ulint		n_diff;
 	ulint		i;
 	ulint		offsets_[REC_OFFS_SMALL_SIZE];
 	const ulint*	offsets;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	/* This function is used only for a secondary index */
-	ut_a(0 == (index->type & DICT_CLUSTERED));
+	ut_a(!dict_index_is_clust(index));
 
 	update = upd_create(dtuple_get_n_fields(entry), heap);
 
@@ -762,8 +778,6 @@ row_upd_build_sec_rec_difference_binary(
 
 			upd_field_set_field_no(upd_field, i, index, trx);
 
-			upd_field->extern_storage = FALSE;
-
 			n_diff++;
 		}
 	}
@@ -773,41 +787,37 @@ row_upd_build_sec_rec_difference_binary(
 	return(update);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Builds an update vector from those fields, excluding the roll ptr and
 trx id fields, which in an index entry differ from a record that has
-the equal ordering fields. NOTE: we compare the fields as binary strings! */
-
+the equal ordering fields. NOTE: we compare the fields as binary strings!
+@return own: update vector of differing fields, excluding roll ptr and
+trx id */
+UNIV_INTERN
 upd_t*
 row_upd_build_difference_binary(
 /*============================*/
-				/* out, own: update vector of differing
-				fields, excluding roll ptr and trx id */
-	dict_index_t*	index,	/* in: clustered index */
-	dtuple_t*	entry,	/* in: entry to insert */
-	ulint*		ext_vec,/* in: array containing field numbers of
-				externally stored fields in entry, or NULL */
-	ulint		n_ext_vec,/* in: number of fields in ext_vec */
-	rec_t*		rec,	/* in: clustered index record */
-	trx_t*		trx,	/* in: transaction */
-	mem_heap_t*	heap)	/* in: memory heap from which allocated */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const dtuple_t*	entry,	/*!< in: entry to insert */
+	const rec_t*	rec,	/*!< in: clustered index record */
+	trx_t*		trx,	/*!< in: transaction */
+	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
 {
 	upd_field_t*	upd_field;
-	dfield_t*	dfield;
-	byte*		data;
+	const dfield_t*	dfield;
+	const byte*	data;
 	ulint		len;
 	upd_t*		update;
 	ulint		n_diff;
 	ulint		roll_ptr_pos;
 	ulint		trx_id_pos;
-	ibool		extern_bit;
 	ulint		i;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	const ulint*	offsets;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	/* This function is used only for a clustered index */
-	ut_a(index->type & DICT_CLUSTERED);
+	ut_a(dict_index_is_clust(index));
 
 	update = upd_create(dtuple_get_n_fields(entry), heap);
 
@@ -833,10 +843,8 @@ row_upd_build_difference_binary(
 			goto skip_compare;
 		}
 
-		extern_bit = upd_ext_vec_contains(ext_vec, n_ext_vec, i);
-
-		if (UNIV_UNLIKELY(extern_bit
-				  == (ibool)!rec_offs_nth_extern(offsets, i))
+		if (UNIV_UNLIKELY(!dfield_is_ext(dfield)
+				  != !rec_offs_nth_extern(offsets, i))
 		    || !dfield_data_is_binary_equal(dfield, len, data)) {
 
 			upd_field = upd_get_nth_field(update, n_diff);
@@ -845,8 +853,6 @@ row_upd_build_difference_binary(
 
 			upd_field_set_field_no(upd_field, i, index, trx);
 
-			upd_field->extern_storage = extern_bit;
-
 			n_diff++;
 		}
 skip_compare:
@@ -858,34 +864,156 @@ skip_compare:
 	return(update);
 }
 
-/***************************************************************
+/***********************************************************//**
+Fetch a prefix of an externally stored column.  This is similar
+to row_ext_lookup(), but the row_ext_t holds the old values
+of the column and must not be poisoned with the new values.
+@return	BLOB prefix */
+static
+byte*
+row_upd_ext_fetch(
+/*==============*/
+	const byte*	data,		/*!< in: 'internally' stored part of the
+					field containing also the reference to
+					the external part */
+	ulint		local_len,	/*!< in: length of data, in bytes */
+	ulint		zip_size,	/*!< in: nonzero=compressed BLOB
+					page size, zero for uncompressed
+					BLOBs */
+	ulint*		len,		/*!< in: length of prefix to fetch;
+					out: fetched length of the prefix */
+	mem_heap_t*	heap)		/*!< in: heap where to allocate */
+{
+	byte*	buf = mem_heap_alloc(heap, *len);
+
+	*len = btr_copy_externally_stored_field_prefix(buf, *len,
+						       zip_size,
+						       data, local_len);
+	/* We should never update records containing a half-deleted BLOB. */
+	ut_a(*len);
+
+	return(buf);
+}
+
+/***********************************************************//**
+Replaces the new column value stored in the update vector in
+the given index entry field. */
+static
+void
+row_upd_index_replace_new_col_val(
+/*==============================*/
+	dfield_t*		dfield,	/*!< in/out: data field
+					of the index entry */
+	const dict_field_t*	field,	/*!< in: index field */
+	const dict_col_t*	col,	/*!< in: field->col */
+	const upd_field_t*	uf,	/*!< in: update field */
+	mem_heap_t*		heap,	/*!< in: memory heap for allocating
+					and copying the new value */
+	ulint			zip_size)/*!< in: compressed page
+					 size of the table, or 0 */
+{
+	ulint		len;
+	const byte*	data;
+
+	dfield_copy_data(dfield, &uf->new_val);
+
+	if (dfield_is_null(dfield)) {
+		return;
+	}
+
+	len = dfield_get_len(dfield);
+	data = dfield_get_data(dfield);
+
+	if (field->prefix_len > 0) {
+		ibool		fetch_ext = dfield_is_ext(dfield)
+			&& len < (ulint) field->prefix_len
+			+ BTR_EXTERN_FIELD_REF_SIZE;
+
+		if (fetch_ext) {
+			ulint	l = len;
+
+			len = field->prefix_len;
+
+			data = row_upd_ext_fetch(data, l, zip_size,
+						 &len, heap);
+		}
+
+		len = dtype_get_at_most_n_mbchars(col->prtype,
+						  col->mbminlen, col->mbmaxlen,
+						  field->prefix_len, len,
+						  (const char*) data);
+
+		dfield_set_data(dfield, data, len);
+
+		if (!fetch_ext) {
+			dfield_dup(dfield, heap);
+		}
+
+		return;
+	}
+
+	switch (uf->orig_len) {
+		byte*	buf;
+	case BTR_EXTERN_FIELD_REF_SIZE:
+		/* Restore the original locally stored
+		part of the column.  In the undo log,
+		InnoDB writes a longer prefix of externally
+		stored columns, so that column prefixes
+		in secondary indexes can be reconstructed. */
+		dfield_set_data(dfield,
+				data + len - BTR_EXTERN_FIELD_REF_SIZE,
+				BTR_EXTERN_FIELD_REF_SIZE);
+		dfield_set_ext(dfield);
+		/* fall through */
+	case 0:
+		dfield_dup(dfield, heap);
+		break;
+	default:
+		/* Reconstruct the original locally
+		stored part of the column.  The data
+		will have to be copied. */
+		ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
+		buf = mem_heap_alloc(heap, uf->orig_len);
+		/* Copy the locally stored prefix. */
+		memcpy(buf, data,
+		       uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE);
+		/* Copy the BLOB pointer. */
+		memcpy(buf + uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE,
+		       data + len - BTR_EXTERN_FIELD_REF_SIZE,
+		       BTR_EXTERN_FIELD_REF_SIZE);
+
+		dfield_set_data(dfield, buf, uf->orig_len);
+		dfield_set_ext(dfield);
+		break;
+	}
+}
+
+/***********************************************************//**
 Replaces the new column values stored in the update vector to the index entry
 given. */
-
+UNIV_INTERN
 void
 row_upd_index_replace_new_col_vals_index_pos(
 /*=========================================*/
-	dtuple_t*	entry,	/* in/out: index entry where replaced */
-	dict_index_t*	index,	/* in: index; NOTE that this may also be a
+	dtuple_t*	entry,	/*!< in/out: index entry where replaced;
+				the clustered index record must be
+				covered by a lock or a page latch to
+				prevent deletion (rollback or purge) */
+	dict_index_t*	index,	/*!< in: index; NOTE that this may also be a
 				non-clustered index */
-	upd_t*		update,	/* in: an update vector built for the index so
+	const upd_t*	update,	/*!< in: an update vector built for the index so
 				that the field number in an upd_field is the
 				index position */
 	ibool		order_only,
-				/* in: if TRUE, limit the replacement to
+				/*!< in: if TRUE, limit the replacement to
 				ordering fields of index; note that this
 				does not work for non-clustered indexes. */
-	mem_heap_t*	heap)	/* in: memory heap to which we allocate and
-				copy the new values, set this as NULL if you
-				do not want allocation */
+	mem_heap_t*	heap)	/*!< in: memory heap for allocating and
+				copying the new values */
 {
-	dict_field_t*	field;
-	upd_field_t*	upd_field;
-	dfield_t*	dfield;
-	dfield_t*	new_val;
-	ulint		j;
 	ulint		i;
 	ulint		n_fields;
+	const ulint	zip_size	= dict_table_zip_size(index->table);
 
 	ut_ad(index);
 
@@ -897,144 +1025,168 @@ row_upd_index_replace_new_col_vals_index_pos(
 		n_fields = dict_index_get_n_fields(index);
 	}
 
-	for (j = 0; j < n_fields; j++) {
+	for (i = 0; i < n_fields; i++) {
+		const dict_field_t*	field;
+		const dict_col_t*	col;
+		const upd_field_t*	uf;
 
-		field = dict_index_get_nth_field(index, j);
+		field = dict_index_get_nth_field(index, i);
+		col = dict_field_get_col(field);
+		uf = upd_get_field_by_field_no(update, i);
 
-		for (i = 0; i < upd_get_n_fields(update); i++) {
-
-			upd_field = upd_get_nth_field(update, i);
-
-			if (upd_field->field_no == j) {
-
-				dfield = dtuple_get_nth_field(entry, j);
-
-				new_val = &(upd_field->new_val);
-
-				dfield_set_data(dfield, new_val->data,
-						new_val->len);
-				if (heap && new_val->len != UNIV_SQL_NULL) {
-					dfield->data = mem_heap_alloc(
-						heap, new_val->len);
-					ut_memcpy(dfield->data, new_val->data,
-						  new_val->len);
-				}
-
-				if (field->prefix_len > 0
-				    && new_val->len != UNIV_SQL_NULL) {
-
-					const dict_col_t*	col
-						= dict_field_get_col(field);
-
-					dfield->len
-						= dtype_get_at_most_n_mbchars(
-							col->prtype,
-							col->mbminlen,
-							col->mbmaxlen,
-							field->prefix_len,
-							new_val->len,
-							new_val->data);
-				}
-			}
+		if (uf) {
+			row_upd_index_replace_new_col_val(
+				dtuple_get_nth_field(entry, i),
+				field, col, uf, heap, zip_size);
 		}
 	}
 }
 
-/***************************************************************
+/***********************************************************//**
 Replaces the new column values stored in the update vector to the index entry
 given. */
-
+UNIV_INTERN
 void
 row_upd_index_replace_new_col_vals(
 /*===============================*/
-	dtuple_t*	entry,	/* in/out: index entry where replaced */
-	dict_index_t*	index,	/* in: index; NOTE that this may also be a
+	dtuple_t*	entry,	/*!< in/out: index entry where replaced;
+				the clustered index record must be
+				covered by a lock or a page latch to
+				prevent deletion (rollback or purge) */
+	dict_index_t*	index,	/*!< in: index; NOTE that this may also be a
 				non-clustered index */
-	upd_t*		update,	/* in: an update vector built for the
+	const upd_t*	update,	/*!< in: an update vector built for the
 				CLUSTERED index so that the field number in
 				an upd_field is the clustered index position */
-	mem_heap_t*	heap)	/* in: memory heap to which we allocate and
-				copy the new values, set this as NULL if you
-				do not want allocation */
+	mem_heap_t*	heap)	/*!< in: memory heap for allocating and
+				copying the new values */
 {
-	upd_field_t*	upd_field;
-	dfield_t*	dfield;
-	dfield_t*	new_val;
-	ulint		j;
-	ulint		i;
-	dict_index_t*	clust_index;
-
-	ut_ad(index);
-
-	clust_index = dict_table_get_first_index(index->table);
+	ulint			i;
+	const dict_index_t*	clust_index
+		= dict_table_get_first_index(index->table);
+	const ulint		zip_size
+		= dict_table_zip_size(index->table);
 
 	dtuple_set_info_bits(entry, update->info_bits);
 
-	for (j = 0; j < dict_index_get_n_fields(index); j++) {
+	for (i = 0; i < dict_index_get_n_fields(index); i++) {
+		const dict_field_t*	field;
+		const dict_col_t*	col;
+		const upd_field_t*	uf;
 
-		ulint		clust_pos;
-		dict_field_t*	field = dict_index_get_nth_field(index, j);
+		field = dict_index_get_nth_field(index, i);
+		col = dict_field_get_col(field);
+		uf = upd_get_field_by_field_no(
+			update, dict_col_get_clust_pos(col, clust_index));
 
-		clust_pos = dict_col_get_clust_pos(field->col, clust_index);
-
-		for (i = 0; i < upd_get_n_fields(update); i++) {
-
-			upd_field = upd_get_nth_field(update, i);
-
-			if (upd_field->field_no == clust_pos) {
-
-				dfield = dtuple_get_nth_field(entry, j);
-
-				new_val = &(upd_field->new_val);
-
-				dfield_set_data(dfield, new_val->data,
-						new_val->len);
-				if (heap && new_val->len != UNIV_SQL_NULL) {
-					dfield->data = mem_heap_alloc(
-						heap, new_val->len);
-					ut_memcpy(dfield->data, new_val->data,
-						  new_val->len);
-				}
-
-				if (field->prefix_len > 0
-				    && new_val->len != UNIV_SQL_NULL) {
-
-					const dict_col_t*	col
-						= dict_field_get_col(field);
-
-					dfield->len
-						= dtype_get_at_most_n_mbchars(
-							col->prtype,
-							col->mbminlen,
-							col->mbmaxlen,
-							field->prefix_len,
-							new_val->len,
-							new_val->data);
-				}
-			}
+		if (uf) {
+			row_upd_index_replace_new_col_val(
+				dtuple_get_nth_field(entry, i),
+				field, col, uf, heap, zip_size);
 		}
 	}
 }
 
-/***************************************************************
+/***********************************************************//**
+Replaces the new column values stored in the update vector. */
+UNIV_INTERN
+void
+row_upd_replace(
+/*============*/
+	dtuple_t*		row,	/*!< in/out: row where replaced,
+					indexed by col_no;
+					the clustered index record must be
+					covered by a lock or a page latch to
+					prevent deletion (rollback or purge) */
+	row_ext_t**		ext,	/*!< out, own: NULL, or externally
+					stored column prefixes */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const upd_t*		update,	/*!< in: an update vector built for the
+					clustered index */
+	mem_heap_t*		heap)	/*!< in: memory heap */
+{
+	ulint			col_no;
+	ulint			i;
+	ulint			n_cols;
+	ulint			n_ext_cols;
+	ulint*			ext_cols;
+	const dict_table_t*	table;
+
+	ut_ad(row);
+	ut_ad(ext);
+	ut_ad(index);
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(update);
+	ut_ad(heap);
+
+	n_cols = dtuple_get_n_fields(row);
+	table = index->table;
+	ut_ad(n_cols == dict_table_get_n_cols(table));
+
+	ext_cols = mem_heap_alloc(heap, n_cols * sizeof *ext_cols);
+	n_ext_cols = 0;
+
+	dtuple_set_info_bits(row, update->info_bits);
+
+	for (col_no = 0; col_no < n_cols; col_no++) {
+
+		const dict_col_t*	col
+			= dict_table_get_nth_col(table, col_no);
+		const ulint		clust_pos
+			= dict_col_get_clust_pos(col, index);
+		dfield_t*		dfield;
+
+		if (UNIV_UNLIKELY(clust_pos == ULINT_UNDEFINED)) {
+
+			continue;
+		}
+
+		dfield = dtuple_get_nth_field(row, col_no);
+
+		for (i = 0; i < upd_get_n_fields(update); i++) {
+
+			const upd_field_t*	upd_field
+				= upd_get_nth_field(update, i);
+
+			if (upd_field->field_no != clust_pos) {
+
+				continue;
+			}
+
+			dfield_copy_data(dfield, &upd_field->new_val);
+			break;
+		}
+
+		if (dfield_is_ext(dfield) && col->ord_part) {
+			ext_cols[n_ext_cols++] = col_no;
+		}
+	}
+
+	if (n_ext_cols) {
+		*ext = row_ext_create(n_ext_cols, ext_cols, row,
+				      dict_table_zip_size(table), heap);
+	} else {
+		*ext = NULL;
+	}
+}
+
+/***********************************************************//**
 Checks if an update vector changes an ordering field of an index record.
+
 This function is fast if the update vector is short or the number of ordering
 fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings! */
-
+NOTE: we compare the fields as binary strings!
+@return TRUE if update vector changes an ordering field in the index record */
+UNIV_INTERN
 ibool
 row_upd_changes_ord_field_binary(
 /*=============================*/
-				/* out: TRUE if update vector changes
-				an ordering field in the index record;
-				NOTE: the fields are compared as binary
-				strings */
-	dtuple_t*	row,	/* in: old value of row, or NULL if the
+	const dtuple_t*	row,	/*!< in: old value of row, or NULL if the
 				row and the data values in update are not
 				known when this function is called, e.g., at
 				compile time */
-	dict_index_t*	index,	/* in: index of the record */
-	upd_t*		update)	/* in: update vector for the row; NOTE: the
+	dict_index_t*	index,	/*!< in: index of the record */
+	const upd_t*	update)	/*!< in: update vector for the row; NOTE: the
 				field numbers in this MUST be clustered index
 				positions! */
 {
@@ -1064,7 +1216,7 @@ row_upd_changes_ord_field_binary(
 
 		for (j = 0; j < n_upd_fields; j++) {
 
-			upd_field_t*	upd_field
+			const upd_field_t*	upd_field
 				= upd_get_nth_field(update, j);
 
 			/* Note that if the index field is a column prefix
@@ -1087,17 +1239,17 @@ row_upd_changes_ord_field_binary(
 	return(FALSE);
 }
 
-/***************************************************************
+/***********************************************************//**
 Checks if an update vector changes an ordering field of an index record.
-NOTE: we compare the fields as binary strings! */
-
+NOTE: we compare the fields as binary strings!
+@return TRUE if update vector may change an ordering field in an index
+record */
+UNIV_INTERN
 ibool
 row_upd_changes_some_index_ord_field_binary(
 /*========================================*/
-				/* out: TRUE if update vector may change
-				an ordering field in an index record */
-	dict_table_t*	table,	/* in: table */
-	upd_t*		update)	/* in: update vector for the row */
+	const dict_table_t*	table,	/*!< in: table */
+	const upd_t*		update)	/*!< in: update vector for the row */
 {
 	upd_field_t*	upd_field;
 	dict_index_t*	index;
@@ -1120,19 +1272,19 @@ row_upd_changes_some_index_ord_field_binary(
 	return(FALSE);
 }
 
-/***************************************************************
+/***********************************************************//**
 Checks if an update vector changes some of the first ordering fields of an
 index record. This is only used in foreign key checks and we can assume
-that index does not contain column prefixes. */
+that index does not contain column prefixes.
+@return	TRUE if changes */
 static
 ibool
 row_upd_changes_first_fields_binary(
 /*================================*/
-				/* out: TRUE if changes */
-	dtuple_t*	entry,	/* in: index entry */
-	dict_index_t*	index,	/* in: index of entry */
-	upd_t*		update,	/* in: update vector for the row */
-	ulint		n)	/* in: how many first fields to check */
+	dtuple_t*	entry,	/*!< in: index entry */
+	dict_index_t*	index,	/*!< in: index of entry */
+	const upd_t*	update,	/*!< in: update vector for the row */
+	ulint		n)	/*!< in: how many first fields to check */
 {
 	ulint		n_upd_fields;
 	ulint		i, j;
@@ -1174,15 +1326,15 @@ row_upd_changes_first_fields_binary(
 	return(FALSE);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Copies the column values from a record. */
 UNIV_INLINE
 void
 row_upd_copy_columns(
 /*=================*/
-	rec_t*		rec,	/* in: record in a clustered index */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	sym_node_t*	column)	/* in: first column in a column list, or
+	rec_t*		rec,	/*!< in: record in a clustered index */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	sym_node_t*	column)	/*!< in: first column in a column list, or
 				NULL */
 {
 	byte*	data;
@@ -1192,20 +1344,23 @@ row_upd_copy_columns(
 		data = rec_get_nth_field(rec, offsets,
 					 column->field_nos[SYM_CLUST_FIELD_NO],
 					 &len);
+		if (len == UNIV_SQL_NULL) {
+			len = UNIV_SQL_NULL;
+		}
 		eval_node_copy_and_alloc_val(column, data, len);
 
 		column = UT_LIST_GET_NEXT(col_var_list, column);
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Calculates the new values for fields to update. Note that row_upd_copy_columns
 must have been called first. */
 UNIV_INLINE
 void
 row_upd_eval_new_vals(
 /*==================*/
-	upd_t*	update)	/* in: update vector */
+	upd_t*	update)	/*!< in/out: update vector */
 {
 	que_node_t*	exp;
 	upd_field_t*	upd_field;
@@ -1225,27 +1380,25 @@ row_upd_eval_new_vals(
 	}
 }
 
-/***************************************************************
+/***********************************************************//**
 Stores to the heap the row on which the node->pcur is positioned. */
 static
 void
 row_upd_store_row(
 /*==============*/
-	upd_node_t*	node)	/* in: row update node */
+	upd_node_t*	node)	/*!< in: row update node */
 {
 	dict_index_t*	clust_index;
-	upd_t*		update;
 	rec_t*		rec;
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	const ulint*	offsets;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
 	ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES);
 
 	if (node->row != NULL) {
 		mem_heap_empty(node->heap);
-		node->row = NULL;
 	}
 
 	clust_index = dict_table_get_first_index(node->table);
@@ -1255,32 +1408,31 @@ row_upd_store_row(
 	offsets = rec_get_offsets(rec, clust_index, offsets_,
 				  ULINT_UNDEFINED, &heap);
 	node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets,
-			      node->heap);
-	node->ext_vec = mem_heap_alloc(node->heap, sizeof(ulint)
-				       * rec_offs_n_fields(offsets));
+			      NULL, &node->ext, node->heap);
 	if (node->is_delete) {
-		update = NULL;
+		node->upd_row = NULL;
+		node->upd_ext = NULL;
 	} else {
-		update = node->update;
+		node->upd_row = dtuple_copy(node->row, node->heap);
+		row_upd_replace(node->upd_row, &node->upd_ext,
+				clust_index, node->update, node->heap);
 	}
 
-	node->n_ext_vec = btr_push_update_extern_fields(node->ext_vec,
-							offsets, update);
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}
 }
 
-/***************************************************************
-Updates a secondary index entry of a row. */
+/***********************************************************//**
+Updates a secondary index entry of a row.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
 static
 ulint
 row_upd_sec_index_entry(
 /*====================*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, else error code or DB_LOCK_WAIT */
-	upd_node_t*	node,	/* in: row update node */
-	que_thr_t*	thr)	/* in: query thread */
+	upd_node_t*	node,	/*!< in: row update node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	ibool		check_ref;
 	ibool		found;
@@ -1301,7 +1453,8 @@ row_upd_sec_index_entry(
 	heap = mem_heap_create(1024);
 
 	/* Build old index entry */
-	entry = row_build_index_entry(node->row, index, heap);
+	entry = row_build_index_entry(node->row, node->ext, index, heap);
+	ut_a(entry);
 
 	log_free_check();
 	mtr_start(&mtr);
@@ -1340,87 +1493,82 @@ row_upd_sec_index_entry(
 							   thr, &mtr);
 			if (err == DB_SUCCESS && check_ref) {
 
+				ulint*	offsets = rec_get_offsets(
+					rec, index, NULL,
+					ULINT_UNDEFINED, &heap);
 				/* NOTE that the following call loses
 				the position of pcur ! */
 				err = row_upd_check_references_constraints(
 					node, &pcur, index->table,
-					index, thr, &mtr);
-				if (err != DB_SUCCESS) {
-
-					goto close_cur;
-				}
+					index, offsets, thr, &mtr);
 			}
-
 		}
 	}
-close_cur:
+
 	btr_pcur_close(&pcur);
 	mtr_commit(&mtr);
 
 	if (node->is_delete || err != DB_SUCCESS) {
 
-		mem_heap_free(heap);
-
-		return(err);
+		goto func_exit;
 	}
 
 	/* Build a new index entry */
-	row_upd_index_replace_new_col_vals(entry, index, node->update, NULL);
+	entry = row_build_index_entry(node->upd_row, node->upd_ext,
+				      index, heap);
+	ut_a(entry);
 
 	/* Insert new index entry */
-	err = row_ins_index_entry(index, entry, NULL, 0, thr);
+	err = row_ins_index_entry(index, entry, 0, TRUE, thr);
 
+func_exit:
 	mem_heap_free(heap);
 
 	return(err);
 }
 
-/***************************************************************
+/***********************************************************//**
 Updates the secondary index record if it is changed in the row update or
-deletes it if this is a delete. */
+deletes it if this is a delete.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
 UNIV_INLINE
 ulint
 row_upd_sec_step(
 /*=============*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, else error code or DB_LOCK_WAIT */
-	upd_node_t*	node,	/* in: row update node */
-	que_thr_t*	thr)	/* in: query thread */
+	upd_node_t*	node,	/*!< in: row update node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
-	ulint	err;
-
 	ut_ad((node->state == UPD_NODE_UPDATE_ALL_SEC)
 	      || (node->state == UPD_NODE_UPDATE_SOME_SEC));
-	ut_ad(!(node->index->type & DICT_CLUSTERED));
+	ut_ad(!dict_index_is_clust(node->index));
 
 	if (node->state == UPD_NODE_UPDATE_ALL_SEC
 	    || row_upd_changes_ord_field_binary(node->row, node->index,
 						node->update)) {
-		err = row_upd_sec_index_entry(node, thr);
-
-		return(err);
+		return(row_upd_sec_index_entry(node, thr));
 	}
 
 	return(DB_SUCCESS);
 }
 
-/***************************************************************
+/***********************************************************//**
 Marks the clustered index record deleted and inserts the updated version
 of the record to the index. This function should be used when the ordering
 fields of the clustered index record change. This should be quite rare in
-database applications. */
+database applications.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
 static
 ulint
 row_upd_clust_rec_by_insert(
 /*========================*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, else error code or DB_LOCK_WAIT */
-	upd_node_t*	node,	/* in: row update node */
-	dict_index_t*	index,	/* in: clustered index of the record */
-	que_thr_t*	thr,	/* in: query thread */
-	ibool		check_ref,/* in: TRUE if index may be referenced in
+	upd_node_t*	node,	/*!< in: row update node */
+	dict_index_t*	index,	/*!< in: clustered index of the record */
+	que_thr_t*	thr,	/*!< in: query thread */
+	ibool		check_ref,/*!< in: TRUE if index may be referenced in
 				a foreign key constraint */
-	mtr_t*		mtr)	/* in: mtr; gets committed here */
+	mtr_t*		mtr)	/*!< in: mtr; gets committed here */
 {
 	mem_heap_t*	heap	= NULL;
 	btr_pcur_t*	pcur;
@@ -1431,7 +1579,7 @@ row_upd_clust_rec_by_insert(
 	ulint		err;
 
 	ut_ad(node);
-	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(dict_index_is_clust(index));
 
 	trx = thr_get_trx(thr);
 	table = node->table;
@@ -1439,8 +1587,11 @@ row_upd_clust_rec_by_insert(
 	btr_cur	= btr_pcur_get_btr_cur(pcur);
 
 	if (node->state != UPD_NODE_INSERT_CLUSTERED) {
-		ulint	offsets_[REC_OFFS_NORMAL_SIZE];
-		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+		rec_t*		rec;
+		dict_index_t*	index;
+		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+		ulint*		offsets;
+		rec_offs_init(offsets_);
 
 		err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
 						     btr_cur, TRUE, thr, mtr);
@@ -1454,17 +1605,18 @@ row_upd_clust_rec_by_insert(
 		free those externally stored fields even if the delete marked
 		record is removed from the index tree, or updated. */
 
+		rec = btr_cur_get_rec(btr_cur);
+		index = dict_table_get_first_index(table);
+		offsets = rec_get_offsets(rec, index, offsets_,
+					  ULINT_UNDEFINED, &heap);
 		btr_cur_mark_extern_inherited_fields(
-			btr_cur_get_rec(btr_cur),
-			rec_get_offsets(btr_cur_get_rec(btr_cur),
-					dict_table_get_first_index(table),
-					offsets_, ULINT_UNDEFINED, &heap),
-			node->update, mtr);
+			btr_cur_get_page_zip(btr_cur),
+			rec, index, offsets, node->update, mtr);
 		if (check_ref) {
 			/* NOTE that the following call loses
 			the position of pcur ! */
 			err = row_upd_check_references_constraints(
-				node, pcur, table, index, thr, mtr);
+				node, pcur, table, index, offsets, thr, mtr);
 			if (err != DB_SUCCESS) {
 				mtr_commit(mtr);
 				if (UNIV_LIKELY_NULL(heap)) {
@@ -1473,7 +1625,6 @@ row_upd_clust_rec_by_insert(
 				return(err);
 			}
 		}
-
 	}
 
 	mtr_commit(mtr);
@@ -1483,53 +1634,55 @@ row_upd_clust_rec_by_insert(
 	}
 	node->state = UPD_NODE_INSERT_CLUSTERED;
 
-	entry = row_build_index_entry(node->row, index, heap);
-
-	row_upd_index_replace_new_col_vals(entry, index, node->update, NULL);
+	entry = row_build_index_entry(node->upd_row, node->upd_ext,
+				      index, heap);
+	ut_a(entry);
 
 	row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
 
-	/* If we return from a lock wait, for example, we may have
-	extern fields marked as not-owned in entry (marked in the
-	if-branch above). We must unmark them. */
+	if (node->upd_ext) {
+		/* If we return from a lock wait, for example, we may have
+		extern fields marked as not-owned in entry (marked in the
+		if-branch above). We must unmark them. */
 
-	btr_cur_unmark_dtuple_extern_fields(entry, node->ext_vec,
-					    node->n_ext_vec);
-	/* We must mark non-updated extern fields in entry as inherited,
-	so that a possible rollback will not free them */
+		btr_cur_unmark_dtuple_extern_fields(entry);
 
-	btr_cur_mark_dtuple_inherited_extern(entry, node->ext_vec,
-					     node->n_ext_vec,
-					     node->update);
+		/* We must mark non-updated extern fields in entry as
+		inherited, so that a possible rollback will not free them. */
 
-	err = row_ins_index_entry(index, entry, node->ext_vec,
-				  node->n_ext_vec, thr);
+		btr_cur_mark_dtuple_inherited_extern(entry, node->update);
+	}
+
+	err = row_ins_index_entry(index, entry,
+				  node->upd_ext ? node->upd_ext->n_ext : 0,
+				  TRUE, thr);
 	mem_heap_free(heap);
 
 	return(err);
 }
 
-/***************************************************************
+/***********************************************************//**
 Updates a clustered index record of a row when the ordering fields do
-not change. */
+not change.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
 static
 ulint
 row_upd_clust_rec(
 /*==============*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, else error code or DB_LOCK_WAIT */
-	upd_node_t*	node,	/* in: row update node */
-	dict_index_t*	index,	/* in: clustered index */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr)	/* in: mtr; gets committed here */
+	upd_node_t*	node,	/*!< in: row update node */
+	dict_index_t*	index,	/*!< in: clustered index */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr)	/*!< in: mtr; gets committed here */
 {
+	mem_heap_t*	heap	= NULL;
 	big_rec_t*	big_rec	= NULL;
 	btr_pcur_t*	pcur;
 	btr_cur_t*	btr_cur;
 	ulint		err;
 
 	ut_ad(node);
-	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(dict_index_is_clust(index));
 
 	pcur = node->pcur;
 	btr_cur = btr_pcur_get_btr_cur(pcur);
@@ -1553,9 +1706,9 @@ row_upd_clust_rec(
 
 	mtr_commit(mtr);
 
-	if (err == DB_SUCCESS) {
+	if (UNIV_LIKELY(err == DB_SUCCESS)) {
 
-		return(err);
+		return(DB_SUCCESS);
 	}
 
 	if (buf_LRU_buf_pool_running_out()) {
@@ -1579,31 +1732,31 @@ row_upd_clust_rec(
 				    dict_table_is_comp(index->table)));
 
 	err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
-					 &big_rec, node->update,
+					 &heap, &big_rec, node->update,
 					 node->cmpl_info, thr, mtr);
 	mtr_commit(mtr);
 
 	if (err == DB_SUCCESS && big_rec) {
-		mem_heap_t*	heap		= NULL;
 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 		rec_t*		rec;
-		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+		rec_offs_init(offsets_);
 
 		mtr_start(mtr);
 
 		ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
 		rec = btr_cur_get_rec(btr_cur);
 		err = btr_store_big_rec_extern_fields(
-			index, rec,
+			index, btr_cur_get_block(btr_cur), rec,
 			rec_get_offsets(rec, index, offsets_,
 					ULINT_UNDEFINED, &heap),
-			 big_rec, mtr);
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
+			big_rec, mtr);
 		mtr_commit(mtr);
 	}
 
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+
 	if (big_rec) {
 		dtuple_big_rec_free(big_rec);
 	}
@@ -1611,27 +1764,28 @@ row_upd_clust_rec(
 	return(err);
 }
 
-/***************************************************************
-Delete marks a clustered index record. */
+/***********************************************************//**
+Delete marks a clustered index record.
+@return	DB_SUCCESS if operation successfully completed, else error code */
 static
 ulint
 row_upd_del_mark_clust_rec(
 /*=======================*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, else error code */
-	upd_node_t*	node,	/* in: row update node */
-	dict_index_t*	index,	/* in: clustered index */
-	que_thr_t*	thr,	/* in: query thread */
-	ibool		check_ref,/* in: TRUE if index may be referenced in
+	upd_node_t*	node,	/*!< in: row update node */
+	dict_index_t*	index,	/*!< in: clustered index */
+	ulint*		offsets,/*!< in/out: rec_get_offsets() for the
+				record under the cursor */
+	que_thr_t*	thr,	/*!< in: query thread */
+	ibool		check_ref,/*!< in: TRUE if index may be referenced in
 				a foreign key constraint */
-	mtr_t*		mtr)	/* in: mtr; gets committed here */
+	mtr_t*		mtr)	/*!< in: mtr; gets committed here */
 {
 	btr_pcur_t*	pcur;
 	btr_cur_t*	btr_cur;
 	ulint		err;
 
 	ut_ad(node);
-	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(dict_index_is_clust(index));
 	ut_ad(node->is_delete);
 
 	pcur = node->pcur;
@@ -1652,12 +1806,8 @@ row_upd_del_mark_clust_rec(
 
 		err = row_upd_check_references_constraints(node,
 							   pcur, index->table,
-							   index, thr, mtr);
-		if (err != DB_SUCCESS) {
-			mtr_commit(mtr);
-
-			return(err);
-		}
+							   index, offsets,
+							   thr, mtr);
 	}
 
 	mtr_commit(mtr);
@@ -1665,17 +1815,16 @@ row_upd_del_mark_clust_rec(
 	return(err);
 }
 
-/***************************************************************
-Updates the clustered index record. */
+/***********************************************************//**
+Updates the clustered index record.
+@return DB_SUCCESS if operation successfully completed, DB_LOCK_WAIT
+in case of a lock wait, else error code */
 static
 ulint
 row_upd_clust_step(
 /*===============*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, DB_LOCK_WAIT in case of a lock wait,
-				else error code */
-	upd_node_t*	node,	/* in: row update node */
-	que_thr_t*	thr)	/* in: query thread */
+	upd_node_t*	node,	/*!< in: row update node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	dict_index_t*	index;
 	btr_pcur_t*	pcur;
@@ -1687,8 +1836,8 @@ row_upd_clust_step(
 	rec_t*		rec;
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	const ulint*	offsets;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	ulint*		offsets;
+	rec_offs_init(offsets_);
 
 	index = dict_table_get_first_index(node->table);
 
@@ -1751,7 +1900,8 @@ row_upd_clust_step(
 
 	if (!node->has_clust_rec_x_lock) {
 		err = lock_clust_rec_modify_check_and_lock(
-			0, rec, index, offsets, thr);
+			0, btr_pcur_get_block(pcur),
+			rec, index, offsets, thr);
 		if (err != DB_SUCCESS) {
 			mtr_commit(mtr);
 			goto exit_func;
@@ -1761,8 +1911,8 @@ row_upd_clust_step(
 	/* NOTE: the following function calls will also commit mtr */
 
 	if (node->is_delete) {
-		err = row_upd_del_mark_clust_rec(node, index, thr, check_ref,
-						 mtr);
+		err = row_upd_del_mark_clust_rec(node, index, offsets,
+						 thr, check_ref, mtr);
 		if (err == DB_SUCCESS) {
 			node->state = UPD_NODE_UPDATE_ALL_SEC;
 			node->index = dict_table_get_next_index(index);
@@ -1777,7 +1927,7 @@ exit_func:
 	/* If the update is made for MySQL, we already have the update vector
 	ready, else we have to do some evaluation: */
 
-	if (!node->in_mysql_interface) {
+	if (UNIV_UNLIKELY(!node->in_mysql_interface)) {
 		/* Copy the necessary columns from clust_rec and calculate the
 		new values to set */
 		row_upd_copy_columns(rec, offsets,
@@ -1834,18 +1984,18 @@ exit_func:
 	return(err);
 }
 
-/***************************************************************
+/***********************************************************//**
 Updates the affected index records of a row. When the control is transferred
 to this node, we assume that we have a persistent cursor which was on a
-record, and the position of the cursor is stored in the cursor. */
+record, and the position of the cursor is stored in the cursor.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
 static
 ulint
 row_upd(
 /*====*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, else error code or DB_LOCK_WAIT */
-	upd_node_t*	node,	/* in: row update node */
-	que_thr_t*	thr)	/* in: query thread */
+	upd_node_t*	node,	/*!< in: row update node */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	ulint	err	= DB_SUCCESS;
 
@@ -1898,7 +2048,9 @@ function_exit:
 
 		if (node->row != NULL) {
 			node->row = NULL;
-			node->n_ext_vec = 0;
+			node->ext = NULL;
+			node->upd_row = NULL;
+			node->upd_ext = NULL;
 			mem_heap_empty(node->heap);
 		}
 
@@ -1908,15 +2060,15 @@ function_exit:
 	return(err);
 }
 
-/***************************************************************
+/***********************************************************//**
 Updates a row in a table. This is a high-level function used in SQL execution
-graphs. */
-
+graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
 que_thr_t*
 row_upd_step(
 /*=========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	upd_node_t*	node;
 	sel_node_t*	sel_node;
@@ -2022,60 +2174,4 @@ error_handling:
 
 	return(thr);
 }
-
-/*************************************************************************
-Performs an in-place update for the current clustered index record in
-select. */
-
-void
-row_upd_in_place_in_select(
-/*=======================*/
-	sel_node_t*	sel_node,	/* in: select node */
-	que_thr_t*	thr,		/* in: query thread */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	upd_node_t*	node;
-	btr_pcur_t*	pcur;
-	btr_cur_t*	btr_cur;
-	ulint		err;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	ut_ad(sel_node->select_will_do_update);
-	ut_ad(sel_node->latch_mode == BTR_MODIFY_LEAF);
-	ut_ad(sel_node->asc);
-
-	node = que_node_get_parent(sel_node);
-
-	ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE);
-
-	pcur = node->pcur;
-	btr_cur = btr_pcur_get_btr_cur(pcur);
-
-	/* Copy the necessary columns from clust_rec and calculate the new
-	values to set */
-
-	row_upd_copy_columns(btr_pcur_get_rec(pcur),
-			     rec_get_offsets(btr_pcur_get_rec(pcur),
-					     btr_cur->index, offsets_,
-					     ULINT_UNDEFINED, &heap),
-			     UT_LIST_GET_FIRST(node->columns));
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-	row_upd_eval_new_vals(node->update);
-
-	ut_ad(!rec_get_deleted_flag(
-		      btr_pcur_get_rec(pcur),
-		      dict_table_is_comp(btr_cur->index->table)));
-
-	ut_ad(node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE);
-	ut_ad(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE);
-	ut_ad(node->select_will_do_update);
-
-	err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, btr_cur,
-				      node->update, node->cmpl_info,
-				      thr, mtr);
-	ut_ad(err == DB_SUCCESS);
-}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/row/row0vers.c b/storage/innodb_plugin/row/row0vers.c
similarity index 68%
rename from storage/innobase/row/row0vers.c
rename to storage/innodb_plugin/row/row0vers.c
index 03d9a2f1203..a4fbb5289aa 100644
--- a/storage/innobase/row/row0vers.c
+++ b/storage/innodb_plugin/row/row0vers.c
@@ -1,7 +1,24 @@
-/******************************************************
-Row versions
+/*****************************************************************************
 
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0vers.c
+Row versions
 
 Created 2/6/1997 Heikki Tuuri
 *******************************************************/
@@ -29,35 +46,30 @@ Created 2/6/1997 Heikki Tuuri
 #include "read0read.h"
 #include "lock0lock.h"
 
-/*********************************************************************
+/*****************************************************************//**
 Finds out if an active transaction has inserted or modified a secondary
 index record. NOTE: the kernel mutex is temporarily released in this
-function! */
-
+function!
+@return NULL if committed, else the active transaction */
+UNIV_INTERN
 trx_t*
 row_vers_impl_x_locked_off_kernel(
 /*==============================*/
-				/* out: NULL if committed, else the active
-				transaction; NOTE that the kernel mutex is
-				temporarily released! */
-	rec_t*		rec,	/* in: record in a secondary index */
-	dict_index_t*	index,	/* in: the secondary index */
-	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
+	const rec_t*	rec,	/*!< in: record in a secondary index */
+	dict_index_t*	index,	/*!< in: the secondary index */
+	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
 {
 	dict_index_t*	clust_index;
 	rec_t*		clust_rec;
 	ulint*		clust_offsets;
 	rec_t*		version;
-	rec_t*		prev_version;
-	dulint		trx_id;
-	dulint		prev_trx_id;
+	trx_id_t	trx_id;
 	mem_heap_t*	heap;
 	mem_heap_t*	heap2;
 	dtuple_t*	row;
 	dtuple_t*	entry	= NULL; /* assignment to eliminate compiler
 					warning */
 	trx_t*		trx;
-	ulint		vers_del;
 	ulint		rec_del;
 	ulint		err;
 	mtr_t		mtr;
@@ -141,6 +153,11 @@ row_vers_impl_x_locked_off_kernel(
 	version = clust_rec;
 
 	for (;;) {
+		rec_t*		prev_version;
+		ulint		vers_del;
+		row_ext_t*	ext;
+		trx_id_t	prev_trx_id;
+
 		mutex_exit(&kernel_mutex);
 
 		/* While we retrieve an earlier version of clust_rec, we
@@ -157,15 +174,59 @@ row_vers_impl_x_locked_off_kernel(
 						  heap, &prev_version);
 		mem_heap_free(heap2); /* free version and clust_offsets */
 
-		if (prev_version) {
-			clust_offsets = rec_get_offsets(
-				prev_version, clust_index, NULL,
-				ULINT_UNDEFINED, &heap);
-			row = row_build(ROW_COPY_POINTERS, clust_index,
-					prev_version, clust_offsets, heap);
-			entry = row_build_index_entry(row, index, heap);
+		if (prev_version == NULL) {
+			mutex_enter(&kernel_mutex);
+
+			if (!trx_is_active(trx_id)) {
+				/* Transaction no longer active: no
+				implicit x-lock */
+
+				break;
+			}
+
+			/* If the transaction is still active,
+			clust_rec must be a fresh insert, because no
+			previous version was found. */
+			ut_ad(err == DB_SUCCESS);
+
+			/* It was a freshly inserted version: there is an
+			implicit x-lock on rec */
+
+			trx = trx_get_on_id(trx_id);
+
+			break;
 		}
 
+		clust_offsets = rec_get_offsets(prev_version, clust_index,
+						NULL, ULINT_UNDEFINED, &heap);
+
+		vers_del = rec_get_deleted_flag(prev_version, comp);
+		prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
+						 clust_offsets);
+
+		/* If the trx_id and prev_trx_id are different and if
+		the prev_version is marked deleted then the
+		prev_trx_id must have already committed for the trx_id
+		to be able to modify the row. Therefore, prev_trx_id
+		cannot hold any implicit lock. */
+		if (vers_del && 0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
+
+			mutex_enter(&kernel_mutex);
+			break;
+		}
+
+		/* The stack of versions is locked by mtr.  Thus, it
+		is safe to fetch the prefixes for externally stored
+		columns. */
+		row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
+				clust_offsets, NULL, &ext, heap);
+		entry = row_build_index_entry(row, ext, index, heap);
+		/* entry may be NULL if a record was inserted in place
+		of a deleted record, and the BLOB pointers of the new
+		record were not initialized yet.  But in that case,
+		prev_version should be NULL. */
+		ut_a(entry);
+
 		mutex_enter(&kernel_mutex);
 
 		if (!trx_is_active(trx_id)) {
@@ -174,27 +235,15 @@ row_vers_impl_x_locked_off_kernel(
 			break;
 		}
 
-		/* If the transaction is still active, the previous version
-		of clust_rec must be accessible if not a fresh insert; we
-		may assert the following: */
-
-		ut_ad(err == DB_SUCCESS);
-
-		if (prev_version == NULL) {
-			/* It was a freshly inserted version: there is an
-			implicit x-lock on rec */
-
-			trx = trx_get_on_id(trx_id);
-
-			break;
-		}
-
 		/* If we get here, we know that the trx_id transaction is
 		still active and it has modified prev_version. Let us check
 		if prev_version would require rec to be in a different
 		state. */
 
-		vers_del = rec_get_deleted_flag(prev_version, comp);
+		/* The previous version of clust_rec must be
+		accessible, because the transaction is still active
+		and clust_rec was not a fresh insert. */
+		ut_ad(err == DB_SUCCESS);
 
 		/* We check if entry and rec are identified in the alphabetical
 		ordering */
@@ -231,9 +280,6 @@ row_vers_impl_x_locked_off_kernel(
 			break;
 		}
 
-		prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
-						 clust_offsets);
-
 		if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
 			/* The versions modified by the trx_id transaction end
 			to prev_version: no implicit x-lock */
@@ -251,17 +297,18 @@ exit_func:
 	return(trx);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view. */
-
+index record, because it is >= the purge view.
+@return	TRUE if earlier version should be preserved */
+UNIV_INTERN
 ibool
 row_vers_must_preserve_del_marked(
 /*==============================*/
-			/* out: TRUE if earlier version should be preserved */
-	dulint	trx_id,	/* in: transaction id in the version */
-	mtr_t*	mtr)	/* in: mtr holding the latch on the clustered index
-			record; it will also hold the latch on purge_view */
+	trx_id_t	trx_id,	/*!< in: transaction id in the version */
+	mtr_t*		mtr)	/*!< in: mtr holding the latch on the
+				clustered index record; it will also
+				hold the latch on purge_view */
 {
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
@@ -280,41 +327,40 @@ row_vers_must_preserve_del_marked(
 	return(FALSE);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Finds out if a version of the record, where the version >= the current
 purge view, should have ientry as its secondary index entry. We check
 if there is any not delete marked version of the record where the trx
 id >= purge view, and the secondary index entry and ientry are identified in
-the alphabetical ordering; exactly in this case we return TRUE. */
-
+the alphabetical ordering; exactly in this case we return TRUE.
+@return	TRUE if earlier version should have */
+UNIV_INTERN
 ibool
 row_vers_old_has_index_entry(
 /*=========================*/
-				/* out: TRUE if earlier version should have */
-	ibool		also_curr,/* in: TRUE if also rec is included in the
+	ibool		also_curr,/*!< in: TRUE if also rec is included in the
 				versions to search; otherwise only versions
 				prior to it are searched */
-	rec_t*		rec,	/* in: record in the clustered index; the
+	const rec_t*	rec,	/*!< in: record in the clustered index; the
 				caller must have a latch on the page */
-	mtr_t*		mtr,	/* in: mtr holding the latch on rec; it will
+	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec; it will
 				also hold the latch on purge_view */
-	dict_index_t*	index,	/* in: the secondary index */
-	dtuple_t*	ientry)	/* in: the secondary index entry */
+	dict_index_t*	index,	/*!< in: the secondary index */
+	const dtuple_t*	ientry)	/*!< in: the secondary index entry */
 {
-	rec_t*		version;
+	const rec_t*	version;
 	rec_t*		prev_version;
 	dict_index_t*	clust_index;
 	ulint*		clust_offsets;
 	mem_heap_t*	heap;
 	mem_heap_t*	heap2;
-	dtuple_t*	row;
-	dtuple_t*	entry;
+	const dtuple_t*	row;
+	const dtuple_t*	entry;
 	ulint		err;
 	ulint		comp;
 
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
-	      || mtr_memo_contains(mtr, buf_block_align(rec),
-				   MTR_MEMO_PAGE_S_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
 #endif /* UNIV_SYNC_DEBUG */
@@ -329,17 +375,37 @@ row_vers_old_has_index_entry(
 					ULINT_UNDEFINED, &heap);
 
 	if (also_curr && !rec_get_deleted_flag(rec, comp)) {
+		row_ext_t*	ext;
+
+		/* The stack of versions is locked by mtr.
+		Thus, it is safe to fetch the prefixes for
+		externally stored columns. */
 		row = row_build(ROW_COPY_POINTERS, clust_index,
-				rec, clust_offsets, heap);
-		entry = row_build_index_entry(row, index, heap);
+				rec, clust_offsets, NULL, &ext, heap);
+		entry = row_build_index_entry(row, ext, index, heap);
+
+		/* If entry == NULL, the record contains unset BLOB
+		pointers.  This must be a freshly inserted record.  If
+		this is called from
+		row_purge_remove_sec_if_poss_low(), the thread will
+		hold latches on the clustered index and the secondary
+		index.  Because the insert works in three steps:
+
+			(1) insert the record to clustered index
+			(2) store the BLOBs and update BLOB pointers
+			(3) insert records to secondary indexes
+
+		the purge thread can safely ignore freshly inserted
+		records and delete the secondary index record.  The
+		thread that inserted the new record will be inserting
+		the secondary index records. */
 
 		/* NOTE that we cannot do the comparison as binary
 		fields because the row is maybe being modified so that
-		the clustered index record has already been updated
-		to a different binary value in a char field, but the
+		the clustered index record has already been updated to
+		a different binary value in a char field, but the
 		collation identifies the old and new value anyway! */
-
-		if (dtuple_datas_are_ordering_equal(ientry, entry)) {
+		if (entry && !dtuple_coll_cmp(ientry, entry)) {
 
 			mem_heap_free(heap);
 
@@ -369,9 +435,21 @@ row_vers_old_has_index_entry(
 						NULL, ULINT_UNDEFINED, &heap);
 
 		if (!rec_get_deleted_flag(prev_version, comp)) {
+			row_ext_t*	ext;
+
+			/* The stack of versions is locked by mtr.
+			Thus, it is safe to fetch the prefixes for
+			externally stored columns. */
 			row = row_build(ROW_COPY_POINTERS, clust_index,
-					prev_version, clust_offsets, heap);
-			entry = row_build_index_entry(row, index, heap);
+					prev_version, clust_offsets,
+					NULL, &ext, heap);
+			entry = row_build_index_entry(row, ext, index, heap);
+
+			/* If entry == NULL, the record contains unset
+			BLOB pointers.  This must be a freshly
+			inserted record that we can safely ignore.
+			For the justification, see the comments after
+			the previous row_build_index_entry() call. */
 
 			/* NOTE that we cannot do the comparison as binary
 			fields because maybe the secondary index record has
@@ -379,7 +457,7 @@ row_vers_old_has_index_entry(
 			a char field, but the collation identifies the old
 			and new value anyway! */
 
-			if (dtuple_datas_are_ordering_equal(ientry, entry)) {
+			if (entry && !dtuple_coll_cmp(ientry, entry)) {
 
 				mem_heap_free(heap);
 
@@ -391,45 +469,44 @@ row_vers_old_has_index_entry(
 	}
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Constructs the version of a clustered index record which a consistent
 read should see. We assume that the trx id stored in rec is such that
-the consistent read should not see rec in its present version. */
-
+the consistent read should not see rec in its present version.
+@return	DB_SUCCESS or DB_MISSING_HISTORY */
+UNIV_INTERN
 ulint
 row_vers_build_for_consistent_read(
 /*===============================*/
-				/* out: DB_SUCCESS or DB_MISSING_HISTORY */
-	rec_t*		rec,	/* in: record in a clustered index; the
+	const rec_t*	rec,	/*!< in: record in a clustered index; the
 				caller must have a latch on the page; this
 				latch locks the top of the stack of versions
 				of this records */
-	mtr_t*		mtr,	/* in: mtr holding the latch on rec */
-	dict_index_t*	index,	/* in: the clustered index */
-	ulint**		offsets,/* in/out: offsets returned by
+	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
+	dict_index_t*	index,	/*!< in: the clustered index */
+	ulint**		offsets,/*!< in/out: offsets returned by
 				rec_get_offsets(rec, index) */
-	read_view_t*	view,	/* in: the consistent read view */
-	mem_heap_t**	offset_heap,/* in/out: memory heap from which
+	read_view_t*	view,	/*!< in: the consistent read view */
+	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
 				the offsets are allocated */
-	mem_heap_t*	in_heap,/* in: memory heap from which the memory for
-				old_vers is allocated; memory for possible
+	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
+				*old_vers is allocated; memory for possible
 				intermediate versions is allocated and freed
 				locally within the function */
-	rec_t**		old_vers)/* out, own: old version, or NULL if the
+	rec_t**		old_vers)/*!< out, own: old version, or NULL if the
 				record does not exist in the view, that is,
 				it was freshly inserted afterwards */
 {
-	rec_t*		version;
+	const rec_t*	version;
 	rec_t*		prev_version;
-	dulint		trx_id;
+	trx_id_t	trx_id;
 	mem_heap_t*	heap		= NULL;
 	byte*		buf;
 	ulint		err;
 
-	ut_ad(index->type & DICT_CLUSTERED);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
-	      || mtr_memo_contains(mtr, buf_block_align(rec),
-				   MTR_MEMO_PAGE_S_FIX));
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
 #endif /* UNIV_SYNC_DEBUG */
@@ -446,8 +523,8 @@ row_vers_build_for_consistent_read(
 	for (;;) {
 		mem_heap_t*	heap2	= heap;
 		trx_undo_rec_t* undo_rec;
-		dulint		roll_ptr;
-		dulint		undo_no;
+		roll_ptr_t	roll_ptr;
+		undo_no_t	undo_no;
 		heap = mem_heap_create(1024);
 
 		/* If we have high-granularity consistent read view and
@@ -525,42 +602,41 @@ row_vers_build_for_consistent_read(
 	return(err);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read. */
-
+which should be seen by a semi-consistent read.
+@return	DB_SUCCESS or DB_MISSING_HISTORY */
+UNIV_INTERN
 ulint
 row_vers_build_for_semi_consistent_read(
 /*====================================*/
-				/* out: DB_SUCCESS or DB_MISSING_HISTORY */
-	rec_t*		rec,	/* in: record in a clustered index; the
+	const rec_t*	rec,	/*!< in: record in a clustered index; the
 				caller must have a latch on the page; this
 				latch locks the top of the stack of versions
 				of this records */
-	mtr_t*		mtr,	/* in: mtr holding the latch on rec */
-	dict_index_t*	index,	/* in: the clustered index */
-	ulint**		offsets,/* in/out: offsets returned by
+	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
+	dict_index_t*	index,	/*!< in: the clustered index */
+	ulint**		offsets,/*!< in/out: offsets returned by
 				rec_get_offsets(rec, index) */
-	mem_heap_t**	offset_heap,/* in/out: memory heap from which
+	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
 				the offsets are allocated */
-	mem_heap_t*	in_heap,/* in: memory heap from which the memory for
-				old_vers is allocated; memory for possible
+	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
+				*old_vers is allocated; memory for possible
 				intermediate versions is allocated and freed
 				locally within the function */
-	rec_t**		old_vers)/* out, own: rec, old version, or NULL if the
+	const rec_t**	old_vers)/*!< out: rec, old version, or NULL if the
 				record does not exist in the view, that is,
 				it was freshly inserted afterwards */
 {
-	rec_t*		version;
+	const rec_t*	version;
 	mem_heap_t*	heap		= NULL;
 	byte*		buf;
 	ulint		err;
-	dulint		rec_trx_id	= ut_dulint_create(0, 0);
+	trx_id_t	rec_trx_id	= ut_dulint_zero;
 
-	ut_ad(index->type & DICT_CLUSTERED);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
-	      || mtr_memo_contains(mtr, buf_block_align(rec),
-				   MTR_MEMO_PAGE_S_FIX));
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
 #endif /* UNIV_SYNC_DEBUG */
@@ -579,7 +655,7 @@ row_vers_build_for_semi_consistent_read(
 		trx_t*		version_trx;
 		mem_heap_t*	heap2;
 		rec_t*		prev_version;
-		dulint		version_trx_id;
+		trx_id_t	version_trx_id;
 
 		version_trx_id = row_get_rec_trx_id(version, index, *offsets);
 		if (rec == version) {
diff --git a/storage/innodb_plugin/scripts/install_innodb_plugins.sql b/storage/innodb_plugin/scripts/install_innodb_plugins.sql
new file mode 100644
index 00000000000..3fdb8f11e22
--- /dev/null
+++ b/storage/innodb_plugin/scripts/install_innodb_plugins.sql
@@ -0,0 +1,9 @@
+-- execute these to install InnoDB if it is built as a dynamic plugin
+INSTALL PLUGIN innodb SONAME 'ha_innodb.so';
+INSTALL PLUGIN innodb_trx SONAME 'ha_innodb.so';
+INSTALL PLUGIN innodb_locks SONAME 'ha_innodb.so';
+INSTALL PLUGIN innodb_lock_waits SONAME 'ha_innodb.so';
+INSTALL PLUGIN innodb_cmp SONAME 'ha_innodb.so';
+INSTALL PLUGIN innodb_cmp_reset SONAME 'ha_innodb.so';
+INSTALL PLUGIN innodb_cmpmem SONAME 'ha_innodb.so';
+INSTALL PLUGIN innodb_cmpmem_reset SONAME 'ha_innodb.so';
diff --git a/storage/innodb_plugin/scripts/install_innodb_plugins_win.sql b/storage/innodb_plugin/scripts/install_innodb_plugins_win.sql
new file mode 100644
index 00000000000..8c94b4e240d
--- /dev/null
+++ b/storage/innodb_plugin/scripts/install_innodb_plugins_win.sql
@@ -0,0 +1,9 @@
+-- execute these to install InnoDB if it is built as a dynamic plugin
+INSTALL PLUGIN innodb SONAME 'ha_innodb.dll';
+INSTALL PLUGIN innodb_trx SONAME 'ha_innodb.dll';
+INSTALL PLUGIN innodb_locks SONAME 'ha_innodb.dll';
+INSTALL PLUGIN innodb_lock_waits SONAME 'ha_innodb.dll';
+INSTALL PLUGIN innodb_cmp SONAME 'ha_innodb.dll';
+INSTALL PLUGIN innodb_cmp_reset SONAME 'ha_innodb.dll';
+INSTALL PLUGIN innodb_cmpmem SONAME 'ha_innodb.dll';
+INSTALL PLUGIN innodb_cmpmem_reset SONAME 'ha_innodb.dll';
diff --git a/storage/innodb_plugin/setup.sh b/storage/innodb_plugin/setup.sh
new file mode 100755
index 00000000000..23fe729a406
--- /dev/null
+++ b/storage/innodb_plugin/setup.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+#
+# Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+# 
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Prepare the MySQL source code tree for building
+# with checked-out InnoDB Subversion directory.
+
+# This script assumes that the current directory is storage/innobase.
+
+set -eu
+
+TARGETDIR=../storage/innobase
+
+# link the build scripts
+BUILDSCRIPTS="compile-innodb compile-innodb-debug"
+for script in $BUILDSCRIPTS ; do
+	ln -sf $TARGETDIR/$script ../../BUILD/
+done
+
+cd ../../mysql-test/t
+ln -sf ../$TARGETDIR/mysql-test/*.test ../$TARGETDIR/mysql-test/*.opt .
+cd ../r
+ln -sf ../$TARGETDIR/mysql-test/*.result .
+cd ../include
+ln -sf ../$TARGETDIR/mysql-test/*.inc .
+
+# Apply any patches that are needed to make the mysql-test suite successful.
+# These patches are usually needed because of deviations of behavior between
+# the stock InnoDB and the InnoDB Plugin.
+cd ../..
+for patch in storage/innobase/mysql-test/patches/*.diff ; do
+	if [ "${patch}" != "storage/innobase/mysql-test/patches/*.diff" ] ; then
+		patch -p0 < ${patch}
+	fi
+done
diff --git a/storage/innodb_plugin/srv/srv0que.c b/storage/innodb_plugin/srv/srv0que.c
new file mode 100644
index 00000000000..fc50a86a55c
--- /dev/null
+++ b/storage/innodb_plugin/srv/srv0que.c
@@ -0,0 +1,49 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file srv/srv0que.c
+Server query execution
+
+Created 6/5/1996 Heikki Tuuri
+*******************************************************/
+
+#include "srv0que.h"
+
+#include "srv0srv.h"
+#include "sync0sync.h"
+#include "os0thread.h"
+#include "usr0sess.h"
+#include "que0que.h"
+
+/**********************************************************************//**
+Enqueues a task to server task queue and releases a worker thread, if there
+is a suspended one. */
+UNIV_INTERN
+void
+srv_que_task_enqueue_low(
+/*=====================*/
+	que_thr_t*	thr)	/*!< in: query thread */
+{
+	ut_ad(thr);
+	ut_ad(mutex_own(&kernel_mutex));
+
+	UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
+
+	srv_release_threads(SRV_WORKER, 1);
+}
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c
similarity index 70%
rename from storage/innobase/srv/srv0srv.c
rename to storage/innodb_plugin/srv/srv0srv.c
index 36c3d450aae..79fa08e7cdf 100644
--- a/storage/innobase/srv/srv0srv.c
+++ b/storage/innodb_plugin/srv/srv0srv.c
@@ -1,4 +1,56 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, 2009 Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/***********************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+***********************************************************************/
+
+/**************************************************//**
+@file srv/srv0srv.c
 The database server main program
 
 NOTE: SQL Server 7 uses something which the documentation
@@ -20,14 +72,14 @@ Windows 2000 will have something called thread pooling
 Another possibility could be to use some very fast user space
 thread library. This might confuse NT though.
 
-(c) 1995 Innobase Oy
-
 Created 10/8/1995 Heikki Tuuri
 *******************************************************/
+
 /* Dummy comment */
 #include "srv0srv.h"
 
 #include "ut0mem.h"
+#include "ut0ut.h"
 #include "os0proc.h"
 #include "mem0mem.h"
 #include "mem0pool.h"
@@ -42,154 +94,132 @@ Created 10/8/1995 Heikki Tuuri
 #include "trx0purge.h"
 #include "ibuf0ibuf.h"
 #include "buf0flu.h"
+#include "buf0lru.h"
 #include "btr0sea.h"
 #include "dict0load.h"
 #include "dict0boot.h"
 #include "srv0start.h"
 #include "row0mysql.h"
 #include "ha_prototypes.h"
+#include "trx0i_s.h"
 
 /* This is set to TRUE if the MySQL user has set it in MySQL; currently
 affects only FOREIGN KEY definition parsing */
-ibool	srv_lower_case_table_names	= FALSE;
+UNIV_INTERN ibool	srv_lower_case_table_names	= FALSE;
 
 /* The following counter is incremented whenever there is some user activity
 in the server */
-ulint	srv_activity_count	= 0;
+UNIV_INTERN ulint	srv_activity_count	= 0;
 
 /* The following is the maximum allowed duration of a lock wait. */
-ulint	srv_fatal_semaphore_wait_threshold = 600;
+UNIV_INTERN ulint	srv_fatal_semaphore_wait_threshold = 600;
 
 /* How much data manipulation language (DML) statements need to be delayed,
 in microseconds, in order to reduce the lagging of the purge thread. */
-ulint	srv_dml_needed_delay = 0;
+UNIV_INTERN ulint	srv_dml_needed_delay = 0;
 
-ibool	srv_lock_timeout_and_monitor_active = FALSE;
-ibool	srv_error_monitor_active = FALSE;
+UNIV_INTERN ibool	srv_lock_timeout_and_monitor_active = FALSE;
+UNIV_INTERN ibool	srv_error_monitor_active = FALSE;
 
-const char*	srv_main_thread_op_info = "";
+UNIV_INTERN const char*	srv_main_thread_op_info = "";
 
-/* Prefix used by MySQL to indicate pre-5.1 table name encoding */
-const char	srv_mysql50_table_name_prefix[9] = "#mysql50#";
+/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
+UNIV_INTERN const char	srv_mysql50_table_name_prefix[9] = "#mysql50#";
 
 /* Server parameters which are read from the initfile */
 
 /* The following three are dir paths which are catenated before file
 names, where the file name itself may also contain a path */
 
-char*	srv_data_home	= NULL;
+UNIV_INTERN char*	srv_data_home	= NULL;
 #ifdef UNIV_LOG_ARCHIVE
-char*	srv_arch_dir	= NULL;
+UNIV_INTERN char*	srv_arch_dir	= NULL;
 #endif /* UNIV_LOG_ARCHIVE */
 
-ibool	srv_file_per_table = FALSE;	/* store to its own file each table
-					created by an user; data dictionary
-					tables are in the system tablespace
-					0 */
-ibool	srv_locks_unsafe_for_binlog = FALSE;	/* Place locks to
-						records only i.e. do
-						not use next-key
-						locking except on
-						duplicate key checking
-						and foreign key
-						checking */
-ulint	srv_n_data_files = 0;
-char**	srv_data_file_names = NULL;
-ulint*	srv_data_file_sizes = NULL;	/* size in database pages */
+/** store to its own file each table created by an user; data
+dictionary tables are in the system tablespace 0 */
+UNIV_INTERN my_bool	srv_file_per_table;
+/** The file format to use on new *.ibd files. */
+UNIV_INTERN ulint	srv_file_format = 0;
+/** Whether to check file format during startup.  A value of
+DICT_TF_FORMAT_MAX + 1 means no checking ie. FALSE.  The default is to
+set it to the highest format we support. */
+UNIV_INTERN ulint	srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
 
-ibool	srv_auto_extend_last_data_file	= FALSE; /* if TRUE, then we
-						 auto-extend the last data
-						 file */
-ulint	srv_last_file_size_max	= 0;		 /* if != 0, this tells
-						 the max size auto-extending
-						 may increase the last data
-						 file size */
-ulong	srv_auto_extend_increment = 8;		 /* If the last data file is
-						 auto-extended, we add this
-						 many pages to it at a time */
-ulint*	srv_data_file_is_raw_partition = NULL;
+#if DICT_TF_FORMAT_51
+# error "DICT_TF_FORMAT_51 must be 0!"
+#endif
+/** Place locks to records only i.e. do not use next-key locking except
+on duplicate key checking and foreign key checking */
+UNIV_INTERN ibool	srv_locks_unsafe_for_binlog = FALSE;
+
+UNIV_INTERN ulint	srv_n_data_files = 0;
+UNIV_INTERN char**	srv_data_file_names = NULL;
+/* size in database pages */
+UNIV_INTERN ulint*	srv_data_file_sizes = NULL;
+
+/* if TRUE, then we auto-extend the last data file */
+UNIV_INTERN ibool	srv_auto_extend_last_data_file	= FALSE;
+/* if != 0, this tells the max size auto-extending may increase the
+last data file size */
+UNIV_INTERN ulint	srv_last_file_size_max	= 0;
+/* If the last data file is auto-extended, we add this
+many pages to it at a time */
+UNIV_INTERN ulong	srv_auto_extend_increment = 8;
+UNIV_INTERN ulint*	srv_data_file_is_raw_partition = NULL;
 
 /* If the following is TRUE we do not allow inserts etc. This protects
 the user from forgetting the 'newraw' keyword to my.cnf */
 
-ibool	srv_created_new_raw	= FALSE;
+UNIV_INTERN ibool	srv_created_new_raw	= FALSE;
 
-char**	srv_log_group_home_dirs = NULL;
+UNIV_INTERN char**	srv_log_group_home_dirs = NULL;
 
-ulint	srv_n_log_groups	= ULINT_MAX;
-ulint	srv_n_log_files		= ULINT_MAX;
-ulint	srv_log_file_size	= ULINT_MAX;	/* size in database pages */
-ulint	srv_log_buffer_size	= ULINT_MAX;	/* size in database pages */
-ulong	srv_flush_log_at_trx_commit = 1;
+UNIV_INTERN ulint	srv_n_log_groups	= ULINT_MAX;
+UNIV_INTERN ulint	srv_n_log_files		= ULINT_MAX;
+/* size in database pages */
+UNIV_INTERN ulint	srv_log_file_size	= ULINT_MAX;
+/* size in database pages */
+UNIV_INTERN ulint	srv_log_buffer_size	= ULINT_MAX;
+UNIV_INTERN ulong	srv_flush_log_at_trx_commit = 1;
 
-byte	srv_latin1_ordering[256]	/* The sort order table of the latin1
-					character set. The following table is
-					the MySQL order as of Feb 10th, 2002 */
-= {
-  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
-, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
-, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
-, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
-, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27
-, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F
-, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
-, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
-, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
-, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
-, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
-, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F
-, 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
-, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
-, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
-, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
-, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
-, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F
-, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97
-, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F
-, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7
-, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF
-, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7
-, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF
-, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
-, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
-, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7
-, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF
-, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
-, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
-, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7
-, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
-};
+/* Try to flush dirty pages so as to avoid IO bursts at
+the checkpoints. */
+UNIV_INTERN char	srv_adaptive_flushing	= TRUE;
 
-ulint	srv_pool_size		= ULINT_MAX;	/* size in pages; MySQL inits
-						this to size in kilobytes but
-						we normalize this to pages in
-						srv_boot() */
-ulint	srv_awe_window_size	= 0;		/* size in pages; MySQL inits
-						this to bytes, but we
-						normalize it to pages in
-						srv_boot() */
-ulint	srv_mem_pool_size	= ULINT_MAX;	/* size in bytes */
-ulint	srv_lock_table_size	= ULINT_MAX;
+/* The sort order table of the MySQL latin1_swedish_ci character set
+collation */
+UNIV_INTERN const byte*	srv_latin1_ordering;
 
+/* use os/external memory allocator */
+UNIV_INTERN my_bool	srv_use_sys_malloc	= TRUE;
+/* requested size in kilobytes */
+UNIV_INTERN ulint	srv_buf_pool_size	= ULINT_MAX;
+/* previously requested size */
+UNIV_INTERN ulint	srv_buf_pool_old_size;
+/* current size in kilobytes */
+UNIV_INTERN ulint	srv_buf_pool_curr_size	= 0;
+/* size in bytes */
+UNIV_INTERN ulint	srv_mem_pool_size	= ULINT_MAX;
+UNIV_INTERN ulint	srv_lock_table_size	= ULINT_MAX;
 
-ulint   srv_io_capacity         = ULINT_MAX;    /* Number of IO operations per
-                                                   second the server can do */
+/* This parameter is deprecated. Use srv_n_io_[read|write]_threads
+instead. */
+UNIV_INTERN ulint	srv_n_file_io_threads	= ULINT_MAX;
+UNIV_INTERN ulint	srv_n_read_io_threads	= ULINT_MAX;
+UNIV_INTERN ulint	srv_n_write_io_threads	= ULINT_MAX;
 
-ibool   srv_extra_dirty_writes = TRUE;  /* Write dirty pages to disk when pct
-                                           dirty < max dirty pct */
-
-ulint	srv_n_read_io_threads	= ULINT_MAX;
-ulint	srv_n_write_io_threads	= ULINT_MAX;
-ulint	srv_max_merged_io = 64;
+/* User settable value of the number of pages that must be present
+in the buffer cache and accessed sequentially for InnoDB to trigger a
+readahead request. */
+UNIV_INTERN ulong	srv_read_ahead_threshold	= 56;
 
 #ifdef UNIV_LOG_ARCHIVE
-ibool	srv_log_archive_on	= FALSE;
-ibool	srv_archive_recovery	= 0;
-dulint	srv_archive_recovery_limit_lsn;
+UNIV_INTERN ibool		srv_log_archive_on	= FALSE;
+UNIV_INTERN ibool		srv_archive_recovery	= 0;
+UNIV_INTERN ib_uint64_t	srv_archive_recovery_limit_lsn;
 #endif /* UNIV_LOG_ARCHIVE */
 
-ulint	srv_lock_wait_timeout	= 1024 * 1024 * 1024;
-
 /* This parameter is used to throttle the number of insert buffers that are
 merged in a batch. By increasing this parameter on a faster disk you can
 possibly reduce the number of I/O operations performed to complete the
@@ -198,92 +228,89 @@ background loop when the system is idle (low load), on a busy system
 the parameter is scaled down by a factor of 4, this is to avoid putting
 a heavier load on the I/O sub system. */
 
-ulong	srv_insert_buffer_batch_size = 20;
+UNIV_INTERN ulong	srv_insert_buffer_batch_size = 20;
 
-char*	srv_file_flush_method_str = NULL;
-ulint	srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-ulint	srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
+UNIV_INTERN char*	srv_file_flush_method_str = NULL;
+UNIV_INTERN ulint	srv_unix_file_flush_method = SRV_UNIX_FSYNC;
+UNIV_INTERN ulint	srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
 
-ulint	srv_max_n_open_files	  = 300;
+UNIV_INTERN ulint	srv_max_n_open_files	  = 300;
+
+/* Number of IO operations per second the server can do */
+UNIV_INTERN ulong	srv_io_capacity         = 200;
 
 /* The InnoDB main thread tries to keep the ratio of modified pages
 in the buffer pool to all database pages in the buffer pool smaller than
 the following number. But it is not guaranteed that the value stays below
 that during a time of heavy update/insert activity. */
 
-ulong	srv_max_buf_pool_modified_pct	= 90;
+UNIV_INTERN ulong	srv_max_buf_pool_modified_pct	= 75;
 
 /* variable counts amount of data read in total (in bytes) */
-ulint srv_data_read = 0;
+UNIV_INTERN ulint srv_data_read = 0;
 
 /* here we count the amount of data written in total (in bytes) */
-ulint srv_data_written = 0;
+UNIV_INTERN ulint srv_data_written = 0;
 
 /* the number of the log write requests done */
-ulint srv_log_write_requests = 0;
+UNIV_INTERN ulint srv_log_write_requests = 0;
 
 /* the number of physical writes to the log performed */
-ulint srv_log_writes = 0;
+UNIV_INTERN ulint srv_log_writes = 0;
 
 /* amount of data written to the log files in bytes */
-ulint srv_os_log_written = 0;
+UNIV_INTERN ulint srv_os_log_written = 0;
 
 /* amount of writes being done to the log files */
-ulint srv_os_log_pending_writes = 0;
+UNIV_INTERN ulint srv_os_log_pending_writes = 0;
 
 /* we increase this counter, when there we don't have enough space in the
 log buffer and have to flush it */
-ulint srv_log_waits = 0;
+UNIV_INTERN ulint srv_log_waits = 0;
 
 /* this variable counts the amount of times, when the doublewrite buffer
 was flushed */
-ulint srv_dblwr_writes = 0;
+UNIV_INTERN ulint srv_dblwr_writes = 0;
 
 /* here we store the number of pages that have been flushed to the
 doublewrite buffer */
-ulint srv_dblwr_pages_written = 0;
+UNIV_INTERN ulint srv_dblwr_pages_written = 0;
 
 /* in this variable we store the number of write requests issued */
-ulint srv_buf_pool_write_requests = 0;
+UNIV_INTERN ulint srv_buf_pool_write_requests = 0;
 
 /* here we store the number of times when we had to wait for a free page
 in the buffer pool. It happens when the buffer pool is full and we need
 to make a flush, in order to be able to read or create a page. */
-ulint srv_buf_pool_wait_free = 0;
+UNIV_INTERN ulint srv_buf_pool_wait_free = 0;
 
 /* variable to count the number of pages that were written from buffer
 pool to the disk */
-ulint srv_buf_pool_flushed = 0;
+UNIV_INTERN ulint srv_buf_pool_flushed = 0;
 
-/* variable to count the number of buffer pool reads that led to the
+/** Number of buffer pool reads that led to the
 reading of a disk page */
-ulint srv_buf_pool_reads = 0;
+UNIV_INTERN ulint srv_buf_pool_reads = 0;
 
-/* variable to count the number of sequential read-aheads */
-ulint srv_read_ahead_seq = 0;
+/** Number of sequential read-aheads */
+UNIV_INTERN ulint srv_read_ahead_seq = 0;
 
-/* variable to count the number of random read-aheads */
-ulint srv_read_ahead_rnd = 0;
-
-/* An option to enable the fix for "Bug#43660 SHOW INDEXES/ANALYZE does
-NOT update cardinality for indexes of InnoDB table". By default we are
-running with the fix disabled because MySQL 5.1 is frozen for such
-behavioral changes. */
-char srv_use_legacy_cardinality_algorithm = TRUE;
+/** Number of random read-aheads */
+UNIV_INTERN ulint srv_read_ahead_rnd = 0;
 
 /* structure to pass status variables to MySQL */
-export_struc export_vars;
+UNIV_INTERN export_struc export_vars;
 
 /* If the following is != 0 we do not allow inserts etc. This protects
 the user from forgetting the innodb_force_recovery keyword to my.cnf */
 
-ulint	srv_force_recovery	= 0;
+UNIV_INTERN ulint	srv_force_recovery	= 0;
 /*-----------------------*/
 /* We are prepared for a situation that we have this many threads waiting for
 a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
 value. */
 
-ulint	srv_max_n_threads	= 0;
+UNIV_INTERN ulint	srv_max_n_threads	= 0;
 
 /* The following controls how many threads we let inside InnoDB concurrently:
 threads waiting for locks are not counted into the number because otherwise
@@ -293,45 +320,38 @@ Value 10 should be good if there are less than 4 processors + 4 disks in the
 computer. Bigger computers need bigger values. Value 0 will disable the
 concurrency check. */
 
-ibool   srv_thread_concurrency_timer_based = TRUE;
-ulong	srv_thread_concurrency	= 0;
-ulong	srv_commit_concurrency	= 0;
+UNIV_INTERN ulong	srv_thread_concurrency	= 0;
 
-os_fast_mutex_t	srv_conc_mutex;		/* this mutex protects srv_conc data
-					structures */
-lint	srv_conc_n_threads	= 0;	/* number of transactions that
-					have declared_to_be_inside_innodb
-					set. It used to be a non-error
-					for this value to drop below
-					zero temporarily. This is no
-					longer true. We'll, however,
-					keep the lint datatype to add
-					assertions to catch any corner
-					cases that we may have
-					missed. */
-ulint	srv_conc_n_waiting_threads = 0;	/* number of OS threads waiting in the
-					FIFO for a permission to enter InnoDB
-					*/
+/* this mutex protects srv_conc data structures */
+UNIV_INTERN os_fast_mutex_t	srv_conc_mutex;
+/* number of transactions that have declared_to_be_inside_innodb set.
+It used to be a non-error for this value to drop below zero temporarily.
+This is no longer true. We'll, however, keep the lint datatype to add
+assertions to catch any corner cases that we may have missed. */
+UNIV_INTERN lint	srv_conc_n_threads	= 0;
+/* number of OS threads waiting in the FIFO for a permission to enter
+InnoDB */
+UNIV_INTERN ulint	srv_conc_n_waiting_threads = 0;
 
 typedef struct srv_conc_slot_struct	srv_conc_slot_t;
 struct srv_conc_slot_struct{
-	os_event_t			event;		/* event to wait */
-	ibool				reserved;	/* TRUE if slot
+	os_event_t			event;		/*!< event to wait */
+	ibool				reserved;	/*!< TRUE if slot
 							reserved */
-	ibool				wait_ended;	/* TRUE when another
+	ibool				wait_ended;	/*!< TRUE when another
 							thread has already set
 							the event and the
 							thread in this slot is
 							free to proceed; but
 							reserved may still be
 							TRUE at that point */
-	UT_LIST_NODE_T(srv_conc_slot_t)	srv_conc_queue;	/* queue node */
+	UT_LIST_NODE_T(srv_conc_slot_t)	srv_conc_queue;	/*!< queue node */
 };
 
-UT_LIST_BASE_NODE_T(srv_conc_slot_t)	srv_conc_queue;	/* queue of threads
-							waiting to get in */
-srv_conc_slot_t* srv_conc_slots;			/* array of wait
-							slots */
+/* queue of threads waiting to get in */
+UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t)	srv_conc_queue;
+/* array of wait slots */
+UNIV_INTERN srv_conc_slot_t* srv_conc_slots;
 
 /* Number of times a thread is allowed to enter InnoDB within the same
 SQL query after it has once got the ticket at srv_conc_enter_innodb */
@@ -342,115 +362,123 @@ SQL query after it has once got the ticket at srv_conc_enter_innodb */
 merge to completion before shutdown. If it is set to 2, do not even flush the
 buffer pool to data files at the shutdown: we effectively 'crash'
 InnoDB (but lose no committed transactions). */
-ulint	srv_fast_shutdown	= 0;
+UNIV_INTERN ulint	srv_fast_shutdown	= 0;
 
 /* Generate a innodb_status.<pid> file */
-ibool	srv_innodb_status	= FALSE;
+UNIV_INTERN ibool	srv_innodb_status	= FALSE;
 
-ibool	srv_use_doublewrite_buf	= TRUE;
-ibool	srv_use_checksums = TRUE;
+/* When estimating number of different key values in an index, sample
+this many index pages */
+UNIV_INTERN unsigned long long	srv_stats_sample_pages = 8;
 
-ibool	srv_set_thread_priorities = TRUE;
-int	srv_query_thread_priority = 0;
+UNIV_INTERN ibool	srv_use_doublewrite_buf	= TRUE;
+UNIV_INTERN ibool	srv_use_checksums = TRUE;
 
-/* TRUE if the Address Windowing Extensions of Windows are used; then we must
-disable adaptive hash indexes */
-ibool	srv_use_awe			= FALSE;
-ibool	srv_use_adaptive_hash_indexes	= TRUE;
+UNIV_INTERN ibool	srv_set_thread_priorities = TRUE;
+UNIV_INTERN int	srv_query_thread_priority = 0;
+
+UNIV_INTERN ulong	srv_replication_delay		= 0;
 
 /*-------------------------------------------*/
-ulong	srv_n_spin_wait_rounds	= 30;
-ulong	srv_n_free_tickets_to_enter = 500;
-ulong	srv_thread_sleep_delay = 10000;
-ulint	srv_spin_wait_delay	= 6;
-ibool	srv_priority_boost	= TRUE;
+UNIV_INTERN ulong	srv_n_spin_wait_rounds	= 30;
+UNIV_INTERN ulong	srv_n_free_tickets_to_enter = 500;
+UNIV_INTERN ulong	srv_thread_sleep_delay = 10000;
+UNIV_INTERN ulong	srv_spin_wait_delay	= 6;
+UNIV_INTERN ibool	srv_priority_boost	= TRUE;
 
-ibool	srv_print_thread_releases	= FALSE;
-ibool	srv_print_lock_waits		= FALSE;
-ibool	srv_print_buf_io		= FALSE;
-ibool	srv_print_log_io		= FALSE;
-ibool	srv_print_latch_waits		= FALSE;
+#ifdef UNIV_DEBUG
+UNIV_INTERN ibool	srv_print_thread_releases	= FALSE;
+UNIV_INTERN ibool	srv_print_lock_waits		= FALSE;
+UNIV_INTERN ibool	srv_print_buf_io		= FALSE;
+UNIV_INTERN ibool	srv_print_log_io		= FALSE;
+UNIV_INTERN ibool	srv_print_latch_waits		= FALSE;
+#endif /* UNIV_DEBUG */
+
+UNIV_INTERN ulint		srv_n_rows_inserted		= 0;
+UNIV_INTERN ulint		srv_n_rows_updated		= 0;
+UNIV_INTERN ulint		srv_n_rows_deleted		= 0;
+UNIV_INTERN ulint		srv_n_rows_read			= 0;
 
-ulint		srv_n_rows_inserted		= 0;
-ulint		srv_n_rows_updated		= 0;
-ulint		srv_n_rows_deleted		= 0;
-ulint		srv_n_rows_read			= 0;
-#ifndef UNIV_HOTBACKUP
 static ulint	srv_n_rows_inserted_old		= 0;
 static ulint	srv_n_rows_updated_old		= 0;
 static ulint	srv_n_rows_deleted_old		= 0;
 static ulint	srv_n_rows_read_old		= 0;
-#endif /* !UNIV_HOTBACKUP */
 
-ulint		srv_n_lock_wait_count		= 0;
-ulint		srv_n_lock_wait_current_count	= 0;
-ib_longlong	srv_n_lock_wait_time		= 0;
-ulint		srv_n_lock_max_wait_time	= 0;
+UNIV_INTERN ulint		srv_n_lock_wait_count		= 0;
+UNIV_INTERN ulint		srv_n_lock_wait_current_count	= 0;
+UNIV_INTERN ib_int64_t	srv_n_lock_wait_time		= 0;
+UNIV_INTERN ulint		srv_n_lock_max_wait_time	= 0;
 
 
 /*
   Set the following to 0 if you want InnoDB to write messages on
   stderr on startup/shutdown
 */
-ibool	srv_print_verbose_log		= TRUE;
-ibool	srv_print_innodb_monitor	= FALSE;
-ibool	srv_print_innodb_lock_monitor	= FALSE;
-ibool	srv_print_innodb_tablespace_monitor = FALSE;
-ibool	srv_print_innodb_table_monitor = FALSE;
+UNIV_INTERN ibool	srv_print_verbose_log		= TRUE;
+UNIV_INTERN ibool	srv_print_innodb_monitor	= FALSE;
+UNIV_INTERN ibool	srv_print_innodb_lock_monitor	= FALSE;
+UNIV_INTERN ibool	srv_print_innodb_tablespace_monitor = FALSE;
+UNIV_INTERN ibool	srv_print_innodb_table_monitor = FALSE;
 
 /* Array of English strings describing the current state of an
 i/o handler thread */
 
-const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
-const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
+UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
+UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
 
-time_t	srv_last_monitor_time;
+UNIV_INTERN time_t	srv_last_monitor_time;
 
-mutex_t	srv_innodb_monitor_mutex;
+UNIV_INTERN mutex_t	srv_innodb_monitor_mutex;
 
 /* Mutex for locking srv_monitor_file */
-mutex_t	srv_monitor_file_mutex;
+UNIV_INTERN mutex_t	srv_monitor_file_mutex;
 /* Temporary file for innodb monitor output */
-FILE*	srv_monitor_file;
+UNIV_INTERN FILE*	srv_monitor_file;
 /* Mutex for locking srv_dict_tmpfile.
 This mutex has a very high rank; threads reserving it should not
 be holding any InnoDB latches. */
-mutex_t	srv_dict_tmpfile_mutex;
+UNIV_INTERN mutex_t	srv_dict_tmpfile_mutex;
 /* Temporary file for output from the data dictionary */
-FILE*	srv_dict_tmpfile;
+UNIV_INTERN FILE*	srv_dict_tmpfile;
 /* Mutex for locking srv_misc_tmpfile.
 This mutex has a very low rank; threads reserving it should not
 acquire any further latches or sleep before releasing this one. */
-mutex_t	srv_misc_tmpfile_mutex;
+UNIV_INTERN mutex_t	srv_misc_tmpfile_mutex;
 /* Temporary file for miscellanous diagnostic output */
-FILE*	srv_misc_tmpfile;
+UNIV_INTERN FILE*	srv_misc_tmpfile;
 
-ulint	srv_main_thread_process_no	= 0;
-ulint	srv_main_thread_id		= 0;
+UNIV_INTERN ulint	srv_main_thread_process_no	= 0;
+UNIV_INTERN ulint	srv_main_thread_id		= 0;
 
 /* The following count work done by srv_master_thread. */
 
-/* Iterations by the 'once per second' loop */
-ulint   srv_main_1_second_loops         = 0;
-/* Calls to sleep by the 'once per second' loop */
-ulint   srv_main_sleeps                 = 0;
-/* Iterations by the 'once per 10 seconds' loop */
-ulint   srv_main_10_second_loops        = 0;
-/* Iterations of the loop bounded by the 'background_loop' label */
-ulint   srv_main_background_loops       = 0;
-/* Iterations of the loop bounded by the 'flush_loop' label */
-ulint   srv_main_flush_loops            = 0;
-/* Calls to log_buffer_flush_to_disk */
-ulint   srv_sync_flush                  = 0;
-/* Calls to log_buffer_flush_maybe_sync */
-ulint   srv_async_flush                 = 0;
+/* Iterations by the 'once per second' loop. */
+static ulint   srv_main_1_second_loops		= 0;
+/* Calls to sleep by the 'once per second' loop. */
+static ulint   srv_main_sleeps			= 0;
+/* Iterations by the 'once per 10 seconds' loop. */
+static ulint   srv_main_10_second_loops		= 0;
+/* Iterations of the loop bounded by the 'background_loop' label. */
+static ulint   srv_main_background_loops	= 0;
+/* Iterations of the loop bounded by the 'flush_loop' label. */
+static ulint   srv_main_flush_loops		= 0;
+/* Log writes involving flush. */
+static ulint   srv_log_writes_and_flush		= 0;
+/* Log writes not including flush. */
+static ulint   srv_log_buffer_writes		= 0;
 
-/* Number of microseconds threads wait because of
-innodb_thread_concurrency */
-static ib_longlong srv_thread_wait_mics = 0;
+/* This is only ever touched by the master thread. It records the
+time when the last flush of log file has happened. The master
+thread ensures that we flush the log files at least once per
+second. */
+static time_t	srv_last_log_flush_time;
 
-/* Number of microseconds for spinlock delay */
-static ib_longlong srv_timed_spin_delay = 0;
+/* The master thread performs various tasks based on the current
+state of IO activity and the level of IO utilization is past
+intervals. Following macros define thresholds for these conditions. */
+#define SRV_PEND_IO_THRESHOLD	(PCT_IO(3))
+#define SRV_RECENT_IO_ACTIVITY	(PCT_IO(5))
+#define SRV_PAST_IO_ACTIVITY	(PCT_IO(200))
 
 /*
 	IMPLEMENTATION OF THE SERVER MAIN PROGRAM
@@ -622,83 +650,63 @@ Unix.*/
 
 /* Thread slot in the thread table */
 struct srv_slot_struct{
-	os_thread_id_t	id;		/* thread id */
-	os_thread_t	handle;		/* thread handle */
-	ulint		type;		/* thread type: user, utility etc. */
-	ibool		in_use;		/* TRUE if this slot is in use */
-	ibool		suspended;	/* TRUE if the thread is waiting
+	os_thread_id_t	id;		/*!< thread id */
+	os_thread_t	handle;		/*!< thread handle */
+	unsigned	type:3;		/*!< thread type: user, utility etc. */
+	unsigned	in_use:1;	/*!< TRUE if this slot is in use */
+	unsigned	suspended:1;	/*!< TRUE if the thread is waiting
 					for the event of this slot */
-	ib_time_t	suspend_time;	/* time when the thread was
+	ib_time_t	suspend_time;	/*!< time when the thread was
 					suspended */
-	os_event_t	event;		/* event used in suspending the
+	os_event_t	event;		/*!< event used in suspending the
 					thread when it has nothing to do */
-	que_thr_t*	thr;		/* suspended query thread (only
+	que_thr_t*	thr;		/*!< suspended query thread (only
 					used for MySQL threads) */
 };
 
 /* Table for MySQL threads where they will be suspended to wait for locks */
-srv_slot_t*	srv_mysql_table = NULL;
+UNIV_INTERN srv_slot_t*	srv_mysql_table = NULL;
 
-os_event_t	srv_lock_timeout_thread_event;
+UNIV_INTERN os_event_t	srv_lock_timeout_thread_event;
 
-srv_sys_t*	srv_sys	= NULL;
+UNIV_INTERN srv_sys_t*	srv_sys	= NULL;
 
-byte		srv_pad1[64];	/* padding to prevent other memory update
-				hotspots from residing on the same memory
-				cache line */
-mutex_t*	kernel_mutex_temp;/* mutex protecting the server, trx structs,
-				query threads, and lock table */
-byte		srv_pad2[64];	/* padding to prevent other memory update
-				hotspots from residing on the same memory
-				cache line */
+/* padding to prevent other memory update hotspots from residing on
+the same memory cache line */
+UNIV_INTERN byte	srv_pad1[64];
+/* mutex protecting the server, trx structs, query threads, and lock table */
+UNIV_INTERN mutex_t*	kernel_mutex_temp;
+/* padding to prevent other memory update hotspots from residing on
+the same memory cache line */
+UNIV_INTERN byte	srv_pad2[64];
 
+#if 0
 /* The following three values measure the urgency of the jobs of
 buffer, version, and insert threads. They may vary from 0 - 1000.
 The server mutex protects all these variables. The low-water values
 tell that the server can acquiesce the utility when the value
 drops below this low-water mark. */
 
-ulint	srv_meter[SRV_MASTER + 1];
-ulint	srv_meter_low_water[SRV_MASTER + 1];
-ulint	srv_meter_high_water[SRV_MASTER + 1];
-ulint	srv_meter_high_water2[SRV_MASTER + 1];
-ulint	srv_meter_foreground[SRV_MASTER + 1];
+static ulint	srv_meter[SRV_MASTER + 1];
+static ulint	srv_meter_low_water[SRV_MASTER + 1];
+static ulint	srv_meter_high_water[SRV_MASTER + 1];
+static ulint	srv_meter_high_water2[SRV_MASTER + 1];
+static ulint	srv_meter_foreground[SRV_MASTER + 1];
+#endif
 
 /* The following values give info about the activity going on in
 the database. They are protected by the server mutex. The arrays
 are indexed by the type of the thread. */
 
-ulint	srv_n_threads_active[SRV_MASTER + 1];
-ulint	srv_n_threads[SRV_MASTER + 1];
+UNIV_INTERN ulint	srv_n_threads_active[SRV_MASTER + 1];
+UNIV_INTERN ulint	srv_n_threads[SRV_MASTER + 1];
 
-static void time_spin_delay()
-{
-  ulint start_sec, end_sec;
-  ulint start_usec, end_usec;
-  int i;
-
-  srv_timed_spin_delay = 0;
-
-  if (ut_usectime(&start_sec, &start_usec))
-    return;
-
-  for (i = 0; i < (int)SYNC_SPIN_ROUNDS; ++i)
-    ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
-
-  if (ut_usectime(&end_sec, &end_usec))
-    return;
-
-  srv_timed_spin_delay =ut_usecdiff(end_sec, end_usec,
-                                    start_sec, start_usec);
-}
-
-/*************************************************************************
+/***********************************************************************
 Prints counters for work done by srv_master_thread. */
-
 static
 void
-srv_print_extra(
-/*===================*/
+srv_print_master_thread_info(
+/*=========================*/
 	FILE  *file)    /* in: output stream */
 {
 	fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
@@ -706,26 +714,19 @@ srv_print_extra(
 		srv_main_1_second_loops, srv_main_sleeps,
 		srv_main_10_second_loops, srv_main_background_loops,
 		srv_main_flush_loops);
-	fprintf(file, "srv_master_thread log flush: %lu sync, %lu async\n",
-		srv_sync_flush, srv_async_flush);
-        fprintf(file, "srv_wait_thread_mics %lld microseconds, %.1f seconds\n",
-                srv_thread_wait_mics,
-                (double) srv_thread_wait_mics / 1000000.0);
-        fprintf(file,
-                "spinlock delay for %d delay %d rounds is %lld mics\n",
-                (int)srv_spin_wait_delay,
-                (int)SYNC_SPIN_ROUNDS,
-                srv_timed_spin_delay);
+	fprintf(file, "srv_master_thread log flush and writes: %lu "
+		      " log writes only: %lu\n",
+		      srv_log_writes_and_flush, srv_log_buffer_writes);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Sets the info describing an i/o thread current state. */
-
+UNIV_INTERN
 void
 srv_set_io_thread_op_info(
 /*======================*/
-	ulint		i,	/* in: the 'segment' of the i/o thread */
-	const char*	str)	/* in: constant char string describing the
+	ulint		i,	/*!< in: the 'segment' of the i/o thread */
+	const char*	str)	/*!< in: constant char string describing the
 				state */
 {
 	ut_a(i < SRV_MAX_N_IO_THREADS);
@@ -733,25 +734,25 @@ srv_set_io_thread_op_info(
 	srv_io_thread_op_info[i] = str;
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Accessor function to get pointer to n'th slot in the server thread
-table. */
+table.
+@return	pointer to the slot */
 static
 srv_slot_t*
 srv_table_get_nth_slot(
 /*===================*/
-				/* out: pointer to the slot */
-	ulint	index)		/* in: index of the slot */
+	ulint	index)		/*!< in: index of the slot */
 {
 	ut_a(index < OS_THREAD_MAX_N);
 
 	return(srv_sys->threads + index);
 }
 
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Gets the number of threads in the system. */
-
+/*********************************************************************//**
+Gets the number of threads in the system.
+@return	sum of srv_n_threads[] */
+UNIV_INTERN
 ulint
 srv_get_n_threads(void)
 /*===================*/
@@ -771,16 +772,16 @@ srv_get_n_threads(void)
 	return(n_threads);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Reserves a slot in the thread table for the current thread. Also creates the
 thread local storage struct for the current thread. NOTE! The server mutex
-has to be reserved by the caller! */
+has to be reserved by the caller!
+@return	reserved slot index */
 static
 ulint
 srv_table_reserve_slot(
 /*===================*/
-			/* out: reserved slot index */
-	ulint	type)	/* in: type of the thread: one of SRV_COM, ... */
+	enum srv_thread_type	type)	/*!< in: type of the thread */
 {
 	srv_slot_t*	slot;
 	ulint		i;
@@ -800,9 +801,9 @@ srv_table_reserve_slot(
 
 	slot->in_use = TRUE;
 	slot->suspended = FALSE;
+	slot->type = type;
 	slot->id = os_thread_get_curr_id();
 	slot->handle = os_thread_get_curr();
-	slot->type = type;
 
 	thr_local_create();
 
@@ -811,19 +812,19 @@ srv_table_reserve_slot(
 	return(i);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Suspends the calling thread to wait for the event in its thread slot.
-NOTE! The server mutex has to be reserved by the caller! */
+NOTE! The server mutex has to be reserved by the caller!
+@return	event for the calling thread to wait */
 static
 os_event_t
 srv_suspend_thread(void)
 /*====================*/
-			/* out: event for the calling thread to wait */
 {
-	srv_slot_t*	slot;
-	os_event_t	event;
-	ulint		slot_no;
-	ulint		type;
+	srv_slot_t*		slot;
+	os_event_t		event;
+	ulint			slot_no;
+	enum srv_thread_type	type;
 
 	ut_ad(mutex_own(&kernel_mutex));
 
@@ -831,9 +832,8 @@ srv_suspend_thread(void)
 
 	if (srv_print_thread_releases) {
 		fprintf(stderr,
-			"Suspending thread %lu to slot %lu meter %lu\n",
-			(ulong) os_thread_get_curr_id(), (ulong) slot_no,
-			(ulong) srv_meter[SRV_RECOVERY]);
+			"Suspending thread %lu to slot %lu\n",
+			(ulong) os_thread_get_curr_id(), (ulong) slot_no);
 	}
 
 	slot = srv_table_get_nth_slot(slot_no);
@@ -855,20 +855,18 @@ srv_suspend_thread(void)
 
 	return(event);
 }
-#endif /* !UNIV_HOTBACKUP */
 
-/*************************************************************************
+/*********************************************************************//**
 Releases threads of the type given from suspension in the thread table.
-NOTE! The server mutex has to be reserved by the caller! */
-
+NOTE! The server mutex has to be reserved by the caller!
+@return number of threads released: this may be less than n if not
+enough threads were suspended at the moment */
+UNIV_INTERN
 ulint
 srv_release_threads(
 /*================*/
-			/* out: number of threads released: this may be
-			< n if not enough threads were suspended at the
-			moment */
-	ulint	type,	/* in: thread type */
-	ulint	n)	/* in: number of threads to release */
+	enum srv_thread_type	type,	/*!< in: thread type */
+	ulint			n)	/*!< in: number of threads to release */
 {
 	srv_slot_t*	slot;
 	ulint		i;
@@ -894,10 +892,9 @@ srv_release_threads(
 			if (srv_print_thread_releases) {
 				fprintf(stderr,
 					"Releasing thread %lu type %lu"
-					" from slot %lu meter %lu\n",
+					" from slot %lu\n",
 					(ulong) slot->id, (ulong) type,
-					(ulong) i,
-					(ulong) srv_meter[SRV_RECOVERY]);
+					(ulong) i);
 			}
 
 			count++;
@@ -911,17 +908,17 @@ srv_release_threads(
 	return(count);
 }
 
-/*************************************************************************
-Returns the calling thread type. */
-
-ulint
+/*********************************************************************//**
+Returns the calling thread type.
+@return	SRV_COM, ... */
+UNIV_INTERN
+enum srv_thread_type
 srv_get_thread_type(void)
 /*=====================*/
-			/* out: SRV_COM, ... */
 {
-	ulint		slot_no;
-	srv_slot_t*	slot;
-	ulint		type;
+	ulint			slot_no;
+	srv_slot_t*		slot;
+	enum srv_thread_type	type;
 
 	mutex_enter(&kernel_mutex);
 
@@ -939,20 +936,17 @@ srv_get_thread_type(void)
 	return(type);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Initializes the server. */
-
+UNIV_INTERN
 void
 srv_init(void)
 /*==========*/
 {
 	srv_conc_slot_t*	conc_slot;
 	srv_slot_t*		slot;
-	dict_table_t*		table;
 	ulint			i;
 
-        time_spin_delay();
-
 	srv_sys = mem_alloc(sizeof(srv_sys_t));
 
 	kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
@@ -985,39 +979,20 @@ srv_init(void)
 	for (i = 0; i < SRV_MASTER + 1; i++) {
 		srv_n_threads_active[i] = 0;
 		srv_n_threads[i] = 0;
+#if 0
 		srv_meter[i] = 30;
 		srv_meter_low_water[i] = 50;
 		srv_meter_high_water[i] = 100;
 		srv_meter_high_water2[i] = 200;
 		srv_meter_foreground[i] = 250;
+#endif
 	}
 
 	UT_LIST_INIT(srv_sys->tasks);
 
-	/* create dummy table and index for old-style infimum and supremum */
-	table = dict_mem_table_create("SYS_DUMMY1",
-				      DICT_HDR_SPACE, 1, 0);
-	dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
-			       DATA_ENGLISH | DATA_NOT_NULL, 8);
+	/* Create dummy indexes for infimum and supremum records */
 
-	srv_sys->dummy_ind1 = dict_mem_index_create(
-		"SYS_DUMMY1", "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1);
-	dict_index_add_col(srv_sys->dummy_ind1, table, (dict_col_t*)
-			   dict_table_get_nth_col(table, 0), 0);
-	srv_sys->dummy_ind1->table = table;
-	/* create dummy table and index for new-style infimum and supremum */
-	table = dict_mem_table_create("SYS_DUMMY2",
-				      DICT_HDR_SPACE, 1, DICT_TF_COMPACT);
-	dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
-			       DATA_ENGLISH | DATA_NOT_NULL, 8);
-	srv_sys->dummy_ind2 = dict_mem_index_create(
-		"SYS_DUMMY2", "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1);
-	dict_index_add_col(srv_sys->dummy_ind2, table, (dict_col_t*)
-			   dict_table_get_nth_col(table, 0), 0);
-	srv_sys->dummy_ind2->table = table;
-
-	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
-	srv_sys->dummy_ind1->cached = srv_sys->dummy_ind2->cached = TRUE;
+	dict_ind_init();
 
 	/* Init the server concurrency restriction data structures */
 
@@ -1033,11 +1008,14 @@ srv_init(void)
 		conc_slot->event = os_event_create(NULL);
 		ut_a(conc_slot->event);
 	}
+
+	/* Initialize some INFORMATION SCHEMA internal structures */
+	trx_i_s_cache_init(trx_i_s_cache);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Frees the OS fast mutex created in srv_init(). */
-
+UNIV_INTERN
 void
 srv_free(void)
 /*==========*/
@@ -1045,14 +1023,15 @@ srv_free(void)
 	os_fast_mutex_free(&srv_conc_mutex);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Initializes the synchronization primitives, memory system, and the thread
 local storage. */
-
+UNIV_INTERN
 void
 srv_general_init(void)
 /*==================*/
 {
+	ut_mem_init();
 	os_sync_init();
 	sync_init();
 	mem_init(srv_mem_pool_size);
@@ -1062,105 +1041,16 @@ srv_general_init(void)
 /*======================= InnoDB Server FIFO queue =======================*/
 
 /* Maximum allowable purge history length.  <=0 means 'infinite'. */
-ulong	srv_max_purge_lag		= 0;
+UNIV_INTERN ulong	srv_max_purge_lag		= 0;
 
-/*************************************************************************
+/*********************************************************************//**
 Puts an OS thread to wait if there are too many concurrent threads
 (>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
-
-#ifdef UNIV_SYNC_ATOMIC
-static void
-inc_srv_conc_n_threads(lint *n_threads)
-{
-  *n_threads = os_atomic_increment(&srv_conc_n_threads, 1);
-}
-
-static void
-dec_srv_conc_n_threads()
-{
-  os_atomic_increment(&srv_conc_n_threads, -1);
-}
-#endif
-
-static void
-print_already_in_error(trx_t* trx)
-{
-	ut_print_timestamp(stderr);
-	fputs("  InnoDB: Error: trying to declare trx"
-	      " to enter InnoDB, but\n"
-	      "InnoDB: it already is declared.\n", stderr);
-	trx_print(stderr, trx, 0);
-	putc('\n', stderr);
-        return;
-}
-
-#ifdef UNIV_SYNC_ATOMIC
-static void
-enter_innodb_with_tickets(trx_t* trx)
-{
-	trx->declared_to_be_inside_innodb = TRUE;
-	trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
-        return;
-}
-
-static void
-srv_conc_enter_innodb_timer_based(trx_t* trx)
-{
-        lint               conc_n_threads;
-        ibool              has_yielded = FALSE;
-        ulint              has_slept = 0;
-
-	if (trx->declared_to_be_inside_innodb) {
-                print_already_in_error(trx);
-        }
-retry:
-	if (srv_conc_n_threads < (lint) srv_thread_concurrency) {
-                inc_srv_conc_n_threads(&conc_n_threads);
-	        if (conc_n_threads <= (lint) srv_thread_concurrency) {
-                       enter_innodb_with_tickets(trx);
-                       return;
-                }
-                dec_srv_conc_n_threads(&conc_n_threads);
-       }
-       if (!has_yielded)
-       {
-               has_yielded = TRUE;
-               os_thread_yield();
-               goto retry;
-       }
-       if (trx->has_search_latch
-           || NULL != UT_LIST_GET_FIRST(trx->trx_locks)) {
-
-                inc_srv_conc_n_threads(&conc_n_threads);
-                enter_innodb_with_tickets(trx);
-                return;
-       }
-       if (has_slept < 2)
-       {
-               trx->op_info = "sleeping before entering InnoDB";
-               os_thread_sleep(10000);
-               trx->op_info = "";
-               has_slept++;
-       }
-       inc_srv_conc_n_threads(&conc_n_threads);
-       enter_innodb_with_tickets(trx);
-       return;
-}
-
-static void
-srv_conc_exit_innodb_timer_based(trx_t* trx)
-{
-        dec_srv_conc_n_threads();
-	trx->declared_to_be_inside_innodb = FALSE;
-	trx->n_tickets_to_enter_innodb = 0;
-        return;
-}
-#endif
-
+UNIV_INTERN
 void
 srv_conc_enter_innodb(
 /*==================*/
-	trx_t*	trx)	/* in: transaction object associated with the
+	trx_t*	trx)	/*!< in: transaction object associated with the
 			thread */
 {
 	ibool			has_slept = FALSE;
@@ -1170,11 +1060,10 @@ srv_conc_enter_innodb(
 	if (trx->mysql_thd != NULL
 	    && thd_is_replication_slave_thread(trx->mysql_thd)) {
 
-		/* TODO Do something more interesting (based on a config
-		parameter). Some users what to give the replication
-		thread very low priority, see http://bugs.mysql.com/25078
-		This can be done by introducing
-		innodb_replication_delay(ms) config parameter */
+		UT_WAIT_FOR(srv_conc_n_threads
+			    < (lint)srv_thread_concurrency,
+			    srv_replication_delay * 1000);
+
 		return;
 	}
 
@@ -1187,17 +1076,15 @@ srv_conc_enter_innodb(
 		return;
 	}
 
-#ifdef UNIV_SYNC_ATOMIC
-        if (srv_thread_concurrency_timer_based) {
-          srv_conc_enter_innodb_timer_based(trx);
-          return;
-        }
-#endif
-
 	os_fast_mutex_lock(&srv_conc_mutex);
 retry:
 	if (trx->declared_to_be_inside_innodb) {
-                print_already_in_error(trx);
+		ut_print_timestamp(stderr);
+		fputs("  InnoDB: Error: trying to declare trx"
+		      " to enter InnoDB, but\n"
+		      "InnoDB: it already is declared.\n", stderr);
+		trx_print(stderr, trx, 0);
+		putc('\n', stderr);
 		os_fast_mutex_unlock(&srv_conc_mutex);
 
 		return;
@@ -1316,48 +1203,40 @@ retry:
 	os_fast_mutex_unlock(&srv_conc_mutex);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 This lets a thread enter InnoDB regardless of the number of threads inside
 InnoDB. This must be called when a thread ends a lock wait. */
-
+UNIV_INTERN
 void
 srv_conc_force_enter_innodb(
 /*========================*/
-	trx_t*	trx)	/* in: transaction object associated with the
+	trx_t*	trx)	/*!< in: transaction object associated with the
 			thread */
 {
-
 	if (UNIV_LIKELY(!srv_thread_concurrency)) {
 
 		return;
 	}
 
 	ut_ad(srv_conc_n_threads >= 0);
-#ifdef UNIV_SYNC_ATOMIC
-        if (srv_thread_concurrency_timer_based) {
-                lint               conc_n_threads;
 
-                inc_srv_conc_n_threads(&conc_n_threads);
-	        trx->declared_to_be_inside_innodb = TRUE;
-	        trx->n_tickets_to_enter_innodb = 1;
-                return;
-        }
-#endif
 	os_fast_mutex_lock(&srv_conc_mutex);
+
 	srv_conc_n_threads++;
 	trx->declared_to_be_inside_innodb = TRUE;
 	trx->n_tickets_to_enter_innodb = 1;
+
 	os_fast_mutex_unlock(&srv_conc_mutex);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 This must be called when a thread exits InnoDB in a lock wait or at the
 end of an SQL statement. */
-
+UNIV_INTERN
 void
 srv_conc_force_exit_innodb(
 /*=======================*/
-	trx_t*	trx)	/* in: transaction object associated with the
+	trx_t*	trx)	/*!< in: transaction object associated with the
 			thread */
 {
 	srv_conc_slot_t*	slot	= NULL;
@@ -1373,14 +1252,6 @@ srv_conc_force_exit_innodb(
 		return;
 	}
 
-#ifdef UNIV_SYNC_ATOMIC
-        if (srv_thread_concurrency_timer_based)
-        {
-                srv_conc_exit_innodb_timer_based(trx);
-                return;
-        }
-#endif
-
 	os_fast_mutex_lock(&srv_conc_mutex);
 
 	ut_ad(srv_conc_n_threads > 0);
@@ -1415,13 +1286,13 @@ srv_conc_force_exit_innodb(
 	}
 }
 
-/*************************************************************************
+/*********************************************************************//**
 This must be called when a thread exits InnoDB. */
-
+UNIV_INTERN
 void
 srv_conc_exit_innodb(
 /*=================*/
-	trx_t*	trx)	/* in: transaction object associated with the
+	trx_t*	trx)	/*!< in: transaction object associated with the
 			thread */
 {
 	if (trx->n_tickets_to_enter_innodb > 0) {
@@ -1440,13 +1311,13 @@ srv_conc_exit_innodb(
 
 /*========================================================================*/
 
-/*************************************************************************
-Normalizes init parameter values to use units we use inside InnoDB. */
+/*********************************************************************//**
+Normalizes init parameter values to use units we use inside InnoDB.
+@return	DB_SUCCESS or error code */
 static
 ulint
 srv_normalize_init_values(void)
 /*===========================*/
-				/* out: DB_SUCCESS or error code */
 {
 	ulint	n;
 	ulint	i;
@@ -1465,30 +1336,18 @@ srv_normalize_init_values(void)
 
 	srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
 
-	srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024);
-
-	srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE;
-
-	if (srv_use_awe) {
-		/* If we are using AWE we must save memory in the 32-bit
-		address space of the process, and cannot bind the lock
-		table size to the real buffer pool size. */
-
-		srv_lock_table_size = 20 * srv_awe_window_size;
-	} else {
-		srv_lock_table_size = 5 * srv_pool_size;
-	}
+	srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
 
 	return(DB_SUCCESS);
 }
 
-/*************************************************************************
-Boots the InnoDB server. */
-
+/*********************************************************************//**
+Boots the InnoDB server.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
 ulint
 srv_boot(void)
 /*==========*/
-			/* out: DB_SUCCESS or error code */
 {
 	ulint	err;
 
@@ -1513,15 +1372,14 @@ srv_boot(void)
 	return(DB_SUCCESS);
 }
 
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
+/*********************************************************************//**
 Reserves a slot in the thread table for the current MySQL OS thread.
-NOTE! The kernel mutex has to be reserved by the caller! */
+NOTE! The kernel mutex has to be reserved by the caller!
+@return	reserved slot */
 static
 srv_slot_t*
 srv_table_reserve_slot_for_mysql(void)
 /*==================================*/
-			/* out: reserved slot */
 {
 	srv_slot_t*	slot;
 	ulint		i;
@@ -1578,33 +1436,32 @@ srv_table_reserve_slot_for_mysql(void)
 
 	return(slot);
 }
-#endif /* !UNIV_HOTBACKUP */
 
-/*******************************************************************
+/***************************************************************//**
 Puts a MySQL OS thread to wait for a lock to be released. If an error
 occurs during the wait trx->error_state associated with thr is
 != DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
 are possible errors. DB_DEADLOCK is returned if selective deadlock
 resolution chose this transaction as a victim. */
-
+UNIV_INTERN
 void
 srv_suspend_mysql_thread(
 /*=====================*/
-	que_thr_t*	thr)	/* in: query thread associated with the MySQL
+	que_thr_t*	thr)	/*!< in: query thread associated with the MySQL
 				OS thread */
 {
-#ifndef UNIV_HOTBACKUP
 	srv_slot_t*	slot;
 	os_event_t	event;
 	double		wait_time;
 	trx_t*		trx;
-	ibool		had_dict_lock			= FALSE;
+	ulint		had_dict_lock;
 	ibool		was_declared_inside_innodb	= FALSE;
-	ib_longlong	start_time			= 0;
-	ib_longlong	finish_time;
+	ib_int64_t	start_time			= 0;
+	ib_int64_t	finish_time;
 	ulint		diff_time;
 	ulint		sec;
 	ulint		ms;
+	ulong		lock_wait_timeout;
 
 	ut_ad(!mutex_own(&kernel_mutex));
 
@@ -1653,7 +1510,7 @@ srv_suspend_mysql_thread(
 		if (ut_usectime(&sec, &ms) == -1) {
 			start_time = -1;
 		} else {
-			start_time = (ib_longlong)sec * 1000000 + ms;
+			start_time = (ib_int64_t) sec * 1000000 + ms;
 		}
 	}
 	/* Wake the lock timeout monitor thread, if it is suspended */
@@ -1673,23 +1530,35 @@ srv_suspend_mysql_thread(
 		srv_conc_force_exit_innodb(trx);
 	}
 
-	/* Release possible foreign key check latch */
-	if (trx->dict_operation_lock_mode == RW_S_LATCH) {
-
-		had_dict_lock = TRUE;
+	had_dict_lock = trx->dict_operation_lock_mode;
 
+	switch (had_dict_lock) {
+	case RW_S_LATCH:
+		/* Release foreign key check latch */
 		row_mysql_unfreeze_data_dictionary(trx);
+		break;
+	case RW_X_LATCH:
+		/* Release fast index creation latch */
+		row_mysql_unlock_data_dictionary(trx);
+		break;
 	}
 
 	ut_a(trx->dict_operation_lock_mode == 0);
 
-	/* Wait for the release */
+	/* Suspend this thread and wait for the event. */
 
 	os_event_wait(event);
 
-	if (had_dict_lock) {
+	/* After resuming, reacquire the data dictionary latch if
+	necessary. */
 
+	switch (had_dict_lock) {
+	case RW_S_LATCH:
 		row_mysql_freeze_data_dictionary(trx);
+		break;
+	case RW_X_LATCH:
+		row_mysql_lock_data_dictionary(trx);
+		break;
 	}
 
 	if (was_declared_inside_innodb) {
@@ -1711,7 +1580,7 @@ srv_suspend_mysql_thread(
 		if (ut_usectime(&sec, &ms) == -1) {
 			finish_time = -1;
 		} else {
-			finish_time = (ib_longlong)sec * 1000000 + ms;
+			finish_time = (ib_int64_t) sec * 1000000 + ms;
 		}
 
 		diff_time = (ulint) (finish_time - start_time);
@@ -1734,30 +1603,29 @@ srv_suspend_mysql_thread(
 
 	mutex_exit(&kernel_mutex);
 
-	if (srv_lock_wait_timeout < 100000000
-	    && wait_time > (double)srv_lock_wait_timeout) {
+	/* InnoDB system transactions (such as the purge, and
+	incomplete transactions that are being rolled back after crash
+	recovery) will use the global value of
+	innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */
+	lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd);
+
+	if (lock_wait_timeout < 100000000
+	    && wait_time > (double) lock_wait_timeout) {
 
 		trx->error_state = DB_LOCK_WAIT_TIMEOUT;
 	}
-#else /* UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds.  Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-#endif /* UNIV_HOTBACKUP */
 }
 
-/************************************************************************
+/********************************************************************//**
 Releases a MySQL OS thread waiting for a lock to be released, if the
 thread is already suspended. */
-
+UNIV_INTERN
 void
 srv_release_mysql_thread_if_suspended(
 /*==================================*/
-	que_thr_t*	thr)	/* in: query thread associated with the
+	que_thr_t*	thr)	/*!< in: query thread associated with the
 				MySQL OS thread	 */
 {
-#ifndef UNIV_HOTBACKUP
 	srv_slot_t*	slot;
 	ulint		i;
 
@@ -1777,28 +1645,16 @@ srv_release_mysql_thread_if_suspended(
 	}
 
 	/* not found */
-#else /* UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds.  Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-#endif /* UNIV_HOTBACKUP */
 }
 
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************
+/******************************************************************//**
 Refreshes the values used to calculate per-second averages. */
 static
-ibool
+void
 srv_refresh_innodb_monitor_stats(void)
 /*==================================*/
 {
-	/* Sometimes we will skip stats update to avoid deadlock, since
-	since this function is called by the background wake-up thread */
-	if (mutex_enter_nowait(&srv_innodb_monitor_mutex)) {
-		/* mutex_enter_nowait returns 1 on failure */
-		return FALSE;
-	}
+	mutex_enter(&srv_innodb_monitor_mutex);
 
 	srv_last_monitor_time = time(NULL);
 
@@ -1817,16 +1673,19 @@ srv_refresh_innodb_monitor_stats(void)
 	srv_n_rows_read_old = srv_n_rows_read;
 
 	mutex_exit(&srv_innodb_monitor_mutex);
-	return TRUE;
 }
 
-/**********************************************************************
+/******************************************************************//**
 Outputs to a file the output of the InnoDB Monitor. */
-
+UNIV_INTERN
 void
 srv_printf_innodb_monitor(
 /*======================*/
-	FILE*	file)		/* in: output stream */
+	FILE*	file,		/*!< in: output stream */
+	ulint*	trx_start,	/*!< out: file position of the start of
+				the list of active transactions */
+	ulint*	trx_end)	/*!< out: file position of the end of
+				the list of active transactions */
 {
 	double	time_elapsed;
 	time_t	current_time;
@@ -1854,10 +1713,10 @@ srv_printf_innodb_monitor(
 		"Per second averages calculated from the last %lu seconds\n",
 		(ulong)time_elapsed);
 
-  	fputs("----------\n"
+	fputs("----------\n"
 		"BACKGROUND THREAD\n"
 		"----------\n", file);
-        srv_print_extra(file);
+	srv_print_master_thread_info(file);
 
 	fputs("----------\n"
 	      "SEMAPHORES\n"
@@ -1880,11 +1739,9 @@ srv_printf_innodb_monitor(
 
 	mutex_exit(&dict_foreign_err_mutex);
 
-	/* Print open transaction details */
 	lock_print_info_summary(file);
-
 	if (trx_start) {
-		long    t = ftell(file);
+		long	t = ftell(file);
 		if (t < 0) {
 			*trx_start = ULINT_UNDEFINED;
 		} else {
@@ -1893,14 +1750,13 @@ srv_printf_innodb_monitor(
 	}
 	lock_print_info_all_transactions(file);
 	if (trx_end) {
-		long    t = ftell(file);
+		long	t = ftell(file);
 		if (t < 0) {
 			*trx_end = ULINT_UNDEFINED;
 		} else {
 			*trx_end = (ulint) t;
 		}
 	}
-
 	fputs("--------\n"
 	      "FILE I/O\n"
 	      "--------\n", file);
@@ -1938,13 +1794,6 @@ srv_printf_innodb_monitor(
 	fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
 		dict_sys->size);
 
-	if (srv_use_awe) {
-		fprintf(file,
-			"In addition to that %lu MB of AWE memory allocated\n",
-			(ulong) (srv_pool_size
-				 / ((1024 * 1024) / UNIV_PAGE_SIZE)));
-	}
-
 	buf_print_io(file);
 
 	fputs("--------------\n"
@@ -2007,11 +1856,12 @@ srv_printf_innodb_monitor(
 	fflush(file);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Function to pass InnoDB status variables to MySQL */
-
+UNIV_INTERN
 void
 srv_export_innodb_status(void)
+/*==========================*/
 {
 	mutex_enter(&srv_innodb_monitor_mutex);
 
@@ -2047,18 +1897,13 @@ srv_export_innodb_status(void)
 #endif /* UNIV_DEBUG */
 	export_vars.innodb_buffer_pool_pages_total = buf_pool->curr_size;
 
-	export_vars.innodb_buffer_pool_pages_misc = buf_pool->max_size
+	export_vars.innodb_buffer_pool_pages_misc = buf_pool->curr_size
 		- UT_LIST_GET_LEN(buf_pool->LRU)
 		- UT_LIST_GET_LEN(buf_pool->free);
-#ifdef UNIV_SYNC_ATOMIC
-	export_vars.innodb_have_sync_atomic = 1;
+#ifdef HAVE_ATOMIC_BUILTINS
+	export_vars.innodb_have_atomic_builtins = 1;
 #else
-	export_vars.innodb_have_sync_atomic = 0;
-#endif
-#ifdef UNIV_DISABLE_MEM_POOL
-	export_vars.innodb_heap_enabled = 0;
-#else
-	export_vars.innodb_heap_enabled = 1;
+	export_vars.innodb_have_atomic_builtins = 0;
 #endif
 	export_vars.innodb_page_size = UNIV_PAGE_SIZE;
 	export_vars.innodb_log_waits = srv_log_waits;
@@ -2089,21 +1934,20 @@ srv_export_innodb_status(void)
 	export_vars.innodb_rows_inserted = srv_n_rows_inserted;
 	export_vars.innodb_rows_updated = srv_n_rows_updated;
 	export_vars.innodb_rows_deleted = srv_n_rows_deleted;
-	export_vars.innodb_wake_ups = sync_wake_ups;
 
 	mutex_exit(&srv_innodb_monitor_mutex);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors. */
-
+This also prints the info output by various InnoDB monitors.
+@return	a dummy parameter */
+UNIV_INTERN
 os_thread_ret_t
 srv_lock_timeout_and_monitor_thread(
 /*================================*/
-			/* out: a dummy parameter */
 	void*	arg __attribute__((unused)))
-			/* in: a dummy parameter required by
+			/*!< in: a dummy parameter required by
 			os_thread_create */
 {
 	srv_slot_t*	slot;
@@ -2141,13 +1985,14 @@ loop:
 		last_monitor_time = time(NULL);
 
 		if (srv_print_innodb_monitor) {
-			srv_printf_innodb_monitor(stderr);
+			srv_printf_innodb_monitor(stderr, NULL, NULL);
 		}
 
 		if (srv_innodb_status) {
 			mutex_enter(&srv_monitor_file_mutex);
 			rewind(srv_monitor_file);
-			srv_printf_innodb_monitor(srv_monitor_file);
+			srv_printf_innodb_monitor(srv_monitor_file, NULL,
+						  NULL);
 			os_file_set_eof(srv_monitor_file);
 			mutex_exit(&srv_monitor_file_mutex);
 		}
@@ -2212,12 +2057,19 @@ loop:
 		slot = srv_mysql_table + i;
 
 		if (slot->in_use) {
+			trx_t*	trx;
+			ulong	lock_wait_timeout;
+
 			some_waits = TRUE;
 
 			wait_time = ut_difftime(ut_time(), slot->suspend_time);
 
-			if (srv_lock_wait_timeout < 100000000
-			    && (wait_time > (double) srv_lock_wait_timeout
+			trx = thr_get_trx(slot->thr);
+			lock_wait_timeout = thd_lock_wait_timeout(
+				trx->mysql_thd);
+
+			if (lock_wait_timeout < 100000000
+			    && (wait_time > (double) lock_wait_timeout
 				|| wait_time < 0)) {
 
 				/* Timeout exceeded or a wrap-around in system
@@ -2227,10 +2079,9 @@ loop:
 				possible that the lock has already been
 				granted: in that case do nothing */
 
-				if (thr_get_trx(slot->thr)->wait_lock) {
+				if (trx->wait_lock) {
 					lock_cancel_waiting_and_release(
-						thr_get_trx(slot->thr)
-						->wait_lock);
+						trx->wait_lock);
 				}
 			}
 		}
@@ -2274,23 +2125,24 @@ exit_func:
 	OS_THREAD_DUMMY_RETURN;
 }
 
-/*************************************************************************
+/*********************************************************************//**
 A thread which prints warnings about semaphore waits which have lasted
 too long. These can be used to track bugs which cause hangs.
-NOTE: This thread should not wait for any innodb mutexes or rw_locks.
-A deadlock could arise where the thread holding that lock requires waking
-by this background thread while this thread is blocked on that lock. */
-
+@return	a dummy parameter */
+UNIV_INTERN
 os_thread_ret_t
 srv_error_monitor_thread(
 /*=====================*/
-			/* out: a dummy parameter */
 	void*	arg __attribute__((unused)))
-			/* in: a dummy parameter required by
+			/*!< in: a dummy parameter required by
 			os_thread_create */
 {
 	/* number of successive fatal timeouts observed */
-	ulint	fatal_cnt	= 0;
+	ulint		fatal_cnt	= 0;
+	ib_uint64_t	old_lsn;
+	ib_uint64_t	new_lsn;
+
+	old_lsn = srv_start_lsn;
 
 #ifdef UNIV_DEBUG_THREAD_CREATION
 	fprintf(stderr, "Error monitor thread starts, id %lu\n",
@@ -2299,17 +2151,42 @@ srv_error_monitor_thread(
 loop:
 	srv_error_monitor_active = TRUE;
 
+	/* Try to track a strange bug reported by Harald Fuchs and others,
+	where the lsn seems to decrease at times */
+
+	new_lsn = log_get_lsn();
+
+	if (new_lsn < old_lsn) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			"  InnoDB: Error: old log sequence number %llu"
+			" was greater\n"
+			"InnoDB: than the new log sequence number %llu!\n"
+			"InnoDB: Please submit a bug report"
+			" to http://bugs.mysql.com\n",
+			old_lsn, new_lsn);
+	}
+
+	old_lsn = new_lsn;
+
 	if (difftime(time(NULL), srv_last_monitor_time) > 60) {
-		/* We refresh InnoDB Monitor values so that averages are
+		/* We referesh InnoDB Monitor values so that averages are
 		printed from at most 60 last seconds */
 
 		srv_refresh_innodb_monitor_stats();
 	}
 
+	/* Update the statistics collected for deciding LRU
+	eviction policy. */
+	buf_LRU_stat_update();
+
+	/* Update the statistics collected for flush rate policy. */
+	buf_flush_stat_update();
+
 	/* In case mutex_exit is not a memory barrier, it is
 	theoretically possible some threads are left waiting though
 	the semaphore is already released. Wake up those threads: */
-	
+
 	sync_arr_wake_threads_if_sema_free();
 
 	if (sync_array_print_long_waits()) {
@@ -2351,13 +2228,13 @@ loop:
 	OS_THREAD_DUMMY_RETURN;
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Tells the InnoDB server that there has been activity in the database
 and wakes up the master thread if it is suspended (not sleeping). Used
 in the MySQL interface. Note that there is a small chance that the master
 thread stays suspended (we do not protect our operation with the kernel
 mutex, for performace reasons). */
-
+UNIV_INTERN
 void
 srv_active_wake_master_thread(void)
 /*===============================*/
@@ -2374,9 +2251,9 @@ srv_active_wake_master_thread(void)
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Wakes up the master thread if it is suspended or being suspended. */
-
+UNIV_INTERN
 void
 srv_wake_master_thread(void)
 /*========================*/
@@ -2390,30 +2267,46 @@ srv_wake_master_thread(void)
 	mutex_exit(&kernel_mutex);
 }
 
-/*************************************************************************
-Returns the number of IO operations that is X percent of the capacity.
+/**********************************************************************
+The master thread is tasked to ensure that flush of log file happens
+once every second in the background. This is to ensure that not more
+than one second of trxs are lost in case of crash when
+innodb_flush_logs_at_trx_commit != 1 */
+static
+void
+srv_sync_log_buffer_in_background(void)
+/*===================================*/
+{
+	time_t	current_time = time(NULL);
 
-PCT_IO(5) -> returns the number of IO operations that is 5% of the max
-where max is srv_io_capacity.
-*/
-#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) pct / 100.0)))
-
-/*************************************************************************
-The master thread controlling the server. */
+	srv_main_thread_op_info = "flushing log";
+	if (difftime(current_time, srv_last_log_flush_time) >= 1) {
+		log_buffer_sync_in_background(TRUE);
+		srv_last_log_flush_time = current_time;
+		srv_log_writes_and_flush++;
+	} else {
+		/* Actually we don't need to write logs here.
+		We are just being extra safe here by forcing
+		the log buffer to log file. */
+		log_buffer_sync_in_background(FALSE);
+		srv_log_buffer_writes++;
+	}
+}
 
+/*********************************************************************//**
+The master thread controlling the server.
+@return	a dummy parameter */
+UNIV_INTERN
 os_thread_ret_t
 srv_master_thread(
 /*==============*/
-			/* out: a dummy parameter */
 	void*	arg __attribute__((unused)))
-			/* in: a dummy parameter required by
+			/*!< in: a dummy parameter required by
 			os_thread_create */
 {
 	os_event_t	event;
-	time_t		last_flush_time;
-	time_t		current_time;
 	ulint		old_activity_count;
-	ulint		n_pages_purged;
+	ulint		n_pages_purged	= 0;
 	ulint		n_bytes_merged;
 	ulint		n_pages_flushed;
 	ulint		n_bytes_archived;
@@ -2429,9 +2322,6 @@ srv_master_thread(
 	fprintf(stderr, "Master thread starts, id %lu\n",
 		os_thread_pf(os_thread_get_curr_id()));
 #endif
-        fprintf(stderr, "InnoDB master thread running with io_capacity %lu\n",
-                srv_io_capacity);
-
 	srv_main_thread_process_no = os_proc_get_number();
 	srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
 
@@ -2467,13 +2357,14 @@ loop:
 	/* ---- We run the following loop approximately once per second
 	when there is database activity */
 
+	srv_last_log_flush_time = time(NULL);
 	skip_sleep = FALSE;
 
 	for (i = 0; i < 10; i++) {
 		n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
 			+ buf_pool->n_pages_written;
 		srv_main_thread_op_info = "sleeping";
-		srv_main_1_second_loops++;	
+		srv_main_1_second_loops++;
 
 		if (!skip_sleep) {
 
@@ -2498,13 +2389,8 @@ loop:
 			goto background_loop;
 		}
 
-		/* We flush the log once in a second even if no commit
-		is issued or the we have specified in my.cnf no flush
-		at transaction commit */
-
-		srv_main_thread_op_info = "flushing log";
-		log_buffer_flush_to_disk();
-		srv_sync_flush++;
+		/* Flush logs if needed */
+		srv_sync_log_buffer_in_background();
 
 		srv_main_thread_op_info = "making checkpoint";
 		log_free_check();
@@ -2517,15 +2403,13 @@ loop:
 			+ log_sys->n_pending_writes;
 		n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
 			+ buf_pool->n_pages_written;
-		if (n_pend_ios < PCT_IO(3) && (n_ios - n_ios_old < PCT_IO(5))) {
+		if (n_pend_ios < SRV_PEND_IO_THRESHOLD
+		    && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
 			srv_main_thread_op_info = "doing insert buffer merge";
-			ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4);
+			ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
 
-			srv_main_thread_op_info = "flushing log";
-
-			/* No fsync when srv_flush_log_at_trx_commit != 1 */
-			log_buffer_flush_maybe_sync();
-			srv_async_flush++;
+			/* Flush logs if needed */
+			srv_sync_log_buffer_in_background();
 		}
 
 		if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
@@ -2536,7 +2420,7 @@ loop:
 
 			n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
 							  PCT_IO(100),
-							  ut_dulint_max);
+							  IB_ULONGLONG_MAX);
 
 			/* If we had to do the flush, it may have taken
 			even more than 1 second, and also, there may be more
@@ -2544,6 +2428,22 @@ loop:
 			iteration of this loop. */
 
 			skip_sleep = TRUE;
+		} else if (srv_adaptive_flushing) {
+
+			/* Try to keep the rate of flushing of dirty
+			pages such that redo log generation does not
+			produce bursts of IO at checkpoint time. */
+			ulint n_flush = buf_flush_get_desired_flush_rate();
+
+			if (n_flush) {
+				n_flush = ut_min(PCT_IO(100), n_flush);
+				n_pages_flushed =
+					buf_flush_batch(
+						BUF_FLUSH_LIST,
+						n_flush,
+						IB_ULONGLONG_MAX);
+				skip_sleep = TRUE;
+			}
 		}
 
 		if (srv_activity_count == old_activity_count) {
@@ -2564,48 +2464,42 @@ loop:
 	mem_validate_all_blocks();
 #endif
 	/* If i/os during the 10 second period were less than 200% of
-         capacity, we assume that there is free disk i/o capacity
-         available, and it makes sense to flush srv_io_capacity pages.
+	capacity, we assume that there is free disk i/o capacity
+	available, and it makes sense to flush srv_io_capacity pages.
 
-         Note that this is done regardless of the fraction of dirty
-         pages relative to the max requested by the user. The one second
-         loop above requests writes for that case. The writes done here
-         are not required, and may be disabled. */
+	Note that this is done regardless of the fraction of dirty
+	pages relative to the max requested by the user. The one second
+	loop above requests writes for that case. The writes done here
+	are not required, and may be disabled. */
 
 	n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
 	n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
 		+ buf_pool->n_pages_written;
-	if (srv_extra_dirty_writes &&
-	    n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) {
+
+	srv_main_10_second_loops++;
+	if (n_pend_ios < SRV_PEND_IO_THRESHOLD
+	    && (n_ios - n_ios_very_old < SRV_PAST_IO_ACTIVITY)) {
 
 		srv_main_thread_op_info = "flushing buffer pool pages";
-		buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
+		buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
+				IB_ULONGLONG_MAX);
 
-		srv_main_thread_op_info = "flushing log";
-		/* No fsync when srv_flush_log_at_trx_commit != 1 */
-		log_buffer_flush_maybe_sync();
-		srv_async_flush++;
+		/* Flush logs if needed */
+		srv_sync_log_buffer_in_background();
 	}
 
 	/* We run a batch of insert buffer merge every 10 seconds,
 	even if the server were active */
 
 	srv_main_thread_op_info = "doing insert buffer merge";
-	ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4);
+	ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
 
-	srv_main_thread_op_info = "flushing log";
-	/* No fsync when srv_flush_log_at_trx_commit != 1 */
-	log_buffer_flush_maybe_sync();
-	srv_async_flush++;
+	/* Flush logs if needed */
+	srv_sync_log_buffer_in_background();
 
 	/* We run a full purge every 10 seconds, even if the server
 	were active */
-
-	n_pages_purged = 1;
-
-	last_flush_time = time(NULL);
-
-	while (n_pages_purged) {
+	do {
 
 		if (srv_fast_shutdown && srv_shutdown_state > 0) {
 
@@ -2615,16 +2509,10 @@ loop:
 		srv_main_thread_op_info = "purging";
 		n_pages_purged = trx_purge();
 
-		current_time = time(NULL);
+		/* Flush logs if needed */
+		srv_sync_log_buffer_in_background();
 
-		if (difftime(current_time, last_flush_time) > 1) {
-			srv_main_thread_op_info = "flushing log";
-
-			log_buffer_flush_to_disk();
-			last_flush_time = current_time;
-			srv_sync_flush++;
-		}
-	}
+	} while (n_pages_purged);
 
 	srv_main_thread_op_info = "flushing buffer pool pages";
 
@@ -2638,7 +2526,7 @@ loop:
 
 		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
 						  PCT_IO(100),
-						  ut_dulint_max);
+						  IB_ULONGLONG_MAX);
 	} else {
 		/* Otherwise, we only flush a small number of pages so that
 		we do not unnecessarily use much disk i/o capacity from
@@ -2646,7 +2534,7 @@ loop:
 
 		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
 						  PCT_IO(10),
-						  ut_dulint_max);
+						  IB_ULONGLONG_MAX);
 	}
 
 	srv_main_thread_op_info = "making checkpoint";
@@ -2679,7 +2567,7 @@ background_loop:
 
 	/* The server has been quiet for a while: start running background
 	operations */
-	srv_main_background_loops++;		
+	srv_main_background_loops++;
 	srv_main_thread_op_info = "doing background drop tables";
 
 	n_tables_to_drop = row_drop_tables_for_mysql_in_background();
@@ -2696,12 +2584,7 @@ background_loop:
 	srv_main_thread_op_info = "purging";
 
 	/* Run a full purge */
-
-	n_pages_purged = 1;
-
-	last_flush_time = time(NULL);
-
-	while (n_pages_purged) {
+	do {
 		if (srv_fast_shutdown && srv_shutdown_state > 0) {
 
 			break;
@@ -2710,16 +2593,10 @@ background_loop:
 		srv_main_thread_op_info = "purging";
 		n_pages_purged = trx_purge();
 
-		current_time = time(NULL);
+		/* Flush logs if needed */
+		srv_sync_log_buffer_in_background();
 
-		if (difftime(current_time, last_flush_time) > 1) {
-			srv_main_thread_op_info = "flushing log";
-
-			log_buffer_flush_to_disk();
-			last_flush_time = current_time;
-                        srv_sync_flush++;
-		}
-	}
+	} while (n_pages_purged);
 
 	srv_main_thread_op_info = "reserving kernel mutex";
 
@@ -2736,10 +2613,11 @@ background_loop:
 		n_bytes_merged = 0;
 	} else {
 		/* This should do an amount of IO similar to the number of
-		* dirty pages that will be flushed in the call to
-		* buf_flush_batch below. Otherwise, the system favors
-		* clean pages over cleanup throughput. */
-		n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100));
+		dirty pages that will be flushed in the call to
+		buf_flush_batch below. Otherwise, the system favors
+		clean pages over cleanup throughput. */
+		n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
+							   PCT_IO(100));
 	}
 
 	srv_main_thread_op_info = "reserving kernel mutex";
@@ -2757,7 +2635,7 @@ flush_loop:
 	if (srv_fast_shutdown < 2) {
 		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
 						  PCT_IO(100),
-						  ut_dulint_max);
+						  IB_ULONGLONG_MAX);
 	} else {
 		/* In the fastest shutdown we do not flush the buffer pool
 		to data files: we set n_pages_flushed to 0 artificially. */
@@ -2777,19 +2655,8 @@ flush_loop:
 	srv_main_thread_op_info = "waiting for buffer pool flush to end";
 	buf_flush_wait_batch_end(BUF_FLUSH_LIST);
 
-	srv_main_thread_op_info = "flushing log";
-
-	current_time = time(NULL);
-	if (difftime(current_time, last_flush_time) > 1) {
-		srv_main_thread_op_info = (char*) "flushing log";
-		log_buffer_flush_to_disk();
-		last_flush_time = current_time;
- 		srv_sync_flush++;
-	} else {
-		/* No fsync when srv_flush_log_at_trx_commit != 1 */
-		log_buffer_flush_maybe_sync();
-		srv_async_flush++;
-	}
+	/* Flush logs if needed */
+	srv_sync_log_buffer_in_background();
 
 	srv_main_thread_op_info = "making checkpoint";
 
@@ -2882,4 +2749,3 @@ suspend_thread:
 
 	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
 }
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/srv/srv0start.c b/storage/innodb_plugin/srv/srv0start.c
similarity index 75%
rename from storage/innobase/srv/srv0start.c
rename to storage/innodb_plugin/srv/srv0start.c
index 0b63d1a0b86..a942fd439a3 100644
--- a/storage/innobase/srv/srv0start.c
+++ b/storage/innodb_plugin/srv/srv0start.c
@@ -1,128 +1,173 @@
-/************************************************************************
-Starts the InnoDB database server
+/*****************************************************************************
 
-(c) 1996-2000 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/***********************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+***********************************************************************/
+
+/********************************************************************//**
+@file srv/srv0start.c
+Starts the InnoDB database server
 
 Created 2/16/1996 Heikki Tuuri
 *************************************************************************/
 
-#include "os0proc.h"
-#include "sync0sync.h"
 #include "ut0mem.h"
 #include "mem0mem.h"
-#include "mem0pool.h"
 #include "data0data.h"
 #include "data0type.h"
 #include "dict0dict.h"
 #include "buf0buf.h"
-#include "buf0flu.h"
-#include "buf0rea.h"
 #include "os0file.h"
 #include "os0thread.h"
 #include "fil0fil.h"
 #include "fsp0fsp.h"
 #include "rem0rec.h"
-#include "rem0cmp.h"
 #include "mtr0mtr.h"
 #include "log0log.h"
 #include "log0recv.h"
 #include "page0page.h"
 #include "page0cur.h"
 #include "trx0trx.h"
-#include "dict0boot.h"
-#include "dict0load.h"
 #include "trx0sys.h"
-#include "dict0crea.h"
 #include "btr0btr.h"
-#include "btr0pcur.h"
 #include "btr0cur.h"
-#include "btr0sea.h"
 #include "rem0rec.h"
-#include "srv0srv.h"
-#include "que0que.h"
-#include "usr0sess.h"
-#include "lock0lock.h"
-#include "trx0roll.h"
-#include "trx0purge.h"
-#include "row0ins.h"
-#include "row0sel.h"
-#include "row0upd.h"
-#include "row0row.h"
-#include "row0mysql.h"
-#include "lock0lock.h"
 #include "ibuf0ibuf.h"
-#include "pars0pars.h"
-#include "btr0sea.h"
 #include "srv0start.h"
-#include "que0que.h"
+#include "srv0srv.h"
+#ifndef UNIV_HOTBACKUP
+# include "os0proc.h"
+# include "sync0sync.h"
+# include "buf0flu.h"
+# include "buf0rea.h"
+# include "dict0boot.h"
+# include "dict0load.h"
+# include "que0que.h"
+# include "usr0sess.h"
+# include "lock0lock.h"
+# include "trx0roll.h"
+# include "trx0purge.h"
+# include "lock0lock.h"
+# include "pars0pars.h"
+# include "btr0sea.h"
+# include "rem0cmp.h"
+# include "dict0crea.h"
+# include "row0ins.h"
+# include "row0sel.h"
+# include "row0upd.h"
+# include "row0row.h"
+# include "row0mysql.h"
+# include "btr0pcur.h"
 
-/* Log sequence number immediately after startup */
-dulint		srv_start_lsn;
-/* Log sequence number at shutdown */
-dulint		srv_shutdown_lsn;
+/** Log sequence number immediately after startup */
+UNIV_INTERN ib_uint64_t	srv_start_lsn;
+/** Log sequence number at shutdown */
+UNIV_INTERN ib_uint64_t	srv_shutdown_lsn;
 
 #ifdef HAVE_DARWIN_THREADS
 # include <sys/utsname.h>
-ibool		srv_have_fullfsync = FALSE;
+/** TRUE if the F_FULLFSYNC option is available */
+UNIV_INTERN ibool	srv_have_fullfsync = FALSE;
 #endif
 
-ibool		srv_start_raw_disk_in_use = FALSE;
+/** TRUE if a raw partition is in use */
+UNIV_INTERN ibool	srv_start_raw_disk_in_use = FALSE;
 
-ulint		srv_sizeof_trx_t_in_ha_innodb_cc;
-
-ibool		srv_startup_is_before_trx_rollback_phase = FALSE;
-ibool		srv_is_being_started = FALSE;
-#ifndef UNIV_HOTBACKUP
+/** TRUE if the server is being started, before rolling back any
+incomplete transactions */
+UNIV_INTERN ibool	srv_startup_is_before_trx_rollback_phase = FALSE;
+/** TRUE if the server is being started */
+UNIV_INTERN ibool	srv_is_being_started = FALSE;
+/** TRUE if the server was successfully started */
+UNIV_INTERN ibool	srv_was_started = FALSE;
+/** TRUE if innobase_start_or_create_for_mysql() has been called */
 static ibool	srv_start_has_been_called = FALSE;
-static ibool	srv_was_started = FALSE;
-#endif /* !UNIV_HOTBACKUP */
 
-/* At a shutdown the value first climbs to SRV_SHUTDOWN_CLEANUP
-and then to SRV_SHUTDOWN_LAST_PHASE */
-ulint		srv_shutdown_state = 0;
+/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
+SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
+UNIV_INTERN enum srv_shutdown_state	srv_shutdown_state = SRV_SHUTDOWN_NONE;
 
-#ifndef UNIV_HOTBACKUP
+/** Files comprising the system tablespace */
 static os_file_t	files[1000];
 
+/** Mutex protecting the ios count */
 static mutex_t		ios_mutex;
+/** Count of I/O operations in io_handler_thread() */
 static ulint		ios;
 
+/** io_handler_thread parameters for thread identification */
 static ulint		n[SRV_MAX_N_IO_THREADS + 5];
+/** io_handler_thread identifiers */
 static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 5];
 
-/* We use this mutex to test the return value of pthread_mutex_trylock
+/** We use this mutex to test the return value of pthread_mutex_trylock
    on successful locking. HP-UX does NOT return 0, though Linux et al do. */
 static os_fast_mutex_t	srv_os_test_mutex;
 
-/* Name of srv_monitor_file */
+/** Name of srv_monitor_file */
 static char*	srv_monitor_file_name;
 #endif /* !UNIV_HOTBACKUP */
 
+/** */
 #define SRV_N_PENDING_IOS_PER_THREAD	OS_AIO_N_PENDING_IOS_PER_THREAD
 #define SRV_MAX_N_PENDING_SYNC_IOS	100
 
 
-/* Avoid warnings when using purify */
-
-#ifdef HAVE_purify
-static int inno_bcmp(register const char *s1, register const char *s2,
-	register uint len)
-{
-	while ((len-- != 0) && (*s1++ == *s2++))
-		;
-
-	return(len + 1);
-}
-#define memcmp(A,B,C) inno_bcmp((A),(B),(C))
-#endif
-
+/*********************************************************************//**
+Convert a numeric string that optionally ends in G or M, to a number
+containing megabytes.
+@return	next character in string */
 static
 char*
 srv_parse_megabytes(
 /*================*/
-			/* out: next character in string */
-	char*	str,	/* in: string containing a quantity in bytes */
-	ulint*	megs)	/* out: the number in megabytes */
+	char*	str,	/*!< in: string containing a quantity in bytes */
+	ulint*	megs)	/*!< out: the number in megabytes */
 {
 	char*	endp;
 	ulint	size;
@@ -147,36 +192,26 @@ srv_parse_megabytes(
 	return(str);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Reads the data files and their sizes from a character string given in
-the .cnf file. */
-
+the .cnf file.
+@return	TRUE if ok, FALSE on parse error */
+UNIV_INTERN
 ibool
 srv_parse_data_file_paths_and_sizes(
 /*================================*/
-					/* out: TRUE if ok, FALSE if parsing
-					error */
-	char*	str,			/* in: the data file path string */
-	char***	data_file_names,	/* out, own: array of data file
-					names */
-	ulint**	data_file_sizes,	/* out, own: array of data file sizes
-					in megabytes */
-	ulint**	data_file_is_raw_partition,/* out, own: array of flags
-					showing which data files are raw
-					partitions */
-	ulint*	n_data_files,		/* out: number of data files */
-	ibool*	is_auto_extending,	/* out: TRUE if the last data file is
-					auto-extending */
-	ulint*	max_auto_extend_size)	/* out: max auto extend size for the
-					last file if specified, 0 if not */
+	char*	str)	/*!< in/out: the data file path string */
 {
 	char*	input_str;
 	char*	path;
 	ulint	size;
 	ulint	i	= 0;
 
-	*is_auto_extending = FALSE;
-	*max_auto_extend_size = 0;
+	srv_auto_extend_last_data_file = FALSE;
+	srv_last_file_size_max = 0;
+	srv_data_file_names = NULL;
+	srv_data_file_sizes = NULL;
+	srv_data_file_is_raw_partition = NULL;
 
 	input_str = str;
 
@@ -253,11 +288,12 @@ srv_parse_data_file_paths_and_sizes(
 		return(FALSE);
 	}
 
-	*data_file_names = (char**)ut_malloc(i * sizeof(void*));
-	*data_file_sizes = (ulint*)ut_malloc(i * sizeof(ulint));
-	*data_file_is_raw_partition = (ulint*)ut_malloc(i * sizeof(ulint));
+	srv_data_file_names = malloc(i * sizeof *srv_data_file_names);
+	srv_data_file_sizes = malloc(i * sizeof *srv_data_file_sizes);
+	srv_data_file_is_raw_partition = malloc(
+		i * sizeof *srv_data_file_is_raw_partition);
 
-	*n_data_files = i;
+	srv_n_data_files = i;
 
 	/* Then store the actual values to our arrays */
 
@@ -287,13 +323,13 @@ srv_parse_data_file_paths_and_sizes(
 
 		str = srv_parse_megabytes(str, &size);
 
-		(*data_file_names)[i] = path;
-		(*data_file_sizes)[i] = size;
+		srv_data_file_names[i] = path;
+		srv_data_file_sizes[i] = size;
 
 		if (0 == strncmp(str, ":autoextend",
 				 (sizeof ":autoextend") - 1)) {
 
-			*is_auto_extending = TRUE;
+			srv_auto_extend_last_data_file = TRUE;
 
 			str += (sizeof ":autoextend") - 1;
 
@@ -303,7 +339,7 @@ srv_parse_data_file_paths_and_sizes(
 				str += (sizeof ":max:") - 1;
 
 				str = srv_parse_megabytes(
-					str, max_auto_extend_size);
+					str, &srv_last_file_size_max);
 			}
 
 			if (*str != '\0') {
@@ -312,21 +348,21 @@ srv_parse_data_file_paths_and_sizes(
 			}
 		}
 
-		(*data_file_is_raw_partition)[i] = 0;
+		(srv_data_file_is_raw_partition)[i] = 0;
 
 		if (strlen(str) >= 6
 		    && *str == 'n'
 		    && *(str + 1) == 'e'
 		    && *(str + 2) == 'w') {
 			str += 3;
-			(*data_file_is_raw_partition)[i] = SRV_NEW_RAW;
+			(srv_data_file_is_raw_partition)[i] = SRV_NEW_RAW;
 		}
 
 		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
 			str += 3;
 
-			if ((*data_file_is_raw_partition)[i] == 0) {
-				(*data_file_is_raw_partition)[i] = SRV_OLD_RAW;
+			if ((srv_data_file_is_raw_partition)[i] == 0) {
+				(srv_data_file_is_raw_partition)[i] = SRV_OLD_RAW;
 			}
 		}
 
@@ -340,22 +376,22 @@ srv_parse_data_file_paths_and_sizes(
 	return(TRUE);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Reads log group home directories from a character string given in
-the .cnf file. */
-
+the .cnf file.
+@return	TRUE if ok, FALSE on parse error */
+UNIV_INTERN
 ibool
 srv_parse_log_group_home_dirs(
 /*==========================*/
-					/* out: TRUE if ok, FALSE if parsing
-					error */
-	char*	str,			/* in: character string */
-	char***	log_group_home_dirs)	/* out, own: log group home dirs */
+	char*	str)	/*!< in/out: character string */
 {
 	char*	input_str;
 	char*	path;
 	ulint	i	= 0;
 
+	srv_log_group_home_dirs = NULL;
+
 	input_str = str;
 
 	/* First calculate the number of directories and check syntax:
@@ -385,7 +421,7 @@ srv_parse_log_group_home_dirs(
 		return(FALSE);
 	}
 
-	*log_group_home_dirs = (char**) ut_malloc(i * sizeof(void*));
+	srv_log_group_home_dirs = malloc(i * sizeof *srv_log_group_home_dirs);
 
 	/* Then store the actual values to our array */
 
@@ -404,7 +440,7 @@ srv_parse_log_group_home_dirs(
 			str++;
 		}
 
-		(*log_group_home_dirs)[i] = path;
+		srv_log_group_home_dirs[i] = path;
 
 		i++;
 	}
@@ -412,15 +448,34 @@ srv_parse_log_group_home_dirs(
 	return(TRUE);
 }
 
-#ifndef UNIV_HOTBACKUP
-/************************************************************************
-I/o-handler thread function. */
-static
+/*********************************************************************//**
+Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
+and srv_parse_log_group_home_dirs(). */
+UNIV_INTERN
+void
+srv_free_paths_and_sizes(void)
+/*==========================*/
+{
+	free(srv_data_file_names);
+	srv_data_file_names = NULL;
+	free(srv_data_file_sizes);
+	srv_data_file_sizes = NULL;
+	free(srv_data_file_is_raw_partition);
+	srv_data_file_is_raw_partition = NULL;
+	free(srv_log_group_home_dirs);
+	srv_log_group_home_dirs = NULL;
+}
 
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+I/o-handler thread function.
+@return	OS_THREAD_DUMMY_RETURN */
+static
 os_thread_ret_t
 io_handler_thread(
 /*==============*/
-	void*	arg)
+	void*	arg)	/*!< in: pointer to the number of the segment in
+			the aio array */
 {
 	ulint	segment;
 	ulint	i;
@@ -456,13 +511,13 @@ io_handler_thread(
 #define SRV_PATH_SEPARATOR	'/'
 #endif
 
-/*************************************************************************
+/*********************************************************************//**
 Normalizes a directory path for Windows: converts slashes to backslashes. */
-
+UNIV_INTERN
 void
 srv_normalize_path_for_win(
 /*=======================*/
-	char*	str __attribute__((unused)))	/* in/out: null-terminated
+	char*	str __attribute__((unused)))	/*!< in/out: null-terminated
 						character string */
 {
 #ifdef __WIN__
@@ -475,16 +530,15 @@ srv_normalize_path_for_win(
 #endif
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Adds a slash or a backslash to the end of a string if it is missing
-and the string is not empty. */
-
+and the string is not empty.
+@return	string which has the separator if the string is not empty */
+UNIV_INTERN
 char*
 srv_add_path_separator_if_needed(
 /*=============================*/
-			/* out: string which has the separator if the
-			string is not empty */
-	char*	str)	/* in: null-terminated character string */
+	char*	str)	/*!< in: null-terminated character string */
 {
 	char*	out_str;
 	ulint	len	= ut_strlen(str);
@@ -503,50 +557,48 @@ srv_add_path_separator_if_needed(
 }
 
 #ifndef UNIV_HOTBACKUP
-/*************************************************************************
+/*********************************************************************//**
 Calculates the low 32 bits when a file size which is given as a number
-database pages is converted to the number of bytes. */
+database pages is converted to the number of bytes.
+@return	low 32 bytes of file size when expressed in bytes */
 static
 ulint
 srv_calc_low32(
 /*===========*/
-				/* out: low 32 bytes of file size when
-				expressed in bytes */
-	ulint	file_size)	/* in: file size in database pages */
+	ulint	file_size)	/*!< in: file size in database pages */
 {
 	return(0xFFFFFFFFUL & (file_size << UNIV_PAGE_SIZE_SHIFT));
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Calculates the high 32 bits when a file size which is given as a number
-database pages is converted to the number of bytes. */
+database pages is converted to the number of bytes.
+@return	high 32 bytes of file size when expressed in bytes */
 static
 ulint
 srv_calc_high32(
 /*============*/
-				/* out: high 32 bytes of file size when
-				expressed in bytes */
-	ulint	file_size)	/* in: file size in database pages */
+	ulint	file_size)	/*!< in: file size in database pages */
 {
 	return(file_size >> (32 - UNIV_PAGE_SIZE_SHIFT));
 }
 
-/*************************************************************************
-Creates or opens the log files and closes them. */
+/*********************************************************************//**
+Creates or opens the log files and closes them.
+@return	DB_SUCCESS or error code */
 static
 ulint
 open_or_create_log_file(
 /*====================*/
-					/* out: DB_SUCCESS or error code */
-	ibool	create_new_db,		/* in: TRUE if we should create a
+	ibool	create_new_db,		/*!< in: TRUE if we should create a
 					new database */
-	ibool*	log_file_created,	/* out: TRUE if new log file
+	ibool*	log_file_created,	/*!< out: TRUE if new log file
 					created */
-	ibool	log_file_has_been_opened,/* in: TRUE if a log file has been
+	ibool	log_file_has_been_opened,/*!< in: TRUE if a log file has been
 					opened before: then it is an error
 					to try to create another log file */
-	ulint	k,			/* in: log group number */
-	ulint	i)			/* in: log file number in group */
+	ulint	k,			/*!< in: log group number */
+	ulint	i)			/*!< in: log file number in group */
 {
 	ibool	ret;
 	ulint	size;
@@ -653,7 +705,7 @@ open_or_create_log_file(
 		which is for this log group */
 
 		fil_space_create(name,
-				 2 * k + SRV_LOG_SPACE_FIRST_ID, FIL_LOG);
+				 2 * k + SRV_LOG_SPACE_FIRST_ID, 0, FIL_LOG);
 	}
 
 	ut_a(fil_validate());
@@ -668,7 +720,7 @@ open_or_create_log_file(
 	if (k == 0 && i == 0) {
 		arch_space_id = 2 * k + 1 + SRV_LOG_SPACE_FIRST_ID;
 
-		fil_space_create("arch_log_space", arch_space_id, FIL_LOG);
+		fil_space_create("arch_log_space", arch_space_id, 0, FIL_LOG);
 	} else {
 		arch_space_id = ULINT_UNDEFINED;
 	}
@@ -684,24 +736,27 @@ open_or_create_log_file(
 	return(DB_SUCCESS);
 }
 
-/*************************************************************************
-Creates or opens database data files and closes them. */
+/*********************************************************************//**
+Creates or opens database data files and closes them.
+@return	DB_SUCCESS or error code */
 static
 ulint
 open_or_create_data_files(
 /*======================*/
-				/* out: DB_SUCCESS or error code */
-	ibool*	create_new_db,	/* out: TRUE if new database should be
-								created */
+	ibool*		create_new_db,	/*!< out: TRUE if new database should be
+					created */
 #ifdef UNIV_LOG_ARCHIVE
-	ulint*	min_arch_log_no,/* out: min of archived log numbers in data
-				files */
-	ulint*	max_arch_log_no,/* out: */
+	ulint*		min_arch_log_no,/*!< out: min of archived log
+					numbers in data files */
+	ulint*		max_arch_log_no,/*!< out: max of archived log
+					numbers in data files */
 #endif /* UNIV_LOG_ARCHIVE */
-	dulint*	min_flushed_lsn,/* out: min of flushed lsn values in data
-				files */
-	dulint*	max_flushed_lsn,/* out: */
-	ulint*	sum_of_new_sizes)/* out: sum of sizes of the new files added */
+	ib_uint64_t*	min_flushed_lsn,/*!< out: min of flushed lsn
+					values in data files */
+	ib_uint64_t*	max_flushed_lsn,/*!< out: max of flushed lsn
+					values in data files */
+	ulint*		sum_of_new_sizes)/*!< out: sum of sizes of the
+					new files added */
 {
 	ibool	ret;
 	ulint	i;
@@ -937,18 +992,13 @@ skip_size_check:
 		ut_a(ret);
 
 		if (i == 0) {
-			fil_space_create(name, 0, FIL_TABLESPACE);
+			fil_space_create(name, 0, 0, FIL_TABLESPACE);
 		}
 
 		ut_a(fil_validate());
 
-		if (srv_data_file_is_raw_partition[i]) {
-
-			fil_node_create(name, srv_data_file_sizes[i], 0, TRUE);
-		} else {
-			fil_node_create(name, srv_data_file_sizes[i], 0,
-					FALSE);
-		}
+		fil_node_create(name, srv_data_file_sizes[i], 0,
+				srv_data_file_is_raw_partition[i] != 0);
 	}
 
 	ios = 0;
@@ -958,35 +1008,35 @@ skip_size_check:
 	return(DB_SUCCESS);
 }
 
-/********************************************************************
+/****************************************************************//**
 Starts InnoDB and creates a new database if database files
-are not found and the user wants. Server parameters are
-read from a file of name "srv_init" in the ib_home directory. */
-
+are not found and the user wants.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
 int
 innobase_start_or_create_for_mysql(void)
 /*====================================*/
-				/* out: DB_SUCCESS or error code */
 {
 	buf_pool_t*	ret;
-	ibool	create_new_db;
-	ibool	log_file_created;
-	ibool	log_created	= FALSE;
-	ibool	log_opened	= FALSE;
-	dulint	min_flushed_lsn;
-	dulint	max_flushed_lsn;
+	ibool		create_new_db;
+	ibool		log_file_created;
+	ibool		log_created	= FALSE;
+	ibool		log_opened	= FALSE;
+	ib_uint64_t	min_flushed_lsn;
+	ib_uint64_t	max_flushed_lsn;
 #ifdef UNIV_LOG_ARCHIVE
-	ulint	min_arch_log_no;
-	ulint	max_arch_log_no;
+	ulint		min_arch_log_no;
+	ulint		max_arch_log_no;
 #endif /* UNIV_LOG_ARCHIVE */
-	ulint	sum_of_new_sizes;
-	ulint	sum_of_data_file_sizes;
-	ulint	tablespace_size_in_header;
-	ulint	err;
-	ulint	i;
-	ibool	srv_file_per_table_original_value  = srv_file_per_table;
-	mtr_t	mtr;
-	ulint	n_threads;
+	ulint		sum_of_new_sizes;
+	ulint		sum_of_data_file_sizes;
+	ulint		tablespace_size_in_header;
+	ulint		err;
+	ulint		i;
+	ulint		io_limit;
+	my_bool		srv_file_per_table_original_value
+		= srv_file_per_table;
+	mtr_t		mtr;
 #ifdef HAVE_DARWIN_THREADS
 # ifdef F_FULLFSYNC
 	/* This executable has been compiled on Mac OS X 10.3 or later.
@@ -1020,8 +1070,11 @@ innobase_start_or_create_for_mysql(void)
 			(ulong)sizeof(ulint), (ulong)sizeof(void*));
 	}
 
-	srv_file_per_table = FALSE; /* system tables are created in tablespace
-				    0 */
+	/* System tables are created in tablespace 0.  Thus, we must
+	temporarily clear srv_file_per_table.  This is ok, because the
+	server will not accept connections (which could modify
+	innodb_file_per_table) until this function has returned. */
+	srv_file_per_table = FALSE;
 #ifdef UNIV_DEBUG
 	fprintf(stderr,
 		"InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
@@ -1048,31 +1101,39 @@ innobase_start_or_create_for_mysql(void)
 		"InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
 #endif
 
-#ifdef UNIV_SIMULATE_AWE
-	fprintf(stderr,
-		"InnoDB: !!!!!!!! UNIV_SIMULATE_AWE switched on !!!!!!!!!\n");
-#endif
-	if (srv_sizeof_trx_t_in_ha_innodb_cc != (ulint)sizeof(trx_t)) {
+	if (UNIV_LIKELY(srv_use_sys_malloc)) {
 		fprintf(stderr,
-			"InnoDB: Error: trx_t size is %lu in ha_innodb.cc"
-			" but %lu in srv0start.c\n"
-			"InnoDB: Check that pthread_mutex_t is defined"
-			" in the same way in these\n"
-			"InnoDB: compilation modules. Cannot continue.\n",
-			(ulong)  srv_sizeof_trx_t_in_ha_innodb_cc,
-			(ulong) sizeof(trx_t));
-		return(DB_ERROR);
+			"InnoDB: The InnoDB memory heap is disabled\n");
 	}
 
-#ifdef UNIV_DISABLE_MEM_POOL
+#ifdef HAVE_GCC_ATOMIC_BUILTINS
+# ifdef INNODB_RW_LOCKS_USE_ATOMICS
 	fprintf(stderr,
-		"InnoDB: The InnoDB memory heap has been disabled.\n");
-#endif
-
-#ifdef UNIV_SYNC_ATOMIC
+		"InnoDB: Mutexes and rw_locks use GCC atomic builtins.\n");
+# else /* INNODB_RW_LOCKS_USE_ATOMICS */
 	fprintf(stderr,
-		"InnoDB: Mutex and rw_lock use atomics.\n");
-#endif
+		"InnoDB: Mutexes use GCC atomic builtins, rw_locks do not.\n");
+# endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+#elif defined(HAVE_SOLARIS_ATOMICS)
+# ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	fprintf(stderr,
+		"InnoDB: Mutexes and rw_locks use Solaris atomic functions.\n");
+# else
+	fprintf(stderr,
+		"InnoDB: Mutexes use Solaris atomic functions.\n");
+# endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+#elif HAVE_WINDOWS_ATOMICS
+# ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	fprintf(stderr,
+		"InnoDB: Mutexes and rw_locks use Windows interlocked functions.\n");
+# else
+	fprintf(stderr,
+		"InnoDB: Mutexes use Windows interlocked functions.\n");
+# endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+#else /* HAVE_GCC_ATOMIC_BUILTINS */
+	fprintf(stderr,
+		"InnoDB: Neither mutexes nor rw_locks use GCC atomic builtins.\n");
+#endif /* HAVE_GCC_ATOMIC_BUILTINS */
 
 	/* Since InnoDB does not currently clean up all its internal data
 	structures in MySQL Embedded Server Library server_end(), we
@@ -1100,35 +1161,22 @@ innobase_start_or_create_for_mysql(void)
 	srv_startup_is_before_trx_rollback_phase = TRUE;
 	os_aio_use_native_aio = FALSE;
 
-#if !defined(__WIN2000__) && !defined(UNIV_SIMULATE_AWE)
-	if (srv_use_awe) {
-
-		fprintf(stderr,
-			"InnoDB: Error: You have specified"
-			" innodb_buffer_pool_awe_mem_mb\n"
-			"InnoDB: in my.cnf, but AWE can only"
-			" be used in Windows 2000 and later.\n"
-			"InnoDB: To use AWE, InnoDB must"
-			" be compiled with __WIN2000__ defined.\n");
-
-		return(DB_ERROR);
-	}
-#endif
-
 #ifdef __WIN__
-	if (os_get_os_version() == OS_WIN95
-	    || os_get_os_version() == OS_WIN31
-	    || os_get_os_version() == OS_WINNT) {
-
+	switch (os_get_os_version()) {
+	case OS_WIN95:
+	case OS_WIN31:
+	case OS_WINNT:
 		/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
 		and NT use simulated aio. In NT Windows provides async i/o,
 		but when run in conjunction with InnoDB Hot Backup, it seemed
 		to corrupt the data files. */
 
 		os_aio_use_native_aio = FALSE;
-	} else {
+		break;
+	default:
 		/* On Win 2000 and XP use async i/o */
 		os_aio_use_native_aio = TRUE;
+		break;
 	}
 #endif
 	if (srv_file_flush_method_str == NULL) {
@@ -1174,7 +1222,7 @@ innobase_start_or_create_for_mysql(void)
 	}
 
 	/* Note that the call srv_boot() also changes the values of
-	srv_pool_size etc. to the units used by InnoDB internally */
+	some variables to the units used by InnoDB internally */
 
 	/* Set the maximum number of threads which can wait for a semaphore
 	inside InnoDB: this is the 'sync wait array' size, as well as the
@@ -1189,15 +1237,12 @@ innobase_start_or_create_for_mysql(void)
 	NetWare. */
 	srv_max_n_threads = 1000;
 #else
-	if (srv_pool_size >= 1000 * 1024) {
-		/* Here we still have srv_pool_size counted
-		in kilobytes (in 4.0 this was in bytes)
-		srv_boot() converts the value to
-		pages; if buffer pool is less than 1000 MB,
+	if (srv_buf_pool_size >= 1000 * 1024 * 1024) {
+		/* If buffer pool is less than 1000 MB,
 		assume fewer threads. */
 		srv_max_n_threads = 50000;
 
-	} else if (srv_pool_size >= 8 * 1024) {
+	} else if (srv_buf_pool_size >= 8 * 1024 * 1024) {
 
 		srv_max_n_threads = 10000;
 	} else {
@@ -1206,7 +1251,7 @@ innobase_start_or_create_for_mysql(void)
 						computers */
 	}
 #endif
-	err = srv_boot(); /* This changes srv_pool_size to units of a page */
+	err = srv_boot();
 
 	if (err != DB_SUCCESS) {
 
@@ -1249,66 +1294,39 @@ innobase_start_or_create_for_mysql(void)
 		return(DB_ERROR);
 	}
 
-#ifdef __WIN__
-        /*
-           Need to hardcode this to 1 read and 1 write on Windows
-           while searching for problem causing this to crash when
-           higher number of threads are supported.
-        */
-        srv_n_read_io_threads = srv_n_write_io_threads = 1;
-#endif
-	/* Restrict the maximum number of file i/o threads */
-	if ((srv_n_read_io_threads + srv_n_write_io_threads) > SRV_MAX_N_IO_THREADS) {
-		fprintf(stderr,
-			"InnoDB: requested too many read(%d) or write(%d) IO threads, max is %d\n",
-			(int)srv_n_read_io_threads,
-                        (int)srv_n_write_io_threads,
-                        SRV_MAX_N_IO_THREADS);	
-		return(DB_ERROR);
+	/* If user has set the value of innodb_file_io_threads then
+	we'll emit a message telling the user that this parameter
+	is now deprecated. */
+	if (srv_n_file_io_threads != 4) {
+		fprintf(stderr, "InnoDB: Warning:"
+			" innodb_file_io_threads is deprecated."
+			" Please use innodb_read_io_threads and"
+			" innodb_write_io_threads instead\n");
 	}
 
+	/* Now overwrite the value on srv_n_file_io_threads */
+	srv_n_file_io_threads = 2 + srv_n_read_io_threads
+				+ srv_n_write_io_threads;
+
+	ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
+
+	/* TODO: Investigate if SRV_N_PENDING_IOS_PER_THREAD (32) limit
+	still applies to windows. */
 	if (!os_aio_use_native_aio) {
- 		/* More than 4 threads are now supported. */
-		n_threads = os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD,
-                                        srv_n_read_io_threads,
-                                        srv_n_write_io_threads,
-                                        SRV_MAX_N_PENDING_SYNC_IOS);
+		io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD;
 	} else {
-                /* Might need more slots here. Alas, I don't do windows. */
-                n_threads = os_aio_init(SRV_N_PENDING_IOS_PER_THREAD,
-                                        srv_n_read_io_threads,
-                                        srv_n_write_io_threads,
-                                        SRV_MAX_N_PENDING_SYNC_IOS);
+		io_limit = SRV_N_PENDING_IOS_PER_THREAD;
 	}
 
-	if (n_threads > SRV_MAX_N_IO_THREADS) {
-		fprintf(stderr,
-			"InnoDB: requested too many IO threads(%d), max is %d\n",
-			(int)n_threads, SRV_MAX_N_IO_THREADS);	
-		return(DB_ERROR);
-	}
+	os_aio_init(io_limit,
+		    srv_n_read_io_threads,
+		    srv_n_write_io_threads,
+		    SRV_MAX_N_PENDING_SYNC_IOS);
 
-	fil_init(srv_max_n_open_files);
+	fil_init(srv_file_per_table ? 50000 : 5000,
+		 srv_max_n_open_files);
 
-	if (srv_use_awe) {
-		fprintf(stderr,
-			"InnoDB: Using AWE: Memory window is %lu MB"
-			" and AWE memory is %lu MB\n",
-			(ulong) (srv_awe_window_size / ((1024 * 1024)
-							/ UNIV_PAGE_SIZE)),
-			(ulong) (srv_pool_size / ((1024 * 1024)
-						  / UNIV_PAGE_SIZE)));
-
-		/* We must disable adaptive hash indexes because they do not
-		tolerate remapping of pages in AWE */
-
-		srv_use_adaptive_hash_indexes = FALSE;
-		ret = buf_pool_init(srv_pool_size, srv_pool_size,
-				    srv_awe_window_size);
-	} else {
-		ret = buf_pool_init(srv_pool_size, srv_pool_size,
-				    srv_pool_size);
-	}
+	ret = buf_pool_init();
 
 	if (ret == NULL) {
 		fprintf(stderr,
@@ -1318,6 +1336,19 @@ innobase_start_or_create_for_mysql(void)
 		return(DB_ERROR);
 	}
 
+#ifdef UNIV_DEBUG
+	/* We have observed deadlocks with a 5MB buffer pool but
+	the actual lower limit could very well be a little higher. */
+
+	if (srv_buf_pool_size <= 5 * 1024 * 1024) {
+
+		fprintf(stderr, "InnoDB: Warning: Small buffer pool size "
+			"(%luM), the flst_validate() debug function "
+			"can cause a deadlock if the buffer pool fills up.\n",
+			srv_buf_pool_size / 1024 / 1024);
+	}
+#endif
+
 	fsp_init();
 	log_init();
 
@@ -1325,7 +1356,7 @@ innobase_start_or_create_for_mysql(void)
 
 	/* Create i/o-handler threads: */
 
-	for (i = 0; i < n_threads; i++) {
+	for (i = 0; i < srv_n_file_io_threads; i++) {
 		n[i] = i;
 
 		os_thread_create(io_handler_thread, n + i, thread_ids + i);
@@ -1447,7 +1478,7 @@ innobase_start_or_create_for_mysql(void)
 	    && !srv_archive_recovery
 #endif /* UNIV_LOG_ARCHIVE */
 	    ) {
-		if (ut_dulint_cmp(max_flushed_lsn, min_flushed_lsn) != 0
+		if (max_flushed_lsn != min_flushed_lsn
 #ifdef UNIV_LOG_ARCHIVE
 		    || max_arch_log_no != min_arch_log_no
 #endif /* UNIV_LOG_ARCHIVE */
@@ -1462,8 +1493,7 @@ innobase_start_or_create_for_mysql(void)
 			return(DB_ERROR);
 		}
 
-		if (ut_dulint_cmp(max_flushed_lsn, ut_dulint_create(0, 1000))
-		    < 0) {
+		if (max_flushed_lsn < (ib_uint64_t) 1000) {
 			fprintf(stderr,
 				"InnoDB: Cannot initialize created"
 				" log files because\n"
@@ -1491,9 +1521,10 @@ innobase_start_or_create_for_mysql(void)
 		mutex_exit(&(log_sys->mutex));
 	}
 
+	trx_sys_file_format_init();
+
 	if (create_new_db) {
 		mtr_start(&mtr);
-
 		fsp_header_init(0, sum_of_new_sizes, &mtr);
 
 		mtr_commit(&mtr);
@@ -1523,16 +1554,43 @@ innobase_start_or_create_for_mysql(void)
 
 		/* Initialize the fsp free limit global variable in the log
 		system */
-		fsp_header_get_free_limit(0);
+		fsp_header_get_free_limit();
 
 		recv_recovery_from_archive_finish();
 #endif /* UNIV_LOG_ARCHIVE */
 	} else {
+
+		/* Check if we support the max format that is stamped
+		on the system tablespace. 
+		Note:  We are NOT allowed to make any modifications to
+		the TRX_SYS_PAGE_NO page before recovery  because this
+		page also contains the max_trx_id etc. important system
+		variables that are required for recovery.  We need to
+		ensure that we return the system to a state where normal
+		recovery is guaranteed to work. We do this by
+		invalidating the buffer cache, this will force the
+		reread of the page and restoration to its last known
+		consistent state, this is REQUIRED for the recovery
+		process to work. */
+		err = trx_sys_file_format_max_check(
+			srv_check_file_format_at_startup);
+
+		if (err != DB_SUCCESS) {
+			return(err);
+		}
+
+		/* Invalidate the buffer pool to ensure that we reread
+		the page that we read above, during recovery.
+		Note that this is not as heavy weight as it seems. At
+		this point there will be only ONE page in the buf_LRU
+		and there must be no page in the buf_flush list. */
+		buf_pool_invalidate();
+
 		/* We always try to do a recovery, even if the database had
 		been shut down normally: this is the normal startup path */
 
 		err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT,
-							  ut_dulint_max,
+							  IB_ULONGLONG_MAX,
 							  min_flushed_lsn,
 							  max_flushed_lsn);
 		if (err != DB_SUCCESS) {
@@ -1578,12 +1636,19 @@ innobase_start_or_create_for_mysql(void)
 
 		/* Initialize the fsp free limit global variable in the log
 		system */
-		fsp_header_get_free_limit(0);
+		fsp_header_get_free_limit();
 
 		/* recv_recovery_from_checkpoint_finish needs trx lists which
 		are initialized in trx_sys_init_at_db_start(). */
 
 		recv_recovery_from_checkpoint_finish();
+
+		/* It is possible that file_format tag has never
+		been set. In this case we initialize it to minimum
+		value.  Important to note that we can do it ONLY after
+		we have finished the recovery process so that the
+		image of TRX_SYS_PAGE_NO is not stale. */
+		trx_sys_file_format_tag_init();
 	}
 
 	if (!create_new_db && sum_of_new_sizes > 0) {
@@ -1634,7 +1699,6 @@ innobase_start_or_create_for_mysql(void)
 	/* Create the thread which warns of long semaphore waits */
 	os_thread_create(&srv_error_monitor_thread, NULL,
 			 thread_ids + 3 + SRV_MAX_N_IO_THREADS);
-	srv_was_started = TRUE;
 	srv_is_being_started = FALSE;
 
 	if (trx_doublewrite == NULL) {
@@ -1663,7 +1727,7 @@ innobase_start_or_create_for_mysql(void)
 		sum_of_data_file_sizes += srv_data_file_sizes[i];
 	}
 
-	tablespace_size_in_header = fsp_header_get_tablespace_size(0);
+	tablespace_size_in_header = fsp_header_get_tablespace_size();
 
 	if (!srv_auto_extend_last_data_file
 	    && sum_of_data_file_sizes != tablespace_size_in_header) {
@@ -1747,9 +1811,9 @@ innobase_start_or_create_for_mysql(void)
 	if (srv_print_verbose_log) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr,
-			"  InnoDB: Started; log sequence number %lu %lu\n",
-			(ulong) ut_dulint_get_high(srv_start_lsn),
-			(ulong) ut_dulint_get_low(srv_start_lsn));
+			" InnoDB Plugin %s started; "
+			"log sequence number %llu\n",
+			INNODB_VERSION_STR, srv_start_lsn);
 	}
 
 	if (srv_force_recovery > 0) {
@@ -1805,8 +1869,7 @@ innobase_start_or_create_for_mysql(void)
 			" to an earlier version of\n"
 			"InnoDB: InnoDB! But if you absolutely need to"
 			" downgrade, see\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"multiple-tablespaces.html\n"
+			"InnoDB: " REFMAN "multiple-tablespaces.html\n"
 			"InnoDB: for instructions.\n");
 	}
 
@@ -1821,16 +1884,18 @@ innobase_start_or_create_for_mysql(void)
 
 	srv_file_per_table = srv_file_per_table_original_value;
 
+	srv_was_started = TRUE;
+
 	return((int) DB_SUCCESS);
 }
 
-/********************************************************************
-Shuts down the InnoDB database. */
-
+/****************************************************************//**
+Shuts down the InnoDB database.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
 int
 innobase_shutdown_for_mysql(void)
 /*=============================*/
-				/* out: DB_SUCCESS or error code */
 {
 	ulint	i;
 #ifdef __NETWARE__
@@ -1865,7 +1930,7 @@ innobase_shutdown_for_mysql(void)
 	}
 
 #ifdef __NETWARE__
-	if(!panic_shutdown)
+	if (!panic_shutdown)
 #endif
 		logs_empty_and_mark_files_at_shutdown();
 
@@ -1956,6 +2021,8 @@ innobase_shutdown_for_mysql(void)
 		srv_misc_tmpfile = 0;
 	}
 
+	trx_sys_file_format_close();
+
 	mutex_free(&srv_monitor_file_mutex);
 	mutex_free(&srv_dict_tmpfile_mutex);
 	mutex_free(&srv_misc_tmpfile_mutex);
@@ -1983,6 +2050,7 @@ innobase_shutdown_for_mysql(void)
 	/* 5. Free all allocated memory and the os_fast_mutex created in
 	ut0mem.c */
 
+	buf_pool_free();
 	ut_free_all_mem();
 
 	if (os_thread_count != 0
@@ -2009,11 +2077,12 @@ innobase_shutdown_for_mysql(void)
 		ut_print_timestamp(stderr);
 		fprintf(stderr,
 			"  InnoDB: Shutdown completed;"
-			" log sequence number %lu %lu\n",
-			(ulong) ut_dulint_get_high(srv_shutdown_lsn),
-			(ulong) ut_dulint_get_low(srv_shutdown_lsn));
+			" log sequence number %llu\n",
+			srv_shutdown_lsn);
 	}
 
+	srv_was_started = FALSE;
+
 	return((int) DB_SUCCESS);
 }
 
diff --git a/storage/innobase/sync/sync0arr.c b/storage/innodb_plugin/sync/sync0arr.c
similarity index 78%
rename from storage/innobase/sync/sync0arr.c
rename to storage/innodb_plugin/sync/sync0arr.c
index bb64ac07342..d78ee8f3191 100644
--- a/storage/innobase/sync/sync0arr.c
+++ b/storage/innodb_plugin/sync/sync0arr.c
@@ -1,7 +1,31 @@
-/******************************************************
-The wait array used in synchronization primitives
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file sync/sync0arr.c
+The wait array used in synchronization primitives
 
 Created 9/5/1995 Heikki Tuuri
 *******************************************************/
@@ -50,27 +74,29 @@ wait array for the sake of diagnostics and also to avoid infinite
 wait The error_monitor thread scans the global wait array to signal
 any waiting threads who have missed the signal. */
 
-/* A cell where an individual thread may wait suspended
+/** A cell where an individual thread may wait suspended
 until a resource is released. The suspending is implemented
 using an operating system event semaphore. */
 struct sync_cell_struct {
-	void*		wait_object;	/* pointer to the object the
+	void*		wait_object;	/*!< pointer to the object the
 					thread is waiting for; if NULL
 					the cell is free for use */
-	mutex_t*	old_wait_mutex;	/* the latest wait mutex in cell */
-	rw_lock_t*	old_wait_rw_lock;/* the latest wait rw-lock in cell */
-	ulint		request_type;	/* lock type requested on the
+	mutex_t*	old_wait_mutex;	/*!< the latest wait mutex in cell */
+	rw_lock_t*	old_wait_rw_lock;
+					/*!< the latest wait rw-lock
+					in cell */
+	ulint		request_type;	/*!< lock type requested on the
 					object */
-	const char*	file;		/* in debug version file where
+	const char*	file;		/*!< in debug version file where
 					requested */
-	ulint		line;		/* in debug version line where
+	ulint		line;		/*!< in debug version line where
 					requested */
-	os_thread_id_t	thread;		/* thread id of this waiting
+	os_thread_id_t	thread;		/*!< thread id of this waiting
 					thread */
-	ibool		waiting;	/* TRUE if the thread has already
+	ibool		waiting;	/*!< TRUE if the thread has already
 					called sync_array_event_wait
 					on this cell */
-	ib_longlong	signal_count;	/* We capture the signal_count
+	ib_int64_t	signal_count;	/*!< We capture the signal_count
 					of the wait_object when we
 					reset the event. This value is
 					then passed on to os_event_wait
@@ -78,7 +104,7 @@ struct sync_cell_struct {
 					has not been signalled in the
 					period between the reset and
 					wait call. */
-	time_t		reservation_time;/* time when the thread reserved
+	time_t		reservation_time;/*!< time when the thread reserved
 					the wait cell */
 };
 
@@ -87,58 +113,56 @@ for an event allocated for the array without owning the
 protecting mutex (depending on the case: OS or database mutex), but
 all changes (set or reset) to the state of the event must be made
 while owning the mutex. */
+
+/** Synchronization array */
 struct sync_array_struct {
-	ulint		n_reserved;	/* number of currently reserved
+	ulint		n_reserved;	/*!< number of currently reserved
 					cells in the wait array */
-	ulint		n_cells;	/* number of cells in the
+	ulint		n_cells;	/*!< number of cells in the
 					wait array */
-	sync_cell_t*	array;		/* pointer to wait array */
-	ulint		protection;	/* this flag tells which
+	sync_cell_t*	array;		/*!< pointer to wait array */
+	ulint		protection;	/*!< this flag tells which
 					mutex protects the data */
-	mutex_t		mutex;		/* possible database mutex
+	mutex_t		mutex;		/*!< possible database mutex
 					protecting this data structure */
-	os_mutex_t	os_mutex;	/* Possible operating system mutex
+	os_mutex_t	os_mutex;	/*!< Possible operating system mutex
 					protecting the data structure.
 					As this data structure is used in
 					constructing the database mutex,
 					to prevent infinite recursion
 					in implementation, we fall back to
 					an OS mutex. */
-	ulint		sg_count;	/* count of how many times an
+	ulint		sg_count;	/*!< count of how many times an
 					object has been signalled */
-	ulint		res_count;	/* count of cell reservations
+	ulint		res_count;	/*!< count of cell reservations
 					since creation of the array */
 };
 
-/* Counts the number of times that sync_arr_wake_threads_if_sema_free has
- * found a thread that can run because it may have missed a wakeup signal. */
-ulint sync_wake_ups = 0;
-
 #ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
 This function is called only in the debug version. Detects a deadlock
-of one or more threads because of waits of semaphores. */
+of one or more threads because of waits of semaphores.
+@return	TRUE if deadlock detected */
 static
 ibool
 sync_array_detect_deadlock(
 /*=======================*/
-				/* out: TRUE if deadlock detected */
-	sync_array_t*	arr,	/* in: wait array; NOTE! the caller must
+	sync_array_t*	arr,	/*!< in: wait array; NOTE! the caller must
 				own the mutex to array */
-	sync_cell_t*	start,	/* in: cell where recursive search started */
-	sync_cell_t*	cell,	/* in: cell to search */
-	ulint		depth);	/* in: recursion depth */
+	sync_cell_t*	start,	/*!< in: cell where recursive search started */
+	sync_cell_t*	cell,	/*!< in: cell to search */
+	ulint		depth);	/*!< in: recursion depth */
 #endif /* UNIV_SYNC_DEBUG */
 
-/*********************************************************************
-Gets the nth cell in array. */
+/*****************************************************************//**
+Gets the nth cell in array.
+@return	cell */
 static
 sync_cell_t*
 sync_array_get_nth_cell(
 /*====================*/
-				/* out: cell */
-	sync_array_t*	arr,	/* in: sync array */
-	ulint		n)	/* in: index */
+	sync_array_t*	arr,	/*!< in: sync array */
+	ulint		n)	/*!< in: index */
 {
 	ut_a(arr);
 	ut_a(n < arr->n_cells);
@@ -146,13 +170,13 @@ sync_array_get_nth_cell(
 	return(arr->array + n);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Reserves the mutex semaphore protecting a sync array. */
 static
 void
 sync_array_enter(
 /*=============*/
-	sync_array_t*	arr)	/* in: sync wait array */
+	sync_array_t*	arr)	/*!< in: sync wait array */
 {
 	ulint	protection;
 
@@ -167,13 +191,13 @@ sync_array_enter(
 	}
 }
 
-/**********************************************************************
+/******************************************************************//**
 Releases the mutex semaphore protecting a sync array. */
 static
 void
 sync_array_exit(
 /*============*/
-	sync_array_t*	arr)	/* in: sync wait array */
+	sync_array_t*	arr)	/*!< in: sync wait array */
 {
 	ulint	protection;
 
@@ -188,18 +212,18 @@ sync_array_exit(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Creates a synchronization wait array. It is protected by a mutex
 which is automatically reserved when the functions operating on it
-are called. */
-
+are called.
+@return	own: created wait array */
+UNIV_INTERN
 sync_array_t*
 sync_array_create(
 /*==============*/
-				/* out, own: created wait array */
-	ulint	n_cells,	/* in: number of cells in the array
+	ulint	n_cells,	/*!< in: number of cells in the array
 				to create */
-	ulint	protection)	/* in: either SYNC_ARRAY_OS_MUTEX or
+	ulint	protection)	/*!< in: either SYNC_ARRAY_OS_MUTEX or
 				SYNC_ARRAY_MUTEX: determines the type
 				of mutex protecting the data structure */
 {
@@ -241,13 +265,13 @@ sync_array_create(
 	return(arr);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Frees the resources in a wait array. */
-
+UNIV_INTERN
 void
 sync_array_free(
 /*============*/
-	sync_array_t*	arr)	/* in, own: sync wait array */
+	sync_array_t*	arr)	/*!< in, own: sync wait array */
 {
 	ulint		protection;
 
@@ -271,14 +295,14 @@ sync_array_free(
 	ut_free(arr);
 }
 
-/************************************************************************
+/********************************************************************//**
 Validates the integrity of the wait array. Checks
 that the number of reserved cells equals the count variable. */
-
+UNIV_INTERN
 void
 sync_array_validate(
 /*================*/
-	sync_array_t*	arr)	/* in: sync wait array */
+	sync_array_t*	arr)	/*!< in: sync wait array */
 {
 	ulint		i;
 	sync_cell_t*	cell;
@@ -298,13 +322,13 @@ sync_array_validate(
 	sync_array_exit(arr);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Returns the event that the thread owning the cell waits for. */
 static
 os_event_t
 sync_cell_get_event(
 /*================*/
-	sync_cell_t*   	cell) /* in: non-empty sync array cell */
+	sync_cell_t*	cell) /*!< in: non-empty sync array cell */
 {
 	ulint type = cell->request_type;
 
@@ -317,20 +341,19 @@ sync_cell_get_event(
 	}
 }
 
-
-/**********************************************************************
+/******************************************************************//**
 Reserves a wait array cell for waiting for an object.
 The event of the cell is reset to nonsignalled state. */
-
+UNIV_INTERN
 void
 sync_array_reserve_cell(
 /*====================*/
-	sync_array_t*	arr,	/* in: wait array */
-	void*		object, /* in: pointer to the object to wait for */
-	ulint		type,	/* in: lock request type */
-	const char*	file,	/* in: file where requested */
-	ulint		line,	/* in: line where requested */
-	ulint*		index)	/* out: index of the reserved cell */
+	sync_array_t*	arr,	/*!< in: wait array */
+	void*		object, /*!< in: pointer to the object to wait for */
+	ulint		type,	/*!< in: lock request type */
+	const char*	file,	/*!< in: file where requested */
+	ulint		line,	/*!< in: line where requested */
+	ulint*		index)	/*!< out: index of the reserved cell */
 {
 	sync_cell_t*	cell;
 	os_event_t      event;
@@ -388,17 +411,17 @@ sync_array_reserve_cell(
 	return;
 }
 
-/**********************************************************************
+/******************************************************************//**
 This function should be called when a thread starts to wait on
 a wait array cell. In the debug version this function checks
 if the wait for a semaphore will result in a deadlock, in which
 case prints info and asserts. */
-
+UNIV_INTERN
 void
 sync_array_wait_event(
 /*==================*/
-	sync_array_t*	arr,	/* in: wait array */
-	ulint		index)	/* in: index of the reserved cell */
+	sync_array_t*	arr,	/*!< in: wait array */
+	ulint		index)	/*!< in: index of the reserved cell */
 {
 	sync_cell_t*	cell;
 	os_event_t	event;
@@ -440,14 +463,14 @@ sync_array_wait_event(
 	sync_array_free_cell(arr, index);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Reports info of a wait array cell. */
 static
 void
 sync_array_cell_print(
 /*==================*/
-	FILE*		file,	/* in: file where to print */
-	sync_cell_t*	cell)	/* in: sync cell */
+	FILE*		file,	/*!< in: file where to print */
+	sync_cell_t*	cell)	/*!< in: sync cell */
 {
 	mutex_t*	mutex;
 	rw_lock_t*	rwlock;
@@ -485,11 +508,7 @@ sync_array_cell_print(
 		   || type == RW_LOCK_WAIT_EX
 		   || type == RW_LOCK_SHARED) {
 
-		switch(type) {
-		case RW_LOCK_EX:      fputs("X-lock on", file);      break;
-		case RW_LOCK_WAIT_EX: fputs("wait-X-lock on", file); break;
-		default:              fputs("S-lock on", file);      break;
-		}
+		fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
 
 		rwlock = cell->old_wait_rw_lock;
 
@@ -510,7 +529,7 @@ sync_array_cell_print(
 
 		fprintf(file,
 			"number of readers %lu, waiters flag %lu, "
-                        "lock_word: %ld\n"
+                        "lock_word: %lx\n"
 			"Last time read locked in file %s line %lu\n"
 			"Last time write locked in file %s line %lu\n",
 			(ulong) rw_lock_get_reader_count(rwlock),
@@ -530,16 +549,15 @@ sync_array_cell_print(
 }
 
 #ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
-Looks for a cell with the given thread id. */
+/******************************************************************//**
+Looks for a cell with the given thread id.
+@return	pointer to cell or NULL if not found */
 static
 sync_cell_t*
 sync_array_find_thread(
 /*===================*/
-				/* out: pointer to cell or NULL
-				if not found */
-	sync_array_t*	arr,	/* in: wait array */
-	os_thread_id_t	thread)	/* in: thread id */
+	sync_array_t*	arr,	/*!< in: wait array */
+	os_thread_id_t	thread)	/*!< in: thread id */
 {
 	ulint		i;
 	sync_cell_t*	cell;
@@ -549,8 +567,7 @@ sync_array_find_thread(
 		cell = sync_array_get_nth_cell(arr, i);
 
 		if (cell->wait_object != NULL
-		    && os_thread_eq(cell->thread, thread)
-		    && cell->waiting)) {
+		    && os_thread_eq(cell->thread, thread)) {
 
 			return(cell);	/* Found */
 		}
@@ -559,20 +576,20 @@ sync_array_find_thread(
 	return(NULL);	/* Not found */
 }
 
-/**********************************************************************
-Recursion step for deadlock detection. */
+/******************************************************************//**
+Recursion step for deadlock detection.
+@return	TRUE if deadlock detected */
 static
 ibool
 sync_array_deadlock_step(
 /*=====================*/
-				/* out: TRUE if deadlock detected */
-	sync_array_t*	arr,	/* in: wait array; NOTE! the caller must
+	sync_array_t*	arr,	/*!< in: wait array; NOTE! the caller must
 				own the mutex to array */
-	sync_cell_t*	start,	/* in: cell where recursive search
+	sync_cell_t*	start,	/*!< in: cell where recursive search
 				started */
-	os_thread_id_t	thread,	/* in: thread to look at */
-	ulint		pass,	/* in: pass value */
-	ulint		depth)	/* in: recursion depth */
+	os_thread_id_t	thread,	/*!< in: thread to look at */
+	ulint		pass,	/*!< in: pass value */
+	ulint		depth)	/*!< in: recursion depth */
 {
 	sync_cell_t*	new;
 	ibool		ret;
@@ -610,19 +627,19 @@ sync_array_deadlock_step(
 	return(FALSE);
 }
 
-/**********************************************************************
+/******************************************************************//**
 This function is called only in the debug version. Detects a deadlock
-of one or more threads because of waits of semaphores. */
+of one or more threads because of waits of semaphores.
+@return	TRUE if deadlock detected */
 static
 ibool
 sync_array_detect_deadlock(
 /*=======================*/
-				/* out: TRUE if deadlock detected */
-	sync_array_t*	arr,	/* in: wait array; NOTE! the caller must
+	sync_array_t*	arr,	/*!< in: wait array; NOTE! the caller must
 				own the mutex to array */
-	sync_cell_t*	start,	/* in: cell where recursive search started */
-	sync_cell_t*	cell,	/* in: cell to search */
-	ulint		depth)	/* in: recursion depth */
+	sync_cell_t*	start,	/*!< in: cell where recursive search started */
+	sync_cell_t*	cell,	/*!< in: cell to search */
+	ulint		depth)	/*!< in: recursion depth */
 {
 	mutex_t*	mutex;
 	rw_lock_t*	lock;
@@ -755,13 +772,13 @@ print:
 }
 #endif /* UNIV_SYNC_DEBUG */
 
-/**********************************************************************
+/******************************************************************//**
 Determines if we can wake up the thread waiting for a sempahore. */
 static
 ibool
 sync_arr_cell_can_wake_up(
 /*======================*/
-	sync_cell_t*	cell)	/* in: cell to search */
+	sync_cell_t*	cell)	/*!< in: cell to search */
 {
 	mutex_t*	mutex;
 	rw_lock_t*	lock;
@@ -779,18 +796,18 @@ sync_arr_cell_can_wake_up(
 
 		lock = cell->wait_object;
 
-                /* X_LOCK_DECR is the unlocked state */
-	    	if (lock->lock_word == X_LOCK_DECR) {
+		if (lock->lock_word > 0) {
+		/* Either unlocked or only read locked. */
 
 			return(TRUE);
 		}
 
         } else if (cell->request_type == RW_LOCK_WAIT_EX) {
 
-	    	lock = cell->wait_object;
+		lock = cell->wait_object;
 
                 /* lock_word == 0 means all readers have left */
-	    	if (lock->lock_word == 0) {
+		if (lock->lock_word == 0) {
 
 			return(TRUE);
 		}
@@ -807,15 +824,15 @@ sync_arr_cell_can_wake_up(
 	return(FALSE);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Frees the cell. NOTE! sync_array_wait_event frees the cell
 automatically! */
-
+UNIV_INTERN
 void
 sync_array_free_cell(
 /*=================*/
-	sync_array_t*	arr,	/* in: wait array */
-	ulint		index)  /* in: index of the cell in array */
+	sync_array_t*	arr,	/*!< in: wait array */
+	ulint		index)  /*!< in: index of the cell in array */
 {
 	sync_cell_t*	cell;
 
@@ -835,16 +852,16 @@ sync_array_free_cell(
 	sync_array_exit(arr);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Increments the signalled count. */
-
+UNIV_INTERN
 void
 sync_array_object_signalled(
 /*========================*/
-	sync_array_t*	arr)	/* in: wait array */
+	sync_array_t*	arr)	/*!< in: wait array */
 {
-#ifdef UNIV_SYNC_ATOMIC
-	(void)os_atomic_increment((volatile lint *)&(arr->sg_count), 1);
+#ifdef HAVE_ATOMIC_BUILTINS
+	(void) os_atomic_increment_ulint(&arr->sg_count, 1);
 #else
 	sync_array_enter(arr);
 
@@ -854,7 +871,7 @@ sync_array_object_signalled(
 #endif
 }
 
-/**************************************************************************
+/**********************************************************************//**
 If the wakeup algorithm does not work perfectly at semaphore relases,
 this function will do the waking (see the comment in mutex_exit). This
 function should be called about every 1 second in the server.
@@ -862,7 +879,7 @@ function should be called about every 1 second in the server.
 Note that there's a race condition between this thread and mutex_exit
 changing the lock_word and calling signal_object, so sometimes this finds
 threads to wake up even when nothing has gone wrong. */
-
+UNIV_INTERN
 void
 sync_arr_wake_threads_if_sema_free(void)
 /*====================================*/
@@ -888,16 +905,11 @@ sync_arr_wake_threads_if_sema_free(void)
 		}
 			count++;
 
-		if (!cell->waiting) {
-			continue;
-		}
-
 			if (sync_arr_cell_can_wake_up(cell)) {
 
 			event = sync_cell_get_event(cell);
 
 			os_event_set(event);
-			sync_wake_ups++;
 		}
 
 	}
@@ -905,14 +917,13 @@ sync_arr_wake_threads_if_sema_free(void)
 	sync_array_exit(arr);
 }
 
-/**************************************************************************
-Prints warnings of long semaphore waits to stderr. */
-
+/**********************************************************************//**
+Prints warnings of long semaphore waits to stderr.
+@return	TRUE if fatal semaphore wait threshold was exceeded */
+UNIV_INTERN
 ibool
 sync_array_print_long_waits(void)
 /*=============================*/
-			/* out: TRUE if fatal semaphore wait threshold
-			was exceeded */
 {
 	sync_cell_t*	cell;
 	ibool		old_val;
@@ -971,14 +982,14 @@ sync_array_print_long_waits(void)
 	return(fatal);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Prints info of the wait array. */
 static
 void
 sync_array_output_info(
 /*===================*/
-	FILE*		file,	/* in: file where to print */
-	sync_array_t*	arr)	/* in: wait array; NOTE! caller must own the
+	FILE*		file,	/*!< in: file where to print */
+	sync_array_t*	arr)	/*!< in: wait array; NOTE! caller must own the
 				mutex */
 {
 	sync_cell_t*	cell;
@@ -1004,14 +1015,14 @@ sync_array_output_info(
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Prints info of the wait array. */
-
+UNIV_INTERN
 void
 sync_array_print_info(
 /*==================*/
-	FILE*		file,	/* in: file where to print */
-	sync_array_t*	arr)	/* in: wait array */
+	FILE*		file,	/*!< in: file where to print */
+	sync_array_t*	arr)	/*!< in: wait array */
 {
 	sync_array_enter(arr);
 
diff --git a/storage/innobase/sync/sync0rw.c b/storage/innodb_plugin/sync/sync0rw.c
similarity index 67%
rename from storage/innobase/sync/sync0rw.c
rename to storage/innodb_plugin/sync/sync0rw.c
index 2fcf75009a6..0ed114e330c 100644
--- a/storage/innobase/sync/sync0rw.c
+++ b/storage/innodb_plugin/sync/sync0rw.c
@@ -1,7 +1,31 @@
-/******************************************************
-The read-write lock (for thread synchronization)
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file sync/sync0rw.c
+The read-write lock (for thread synchronization)
 
 Created 9/11/1995 Heikki Tuuri
 *******************************************************/
@@ -21,22 +45,22 @@ Created 9/11/1995 Heikki Tuuri
 The status of a rw_lock is held in lock_word. The initial value of lock_word is
 X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR
 for each x-lock. This describes the lock state for each value of lock_word:
- 
+
 lock_word == X_LOCK_DECR:      Unlocked.
 0 < lock_word < X_LOCK_DECR:   Read locked, no waiting writers.
- 			       (X_LOCK_DECR - lock_word) is the
- 			       number of readers that hold the lock.
+			       (X_LOCK_DECR - lock_word) is the
+			       number of readers that hold the lock.
 lock_word == 0:		       Write locked
 -X_LOCK_DECR < lock_word < 0:  Read locked, with a waiting writer.
- 			       (-lock_word) is the number of readers
- 			       that hold the lock.
+			       (-lock_word) is the number of readers
+			       that hold the lock.
 lock_word <= -X_LOCK_DECR:     Recursively write locked. lock_word has been
- 			       decremented by X_LOCK_DECR once for each lock,
- 			       so the number of locks is:
- 			       ((-lock_word) / X_LOCK_DECR) + 1
+			       decremented by X_LOCK_DECR once for each lock,
+			       so the number of locks is:
+			       ((-lock_word) / X_LOCK_DECR) + 1
 When lock_word <= -X_LOCK_DECR, we also know that lock_word % X_LOCK_DECR == 0:
 other values of lock_word are invalid.
- 
+
 The lock_word is always read and updated atomically and consistently, so that
 it always represents the state of the lock, and the state of the lock changes
 with a single atomic operation. This lock_word holds all of the information
@@ -46,103 +70,122 @@ must be verified before recursive write locks: to solve this scenario, we make
 writer_thread readable by all threads, but only writeable by the x-lock holder.
 
 The other members of the lock obey the following rules to remain consistent:
- 
-pass:		This is only set to 1 to prevent recursive x-locks. It must
- 		be set as specified by x_lock caller after the lock_word
- 		indicates that the thread holds the lock, but before that
- 		thread resumes execution. It must also be set to 1 during the
- 		final x_unlock, but before the lock_word status is updated.
- 		When an x_lock or move_ownership call wishes to change
- 		pass, it must first update the writer_thread appropriately.
-writer_thread:	Must be set to the writers thread_id after the lock_word
- 		indicates that the thread holds the lock, but before that
- 		thread resumes execution. writer_thread may be invalid and
-                should not be read when pass == 1. A thread trying to become
-                writer never reads its own stale writer_thread, since it sets
-                pass during its previous unlock call.
+
+recursive:	This and the writer_thread field together control the
+		behaviour of recursive x-locking.
+		lock->recursive must be FALSE in following states:
+			1) The writer_thread contains garbage i.e.: the
+			lock has just been initialized.
+			2) The lock is not x-held and there is no
+			x-waiter waiting on WAIT_EX event.
+			3) The lock is x-held or there is an x-waiter
+			waiting on WAIT_EX event but the 'pass' value
+			is non-zero.
+		lock->recursive is TRUE iff:
+			1) The lock is x-held or there is an x-waiter
+			waiting on WAIT_EX event and the 'pass' value
+			is zero.
+		This flag must be set after the writer_thread field
+		has been updated with a memory ordering barrier.
+		It is unset before the lock_word has been incremented.
+writer_thread:	Is used only in recursive x-locking. Can only be safely
+		read iff lock->recursive flag is TRUE.
+		This field is uninitialized at lock creation time and
+		is updated atomically when x-lock is acquired or when
+		move_ownership is called. A thread is only allowed to
+		set the value of this field to it's thread_id i.e.: a
+		thread cannot set writer_thread to some other thread's
+		id.
 waiters:	May be set to 1 anytime, but to avoid unnecessary wake-up
- 		signals, it should only be set to 1 when there are threads
- 		waiting on event. Must be 1 when a writer starts waiting to
- 		ensure the current x-locking thread sends a wake-up signal
- 		during unlock. May only be reset to 0 immediately before a
- 		a wake-up signal is sent to event.
+		signals, it should only be set to 1 when there are threads
+		waiting on event. Must be 1 when a writer starts waiting to
+		ensure the current x-locking thread sends a wake-up signal
+		during unlock. May only be reset to 0 immediately before a
+		a wake-up signal is sent to event. On most platforms, a
+		memory barrier is required after waiters is set, and before
+		verifying lock_word is still held, to ensure some unlocker
+		really does see the flags new value.
 event:		Threads wait on event for read or writer lock when another
- 		thread has an x-lock or an x-lock reservation (wait_ex). A
- 		thread may only	wait on event after performing the following
- 		actions in order:
- 		   (1) Record the counter value of event (with os_event_reset).
- 		   (2) Set waiters to 1.
- 		   (3) Verify lock_word <= 0.
- 		(1) must come before (2) to ensure signal is not missed.
- 		(2) must come before (3) to ensure a signal is sent.
- 		These restrictions force the above ordering.
- 		Immediately before sending the wake-up signal, we should:
- 		   (1) Verify lock_word == X_LOCK_DECR (unlocked)
- 		   (2) Reset waiters to 0.
+		thread has an x-lock or an x-lock reservation (wait_ex). A
+		thread may only	wait on event after performing the following
+		actions in order:
+		   (1) Record the counter value of event (with os_event_reset).
+		   (2) Set waiters to 1.
+		   (3) Verify lock_word <= 0.
+		(1) must come before (2) to ensure signal is not missed.
+		(2) must come before (3) to ensure a signal is sent.
+		These restrictions force the above ordering.
+		Immediately before sending the wake-up signal, we should:
+		   (1) Verify lock_word == X_LOCK_DECR (unlocked)
+		   (2) Reset waiters to 0.
 wait_ex_event:	A thread may only wait on the wait_ex_event after it has
- 		performed the following actions in order:
- 		   (1) Decrement lock_word by X_LOCK_DECR.
- 		   (2) Record counter value of wait_ex_event (os_event_reset,
-                        called from sync_array_reserve_cell).
- 		   (3) Verify that lock_word < 0.
- 		(1) must come first to ensures no other threads become reader
-                 or next writer, and notifies unlocker that signal must be sent.
-                 (2) must come before (3) to ensure the signal is not missed.
- 		These restrictions force the above ordering.
- 		Immediately before sending the wake-up signal, we should:
+		performed the following actions in order:
+		   (1) Decrement lock_word by X_LOCK_DECR.
+		   (2) Record counter value of wait_ex_event (os_event_reset,
+                       called from sync_array_reserve_cell).
+		   (3) Verify that lock_word < 0.
+		(1) must come first to ensures no other threads become reader
+                or next writer, and notifies unlocker that signal must be sent.
+                (2) must come before (3) to ensure the signal is not missed.
+		These restrictions force the above ordering.
+		Immediately before sending the wake-up signal, we should:
 		   Verify lock_word == 0 (waiting thread holds x_lock)
 */
 
 
-/* number of spin waits on rw-latches,
+/** number of spin waits on rw-latches,
 resulted during shared (read) locks */
-ib_longlong	rw_s_spin_wait_count	= 0;
-ib_longlong	rw_s_spin_round_count	= 0;
-
-/* number of OS waits on rw-latches,
+UNIV_INTERN ib_int64_t	rw_s_spin_wait_count	= 0;
+/** number of spin loop rounds on rw-latches,
 resulted during shared (read) locks */
-ib_longlong	rw_s_os_wait_count	= 0;
+UNIV_INTERN ib_int64_t	rw_s_spin_round_count	= 0;
 
-/* number of unlocks (that unlock shared locks),
+/** number of OS waits on rw-latches,
+resulted during shared (read) locks */
+UNIV_INTERN ib_int64_t	rw_s_os_wait_count	= 0;
+
+/** number of unlocks (that unlock shared locks),
 set only when UNIV_SYNC_PERF_STAT is defined */
-ib_longlong	rw_s_exit_count		= 0;
+UNIV_INTERN ib_int64_t	rw_s_exit_count		= 0;
 
-/* number of spin waits on rw-latches,
+/** number of spin waits on rw-latches,
 resulted during exclusive (write) locks */
-ib_longlong	rw_x_spin_wait_count	= 0;
-ib_longlong	rw_x_spin_round_count	= 0;
-
-/* number of OS waits on rw-latches,
+UNIV_INTERN ib_int64_t	rw_x_spin_wait_count	= 0;
+/** number of spin loop rounds on rw-latches,
 resulted during exclusive (write) locks */
-ib_longlong	rw_x_os_wait_count	= 0;
+UNIV_INTERN ib_int64_t	rw_x_spin_round_count	= 0;
 
-/* number of unlocks (that unlock exclusive locks),
+/** number of OS waits on rw-latches,
+resulted during exclusive (write) locks */
+UNIV_INTERN ib_int64_t	rw_x_os_wait_count	= 0;
+
+/** number of unlocks (that unlock exclusive locks),
 set only when UNIV_SYNC_PERF_STAT is defined */
-ib_longlong	rw_x_exit_count		= 0;
+UNIV_INTERN ib_int64_t	rw_x_exit_count		= 0;
 
 /* The global list of rw-locks */
-rw_lock_list_t	rw_lock_list;
-mutex_t		rw_lock_list_mutex;
+UNIV_INTERN rw_lock_list_t	rw_lock_list;
+UNIV_INTERN mutex_t		rw_lock_list_mutex;
 
 #ifdef UNIV_SYNC_DEBUG
 /* The global mutex which protects debug info lists of all rw-locks.
 To modify the debug info list of an rw-lock, this mutex has to be
 acquired in addition to the mutex protecting the lock. */
 
-mutex_t		rw_lock_debug_mutex;
-os_event_t	rw_lock_debug_event;	/* If deadlock detection does not
-					get immediately the mutex, it may
-					wait for this event */
-ibool		rw_lock_debug_waiters;	/* This is set to TRUE, if there may
-					be waiters for the event */
+UNIV_INTERN mutex_t		rw_lock_debug_mutex;
+/* If deadlock detection does not get immediately the mutex,
+it may wait for this event */
+UNIV_INTERN os_event_t		rw_lock_debug_event;
+/* This is set to TRUE, if there may be waiters for the event */
+UNIV_INTERN ibool		rw_lock_debug_waiters;
 
-/**********************************************************************
+/******************************************************************//**
 Creates a debug info struct. */
 static
 rw_lock_debug_t*
 rw_lock_debug_create(void);
 /*======================*/
-/**********************************************************************
+/******************************************************************//**
 Frees a debug info struct. */
 static
 void
@@ -150,8 +193,9 @@ rw_lock_debug_free(
 /*===============*/
 	rw_lock_debug_t* info);
 
-/**********************************************************************
-Creates a debug info struct. */
+/******************************************************************//**
+Creates a debug info struct.
+@return	own: debug info struct */
 static
 rw_lock_debug_t*
 rw_lock_debug_create(void)
@@ -160,7 +204,7 @@ rw_lock_debug_create(void)
 	return((rw_lock_debug_t*) mem_alloc(sizeof(rw_lock_debug_t)));
 }
 
-/**********************************************************************
+/******************************************************************//**
 Frees a debug info struct. */
 static
 void
@@ -172,45 +216,49 @@ rw_lock_debug_free(
 }
 #endif /* UNIV_SYNC_DEBUG */
 
-/**********************************************************************
+/******************************************************************//**
 Creates, or rather, initializes an rw-lock object in a specified memory
 location (which must be appropriately aligned). The rw-lock is initialized
 to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
 is necessary only if the memory block containing it is freed. */
-
+UNIV_INTERN
 void
 rw_lock_create_func(
 /*================*/
-	rw_lock_t*	lock,		/* in: pointer to memory */
+	rw_lock_t*	lock,		/*!< in: pointer to memory */
 #ifdef UNIV_DEBUG
 # ifdef UNIV_SYNC_DEBUG
-	ulint		level,		/* in: level */
+	ulint		level,		/*!< in: level */
 # endif /* UNIV_SYNC_DEBUG */
-	const char*	cmutex_name, 	/* in: mutex name */
+	const char*	cmutex_name, 	/*!< in: mutex name */
 #endif /* UNIV_DEBUG */
-	const char*	cfile_name,	/* in: file name where created */
-	ulint 		cline)		/* in: file line where created */
+	const char*	cfile_name,	/*!< in: file name where created */
+	ulint		cline)		/*!< in: file line where created */
 {
 	/* If this is the very first time a synchronization object is
 	created, then the following call initializes the sync system. */
 
-#ifndef UNIV_SYNC_ATOMIC
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
 	mutex_create(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK);
 
 	lock->mutex.cfile_name = cfile_name;
 	lock->mutex.cline = cline;
 
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
-	lock->mutex.cmutex_name = cmutex_name;
-	lock->mutex.mutex_type = 1;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
-
-#endif /* UNIV_SYNC_ATOMIC */
+	ut_d(lock->mutex.cmutex_name = cmutex_name);
+	ut_d(lock->mutex.mutex_type = 1);
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+# ifdef UNIV_DEBUG
+	UT_NOT_USED(cmutex_name);
+# endif
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
 
 	lock->lock_word = X_LOCK_DECR;
 	lock->waiters = 0;
- 	lock->pass = 1;
- 	/* We do not have to initialize writer_thread until pass == 0 */
+
+	/* We set this value to signify that lock->writer_thread
+	contains garbage at initialization and cannot be used for
+	recursive x-locking. */
+	lock->recursive = FALSE;
 
 #ifdef UNIV_SYNC_DEBUG
 	UT_LIST_INIT(lock->debug_list);
@@ -243,24 +291,24 @@ rw_lock_create_func(
 	mutex_exit(&rw_lock_list_mutex);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Calling this function is obligatory only if the memory buffer containing
 the rw-lock is freed. Removes an rw-lock object from the global list. The
 rw-lock is checked to be in the non-locked state. */
-
+UNIV_INTERN
 void
 rw_lock_free(
 /*=========*/
-	rw_lock_t*	lock)	/* in: rw-lock */
+	rw_lock_t*	lock)	/*!< in: rw-lock */
 {
 	ut_ad(rw_lock_validate(lock));
 	ut_a(lock->lock_word == X_LOCK_DECR);
 
 	lock->magic_n = 0;
 
-#ifndef UNIV_SYNC_ATOMIC
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
 	mutex_free(rw_lock_get_mutex(lock));
-#endif /* UNIV_SYNC_ATOMIC */
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
 
 	mutex_enter(&rw_lock_list_mutex);
 	os_event_free(lock->event);
@@ -280,14 +328,15 @@ rw_lock_free(
 }
 
 #ifdef UNIV_DEBUG
-/**********************************************************************
+/******************************************************************//**
 Checks that the rw-lock has been initialized and that there are no
-simultaneous shared and exclusive locks. */
-
+simultaneous shared and exclusive locks.
+@return	TRUE */
+UNIV_INTERN
 ibool
 rw_lock_validate(
 /*=============*/
-	rw_lock_t*	lock)
+	rw_lock_t*	lock)	/*!< in: rw-lock */
 {
 	ut_a(lock);
 
@@ -302,27 +351,27 @@ rw_lock_validate(
 }
 #endif /* UNIV_DEBUG */
 
-/**********************************************************************
+/******************************************************************//**
 Lock an rw-lock in shared mode for the current thread. If the rw-lock is
 locked in exclusive mode, or there is an exclusive lock request waiting,
 the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
 for the lock, before suspending the thread. */
-
+UNIV_INTERN
 void
 rw_lock_s_lock_spin(
 /*================*/
-	rw_lock_t*	lock,	/* in: pointer to rw-lock */
-	ulint		pass,	/* in: pass value; != 0, if the lock
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock
 				will be passed to another thread to unlock */
-	const char*	file_name, /* in: file name where lock requested */
-	ulint		line)	/* in: line where requested */
+	const char*	file_name, /*!< in: file name where lock requested */
+	ulint		line)	/*!< in: line where requested */
 {
 	ulint	 index;	/* index of the reserved wait cell */
 	ulint	 i = 0;	/* spin round count */
 
 	ut_ad(rw_lock_validate(lock));
 
-	rw_s_spin_wait_count++;	/* Count calls to this function */
+	rw_s_spin_wait_count++;	/*!< Count calls to this function */
 lock_loop:
 
 	/* Spin waiting for the writer field to become free */
@@ -367,7 +416,7 @@ lock_loop:
 
 		/* Set waiters before checking lock_word to ensure wake-up
                 signal is sent. This may lead to some unnecessary signals. */
-		rw_lock_set_waiters(lock);
+		rw_lock_set_waiter_flag(lock);
 
 		if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
 			sync_array_free_cell(sync_primary_wait_array, index);
@@ -394,7 +443,7 @@ lock_loop:
 	}
 }
 
-/**********************************************************************
+/******************************************************************//**
 This function is used in the insert buffer to move the ownership of an
 x-latch on a buffer frame to the current thread. The x-latch was set by
 the buffer read operation and it protected the buffer frame while the
@@ -402,41 +451,32 @@ read was done. The ownership is moved because we want that the current
 thread is able to acquire a second x-latch which is stored in an mtr.
 This, in turn, is needed to pass the debug checks of index page
 operations. */
-
+UNIV_INTERN
 void
 rw_lock_x_lock_move_ownership(
 /*==========================*/
-	rw_lock_t*	lock)	/* in: lock which was x-locked in the
+	rw_lock_t*	lock)	/*!< in: lock which was x-locked in the
 				buffer read */
 {
 	ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
 
-#ifdef UNIV_SYNC_ATOMIC
-        lock->writer_thread = os_thread_get_curr_id();
-        os_memory_barrier_store();
-	lock->pass = 0;
-#else /* UNIV_SYNC_ATOMIC */
-	mutex_enter(&(lock->mutex));
-	lock->writer_thread = os_thread_get_curr_id();
-	lock->pass = 0;
-	mutex_exit(&(lock->mutex));
-#endif /* UNIV_SYNC_ATOMIC */
+	rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Function for the next writer to call. Waits for readers to exit.
-The caller must have already decremented lock_word by X_LOCK_DECR.*/
+The caller must have already decremented lock_word by X_LOCK_DECR. */
 UNIV_INLINE
 void
 rw_lock_x_lock_wait(
 /*================*/
-	rw_lock_t*	lock,	/* in: pointer to rw-lock */
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
 #ifdef UNIV_SYNC_DEBUG
-	ulint		pass,	/* in: pass value; != 0, if the lock will
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
 				be passed to another thread to unlock */
 #endif
-	const char*	file_name,/* in: file name where lock requested */
-	ulint		line)	/* in: line where requested */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line)	/*!< in: line where requested */
 {
 	ulint index;
 	ulint i = 0;
@@ -475,8 +515,8 @@ rw_lock_x_lock_wait(
 					       file_name, line);
 #endif
 
- 			sync_array_wait_event(sync_primary_wait_array,
- 					      index);
+			sync_array_wait_event(sync_primary_wait_array,
+					      index);
 #ifdef UNIV_SYNC_DEBUG
 			rw_lock_remove_debug_info(lock, pass,
 					       RW_LOCK_WAIT_EX);
@@ -491,28 +531,33 @@ rw_lock_x_lock_wait(
 	rw_x_spin_round_count += i;
 }
 
-/**********************************************************************
-Low-level function for acquiring an exclusive lock. */
+/******************************************************************//**
+Low-level function for acquiring an exclusive lock.
+@return	RW_LOCK_NOT_LOCKED if did not succeed, RW_LOCK_EX if success. */
 UNIV_INLINE
 ibool
 rw_lock_x_lock_low(
 /*===============*/
-				/* out: RW_LOCK_NOT_LOCKED if did
-				not succeed, RW_LOCK_EX if success. */
-	rw_lock_t*	lock,	/* in: pointer to rw-lock */
-	ulint		pass,	/* in: pass value; != 0, if the lock will
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
 				be passed to another thread to unlock */
-	const char*	file_name,/* in: file name where lock requested */
-	ulint		line)	/* in: line where requested */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line)	/*!< in: line where requested */
 {
 	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
 
-	if(rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
-		ut_ad(lock->pass);
+	if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
+
+		/* lock->recursive also tells us if the writer_thread
+		field is stale or active. As we are going to write
+		our own thread id in that field it must be that the
+		current writer_thread value is not active. */
+		ut_a(!lock->recursive);
 
 		/* Decrement occurred: we are writer or next-writer. */
-		lock->writer_thread = curr_thread;
-		lock->pass = pass;
+		rw_lock_set_writer_id_and_recursion_flag(lock,
+						pass ? FALSE : TRUE);
+
 		rw_lock_x_lock_wait(lock,
 #ifdef UNIV_SYNC_DEBUG
 				    pass,
@@ -521,12 +566,8 @@ rw_lock_x_lock_low(
 
 	} else {
 		/* Decrement failed: relock or failed lock */
-		/* Must verify pass first: otherwise another thread can
-		call move_ownership suddenly allowing recursive locks.
-		and after we have verified our thread_id matches
-		(though move_ownership has since changed it).*/
-		if(!pass && !(lock->pass) &&
-                   os_thread_eq(lock->writer_thread, curr_thread)) {
+		if (!pass && lock->recursive
+		    && os_thread_eq(lock->writer_thread, curr_thread)) {
 			/* Relock */
                         lock->lock_word -= X_LOCK_DECR;
 		} else {
@@ -544,7 +585,7 @@ rw_lock_x_lock_low(
 	return(TRUE);
 }
 
-/**********************************************************************
+/******************************************************************//**
 NOTE! Use the corresponding macro, not directly this function! Lock an
 rw-lock in exclusive mode for the current thread. If the rw-lock is locked
 in shared or exclusive mode, or there is an exclusive lock request waiting,
@@ -553,18 +594,18 @@ for the lock before suspending the thread. If the same thread has an x-lock
 on the rw-lock, locking succeed, with the following exception: if pass != 0,
 only a single x-lock may be taken on the lock. NOTE: If the same thread has
 an s-lock, locking does not succeed! */
-
+UNIV_INTERN
 void
 rw_lock_x_lock_func(
 /*================*/
-	rw_lock_t*	lock,	/* in: pointer to rw-lock */
-	ulint		pass,	/* in: pass value; != 0, if the lock will
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
 				be passed to another thread to unlock */
-	const char*	file_name,/* in: file name where lock requested */
-	ulint		line)	/* in: line where requested */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line)	/*!< in: line where requested */
 {
-	ulint	index;	/* index of the reserved wait cell */
-	ulint	i;	/* spin round count */
+	ulint	index;	/*!< index of the reserved wait cell */
+	ulint	i;	/*!< spin round count */
 	ibool   spinning = FALSE;
 
 	ut_ad(rw_lock_validate(lock));
@@ -620,7 +661,7 @@ lock_loop:
 
 	/* Waiters must be set before checking lock_word, to ensure signal
 	is sent. This could lead to a few unnecessary wake-up signals. */
-	rw_lock_set_waiters(lock);
+	rw_lock_set_waiter_flag(lock);
 
 	if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
 		sync_array_free_cell(sync_primary_wait_array, index);
@@ -646,13 +687,13 @@ lock_loop:
 }
 
 #ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
 Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
 because the debug mutex is also acquired in sync0arr while holding the OS
 mutex protecting the sync array, and the ordinary mutex_enter might
 recursively call routines in sync0arr, leading to a deadlock on the OS
 mutex. */
-
+UNIV_INTERN
 void
 rw_lock_debug_mutex_enter(void)
 /*==========================*/
@@ -675,9 +716,9 @@ loop:
 	goto loop;
 }
 
-/**********************************************************************
+/******************************************************************//**
 Releases the debug mutex. */
-
+UNIV_INTERN
 void
 rw_lock_debug_mutex_exit(void)
 /*==========================*/
@@ -690,17 +731,17 @@ rw_lock_debug_mutex_exit(void)
 	}
 }
 
-/**********************************************************************
+/******************************************************************//**
 Inserts the debug information for an rw-lock. */
-
+UNIV_INTERN
 void
 rw_lock_add_debug_info(
 /*===================*/
-	rw_lock_t*	lock,		/* in: rw-lock */
-	ulint		pass,		/* in: pass value */
-	ulint		lock_type,	/* in: lock type */
-	const char*	file_name,	/* in: file where requested */
-	ulint		line)		/* in: line where requested */
+	rw_lock_t*	lock,		/*!< in: rw-lock */
+	ulint		pass,		/*!< in: pass value */
+	ulint		lock_type,	/*!< in: lock type */
+	const char*	file_name,	/*!< in: file where requested */
+	ulint		line)		/*!< in: line where requested */
 {
 	rw_lock_debug_t*	info;
 
@@ -726,15 +767,15 @@ rw_lock_add_debug_info(
 	}
 }
 
-/**********************************************************************
+/******************************************************************//**
 Removes a debug information struct for an rw-lock. */
-
+UNIV_INTERN
 void
 rw_lock_remove_debug_info(
 /*======================*/
-	rw_lock_t*	lock,		/* in: rw-lock */
-	ulint		pass,		/* in: pass value */
-	ulint		lock_type)	/* in: lock type */
+	rw_lock_t*	lock,		/*!< in: rw-lock */
+	ulint		pass,		/*!< in: pass value */
+	ulint		lock_type)	/*!< in: lock type */
 {
 	rw_lock_debug_t*	info;
 
@@ -772,16 +813,16 @@ rw_lock_remove_debug_info(
 #endif /* UNIV_SYNC_DEBUG */
 
 #ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
 Checks if the thread has locked the rw-lock in the specified mode, with
-the pass value == 0. */
-
+the pass value == 0.
+@return	TRUE if locked */
+UNIV_INTERN
 ibool
 rw_lock_own(
 /*========*/
-					/* out: TRUE if locked */
-	rw_lock_t*	lock,		/* in: rw-lock */
-	ulint		lock_type)	/* in: lock type: RW_LOCK_SHARED,
+	rw_lock_t*	lock,		/*!< in: rw-lock */
+	ulint		lock_type)	/*!< in: lock type: RW_LOCK_SHARED,
 					RW_LOCK_EX */
 {
 	rw_lock_debug_t*	info;
@@ -813,15 +854,15 @@ rw_lock_own(
 }
 #endif /* UNIV_SYNC_DEBUG */
 
-/**********************************************************************
-Checks if somebody has locked the rw-lock in the specified mode. */
-
+/******************************************************************//**
+Checks if somebody has locked the rw-lock in the specified mode.
+@return	TRUE if locked */
+UNIV_INTERN
 ibool
 rw_lock_is_locked(
 /*==============*/
-					/* out: TRUE if locked */
-	rw_lock_t*	lock,		/* in: rw-lock */
-	ulint		lock_type)	/* in: lock type: RW_LOCK_SHARED,
+	rw_lock_t*	lock,		/*!< in: rw-lock */
+	ulint		lock_type)	/*!< in: lock type: RW_LOCK_SHARED,
 					RW_LOCK_EX */
 {
 	ibool	ret	= FALSE;
@@ -845,13 +886,13 @@ rw_lock_is_locked(
 }
 
 #ifdef UNIV_SYNC_DEBUG
-/*******************************************************************
+/***************************************************************//**
 Prints debug info of currently locked rw-locks. */
-
+UNIV_INTERN
 void
 rw_lock_list_print_info(
 /*====================*/
-	FILE*	file)		/* in: file where to print */
+	FILE*	file)		/*!< in: file where to print */
 {
 	rw_lock_t*	lock;
 	ulint		count		= 0;
@@ -869,7 +910,7 @@ rw_lock_list_print_info(
 
 		count++;
 
-#ifndef UNIV_SYNC_ATOMIC
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
 		mutex_enter(&(lock->mutex));
 #endif
 		if (lock->lock_word != X_LOCK_DECR) {
@@ -888,7 +929,7 @@ rw_lock_list_print_info(
 				info = UT_LIST_GET_NEXT(list, info);
 			}
 		}
-#ifndef UNIV_SYNC_ATOMIC
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
 		mutex_exit(&(lock->mutex));
 #endif
 
@@ -899,13 +940,13 @@ rw_lock_list_print_info(
 	mutex_exit(&rw_lock_list_mutex);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Prints debug info of an rw-lock. */
-
+UNIV_INTERN
 void
 rw_lock_print(
 /*==========*/
-	rw_lock_t*	lock)	/* in: rw-lock */
+	rw_lock_t*	lock)	/*!< in: rw-lock */
 {
 	rw_lock_debug_t* info;
 
@@ -914,8 +955,13 @@ rw_lock_print(
 		"RW-LATCH INFO\n"
 		"RW-LATCH: %p ", (void*) lock);
 
-#ifndef UNIV_SYNC_ATOMIC
-	mutex_enter(&(lock->mutex));
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
+	/* We used to acquire lock->mutex here, but it would cause a
+	recursive call to sync_thread_add_level() if UNIV_SYNC_DEBUG
+	is defined.  Since this function is only invoked from
+	sync_thread_levels_g(), let us choose the smaller evil:
+	performing dirty reads instead of causing bogus deadlocks or
+	assertion failures. */
 #endif
 	if (lock->lock_word != X_LOCK_DECR) {
 
@@ -931,18 +977,15 @@ rw_lock_print(
 			info = UT_LIST_GET_NEXT(list, info);
 		}
 	}
-#ifndef UNIV_SYNC_ATOMIC
-	mutex_exit(&(lock->mutex));
-#endif
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Prints info of a debug struct. */
-
+UNIV_INTERN
 void
 rw_lock_debug_print(
 /*================*/
-	rw_lock_debug_t*	info)	/* in: debug struct */
+	rw_lock_debug_t*	info)	/*!< in: debug struct */
 {
 	ulint	rwt;
 
@@ -966,10 +1009,11 @@ rw_lock_debug_print(
 	putc('\n', stderr);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Returns the number of currently locked rw-locks. Works only in the debug
-version. */
-
+version.
+@return	number of locked rw-locks */
+UNIV_INTERN
 ulint
 rw_lock_n_locked(void)
 /*==================*/
diff --git a/storage/innobase/sync/sync0sync.c b/storage/innodb_plugin/sync/sync0sync.c
similarity index 79%
rename from storage/innobase/sync/sync0sync.c
rename to storage/innodb_plugin/sync/sync0sync.c
index a8b1ac4926e..84ed08e14e7 100644
--- a/storage/innobase/sync/sync0sync.c
+++ b/storage/innodb_plugin/sync/sync0sync.c
@@ -1,7 +1,31 @@
-/******************************************************
-Mutex, the basic synchronization primitive
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file sync/sync0sync.c
+Mutex, the basic synchronization primitive
 
 Created 9/5/1995 Heikki Tuuri
 *******************************************************/
@@ -140,105 +164,89 @@ Q.E.D. */
 
 /* Number of spin waits on mutexes: for performance monitoring */
 
-/* round=one iteration of a spin loop */
-ib_longlong	mutex_spin_round_count		= 0;
-ib_longlong	mutex_spin_wait_count		= 0;
-ib_longlong	mutex_os_wait_count		= 0;
-ib_longlong	mutex_exit_count		= 0;
+/** The number of iterations in the mutex_spin_wait() spin loop.
+Intended for performance monitoring. */
+static ib_int64_t	mutex_spin_round_count		= 0;
+/** The number of mutex_spin_wait() calls.  Intended for
+performance monitoring. */
+static ib_int64_t	mutex_spin_wait_count		= 0;
+/** The number of OS waits in mutex_spin_wait().  Intended for
+performance monitoring. */
+static ib_int64_t	mutex_os_wait_count		= 0;
+/** The number of mutex_exit() calls. Intended for performance
+monitoring. */
+UNIV_INTERN ib_int64_t	mutex_exit_count		= 0;
 
-/* The global array of wait cells for implementation of the database's own
+/** The global array of wait cells for implementation of the database's own
 mutexes and read-write locks */
-sync_array_t*	sync_primary_wait_array;
-
-/* This variable is set to TRUE when sync_init is called */
-ibool	sync_initialized	= FALSE;
+UNIV_INTERN sync_array_t*	sync_primary_wait_array;
 
+/** This variable is set to TRUE when sync_init is called */
+UNIV_INTERN ibool	sync_initialized	= FALSE;
 
+/** An acquired mutex or rw-lock and its level in the latching order */
 typedef struct sync_level_struct	sync_level_t;
+/** Mutexes or rw-locks held by a thread */
 typedef struct sync_thread_struct	sync_thread_t;
 
 #ifdef UNIV_SYNC_DEBUG
-/* The latch levels currently owned by threads are stored in this data
+/** The latch levels currently owned by threads are stored in this data
 structure; the size of this array is OS_THREAD_MAX_N */
 
-sync_thread_t*	sync_thread_level_arrays;
+UNIV_INTERN sync_thread_t*	sync_thread_level_arrays;
 
-/* Mutex protecting sync_thread_level_arrays */
-mutex_t	sync_thread_mutex;
+/** Mutex protecting sync_thread_level_arrays */
+UNIV_INTERN mutex_t		sync_thread_mutex;
 #endif /* UNIV_SYNC_DEBUG */
 
-/* Global list of database mutexes (not OS mutexes) created. */
-ut_list_base_node_t  mutex_list;
+/** Global list of database mutexes (not OS mutexes) created. */
+UNIV_INTERN ut_list_base_node_t  mutex_list;
 
-/* Mutex protecting the mutex_list variable */
-mutex_t mutex_list_mutex;
+/** Mutex protecting the mutex_list variable */
+UNIV_INTERN mutex_t mutex_list_mutex;
 
 #ifdef UNIV_SYNC_DEBUG
-/* Latching order checks start when this is set TRUE */
-ibool	sync_order_checks_on	= FALSE;
+/** Latching order checks start when this is set TRUE */
+UNIV_INTERN ibool	sync_order_checks_on	= FALSE;
 #endif /* UNIV_SYNC_DEBUG */
 
+/** Mutexes or rw-locks held by a thread */
 struct sync_thread_struct{
-	os_thread_id_t	id;	/* OS thread id */
-	sync_level_t*	levels;	/* level array for this thread; if this is NULL
-				this slot is unused */
+	os_thread_id_t	id;	/*!< OS thread id */
+	sync_level_t*	levels;	/*!< level array for this thread; if
+				this is NULL this slot is unused */
 };
 
-/* Number of slots reserved for each OS thread in the sync level array */
+/** Number of slots reserved for each OS thread in the sync level array */
 #define SYNC_THREAD_N_LEVELS	10000
 
+/** An acquired mutex or rw-lock and its level in the latching order */
 struct sync_level_struct{
-	void*	latch;	/* pointer to a mutex or an rw-lock; NULL means that
+	void*	latch;	/*!< pointer to a mutex or an rw-lock; NULL means that
 			the slot is empty */
-	ulint	level;	/* level of the latch in the latching order */
+	ulint	level;	/*!< level of the latch in the latching order */
 };
 
-/**********************************************************************
-A noninlined function that reserves a mutex. In ha_innodb.cc we have disabled
-inlining of InnoDB functions, and no inlined functions should be called from
-there. That is why we need to duplicate the inlined function here. */
-
-void
-mutex_enter_noninline(
-/*==================*/
-	mutex_t*	mutex)	/* in: mutex */
-{
-	mutex_enter(mutex);
-}
-
-/**********************************************************************
-Releases a mutex. */
-
-void
-mutex_exit_noninline(
-/*=================*/
-	mutex_t*	mutex)	/* in: mutex */
-{
-	mutex_exit(mutex);
-}
-
-/**********************************************************************
+/******************************************************************//**
 Creates, or rather, initializes a mutex object in a specified memory
 location (which must be appropriately aligned). The mutex is initialized
 in the reset state. Explicit freeing of the mutex with mutex_free is
 necessary only if the memory block containing it is freed. */
-
+UNIV_INTERN
 void
 mutex_create_func(
 /*==============*/
-	mutex_t*	mutex,		/* in: pointer to memory */
+	mutex_t*	mutex,		/*!< in: pointer to memory */
 #ifdef UNIV_DEBUG
-	const char*	cmutex_name,	/* in: mutex name */
+	const char*	cmutex_name,	/*!< in: mutex name */
 # ifdef UNIV_SYNC_DEBUG
-	ulint		level,		/* in: level */
+	ulint		level,		/*!< in: level */
 # endif /* UNIV_SYNC_DEBUG */
 #endif /* UNIV_DEBUG */
-	const char*	cfile_name,	/* in: file name where created */
-	ulint		cline)		/* in: file line where created */
+	const char*	cfile_name,	/*!< in: file name where created */
+	ulint		cline)		/*!< in: file line where created */
 {
-#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
-	mutex_reset_lock_word(mutex);
-#elif defined(MY_ATOMIC_NOLOCK)
+#if defined(HAVE_ATOMIC_BUILTINS)
 	mutex_reset_lock_word(mutex);
 #else
 	os_fast_mutex_init(&(mutex->os_fast_mutex));
@@ -256,9 +264,8 @@ mutex_create_func(
 #endif /* UNIV_SYNC_DEBUG */
 	mutex->cfile_name = cfile_name;
 	mutex->cline = cline;
-#ifndef UNIV_HOTBACKUP
 	mutex->count_os_wait = 0;
-# ifdef UNIV_DEBUG
+#ifdef UNIV_DEBUG
 	mutex->cmutex_name=	  cmutex_name;
 	mutex->count_using=	  0;
 	mutex->mutex_type=	  0;
@@ -267,8 +274,7 @@ mutex_create_func(
 	mutex->count_spin_loop= 0;
 	mutex->count_spin_rounds=   0;
 	mutex->count_os_yield=  0;
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
 
 	/* Check that lock_word is aligned; this is important on Intel */
 	ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
@@ -294,15 +300,15 @@ mutex_create_func(
 	mutex_exit(&mutex_list_mutex);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Calling this function is obligatory only if the memory buffer containing
 the mutex is freed. Removes a mutex object from the mutex list. The mutex
 is checked to be in the reset state. */
-
+UNIV_INTERN
 void
 mutex_free(
 /*=======*/
-	mutex_t*	mutex)	/* in: mutex */
+	mutex_t*	mutex)	/*!< in: mutex */
 {
 	ut_ad(mutex_validate(mutex));
 	ut_a(mutex_get_lock_word(mutex) == 0);
@@ -330,9 +336,7 @@ mutex_free(
 
 	os_event_free(mutex->event);
 
-#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
-#elif defined(MY_ATOMIC_NOLOCK)
-#else
+#if !defined(HAVE_ATOMIC_BUILTINS)
 	os_fast_mutex_free(&(mutex->os_fast_mutex));
 #endif
 	/* If we free the mutex protecting the mutex list (freeing is
@@ -343,21 +347,21 @@ mutex_free(
 #endif /* UNIV_DEBUG */
 }
 
-/************************************************************************
+/********************************************************************//**
 NOTE! Use the corresponding macro in the header file, not this function
 directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1. */
-
+acquired immediately, returns with return value 1.
+@return	0 if succeed, 1 if not */
+UNIV_INTERN
 ulint
 mutex_enter_nowait_func(
 /*====================*/
-					/* out: 0 if succeed, 1 if not */
-	mutex_t*	mutex,		/* in: pointer to mutex */
+	mutex_t*	mutex,		/*!< in: pointer to mutex */
 	const char*	file_name __attribute__((unused)),
-					/* in: file name where mutex
+					/*!< in: file name where mutex
 					requested */
 	ulint		line __attribute__((unused)))
-					/* in: line where requested */
+					/*!< in: line where requested */
 {
 	ut_ad(mutex_validate(mutex));
 
@@ -375,13 +379,14 @@ mutex_enter_nowait_func(
 }
 
 #ifdef UNIV_DEBUG
-/**********************************************************************
-Checks that the mutex has been initialized. */
-
+/******************************************************************//**
+Checks that the mutex has been initialized.
+@return	TRUE */
+UNIV_INTERN
 ibool
 mutex_validate(
 /*===========*/
-	const mutex_t*	mutex)
+	const mutex_t*	mutex)	/*!< in: mutex */
 {
 	ut_a(mutex);
 	ut_a(mutex->magic_n == MUTEX_MAGIC_N);
@@ -389,15 +394,15 @@ mutex_validate(
 	return(TRUE);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Checks that the current thread owns the mutex. Works only in the debug
-version. */
-
+version.
+@return	TRUE if owns */
+UNIV_INTERN
 ibool
 mutex_own(
 /*======*/
-				/* out: TRUE if owns */
-	const mutex_t*	mutex)	/* in: mutex */
+	const mutex_t*	mutex)	/*!< in: mutex */
 {
 	ut_ad(mutex_validate(mutex));
 
@@ -406,14 +411,14 @@ mutex_own(
 }
 #endif /* UNIV_DEBUG */
 
-/**********************************************************************
+/******************************************************************//**
 Sets the waiters field in a mutex. */
-
+UNIV_INTERN
 void
 mutex_set_waiters(
 /*==============*/
-	mutex_t*	mutex,	/* in: mutex */
-	ulint		n)	/* in: value to set */
+	mutex_t*	mutex,	/*!< in: mutex */
+	ulint		n)	/*!< in: value to set */
 {
 	volatile ulint*	ptr;		/* declared volatile to ensure that
 					the value is stored to memory */
@@ -425,28 +430,28 @@ mutex_set_waiters(
 				word in memory is atomic */
 }
 
-/**********************************************************************
+/******************************************************************//**
 Reserves a mutex for the current thread. If the mutex is reserved, the
 function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
 for the mutex before suspending the thread. */
-
+UNIV_INTERN
 void
 mutex_spin_wait(
 /*============*/
-	mutex_t*	mutex,		/* in: pointer to mutex */
-	const char*	file_name,	/* in: file name where mutex
+	mutex_t*	mutex,		/*!< in: pointer to mutex */
+	const char*	file_name,	/*!< in: file name where mutex
 					requested */
-	ulint		line)		/* in: line where requested */
+	ulint		line)		/*!< in: line where requested */
 {
 	ulint	   index; /* index of the reserved wait cell */
 	ulint	   i;	  /* spin round count */
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
-	ib_longlong lstart_time = 0, lfinish_time; /* for timing os_wait */
+#ifdef UNIV_DEBUG
+	ib_int64_t lstart_time = 0, lfinish_time; /* for timing os_wait */
 	ulint ltime_diff;
 	ulint sec;
 	ulint ms;
 	uint timer_started = 0;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
 	ut_ad(mutex);
 
 	/* This update is not thread safe, but we don't mind if the count
@@ -466,9 +471,7 @@ mutex_loop:
 	a memory word. */
 
 spin_loop:
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
-	mutex->count_spin_loop++;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+	ut_d(mutex->count_spin_loop++);
 
 	while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
 		if (srv_spin_wait_delay) {
@@ -479,14 +482,16 @@ spin_loop:
 	}
 
 	if (i == SYNC_SPIN_ROUNDS) {
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
+#ifdef UNIV_DEBUG
 		mutex->count_os_yield++;
-		if (timed_mutexes == 1 && timer_started==0) {
+#ifndef UNIV_HOTBACKUP
+		if (timed_mutexes && timer_started == 0) {
 			ut_usectime(&sec, &ms);
-			lstart_time= (ib_longlong)sec * 1000000 + ms;
+			lstart_time= (ib_int64_t)sec * 1000000 + ms;
 			timer_started = 1;
 		}
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+#endif /* UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
 		os_thread_yield();
 	}
 
@@ -500,9 +505,7 @@ spin_loop:
 
 	mutex_spin_round_count += i;
 
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
-	mutex->count_spin_rounds += i;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+	ut_d(mutex->count_spin_rounds += i);
 
 	if (mutex_test_and_set(mutex) == 0) {
 		/* Succeeded! */
@@ -579,27 +582,26 @@ spin_loop:
 
 	mutex_os_wait_count++;
 
-#ifndef UNIV_HOTBACKUP
 	mutex->count_os_wait++;
-# ifdef UNIV_DEBUG
+#ifdef UNIV_DEBUG
 	/* !!!!! Sometimes os_wait can be called without os_thread_yield */
-
-	if (timed_mutexes == 1 && timer_started==0) {
+#ifndef UNIV_HOTBACKUP
+	if (timed_mutexes == 1 && timer_started == 0) {
 		ut_usectime(&sec, &ms);
-		lstart_time= (ib_longlong)sec * 1000000 + ms;
+		lstart_time= (ib_int64_t)sec * 1000000 + ms;
 		timer_started = 1;
 	}
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
 
 	sync_array_wait_event(sync_primary_wait_array, index);
 	goto mutex_loop;
 
 finish_timing:
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
+#ifdef UNIV_DEBUG
 	if (timed_mutexes == 1 && timer_started==1) {
 		ut_usectime(&sec, &ms);
-		lfinish_time= (ib_longlong)sec * 1000000 + ms;
+		lfinish_time= (ib_int64_t)sec * 1000000 + ms;
 
 		ltime_diff= (ulint) (lfinish_time - lstart_time);
 		mutex->lspent_time += ltime_diff;
@@ -608,17 +610,17 @@ finish_timing:
 			mutex->lmax_spent_time= ltime_diff;
 		}
 	}
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
 	return;
 }
 
-/**********************************************************************
+/******************************************************************//**
 Releases the threads waiting in the primary wait array for this mutex. */
-
+UNIV_INTERN
 void
 mutex_signal_object(
 /*================*/
-	mutex_t*	mutex)	/* in: mutex */
+	mutex_t*	mutex)	/*!< in: mutex */
 {
 	mutex_set_waiters(mutex, 0);
 
@@ -629,15 +631,15 @@ mutex_signal_object(
 }
 
 #ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
 Sets the debug information for a reserved mutex. */
-
+UNIV_INTERN
 void
 mutex_set_debug_info(
 /*=================*/
-	mutex_t*	mutex,		/* in: mutex */
-	const char*	file_name,	/* in: file where requested */
-	ulint		line)		/* in: line where requested */
+	mutex_t*	mutex,		/*!< in: mutex */
+	const char*	file_name,	/*!< in: file where requested */
+	ulint		line)		/*!< in: line where requested */
 {
 	ut_ad(mutex);
 	ut_ad(file_name);
@@ -648,16 +650,16 @@ mutex_set_debug_info(
 	mutex->line	 = line;
 }
 
-/**********************************************************************
+/******************************************************************//**
 Gets the debug information for a reserved mutex. */
-
+UNIV_INTERN
 void
 mutex_get_debug_info(
 /*=================*/
-	mutex_t*	mutex,		/* in: mutex */
-	const char**	file_name,	/* out: file where requested */
-	ulint*		line,		/* out: line where requested */
-	os_thread_id_t* thread_id)	/* out: id of the thread which owns
+	mutex_t*	mutex,		/*!< in: mutex */
+	const char**	file_name,	/*!< out: file where requested */
+	ulint*		line,		/*!< out: line where requested */
+	os_thread_id_t* thread_id)	/*!< out: id of the thread which owns
 					the mutex */
 {
 	ut_ad(mutex);
@@ -667,13 +669,13 @@ mutex_get_debug_info(
 	*thread_id = mutex->thread_id;
 }
 
-/**********************************************************************
+/******************************************************************//**
 Prints debug info of currently reserved mutexes. */
 static
 void
 mutex_list_print_info(
 /*==================*/
-	FILE*	file)		/* in: file where to print */
+	FILE*	file)		/*!< in: file where to print */
 {
 	mutex_t*	mutex;
 	const char*	file_name;
@@ -710,9 +712,10 @@ mutex_list_print_info(
 	mutex_exit(&mutex_list_mutex);
 }
 
-/**********************************************************************
-Counts currently reserved mutexes. Works only in the debug version. */
-
+/******************************************************************//**
+Counts currently reserved mutexes. Works only in the debug version.
+@return	number of reserved mutexes */
+UNIV_INTERN
 ulint
 mutex_n_reserved(void)
 /*==================*/
@@ -741,10 +744,11 @@ mutex_n_reserved(void)
 			   was holding one mutex (mutex_list_mutex) */
 }
 
-/**********************************************************************
+/******************************************************************//**
 Returns TRUE if no mutex or rw-lock is currently locked. Works only in
-the debug version. */
-
+the debug version.
+@return	TRUE if no mutexes and rw-locks reserved */
+UNIV_INTERN
 ibool
 sync_all_freed(void)
 /*================*/
@@ -752,27 +756,27 @@ sync_all_freed(void)
 	return(mutex_n_reserved() + rw_lock_n_locked() == 0);
 }
 
-/**********************************************************************
-Gets the value in the nth slot in the thread level arrays. */
+/******************************************************************//**
+Gets the value in the nth slot in the thread level arrays.
+@return	pointer to thread slot */
 static
 sync_thread_t*
 sync_thread_level_arrays_get_nth(
 /*=============================*/
-			/* out: pointer to thread slot */
-	ulint	n)	/* in: slot number */
+	ulint	n)	/*!< in: slot number */
 {
 	ut_ad(n < OS_THREAD_MAX_N);
 
 	return(sync_thread_level_arrays + n);
 }
 
-/**********************************************************************
-Looks for the thread slot for the calling thread. */
+/******************************************************************//**
+Looks for the thread slot for the calling thread.
+@return	pointer to thread slot, NULL if not found */
 static
 sync_thread_t*
 sync_thread_level_arrays_find_slot(void)
 /*====================================*/
-			/* out: pointer to thread slot, NULL if not found */
 
 {
 	sync_thread_t*	slot;
@@ -794,13 +798,13 @@ sync_thread_level_arrays_find_slot(void)
 	return(NULL);
 }
 
-/**********************************************************************
-Looks for an unused thread slot. */
+/******************************************************************//**
+Looks for an unused thread slot.
+@return	pointer to thread slot */
 static
 sync_thread_t*
 sync_thread_level_arrays_find_free(void)
 /*====================================*/
-			/* out: pointer to thread slot */
 
 {
 	sync_thread_t*	slot;
@@ -819,33 +823,33 @@ sync_thread_level_arrays_find_free(void)
 	return(NULL);
 }
 
-/**********************************************************************
-Gets the value in the nth slot in the thread level array. */
+/******************************************************************//**
+Gets the value in the nth slot in the thread level array.
+@return	pointer to level slot */
 static
 sync_level_t*
 sync_thread_levels_get_nth(
 /*=======================*/
-				/* out: pointer to level slot */
-	sync_level_t*	arr,	/* in: pointer to level array for an OS
+	sync_level_t*	arr,	/*!< in: pointer to level array for an OS
 				thread */
-	ulint		n)	/* in: slot number */
+	ulint		n)	/*!< in: slot number */
 {
 	ut_ad(n < SYNC_THREAD_N_LEVELS);
 
 	return(arr + n);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Checks if all the level values stored in the level array are greater than
-the given limit. */
+the given limit.
+@return	TRUE if all greater */
 static
 ibool
 sync_thread_levels_g(
 /*=================*/
-				/* out: TRUE if all greater */
-	sync_level_t*	arr,	/* in: pointer to level array for an OS
+	sync_level_t*	arr,	/*!< in: pointer to level array for an OS
 				thread */
-	ulint		limit)	/* in: level limit */
+	ulint		limit)	/*!< in: level limit */
 {
 	sync_level_t*	slot;
 	rw_lock_t*	lock;
@@ -906,16 +910,16 @@ sync_thread_levels_g(
 	return(TRUE);
 }
 
-/**********************************************************************
-Checks if the level value is stored in the level array. */
+/******************************************************************//**
+Checks if the level value is stored in the level array.
+@return	TRUE if stored */
 static
 ibool
 sync_thread_levels_contain(
 /*=======================*/
-				/* out: TRUE if stored */
-	sync_level_t*	arr,	/* in: pointer to level array for an OS
+	sync_level_t*	arr,	/*!< in: pointer to level array for an OS
 				thread */
-	ulint		level)	/* in: level */
+	ulint		level)	/*!< in: level */
 {
 	sync_level_t*	slot;
 	ulint		i;
@@ -935,15 +939,14 @@ sync_thread_levels_contain(
 	return(FALSE);
 }
 
-/**********************************************************************
-Checks that the level array for the current thread is empty. */
-
+/******************************************************************//**
+Checks that the level array for the current thread is empty.
+@return	TRUE if empty except the exceptions specified below */
+UNIV_INTERN
 ibool
 sync_thread_levels_empty_gen(
 /*=========================*/
-					/* out: TRUE if empty except the
-					exceptions specified below */
-	ibool	dict_mutex_allowed)	/* in: TRUE if dictionary mutex is
+	ibool	dict_mutex_allowed)	/*!< in: TRUE if dictionary mutex is
 					allowed to be owned by the thread,
 					also purge_is_running mutex is
 					allowed */
@@ -992,27 +995,27 @@ sync_thread_levels_empty_gen(
 	return(TRUE);
 }
 
-/**********************************************************************
-Checks that the level array for the current thread is empty. */
-
+/******************************************************************//**
+Checks that the level array for the current thread is empty.
+@return	TRUE if empty */
+UNIV_INTERN
 ibool
 sync_thread_levels_empty(void)
 /*==========================*/
-			/* out: TRUE if empty */
 {
 	return(sync_thread_levels_empty_gen(FALSE));
 }
 
-/**********************************************************************
+/******************************************************************//**
 Adds a latch and its level in the thread level array. Allocates the memory
 for the array if called first time for this OS thread. Makes the checks
 against other latch levels stored in the array for this thread. */
-
+UNIV_INTERN
 void
 sync_thread_add_level(
 /*==================*/
-	void*	latch,	/* in: pointer to a mutex or an rw-lock */
-	ulint	level)	/* in: level in the latching order; if
+	void*	latch,	/*!< in: pointer to a mutex or an rw-lock */
+	ulint	level)	/*!< in: level in the latching order; if
 			SYNC_LEVEL_VARYING, nothing is done */
 {
 	sync_level_t*	array;
@@ -1073,62 +1076,59 @@ sync_thread_add_level(
 		/* Do no order checking */
 		break;
 	case SYNC_MEM_POOL:
-		ut_a(sync_thread_levels_g(array, SYNC_MEM_POOL));
-		break;
 	case SYNC_MEM_HASH:
-		ut_a(sync_thread_levels_g(array, SYNC_MEM_HASH));
-		break;
 	case SYNC_RECV:
-		ut_a(sync_thread_levels_g(array, SYNC_RECV));
-		break;
 	case SYNC_WORK_QUEUE:
-		ut_a(sync_thread_levels_g(array, SYNC_WORK_QUEUE));
-		break;
 	case SYNC_LOG:
-		ut_a(sync_thread_levels_g(array, SYNC_LOG));
-		break;
 	case SYNC_THR_LOCAL:
-		ut_a(sync_thread_levels_g(array, SYNC_THR_LOCAL));
-		break;
 	case SYNC_ANY_LATCH:
-		ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH));
-		break;
 	case SYNC_TRX_SYS_HEADER:
-		ut_a(sync_thread_levels_g(array, SYNC_TRX_SYS_HEADER));
-		break;
+	case SYNC_FILE_FORMAT_TAG:
 	case SYNC_DOUBLEWRITE:
-		ut_a(sync_thread_levels_g(array, SYNC_DOUBLEWRITE));
+	case SYNC_BUF_POOL:
+	case SYNC_SEARCH_SYS:
+	case SYNC_SEARCH_SYS_CONF:
+	case SYNC_TRX_LOCK_HEAP:
+	case SYNC_KERNEL:
+	case SYNC_IBUF_BITMAP_MUTEX:
+	case SYNC_RSEG:
+	case SYNC_TRX_UNDO:
+	case SYNC_PURGE_LATCH:
+	case SYNC_PURGE_SYS:
+	case SYNC_DICT_AUTOINC_MUTEX:
+	case SYNC_DICT_OPERATION:
+	case SYNC_DICT_HEADER:
+	case SYNC_TRX_I_S_RWLOCK:
+	case SYNC_TRX_I_S_LAST_READ:
+		if (!sync_thread_levels_g(array, level)) {
+			fprintf(stderr,
+				"InnoDB: sync_thread_levels_g(array, %lu)"
+				" does not hold!\n", level);
+			ut_error;
+		}
 		break;
 	case SYNC_BUF_BLOCK:
-		ut_a((sync_thread_levels_contain(array, SYNC_BUF_POOL)
-		      && sync_thread_levels_g(array, SYNC_BUF_BLOCK - 1))
-		     || sync_thread_levels_g(array, SYNC_BUF_BLOCK));
-		break;
-	case SYNC_BUF_POOL:
-		ut_a(sync_thread_levels_g(array, SYNC_BUF_POOL));
-		break;
-	case SYNC_SEARCH_SYS:
-		ut_a(sync_thread_levels_g(array, SYNC_SEARCH_SYS));
-		break;
-	case SYNC_TRX_LOCK_HEAP:
-		ut_a(sync_thread_levels_g(array, SYNC_TRX_LOCK_HEAP));
+		/* Either the thread must own the buffer pool mutex
+		(buf_pool_mutex), or it is allowed to latch only ONE
+		buffer block (block->mutex or buf_pool_zip_mutex). */
+		if (!sync_thread_levels_g(array, level)) {
+			ut_a(sync_thread_levels_g(array, level - 1));
+			ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
+		}
 		break;
 	case SYNC_REC_LOCK:
 		ut_a((sync_thread_levels_contain(array, SYNC_KERNEL)
 		      && sync_thread_levels_g(array, SYNC_REC_LOCK - 1))
 		     || sync_thread_levels_g(array, SYNC_REC_LOCK));
 		break;
-	case SYNC_KERNEL:
-		ut_a(sync_thread_levels_g(array, SYNC_KERNEL));
-		break;
 	case SYNC_IBUF_BITMAP:
+		/* Either the thread must own the master mutex to all
+		the bitmap pages, or it is allowed to latch only ONE
+		bitmap page. */
 		ut_a((sync_thread_levels_contain(array, SYNC_IBUF_BITMAP_MUTEX)
 		      && sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1))
 		     || sync_thread_levels_g(array, SYNC_IBUF_BITMAP));
 		break;
-	case SYNC_IBUF_BITMAP_MUTEX:
-		ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP_MUTEX));
-		break;
 	case SYNC_FSP_PAGE:
 		ut_a(sync_thread_levels_contain(array, SYNC_FSP));
 		break;
@@ -1149,18 +1149,6 @@ sync_thread_add_level(
 		ut_a(sync_thread_levels_contain(array, SYNC_KERNEL)
 		     && sync_thread_levels_contain(array, SYNC_FSP_PAGE));
 		break;
-	case SYNC_RSEG:
-		ut_a(sync_thread_levels_g(array, SYNC_RSEG));
-		break;
-	case SYNC_TRX_UNDO:
-		ut_a(sync_thread_levels_g(array, SYNC_TRX_UNDO));
-		break;
-	case SYNC_PURGE_LATCH:
-		ut_a(sync_thread_levels_g(array, SYNC_PURGE_LATCH));
-		break;
-	case SYNC_PURGE_SYS:
-		ut_a(sync_thread_levels_g(array, SYNC_PURGE_SYS));
-		break;
 	case SYNC_TREE_NODE:
 		ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
 		     || sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
@@ -1189,15 +1177,6 @@ sync_thread_add_level(
 		     && !sync_thread_levels_contain(
 			     array, SYNC_IBUF_PESS_INSERT_MUTEX));
 		break;
-	case SYNC_DICT_AUTOINC_MUTEX:
-		ut_a(sync_thread_levels_g(array, SYNC_DICT_AUTOINC_MUTEX));
-		break;
-	case SYNC_DICT_OPERATION:
-		ut_a(sync_thread_levels_g(array, SYNC_DICT_OPERATION));
-		break;
-	case SYNC_DICT_HEADER:
-		ut_a(sync_thread_levels_g(array, SYNC_DICT_HEADER));
-		break;
 	case SYNC_DICT:
 #ifdef UNIV_DEBUG
 		ut_a(buf_debug_prints
@@ -1227,15 +1206,16 @@ sync_thread_add_level(
 	mutex_exit(&sync_thread_mutex);
 }
 
-/**********************************************************************
-Removes a latch from the thread level array if it is found there. */
-
+/******************************************************************//**
+Removes a latch from the thread level array if it is found there.
+@return TRUE if found in the array; it is no error if the latch is
+not found, as we presently are not able to determine the level for
+every latch reservation the program does */
+UNIV_INTERN
 ibool
 sync_thread_reset_level(
 /*====================*/
-			/* out: TRUE if found from the array; it is an error
-			if the latch is not found */
-	void*	latch)	/* in: pointer to a mutex or an rw-lock */
+	void*	latch)	/*!< in: pointer to a mutex or an rw-lock */
 {
 	sync_level_t*	array;
 	sync_level_t*	slot;
@@ -1282,6 +1262,18 @@ sync_thread_reset_level(
 		}
 	}
 
+	if (((mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
+		rw_lock_t*	rw_lock;
+
+		rw_lock = (rw_lock_t*) latch;
+
+		if (rw_lock->level == SYNC_LEVEL_VARYING) {
+			mutex_exit(&sync_thread_mutex);
+
+			return(TRUE);
+		}
+	}
+
 	ut_error;
 
 	mutex_exit(&sync_thread_mutex);
@@ -1290,9 +1282,9 @@ sync_thread_reset_level(
 }
 #endif /* UNIV_SYNC_DEBUG */
 
-/**********************************************************************
+/******************************************************************//**
 Initializes the synchronization data structures. */
-
+UNIV_INTERN
 void
 sync_init(void)
 /*===========*/
@@ -1344,10 +1336,10 @@ sync_init(void)
 #endif /* UNIV_SYNC_DEBUG */
 }
 
-/**********************************************************************
+/******************************************************************//**
 Frees the resources in InnoDB's own synchronization data structures. Use
 os_sync_free() after calling this. */
-
+UNIV_INTERN
 void
 sync_close(void)
 /*===========*/
@@ -1369,13 +1361,13 @@ sync_close(void)
 #endif /* UNIV_SYNC_DEBUG */
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Prints wait info of the sync system. */
-
+UNIV_INTERN
 void
 sync_print_wait_info(
 /*=================*/
-	FILE*	file)		/* in: file where to print */
+	FILE*	file)		/*!< in: file where to print */
 {
 #ifdef UNIV_SYNC_DEBUG
 	fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n",
@@ -1405,13 +1397,13 @@ sync_print_wait_info(
 		(rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Prints info of the sync system. */
-
+UNIV_INTERN
 void
 sync_print(
 /*=======*/
-	FILE*	file)		/* in: file where to print */
+	FILE*	file)		/*!< in: file where to print */
 {
 #ifdef UNIV_SYNC_DEBUG
 	mutex_list_print_info(file);
diff --git a/storage/innobase/thr/thr0loc.c b/storage/innodb_plugin/thr/thr0loc.c
similarity index 63%
rename from storage/innobase/thr/thr0loc.c
rename to storage/innodb_plugin/thr/thr0loc.c
index b803bd53101..18f7b0707bd 100644
--- a/storage/innobase/thr/thr0loc.c
+++ b/storage/innodb_plugin/thr/thr0loc.c
@@ -1,7 +1,24 @@
-/******************************************************
-The thread local storage
+/*****************************************************************************
 
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file thr/thr0loc.c
+The thread local storage
 
 Created 10/5/1995 Heikki Tuuri
 *******************************************************/
@@ -27,38 +44,41 @@ is protected by a mutex. If you need modify the program and put new data to
 the thread local storage, just add it to struct thr_local_struct in the
 header file. */
 
-/* Mutex protecting the local storage hash table */
-mutex_t	thr_local_mutex;
+/** Mutex protecting thr_local_hash */
+static mutex_t		thr_local_mutex;
 
-/* The hash table. The module is not yet initialized when it is NULL. */
-hash_table_t*	thr_local_hash	= NULL;
+/** The hash table. The module is not yet initialized when it is NULL. */
+static hash_table_t*	thr_local_hash	= NULL;
 
-/* The private data for each thread should be put to
-the structure below and the accessor functions written
-for the field. */
+/** Thread local data */
 typedef struct thr_local_struct thr_local_t;
 
+/** @brief Thread local data.
+The private data for each thread should be put to
+the structure below and the accessor functions written
+for the field. */
 struct thr_local_struct{
-	os_thread_id_t	id;	/* id of the thread which owns this struct */
-	os_thread_t	handle;	/* operating system handle to the thread */
-	ulint		slot_no;/* the index of the slot in the thread table
+	os_thread_id_t	id;	/*!< id of the thread which owns this struct */
+	os_thread_t	handle;	/*!< operating system handle to the thread */
+	ulint		slot_no;/*!< the index of the slot in the thread table
 				for this thread */
-	ibool		in_ibuf;/* TRUE if the the thread is doing an ibuf
+	ibool		in_ibuf;/*!< TRUE if the the thread is doing an ibuf
 				operation */
-	hash_node_t	hash;	/* hash chain node */
-	ulint		magic_n;
+	hash_node_t	hash;	/*!< hash chain node */
+	ulint		magic_n;/*!< magic number (THR_LOCAL_MAGIC_N) */
 };
 
+/** The value of thr_local_struct::magic_n */
 #define THR_LOCAL_MAGIC_N	1231234
 
-/***********************************************************************
-Returns the local storage struct for a thread. */
+/*******************************************************************//**
+Returns the local storage struct for a thread.
+@return	local storage */
 static
 thr_local_t*
 thr_local_get(
 /*==========*/
-				/* out: local storage */
-	os_thread_id_t	id)	/* in: thread id of the thread */
+	os_thread_id_t	id)	/*!< in: thread id of the thread */
 {
 	thr_local_t*	local;
 
@@ -71,7 +91,7 @@ try_again:
 	local = NULL;
 
 	HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id),
-		    local, os_thread_eq(local->id, id));
+		    thr_local_t*, local,, os_thread_eq(local->id, id));
 	if (local == NULL) {
 		mutex_exit(&thr_local_mutex);
 
@@ -87,14 +107,14 @@ try_again:
 	return(local);
 }
 
-/***********************************************************************
-Gets the slot number in the thread table of a thread. */
-
+/*******************************************************************//**
+Gets the slot number in the thread table of a thread.
+@return	slot number */
+UNIV_INTERN
 ulint
 thr_local_get_slot_no(
 /*==================*/
-				/* out: slot number */
-	os_thread_id_t	id)	/* in: thread id of the thread */
+	os_thread_id_t	id)	/*!< in: thread id of the thread */
 {
 	ulint		slot_no;
 	thr_local_t*	local;
@@ -110,14 +130,14 @@ thr_local_get_slot_no(
 	return(slot_no);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Sets the slot number in the thread table of a thread. */
-
+UNIV_INTERN
 void
 thr_local_set_slot_no(
 /*==================*/
-	os_thread_id_t	id,	/* in: thread id of the thread */
-	ulint		slot_no)/* in: slot number */
+	os_thread_id_t	id,	/*!< in: thread id of the thread */
+	ulint		slot_no)/*!< in: slot number */
 {
 	thr_local_t*	local;
 
@@ -130,14 +150,14 @@ thr_local_set_slot_no(
 	mutex_exit(&thr_local_mutex);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Returns pointer to the 'in_ibuf' field within the current thread local
-storage. */
-
+storage.
+@return	pointer to the in_ibuf field */
+UNIV_INTERN
 ibool*
 thr_local_get_in_ibuf_field(void)
 /*=============================*/
-			/* out: pointer to the in_ibuf field */
 {
 	thr_local_t*	local;
 
@@ -150,9 +170,9 @@ thr_local_get_in_ibuf_field(void)
 	return(&(local->in_ibuf));
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Creates a local storage struct for the calling new thread. */
-
+UNIV_INTERN
 void
 thr_local_create(void)
 /*==================*/
@@ -180,13 +200,13 @@ thr_local_create(void)
 	mutex_exit(&thr_local_mutex);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Frees the local storage struct for the specified thread. */
-
+UNIV_INTERN
 void
 thr_local_free(
 /*===========*/
-	os_thread_id_t	id)	/* in: thread id */
+	os_thread_id_t	id)	/*!< in: thread id */
 {
 	thr_local_t*	local;
 
@@ -195,7 +215,7 @@ thr_local_free(
 	/* Look for the local struct in the hash table */
 
 	HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id),
-		    local, os_thread_eq(local->id, id));
+		    thr_local_t*, local,, os_thread_eq(local->id, id));
 	if (local == NULL) {
 		mutex_exit(&thr_local_mutex);
 
@@ -212,9 +232,9 @@ thr_local_free(
 	mem_free(local);
 }
 
-/********************************************************************
+/****************************************************************//**
 Initializes the thread local storage module. */
-
+UNIV_INTERN
 void
 thr_local_init(void)
 /*================*/
diff --git a/storage/innodb_plugin/trx/trx0i_s.c b/storage/innodb_plugin/trx/trx0i_s.c
new file mode 100644
index 00000000000..0d809806edc
--- /dev/null
+++ b/storage/innodb_plugin/trx/trx0i_s.c
@@ -0,0 +1,1444 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0i_s.c
+INFORMATION SCHEMA innodb_trx, innodb_locks and
+innodb_lock_waits tables fetch code.
+
+The code below fetches information needed to fill those
+3 dynamic tables and uploads it into a "transactions
+table cache" for later retrieval.
+
+Created July 17, 2007 Vasil Dimov
+*******************************************************/
+
+#include <mysql/plugin.h>
+
+#include "mysql_addons.h"
+
+#include "univ.i"
+#include "buf0buf.h"
+#include "dict0dict.h"
+#include "ha0storage.h"
+#include "ha_prototypes.h"
+#include "hash0hash.h"
+#include "lock0iter.h"
+#include "lock0lock.h"
+#include "mem0mem.h"
+#include "page0page.h"
+#include "rem0rec.h"
+#include "row0row.h"
+#include "srv0srv.h"
+#include "sync0rw.h"
+#include "sync0sync.h"
+#include "sync0types.h"
+#include "trx0i_s.h"
+#include "trx0sys.h"
+#include "trx0trx.h"
+#include "ut0mem.h"
+#include "ut0ut.h"
+
+/** Initial number of rows in the table cache */
+#define TABLE_CACHE_INITIAL_ROWSNUM	1024
+
+/** @brief The maximum number of chunks to allocate for a table cache.
+
+The rows of a table cache are stored in a set of chunks. When a new
+row is added a new chunk is allocated if necessary.  Assuming that the
+first one is 1024 rows (TABLE_CACHE_INITIAL_ROWSNUM) and each
+subsequent is N/2 where N is the number of rows we have allocated till
+now, then 39th chunk would accommodate 1677416425 rows and all chunks
+would accommodate 3354832851 rows. */
+#define MEM_CHUNKS_IN_TABLE_CACHE	39
+
+/** The following are some testing auxiliary macros. Do not enable them
+in a production environment. */
+/* @{ */
+
+#if 0
+/** If this is enabled then lock folds will always be different
+resulting in equal rows being put in a different cells of the hash
+table. Checking for duplicates will be flawed because different
+fold will be calculated when a row is searched in the hash table. */
+#define TEST_LOCK_FOLD_ALWAYS_DIFFERENT
+#endif
+
+#if 0
+/** This effectively kills the search-for-duplicate-before-adding-a-row
+function, but searching in the hash is still performed. It will always
+be assumed that lock is not present and insertion will be performed in
+the hash table. */
+#define TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T
+#endif
+
+#if 0
+/** This aggressively repeats adding each row many times. Depending on
+the above settings this may be noop or may result in lots of rows being
+added. */
+#define TEST_ADD_EACH_LOCKS_ROW_MANY_TIMES
+#endif
+
+#if 0
+/** Very similar to TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T but hash
+table search is not performed at all. */
+#define TEST_DO_NOT_CHECK_FOR_DUPLICATE_ROWS
+#endif
+
+#if 0
+/** Do not insert each row into the hash table, duplicates may appear
+if this is enabled, also if this is enabled searching into the hash is
+noop because it will be empty. */
+#define TEST_DO_NOT_INSERT_INTO_THE_HASH_TABLE
+#endif
+/* @} */
+
+/** Memory limit passed to ha_storage_put_memlim().
+@param cache	hash storage
+@return		maximum allowed allocation size */
+#define MAX_ALLOWED_FOR_STORAGE(cache)		\
+	(TRX_I_S_MEM_LIMIT			\
+	 - (cache)->mem_allocd)
+
+/** Memory limit in table_cache_create_empty_row().
+@param cache	hash storage
+@return		maximum allowed allocation size */
+#define MAX_ALLOWED_FOR_ALLOC(cache)		\
+	(TRX_I_S_MEM_LIMIT			\
+	 - (cache)->mem_allocd			\
+	 - ha_storage_get_size((cache)->storage))
+
+/** Memory for each table in the intermediate buffer is allocated in
+separate chunks. These chunks are considered to be concatenated to
+represent one flat array of rows. */
+typedef struct i_s_mem_chunk_struct {
+	ulint	offset;		/*!< offset, in number of rows */
+	ulint	rows_allocd;	/*!< the size of this chunk, in number
+				of rows */
+	void*	base;		/*!< start of the chunk */
+} i_s_mem_chunk_t;
+
+/** This represents one table's cache. */
+typedef struct i_s_table_cache_struct {
+	ulint		rows_used;	/*!< number of used rows */
+	ulint		rows_allocd;	/*!< number of allocated rows */
+	ulint		row_size;	/*!< size of a single row */
+	i_s_mem_chunk_t	chunks[MEM_CHUNKS_IN_TABLE_CACHE]; /*!< array of
+					memory chunks that stores the
+					rows */
+} i_s_table_cache_t;
+
+/** This structure describes the intermediate buffer */
+struct trx_i_s_cache_struct {
+	rw_lock_t	rw_lock;	/*!< read-write lock protecting
+					the rest of this structure */
+	ullint		last_read;	/*!< last time the cache was read;
+					measured in microseconds since
+					epoch */
+	mutex_t		last_read_mutex;/*!< mutex protecting the
+					last_read member - it is updated
+					inside a shared lock of the
+					rw_lock member */
+	i_s_table_cache_t innodb_trx;	/*!< innodb_trx table */
+	i_s_table_cache_t innodb_locks;	/*!< innodb_locks table */
+	i_s_table_cache_t innodb_lock_waits;/*!< innodb_lock_waits table */
+/** the hash table size is LOCKS_HASH_CELLS_NUM * sizeof(void*) bytes */
+#define LOCKS_HASH_CELLS_NUM		10000
+	hash_table_t*	locks_hash;	/*!< hash table used to eliminate
+					duplicate entries in the
+					innodb_locks table */
+/** Initial size of the cache storage */
+#define CACHE_STORAGE_INITIAL_SIZE	1024
+/** Number of hash cells in the cache storage */
+#define CACHE_STORAGE_HASH_CELLS	2048
+	ha_storage_t*	storage;	/*!< storage for external volatile
+					data that can possibly not be
+					available later, when we release
+					the kernel mutex */
+	ulint		mem_allocd;	/*!< the amount of memory
+					allocated with mem_alloc*() */
+	ibool		is_truncated;	/*!< this is TRUE if the memory
+					limit was hit and thus the data
+					in the cache is truncated */
+};
+
+/** This is the intermediate buffer where data needed to fill the
+INFORMATION SCHEMA tables is fetched and later retrieved by the C++
+code in handler/i_s.cc. */
+static trx_i_s_cache_t	trx_i_s_cache_static;
+/** This is the intermediate buffer where data needed to fill the
+INFORMATION SCHEMA tables is fetched and later retrieved by the C++
+code in handler/i_s.cc. */
+UNIV_INTERN trx_i_s_cache_t*	trx_i_s_cache = &trx_i_s_cache_static;
+
+/*******************************************************************//**
+For a record lock that is in waiting state retrieves the only bit that
+is set, for a table lock returns ULINT_UNDEFINED.
+@return	record number within the heap */
+static
+ulint
+wait_lock_get_heap_no(
+/*==================*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ulint	ret;
+
+	switch (lock_get_type(lock)) {
+	case LOCK_REC:
+		ret = lock_rec_find_set_bit(lock);
+		ut_a(ret != ULINT_UNDEFINED);
+		break;
+	case LOCK_TABLE:
+		ret = ULINT_UNDEFINED;
+		break;
+	default:
+		ut_error;
+	}
+
+	return(ret);
+}
+
+/*******************************************************************//**
+Initializes the members of a table cache. */
+static
+void
+table_cache_init(
+/*=============*/
+	i_s_table_cache_t*	table_cache,	/*!< out: table cache */
+	size_t			row_size)	/*!< in: the size of a
+						row */
+{
+	ulint	i;
+
+	table_cache->rows_used = 0;
+	table_cache->rows_allocd = 0;
+	table_cache->row_size = row_size;
+
+	for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) {
+
+		/* the memory is actually allocated in
+		table_cache_create_empty_row() */
+		table_cache->chunks[i].base = NULL;
+	}
+}
+
+/*******************************************************************//**
+Returns an empty row from a table cache. The row is allocated if no more
+empty rows are available. The number of used rows is incremented.
+If the memory limit is hit then NULL is returned and nothing is
+allocated.
+@return	empty row, or NULL if out of memory */
+static
+void*
+table_cache_create_empty_row(
+/*=========================*/
+	i_s_table_cache_t*	table_cache,	/*!< in/out: table cache */
+	trx_i_s_cache_t*	cache)		/*!< in/out: cache to record
+						how many bytes are
+						allocated */
+{
+	ulint	i;
+	void*	row;
+
+	ut_a(table_cache->rows_used <= table_cache->rows_allocd);
+
+	if (table_cache->rows_used == table_cache->rows_allocd) {
+
+		/* rows_used == rows_allocd means that new chunk needs
+		to be allocated: either no more empty rows in the
+		last allocated chunk or nothing has been allocated yet
+		(rows_num == rows_allocd == 0); */
+
+		i_s_mem_chunk_t*	chunk;
+		ulint			req_bytes;
+		ulint			got_bytes;
+		ulint			req_rows;
+		ulint			got_rows;
+
+		/* find the first not allocated chunk */
+		for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) {
+
+			if (table_cache->chunks[i].base == NULL) {
+
+				break;
+			}
+		}
+
+		/* i == MEM_CHUNKS_IN_TABLE_CACHE means that all chunks
+		have been allocated :-X */
+		ut_a(i < MEM_CHUNKS_IN_TABLE_CACHE);
+
+		/* allocate the chunk we just found */
+
+		if (i == 0) {
+
+			/* first chunk, nothing is allocated yet */
+			req_rows = TABLE_CACHE_INITIAL_ROWSNUM;
+		} else {
+
+			/* Memory is increased by the formula
+			new = old + old / 2; We are trying not to be
+			aggressive here (= using the common new = old * 2)
+			because the allocated memory will not be freed
+			until InnoDB exit (it is reused). So it is better
+			to once allocate the memory in more steps, but
+			have less unused/wasted memory than to use less
+			steps in allocation (which is done once in a
+			lifetime) but end up with lots of unused/wasted
+			memory. */
+			req_rows = table_cache->rows_allocd / 2;
+		}
+		req_bytes = req_rows * table_cache->row_size;
+
+		if (req_bytes > MAX_ALLOWED_FOR_ALLOC(cache)) {
+
+			return(NULL);
+		}
+
+		chunk = &table_cache->chunks[i];
+
+		chunk->base = mem_alloc2(req_bytes, &got_bytes);
+
+		got_rows = got_bytes / table_cache->row_size;
+
+		cache->mem_allocd += got_bytes;
+
+#if 0
+		printf("allocating chunk %d req bytes=%lu, got bytes=%lu, "
+		       "row size=%lu, "
+		       "req rows=%lu, got rows=%lu\n",
+		       i, req_bytes, got_bytes,
+		       table_cache->row_size,
+		       req_rows, got_rows);
+#endif
+
+		chunk->rows_allocd = got_rows;
+
+		table_cache->rows_allocd += got_rows;
+
+		/* adjust the offset of the next chunk */
+		if (i < MEM_CHUNKS_IN_TABLE_CACHE - 1) {
+
+			table_cache->chunks[i + 1].offset
+				= chunk->offset + chunk->rows_allocd;
+		}
+
+		/* return the first empty row in the newly allocated
+		chunk */
+		row = chunk->base;
+	} else {
+
+		char*	chunk_start;
+		ulint	offset;
+
+		/* there is an empty row, no need to allocate new
+		chunks */
+
+		/* find the first chunk that contains allocated but
+		empty/unused rows */
+		for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) {
+
+			if (table_cache->chunks[i].offset
+			    + table_cache->chunks[i].rows_allocd
+			    > table_cache->rows_used) {
+
+				break;
+			}
+		}
+
+		/* i == MEM_CHUNKS_IN_TABLE_CACHE means that all chunks
+		are full, but
+		table_cache->rows_used != table_cache->rows_allocd means
+		exactly the opposite - there are allocated but
+		empty/unused rows :-X */
+		ut_a(i < MEM_CHUNKS_IN_TABLE_CACHE);
+
+		chunk_start = (char*) table_cache->chunks[i].base;
+		offset = table_cache->rows_used
+			- table_cache->chunks[i].offset;
+
+		row = chunk_start + offset * table_cache->row_size;
+	}
+
+	table_cache->rows_used++;
+
+	return(row);
+}
+
+/*******************************************************************//**
+Fills i_s_trx_row_t object.
+If memory can not be allocated then FALSE is returned.
+@return	FALSE if allocation fails */
+static
+ibool
+fill_trx_row(
+/*=========*/
+	i_s_trx_row_t*		row,		/*!< out: result object
+						that's filled */
+	const trx_t*		trx,		/*!< in: transaction to
+						get data from */
+	const i_s_locks_row_t*	requested_lock_row,/*!< in: pointer to the
+						corresponding row in
+						innodb_locks if trx is
+						waiting or NULL if trx
+						is not waiting */
+	trx_i_s_cache_t*	cache)		/*!< in/out: cache into
+						which to copy volatile
+						strings */
+{
+	row->trx_id = trx_get_id(trx);
+	row->trx_started = (ib_time_t) trx->start_time;
+	row->trx_state = trx_get_que_state_str(trx);
+
+	if (trx->wait_lock != NULL) {
+
+		ut_a(requested_lock_row != NULL);
+
+		row->requested_lock_row = requested_lock_row;
+		row->trx_wait_started = (ib_time_t) trx->wait_started;
+	} else {
+
+		ut_a(requested_lock_row == NULL);
+
+		row->requested_lock_row = NULL;
+		row->trx_wait_started = 0;
+	}
+
+	row->trx_weight = (ullint) ut_conv_dulint_to_longlong(TRX_WEIGHT(trx));
+
+	if (trx->mysql_thd != NULL) {
+		row->trx_mysql_thread_id
+			= thd_get_thread_id(trx->mysql_thd);
+	} else {
+		/* For internal transactions e.g., purge and transactions
+		being recovered at startup there is no associated MySQL
+		thread data structure. */
+		row->trx_mysql_thread_id = 0;
+	}
+
+	if (trx->mysql_query_str != NULL && *trx->mysql_query_str != NULL) {
+
+		if (strlen(*trx->mysql_query_str)
+		    > TRX_I_S_TRX_QUERY_MAX_LEN) {
+
+			char	query[TRX_I_S_TRX_QUERY_MAX_LEN + 1];
+
+			memcpy(query, *trx->mysql_query_str,
+			       TRX_I_S_TRX_QUERY_MAX_LEN);
+			query[TRX_I_S_TRX_QUERY_MAX_LEN] = '\0';
+
+			row->trx_query = ha_storage_put_memlim(
+				cache->storage, query,
+				TRX_I_S_TRX_QUERY_MAX_LEN + 1,
+				MAX_ALLOWED_FOR_STORAGE(cache));
+		} else {
+
+			row->trx_query = ha_storage_put_str_memlim(
+				cache->storage, *trx->mysql_query_str,
+				MAX_ALLOWED_FOR_STORAGE(cache));
+		}
+
+		if (row->trx_query == NULL) {
+
+			return(FALSE);
+		}
+	} else {
+
+		row->trx_query = NULL;
+	}
+
+	return(TRUE);
+}
+
+/*******************************************************************//**
+Format the nth field of "rec" and put it in "buf". The result is always
+NUL-terminated. Returns the number of bytes that were written to "buf"
+(including the terminating NUL).
+@return	end of the result */
+static
+ulint
+put_nth_field(
+/*==========*/
+	char*			buf,	/*!< out: buffer */
+	ulint			buf_size,/*!< in: buffer size in bytes */
+	ulint			n,	/*!< in: number of field */
+	const dict_index_t*	index,	/*!< in: index */
+	const rec_t*		rec,	/*!< in: record */
+	const ulint*		offsets)/*!< in: record offsets, returned
+					by rec_get_offsets() */
+{
+	const byte*	data;
+	ulint		data_len;
+	dict_field_t*	dict_field;
+	ulint		ret;
+
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+	if (buf_size == 0) {
+
+		return(0);
+	}
+
+	ret = 0;
+
+	if (n > 0) {
+		/* we must append ", " before the actual data */
+
+		if (buf_size < 3) {
+
+			buf[0] = '\0';
+			return(1);
+		}
+
+		memcpy(buf, ", ", 3);
+
+		buf += 2;
+		buf_size -= 2;
+		ret += 2;
+	}
+
+	/* now buf_size >= 1 */
+
+	data = rec_get_nth_field(rec, offsets, n, &data_len);
+
+	dict_field = dict_index_get_nth_field(index, n);
+
+	ret += row_raw_format((const char*) data, data_len,
+			      dict_field, buf, buf_size);
+
+	return(ret);
+}
+
+/*******************************************************************//**
+Fills the "lock_data" member of i_s_locks_row_t object.
+If memory can not be allocated then FALSE is returned.
+@return	FALSE if allocation fails */
+static
+ibool
+fill_lock_data(
+/*===========*/
+	const char**		lock_data,/*!< out: "lock_data" to fill */
+	const lock_t*		lock,	/*!< in: lock used to find the data */
+	ulint			heap_no,/*!< in: rec num used to find the data */
+	trx_i_s_cache_t*	cache)	/*!< in/out: cache where to store
+					volatile data */
+{
+	mtr_t			mtr;
+
+	const buf_block_t*	block;
+	const page_t*		page;
+	const rec_t*		rec;
+
+	ut_a(lock_get_type(lock) == LOCK_REC);
+
+	mtr_start(&mtr);
+
+	block = buf_page_try_get(lock_rec_get_space_id(lock),
+				 lock_rec_get_page_no(lock),
+				 &mtr);
+
+	if (block == NULL) {
+
+		*lock_data = NULL;
+
+		mtr_commit(&mtr);
+
+		return(TRUE);
+	}
+
+	page = (const page_t*) buf_block_get_frame(block);
+
+	rec = page_find_rec_with_heap_no(page, heap_no);
+
+	if (page_rec_is_infimum(rec)) {
+
+		*lock_data = ha_storage_put_str_memlim(
+			cache->storage, "infimum pseudo-record",
+			MAX_ALLOWED_FOR_STORAGE(cache));
+	} else if (page_rec_is_supremum(rec)) {
+
+		*lock_data = ha_storage_put_str_memlim(
+			cache->storage, "supremum pseudo-record",
+			MAX_ALLOWED_FOR_STORAGE(cache));
+	} else {
+
+		const dict_index_t*	index;
+		ulint			n_fields;
+		mem_heap_t*		heap;
+		ulint			offsets_onstack[REC_OFFS_NORMAL_SIZE];
+		ulint*			offsets;
+		char			buf[TRX_I_S_LOCK_DATA_MAX_LEN];
+		ulint			buf_used;
+		ulint			i;
+
+		rec_offs_init(offsets_onstack);
+		offsets = offsets_onstack;
+
+		index = lock_rec_get_index(lock);
+
+		n_fields = dict_index_get_n_unique(index);
+
+		ut_a(n_fields > 0);
+
+		heap = NULL;
+		offsets = rec_get_offsets(rec, index, offsets, n_fields,
+					  &heap);
+
+		/* format and store the data */
+
+		buf_used = 0;
+		for (i = 0; i < n_fields; i++) {
+
+			buf_used += put_nth_field(
+				buf + buf_used, sizeof(buf) - buf_used,
+				i, index, rec, offsets) - 1;
+		}
+
+		*lock_data = (const char*) ha_storage_put_memlim(
+			cache->storage, buf, buf_used + 1,
+			MAX_ALLOWED_FOR_STORAGE(cache));
+
+		if (UNIV_UNLIKELY(heap != NULL)) {
+
+			/* this means that rec_get_offsets() has created a new
+			heap and has stored offsets in it; check that this is
+			really the case and free the heap */
+			ut_a(offsets != offsets_onstack);
+			mem_heap_free(heap);
+		}
+	}
+
+	mtr_commit(&mtr);
+
+	if (*lock_data == NULL) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/*******************************************************************//**
+Fills i_s_locks_row_t object. Returns its first argument.
+If memory can not be allocated then FALSE is returned.
+@return	FALSE if allocation fails */
+static
+ibool
+fill_locks_row(
+/*===========*/
+	i_s_locks_row_t* row,	/*!< out: result object that's filled */
+	const lock_t*	lock,	/*!< in: lock to get data from */
+	ulint		heap_no,/*!< in: lock's record number
+				or ULINT_UNDEFINED if the lock
+				is a table lock */
+	trx_i_s_cache_t* cache)	/*!< in/out: cache into which to copy
+				volatile strings */
+{
+	row->lock_trx_id = lock_get_trx_id(lock);
+	row->lock_mode = lock_get_mode_str(lock);
+	row->lock_type = lock_get_type_str(lock);
+
+	row->lock_table = ha_storage_put_str_memlim(
+		cache->storage, lock_get_table_name(lock),
+		MAX_ALLOWED_FOR_STORAGE(cache));
+
+	/* memory could not be allocated */
+	if (row->lock_table == NULL) {
+
+		return(FALSE);
+	}
+
+	switch (lock_get_type(lock)) {
+	case LOCK_REC:
+		row->lock_index = ha_storage_put_str_memlim(
+			cache->storage, lock_rec_get_index_name(lock),
+			MAX_ALLOWED_FOR_STORAGE(cache));
+
+		/* memory could not be allocated */
+		if (row->lock_index == NULL) {
+
+			return(FALSE);
+		}
+
+		row->lock_space = lock_rec_get_space_id(lock);
+		row->lock_page = lock_rec_get_page_no(lock);
+		row->lock_rec = heap_no;
+
+		if (!fill_lock_data(&row->lock_data, lock, heap_no, cache)) {
+
+			/* memory could not be allocated */
+			return(FALSE);
+		}
+
+		break;
+	case LOCK_TABLE:
+		row->lock_index = NULL;
+
+		row->lock_space = ULINT_UNDEFINED;
+		row->lock_page = ULINT_UNDEFINED;
+		row->lock_rec = ULINT_UNDEFINED;
+
+		row->lock_data = NULL;
+
+		break;
+	default:
+		ut_error;
+	}
+
+	row->lock_table_id = lock_get_table_id(lock);
+
+	row->hash_chain.value = row;
+
+	return(TRUE);
+}
+
+/*******************************************************************//**
+Fills i_s_lock_waits_row_t object. Returns its first argument.
+@return	result object that's filled */
+static
+i_s_lock_waits_row_t*
+fill_lock_waits_row(
+/*================*/
+	i_s_lock_waits_row_t*	row,		/*!< out: result object
+						that's filled */
+	const i_s_locks_row_t*	requested_lock_row,/*!< in: pointer to the
+						relevant requested lock
+						row in innodb_locks */
+	const i_s_locks_row_t*	blocking_lock_row)/*!< in: pointer to the
+						relevant blocking lock
+						row in innodb_locks */
+{
+	row->requested_lock_row = requested_lock_row;
+	row->blocking_lock_row = blocking_lock_row;
+
+	return(row);
+}
+
+/*******************************************************************//**
+Calculates a hash fold for a lock. For a record lock the fold is
+calculated from 4 elements, which uniquely identify a lock at a given
+point in time: transaction id, space id, page number, record number.
+For a table lock the fold is table's id.
+@return	fold */
+static
+ulint
+fold_lock(
+/*======*/
+	const lock_t*	lock,	/*!< in: lock object to fold */
+	ulint		heap_no)/*!< in: lock's record number
+				or ULINT_UNDEFINED if the lock
+				is a table lock */
+{
+#ifdef TEST_LOCK_FOLD_ALWAYS_DIFFERENT
+	static ulint	fold = 0;
+
+	return(fold++);
+#else
+	ulint	ret;
+
+	switch (lock_get_type(lock)) {
+	case LOCK_REC:
+		ut_a(heap_no != ULINT_UNDEFINED);
+
+		ret = ut_fold_ulint_pair((ulint) lock_get_trx_id(lock),
+					 lock_rec_get_space_id(lock));
+
+		ret = ut_fold_ulint_pair(ret,
+					 lock_rec_get_page_no(lock));
+
+		ret = ut_fold_ulint_pair(ret, heap_no);
+
+		break;
+	case LOCK_TABLE:
+		/* this check is actually not necessary for continuing
+		correct operation, but something must have gone wrong if
+		it fails. */
+		ut_a(heap_no == ULINT_UNDEFINED);
+
+		ret = (ulint) lock_get_table_id(lock);
+
+		break;
+	default:
+		ut_error;
+	}
+
+	return(ret);
+#endif
+}
+
+/*******************************************************************//**
+Checks whether i_s_locks_row_t object represents a lock_t object.
+@return	TRUE if they match */
+static
+ibool
+locks_row_eq_lock(
+/*==============*/
+	const i_s_locks_row_t*	row,	/*!< in: innodb_locks row */
+	const lock_t*		lock,	/*!< in: lock object */
+	ulint			heap_no)/*!< in: lock's record number
+					or ULINT_UNDEFINED if the lock
+					is a table lock */
+{
+#ifdef TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T
+	return(0);
+#else
+	switch (lock_get_type(lock)) {
+	case LOCK_REC:
+		ut_a(heap_no != ULINT_UNDEFINED);
+
+		return(row->lock_trx_id == lock_get_trx_id(lock)
+		       && row->lock_space == lock_rec_get_space_id(lock)
+		       && row->lock_page == lock_rec_get_page_no(lock)
+		       && row->lock_rec == heap_no);
+
+	case LOCK_TABLE:
+		/* this check is actually not necessary for continuing
+		correct operation, but something must have gone wrong if
+		it fails. */
+		ut_a(heap_no == ULINT_UNDEFINED);
+
+		return(row->lock_trx_id == lock_get_trx_id(lock)
+		       && row->lock_table_id == lock_get_table_id(lock));
+
+	default:
+		ut_error;
+		return(FALSE);
+	}
+#endif
+}
+
+/*******************************************************************//**
+Searches for a row in the innodb_locks cache that has a specified id.
+This happens in O(1) time since a hash table is used. Returns pointer to
+the row or NULL if none is found.
+@return	row or NULL */
+static
+i_s_locks_row_t*
+search_innodb_locks(
+/*================*/
+	trx_i_s_cache_t*	cache,	/*!< in: cache */
+	const lock_t*		lock,	/*!< in: lock to search for */
+	ulint			heap_no)/*!< in: lock's record number
+					or ULINT_UNDEFINED if the lock
+					is a table lock */
+{
+	i_s_hash_chain_t*	hash_chain;
+
+	HASH_SEARCH(
+		/* hash_chain->"next" */
+		next,
+		/* the hash table */
+		cache->locks_hash,
+		/* fold */
+		fold_lock(lock, heap_no),
+		/* the type of the next variable */
+		i_s_hash_chain_t*,
+		/* auxiliary variable */
+		hash_chain,
+		/* assertion on every traversed item */
+		,
+		/* this determines if we have found the lock */
+		locks_row_eq_lock(hash_chain->value, lock, heap_no));
+
+	if (hash_chain == NULL) {
+
+		return(NULL);
+	}
+	/* else */
+
+	return(hash_chain->value);
+}
+
+/*******************************************************************//**
+Adds new element to the locks cache, enlarging it if necessary.
+Returns a pointer to the added row. If the row is already present then
+no row is added and a pointer to the existing row is returned.
+If row can not be allocated then NULL is returned.
+@return	row */
+static
+i_s_locks_row_t*
+add_lock_to_cache(
+/*==============*/
+	trx_i_s_cache_t*	cache,	/*!< in/out: cache */
+	const lock_t*		lock,	/*!< in: the element to add */
+	ulint			heap_no)/*!< in: lock's record number
+					or ULINT_UNDEFINED if the lock
+					is a table lock */
+{
+	i_s_locks_row_t*	dst_row;
+
+#ifdef TEST_ADD_EACH_LOCKS_ROW_MANY_TIMES
+	ulint	i;
+	for (i = 0; i < 10000; i++) {
+#endif
+#ifndef TEST_DO_NOT_CHECK_FOR_DUPLICATE_ROWS
+	/* quit if this lock is already present */
+	dst_row = search_innodb_locks(cache, lock, heap_no);
+	if (dst_row != NULL) {
+
+		return(dst_row);
+	}
+#endif
+
+	dst_row = (i_s_locks_row_t*)
+		table_cache_create_empty_row(&cache->innodb_locks, cache);
+
+	/* memory could not be allocated */
+	if (dst_row == NULL) {
+
+		return(NULL);
+	}
+
+	if (!fill_locks_row(dst_row, lock, heap_no, cache)) {
+
+		/* memory could not be allocated */
+		cache->innodb_locks.rows_used--;
+		return(NULL);
+	}
+
+#ifndef TEST_DO_NOT_INSERT_INTO_THE_HASH_TABLE
+	HASH_INSERT(
+		/* the type used in the hash chain */
+		i_s_hash_chain_t,
+		/* hash_chain->"next" */
+		next,
+		/* the hash table */
+		cache->locks_hash,
+		/* fold */
+		fold_lock(lock, heap_no),
+		/* add this data to the hash */
+		&dst_row->hash_chain);
+#endif
+#ifdef TEST_ADD_EACH_LOCKS_ROW_MANY_TIMES
+	} /* for()-loop */
+#endif
+
+	return(dst_row);
+}
+
+/*******************************************************************//**
+Adds new pair of locks to the lock waits cache.
+If memory can not be allocated then FALSE is returned.
+@return	FALSE if allocation fails */
+static
+ibool
+add_lock_wait_to_cache(
+/*===================*/
+	trx_i_s_cache_t*	cache,		/*!< in/out: cache */
+	const i_s_locks_row_t*	requested_lock_row,/*!< in: pointer to the
+						relevant requested lock
+						row in innodb_locks */
+	const i_s_locks_row_t*	blocking_lock_row)/*!< in: pointer to the
+						relevant blocking lock
+						row in innodb_locks */
+{
+	i_s_lock_waits_row_t*	dst_row;
+
+	dst_row = (i_s_lock_waits_row_t*)
+		table_cache_create_empty_row(&cache->innodb_lock_waits,
+					     cache);
+
+	/* memory could not be allocated */
+	if (dst_row == NULL) {
+
+		return(FALSE);
+	}
+
+	fill_lock_waits_row(dst_row, requested_lock_row, blocking_lock_row);
+
+	return(TRUE);
+}
+
+/*******************************************************************//**
+Adds transaction's relevant (important) locks to cache.
+If the transaction is waiting, then the wait lock is added to
+innodb_locks and a pointer to the added row is returned in
+requested_lock_row, otherwise requested_lock_row is set to NULL.
+If rows can not be allocated then FALSE is returned and the value of
+requested_lock_row is undefined.
+@return	FALSE if allocation fails */
+static
+ibool
+add_trx_relevant_locks_to_cache(
+/*============================*/
+	trx_i_s_cache_t*	cache,	/*!< in/out: cache */
+	const trx_t*		trx,	/*!< in: transaction */
+	i_s_locks_row_t**	requested_lock_row)/*!< out: pointer to the
+					requested lock row, or NULL or
+					undefined */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* If transaction is waiting we add the wait lock and all locks
+	from another transactions that are blocking the wait lock. */
+	if (trx->que_state == TRX_QUE_LOCK_WAIT) {
+
+		const lock_t*		curr_lock;
+		ulint			wait_lock_heap_no;
+		i_s_locks_row_t*	blocking_lock_row;
+		lock_queue_iterator_t	iter;
+
+		ut_a(trx->wait_lock != NULL);
+
+		wait_lock_heap_no
+			= wait_lock_get_heap_no(trx->wait_lock);
+
+		/* add the requested lock */
+		*requested_lock_row
+			= add_lock_to_cache(cache, trx->wait_lock,
+					    wait_lock_heap_no);
+
+		/* memory could not be allocated */
+		if (*requested_lock_row == NULL) {
+
+			return(FALSE);
+		}
+
+		/* then iterate over the locks before the wait lock and
+		add the ones that are blocking it */
+
+		lock_queue_iterator_reset(&iter, trx->wait_lock,
+					  ULINT_UNDEFINED);
+
+		curr_lock = lock_queue_iterator_get_prev(&iter);
+		while (curr_lock != NULL) {
+
+			if (lock_has_to_wait(trx->wait_lock,
+					     curr_lock)) {
+
+				/* add the lock that is
+				blocking trx->wait_lock */
+				blocking_lock_row
+					= add_lock_to_cache(
+						cache, curr_lock,
+						/* heap_no is the same
+						for the wait and waited
+						locks */
+						wait_lock_heap_no);
+
+				/* memory could not be allocated */
+				if (blocking_lock_row == NULL) {
+
+					return(FALSE);
+				}
+
+				/* add the relation between both locks
+				to innodb_lock_waits */
+				if (!add_lock_wait_to_cache(
+						cache, *requested_lock_row,
+						blocking_lock_row)) {
+
+					/* memory could not be allocated */
+					return(FALSE);
+				}
+			}
+
+			curr_lock = lock_queue_iterator_get_prev(&iter);
+		}
+	} else {
+
+		*requested_lock_row = NULL;
+	}
+
+	return(TRUE);
+}
+
+/** The minimum time that a cache must not be updated after it has been
+read for the last time; measured in microseconds. We use this technique
+to ensure that SELECTs which join several INFORMATION SCHEMA tables read
+the same version of the cache. */
+#define CACHE_MIN_IDLE_TIME_US	100000 /* 0.1 sec */
+
+/*******************************************************************//**
+Checks if the cache can safely be updated.
+@return	TRUE if can be updated */
+static
+ibool
+can_cache_be_updated(
+/*=================*/
+	trx_i_s_cache_t*	cache)	/*!< in: cache */
+{
+	ullint	now;
+
+	/* Here we read cache->last_read without acquiring its mutex
+	because last_read is only updated when a shared rw lock on the
+	whole cache is being held (see trx_i_s_cache_end_read()) and
+	we are currently holding an exclusive rw lock on the cache.
+	So it is not possible for last_read to be updated while we are
+	reading it. */
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
+#endif
+
+	now = ut_time_us(NULL);
+	if (now - cache->last_read > CACHE_MIN_IDLE_TIME_US) {
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*******************************************************************//**
+Declare a cache empty, preparing it to be filled up. Not all resources
+are freed because they can be reused. */
+static
+void
+trx_i_s_cache_clear(
+/*================*/
+	trx_i_s_cache_t*	cache)	/*!< out: cache to clear */
+{
+	cache->innodb_trx.rows_used = 0;
+	cache->innodb_locks.rows_used = 0;
+	cache->innodb_lock_waits.rows_used = 0;
+
+	hash_table_clear(cache->locks_hash);
+
+	ha_storage_empty(&cache->storage);
+}
+
+/*******************************************************************//**
+Fetches the data needed to fill the 3 INFORMATION SCHEMA tables into the
+table cache buffer. Cache must be locked for write. */
+static
+void
+fetch_data_into_cache(
+/*==================*/
+	trx_i_s_cache_t*	cache)	/*!< in/out: cache */
+{
+	trx_t*			trx;
+	i_s_trx_row_t*		trx_row;
+	i_s_locks_row_t*	requested_lock_row;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	trx_i_s_cache_clear(cache);
+
+	/* We iterate over the list of all transactions and add each one
+	to innodb_trx's cache. We also add all locks that are relevant
+	to each transaction into innodb_locks' and innodb_lock_waits'
+	caches. */
+
+	for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+	     trx != NULL;
+	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+
+		if (!add_trx_relevant_locks_to_cache(cache, trx,
+						     &requested_lock_row)) {
+
+			cache->is_truncated = TRUE;
+			return;
+		}
+
+		trx_row = (i_s_trx_row_t*)
+			table_cache_create_empty_row(&cache->innodb_trx,
+						     cache);
+
+		/* memory could not be allocated */
+		if (trx_row == NULL) {
+
+			cache->is_truncated = TRUE;
+			return;
+		}
+
+		if (!fill_trx_row(trx_row, trx, requested_lock_row, cache)) {
+
+			/* memory could not be allocated */
+			cache->innodb_trx.rows_used--;
+			cache->is_truncated = TRUE;
+			return;
+		}
+	}
+
+	cache->is_truncated = FALSE;
+}
+
+/*******************************************************************//**
+Update the transactions cache if it has not been read for some time.
+Called from handler/i_s.cc.
+@return	0 - fetched, 1 - not */
+UNIV_INTERN
+int
+trx_i_s_possibly_fetch_data_into_cache(
+/*===================================*/
+	trx_i_s_cache_t*	cache)	/*!< in/out: cache */
+{
+	if (!can_cache_be_updated(cache)) {
+
+		return(1);
+	}
+
+	/* We are going to access trx->query in all transactions */
+	innobase_mysql_prepare_print_arbitrary_thd();
+
+	/* We need to read trx_sys and record/table lock queues */
+	mutex_enter(&kernel_mutex);
+
+	fetch_data_into_cache(cache);
+
+	mutex_exit(&kernel_mutex);
+
+	innobase_mysql_end_print_arbitrary_thd();
+
+	return(0);
+}
+
+/*******************************************************************//**
+Returns TRUE if the data in the cache is truncated due to the memory
+limit posed by TRX_I_S_MEM_LIMIT.
+@return	TRUE if truncated */
+UNIV_INTERN
+ibool
+trx_i_s_cache_is_truncated(
+/*=======================*/
+	trx_i_s_cache_t*	cache)	/*!< in: cache */
+{
+	return(cache->is_truncated);
+}
+
+/*******************************************************************//**
+Initialize INFORMATION SCHEMA trx related cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_init(
+/*===============*/
+	trx_i_s_cache_t*	cache)	/*!< out: cache to init */
+{
+	/* The latching is done in the following order:
+	acquire trx_i_s_cache_t::rw_lock, X
+	acquire kernel_mutex
+	release kernel_mutex
+	release trx_i_s_cache_t::rw_lock
+	acquire trx_i_s_cache_t::rw_lock, S
+	acquire trx_i_s_cache_t::last_read_mutex
+	release trx_i_s_cache_t::last_read_mutex
+	release trx_i_s_cache_t::rw_lock */
+
+	rw_lock_create(&cache->rw_lock, SYNC_TRX_I_S_RWLOCK);
+
+	cache->last_read = 0;
+
+	mutex_create(&cache->last_read_mutex, SYNC_TRX_I_S_LAST_READ);
+
+	table_cache_init(&cache->innodb_trx, sizeof(i_s_trx_row_t));
+	table_cache_init(&cache->innodb_locks, sizeof(i_s_locks_row_t));
+	table_cache_init(&cache->innodb_lock_waits,
+			 sizeof(i_s_lock_waits_row_t));
+
+	cache->locks_hash = hash_create(LOCKS_HASH_CELLS_NUM);
+
+	cache->storage = ha_storage_create(CACHE_STORAGE_INITIAL_SIZE,
+					   CACHE_STORAGE_HASH_CELLS);
+
+	cache->mem_allocd = 0;
+
+	cache->is_truncated = FALSE;
+}
+
+/*******************************************************************//**
+Issue a shared/read lock on the tables cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_start_read(
+/*=====================*/
+	trx_i_s_cache_t*	cache)	/*!< in: cache */
+{
+	rw_lock_s_lock(&cache->rw_lock);
+}
+
+/*******************************************************************//**
+Release a shared/read lock on the tables cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_end_read(
+/*===================*/
+	trx_i_s_cache_t*	cache)	/*!< in: cache */
+{
+	ullint	now;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_SHARED));
+#endif
+
+	/* update cache last read time */
+	now = ut_time_us(NULL);
+	mutex_enter(&cache->last_read_mutex);
+	cache->last_read = now;
+	mutex_exit(&cache->last_read_mutex);
+
+	rw_lock_s_unlock(&cache->rw_lock);
+}
+
+/*******************************************************************//**
+Issue an exclusive/write lock on the tables cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_start_write(
+/*======================*/
+	trx_i_s_cache_t*	cache)	/*!< in: cache */
+{
+	rw_lock_x_lock(&cache->rw_lock);
+}
+
+/*******************************************************************//**
+Release an exclusive/write lock on the tables cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_end_write(
+/*====================*/
+	trx_i_s_cache_t*	cache)	/*!< in: cache */
+{
+#ifdef UNIV_SYNC_DEBUG
+	ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
+#endif
+
+	rw_lock_x_unlock(&cache->rw_lock);
+}
+
+/*******************************************************************//**
+Selects a INFORMATION SCHEMA table cache from the whole cache.
+@return	table cache */
+static
+i_s_table_cache_t*
+cache_select_table(
+/*===============*/
+	trx_i_s_cache_t*	cache,	/*!< in: whole cache */
+	enum i_s_table		table)	/*!< in: which table */
+{
+	i_s_table_cache_t*	table_cache;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_SHARED)
+	     || rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
+#endif
+
+	switch (table) {
+	case I_S_INNODB_TRX:
+		table_cache = &cache->innodb_trx;
+		break;
+	case I_S_INNODB_LOCKS:
+		table_cache = &cache->innodb_locks;
+		break;
+	case I_S_INNODB_LOCK_WAITS:
+		table_cache = &cache->innodb_lock_waits;
+		break;
+	default:
+		ut_error;
+	}
+
+	return(table_cache);
+}
+
+/*******************************************************************//**
+Retrieves the number of used rows in the cache for a given
+INFORMATION SCHEMA table.
+@return	number of rows */
+UNIV_INTERN
+ulint
+trx_i_s_cache_get_rows_used(
+/*========================*/
+	trx_i_s_cache_t*	cache,	/*!< in: cache */
+	enum i_s_table		table)	/*!< in: which table */
+{
+	i_s_table_cache_t*	table_cache;
+
+	table_cache = cache_select_table(cache, table);
+
+	return(table_cache->rows_used);
+}
+
+/*******************************************************************//**
+Retrieves the nth row (zero-based) in the cache for a given
+INFORMATION SCHEMA table.
+@return	row */
+UNIV_INTERN
+void*
+trx_i_s_cache_get_nth_row(
+/*======================*/
+	trx_i_s_cache_t*	cache,	/*!< in: cache */
+	enum i_s_table		table,	/*!< in: which table */
+	ulint			n)	/*!< in: row number */
+{
+	i_s_table_cache_t*	table_cache;
+	ulint			i;
+	void*			row;
+
+	table_cache = cache_select_table(cache, table);
+
+	ut_a(n < table_cache->rows_used);
+
+	row = NULL;
+
+	for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) {
+
+		if (table_cache->chunks[i].offset
+		    + table_cache->chunks[i].rows_allocd > n) {
+
+			row = (char*) table_cache->chunks[i].base
+				+ (n - table_cache->chunks[i].offset)
+				* table_cache->row_size;
+			break;
+		}
+	}
+
+	ut_a(row != NULL);
+
+	return(row);
+}
+
+/*******************************************************************//**
+Crafts a lock id string from a i_s_locks_row_t object. Returns its
+second argument. This function aborts if there is not enough space in
+lock_id. Be sure to provide at least TRX_I_S_LOCK_ID_MAX_LEN + 1 if you
+want to be 100% sure that it will not abort.
+@return	resulting lock id */
+UNIV_INTERN
+char*
+trx_i_s_create_lock_id(
+/*===================*/
+	const i_s_locks_row_t*	row,	/*!< in: innodb_locks row */
+	char*			lock_id,/*!< out: resulting lock_id */
+	ulint			lock_id_size)/*!< in: size of the lock id
+					buffer */
+{
+	int	res_len;
+
+	/* please adjust TRX_I_S_LOCK_ID_MAX_LEN if you change this */
+
+	if (row->lock_space != ULINT_UNDEFINED) {
+		/* record lock */
+		res_len = ut_snprintf(lock_id, lock_id_size,
+				      TRX_ID_FMT ":%lu:%lu:%lu",
+				      row->lock_trx_id, row->lock_space,
+				      row->lock_page, row->lock_rec);
+	} else {
+		/* table lock */
+		res_len = ut_snprintf(lock_id, lock_id_size,
+				      TRX_ID_FMT ":%llu",
+				      row->lock_trx_id,
+				      row->lock_table_id);
+	}
+
+	/* the typecast is safe because snprintf(3) never returns
+	negative result */
+	ut_a(res_len >= 0);
+	ut_a((ulint) res_len < lock_id_size);
+
+	return(lock_id);
+}
diff --git a/storage/innobase/trx/trx0purge.c b/storage/innodb_plugin/trx/trx0purge.c
similarity index 83%
rename from storage/innobase/trx/trx0purge.c
rename to storage/innodb_plugin/trx/trx0purge.c
index f0e85ef1604..cd79fd1c315 100644
--- a/storage/innobase/trx/trx0purge.c
+++ b/storage/innodb_plugin/trx/trx0purge.c
@@ -1,7 +1,24 @@
-/******************************************************
-Purge old versions
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0purge.c
+Purge old versions
 
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
@@ -14,6 +31,7 @@ Created 3/26/1996 Heikki Tuuri
 
 #include "fsp0fsp.h"
 #include "mach0data.h"
+#include "mtr0log.h"
 #include "trx0rseg.h"
 #include "trx0trx.h"
 #include "trx0roll.h"
@@ -26,24 +44,24 @@ Created 3/26/1996 Heikki Tuuri
 #include "srv0que.h"
 #include "os0thread.h"
 
-/* The global data structure coordinating a purge */
-trx_purge_t*	purge_sys = NULL;
+/** The global data structure coordinating a purge */
+UNIV_INTERN trx_purge_t*	purge_sys = NULL;
 
-/* A dummy undo record used as a return value when we have a whole undo log
+/** A dummy undo record used as a return value when we have a whole undo log
 which needs no purge */
-trx_undo_rec_t	trx_purge_dummy_rec;
+UNIV_INTERN trx_undo_rec_t	trx_purge_dummy_rec;
 
-/*********************************************************************
+/*****************************************************************//**
 Checks if trx_id is >= purge_view: then it is guaranteed that its update
-undo log still exists in the system. */
-
+undo log still exists in the system.
+@return TRUE if is sure that it is preserved, also if the function
+returns FALSE, it is possible that the undo log still exists in the
+system */
+UNIV_INTERN
 ibool
 trx_purge_update_undo_must_exist(
 /*=============================*/
-			/* out: TRUE if is sure that it is preserved, also
-			if the function returns FALSE, it is possible that
-			the undo log still exists in the system */
-	dulint	trx_id)	/* in: transaction id */
+	trx_id_t	trx_id)	/*!< in: transaction id */
 {
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
@@ -59,15 +77,15 @@ trx_purge_update_undo_must_exist(
 
 /*=================== PURGE RECORD ARRAY =============================*/
 
-/***********************************************************************
-Stores info of an undo log record during a purge. */
+/*******************************************************************//**
+Stores info of an undo log record during a purge.
+@return	pointer to the storage cell */
 static
 trx_undo_inf_t*
 trx_purge_arr_store_info(
 /*=====================*/
-			/* out: pointer to the storage cell */
-	dulint	trx_no,	/* in: transaction number */
-	dulint	undo_no)/* in: undo number */
+	trx_id_t	trx_no,	/*!< in: transaction number */
+	undo_no_t	undo_no)/*!< in: undo number */
 {
 	trx_undo_inf_t*	cell;
 	trx_undo_arr_t*	arr;
@@ -91,13 +109,13 @@ trx_purge_arr_store_info(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Removes info of an undo log record during a purge. */
 UNIV_INLINE
 void
 trx_purge_arr_remove_info(
 /*======================*/
-	trx_undo_inf_t*	cell)	/* in: pointer to the storage cell */
+	trx_undo_inf_t*	cell)	/*!< in: pointer to the storage cell */
 {
 	trx_undo_arr_t*	arr;
 
@@ -110,20 +128,20 @@ trx_purge_arr_remove_info(
 	arr->n_used--;
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Gets the biggest pair of a trx number and an undo number in a purge array. */
 static
 void
 trx_purge_arr_get_biggest(
 /*======================*/
-	trx_undo_arr_t*	arr,	/* in: purge array */
-	dulint*		trx_no,	/* out: transaction number: ut_dulint_zero
+	trx_undo_arr_t*	arr,	/*!< in: purge array */
+	trx_id_t*	trx_no,	/*!< out: transaction number: ut_dulint_zero
 				if array is empty */
-	dulint*		undo_no)/* out: undo number */
+	undo_no_t*	undo_no)/*!< out: undo number */
 {
 	trx_undo_inf_t*	cell;
-	dulint		pair_trx_no;
-	dulint		pair_undo_no;
+	trx_id_t	pair_trx_no;
+	undo_no_t	pair_undo_no;
 	int		trx_cmp;
 	ulint		n_used;
 	ulint		i;
@@ -160,14 +178,14 @@ trx_purge_arr_get_biggest(
 	}
 }
 
-/********************************************************************
+/****************************************************************//**
 Builds a purge 'query' graph. The actual purge is performed by executing
-this query graph. */
+this query graph.
+@return	own: the query graph */
 static
 que_t*
 trx_purge_graph_build(void)
 /*=======================*/
-				/* out, own: the query graph */
 {
 	mem_heap_t*	heap;
 	que_fork_t*	fork;
@@ -189,10 +207,10 @@ trx_purge_graph_build(void)
 	return(fork);
 }
 
-/************************************************************************
+/********************************************************************//**
 Creates the global purge system control structure and inits the history
 mutex. */
-
+UNIV_INTERN
 void
 trx_purge_sys_create(void)
 /*======================*/
@@ -233,17 +251,17 @@ trx_purge_sys_create(void)
 
 /*================ UNDO LOG HISTORY LIST =============================*/
 
-/************************************************************************
+/********************************************************************//**
 Adds the update undo log as the first log in the history list. Removes the
 update undo log segment from the rseg slot if it is too big for reuse. */
-
+UNIV_INTERN
 void
 trx_purge_add_update_undo_to_history(
 /*=================================*/
-	trx_t*	trx,		/* in: transaction */
-	page_t*	undo_page,	/* in: update undo log header page,
+	trx_t*	trx,		/*!< in: transaction */
+	page_t*	undo_page,	/*!< in: update undo log header page,
 				x-latched */
-	mtr_t*	mtr)		/* in: mtr */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
 	trx_undo_t*	undo;
 	trx_rseg_t*	rseg;
@@ -261,7 +279,8 @@ trx_purge_add_update_undo_to_history(
 
 	ut_ad(mutex_own(&(rseg->mutex)));
 
-	rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
+	rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size,
+				    rseg->page_no, mtr);
 
 	undo_header = undo_page + undo->hdr_offset;
 	seg_header  = undo_page + TRX_UNDO_SEG_HDR;
@@ -313,16 +332,16 @@ trx_purge_add_update_undo_to_history(
 	}
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Frees an undo log segment which is in the history list. Cuts the end of the
 history list at the youngest undo log in this segment. */
 static
 void
 trx_purge_free_segment(
 /*===================*/
-	trx_rseg_t*	rseg,		/* in: rollback segment */
-	fil_addr_t	hdr_addr,	/* in: the file address of log_hdr */
-	ulint		n_removed_logs)	/* in: count of how many undo logs we
+	trx_rseg_t*	rseg,		/*!< in: rollback segment */
+	fil_addr_t	hdr_addr,	/*!< in: the file address of log_hdr */
+	ulint		n_removed_logs)	/*!< in: count of how many undo logs we
 					will cut off from the end of the
 					history list */
 {
@@ -343,9 +362,11 @@ loop:
 	mtr_start(&mtr);
 	mutex_enter(&(rseg->mutex));
 
-	rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+	rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
+				 rseg->page_no, &mtr);
 
-	undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr);
+	undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
+				      hdr_addr.page, &mtr);
 	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
 	log_hdr = undo_page + hdr_addr.boffset;
 
@@ -417,16 +438,16 @@ loop:
 	mtr_commit(&mtr);
 }
 
-/************************************************************************
+/********************************************************************//**
 Removes unnecessary history data from a rollback segment. */
 static
 void
 trx_purge_truncate_rseg_history(
 /*============================*/
-	trx_rseg_t*	rseg,		/* in: rollback segment */
-	dulint		limit_trx_no,	/* in: remove update undo logs whose
+	trx_rseg_t*	rseg,		/*!< in: rollback segment */
+	trx_id_t	limit_trx_no,	/*!< in: remove update undo logs whose
 					trx number is < limit_trx_no */
-	dulint		limit_undo_no)	/* in: if transaction number is equal
+	undo_no_t	limit_undo_no)	/*!< in: if transaction number is equal
 					to limit_trx_no, truncate undo records
 					with undo number < limit_undo_no */
 {
@@ -445,7 +466,8 @@ trx_purge_truncate_rseg_history(
 	mtr_start(&mtr);
 	mutex_enter(&(rseg->mutex));
 
-	rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+	rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
+				 rseg->page_no, &mtr);
 
 	hdr_addr = trx_purge_get_log_from_hist(
 		flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));
@@ -459,7 +481,8 @@ loop:
 		return;
 	}
 
-	undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr);
+	undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
+				      hdr_addr.page, &mtr);
 
 	log_hdr = undo_page + hdr_addr.boffset;
 
@@ -511,14 +534,15 @@ loop:
 	mtr_start(&mtr);
 	mutex_enter(&(rseg->mutex));
 
-	rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+	rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
+				 rseg->page_no, &mtr);
 
 	hdr_addr = prev_hdr_addr;
 
 	goto loop;
 }
 
-/************************************************************************
+/********************************************************************//**
 Removes unnecessary history data from rollback segments. NOTE that when this
 function is called, the caller must not have any latches on undo log pages! */
 static
@@ -527,15 +551,15 @@ trx_purge_truncate_history(void)
 /*============================*/
 {
 	trx_rseg_t*	rseg;
-	dulint		limit_trx_no;
-	dulint		limit_undo_no;
+	trx_id_t	limit_trx_no;
+	undo_no_t	limit_undo_no;
 
 	ut_ad(mutex_own(&(purge_sys->mutex)));
 
 	trx_purge_arr_get_biggest(purge_sys->arr, &limit_trx_no,
 				  &limit_undo_no);
 
-	if (ut_dulint_cmp(limit_trx_no, ut_dulint_zero) == 0) {
+	if (ut_dulint_is_zero(limit_trx_no)) {
 
 		limit_trx_no = purge_sys->purge_trx_no;
 		limit_undo_no = purge_sys->purge_undo_no;
@@ -561,14 +585,14 @@ trx_purge_truncate_history(void)
 	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Does a truncate if the purge array is empty. NOTE that when this function is
-called, the caller must not have any latches on undo log pages! */
+called, the caller must not have any latches on undo log pages!
+@return	TRUE if array empty */
 UNIV_INLINE
 ibool
 trx_purge_truncate_if_arr_empty(void)
 /*=================================*/
-			/* out: TRUE if array empty */
 {
 	ut_ad(mutex_own(&(purge_sys->mutex)));
 
@@ -582,20 +606,20 @@ trx_purge_truncate_if_arr_empty(void)
 	return(FALSE);
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Updates the last not yet purged history log info in rseg when we have purged
 a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */
 static
 void
 trx_purge_rseg_get_next_history_log(
 /*================================*/
-	trx_rseg_t*	rseg)	/* in: rollback segment */
+	trx_rseg_t*	rseg)	/*!< in: rollback segment */
 {
 	page_t*		undo_page;
 	trx_ulogf_t*	log_hdr;
 	trx_usegf_t*	seg_hdr;
 	fil_addr_t	prev_log_addr;
-	dulint		trx_no;
+	trx_id_t	trx_no;
 	ibool		del_marks;
 	mtr_t		mtr;
 
@@ -611,7 +635,7 @@ trx_purge_rseg_get_next_history_log(
 
 	mtr_start(&mtr);
 
-	undo_page = trx_undo_page_get_s_latched(rseg->space,
+	undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
 						rseg->last_page_no, &mtr);
 	log_hdr = undo_page + rseg->last_offset;
 	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
@@ -663,7 +687,7 @@ trx_purge_rseg_get_next_history_log(
 	/* Read the trx number and del marks from the previous log header */
 	mtr_start(&mtr);
 
-	log_hdr = trx_undo_page_get_s_latched(rseg->space,
+	log_hdr = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
 					      prev_log_addr.page, &mtr)
 		+ prev_log_addr.boffset;
 
@@ -683,7 +707,7 @@ trx_purge_rseg_get_next_history_log(
 	mutex_exit(&(rseg->mutex));
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Chooses the next undo log to purge and updates the info in purge_sys. This
 function is used to initialize purge_sys when the next record to purge is
 not known, and also to update the purge system info on the next record when
@@ -696,8 +720,9 @@ trx_purge_choose_next_log(void)
 	trx_undo_rec_t*	rec;
 	trx_rseg_t*	rseg;
 	trx_rseg_t*	min_rseg;
-	dulint		min_trx_no;
+	trx_id_t	min_trx_no;
 	ulint		space = 0;   /* remove warning (??? bug ???) */
+	ulint		zip_size = 0;
 	ulint		page_no = 0; /* remove warning (??? bug ???) */
 	ulint		offset = 0;  /* remove warning (??? bug ???) */
 	mtr_t		mtr;
@@ -723,6 +748,7 @@ trx_purge_choose_next_log(void)
 				min_rseg = rseg;
 				min_trx_no = rseg->last_trx_no;
 				space = rseg->space;
+				zip_size = rseg->zip_size;
 				ut_a(space == 0); /* We assume in purge of
 						  externally stored fields
 						  that space id == 0 */
@@ -748,7 +774,7 @@ trx_purge_choose_next_log(void)
 
 		rec = &trx_purge_dummy_rec;
 	} else {
-		rec = trx_undo_get_first_rec(space, page_no, offset,
+		rec = trx_undo_get_first_rec(space, zip_size, page_no, offset,
 					     RW_S_LATCH, &mtr);
 		if (rec == NULL) {
 			/* Undo log empty */
@@ -773,22 +799,21 @@ trx_purge_choose_next_log(void)
 	} else {
 		purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec);
 
-		purge_sys->page_no = buf_frame_get_page_no(rec);
-		purge_sys->offset = rec - buf_frame_align(rec);
+		purge_sys->page_no = page_get_page_no(page_align(rec));
+		purge_sys->offset = page_offset(rec);
 	}
 
 	mtr_commit(&mtr);
 }
 
-/***************************************************************************
-Gets the next record to purge and updates the info in the purge system. */
+/***********************************************************************//**
+Gets the next record to purge and updates the info in the purge system.
+@return	copy of an undo log record or pointer to the dummy undo log record */
 static
 trx_undo_rec_t*
 trx_purge_get_next_rec(
 /*===================*/
-				/* out: copy of an undo log record or
-				pointer to the dummy undo log record */
-	mem_heap_t*	heap)	/* in: memory heap where copied */
+	mem_heap_t*	heap)	/*!< in: memory heap where copied */
 {
 	trx_undo_rec_t*	rec;
 	trx_undo_rec_t*	rec_copy;
@@ -799,6 +824,7 @@ trx_purge_get_next_rec(
 	ulint		offset;
 	ulint		page_no;
 	ulint		space;
+	ulint		zip_size;
 	ulint		type;
 	ulint		cmpl_info;
 	mtr_t		mtr;
@@ -807,6 +833,7 @@ trx_purge_get_next_rec(
 	ut_ad(purge_sys->next_stored);
 
 	space = purge_sys->rseg->space;
+	zip_size = purge_sys->rseg->zip_size;
 	page_no = purge_sys->page_no;
 	offset = purge_sys->offset;
 
@@ -825,7 +852,8 @@ trx_purge_get_next_rec(
 
 	mtr_start(&mtr);
 
-	undo_page = trx_undo_page_get_s_latched(space, page_no, &mtr);
+	undo_page = trx_undo_page_get_s_latched(space, zip_size,
+						page_no, &mtr);
 	rec = undo_page + offset;
 
 	rec2 = rec;
@@ -876,14 +904,15 @@ trx_purge_get_next_rec(
 
 		mtr_start(&mtr);
 
-		undo_page = trx_undo_page_get_s_latched(space, page_no, &mtr);
+		undo_page = trx_undo_page_get_s_latched(space, zip_size,
+							page_no, &mtr);
 
 		rec = undo_page + offset;
 	} else {
-		page = buf_frame_align(rec2);
+		page = page_align(rec2);
 
 		purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec2);
-		purge_sys->page_no = buf_frame_get_page_no(page);
+		purge_sys->page_no = page_get_page_no(page);
 		purge_sys->offset = rec2 - page;
 
 		if (undo_page != page) {
@@ -899,21 +928,19 @@ trx_purge_get_next_rec(
 	return(rec_copy);
 }
 
-/************************************************************************
+/********************************************************************//**
 Fetches the next undo log record from the history list to purge. It must be
-released with the corresponding release function. */
-
+released with the corresponding release function.
+@return copy of an undo log record or pointer to trx_purge_dummy_rec,
+if the whole undo log can skipped in purge; NULL if none left */
+UNIV_INTERN
 trx_undo_rec_t*
 trx_purge_fetch_next_rec(
 /*=====================*/
-				/* out: copy of an undo log record or
-				pointer to the dummy undo log record
-				&trx_purge_dummy_rec, if the whole undo log
-				can skipped in purge; NULL if none left */
-	dulint*		roll_ptr,/* out: roll pointer to undo record */
-	trx_undo_inf_t** cell,	/* out: storage cell for the record in the
+	roll_ptr_t*	roll_ptr,/*!< out: roll pointer to undo record */
+	trx_undo_inf_t** cell,	/*!< out: storage cell for the record in the
 				purge array */
-	mem_heap_t*	heap)	/* in: memory heap where copied */
+	mem_heap_t*	heap)	/*!< in: memory heap where copied */
 {
 	trx_undo_rec_t*	undo_rec;
 
@@ -995,13 +1022,13 @@ trx_purge_fetch_next_rec(
 	return(undo_rec);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Releases a reserved purge undo record. */
-
+UNIV_INTERN
 void
 trx_purge_rec_release(
 /*==================*/
-	trx_undo_inf_t*	cell)	/* in: storage cell */
+	trx_undo_inf_t*	cell)	/*!< in: storage cell */
 {
 	trx_undo_arr_t*	arr;
 
@@ -1014,14 +1041,13 @@ trx_purge_rec_release(
 	mutex_exit(&(purge_sys->mutex));
 }
 
-/***********************************************************************
-This function runs a purge batch. */
-
+/*******************************************************************//**
+This function runs a purge batch.
+@return	number of undo log pages handled in the batch */
+UNIV_INTERN
 ulint
 trx_purge(void)
 /*===========*/
-				/* out: number of undo log pages handled in
-				the batch */
 {
 	que_thr_t*	thr;
 	/*	que_thr_t*	thr2; */
@@ -1122,9 +1148,9 @@ trx_purge(void)
 	return(purge_sys->n_pages_handled - old_pages_handled);
 }
 
-/**********************************************************************
+/******************************************************************//**
 Prints information of the purge system to stderr. */
-
+UNIV_INTERN
 void
 trx_purge_sys_print(void)
 /*=====================*/
@@ -1132,11 +1158,10 @@ trx_purge_sys_print(void)
 	fprintf(stderr, "InnoDB: Purge system view:\n");
 	read_view_print(purge_sys->view);
 
-	fprintf(stderr, "InnoDB: Purge trx n:o %lu %lu, undo n_o %lu %lu\n",
-		(ulong) ut_dulint_get_high(purge_sys->purge_trx_no),
-		(ulong) ut_dulint_get_low(purge_sys->purge_trx_no),
-		(ulong) ut_dulint_get_high(purge_sys->purge_undo_no),
-		(ulong) ut_dulint_get_low(purge_sys->purge_undo_no));
+	fprintf(stderr, "InnoDB: Purge trx n:o " TRX_ID_FMT
+		", undo n:o " TRX_ID_FMT "\n",
+		TRX_ID_PREP_PRINTF(purge_sys->purge_trx_no),
+		TRX_ID_PREP_PRINTF(purge_sys->purge_undo_no));
 	fprintf(stderr,
 		"InnoDB: Purge next stored %lu, page_no %lu, offset %lu,\n"
 		"InnoDB: Purge hdr_page_no %lu, hdr_offset %lu\n",
diff --git a/storage/innobase/trx/trx0rec.c b/storage/innodb_plugin/trx/trx0rec.c
similarity index 52%
rename from storage/innobase/trx/trx0rec.c
rename to storage/innodb_plugin/trx/trx0rec.c
index 50f8b011463..36911c9df85 100644
--- a/storage/innobase/trx/trx0rec.c
+++ b/storage/innodb_plugin/trx/trx0rec.c
@@ -1,7 +1,24 @@
-/******************************************************
-Transaction undo log record
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0rec.c
+Transaction undo log record
 
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
@@ -14,29 +31,31 @@ Created 3/26/1996 Heikki Tuuri
 
 #include "fsp0fsp.h"
 #include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
 #include "trx0undo.h"
+#include "mtr0log.h"
+#ifndef UNIV_HOTBACKUP
 #include "dict0dict.h"
 #include "ut0mem.h"
+#include "row0ext.h"
 #include "row0upd.h"
 #include "que0que.h"
 #include "trx0purge.h"
+#include "trx0rseg.h"
 #include "row0row.h"
 
 /*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
 
-/**************************************************************************
+/**********************************************************************//**
 Writes the mtr log entry of the inserted undo log record on the undo log
 page. */
 UNIV_INLINE
 void
 trx_undof_page_add_undo_rec_log(
 /*============================*/
-	page_t* undo_page,	/* in: undo log page */
-	ulint	old_free,	/* in: start offset of the inserted entry */
-	ulint	new_free,	/* in: end offset of the entry */
-	mtr_t*	mtr)		/* in: mtr */
+	page_t* undo_page,	/*!< in: undo log page */
+	ulint	old_free,	/*!< in: start offset of the inserted entry */
+	ulint	new_free,	/*!< in: end offset of the entry */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
 	byte*		log_ptr;
 	const byte*	log_end;
@@ -65,17 +84,18 @@ trx_undof_page_add_undo_rec_log(
 		mlog_catenate_string(mtr, undo_page + old_free + 2, len);
 	}
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***************************************************************
-Parses a redo log record of adding an undo log record. */
-
+/***********************************************************//**
+Parses a redo log record of adding an undo log record.
+@return	end of log record or NULL */
+UNIV_INTERN
 byte*
 trx_undo_parse_add_undo_rec(
 /*========================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	page_t*	page)	/* in: page or NULL */
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	page_t*	page)	/*!< in: page or NULL */
 {
 	ulint	len;
 	byte*	rec;
@@ -113,15 +133,16 @@ trx_undo_parse_add_undo_rec(
 	return(ptr + len);
 }
 
-/**************************************************************************
-Calculates the free space left for extending an undo log record. */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Calculates the free space left for extending an undo log record.
+@return	bytes left */
 UNIV_INLINE
 ulint
 trx_undo_left(
 /*==========*/
-			/* out: bytes left */
-	page_t* page,	/* in: undo log page */
-	byte*	ptr)	/* in: pointer to page */
+	const page_t*	page,	/*!< in: undo log page */
+	const byte*	ptr)	/*!< in: pointer to page */
 {
 	/* The '- 10' is a safety margin, in case we have some small
 	calculation error below */
@@ -129,28 +150,77 @@ trx_undo_left(
 	return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
 }
 
-/**************************************************************************
-Reports in the undo log of an insert of a clustered index record. */
+/**********************************************************************//**
+Set the next and previous pointers in the undo page for the undo record
+that was written to ptr. Update the first free value by the number of bytes
+written for this undo record.
+@return	offset of the inserted entry on the page if succeeded, 0 if fail */
+static
+ulint
+trx_undo_page_set_next_prev_and_add(
+/*================================*/
+	page_t*		undo_page,	/*!< in/out: undo log page */
+	byte*		ptr,		/*!< in: ptr up to where data has been
+					written on this undo page. */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	ulint		first_free;	/*!< offset within undo_page */
+	ulint		end_of_rec;	/*!< offset within undo_page */
+	byte*		ptr_to_first_free;
+					/* pointer within undo_page
+					that points to the next free
+					offset value within undo_page.*/
+
+	ut_ad(ptr > undo_page);
+	ut_ad(ptr < undo_page + UNIV_PAGE_SIZE);
+
+	if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) {
+
+		return(0);
+	}
+
+	ptr_to_first_free = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE;
+
+	first_free = mach_read_from_2(ptr_to_first_free);
+
+	/* Write offset of the previous undo log record */
+	mach_write_to_2(ptr, first_free);
+	ptr += 2;
+
+	end_of_rec = ptr - undo_page;
+
+	/* Write offset of the next undo log record */
+	mach_write_to_2(undo_page + first_free, end_of_rec);
+
+	/* Update the offset to first free undo record */
+	mach_write_to_2(ptr_to_first_free, end_of_rec);
+
+	/* Write this log entry to the UNDO log */
+	trx_undof_page_add_undo_rec_log(undo_page, first_free,
+					end_of_rec, mtr);
+
+	return(first_free);
+}
+
+/**********************************************************************//**
+Reports in the undo log of an insert of a clustered index record.
+@return	offset of the inserted entry on the page if succeed, 0 if fail */
 static
 ulint
 trx_undo_page_report_insert(
 /*========================*/
-					/* out: offset of the inserted entry
-					on the page if succeed, 0 if fail */
-	page_t*		undo_page,	/* in: undo log page */
-	trx_t*		trx,		/* in: transaction */
-	dict_index_t*	index,		/* in: clustered index */
-	dtuple_t*	clust_entry,	/* in: index entry which will be
+	page_t*		undo_page,	/*!< in: undo log page */
+	trx_t*		trx,		/*!< in: transaction */
+	dict_index_t*	index,		/*!< in: clustered index */
+	const dtuple_t*	clust_entry,	/*!< in: index entry which will be
 					inserted to the clustered index */
-	mtr_t*		mtr)		/* in: mtr */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ulint		first_free;
 	byte*		ptr;
-	ulint		len;
-	dfield_t*	field;
-	ulint		flen;
 	ulint		i;
 
+	ut_ad(dict_index_is_clust(index));
 	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
 			       + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
 
@@ -160,10 +230,9 @@ trx_undo_page_report_insert(
 
 	ut_ad(first_free <= UNIV_PAGE_SIZE);
 
-	if (trx_undo_left(undo_page, ptr) < 30) {
+	if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) {
 
-		/* NOTE: the value 30 must be big enough such that the general
-		fields written below fit on the undo log page */
+		/* Not enough space for writing the general parameters */
 
 		return(0);
 	}
@@ -172,31 +241,24 @@ trx_undo_page_report_insert(
 	ptr += 2;
 
 	/* Store first some general parameters to the undo log */
-	mach_write_to_1(ptr, TRX_UNDO_INSERT_REC);
-	ptr++;
-
-	len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
-	ptr += len;
-
-	len = mach_dulint_write_much_compressed(ptr, (index->table)->id);
-	ptr += len;
+	*ptr++ = TRX_UNDO_INSERT_REC;
+	ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
+	ptr += mach_dulint_write_much_compressed(ptr, index->table->id);
 	/*----------------------------------------*/
 	/* Store then the fields required to uniquely determine the record
 	to be inserted in the clustered index */
 
 	for (i = 0; i < dict_index_get_n_unique(index); i++) {
 
-		field = dtuple_get_nth_field(clust_entry, i);
-
-		flen = dfield_get_len(field);
+		const dfield_t*	field	= dtuple_get_nth_field(clust_entry, i);
+		ulint		flen	= dfield_get_len(field);
 
 		if (trx_undo_left(undo_page, ptr) < 5) {
 
 			return(0);
 		}
 
-		len = mach_write_compressed(ptr, flen);
-		ptr += len;
+		ptr += mach_write_compressed(ptr, flen);
 
 		if (flen != UNIV_SQL_NULL) {
 			if (trx_undo_left(undo_page, ptr) < flen) {
@@ -209,54 +271,27 @@ trx_undo_page_report_insert(
 		}
 	}
 
-	if (trx_undo_left(undo_page, ptr) < 2) {
-
-		return(0);
-	}
-
-	/*----------------------------------------*/
-	/* Write pointers to the previous and the next undo log records */
-
-	if (trx_undo_left(undo_page, ptr) < 2) {
-
-		return(0);
-	}
-
-	mach_write_to_2(ptr, first_free);
-	ptr += 2;
-
-	mach_write_to_2(undo_page + first_free, ptr - undo_page);
-
-	mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
-			ptr - undo_page);
-
-	/* Write the log entry to the REDO log of this change in the UNDO
-	log */
-	trx_undof_page_add_undo_rec_log(undo_page, first_free,
-					ptr - undo_page, mtr);
-	return(first_free);
+	return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
 }
 
-/**************************************************************************
-Reads from an undo log record the general parameters. */
-
+/**********************************************************************//**
+Reads from an undo log record the general parameters.
+@return	remaining part of undo log record after reading these values */
+UNIV_INTERN
 byte*
 trx_undo_rec_get_pars(
 /*==================*/
-					/* out: remaining part of undo log
-					record after reading these values */
-	trx_undo_rec_t*	undo_rec,	/* in: undo log record */
-	ulint*		type,		/* out: undo record type:
+	trx_undo_rec_t*	undo_rec,	/*!< in: undo log record */
+	ulint*		type,		/*!< out: undo record type:
 					TRX_UNDO_INSERT_REC, ... */
-	ulint*		cmpl_info,	/* out: compiler info, relevant only
+	ulint*		cmpl_info,	/*!< out: compiler info, relevant only
 					for update type records */
-	ibool*		updated_extern,	/* out: TRUE if we updated an
+	ibool*		updated_extern,	/*!< out: TRUE if we updated an
 					externally stored fild */
-	dulint*		undo_no,	/* out: undo log record number */
-	dulint*		table_id)	/* out: table id */
+	undo_no_t*	undo_no,	/*!< out: undo log record number */
+	dulint*		table_id)	/*!< out: table id */
 {
 	byte*		ptr;
-	ulint		len;
 	ulint		type_cmpl;
 
 	ptr = undo_rec + 2;
@@ -275,36 +310,55 @@ trx_undo_rec_get_pars(
 	*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
 
 	*undo_no = mach_dulint_read_much_compressed(ptr);
-	len = mach_dulint_get_much_compressed_size(*undo_no);
-	ptr += len;
+	ptr += mach_dulint_get_much_compressed_size(*undo_no);
 
 	*table_id = mach_dulint_read_much_compressed(ptr);
-	len = mach_dulint_get_much_compressed_size(*table_id);
-	ptr += len;
+	ptr += mach_dulint_get_much_compressed_size(*table_id);
 
 	return(ptr);
 }
 
-/**************************************************************************
-Reads from an undo log record a stored column value. */
+/**********************************************************************//**
+Reads from an undo log record a stored column value.
+@return	remaining part of undo log record after reading these values */
 static
 byte*
 trx_undo_rec_get_col_val(
 /*=====================*/
-			/* out: remaining part of undo log record after
-			reading these values */
-	byte*	ptr,	/* in: pointer to remaining part of undo log record */
-	byte**	field,	/* out: pointer to stored field */
-	ulint*	len)	/* out: length of the field, or UNIV_SQL_NULL */
+	byte*	ptr,	/*!< in: pointer to remaining part of undo log record */
+	byte**	field,	/*!< out: pointer to stored field */
+	ulint*	len,	/*!< out: length of the field, or UNIV_SQL_NULL */
+	ulint*	orig_len)/*!< out: original length of the locally
+			stored part of an externally stored column, or 0 */
 {
 	*len = mach_read_compressed(ptr);
 	ptr += mach_get_compressed_size(*len);
 
-	*field = ptr;
+	*orig_len = 0;
 
-	if (*len != UNIV_SQL_NULL) {
+	switch (*len) {
+	case UNIV_SQL_NULL:
+		*field = NULL;
+		break;
+	case UNIV_EXTERN_STORAGE_FIELD:
+		*orig_len = mach_read_compressed(ptr);
+		ptr += mach_get_compressed_size(*orig_len);
+		*len = mach_read_compressed(ptr);
+		ptr += mach_get_compressed_size(*len);
+		*field = ptr;
+		ptr += *len;
+
+		ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
+		ut_ad(*len > *orig_len);
+		ut_ad(*len >= REC_MAX_INDEX_COL_LEN
+		      + BTR_EXTERN_FIELD_REF_SIZE);
+
+		*len += UNIV_EXTERN_STORAGE_FIELD;
+		break;
+	default:
+		*field = ptr;
 		if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
-			ptr += (*len - UNIV_EXTERN_STORAGE_FIELD);
+			ptr += *len - UNIV_EXTERN_STORAGE_FIELD;
 		} else {
 			ptr += *len;
 		}
@@ -313,33 +367,29 @@ trx_undo_rec_get_col_val(
 	return(ptr);
 }
 
-/***********************************************************************
-Builds a row reference from an undo log record. */
-
+/*******************************************************************//**
+Builds a row reference from an undo log record.
+@return	pointer to remaining part of undo record */
+UNIV_INTERN
 byte*
 trx_undo_rec_get_row_ref(
 /*=====================*/
-				/* out: pointer to remaining part of undo
-				record */
-	byte*		ptr,	/* in: remaining part of a copy of an undo log
+	byte*		ptr,	/*!< in: remaining part of a copy of an undo log
 				record, at the start of the row reference;
 				NOTE that this copy of the undo log record must
 				be preserved as long as the row reference is
 				used, as we do NOT copy the data in the
 				record! */
-	dict_index_t*	index,	/* in: clustered index */
-	dtuple_t**	ref,	/* out, own: row reference */
-	mem_heap_t*	heap)	/* in: memory heap from which the memory
+	dict_index_t*	index,	/*!< in: clustered index */
+	dtuple_t**	ref,	/*!< out, own: row reference */
+	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
 				needed is allocated */
 {
-	dfield_t*	dfield;
-	byte*		field;
-	ulint		len;
 	ulint		ref_len;
 	ulint		i;
 
 	ut_ad(index && ptr && ref && heap);
-	ut_a(index->type & DICT_CLUSTERED);
+	ut_a(dict_index_is_clust(index));
 
 	ref_len = dict_index_get_n_unique(index);
 
@@ -348,9 +398,14 @@ trx_undo_rec_get_row_ref(
 	dict_index_copy_types(*ref, index, ref_len);
 
 	for (i = 0; i < ref_len; i++) {
+		dfield_t*	dfield;
+		byte*		field;
+		ulint		len;
+		ulint		orig_len;
+
 		dfield = dtuple_get_nth_field(*ref, i);
 
-		ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
+		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
 
 		dfield_set_data(dfield, field, len);
 	}
@@ -358,77 +413,145 @@ trx_undo_rec_get_row_ref(
 	return(ptr);
 }
 
-/***********************************************************************
-Skips a row reference from an undo log record. */
-
+/*******************************************************************//**
+Skips a row reference from an undo log record.
+@return	pointer to remaining part of undo record */
+UNIV_INTERN
 byte*
 trx_undo_rec_skip_row_ref(
 /*======================*/
-				/* out: pointer to remaining part of undo
-				record */
-	byte*		ptr,	/* in: remaining part in update undo log
+	byte*		ptr,	/*!< in: remaining part in update undo log
 				record, at the start of the row reference */
-	dict_index_t*	index)	/* in: clustered index */
+	dict_index_t*	index)	/*!< in: clustered index */
 {
-	byte*	field;
-	ulint	len;
 	ulint	ref_len;
 	ulint	i;
 
 	ut_ad(index && ptr);
-	ut_a(index->type & DICT_CLUSTERED);
+	ut_a(dict_index_is_clust(index));
 
 	ref_len = dict_index_get_n_unique(index);
 
 	for (i = 0; i < ref_len; i++) {
-		ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
+		byte*	field;
+		ulint	len;
+		ulint	orig_len;
+
+		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
 	}
 
 	return(ptr);
 }
 
-/**************************************************************************
+/**********************************************************************//**
+Fetch a prefix of an externally stored column, for writing to the undo log
+of an update or delete marking of a clustered index record.
+@return	ext_buf */
+static
+byte*
+trx_undo_page_fetch_ext(
+/*====================*/
+	byte*		ext_buf,	/*!< in: a buffer of
+					REC_MAX_INDEX_COL_LEN
+					+ BTR_EXTERN_FIELD_REF_SIZE */
+	ulint		zip_size,	/*!< compressed page size in bytes,
+					or 0 for uncompressed BLOB  */
+	const byte*	field,		/*!< in: an externally stored column */
+	ulint*		len)		/*!< in: length of field;
+					out: used length of ext_buf */
+{
+	/* Fetch the BLOB. */
+	ulint	ext_len = btr_copy_externally_stored_field_prefix(
+		ext_buf, REC_MAX_INDEX_COL_LEN, zip_size, field, *len);
+	/* BLOBs should always be nonempty. */
+	ut_a(ext_len);
+	/* Append the BLOB pointer to the prefix. */
+	memcpy(ext_buf + ext_len,
+	       field + *len - BTR_EXTERN_FIELD_REF_SIZE,
+	       BTR_EXTERN_FIELD_REF_SIZE);
+	*len = ext_len + BTR_EXTERN_FIELD_REF_SIZE;
+	return(ext_buf);
+}
+
+/**********************************************************************//**
+Writes to the undo log a prefix of an externally stored column.
+@return	undo log position */
+static
+byte*
+trx_undo_page_report_modify_ext(
+/*============================*/
+	byte*		ptr,		/*!< in: undo log position,
+					at least 15 bytes must be available */
+	byte*		ext_buf,	/*!< in: a buffer of
+					REC_MAX_INDEX_COL_LEN
+					+ BTR_EXTERN_FIELD_REF_SIZE,
+					or NULL when should not fetch
+					a longer prefix */
+	ulint		zip_size,	/*!< compressed page size in bytes,
+					or 0 for uncompressed BLOB  */
+	const byte**	field,		/*!< in/out: the locally stored part of
+					the externally stored column */
+	ulint*		len)		/*!< in/out: length of field, in bytes */
+{
+	if (ext_buf) {
+		/* If an ordering column is externally stored, we will
+		have to store a longer prefix of the field.  In this
+		case, write to the log a marker followed by the
+		original length and the real length of the field. */
+		ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD);
+
+		ptr += mach_write_compressed(ptr, *len);
+
+		*field = trx_undo_page_fetch_ext(ext_buf, zip_size,
+						 *field, len);
+
+		ptr += mach_write_compressed(ptr, *len);
+	} else {
+		ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD
+					     + *len);
+	}
+
+	return(ptr);
+}
+
+/**********************************************************************//**
 Reports in the undo log of an update or delete marking of a clustered index
-record. */
+record.
+@return byte offset of the inserted undo log entry on the page if
+succeed, 0 if fail */
 static
 ulint
 trx_undo_page_report_modify(
 /*========================*/
-					/* out: byte offset of the inserted
-					undo log entry on the page if succeed,
-					0 if fail */
-	page_t*		undo_page,	/* in: undo log page */
-	trx_t*		trx,		/* in: transaction */
-	dict_index_t*	index,		/* in: clustered index where update or
+	page_t*		undo_page,	/*!< in: undo log page */
+	trx_t*		trx,		/*!< in: transaction */
+	dict_index_t*	index,		/*!< in: clustered index where update or
 					delete marking is done */
-	rec_t*		rec,		/* in: clustered index record which
+	const rec_t*	rec,		/*!< in: clustered index record which
 					has NOT yet been modified */
-	const ulint*	offsets,	/* in: rec_get_offsets(rec, index) */
-	upd_t*		update,		/* in: update vector which tells the
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
+	const upd_t*	update,		/*!< in: update vector which tells the
 					columns to be updated; in the case of
 					a delete, this should be set to NULL */
-	ulint		cmpl_info,	/* in: compiler info on secondary
+	ulint		cmpl_info,	/*!< in: compiler info on secondary
 					index updates */
-	mtr_t*		mtr)		/* in: mtr */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	dict_table_t*	table;
-	upd_field_t*	upd_field;
 	ulint		first_free;
 	byte*		ptr;
-	ulint		len;
-	byte*		field;
+	const byte*	field;
 	ulint		flen;
-	ulint		pos;
-	dulint		roll_ptr;
-	dulint		trx_id;
-	ulint		bits;
 	ulint		col_no;
-	byte*		old_ptr;
 	ulint		type_cmpl;
 	byte*		type_cmpl_ptr;
 	ulint		i;
+	trx_id_t	trx_id;
+	ibool		ignore_prefix = FALSE;
+	byte		ext_buf[REC_MAX_INDEX_COL_LEN
+				+ BTR_EXTERN_FIELD_REF_SIZE];
 
-	ut_a(index->type & DICT_CLUSTERED);
+	ut_a(dict_index_is_clust(index));
 	ut_ad(rec_offs_validate(rec, index, offsets));
 	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
 			       + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
@@ -453,53 +576,55 @@ trx_undo_page_report_modify(
 
 	/* Store first some general parameters to the undo log */
 
-	if (update) {
-		if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
-			type_cmpl = TRX_UNDO_UPD_DEL_REC;
-		} else {
-			type_cmpl = TRX_UNDO_UPD_EXIST_REC;
-		}
-	} else {
+	if (!update) {
 		type_cmpl = TRX_UNDO_DEL_MARK_REC;
+	} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
+		type_cmpl = TRX_UNDO_UPD_DEL_REC;
+		/* We are about to update a delete marked record.
+		We don't typically need the prefix in this case unless
+		the delete marking is done by the same transaction
+		(which we check below). */
+		ignore_prefix = TRUE;
+	} else {
+		type_cmpl = TRX_UNDO_UPD_EXIST_REC;
 	}
 
-	type_cmpl = type_cmpl | (cmpl_info * TRX_UNDO_CMPL_INFO_MULT);
-
-	mach_write_to_1(ptr, type_cmpl);
-
+	type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
 	type_cmpl_ptr = ptr;
 
-	ptr++;
-	len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
-	ptr += len;
+	*ptr++ = (byte) type_cmpl;
+	ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
 
-	len = mach_dulint_write_much_compressed(ptr, table->id);
-	ptr += len;
+	ptr += mach_dulint_write_much_compressed(ptr, table->id);
 
 	/*----------------------------------------*/
 	/* Store the state of the info bits */
 
-	bits = rec_get_info_bits(rec, dict_table_is_comp(table));
-	mach_write_to_1(ptr, bits);
-	ptr += 1;
+	*ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));
 
 	/* Store the values of the system columns */
 	field = rec_get_nth_field(rec, offsets,
 				  dict_index_get_sys_col_pos(
-					  index, DATA_TRX_ID), &len);
-	ut_ad(len == DATA_TRX_ID_LEN);
+					  index, DATA_TRX_ID), &flen);
+	ut_ad(flen == DATA_TRX_ID_LEN);
+
 	trx_id = trx_read_trx_id(field);
+
+	/* If it is an update of a delete marked record, then we are
+	allowed to ignore blob prefixes if the delete marking was done
+	by some other trx as it must have committed by now for us to
+	allow an over-write. */
+	if (ignore_prefix) {
+		ignore_prefix = ut_dulint_cmp(trx_id, trx->id) != 0;
+	}
+	ptr += mach_dulint_write_compressed(ptr, trx_id);
+
 	field = rec_get_nth_field(rec, offsets,
 				  dict_index_get_sys_col_pos(
-					  index, DATA_ROLL_PTR), &len);
-	ut_ad(len == DATA_ROLL_PTR_LEN);
-	roll_ptr = trx_read_roll_ptr(field);
+					  index, DATA_ROLL_PTR), &flen);
+	ut_ad(flen == DATA_ROLL_PTR_LEN);
 
-	len = mach_dulint_write_compressed(ptr, trx_id);
-	ptr += len;
-
-	len = mach_dulint_write_compressed(ptr, roll_ptr);
-	ptr += len;
+	ptr += mach_dulint_write_compressed(ptr, trx_read_roll_ptr(field));
 
 	/*----------------------------------------*/
 	/* Store then the fields required to uniquely determine the
@@ -509,13 +634,16 @@ trx_undo_page_report_modify(
 
 		field = rec_get_nth_field(rec, offsets, i, &flen);
 
-		if (trx_undo_left(undo_page, ptr) < 4) {
+		/* The ordering columns must not be stored externally. */
+		ut_ad(!rec_offs_nth_extern(offsets, i));
+		ut_ad(dict_index_get_nth_col(index, i)->ord_part);
+
+		if (trx_undo_left(undo_page, ptr) < 5) {
 
 			return(0);
 		}
 
-		len = mach_write_compressed(ptr, flen);
-		ptr += len;
+		ptr += mach_write_compressed(ptr, flen);
 
 		if (flen != UNIV_SQL_NULL) {
 			if (trx_undo_left(undo_page, ptr) < flen) {
@@ -537,13 +665,11 @@ trx_undo_page_report_modify(
 			return(0);
 		}
 
-		len = mach_write_compressed(ptr, upd_get_n_fields(update));
-		ptr += len;
+		ptr += mach_write_compressed(ptr, upd_get_n_fields(update));
 
 		for (i = 0; i < upd_get_n_fields(update); i++) {
 
-			upd_field = upd_get_nth_field(update, i);
-			pos = upd_field->field_no;
+			ulint	pos = upd_get_nth_field(update, i)->field_no;
 
 			/* Write field number to undo log */
 			if (trx_undo_left(undo_page, ptr) < 5) {
@@ -551,38 +677,37 @@ trx_undo_page_report_modify(
 				return(0);
 			}
 
-			len = mach_write_compressed(ptr, pos);
-			ptr += len;
+			ptr += mach_write_compressed(ptr, pos);
 
 			/* Save the old value of field */
 			field = rec_get_nth_field(rec, offsets, pos, &flen);
 
-			if (trx_undo_left(undo_page, ptr) < 5) {
+			if (trx_undo_left(undo_page, ptr) < 15) {
 
 				return(0);
 			}
 
 			if (rec_offs_nth_extern(offsets, pos)) {
-				/* If a field has external storage, we add
-				to flen the flag */
-
-				len = mach_write_compressed(
+				ptr = trx_undo_page_report_modify_ext(
 					ptr,
-					UNIV_EXTERN_STORAGE_FIELD + flen);
+					dict_index_get_nth_col(index, pos)
+					->ord_part
+					&& !ignore_prefix
+					&& flen < REC_MAX_INDEX_COL_LEN
+					? ext_buf : NULL,
+					dict_table_zip_size(table),
+					&field, &flen);
 
 				/* Notify purge that it eventually has to
 				free the old externally stored field */
 
 				trx->update_undo->del_marks = TRUE;
 
-				*type_cmpl_ptr = *type_cmpl_ptr
-					| TRX_UNDO_UPD_EXTERN;
+				*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
 			} else {
-				len = mach_write_compressed(ptr, flen);
+				ptr += mach_write_compressed(ptr, flen);
 			}
 
-			ptr += len;
-
 			if (flen != UNIV_SQL_NULL) {
 				if (trx_undo_left(undo_page, ptr) < flen) {
 
@@ -602,12 +727,15 @@ trx_undo_page_report_modify(
 	in the purge of old versions where we use it to build and search the
 	delete marked index records, to look if we can remove them from the
 	index tree. Note that starting from 4.0.14 also externally stored
-	fields can be ordering in some index. But we always store at least
-	384 first bytes locally to the clustered index record, which means
-	we can construct the column prefix fields in the index from the
-	stored data. */
+	fields can be ordering in some index. Starting from 5.2, we no longer
+	store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
+	but we can construct the column prefix fields in the index by
+	fetching the first page of the BLOB that is pointed to by the
+	clustered index. This works also in crash recovery, because all pages
+	(including BLOBs) are recovered before anything is rolled back. */
 
 	if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
+		byte*	old_ptr = ptr;
 
 		trx->update_undo->del_marks = TRUE;
 
@@ -616,8 +744,6 @@ trx_undo_page_report_modify(
 			return(0);
 		}
 
-		old_ptr = ptr;
-
 		/* Reserve 2 bytes to write the number of bytes the stored
 		fields take in this undo record */
 
@@ -629,32 +755,36 @@ trx_undo_page_report_modify(
 			const dict_col_t*	col
 				= dict_table_get_nth_col(table, col_no);
 
-			if (col->ord_part > 0) {
-
-				pos = dict_index_get_nth_col_pos(index,
-								 col_no);
+			if (col->ord_part) {
+				ulint	pos;
 
 				/* Write field number to undo log */
-				if (trx_undo_left(undo_page, ptr) < 5) {
+				if (trx_undo_left(undo_page, ptr) < 5 + 15) {
 
 					return(0);
 				}
 
-				len = mach_write_compressed(ptr, pos);
-				ptr += len;
+				pos = dict_index_get_nth_col_pos(index,
+								 col_no);
+				ptr += mach_write_compressed(ptr, pos);
 
 				/* Save the old value of field */
 				field = rec_get_nth_field(rec, offsets, pos,
 							  &flen);
 
-				if (trx_undo_left(undo_page, ptr) < 5) {
-
-					return(0);
+				if (rec_offs_nth_extern(offsets, pos)) {
+					ptr = trx_undo_page_report_modify_ext(
+						ptr,
+						flen < REC_MAX_INDEX_COL_LEN
+						&& !ignore_prefix
+						? ext_buf : NULL,
+						dict_table_zip_size(table),
+						&field, &flen);
+				} else {
+					ptr += mach_write_compressed(
+						ptr, flen);
 				}
 
-				len = mach_write_compressed(ptr, flen);
-				ptr += len;
-
 				if (flen != UNIV_SQL_NULL) {
 					if (trx_undo_left(undo_page, ptr)
 					    < flen) {
@@ -692,24 +822,21 @@ trx_undo_page_report_modify(
 	return(first_free);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Reads from an undo log update record the system field values of the old
-version. */
-
+version.
+@return	remaining part of undo log record after reading these values */
+UNIV_INTERN
 byte*
 trx_undo_update_rec_get_sys_cols(
 /*=============================*/
-				/* out: remaining part of undo log
-				record after reading these values */
-	byte*	ptr,		/* in: remaining part of undo log
-				record after reading general
-				parameters */
-	dulint*	trx_id,		/* out: trx id */
-	dulint*	roll_ptr,	/* out: roll ptr */
-	ulint*	info_bits)	/* out: info bits state */
+	byte*		ptr,		/*!< in: remaining part of undo
+					log record after reading
+					general parameters */
+	trx_id_t*	trx_id,		/*!< out: trx id */
+	roll_ptr_t*	roll_ptr,	/*!< out: roll ptr */
+	ulint*		info_bits)	/*!< out: info bits state */
 {
-	ulint	len;
-
 	/* Read the state of the info bits */
 	*info_bits = mach_read_from_1(ptr);
 	ptr += 1;
@@ -717,26 +844,23 @@ trx_undo_update_rec_get_sys_cols(
 	/* Read the values of the system columns */
 
 	*trx_id = mach_dulint_read_compressed(ptr);
-	len = mach_dulint_get_compressed_size(*trx_id);
-	ptr += len;
+	ptr += mach_dulint_get_compressed_size(*trx_id);
 
 	*roll_ptr = mach_dulint_read_compressed(ptr);
-	len = mach_dulint_get_compressed_size(*roll_ptr);
-	ptr += len;
+	ptr += mach_dulint_get_compressed_size(*roll_ptr);
 
 	return(ptr);
 }
 
-/**************************************************************************
-Reads from an update undo log record the number of updated fields. */
+/**********************************************************************//**
+Reads from an update undo log record the number of updated fields.
+@return	remaining part of undo log record after reading this value */
 UNIV_INLINE
 byte*
 trx_undo_update_rec_get_n_upd_fields(
 /*=================================*/
-			/* out: remaining part of undo log record after
-			reading this value */
-	byte*	ptr,	/* in: pointer to remaining part of undo log record */
-	ulint*	n)	/* out: number of fields */
+	byte*	ptr,	/*!< in: pointer to remaining part of undo log record */
+	ulint*	n)	/*!< out: number of fields */
 {
 	*n = mach_read_compressed(ptr);
 	ptr += mach_get_compressed_size(*n);
@@ -744,16 +868,15 @@ trx_undo_update_rec_get_n_upd_fields(
 	return(ptr);
 }
 
-/**************************************************************************
-Reads from an update undo log record a stored field number. */
+/**********************************************************************//**
+Reads from an update undo log record a stored field number.
+@return	remaining part of undo log record after reading this value */
 UNIV_INLINE
 byte*
 trx_undo_update_rec_get_field_no(
 /*=============================*/
-			/* out: remaining part of undo log record after
-			reading this value */
-	byte*	ptr,	/* in: pointer to remaining part of undo log record */
-	ulint*	field_no)/* out: field number */
+	byte*	ptr,	/*!< in: pointer to remaining part of undo log record */
+	ulint*	field_no)/*!< out: field number */
 {
 	*field_no = mach_read_compressed(ptr);
 	ptr += mach_get_compressed_size(*field_no);
@@ -761,45 +884,41 @@ trx_undo_update_rec_get_field_no(
 	return(ptr);
 }
 
-/***********************************************************************
-Builds an update vector based on a remaining part of an undo log record. */
-
+/*******************************************************************//**
+Builds an update vector based on a remaining part of an undo log record.
+@return remaining part of the record, NULL if an error detected, which
+means that the record is corrupted */
+UNIV_INTERN
 byte*
 trx_undo_update_rec_get_update(
 /*===========================*/
-				/* out: remaining part of the record,
-				NULL if an error detected, which means that
-				the record is corrupted */
-	byte*		ptr,	/* in: remaining part in update undo log
+	byte*		ptr,	/*!< in: remaining part in update undo log
 				record, after reading the row reference
 				NOTE that this copy of the undo log record must
 				be preserved as long as the update vector is
 				used, as we do NOT copy the data in the
 				record! */
-	dict_index_t*	index,	/* in: clustered index */
-	ulint		type,	/* in: TRX_UNDO_UPD_EXIST_REC,
+	dict_index_t*	index,	/*!< in: clustered index */
+	ulint		type,	/*!< in: TRX_UNDO_UPD_EXIST_REC,
 				TRX_UNDO_UPD_DEL_REC, or
 				TRX_UNDO_DEL_MARK_REC; in the last case,
 				only trx id and roll ptr fields are added to
 				the update vector */
-	dulint		trx_id,	/* in: transaction id from this undo record */
-	dulint		roll_ptr,/* in: roll pointer from this undo record */
-	ulint		info_bits,/* in: info bits from this undo record */
-	trx_t*		trx,	/* in: transaction */
-	mem_heap_t*	heap,	/* in: memory heap from which the memory
+	trx_id_t	trx_id,	/*!< in: transaction id from this undo record */
+	roll_ptr_t	roll_ptr,/*!< in: roll pointer from this undo record */
+	ulint		info_bits,/*!< in: info bits from this undo record */
+	trx_t*		trx,	/*!< in: transaction */
+	mem_heap_t*	heap,	/*!< in: memory heap from which the memory
 				needed is allocated */
-	upd_t**		upd)	/* out, own: update vector */
+	upd_t**		upd)	/*!< out, own: update vector */
 {
 	upd_field_t*	upd_field;
 	upd_t*		update;
 	ulint		n_fields;
 	byte*		buf;
-	byte*		field;
-	ulint		len;
-	ulint		field_no;
 	ulint		i;
 
-	ut_a(index->type & DICT_CLUSTERED);
+	ut_a(dict_index_is_clust(index));
 
 	if (type != TRX_UNDO_DEL_MARK_REC) {
 		ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
@@ -835,6 +954,11 @@ trx_undo_update_rec_get_update(
 
 	for (i = 0; i < n_fields; i++) {
 
+		byte*	field;
+		ulint	len;
+		ulint	field_no;
+		ulint	orig_len;
+
 		ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
 
 		if (field_no >= dict_index_get_n_fields(index)) {
@@ -856,20 +980,24 @@ trx_undo_update_rec_get_update(
 			return(NULL);
 		}
 
-		ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
-
 		upd_field = upd_get_nth_field(update, i);
 
 		upd_field_set_field_no(upd_field, field_no, index, trx);
 
-		if (len != UNIV_SQL_NULL && len >= UNIV_EXTERN_STORAGE_FIELD) {
+		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
 
-			upd_field->extern_storage = TRUE;
+		upd_field->orig_len = orig_len;
 
+		if (len == UNIV_SQL_NULL) {
+			dfield_set_null(&upd_field->new_val);
+		} else if (len < UNIV_EXTERN_STORAGE_FIELD) {
+			dfield_set_data(&upd_field->new_val, field, len);
+		} else {
 			len -= UNIV_EXTERN_STORAGE_FIELD;
-		}
 
-		dfield_set_data(&(upd_field->new_val), field, len);
+			dfield_set_data(&upd_field->new_val, field, len);
+			dfield_set_ext(&upd_field->new_val);
+		}
 	}
 
 	*upd = update;
@@ -877,38 +1005,37 @@ trx_undo_update_rec_get_update(
 	return(ptr);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table. */
-
+columns which occur as ordering in any index of the table.
+@return	pointer to remaining part of undo record */
+UNIV_INTERN
 byte*
 trx_undo_rec_get_partial_row(
 /*=========================*/
-				/* out: pointer to remaining part of undo
-				record */
-	byte*		ptr,	/* in: remaining part in update undo log
+	byte*		ptr,	/*!< in: remaining part in update undo log
 				record of a suitable type, at the start of
 				the stored index columns;
 				NOTE that this copy of the undo log record must
 				be preserved as long as the partial row is
 				used, as we do NOT copy the data in the
 				record! */
-	dict_index_t*	index,	/* in: clustered index */
-	dtuple_t**	row,	/* out, own: partial row */
-	mem_heap_t*	heap)	/* in: memory heap from which the memory
+	dict_index_t*	index,	/*!< in: clustered index */
+	dtuple_t**	row,	/*!< out, own: partial row */
+	ibool		ignore_prefix, /*!< in: flag to indicate if we
+				expect blob prefixes in undo. Used
+				only in the assertion. */
+	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
 				needed is allocated */
 {
-	dfield_t*	dfield;
-	byte*		field;
-	ulint		len;
-	ulint		field_no;
-	ulint		col_no;
+	const byte*	end_ptr;
 	ulint		row_len;
-	ulint		total_len;
-	byte*		start_ptr;
-	ulint		i;
 
-	ut_ad(index && ptr && row && heap);
+	ut_ad(index);
+	ut_ad(ptr);
+	ut_ad(row);
+	ut_ad(heap);
+	ut_ad(dict_index_is_clust(index));
 
 	row_len = dict_table_get_n_cols(index->table);
 
@@ -916,40 +1043,57 @@ trx_undo_rec_get_partial_row(
 
 	dict_table_copy_types(*row, index->table);
 
-	start_ptr = ptr;
-
-	total_len = mach_read_from_2(ptr);
+	end_ptr = ptr + mach_read_from_2(ptr);
 	ptr += 2;
 
-	for (i = 0;; i++) {
-
-		if (ptr == start_ptr + total_len) {
-
-			break;
-		}
+	while (ptr != end_ptr) {
+		dfield_t*		dfield;
+		byte*			field;
+		ulint			field_no;
+		const dict_col_t*	col;
+		ulint			col_no;
+		ulint			len;
+		ulint			orig_len;
 
 		ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
 
-		col_no = dict_index_get_nth_col_no(index, field_no);
+		col = dict_index_get_nth_col(index, field_no);
+		col_no = dict_col_get_no(col);
 
-		ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
+		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
 
 		dfield = dtuple_get_nth_field(*row, col_no);
 
 		dfield_set_data(dfield, field, len);
+
+		if (len != UNIV_SQL_NULL
+		    && len >= UNIV_EXTERN_STORAGE_FIELD) {
+			dfield_set_len(dfield,
+				       len - UNIV_EXTERN_STORAGE_FIELD);
+			dfield_set_ext(dfield);
+			/* If the prefix of this column is indexed,
+			ensure that enough prefix is stored in the
+			undo log record. */
+			ut_a(ignore_prefix
+			     || !col->ord_part
+			     || dfield_get_len(dfield)
+			     >= REC_MAX_INDEX_COL_LEN
+			     + BTR_EXTERN_FIELD_REF_SIZE);
+		}
 	}
 
 	return(ptr);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/***************************************************************************
+/***********************************************************************//**
 Erases the unused undo log page end. */
 static
 void
 trx_undo_erase_page_end(
 /*====================*/
-	page_t*	undo_page,	/* in: undo page whose end to erase */
-	mtr_t*	mtr)		/* in: mtr */
+	page_t*	undo_page,	/*!< in: undo page whose end to erase */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
 	ulint	first_free;
 
@@ -961,17 +1105,17 @@ trx_undo_erase_page_end(
 	mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
 }
 
-/***************************************************************
-Parses a redo log record of erasing of an undo page end. */
-
+/***********************************************************//**
+Parses a redo log record of erasing of an undo page end.
+@return	end of log record or NULL */
+UNIV_INTERN
 byte*
 trx_undo_parse_erase_page_end(
 /*==========================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr __attribute__((unused)), /* in: buffer end */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr)	/* in: mtr or NULL */
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr __attribute__((unused)), /*!< in: buffer end */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr)	/*!< in: mtr or NULL */
 {
 	ut_ad(ptr && end_ptr);
 
@@ -985,58 +1129,56 @@ trx_undo_parse_erase_page_end(
 	return(ptr);
 }
 
-/***************************************************************************
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
 Writes information to an undo log about an insert, update, or a delete marking
 of a clustered index record. This information is used in a rollback of the
 transaction and in consistent reads that must look to the history of this
-transaction. */
-
+transaction.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
 ulint
 trx_undo_report_row_operation(
 /*==========================*/
-					/* out: DB_SUCCESS or error code */
-	ulint		flags,		/* in: if BTR_NO_UNDO_LOG_FLAG bit is
+	ulint		flags,		/*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
 					set, does nothing */
-	ulint		op_type,	/* in: TRX_UNDO_INSERT_OP or
+	ulint		op_type,	/*!< in: TRX_UNDO_INSERT_OP or
 					TRX_UNDO_MODIFY_OP */
-	que_thr_t*	thr,		/* in: query thread */
-	dict_index_t*	index,		/* in: clustered index */
-	dtuple_t*	clust_entry,	/* in: in the case of an insert,
+	que_thr_t*	thr,		/*!< in: query thread */
+	dict_index_t*	index,		/*!< in: clustered index */
+	const dtuple_t*	clust_entry,	/*!< in: in the case of an insert,
 					index entry to insert into the
 					clustered index, otherwise NULL */
-	upd_t*		update,		/* in: in the case of an update,
+	const upd_t*	update,		/*!< in: in the case of an update,
 					the update vector, otherwise NULL */
-	ulint		cmpl_info,	/* in: compiler info on secondary
+	ulint		cmpl_info,	/*!< in: compiler info on secondary
 					index updates */
-	rec_t*		rec,		/* in: in case of an update or delete
+	const rec_t*	rec,		/*!< in: in case of an update or delete
 					marking, the record in the clustered
 					index, otherwise NULL */
-	dulint*		roll_ptr)	/* out: rollback pointer to the
+	roll_ptr_t*	roll_ptr)	/*!< out: rollback pointer to the
 					inserted undo log record,
 					ut_dulint_zero if BTR_NO_UNDO_LOG
 					flag was specified */
 {
 	trx_t*		trx;
 	trx_undo_t*	undo;
-	page_t*		undo_page;
-	ulint		offset;
 	ulint		page_no;
-	ibool		is_insert;
 	trx_rseg_t*	rseg;
 	mtr_t		mtr;
 	ulint		err		= DB_SUCCESS;
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
+	rec_offs_init(offsets_);
 
-	ut_a(index->type & DICT_CLUSTERED);
+	ut_a(dict_index_is_clust(index));
 
 	if (flags & BTR_NO_UNDO_LOG_FLAG) {
 
 		*roll_ptr = ut_dulint_zero;
 
-		return(err);
+		return(DB_SUCCESS);
 	}
 
 	ut_ad(thr);
@@ -1058,7 +1200,13 @@ trx_undo_report_row_operation(
 		}
 
 		undo = trx->insert_undo;
-		is_insert = TRUE;
+
+		if (UNIV_UNLIKELY(!undo)) {
+			/* Did not succeed */
+			mutex_exit(&(trx->undo_mutex));
+
+			return(err);
+		}
 	} else {
 		ut_ad(op_type == TRX_UNDO_MODIFY_OP);
 
@@ -1069,14 +1217,15 @@ trx_undo_report_row_operation(
 		}
 
 		undo = trx->update_undo;
-		is_insert = FALSE;
-	}
 
-	if (err != DB_SUCCESS) {
-		/* Did not succeed: return the error encountered */
-		mutex_exit(&(trx->undo_mutex));
+		if (UNIV_UNLIKELY(!undo)) {
+			/* Did not succeed */
+			mutex_exit(&(trx->undo_mutex));
+			return(err);
+		}
 
-		return(err);
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
 	}
 
 	page_no = undo->last_page_no;
@@ -1084,28 +1233,28 @@ trx_undo_report_row_operation(
 	mtr_start(&mtr);
 
 	for (;;) {
-		undo_page = buf_page_get_gen(undo->space, page_no,
-					     RW_X_LATCH, undo->guess_page,
-					     BUF_GET,
-					     __FILE__, __LINE__,
-					     &mtr);
+		buf_block_t*	undo_block;
+		page_t*		undo_page;
+		ulint		offset;
 
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+		undo_block = buf_page_get_gen(undo->space, undo->zip_size,
+					      page_no, RW_X_LATCH,
+					      undo->guess_block, BUF_GET,
+					      __FILE__, __LINE__, &mtr);
+		buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
+
+		undo_page = buf_block_get_frame(undo_block);
 
 		if (op_type == TRX_UNDO_INSERT_OP) {
 			offset = trx_undo_page_report_insert(
 				undo_page, trx, index, clust_entry, &mtr);
 		} else {
-			offsets = rec_get_offsets(rec, index, offsets,
-						  ULINT_UNDEFINED, &heap);
 			offset = trx_undo_page_report_modify(
 				undo_page, trx, index, rec, offsets, update,
 				cmpl_info, &mtr);
 		}
 
-		if (offset == 0) {
+		if (UNIV_UNLIKELY(offset == 0)) {
 			/* The record did not fit on the page. We erase the
 			end segment of the undo log page and write a log
 			record of it: this is to ensure that in the debug
@@ -1113,14 +1262,29 @@ trx_undo_report_row_operation(
 			records stays identical to the original page */
 
 			trx_undo_erase_page_end(undo_page, &mtr);
-		}
-
-		mtr_commit(&mtr);
-
-		if (offset != 0) {
+			mtr_commit(&mtr);
+		} else {
 			/* Success */
 
-			break;
+			mtr_commit(&mtr);
+
+			undo->empty = FALSE;
+			undo->top_page_no = page_no;
+			undo->top_offset  = offset;
+			undo->top_undo_no = trx->undo_no;
+			undo->guess_block = undo_block;
+
+			UT_DULINT_INC(trx->undo_no);
+
+			mutex_exit(&trx->undo_mutex);
+
+			*roll_ptr = trx_undo_build_roll_ptr(
+				op_type == TRX_UNDO_INSERT_OP,
+				rseg->id, page_no, offset);
+			if (UNIV_LIKELY_NULL(heap)) {
+				mem_heap_free(heap);
+			}
+			return(DB_SUCCESS);
 		}
 
 		ut_ad(page_no == undo->last_page_no);
@@ -1139,7 +1303,7 @@ trx_undo_report_row_operation(
 
 		mutex_exit(&(rseg->mutex));
 
-		if (page_no == FIL_NULL) {
+		if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
 			/* Did not succeed: out of space */
 
 			mutex_exit(&(trx->undo_mutex));
@@ -1150,37 +1314,20 @@ trx_undo_report_row_operation(
 			return(DB_OUT_OF_FILE_SPACE);
 		}
 	}
-
-	undo->empty = FALSE;
-	undo->top_page_no = page_no;
-	undo->top_offset  = offset;
-	undo->top_undo_no = trx->undo_no;
-	undo->guess_page = undo_page;
-
-	UT_DULINT_INC(trx->undo_no);
-
-	mutex_exit(&(trx->undo_mutex));
-
-	*roll_ptr = trx_undo_build_roll_ptr(is_insert, rseg->id, page_no,
-					    offset);
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-	return(err);
 }
 
 /*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
 
-/**********************************************************************
+/******************************************************************//**
 Copies an undo record to heap. This function can be called if we know that
-the undo log record exists. */
-
+the undo log record exists.
+@return	own: copy of the record */
+UNIV_INTERN
 trx_undo_rec_t*
 trx_undo_get_undo_rec_low(
 /*======================*/
-					/* out, own: copy of the record */
-	dulint		roll_ptr,	/* in: roll pointer to record */
-	mem_heap_t*	heap)		/* in: memory heap where copied */
+	roll_ptr_t	roll_ptr,	/*!< in: roll pointer to record */
+	mem_heap_t*	heap)		/*!< in: memory heap where copied */
 {
 	trx_undo_rec_t*	undo_rec;
 	ulint		rseg_id;
@@ -1197,7 +1344,8 @@ trx_undo_get_undo_rec_low(
 
 	mtr_start(&mtr);
 
-	undo_page = trx_undo_page_get_s_latched(rseg->space, page_no, &mtr);
+	undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
+						page_no, &mtr);
 
 	undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
 
@@ -1206,24 +1354,24 @@ trx_undo_get_undo_rec_low(
 	return(undo_rec);
 }
 
-/**********************************************************************
-Copies an undo record to heap. */
+/******************************************************************//**
+Copies an undo record to heap.
 
+NOTE: the caller must have latches on the clustered index page and
+purge_view.
+
+@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
+truncated and we cannot fetch the old version */
+UNIV_INTERN
 ulint
 trx_undo_get_undo_rec(
 /*==================*/
-					/* out: DB_SUCCESS, or
-					DB_MISSING_HISTORY if the undo log
-					has been truncated and we cannot
-					fetch the old version; NOTE: the
-					caller must have latches on the
-					clustered index page and purge_view */
-	dulint		roll_ptr,	/* in: roll pointer to record */
-	dulint		trx_id,		/* in: id of the trx that generated
+	roll_ptr_t	roll_ptr,	/*!< in: roll pointer to record */
+	trx_id_t	trx_id,		/*!< in: id of the trx that generated
 					the roll pointer: it points to an
 					undo log of this transaction */
-	trx_undo_rec_t** undo_rec,	/* out, own: copy of the record */
-	mem_heap_t*	heap)		/* in: memory heap where copied */
+	trx_undo_rec_t** undo_rec,	/*!< out, own: copy of the record */
+	mem_heap_t*	heap)		/*!< in: memory heap where copied */
 {
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
@@ -1242,42 +1390,43 @@ trx_undo_get_undo_rec(
 	return(DB_SUCCESS);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Build a previous version of a clustered index record. This function checks
 that the caller has a latch on the index page of the clustered index record
 and an s-latch on the purge_view. This guarantees that the stack of versions
-is locked. */
-
+is locked all the way down to the purge_view.
+@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
+earlier than purge_view, which means that it may have been removed,
+DB_ERROR if corrupted record */
+UNIV_INTERN
 ulint
 trx_undo_prev_version_build(
 /*========================*/
-				/* out: DB_SUCCESS, or DB_MISSING_HISTORY if
-				the previous version is not >= purge_view,
-				which means that it may have been removed,
-				DB_ERROR if corrupted record */
-	rec_t*		index_rec,/* in: clustered index record in the
+	const rec_t*	index_rec,/*!< in: clustered index record in the
 				index tree */
 	mtr_t*		index_mtr __attribute__((unused)),
-				/* in: mtr which contains the latch to
+				/*!< in: mtr which contains the latch to
 				index_rec page and purge_view */
-	rec_t*		rec,	/* in: version of a clustered index record */
-	dict_index_t*	index,	/* in: clustered index */
-	ulint*		offsets,/* in: rec_get_offsets(rec, index) */
-	mem_heap_t*	heap,	/* in: memory heap from which the memory
+	const rec_t*	rec,	/*!< in: version of a clustered index record */
+	dict_index_t*	index,	/*!< in: clustered index */
+	ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	mem_heap_t*	heap,	/*!< in: memory heap from which the memory
 				needed is allocated */
-	rec_t**		old_vers)/* out, own: previous version, or NULL if
+	rec_t**		old_vers)/*!< out, own: previous version, or NULL if
 				rec is the first inserted version, or if
-				history data has been deleted */
+				history data has been deleted (an error),
+				or if the purge COULD have removed the version
+				though it has not yet done so */
 {
-	trx_undo_rec_t*	undo_rec;
+	trx_undo_rec_t*	undo_rec	= NULL;
 	dtuple_t*	entry;
-	dulint		rec_trx_id;
+	trx_id_t	rec_trx_id;
 	ulint		type;
-	dulint		undo_no;
+	undo_no_t	undo_no;
 	dulint		table_id;
-	dulint		trx_id;
-	dulint		roll_ptr;
-	dulint		old_roll_ptr;
+	trx_id_t	trx_id;
+	roll_ptr_t	roll_ptr;
+	roll_ptr_t	old_roll_ptr;
 	upd_t*		update;
 	byte*		ptr;
 	ulint		info_bits;
@@ -1288,13 +1437,12 @@ trx_undo_prev_version_build(
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
 #endif /* UNIV_SYNC_DEBUG */
-	ut_ad(mtr_memo_contains(index_mtr, buf_block_align(index_rec),
-				MTR_MEMO_PAGE_S_FIX)
-	      || mtr_memo_contains(index_mtr, buf_block_align(index_rec),
-				   MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX)
+	      || mtr_memo_contains_page(index_mtr, index_rec,
+					MTR_MEMO_PAGE_X_FIX));
 	ut_ad(rec_offs_validate(rec, index, offsets));
 
-	if (!(index->type & DICT_CLUSTERED)) {
+	if (!dict_index_is_clust(index)) {
 		fprintf(stderr, "InnoDB: Error: trying to access"
 			" update undo rec for non-clustered index %s\n"
 			"InnoDB: Submit a detailed bug report to"
@@ -1324,7 +1472,9 @@ trx_undo_prev_version_build(
 
 	err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
 
-	if (err != DB_SUCCESS) {
+	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+		/* The undo record may already have been purged.
+		This should never happen in InnoDB. */
 
 		return(err);
 	}
@@ -1334,6 +1484,29 @@ trx_undo_prev_version_build(
 
 	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
 					       &info_bits);
+
+	/* (a) If a clustered index record version is such that the
+	trx id stamp in it is bigger than purge_sys->view, then the
+	BLOBs in that version are known to exist (the purge has not
+	progressed that far);
+
+	(b) if the version is the first version such that trx id in it
+	is less than purge_sys->view, and it is not delete-marked,
+	then the BLOBs in that version are known to exist (the purge
+	cannot have purged the BLOBs referenced by that version
+	yet).
+
+	This function does not fetch any BLOBs.  The callers might, by
+	possibly invoking row_ext_create() via row_build().  However,
+	they should have all needed information in the *old_vers
+	returned by this function.  This is because *old_vers is based
+	on the transaction undo log records.  The function
+	trx_undo_page_fetch_ext() will write BLOB prefixes to the
+	transaction undo log that are at least as long as the longest
+	possible column prefix in a secondary index.  Thus, secondary
+	index entries for *old_vers can be constructed without
+	dereferencing any BLOB pointers. */
+
 	ptr = trx_undo_rec_skip_row_ref(ptr, index);
 
 	ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
@@ -1379,14 +1552,12 @@ trx_undo_prev_version_build(
 		      "InnoDB: record version ", stderr);
 		rec_print_new(stderr, rec, offsets);
 		fprintf(stderr, "\n"
-			"InnoDB: Record trx id %lu %lu, update rec"
-			" trx id %lu %lu\n"
+			"InnoDB: Record trx id " TRX_ID_FMT
+			", update rec trx id " TRX_ID_FMT "\n"
 			"InnoDB: Roll ptr in rec %lu %lu, in update rec"
 			" %lu %lu\n",
-			(ulong) ut_dulint_get_high(rec_trx_id),
-			(ulong) ut_dulint_get_low(rec_trx_id),
-			(ulong) ut_dulint_get_high(trx_id),
-			(ulong) ut_dulint_get_low(trx_id),
+			TRX_ID_PREP_PRINTF(rec_trx_id),
+			TRX_ID_PREP_PRINTF(trx_id),
 			(ulong) ut_dulint_get_high(old_roll_ptr),
 			(ulong) ut_dulint_get_low(old_roll_ptr),
 			(ulong) ut_dulint_get_high(roll_ptr),
@@ -1397,38 +1568,34 @@ trx_undo_prev_version_build(
 	}
 
 	if (row_upd_changes_field_size_or_external(index, offsets, update)) {
-		ulint*	ext_vect;
-		ulint	n_ext_vect;
+		ulint	n_ext;
 
 		/* We have to set the appropriate extern storage bits in the
 		old version of the record: the extern bits in rec for those
 		fields that update does NOT update, as well as the the bits for
 		those fields that update updates to become externally stored
-		fields. Store the info to ext_vect: */
+		fields. Store the info: */
 
-		ext_vect = mem_alloc(sizeof(ulint)
-				     * rec_offs_n_fields(offsets));
-		n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets,
-							   update);
-		entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec,
-					       heap);
+		entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index,
+					       offsets, &n_ext, heap);
+		n_ext += btr_push_update_extern_fields(entry, update, heap);
+		/* The page containing the clustered index record
+		corresponding to entry is latched in mtr.  Thus the
+		following call is safe. */
 		row_upd_index_replace_new_col_vals(entry, index, update, heap);
 
-		buf = mem_heap_alloc(heap,
-				     rec_get_converted_size(index, entry));
+		buf = mem_heap_alloc(heap, rec_get_converted_size(index, entry,
+								  n_ext));
 
-		*old_vers = rec_convert_dtuple_to_rec(buf, index, entry);
-
-		/* Now set the extern bits in the old version of the record */
-		rec_set_field_extern_bits(*old_vers, index,
-					  ext_vect, n_ext_vect, NULL);
-		mem_free(ext_vect);
+		*old_vers = rec_convert_dtuple_to_rec(buf, index,
+						      entry, n_ext);
 	} else {
 		buf = mem_heap_alloc(heap, rec_offs_size(offsets));
 		*old_vers = rec_copy(buf, rec, offsets);
 		rec_offs_make_valid(*old_vers, index, offsets);
-		row_upd_rec_in_place(*old_vers, offsets, update);
+		row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
 	}
 
 	return(DB_SUCCESS);
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/trx/trx0roll.c b/storage/innodb_plugin/trx/trx0roll.c
similarity index 75%
rename from storage/innobase/trx/trx0roll.c
rename to storage/innodb_plugin/trx/trx0roll.c
index 8934fe87c7e..51d17192d5b 100644
--- a/storage/innobase/trx/trx0roll.c
+++ b/storage/innodb_plugin/trx/trx0roll.c
@@ -1,7 +1,24 @@
-/******************************************************
-Transaction rollback
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0roll.c
+Transaction rollback
 
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
@@ -27,32 +44,32 @@ Created 3/26/1996 Heikki Tuuri
 #include "lock0lock.h"
 #include "pars0pars.h"
 
-/* This many pages must be undone before a truncate is tried within rollback */
+/** This many pages must be undone before a truncate is tried within
+rollback */
 #define TRX_ROLL_TRUNC_THRESHOLD	1
 
-/* In crash recovery, the current trx to be rolled back */
-trx_t*		trx_roll_crash_recv_trx	= NULL;
+/** In crash recovery, the current trx to be rolled back */
+static trx_t*		trx_roll_crash_recv_trx	= NULL;
 
-/* In crash recovery we set this to the undo n:o of the current trx to be
+/** In crash recovery we set this to the undo n:o of the current trx to be
 rolled back. Then we can print how many % the rollback has progressed. */
-ib_longlong	trx_roll_max_undo_no;
+static ib_int64_t	trx_roll_max_undo_no;
 
-/* Auxiliary variable which tells the previous progress % we printed */
-ulint		trx_roll_progress_printed_pct;
-
-/***********************************************************************
-Rollback a transaction used in MySQL. */
+/** Auxiliary variable which tells the previous progress % we printed */
+static ulint		trx_roll_progress_printed_pct;
 
+/*******************************************************************//**
+Rollback a transaction used in MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 trx_general_rollback_for_mysql(
 /*===========================*/
-				/* out: error code or DB_SUCCESS */
-	trx_t*		trx,	/* in: transaction handle */
-	ibool		partial,/* in: TRUE if partial rollback requested */
-	trx_savept_t*	savept)	/* in: pointer to savepoint undo number, if
+	trx_t*		trx,	/*!< in: transaction handle */
+	ibool		partial,/*!< in: TRUE if partial rollback requested */
+	trx_savept_t*	savept)	/*!< in: pointer to savepoint undo number, if
 				partial rollback requested */
 {
-#ifndef UNIV_HOTBACKUP
 	mem_heap_t*	heap;
 	que_thr_t*	thr;
 	roll_node_t*	roll_node;
@@ -104,23 +121,16 @@ trx_general_rollback_for_mysql(
 	srv_active_wake_master_thread();
 
 	return((int) trx->error_state);
-#else /* UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds.  Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-	return(DB_FAIL);
-#endif /* UNIV_HOTBACKUP */
 }
 
-/***********************************************************************
-Rollback a transaction used in MySQL. */
-
+/*******************************************************************//**
+Rollback a transaction used in MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 trx_rollback_for_mysql(
 /*===================*/
-			/* out: error code or DB_SUCCESS */
-	trx_t*	trx)	/* in: transaction handle */
+	trx_t*	trx)	/*!< in: transaction handle */
 {
 	int	err;
 
@@ -135,35 +145,21 @@ trx_rollback_for_mysql(
 	the transaction object does not have an InnoDB session object, and we
 	set a dummy session that we use for all MySQL transactions. */
 
-	mutex_enter(&kernel_mutex);
-
-	if (trx->sess == NULL) {
-		/* Open a dummy session */
-
-		if (!trx_dummy_sess) {
-			trx_dummy_sess = sess_open();
-		}
-
-		trx->sess = trx_dummy_sess;
-	}
-
-	mutex_exit(&kernel_mutex);
-
 	err = trx_general_rollback_for_mysql(trx, FALSE, NULL);
-	
+
 	trx->op_info = "";
 
 	return(err);
 }
 
-/***********************************************************************
-Rollback the latest SQL statement for MySQL. */
-
+/*******************************************************************//**
+Rollback the latest SQL statement for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
 int
 trx_rollback_last_sql_stat_for_mysql(
 /*=================================*/
-			/* out: error code or DB_SUCCESS */
-	trx_t*	trx)	/* in: transaction handle */
+	trx_t*	trx)	/*!< in: transaction handle */
 {
 	int	err;
 
@@ -184,14 +180,14 @@ trx_rollback_last_sql_stat_for_mysql(
 	return(err);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Frees a single savepoint struct. */
-
+UNIV_INTERN
 void
 trx_roll_savepoint_free(
 /*=====================*/
-	trx_t*			trx,	/* in: transaction handle */
-	trx_named_savept_t*	savep)	/* in: savepoint to free */
+	trx_t*			trx,	/*!< in: transaction handle */
+	trx_named_savept_t*	savep)	/*!< in: savepoint to free */
 {
 	ut_a(savep != NULL);
 	ut_a(UT_LIST_GET_LEN(trx->trx_savepoints) > 0);
@@ -201,15 +197,15 @@ trx_roll_savepoint_free(
 	mem_free(savep);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Frees savepoint structs starting from savep, if savep == NULL then
 free all savepoints. */
-
+UNIV_INTERN
 void
 trx_roll_savepoints_free(
 /*=====================*/
-	trx_t*			trx,	/* in: transaction handle */
-	trx_named_savept_t*	savep)	/* in: free all savepoints > this one;
+	trx_t*			trx,	/*!< in: transaction handle */
+	trx_named_savept_t*	savep)	/*!< in: free all savepoints > this one;
 					if this is NULL, free all savepoints
 					of trx */
 {
@@ -230,24 +226,22 @@ trx_roll_savepoints_free(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Rolls back a transaction back to a named savepoint. Modifications after the
 savepoint are undone but InnoDB does NOT release the corresponding locks
 which are stored in memory. If a lock is 'implicit', that is, a new inserted
 row holds a lock where the lock information is carried by the trx id stored in
 the row, these locks are naturally released in the rollback. Savepoints which
-were set after this savepoint are deleted. */
-
+were set after this savepoint are deleted.
+@return if no savepoint of the name found then DB_NO_SAVEPOINT,
+otherwise DB_SUCCESS */
+UNIV_INTERN
 ulint
 trx_rollback_to_savepoint_for_mysql(
 /*================================*/
-						/* out: if no savepoint
-						of the name found then
-						DB_NO_SAVEPOINT,
-						otherwise DB_SUCCESS */
-	trx_t*		trx,			/* in: transaction handle */
-	const char*	savepoint_name,		/* in: savepoint name */
-	ib_longlong*	mysql_binlog_cache_pos)	/* out: the MySQL binlog cache
+	trx_t*		trx,			/*!< in: transaction handle */
+	const char*	savepoint_name,		/*!< in: savepoint name */
+	ib_int64_t*	mysql_binlog_cache_pos)	/*!< out: the MySQL binlog cache
 						position corresponding to this
 						savepoint; MySQL needs this
 						information to remove the
@@ -300,19 +294,19 @@ trx_rollback_to_savepoint_for_mysql(
 	return(err);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Creates a named savepoint. If the transaction is not yet started, starts it.
 If there is already a savepoint of the same name, this call erases that old
 savepoint and replaces it with a new. Savepoints are deleted in a transaction
-commit or rollback. */
-
+commit or rollback.
+@return	always DB_SUCCESS */
+UNIV_INTERN
 ulint
 trx_savepoint_for_mysql(
 /*====================*/
-						/* out: always DB_SUCCESS */
-	trx_t*		trx,			/* in: transaction handle */
-	const char*	savepoint_name,		/* in: savepoint name */
-	ib_longlong	binlog_cache_pos)	/* in: MySQL binlog cache
+	trx_t*		trx,			/*!< in: transaction handle */
+	const char*	savepoint_name,		/*!< in: savepoint name */
+	ib_int64_t	binlog_cache_pos)	/*!< in: MySQL binlog cache
 						position corresponding to this
 						connection at the time of the
 						savepoint */
@@ -358,19 +352,17 @@ trx_savepoint_for_mysql(
 	return(DB_SUCCESS);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Releases only the named savepoint. Savepoints which were set after this
-savepoint are left as is. */
-
+savepoint are left as is.
+@return if no savepoint of the name found then DB_NO_SAVEPOINT,
+otherwise DB_SUCCESS */
+UNIV_INTERN
 ulint
 trx_release_savepoint_for_mysql(
 /*============================*/
-						/* out: if no savepoint
-						of the name found then
-						DB_NO_SAVEPOINT,
-						otherwise DB_SUCCESS */
-	trx_t*		trx,			/* in: transaction handle */
-	const char*	savepoint_name)		/* in: savepoint name */
+	trx_t*		trx,			/*!< in: transaction handle */
+	const char*	savepoint_name)		/*!< in: savepoint name */
 {
 	trx_named_savept_t*	savep;
 
@@ -388,14 +380,28 @@ trx_release_savepoint_for_mysql(
 	return(DB_NO_SAVEPOINT);
 }
 
-/***********************************************************************
-Returns a transaction savepoint taken at this point in time. */
+/*******************************************************************//**
+Determines if this transaction is rolling back an incomplete transaction
+in crash recovery.
+@return TRUE if trx is an incomplete transaction that is being rolled
+back in crash recovery */
+UNIV_INTERN
+ibool
+trx_is_recv(
+/*========*/
+	const trx_t*	trx)	/*!< in: transaction */
+{
+	return(trx == trx_roll_crash_recv_trx);
+}
 
+/*******************************************************************//**
+Returns a transaction savepoint taken at this point in time.
+@return	savepoint */
+UNIV_INTERN
 trx_savept_t
 trx_savept_take(
 /*============*/
-			/* out: savepoint */
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in: transaction */
 {
 	trx_savept_t	savept;
 
@@ -404,94 +410,25 @@ trx_savept_take(
 	return(savept);
 }
 
-/***********************************************************************
-Rollback or clean up transactions which have no user session. If the
-transaction already was committed, then we clean up a possible insert
-undo log. If the transaction was not yet committed, then we roll it back.
-Note: this is done in a background thread. */
-
-os_thread_ret_t
-trx_rollback_or_clean_all_without_sess(
-/*===================================*/
-			/* out: a dummy parameter */
-	void*	arg __attribute__((unused)))
-			/* in: a dummy parameter required by
-			os_thread_create */
+/*******************************************************************//**
+Roll back an active transaction. */
+static
+void
+trx_rollback_active(
+/*================*/
+	trx_t*	trx)	/*!< in/out: transaction */
 {
 	mem_heap_t*	heap;
 	que_fork_t*	fork;
 	que_thr_t*	thr;
 	roll_node_t*	roll_node;
-	trx_t*		trx;
 	dict_table_t*	table;
-	ib_longlong	rows_to_undo;
+	ib_int64_t	rows_to_undo;
 	const char*	unit		= "";
-	int		err;
+	ibool		dictionary_locked = FALSE;
 
-	mutex_enter(&kernel_mutex);
-
-	/* Open a dummy session */
-
-	if (!trx_dummy_sess) {
-		trx_dummy_sess = sess_open();
-	}
-
-	mutex_exit(&kernel_mutex);
-
-	if (UT_LIST_GET_FIRST(trx_sys->trx_list)) {
-
-		fprintf(stderr,
-			"InnoDB: Starting in background the rollback"
-			" of uncommitted transactions\n");
-	} else {
-		goto leave_function;
-	}
-loop:
 	heap = mem_heap_create(512);
 
-	mutex_enter(&kernel_mutex);
-
-	trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
-	while (trx) {
-		if ((trx->sess || (trx->conc_state == TRX_NOT_STARTED))) {
-			trx = UT_LIST_GET_NEXT(trx_list, trx);
-		} else if (trx->conc_state == TRX_PREPARED) {
-
-			trx->sess = trx_dummy_sess;
-			trx = UT_LIST_GET_NEXT(trx_list, trx);
-		} else {
-			break;
-		}
-	}
-
-	mutex_exit(&kernel_mutex);
-
-	if (trx == NULL) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"  InnoDB: Rollback of non-prepared transactions"
-			" completed\n");
-
-		mem_heap_free(heap);
-
-		goto leave_function;
-	}
-
-	trx->sess = trx_dummy_sess;
-
-	if (trx->conc_state == TRX_COMMITTED_IN_MEMORY) {
-		fprintf(stderr, "InnoDB: Cleaning up trx with id %lu %lu\n",
-			(ulong) ut_dulint_get_high(trx->id),
-			(ulong) ut_dulint_get_low(trx->id));
-
-		trx_cleanup_at_db_startup(trx);
-
-		mem_heap_free(heap);
-
-		goto loop;
-	}
-
 	fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
 	fork->trx = trx;
 
@@ -520,10 +457,9 @@ loop:
 
 	ut_print_timestamp(stderr);
 	fprintf(stderr,
-		"  InnoDB: Rolling back trx with id %lu %lu, %lu%s"
+		"  InnoDB: Rolling back trx with id " TRX_ID_FMT ", %lu%s"
 		" rows to undo\n",
-		(ulong) ut_dulint_get_high(trx->id),
-		(ulong) ut_dulint_get_low(trx->id),
+		TRX_ID_PREP_PRINTF(trx->id),
 		(ulong) rows_to_undo, unit);
 	mutex_exit(&kernel_mutex);
 
@@ -531,8 +467,9 @@ loop:
 
 	trx->mysql_process_no = os_proc_get_number();
 
-	if (trx->dict_operation) {
+	if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
 		row_mysql_lock_data_dictionary(trx);
+		dictionary_locked = TRUE;
 	}
 
 	que_run_threads(thr);
@@ -553,7 +490,9 @@ loop:
 
 	mutex_exit(&kernel_mutex);
 
-	if (trx->dict_operation) {
+	if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
+	    && !ut_dulint_is_zero(trx->table_id)) {
+
 		/* If the transaction was for a dictionary operation, we
 		drop the relevant table, if it still exists */
 
@@ -566,30 +505,98 @@ loop:
 		table = dict_table_get_on_id_low(trx->table_id);
 
 		if (table) {
+			ulint	err;
+
 			fputs("InnoDB: Table found: dropping table ", stderr);
 			ut_print_name(stderr, trx, TRUE, table->name);
 			fputs(" in recovery\n", stderr);
 
 			err = row_drop_table_for_mysql(table->name, trx, TRUE);
+			trx_commit_for_mysql(trx);
 
 			ut_a(err == (int) DB_SUCCESS);
 		}
 	}
 
-	if (trx->dict_operation) {
+	if (dictionary_locked) {
 		row_mysql_unlock_data_dictionary(trx);
 	}
 
-	fprintf(stderr, "\nInnoDB: Rolling back of trx id %lu %lu completed\n",
-		(ulong) ut_dulint_get_high(trx->id),
-		(ulong) ut_dulint_get_low(trx->id));
+	fprintf(stderr, "\nInnoDB: Rolling back of trx id " TRX_ID_FMT
+		" completed\n",
+		TRX_ID_PREP_PRINTF(trx->id));
 	mem_heap_free(heap);
 
 	trx_roll_crash_recv_trx	= NULL;
+}
 
-	goto loop;
+/*******************************************************************//**
+Rollback or clean up any incomplete transactions which were
+encountered in crash recovery.  If the transaction already was
+committed, then we clean up a possible insert undo log. If the
+transaction was not yet committed, then we roll it back.
+Note: this is done in a background thread.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+trx_rollback_or_clean_all_recovered(
+/*================================*/
+	void*	arg __attribute__((unused)))
+			/*!< in: a dummy parameter required by
+			os_thread_create */
+{
+	trx_t*	trx;
+
+	mutex_enter(&kernel_mutex);
+
+	if (UT_LIST_GET_FIRST(trx_sys->trx_list)) {
+
+		fprintf(stderr,
+			"InnoDB: Starting in background the rollback"
+			" of uncommitted transactions\n");
+	} else {
+		goto leave_function;
+	}
+
+	mutex_exit(&kernel_mutex);
+
+loop:
+	mutex_enter(&kernel_mutex);
+
+	for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list); trx;
+	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+		if (!trx->is_recovered) {
+			continue;
+		}
+
+		switch (trx->conc_state) {
+		case TRX_NOT_STARTED:
+		case TRX_PREPARED:
+			continue;
+
+		case TRX_COMMITTED_IN_MEMORY:
+			mutex_exit(&kernel_mutex);
+			fprintf(stderr,
+				"InnoDB: Cleaning up trx with id "
+				TRX_ID_FMT "\n",
+				TRX_ID_PREP_PRINTF(trx->id));
+			trx_cleanup_at_db_startup(trx);
+			goto loop;
+
+		case TRX_ACTIVE:
+			mutex_exit(&kernel_mutex);
+			trx_rollback_active(trx);
+			goto loop;
+		}
+	}
+
+	ut_print_timestamp(stderr);
+	fprintf(stderr,
+		"  InnoDB: Rollback of non-prepared transactions completed\n");
 
 leave_function:
+	mutex_exit(&kernel_mutex);
+
 	/* We count the number of threads in os_thread_exit(). A created
 	thread should always use that to exit and not use return() to exit. */
 
@@ -598,9 +605,10 @@ leave_function:
 	OS_THREAD_DUMMY_RETURN;
 }
 
-/***********************************************************************
-Creates an undo number array. */
-
+/*******************************************************************//**
+Creates an undo number array.
+@return	own: undo number array */
+UNIV_INTERN
 trx_undo_arr_t*
 trx_undo_arr_create(void)
 /*=====================*/
@@ -628,29 +636,28 @@ trx_undo_arr_create(void)
 	return(arr);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Frees an undo number array. */
-
+UNIV_INTERN
 void
 trx_undo_arr_free(
 /*==============*/
-	trx_undo_arr_t*	arr)	/* in: undo number array */
+	trx_undo_arr_t*	arr)	/*!< in: undo number array */
 {
 	ut_ad(arr->n_used == 0);
 
 	mem_heap_free(arr->heap);
 }
 
-/***********************************************************************
-Stores info of an undo log record to the array if it is not stored yet. */
+/*******************************************************************//**
+Stores info of an undo log record to the array if it is not stored yet.
+@return	FALSE if the record already existed in the array */
 static
 ibool
 trx_undo_arr_store_info(
 /*====================*/
-			/* out: FALSE if the record already existed in the
-			array */
-	trx_t*	trx,	/* in: transaction */
-	dulint	undo_no)/* in: undo number */
+	trx_t*		trx,	/*!< in: transaction */
+	undo_no_t	undo_no)/*!< in: undo number */
 {
 	trx_undo_inf_t*	cell;
 	trx_undo_inf_t*	stored_here;
@@ -703,14 +710,14 @@ trx_undo_arr_store_info(
 	}
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Removes an undo number from the array. */
 static
 void
 trx_undo_arr_remove_info(
 /*=====================*/
-	trx_undo_arr_t*	arr,	/* in: undo number array */
-	dulint		undo_no)/* in: undo number */
+	trx_undo_arr_t*	arr,	/*!< in: undo number array */
+	undo_no_t	undo_no)/*!< in: undo number */
 {
 	trx_undo_inf_t*	cell;
 	ulint		n_used;
@@ -737,19 +744,18 @@ trx_undo_arr_remove_info(
 	}
 }
 
-/***********************************************************************
-Gets the biggest undo number in an array. */
+/*******************************************************************//**
+Gets the biggest undo number in an array.
+@return	biggest value, ut_dulint_zero if the array is empty */
 static
-dulint
+undo_no_t
 trx_undo_arr_get_biggest(
 /*=====================*/
-				/* out: biggest value, ut_dulint_zero if
-				the array is empty */
-	trx_undo_arr_t*	arr)	/* in: undo number array */
+	trx_undo_arr_t*	arr)	/*!< in: undo number array */
 {
 	trx_undo_inf_t*	cell;
 	ulint		n_used;
-	dulint		biggest;
+	undo_no_t	biggest;
 	ulint		n;
 	ulint		i;
 
@@ -774,17 +780,17 @@ trx_undo_arr_get_biggest(
 	}
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Tries truncate the undo logs. */
-
+UNIV_INTERN
 void
 trx_roll_try_truncate(
 /*==================*/
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in/out: transaction */
 {
 	trx_undo_arr_t*	arr;
-	dulint		limit;
-	dulint		biggest;
+	undo_no_t	limit;
+	undo_no_t	biggest;
 
 	ut_ad(mutex_own(&(trx->undo_mutex)));
 	ut_ad(mutex_own(&((trx->rseg)->mutex)));
@@ -813,17 +819,17 @@ trx_roll_try_truncate(
 	}
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Pops the topmost undo log record in a single undo log and updates the info
-about the topmost record in the undo log memory struct. */
+about the topmost record in the undo log memory struct.
+@return	undo log record, the page s-latched */
 static
 trx_undo_rec_t*
 trx_roll_pop_top_rec(
 /*=================*/
-				/* out: undo log record, the page s-latched */
-	trx_t*		trx,	/* in: transaction */
-	trx_undo_t*	undo,	/* in: undo log */
-	mtr_t*		mtr)	/* in: mtr */
+	trx_t*		trx,	/*!< in: transaction */
+	trx_undo_t*	undo,	/*!< in: undo log */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	page_t*		undo_page;
 	ulint		offset;
@@ -832,7 +838,7 @@ trx_roll_pop_top_rec(
 
 	ut_ad(mutex_own(&(trx->undo_mutex)));
 
-	undo_page = trx_undo_page_get_s_latched(undo->space,
+	undo_page = trx_undo_page_get_s_latched(undo->space, undo->zip_size,
 						undo->top_page_no, mtr);
 	offset = undo->top_offset;
 
@@ -847,14 +853,14 @@ trx_roll_pop_top_rec(
 
 		undo->empty = TRUE;
 	} else {
-		prev_rec_page = buf_frame_align(prev_rec);
+		prev_rec_page = page_align(prev_rec);
 
 		if (prev_rec_page != undo_page) {
 
 			trx->pages_undone++;
 		}
 
-		undo->top_page_no = buf_frame_get_page_no(prev_rec_page);
+		undo->top_page_no = page_get_page_no(prev_rec_page);
 		undo->top_offset  = prev_rec - prev_rec_page;
 		undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
 	}
@@ -862,30 +868,29 @@ trx_roll_pop_top_rec(
 	return(undo_page + offset);
 }
 
-/************************************************************************
+/********************************************************************//**
 Pops the topmost record when the two undo logs of a transaction are seen
 as a single stack of records ordered by their undo numbers. Inserts the
 undo number of the popped undo record to the array of currently processed
 undo numbers in the transaction. When the query thread finishes processing
-of this undo record, it must be released with trx_undo_rec_release. */
-
+of this undo record, it must be released with trx_undo_rec_release.
+@return undo log record copied to heap, NULL if none left, or if the
+undo number of the top record would be less than the limit */
+UNIV_INTERN
 trx_undo_rec_t*
 trx_roll_pop_top_rec_of_trx(
 /*========================*/
-				/* out: undo log record copied to heap, NULL
-				if none left, or if the undo number of the
-				top record would be less than the limit */
-	trx_t*		trx,	/* in: transaction */
-	dulint		limit,	/* in: least undo number we need */
-	dulint*		roll_ptr,/* out: roll pointer to undo record */
-	mem_heap_t*	heap)	/* in: memory heap where copied */
+	trx_t*		trx,	/*!< in: transaction */
+	undo_no_t	limit,	/*!< in: least undo number we need */
+	roll_ptr_t*	roll_ptr,/*!< out: roll pointer to undo record */
+	mem_heap_t*	heap)	/*!< in: memory heap where copied */
 {
 	trx_undo_t*	undo;
 	trx_undo_t*	ins_undo;
 	trx_undo_t*	upd_undo;
 	trx_undo_rec_t*	undo_rec;
 	trx_undo_rec_t*	undo_rec_copy;
-	dulint		undo_no;
+	undo_no_t	undo_no;
 	ibool		is_insert;
 	trx_rseg_t*	rseg;
 	ulint		progress_pct;
@@ -995,17 +1000,17 @@ try_again:
 	return(undo_rec_copy);
 }
 
-/************************************************************************
+/********************************************************************//**
 Reserves an undo log record for a query thread to undo. This should be
 called if the query thread gets the undo log record not using the pop
-function above. */
-
+function above.
+@return	TRUE if succeeded */
+UNIV_INTERN
 ibool
 trx_undo_rec_reserve(
 /*=================*/
-			/* out: TRUE if succeeded */
-	trx_t*	trx,	/* in: transaction */
-	dulint	undo_no)/* in: undo number of the record */
+	trx_t*		trx,	/*!< in/out: transaction */
+	undo_no_t	undo_no)/*!< in: undo number of the record */
 {
 	ibool	ret;
 
@@ -1018,14 +1023,14 @@ trx_undo_rec_reserve(
 	return(ret);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 Releases a reserved undo record. */
-
+UNIV_INTERN
 void
 trx_undo_rec_release(
 /*=================*/
-	trx_t*	trx,	/* in: transaction */
-	dulint	undo_no)/* in: undo number */
+	trx_t*		trx,	/*!< in/out: transaction */
+	undo_no_t	undo_no)/*!< in: undo number */
 {
 	trx_undo_arr_t*	arr;
 
@@ -1038,15 +1043,15 @@ trx_undo_rec_release(
 	mutex_exit(&(trx->undo_mutex));
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Starts a rollback operation. */
-
+UNIV_INTERN
 void
 trx_rollback(
 /*=========*/
-	trx_t*		trx,	/* in: transaction */
-	trx_sig_t*	sig,	/* in: signal starting the rollback */
-	que_thr_t**	next_thr)/* in/out: next query thread to run;
+	trx_t*		trx,	/*!< in: transaction */
+	trx_sig_t*	sig,	/*!< in: signal starting the rollback */
+	que_thr_t**	next_thr)/*!< in/out: next query thread to run;
 				if the value which is passed in is
 				a pointer to a NULL pointer, then the
 				calling function can start running
@@ -1109,17 +1114,17 @@ trx_rollback(
 	}
 }
 
-/********************************************************************
+/****************************************************************//**
 Builds an undo 'query' graph for a transaction. The actual rollback is
 performed by executing this query graph like a query subprocedure call.
 The reply about the completion of the rollback will be sent by this
-graph. */
-
+graph.
+@return	own: the query graph */
+UNIV_INTERN
 que_t*
 trx_roll_graph_build(
 /*=================*/
-			/* out, own: the query graph */
-	trx_t*	trx)	/* in: trx handle */
+	trx_t*	trx)	/*!< in: trx handle */
 {
 	mem_heap_t*	heap;
 	que_fork_t*	fork;
@@ -1141,14 +1146,14 @@ trx_roll_graph_build(
 	return(fork);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Finishes error processing after the necessary partial rollback has been
 done. */
 static
 void
 trx_finish_error_processing(
 /*========================*/
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in: transaction */
 {
 	trx_sig_t*	sig;
 	trx_sig_t*	next_sig;
@@ -1171,14 +1176,14 @@ trx_finish_error_processing(
 	trx->que_state = TRX_QUE_RUNNING;
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Finishes a partial rollback operation. */
 static
 void
 trx_finish_partial_rollback_off_kernel(
 /*===================================*/
-	trx_t*		trx,	/* in: transaction */
-	que_thr_t**	next_thr)/* in/out: next query thread to run;
+	trx_t*		trx,	/*!< in: transaction */
+	que_thr_t**	next_thr)/*!< in/out: next query thread to run;
 				if the value which is passed in is a pointer
 				to a NULL pointer, then the calling function
 				can start running a new query thread; if this
@@ -1199,15 +1204,15 @@ trx_finish_partial_rollback_off_kernel(
 	trx->que_state = TRX_QUE_RUNNING;
 }
 
-/********************************************************************
+/****************************************************************//**
 Finishes a transaction rollback. */
-
+UNIV_INTERN
 void
 trx_finish_rollback_off_kernel(
 /*===========================*/
-	que_t*		graph,	/* in: undo graph which can now be freed */
-	trx_t*		trx,	/* in: transaction */
-	que_thr_t**	next_thr)/* in/out: next query thread to run;
+	que_t*		graph,	/*!< in: undo graph which can now be freed */
+	trx_t*		trx,	/*!< in: transaction */
+	que_thr_t**	next_thr)/*!< in/out: next query thread to run;
 				if the value which is passed in is
 				a pointer to a NULL pointer, then the
 				calling function can start running
@@ -1267,14 +1272,14 @@ trx_finish_rollback_off_kernel(
 	}
 }
 
-/*************************************************************************
-Creates a rollback command node struct. */
-
+/*********************************************************************//**
+Creates a rollback command node struct.
+@return	own: rollback node struct */
+UNIV_INTERN
 roll_node_t*
 roll_node_create(
 /*=============*/
-				/* out, own: rollback node struct */
-	mem_heap_t*	heap)	/* in: mem heap where created */
+	mem_heap_t*	heap)	/*!< in: mem heap where created */
 {
 	roll_node_t*	node;
 
@@ -1287,14 +1292,14 @@ roll_node_create(
 	return(node);
 }
 
-/***************************************************************
-Performs an execution step for a rollback command node in a query graph. */
-
+/***********************************************************//**
+Performs an execution step for a rollback command node in a query graph.
+@return	query thread to run next, or NULL */
+UNIV_INTERN
 que_thr_t*
 trx_rollback_step(
 /*==============*/
-				/* out: query thread to run next, or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	roll_node_t*	node;
 	ulint		sig_no;
diff --git a/storage/innobase/trx/trx0rseg.c b/storage/innodb_plugin/trx/trx0rseg.c
similarity index 58%
rename from storage/innobase/trx/trx0rseg.c
rename to storage/innodb_plugin/trx/trx0rseg.c
index 020f217c90b..580762e8716 100644
--- a/storage/innobase/trx/trx0rseg.c
+++ b/storage/innodb_plugin/trx/trx0rseg.c
@@ -1,7 +1,24 @@
-/******************************************************
-Rollback segment
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0rseg.c
+Rollback segment
 
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
@@ -17,14 +34,14 @@ Created 3/26/1996 Heikki Tuuri
 #include "srv0srv.h"
 #include "trx0purge.h"
 
-/**********************************************************************
-Looks for a rollback segment, based on the rollback segment id. */
-
+/******************************************************************//**
+Looks for a rollback segment, based on the rollback segment id.
+@return	rollback segment */
+UNIV_INTERN
 trx_rseg_t*
 trx_rseg_get_on_id(
 /*===============*/
-			/* out: rollback segment */
-	ulint	id)	/* in: rollback segment id */
+	ulint	id)	/*!< in: rollback segment id */
 {
 	trx_rseg_t*	rseg;
 
@@ -39,29 +56,30 @@ trx_rseg_get_on_id(
 	return(rseg);
 }
 
-/********************************************************************
+/****************************************************************//**
 Creates a rollback segment header. This function is called only when
-a new rollback segment is created in the database. */
-
+a new rollback segment is created in the database.
+@return	page number of the created segment, FIL_NULL if fail */
+UNIV_INTERN
 ulint
 trx_rseg_header_create(
 /*===================*/
-				/* out: page number of the created segment,
-				FIL_NULL if fail */
-	ulint	space,		/* in: space id */
-	ulint	max_size,	/* in: max size in pages */
-	ulint*	slot_no,	/* out: rseg id == slot number in trx sys */
-	mtr_t*	mtr)		/* in: mtr */
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	max_size,	/*!< in: max size in pages */
+	ulint*	slot_no,	/*!< out: rseg id == slot number in trx sys */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
 	ulint		page_no;
 	trx_rsegf_t*	rsegf;
 	trx_sysf_t*	sys_header;
 	ulint		i;
-	page_t*		page;
+	buf_block_t*	block;
 
 	ut_ad(mtr);
 	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
+	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
 				MTR_MEMO_X_LOCK));
 	sys_header = trx_sysf_get(mtr);
 
@@ -73,22 +91,21 @@ trx_rseg_header_create(
 	}
 
 	/* Allocate a new file segment for the rollback segment */
-	page = fseg_create(space, 0, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);
+	block = fseg_create(space, 0,
+			    TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);
 
-	if (page == NULL) {
+	if (block == NULL) {
 		/* No space left */
 
 		return(FIL_NULL);
 	}
 
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_RSEG_HEADER_NEW);
-#endif /* UNIV_SYNC_DEBUG */
+	buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW);
 
-	page_no = buf_frame_get_page_no(page);
+	page_no = buf_block_get_page_no(block);
 
 	/* Get the rollback segment file page */
-	rsegf = trx_rsegf_get_new(space, page_no, mtr);
+	rsegf = trx_rsegf_get_new(space, zip_size, page_no, mtr);
 
 	/* Initialize max size field */
 	mlog_write_ulint(rsegf + TRX_RSEG_MAX_SIZE, max_size,
@@ -114,20 +131,22 @@ trx_rseg_header_create(
 	return(page_no);
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Creates and initializes a rollback segment object. The values for the
 fields are read from the header. The object is inserted to the rseg
 list of the trx system object and a pointer is inserted in the rseg
-array in the trx system object. */
+array in the trx system object.
+@return	own: rollback segment object */
 static
 trx_rseg_t*
 trx_rseg_mem_create(
 /*================*/
-				/* out, own: rollback segment object */
-	ulint	id,		/* in: rollback segment id */
-	ulint	space,		/* in: space where the segment placed */
-	ulint	page_no,	/* in: page number of the segment header */
-	mtr_t*	mtr)		/* in: mtr */
+	ulint	id,		/*!< in: rollback segment id */
+	ulint	space,		/*!< in: space where the segment placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number of the segment header */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
 	trx_rsegf_t*	rseg_header;
 	trx_rseg_t*	rseg;
@@ -142,6 +161,7 @@ trx_rseg_mem_create(
 
 	rseg->id = id;
 	rseg->space = space;
+	rseg->zip_size = zip_size;
 	rseg->page_no = page_no;
 
 	mutex_create(&rseg->mutex, SYNC_RSEG);
@@ -150,7 +170,7 @@ trx_rseg_mem_create(
 
 	trx_sys_set_nth_rseg(trx_sys, id, rseg);
 
-	rseg_header = trx_rsegf_get_new(space, page_no, mtr);
+	rseg_header = trx_rsegf_get_new(space, zip_size, page_no, mtr);
 
 	rseg->max_size = mtr_read_ulint(rseg_header + TRX_RSEG_MAX_SIZE,
 					MLOG_4BYTES, mtr);
@@ -172,7 +192,8 @@ trx_rseg_mem_create(
 		rseg->last_page_no = node_addr.page;
 		rseg->last_offset = node_addr.boffset;
 
-		undo_log_hdr = trx_undo_page_get(rseg->space, node_addr.page,
+		undo_log_hdr = trx_undo_page_get(rseg->space, rseg->zip_size,
+						 node_addr.page,
 						 mtr) + node_addr.boffset;
 
 		rseg->last_trx_no = mtr_read_dulint(
@@ -186,15 +207,15 @@ trx_rseg_mem_create(
 	return(rseg);
 }
 
-/*************************************************************************
+/*********************************************************************//**
 Creates the memory copies for rollback segments and initializes the
 rseg list and array in trx_sys at a database startup. */
-
+UNIV_INTERN
 void
 trx_rseg_list_and_array_init(
 /*=========================*/
-	trx_sysf_t*	sys_header,	/* in: trx system header */
-	mtr_t*		mtr)		/* in: mtr */
+	trx_sysf_t*	sys_header,	/*!< in: trx system header */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ulint	i;
 	ulint	page_no;
@@ -212,33 +233,39 @@ trx_rseg_list_and_array_init(
 
 			trx_sys_set_nth_rseg(trx_sys, i, NULL);
 		} else {
+			ulint	zip_size;
+
 			space = trx_sysf_rseg_get_space(sys_header, i, mtr);
 
-			trx_rseg_mem_create(i, space, page_no, mtr);
+			zip_size = space ? fil_space_get_zip_size(space) : 0;
+
+			trx_rseg_mem_create(i, space, zip_size, page_no, mtr);
 		}
 	}
 }
 
-/********************************************************************
-Creates a new rollback segment to the database. */
-
+/****************************************************************//**
+Creates a new rollback segment to the database.
+@return	the created segment object, NULL if fail */
+UNIV_INTERN
 trx_rseg_t*
 trx_rseg_create(
 /*============*/
-				/* out: the created segment object, NULL if
-				fail */
-	ulint	space,		/* in: space id */
-	ulint	max_size,	/* in: max size in pages */
-	ulint*	id,		/* out: rseg id */
-	mtr_t*	mtr)		/* in: mtr */
+	ulint	space,		/*!< in: space id */
+	ulint	max_size,	/*!< in: max size in pages */
+	ulint*	id,		/*!< out: rseg id */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
+	ulint		flags;
+	ulint		zip_size;
 	ulint		page_no;
 	trx_rseg_t*	rseg;
 
-	mtr_x_lock(fil_space_get_latch(space), mtr);
+	mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
+	zip_size = dict_table_flags_to_zip_size(flags);
 	mutex_enter(&kernel_mutex);
 
-	page_no = trx_rseg_header_create(space, max_size, id, mtr);
+	page_no = trx_rseg_header_create(space, zip_size, max_size, id, mtr);
 
 	if (page_no == FIL_NULL) {
 
@@ -246,7 +273,7 @@ trx_rseg_create(
 		return(NULL);
 	}
 
-	rseg = trx_rseg_mem_create(*id, space, page_no, mtr);
+	rseg = trx_rseg_mem_create(*id, space, zip_size, page_no, mtr);
 
 	mutex_exit(&kernel_mutex);
 
diff --git a/storage/innobase/trx/trx0sys.c b/storage/innodb_plugin/trx/trx0sys.c
similarity index 53%
rename from storage/innobase/trx/trx0sys.c
rename to storage/innodb_plugin/trx/trx0sys.c
index 40348dd4199..ef10119587d 100644
--- a/storage/innobase/trx/trx0sys.c
+++ b/storage/innodb_plugin/trx/trx0sys.c
@@ -1,7 +1,24 @@
-/******************************************************
-Transaction system
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0sys.c
+Transaction system
 
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
@@ -12,8 +29,10 @@ Created 3/26/1996 Heikki Tuuri
 #include "trx0sys.ic"
 #endif
 
+#ifndef UNIV_HOTBACKUP
 #include "fsp0fsp.h"
-#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "mtr0log.h"
 #include "trx0trx.h"
 #include "trx0rseg.h"
 #include "trx0undo.h"
@@ -22,47 +41,105 @@ Created 3/26/1996 Heikki Tuuri
 #include "log0log.h"
 #include "os0file.h"
 
-/* The transaction system */
-trx_sys_t*		trx_sys		= NULL;
-trx_doublewrite_t*	trx_doublewrite = NULL;
+/** The file format tag structure with id and name. */
+struct file_format_struct {
+	ulint		id;		/*!< id of the file format */
+	const char*	name;		/*!< text representation of the
+					file format */
+	mutex_t		mutex;		/*!< covers changes to the above
+					fields */
+};
 
-/* The following is set to TRUE when we are upgrading from the old format data
-files to the new >= 4.1.x format multiple tablespaces format data files */
+/** The file format tag */
+typedef struct file_format_struct	file_format_t;
 
-ibool			trx_doublewrite_must_reset_space_ids	= FALSE;
+/** The transaction system */
+UNIV_INTERN trx_sys_t*		trx_sys		= NULL;
+/** The doublewrite buffer */
+UNIV_INTERN trx_doublewrite_t*	trx_doublewrite = NULL;
 
-/* The following is TRUE when we are using the database in the new format,
-i.e., we have successfully upgraded, or have created a new database
-installation */
+/** The following is set to TRUE when we are upgrading from pre-4.1
+format data files to the multiple tablespaces format data files */
+UNIV_INTERN ibool	trx_doublewrite_must_reset_space_ids	= FALSE;
+/** Set to TRUE when the doublewrite buffer is being created */
+UNIV_INTERN ibool	trx_doublewrite_buf_is_being_created = FALSE;
 
-ibool			trx_sys_multiple_tablespace_format	= FALSE;
+/** The following is TRUE when we are using the database in the
+post-4.1 format, i.e., we have successfully upgraded, or have created
+a new database installation */
+UNIV_INTERN ibool	trx_sys_multiple_tablespace_format	= FALSE;
 
-/* In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. We have successfully got the updates to InnoDB
-up to this position. If .._pos is -1, it means no crash recovery was needed,
-or there was no master log position info inside InnoDB. */
+/** In a MySQL replication slave, in crash recovery we store the master log
+file name and position here. */
+/* @{ */
+/** Master binlog file name */
+UNIV_INTERN char	trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
+/** Master binlog file position.  We have successfully got the updates
+up to this position.  -1 means that no crash recovery was needed, or
+there was no master log position info inside InnoDB.*/
+UNIV_INTERN ib_int64_t	trx_sys_mysql_master_log_pos	= -1;
+/* @} */
 
-char		trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
-ib_longlong	trx_sys_mysql_master_log_pos	= -1;
-
-/* If this MySQL server uses binary logging, after InnoDB has been inited
+/** If this MySQL server uses binary logging, after InnoDB has been inited
 and if it has done a crash recovery, we store the binlog file name and position
-here. If .._pos is -1, it means there was no binlog position info inside
-InnoDB. */
+here. */
+/* @{ */
+/** Binlog file name */
+UNIV_INTERN char	trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
+/** Binlog file position, or -1 if unknown */
+UNIV_INTERN ib_int64_t	trx_sys_mysql_bin_log_pos	= -1;
+/* @} */
+#endif /* !UNIV_HOTBACKUP */
 
-char		trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
-ib_longlong	trx_sys_mysql_bin_log_pos	= -1;
+/** List of animal names representing file format. */
+static const char*	file_format_name_map[] = {
+	"Antelope",
+	"Barracuda",
+	"Cheetah",
+	"Dragon",
+	"Elk",
+	"Fox",
+	"Gazelle",
+	"Hornet",
+	"Impala",
+	"Jaguar",
+	"Kangaroo",
+	"Leopard",
+	"Moose",
+	"Nautilus",
+	"Ocelot",
+	"Porpoise",
+	"Quail",
+	"Rabbit",
+	"Shark",
+	"Tiger",
+	"Urchin",
+	"Viper",
+	"Whale",
+	"Xenops",
+	"Yak",
+	"Zebra"
+};
 
+/** The number of elements in the file format name array. */
+static const ulint	FILE_FORMAT_NAME_N
+	= sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
 
-/********************************************************************
-Determines if a page number is located inside the doublewrite buffer. */
+#ifndef UNIV_HOTBACKUP
+/** This is used to track the maximum file format id known to InnoDB. It's
+updated via SET GLOBAL innodb_file_format_check = 'x' or when we open
+or create a table. */
+static	file_format_t	file_format_max;
 
+/****************************************************************//**
+Determines if a page number is located inside the doublewrite buffer.
+@return TRUE if the location is inside the two blocks of the
+doublewrite buffer */
+UNIV_INTERN
 ibool
 trx_doublewrite_page_inside(
 /*========================*/
-				/* out: TRUE if the location is inside
-				the two blocks of the doublewrite buffer */
-	ulint	page_no)	/* in: page number */
+	ulint	page_no)	/*!< in: page number */
 {
 	if (trx_doublewrite == NULL) {
 
@@ -84,13 +161,13 @@ trx_doublewrite_page_inside(
 	return(FALSE);
 }
 
-/********************************************************************
+/****************************************************************//**
 Creates or initialializes the doublewrite buffer at a database start. */
 static
 void
 trx_doublewrite_init(
 /*=================*/
-	byte*	doublewrite)	/* in: pointer to the doublewrite buf
+	byte*	doublewrite)	/*!< in: pointer to the doublewrite buf
 				header on trx sys page */
 {
 	trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
@@ -118,17 +195,17 @@ trx_doublewrite_init(
 		2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*));
 }
 
-/********************************************************************
+/****************************************************************//**
 Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
 multiple tablespace format. */
-
+UNIV_INTERN
 void
 trx_sys_mark_upgraded_to_multiple_tablespaces(void)
 /*===============================================*/
 {
-	page_t*	page;
-	byte*	doublewrite;
-	mtr_t	mtr;
+	buf_block_t*	block;
+	byte*		doublewrite;
+	mtr_t		mtr;
 
 	/* We upgraded to 4.1.x and reset the space id fields in the
 	doublewrite buffer. Let us mark to the trx_sys header that the upgrade
@@ -136,12 +213,11 @@ trx_sys_mark_upgraded_to_multiple_tablespaces(void)
 
 	mtr_start(&mtr);
 
-	page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
+	block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
+			     RW_X_LATCH, &mtr);
+	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
-	doublewrite = page + TRX_SYS_DOUBLEWRITE;
+	doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
 
 	mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
 			 TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
@@ -149,22 +225,22 @@ trx_sys_mark_upgraded_to_multiple_tablespaces(void)
 	mtr_commit(&mtr);
 
 	/* Flush the modified pages to disk and make a checkpoint */
-	log_make_checkpoint_at(ut_dulint_max, TRUE);
+	log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
 	trx_sys_multiple_tablespace_format = TRUE;
 }
 
-/********************************************************************
+/****************************************************************//**
 Creates the doublewrite buffer to a new InnoDB installation. The header of the
 doublewrite buffer is placed on the trx system header page. */
-
+UNIV_INTERN
 void
 trx_sys_create_doublewrite_buf(void)
 /*================================*/
 {
-	page_t*	page;
-	page_t*	page2;
-	page_t*	new_page;
+	buf_block_t*	block;
+	buf_block_t*	block2;
+	buf_block_t*	new_block;
 	byte*	doublewrite;
 	byte*	fseg_header;
 	ulint	page_no;
@@ -180,13 +256,13 @@ trx_sys_create_doublewrite_buf(void)
 
 start_again:
 	mtr_start(&mtr);
+	trx_doublewrite_buf_is_being_created = TRUE;
 
-	page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
+	block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
+			     RW_X_LATCH, &mtr);
+	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
-	doublewrite = page + TRX_SYS_DOUBLEWRITE;
+	doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
 
 	if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
 	    == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
@@ -196,6 +272,7 @@ start_again:
 		trx_doublewrite_init(doublewrite);
 
 		mtr_commit(&mtr);
+		trx_doublewrite_buf_is_being_created = FALSE;
 	} else {
 		fprintf(stderr,
 			"InnoDB: Doublewrite buffer not found:"
@@ -214,18 +291,16 @@ start_again:
 			exit(1);
 		}
 
-		page2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
-				    TRX_SYS_DOUBLEWRITE
-				    + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
+		block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
+				     TRX_SYS_DOUBLEWRITE
+				     + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
 
 		/* fseg_create acquires a second latch on the page,
 		therefore we must declare it: */
 
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(page2, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
+		buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
 
-		if (page2 == NULL) {
+		if (block2 == NULL) {
 			fprintf(stderr,
 				"InnoDB: Cannot create doublewrite buffer:"
 				" you must\n"
@@ -238,8 +313,8 @@ start_again:
 			exit(1);
 		}
 
-		fseg_header = page + TRX_SYS_DOUBLEWRITE
-			+ TRX_SYS_DOUBLEWRITE_FSEG;
+		fseg_header = buf_block_get_frame(block)
+			+ TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
 		prev_page_no = 0;
 
 		for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
@@ -268,20 +343,13 @@ start_again:
 			the page position in the tablespace, then the page
 			has not been written to in doublewrite. */
 
-			new_page = buf_page_get(TRX_SYS_SPACE, page_no,
-						RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
-			buf_page_dbg_add_level(new_page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
-			/* Make a dummy change to the page to ensure it will
-			be written to disk in a flush */
-
-			mlog_write_ulint(new_page + FIL_PAGE_DATA,
-					 TRX_SYS_DOUBLEWRITE_MAGIC_N,
-					 MLOG_4BYTES, &mtr);
+			new_block = buf_page_get(TRX_SYS_SPACE, 0, page_no,
+						 RW_X_LATCH, &mtr);
+			buf_block_dbg_add_level(new_block,
+						SYNC_NO_ORDER_CHECK);
 
 			if (i == FSP_EXTENT_SIZE / 2) {
+				ut_a(page_no == FSP_EXTENT_SIZE);
 				mlog_write_ulint(doublewrite
 						 + TRX_SYS_DOUBLEWRITE_BLOCK1,
 						 page_no, MLOG_4BYTES, &mtr);
@@ -291,6 +359,7 @@ start_again:
 						 page_no, MLOG_4BYTES, &mtr);
 			} else if (i == FSP_EXTENT_SIZE / 2
 				   + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
+				ut_a(page_no == 2 * FSP_EXTENT_SIZE);
 				mlog_write_ulint(doublewrite
 						 + TRX_SYS_DOUBLEWRITE_BLOCK2,
 						 page_no, MLOG_4BYTES, &mtr);
@@ -320,7 +389,7 @@ start_again:
 		mtr_commit(&mtr);
 
 		/* Flush the modified pages to disk and make a checkpoint */
-		log_make_checkpoint_at(ut_dulint_max, TRUE);
+		log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
 		fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
 
@@ -330,18 +399,18 @@ start_again:
 	}
 }
 
-/********************************************************************
+/****************************************************************//**
 At a database startup initializes the doublewrite buffer memory structure if
 we already have a doublewrite buffer created in the data files. If we are
 upgrading to an InnoDB version which supports multiple tablespaces, then this
 function performs the necessary update operations. If we are in a crash
 recovery, this function uses a possible doublewrite buffer to restore
 half-written pages in the data files. */
-
+UNIV_INTERN
 void
 trx_sys_doublewrite_init_or_restore_pages(
 /*======================================*/
-	ibool	restore_corrupt_pages)
+	ibool	restore_corrupt_pages)	/*!< in: TRUE=restore pages */
 {
 	byte*	buf;
 	byte*	read_buf;
@@ -363,7 +432,7 @@ trx_sys_doublewrite_init_or_restore_pages(
 	/* Read the trx sys header to check if we are using the doublewrite
 	buffer */
 
-	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, TRX_SYS_PAGE_NO, 0,
+	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
 	       UNIV_PAGE_SIZE, read_buf, NULL);
 	doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
 
@@ -401,10 +470,10 @@ trx_sys_doublewrite_init_or_restore_pages(
 
 	/* Read the pages from the doublewrite buffer to memory */
 
-	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block1, 0,
+	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
 	       TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
 	       buf, NULL);
-	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block2, 0,
+	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
 	       TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
 	       buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
 	       NULL);
@@ -433,7 +502,7 @@ trx_sys_doublewrite_init_or_restore_pages(
 					+ i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
 			}
 
-			fil_io(OS_FILE_WRITE, TRUE, 0, source_page_no, 0,
+			fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
 			       UNIV_PAGE_SIZE, page, NULL);
 			/* printf("Resetting space id in page %lu\n",
 			source_page_no); */
@@ -472,32 +541,37 @@ trx_sys_doublewrite_init_or_restore_pages(
 			/* It is an unwritten doublewrite buffer page:
 			do nothing */
 		} else {
-			/* Read in the actual page from the data files */
+			ulint	zip_size = fil_space_get_zip_size(space_id);
+
+			/* Read in the actual page from the file */
+			fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
+			       page_no, 0,
+			       zip_size ? zip_size : UNIV_PAGE_SIZE,
+			       read_buf, NULL);
 
-			fil_io(OS_FILE_READ, TRUE, space_id, page_no, 0,
-			       UNIV_PAGE_SIZE, read_buf, NULL);
 			/* Check if the page is corrupt */
 
-			if (buf_page_is_corrupted(read_buf)) {
+			if (UNIV_UNLIKELY
+			    (buf_page_is_corrupted(read_buf, zip_size))) {
 
 				fprintf(stderr,
 					"InnoDB: Warning: database page"
 					" corruption or a failed\n"
-					"InnoDB: file read of page %lu.\n",
-					(ulong) page_no);
-				fprintf(stderr,
+					"InnoDB: file read of"
+					" space %lu page %lu.\n"
 					"InnoDB: Trying to recover it from"
-					" the doublewrite buffer.\n");
+					" the doublewrite buffer.\n",
+					(ulong) space_id, (ulong) page_no);
 
-				if (buf_page_is_corrupted(page)) {
+				if (buf_page_is_corrupted(page, zip_size)) {
 					fprintf(stderr,
 						"InnoDB: Dump of the page:\n");
-					buf_page_print(read_buf);
+					buf_page_print(read_buf, zip_size);
 					fprintf(stderr,
 						"InnoDB: Dump of"
 						" corresponding page"
 						" in doublewrite buffer:\n");
-					buf_page_print(page);
+					buf_page_print(page, zip_size);
 
 					fprintf(stderr,
 						"InnoDB: Also the page in the"
@@ -519,8 +593,9 @@ trx_sys_doublewrite_init_or_restore_pages(
 				position */
 
 				fil_io(OS_FILE_WRITE, TRUE, space_id,
-				       page_no, 0,
-				       UNIV_PAGE_SIZE, page, NULL);
+				       zip_size, page_no, 0,
+				       zip_size ? zip_size : UNIV_PAGE_SIZE,
+				       page, NULL);
 				fprintf(stderr,
 					"InnoDB: Recovered the page from"
 					" the doublewrite buffer.\n");
@@ -536,14 +611,14 @@ leave_func:
 	ut_free(unaligned_read_buf);
 }
 
-/********************************************************************
-Checks that trx is in the trx list. */
-
+/****************************************************************//**
+Checks that trx is in the trx list.
+@return	TRUE if is in */
+UNIV_INTERN
 ibool
 trx_in_trx_list(
 /*============*/
-			/* out: TRUE if is in */
-	trx_t*	in_trx)	/* in: trx */
+	trx_t*	in_trx)	/*!< in: trx */
 {
 	trx_t*	trx;
 
@@ -564,9 +639,9 @@ trx_in_trx_list(
 	return(FALSE);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Writes the value of max_trx_id to the file based trx system header. */
-
+UNIV_INTERN
 void
 trx_sys_flush_max_trx_id(void)
 /*==========================*/
@@ -585,20 +660,20 @@ trx_sys_flush_max_trx_id(void)
 	mtr_commit(&mtr);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Updates the offset information about the end of the MySQL binlog entry
 which corresponds to the transaction just being committed. In a MySQL
 replication slave updates the latest master binlog position up to which
 replication has proceeded. */
-
+UNIV_INTERN
 void
 trx_sys_update_mysql_binlog_offset(
 /*===============================*/
-	const char*	file_name,/* in: MySQL log file name */
-	ib_longlong	offset,	/* in: position in that log file */
-	ulint		field,	/* in: offset of the MySQL log info field in
+	const char*	file_name,/*!< in: MySQL log file name */
+	ib_int64_t	offset,	/*!< in: position in that log file */
+	ulint		field,	/*!< in: offset of the MySQL log info field in
 				the trx sys header */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	trx_sysf_t*	sys_header;
 
@@ -646,44 +721,10 @@ trx_sys_update_mysql_binlog_offset(
 			 MLOG_4BYTES, mtr);
 }
 
-#ifdef UNIV_HOTBACKUP
-/*********************************************************************
-Prints to stderr the MySQL binlog info in the system header if the
-magic number shows it valid. */
-
-void
-trx_sys_print_mysql_binlog_offset_from_page(
-/*========================================*/
-	byte*	page)	/* in: buffer containing the trx system header page,
-			i.e., page number TRX_SYS_PAGE_NO in the tablespace */
-{
-	trx_sysf_t*	sys_header;
-
-	sys_header = page + TRX_SYS;
-
-	if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
-			     + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
-	    == TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
-		fprintf(stderr,
-			"ibbackup: Last MySQL binlog file position %lu %lu,"
-			" file name %s\n",
-			(ulong) mach_read_from_4(
-				sys_header + TRX_SYS_MYSQL_LOG_INFO
-				+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
-			(ulong) mach_read_from_4(
-				sys_header + TRX_SYS_MYSQL_LOG_INFO
-				+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
-			sys_header + TRX_SYS_MYSQL_LOG_INFO
-			+ TRX_SYS_MYSQL_LOG_NAME);
-	}
-}
-#endif /* UNIV_HOTBACKUP */
-
-/*********************************************************************
+/*****************************************************************//**
 Stores the MySQL binlog offset info in the trx system header if
 the magic number shows it valid, and print the info to stderr */
-
+UNIV_INTERN
 void
 trx_sys_print_mysql_binlog_offset(void)
 /*===================================*/
@@ -714,8 +755,8 @@ trx_sys_print_mysql_binlog_offset(void)
 		+ TRX_SYS_MYSQL_LOG_OFFSET_LOW);
 
 	trx_sys_mysql_bin_log_pos
-		= (((ib_longlong)trx_sys_mysql_bin_log_pos_high) << 32)
-		+ (ib_longlong)trx_sys_mysql_bin_log_pos_low;
+		= (((ib_int64_t)trx_sys_mysql_bin_log_pos_high) << 32)
+		+ (ib_int64_t)trx_sys_mysql_bin_log_pos_low;
 
 	ut_memcpy(trx_sys_mysql_bin_log_name,
 		  sys_header + TRX_SYS_MYSQL_LOG_INFO
@@ -730,10 +771,10 @@ trx_sys_print_mysql_binlog_offset(void)
 	mtr_commit(&mtr);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Prints to stderr the MySQL master log offset info in the trx system header if
 the magic number shows it valid. */
-
+UNIV_INTERN
 void
 trx_sys_print_mysql_master_log_pos(void)
 /*====================================*/
@@ -775,23 +816,23 @@ trx_sys_print_mysql_master_log_pos(void)
 		  TRX_SYS_MYSQL_LOG_NAME_LEN);
 
 	trx_sys_mysql_master_log_pos
-		= (((ib_longlong) mach_read_from_4(
+		= (((ib_int64_t) mach_read_from_4(
 			    sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 			    + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
-		+ ((ib_longlong) mach_read_from_4(
+		+ ((ib_int64_t) mach_read_from_4(
 			   sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 			   + TRX_SYS_MYSQL_LOG_OFFSET_LOW));
 	mtr_commit(&mtr);
 }
 
-/********************************************************************
-Looks for a free slot for a rollback segment in the trx system file copy. */
-
+/****************************************************************//**
+Looks for a free slot for a rollback segment in the trx system file copy.
+@return	slot index or ULINT_UNDEFINED if not found */
+UNIV_INTERN
 ulint
 trx_sysf_rseg_find_free(
 /*====================*/
-			/* out: slot index or ULINT_UNDEFINED if not found */
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	trx_sysf_t*	sys_header;
 	ulint		page_no;
@@ -814,17 +855,18 @@ trx_sysf_rseg_find_free(
 	return(ULINT_UNDEFINED);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Creates the file page for the transaction system. This function is called only
 at the database creation, before trx_sys_init. */
 static
 void
 trx_sysf_create(
 /*============*/
-	mtr_t*	mtr)	/* in: mtr */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	trx_sysf_t*	sys_header;
 	ulint		slot_no;
+	buf_block_t*	block;
 	page_t*		page;
 	ulint		page_no;
 	ulint		i;
@@ -835,17 +877,17 @@ trx_sysf_create(
 	then enter the kernel: we must do it in this order to conform
 	to the latching order rules. */
 
-	mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE), mtr);
+	mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
 	mutex_enter(&kernel_mutex);
 
 	/* Create the trx sys file block in a new allocated file segment */
-	page = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
-			   mtr);
-	ut_a(buf_frame_get_page_no(page) == TRX_SYS_PAGE_NO);
+	block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
+			    mtr);
+	buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
 
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_TRX_SYS_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
+	ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
+
+	page = buf_block_get_frame(block);
 
 	mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
 			 MLOG_2BYTES, mtr);
@@ -882,7 +924,7 @@ trx_sysf_create(
 		       + page - sys_header);
 
 	/* Create the first rollback segment in the SYSTEM tablespace */
-	page_no = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no,
+	page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, &slot_no,
 					 mtr);
 	ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
 	ut_a(page_no != FIL_NULL);
@@ -890,16 +932,16 @@ trx_sysf_create(
 	mutex_exit(&kernel_mutex);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Creates and initializes the central memory structures for the transaction
 system. This is called when the database is started. */
-
+UNIV_INTERN
 void
 trx_sys_init_at_db_start(void)
 /*==========================*/
 {
 	trx_sysf_t*	sys_header;
-	ib_longlong	rows_to_undo	= 0;
+	ib_int64_t	rows_to_undo	= 0;
 	const char*	unit		= "";
 	trx_t*		trx;
 	mtr_t		mtr;
@@ -933,6 +975,7 @@ trx_sys_init_at_db_start(void)
 		2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
 
 	UT_LIST_INIT(trx_sys->mysql_trx_list);
+	trx_dummy_sess = sess_open();
 	trx_lists_init_at_db_start();
 
 	if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
@@ -964,9 +1007,8 @@ trx_sys_init_at_db_start(void)
 			(ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
 			(ulong) rows_to_undo, unit);
 
-		fprintf(stderr, "InnoDB: Trx id counter is %lu %lu\n",
-			(ulong) ut_dulint_get_high(trx_sys->max_trx_id),
-			(ulong) ut_dulint_get_low(trx_sys->max_trx_id));
+		fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
+			TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
 	}
 
 	UT_LIST_INIT(trx_sys->view_list);
@@ -978,9 +1020,9 @@ trx_sys_init_at_db_start(void)
 	mtr_commit(&mtr);
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Creates and initializes the transaction system at the database creation. */
-
+UNIV_INTERN
 void
 trx_sys_create(void)
 /*================*/
@@ -995,3 +1037,499 @@ trx_sys_create(void)
 
 	trx_sys_init_at_db_start();
 }
+
+/*****************************************************************//**
+Update the file format tag.
+@return	always TRUE */
+static
+ibool
+trx_sys_file_format_max_write(
+/*==========================*/
+	ulint		format_id,	/*!< in: file format id */
+	const char**	name)		/*!< out: max file format name, can
+					be NULL */
+{
+	mtr_t		mtr;
+	byte*		ptr;
+	buf_block_t*	block;
+	ulint		tag_value_low;
+
+	mtr_start(&mtr);
+
+	block = buf_page_get(
+		TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
+
+	file_format_max.id = format_id;
+	file_format_max.name = trx_sys_file_format_id_to_name(format_id);
+
+	ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
+	tag_value_low = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
+
+	if (name) {
+		*name = file_format_max.name;
+	}
+
+	mlog_write_dulint(
+		ptr,
+		ut_dulint_create(TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH,
+				 tag_value_low),
+		&mtr);
+
+	mtr_commit(&mtr);
+
+	return(TRUE);
+}
+
+/*****************************************************************//**
+Read the file format tag.
+@return	the file format or ULINT_UNDEFINED if not set. */
+static
+ulint
+trx_sys_file_format_max_read(void)
+/*==============================*/
+{
+	mtr_t			mtr;
+	const byte*		ptr;
+	const buf_block_t*	block;
+	ulint			format_id;
+	dulint			file_format_id;
+
+	/* Since this is called during the startup phase it's safe to
+	read the value without a covering mutex. */
+	mtr_start(&mtr);
+
+	block = buf_page_get(
+		TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
+
+	ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
+	file_format_id = mach_read_from_8(ptr);
+
+	mtr_commit(&mtr);
+
+	format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
+
+	if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
+	    || format_id >= FILE_FORMAT_NAME_N) {
+
+		/* Either it has never been tagged, or garbage in it. */
+		return(ULINT_UNDEFINED);
+	}
+
+	return(format_id);
+}
+
+/*****************************************************************//**
+Get the name representation of the file format from its id.
+@return	pointer to the name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_id_to_name(
+/*===========================*/
+	const ulint	id)	/*!< in: id of the file format */
+{
+	ut_a(id < FILE_FORMAT_NAME_N);
+
+	return(file_format_name_map[id]);
+}
+
+/*****************************************************************//**
+Check for the max file format tag stored on disk. Note: If max_format_id
+is == DICT_TF_FORMAT_MAX + 1 then we only print a warning.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+trx_sys_file_format_max_check(
+/*==========================*/
+	ulint	max_format_id)	/*!< in: max format id to check */
+{
+	ulint	format_id;
+
+	/* Check the file format in the tablespace. Do not try to
+	recover if the file format is not supported by the engine
+	unless forced by the user. */
+	format_id = trx_sys_file_format_max_read();
+	if (format_id == ULINT_UNDEFINED) {
+		/* Format ID was not set. Set it to minimum possible
+		value. */
+		format_id = DICT_TF_FORMAT_51;
+	}
+
+	ut_print_timestamp(stderr);
+	fprintf(stderr,
+		"  InnoDB: highest supported file format is %s.\n",
+		trx_sys_file_format_id_to_name(DICT_TF_FORMAT_MAX));
+
+	if (format_id > DICT_TF_FORMAT_MAX) {
+
+		ut_a(format_id < FILE_FORMAT_NAME_N);
+
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			"  InnoDB: %s: the system tablespace is in a file "
+			"format that this version doesn't support - %s\n",
+			((max_format_id <= DICT_TF_FORMAT_MAX)
+				? "Error" : "Warning"),
+			trx_sys_file_format_id_to_name(format_id));
+
+		if (max_format_id <= DICT_TF_FORMAT_MAX) {
+			return(DB_ERROR);
+		}
+	}
+
+	format_id = (format_id > max_format_id) ? format_id : max_format_id;
+
+	/* We don't need a mutex here, as this function should only
+	be called once at start up. */
+	file_format_max.id = format_id;
+	file_format_max.name = trx_sys_file_format_id_to_name(format_id);
+
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Set the file format id unconditionally except if it's already the
+same value.
+@return	TRUE if value updated */
+UNIV_INTERN
+ibool
+trx_sys_file_format_max_set(
+/*========================*/
+	ulint		format_id,	/*!< in: file format id */
+	const char**	name)		/*!< out: max file format name or
+					NULL if not needed. */
+{
+	ibool		ret = FALSE;
+
+	ut_a(format_id <= DICT_TF_FORMAT_MAX);
+
+	mutex_enter(&file_format_max.mutex);
+
+	/* Only update if not already same value. */
+	if (format_id != file_format_max.id) {
+
+		ret = trx_sys_file_format_max_write(format_id, name);
+	}
+
+	mutex_exit(&file_format_max.mutex);
+
+	return(ret);
+}
+
+/********************************************************************//**
+Tags the system table space with minimum format id if it has not been
+tagged yet.
+WARNING: This function is only called during the startup and AFTER the
+redo log application during recovery has finished. */
+UNIV_INTERN
+void
+trx_sys_file_format_tag_init(void)
+/*==============================*/
+{
+	ulint	format_id;
+
+	format_id = trx_sys_file_format_max_read();
+
+	/* If format_id is not set then set it to the minimum. */
+	if (format_id == ULINT_UNDEFINED) {
+		trx_sys_file_format_max_set(DICT_TF_FORMAT_51, NULL);
+	}
+}
+
+/********************************************************************//**
+Update the file format tag in the system tablespace only if the given
+format id is greater than the known max id.
+@return	TRUE if format_id was bigger than the known max id */
+UNIV_INTERN
+ibool
+trx_sys_file_format_max_upgrade(
+/*============================*/
+	const char**	name,		/*!< out: max file format name */
+	ulint		format_id)	/*!< in: file format identifier */
+{
+	ibool		ret = FALSE;
+
+	ut_a(name);
+	ut_a(file_format_max.name != NULL);
+	ut_a(format_id <= DICT_TF_FORMAT_MAX);
+
+	mutex_enter(&file_format_max.mutex);
+
+	if (format_id > file_format_max.id) {
+
+		ret = trx_sys_file_format_max_write(format_id, name);
+	}
+
+	mutex_exit(&file_format_max.mutex);
+
+	return(ret);
+}
+
+/*****************************************************************//**
+Get the name representation of the file format from its id.
+@return	pointer to the max format name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_max_get(void)
+/*=============================*/
+{
+	return(file_format_max.name);
+}
+
+/*****************************************************************//**
+Initializes the tablespace tag system. */
+UNIV_INTERN
+void
+trx_sys_file_format_init(void)
+/*==========================*/
+{
+	mutex_create(&file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
+
+	/* We don't need a mutex here, as this function should only
+	be called once at start up. */
+	file_format_max.id = DICT_TF_FORMAT_51;
+
+	file_format_max.name = trx_sys_file_format_id_to_name(
+		file_format_max.id);
+}
+
+/*****************************************************************//**
+Closes the tablespace tag system. */
+UNIV_INTERN
+void
+trx_sys_file_format_close(void)
+/*===========================*/
+{
+	/* Does nothing at the moment */
+}
+#else /* !UNIV_HOTBACKUP */
+/*****************************************************************//**
+Prints to stderr the MySQL binlog info in the system header if the
+magic number shows it valid. */
+UNIV_INTERN
+void
+trx_sys_print_mysql_binlog_offset_from_page(
+/*========================================*/
+	const byte*	page)	/*!< in: buffer containing the trx
+				system header page, i.e., page number
+				TRX_SYS_PAGE_NO in the tablespace */
+{
+	const trx_sysf_t*	sys_header;
+
+	sys_header = page + TRX_SYS;
+
+	if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
+			     + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
+	    == TRX_SYS_MYSQL_LOG_MAGIC_N) {
+
+		fprintf(stderr,
+			"ibbackup: Last MySQL binlog file position %lu %lu,"
+			" file name %s\n",
+			(ulong) mach_read_from_4(
+				sys_header + TRX_SYS_MYSQL_LOG_INFO
+				+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
+			(ulong) mach_read_from_4(
+				sys_header + TRX_SYS_MYSQL_LOG_INFO
+				+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
+			sys_header + TRX_SYS_MYSQL_LOG_INFO
+			+ TRX_SYS_MYSQL_LOG_NAME);
+	}
+}
+
+
+/* THESE ARE COPIED FROM NON-HOTBACKUP PART OF THE INNODB SOURCE TREE
+   (This code duplicaton should be fixed at some point!)
+*/
+
+#define	TRX_SYS_SPACE	0	/* the SYSTEM tablespace */
+/* The offset of the file format tag on the trx system header page */
+#define TRX_SYS_FILE_FORMAT_TAG		(UNIV_PAGE_SIZE - 16)
+/* We use these random constants to reduce the probability of reading
+garbage (from previous versions) that maps to an actual format id. We
+use these as bit masks at the time of  reading and writing from/to disk. */
+#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW	3645922177UL
+#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH	2745987765UL
+
+/* END OF COPIED DEFINITIONS */
+
+
+/*****************************************************************//**
+Reads the file format id from the first system table space file.
+Even if the call succeeds and returns TRUE, the returned format id
+may be ULINT_UNDEFINED signalling that the format id was not present
+in the data file.
+@return TRUE if call succeeds */
+UNIV_INTERN
+ibool
+trx_sys_read_file_format_id(
+/*========================*/
+	const char *pathname,  /*!< in: pathname of the first system
+				        table space file */
+	ulint *format_id)      /*!< out: file format of the system table
+				         space */
+{
+	os_file_t	file;
+	ibool		success;
+	byte		buf[UNIV_PAGE_SIZE * 2];
+	page_t*		page = ut_align(buf, UNIV_PAGE_SIZE);
+	const byte*	ptr;
+	dulint		file_format_id;
+
+	*format_id = ULINT_UNDEFINED;
+	
+	file = os_file_create_simple_no_error_handling(
+		pathname,
+		OS_FILE_OPEN,
+		OS_FILE_READ_ONLY,
+		&success
+	);
+	if (!success) {
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+        
+		ut_print_timestamp(stderr);
+        
+		fprintf(stderr,
+"  ibbackup: Error: trying to read system tablespace file format,\n"
+"  ibbackup: but could not open the tablespace file %s!\n",
+			pathname
+		);
+		return(FALSE);
+	}
+
+	/* Read the page on which file format is stored */
+
+	success = os_file_read_no_error_handling(
+		file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, 0, UNIV_PAGE_SIZE
+	);
+	if (!success) {
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+        
+		ut_print_timestamp(stderr);
+        
+		fprintf(stderr,
+"  ibbackup: Error: trying to read system table space file format,\n"
+"  ibbackup: but failed to read the tablespace file %s!\n",
+			pathname
+		);
+		os_file_close(file);
+		return(FALSE);
+	}
+	os_file_close(file);
+
+	/* get the file format from the page */
+	ptr = page + TRX_SYS_FILE_FORMAT_TAG;
+	file_format_id = mach_read_from_8(ptr);
+
+	*format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
+
+	if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
+	    || *format_id >= FILE_FORMAT_NAME_N) {
+
+		/* Either it has never been tagged, or garbage in it. */
+		*format_id = ULINT_UNDEFINED;
+		return(TRUE);
+	}
+	
+	return(TRUE);
+}
+
+
+/*****************************************************************//**
+Reads the file format id from the given per-table data file.
+@return TRUE if call succeeds */
+UNIV_INTERN
+ibool
+trx_sys_read_pertable_file_format_id(
+/*=================================*/
+	const char *pathname,  /*!< in: pathname of a per-table
+				        datafile */
+	ulint *format_id)      /*!< out: file format of the per-table
+				         data file */
+{
+	os_file_t	file;
+	ibool		success;
+	byte		buf[UNIV_PAGE_SIZE * 2];
+	page_t*		page = ut_align(buf, UNIV_PAGE_SIZE);
+	const byte*	ptr;
+	ib_uint32_t	flags;
+
+	*format_id = ULINT_UNDEFINED;
+	
+	file = os_file_create_simple_no_error_handling(
+		pathname,
+		OS_FILE_OPEN,
+		OS_FILE_READ_ONLY,
+		&success
+	);
+	if (!success) {
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+        
+		ut_print_timestamp(stderr);
+        
+		fprintf(stderr,
+"  ibbackup: Error: trying to read per-table tablespace format,\n"
+"  ibbackup: but could not open the tablespace file %s!\n",
+			pathname
+		);
+		return(FALSE);
+	}
+
+	/* Read the first page of the per-table datafile */
+
+	success = os_file_read_no_error_handling(
+		file, page, 0, 0, UNIV_PAGE_SIZE
+	);
+	if (!success) {
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+        
+		ut_print_timestamp(stderr);
+        
+		fprintf(stderr,
+"  ibbackup: Error: trying to per-table data file format,\n"
+"  ibbackup: but failed to read the tablespace file %s!\n",
+			pathname
+		);
+		os_file_close(file);
+		return(FALSE);
+	}
+	os_file_close(file);
+
+	/* get the file format from the page */
+	ptr = page + 54;
+	flags = mach_read_from_4(ptr);
+	if (flags == 0) {
+		/* file format is Antelope */
+		*format_id = 0;
+		return (TRUE);
+	} else if (flags & 1) {
+		/* tablespace flags are ok */
+		*format_id = (flags / 32) % 128;
+		return (TRUE);
+	} else {
+		/* bad tablespace flags */
+		return(FALSE);
+	}
+}
+
+
+/*****************************************************************//**
+Get the name representation of the file format from its id.
+@return	pointer to the name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_id_to_name(
+/*===========================*/
+	const ulint	id)	/*!< in: id of the file format */
+{
+	if (!(id < FILE_FORMAT_NAME_N)) {
+		/* unknown id */
+		return ("Unknown");
+	}
+
+	return(file_format_name_map[id]);
+}
+
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0trx.c
similarity index 83%
rename from storage/innobase/trx/trx0trx.c
rename to storage/innodb_plugin/trx/trx0trx.c
index 8ada38845c5..4d4885062a6 100644
--- a/storage/innobase/trx/trx0trx.c
+++ b/storage/innodb_plugin/trx/trx0trx.c
@@ -1,7 +1,24 @@
-/******************************************************
-The transaction
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0trx.c
+The transaction
 
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
@@ -27,82 +44,52 @@ Created 3/26/1996 Heikki Tuuri
 #include "trx0xa.h"
 #include "ha_prototypes.h"
 
-/* Copy of the prototype for innobase_mysql_print_thd: this
-copy MUST be equal to the one in mysql/sql/ha_innodb.cc ! */
+/** Dummy session used currently in MySQL interface */
+UNIV_INTERN sess_t*		trx_dummy_sess = NULL;
 
-void innobase_mysql_print_thd(
-	FILE*	f,
-	void*	thd,
-	ulint	max_query_len);
-
-/* Dummy session used currently in MySQL interface */
-sess_t*		trx_dummy_sess = NULL;
-
-/* Number of transactions currently allocated for MySQL: protected by
+/** Number of transactions currently allocated for MySQL: protected by
 the kernel mutex */
-ulint	trx_n_mysql_transactions = 0;
+UNIV_INTERN ulint	trx_n_mysql_transactions = 0;
 
-/*****************************************************************
-Starts the transaction if it is not yet started. */
-
-void
-trx_start_if_not_started_noninline(
-/*===============================*/
-	trx_t*	trx) /* in: transaction */
-{
-	trx_start_if_not_started(trx);
-}
-
-/*****************************************************************
+/*************************************************************//**
 Set detailed error message for the transaction. */
-
+UNIV_INTERN
 void
 trx_set_detailed_error(
 /*===================*/
-	trx_t*		trx,	/* in: transaction struct */
-	const char*	msg)	/* in: detailed error message */
+	trx_t*		trx,	/*!< in: transaction struct */
+	const char*	msg)	/*!< in: detailed error message */
 {
 	ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
 }
 
-/*****************************************************************
+/*************************************************************//**
 Set detailed error message for the transaction from a file. Note that the
 file is rewinded before reading from it. */
-
+UNIV_INTERN
 void
 trx_set_detailed_error_from_file(
 /*=============================*/
-	trx_t*	trx,	/* in: transaction struct */
-	FILE*	file)	/* in: file to read message from */
+	trx_t*	trx,	/*!< in: transaction struct */
+	FILE*	file)	/*!< in: file to read message from */
 {
 	os_file_read_string(file, trx->detailed_error,
 			    sizeof(trx->detailed_error));
 }
 
-/********************************************************************
-Retrieves the error_info field from a trx. */
-
-void*
-trx_get_error_info(
-/*===============*/
-			/* out: the error info */
-	trx_t*	trx)	/* in: trx object */
-{
-	return(trx->error_info);
-}
-
-/********************************************************************
-Creates and initializes a transaction object. */
-
+/****************************************************************//**
+Creates and initializes a transaction object.
+@return	own: the transaction */
+UNIV_INTERN
 trx_t*
 trx_create(
 /*=======*/
-			/* out, own: the transaction */
-	sess_t*	sess)	/* in: session or NULL */
+	sess_t*	sess)	/*!< in: session */
 {
 	trx_t*	trx;
 
 	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(sess);
 
 	trx = mem_alloc(sizeof(trx_t));
 
@@ -111,6 +98,7 @@ trx_create(
 	trx->op_info = "";
 
 	trx->is_purge = 0;
+	trx->is_recovered = 0;
 	trx->conc_state = TRX_NOT_STARTED;
 	trx->start_time = time(NULL);
 
@@ -127,7 +115,8 @@ trx_create(
 	trx->flush_log_later = FALSE;
 	trx->must_flush_log_later = FALSE;
 
-	trx->dict_operation = FALSE;
+	trx->dict_operation = TRX_DICT_OP_NONE;
+	trx->table_id = ut_dulint_zero;
 
 	trx->mysql_thd = NULL;
 	trx->mysql_query_str = NULL;
@@ -151,6 +140,7 @@ trx_create(
 	trx->undo_no_arr = NULL;
 
 	trx->error_state = DB_SUCCESS;
+	trx->error_key_num = 0;
 	trx->detailed_error[0] = '\0';
 
 	trx->sess = sess;
@@ -180,8 +170,6 @@ trx_create(
 	trx->declared_to_be_inside_innodb = FALSE;
 	trx->n_tickets_to_enter_innodb = 0;
 
-	trx->auto_inc_lock = NULL;
-
 	trx->global_read_view_heap = mem_heap_create(256);
 	trx->global_read_view = NULL;
 	trx->read_view = NULL;
@@ -192,27 +180,25 @@ trx_create(
 
 	trx->n_autoinc_rows = 0;
 
+	/* Remember to free the vector explicitly. */
+	trx->autoinc_locks = ib_vector_create(
+		mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 4), 4);
+
 	return(trx);
 }
 
-/************************************************************************
-Creates a transaction object for MySQL. */
-
+/********************************************************************//**
+Creates a transaction object for MySQL.
+@return	own: transaction object */
+UNIV_INTERN
 trx_t*
 trx_allocate_for_mysql(void)
 /*========================*/
-				/* out, own: transaction object */
 {
 	trx_t*	trx;
 
 	mutex_enter(&kernel_mutex);
 
-	/* Open a dummy session */
-
-	if (!trx_dummy_sess) {
-		trx_dummy_sess = sess_open();
-	}
-
 	trx = trx_create(trx_dummy_sess);
 
 	trx_n_mysql_transactions++;
@@ -228,24 +214,18 @@ trx_allocate_for_mysql(void)
 	return(trx);
 }
 
-/************************************************************************
-Creates a transaction object for background operations by the master thread. */
-
+/********************************************************************//**
+Creates a transaction object for background operations by the master thread.
+@return	own: transaction object */
+UNIV_INTERN
 trx_t*
 trx_allocate_for_background(void)
 /*=============================*/
-				/* out, own: transaction object */
 {
 	trx_t*	trx;
 
 	mutex_enter(&kernel_mutex);
 
-	/* Open a dummy session */
-
-	if (!trx_dummy_sess) {
-		trx_dummy_sess = sess_open();
-	}
-
 	trx = trx_create(trx_dummy_sess);
 
 	mutex_exit(&kernel_mutex);
@@ -253,13 +233,13 @@ trx_allocate_for_background(void)
 	return(trx);
 }
 
-/************************************************************************
+/********************************************************************//**
 Releases the search latch if trx has reserved it. */
-
+UNIV_INTERN
 void
 trx_search_latch_release_if_reserved(
 /*=================================*/
-	trx_t*	   trx) /* in: transaction */
+	trx_t*	   trx) /*!< in: transaction */
 {
 	if (trx->has_search_latch) {
 		rw_lock_s_unlock(&btr_search_latch);
@@ -268,13 +248,13 @@ trx_search_latch_release_if_reserved(
 	}
 }
 
-/************************************************************************
+/********************************************************************//**
 Frees a transaction object. */
-
+UNIV_INTERN
 void
 trx_free(
 /*=====*/
-	trx_t*	trx)	/* in, own: trx object */
+	trx_t*	trx)	/*!< in, own: trx object */
 {
 	ut_ad(mutex_own(&kernel_mutex));
 
@@ -305,6 +285,7 @@ trx_free(
 		trx_print(stderr, trx, 600);
 
 		ut_print_buf(stderr, trx, sizeof(trx_t));
+		putc('\n', stderr);
 	}
 
 	ut_a(trx->magic_n == TRX_MAGIC_N);
@@ -329,7 +310,6 @@ trx_free(
 	ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
 
 	ut_a(!trx->has_search_latch);
-	ut_a(!trx->auto_inc_lock);
 
 	ut_a(trx->dict_operation_lock_mode == 0);
 
@@ -347,16 +327,20 @@ trx_free(
 
 	ut_a(trx->read_view == NULL);
 
+	ut_a(ib_vector_is_empty(trx->autoinc_locks));
+	/* We allocated a dedicated heap for the vector. */
+	ib_vector_free(trx->autoinc_locks);
+
 	mem_free(trx);
 }
 
-/************************************************************************
+/********************************************************************//**
 Frees a transaction object for MySQL. */
-
+UNIV_INTERN
 void
 trx_free_for_mysql(
 /*===============*/
-	trx_t*	trx)	/* in, own: trx object */
+	trx_t*	trx)	/*!< in, own: trx object */
 {
 	mutex_enter(&kernel_mutex);
 
@@ -371,13 +355,13 @@ trx_free_for_mysql(
 	mutex_exit(&kernel_mutex);
 }
 
-/************************************************************************
+/********************************************************************//**
 Frees a transaction object of a background operation of the master thread. */
-
+UNIV_INTERN
 void
 trx_free_for_background(
 /*====================*/
-	trx_t*	trx)	/* in, own: trx object */
+	trx_t*	trx)	/*!< in, own: trx object */
 {
 	mutex_enter(&kernel_mutex);
 
@@ -386,7 +370,7 @@ trx_free_for_background(
 	mutex_exit(&kernel_mutex);
 }
 
-/********************************************************************
+/****************************************************************//**
 Inserts the trx handle in the trx system trx list in the right position.
 The list is sorted on the trx id so that the biggest id is at the list
 start. This function is used at the database startup to insert incomplete
@@ -395,7 +379,7 @@ static
 void
 trx_list_insert_ordered(
 /*====================*/
-	trx_t*	trx)	/* in: trx handle */
+	trx_t*	trx)	/*!< in: trx handle */
 {
 	trx_t*	trx2;
 
@@ -426,13 +410,13 @@ trx_list_insert_ordered(
 	}
 }
 
-/********************************************************************
+/****************************************************************//**
 Creates trx objects for transactions and initializes the trx list of
 trx_sys at database start. Rollback segment and undo log lists must
 already exist when this function is called, because the lists of
 transactions to be rolled back or cleaned up are built based on the
 undo log lists. */
-
+UNIV_INTERN
 void
 trx_lists_init_at_db_start(void)
 /*============================*/
@@ -453,8 +437,9 @@ trx_lists_init_at_db_start(void)
 
 		while (undo != NULL) {
 
-			trx = trx_create(NULL);
+			trx = trx_create(trx_dummy_sess);
 
+			trx->is_recovered = TRUE;
 			trx->id = undo->trx_id;
 			trx->xid = undo->xid;
 			trx->insert_undo = undo;
@@ -469,11 +454,11 @@ trx_lists_init_at_db_start(void)
 				if (undo->state == TRX_UNDO_PREPARED) {
 
 					fprintf(stderr,
-						"InnoDB: Transaction %lu %lu"
+						"InnoDB: Transaction "
+						TRX_ID_FMT
 						" was in the"
 						" XA prepared state.\n",
-						ut_dulint_get_high(trx->id),
-						ut_dulint_get_low(trx->id));
+						TRX_ID_PREP_PRINTF(trx->id));
 
 					if (srv_force_recovery == 0) {
 
@@ -511,7 +496,8 @@ trx_lists_init_at_db_start(void)
 			}
 
 			if (undo->dict_operation) {
-				trx->dict_operation = undo->dict_operation;
+				trx_set_dict_operation(
+					trx, TRX_DICT_OP_TABLE);
 				trx->table_id = undo->table_id;
 			}
 
@@ -531,8 +517,9 @@ trx_lists_init_at_db_start(void)
 			trx = trx_get_on_id(undo->trx_id);
 
 			if (NULL == trx) {
-				trx = trx_create(NULL);
+				trx = trx_create(trx_dummy_sess);
 
+				trx->is_recovered = TRUE;
 				trx->id = undo->trx_id;
 				trx->xid = undo->xid;
 
@@ -544,12 +531,10 @@ trx_lists_init_at_db_start(void)
 
 					if (undo->state == TRX_UNDO_PREPARED) {
 						fprintf(stderr,
-							"InnoDB: Transaction"
-							" %lu %lu was in the"
+							"InnoDB: Transaction "
+							TRX_ID_FMT " was in the"
 							" XA prepared state.\n",
-							ut_dulint_get_high(
-								trx->id),
-							ut_dulint_get_low(
+							TRX_ID_PREP_PRINTF(
 								trx->id));
 
 						if (srv_force_recovery == 0) {
@@ -590,8 +575,8 @@ trx_lists_init_at_db_start(void)
 				trx_list_insert_ordered(trx);
 
 				if (undo->dict_operation) {
-					trx->dict_operation
-						= undo->dict_operation;
+					trx_set_dict_operation(
+						trx, TRX_DICT_OP_TABLE);
 					trx->table_id = undo->table_id;
 				}
 			}
@@ -613,14 +598,14 @@ trx_lists_init_at_db_start(void)
 	}
 }
 
-/**********************************************************************
+/******************************************************************//**
 Assigns a rollback segment to a transaction in a round-robin fashion.
-Skips the SYSTEM rollback segment if another is available. */
+Skips the SYSTEM rollback segment if another is available.
+@return	assigned rollback segment id */
 UNIV_INLINE
 ulint
 trx_assign_rseg(void)
 /*=================*/
-			/* out: assigned rollback segment id */
 {
 	trx_rseg_t*	rseg	= trx_sys->latest_rseg;
 
@@ -647,15 +632,15 @@ loop:
 	return(rseg->id);
 }
 
-/********************************************************************
-Starts a new transaction. */
-
+/****************************************************************//**
+Starts a new transaction.
+@return	TRUE */
+UNIV_INTERN
 ibool
 trx_start_low(
 /*==========*/
-			/* out: TRUE */
-	trx_t*	trx,	/* in: transaction */
-	ulint	rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED
+	trx_t*	trx,	/*!< in: transaction */
+	ulint	rseg_id)/*!< in: rollback segment id; if ULINT_UNDEFINED
 			is passed, the system chooses the rollback segment
 			automatically in a round-robin fashion */
 {
@@ -698,20 +683,28 @@ trx_start_low(
 	return(TRUE);
 }
 
-/********************************************************************
-Starts a new transaction. */
-
+/****************************************************************//**
+Starts a new transaction.
+@return	TRUE */
+UNIV_INTERN
 ibool
 trx_start(
 /*======*/
-			/* out: TRUE */
-	trx_t*	trx,	/* in: transaction */
-	ulint	rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED
+	trx_t*	trx,	/*!< in: transaction */
+	ulint	rseg_id)/*!< in: rollback segment id; if ULINT_UNDEFINED
 			is passed, the system chooses the rollback segment
 			automatically in a round-robin fashion */
 {
 	ibool	ret;
 
+	/* Update the info whether we should skip XA steps that eat CPU time
+	For the duration of the transaction trx->support_xa is not reread
+	from thd so any changes in the value take effect in the next
+	transaction. This is to avoid a scenario where some undo
+	generated by a transaction, has XA stuff, and other undo,
+	generated by the same transaction, doesn't. */
+	trx->support_xa = thd_supports_xa(trx->mysql_thd);
+
 	mutex_enter(&kernel_mutex);
 
 	ret = trx_start_low(trx, rseg_id);
@@ -721,19 +714,18 @@ trx_start(
 	return(ret);
 }
 
-/********************************************************************
+/****************************************************************//**
 Commits a transaction. */
-
+UNIV_INTERN
 void
 trx_commit_off_kernel(
 /*==================*/
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in: transaction */
 {
 	page_t*		update_hdr_page;
-	dulint		lsn;
+	ib_uint64_t	lsn		= 0;
 	trx_rseg_t*	rseg;
 	trx_undo_t*	undo;
-	ibool		must_flush_log	= FALSE;
 	mtr_t		mtr;
 
 	ut_ad(mutex_own(&kernel_mutex));
@@ -748,8 +740,6 @@ trx_commit_off_kernel(
 
 		mtr_start(&mtr);
 
-		must_flush_log = TRUE;
-
 		/* Change the undo log segment states from TRX_UNDO_ACTIVE
 		to some other state: these modifications to the file data
 		structure define the transaction as committed in the file
@@ -848,6 +838,20 @@ trx_commit_off_kernel(
 	trx->conc_state = TRX_COMMITTED_IN_MEMORY;
 	/*--------------------------------------*/
 
+	/* If we release kernel_mutex below and we are still doing
+	recovery i.e.: back ground rollback thread is still active
+	then there is a chance that the rollback thread may see
+	this trx as COMMITTED_IN_MEMORY and goes adhead to clean it
+	up calling trx_cleanup_at_db_startup(). This can happen 
+	in the case we are committing a trx here that is left in
+	PREPARED state during the crash. Note that commit of the
+	rollback of a PREPARED trx happens in the recovery thread
+	while the rollback of other transactions happen in the
+	background thread. To avoid this race we unconditionally
+	unset the is_recovered flag from the trx. */
+
+	trx->is_recovered = FALSE;
+
 	lock_release_off_kernel(trx);
 
 	if (trx->global_read_view) {
@@ -858,7 +862,7 @@ trx_commit_off_kernel(
 
 	trx->read_view = NULL;
 
-	if (must_flush_log) {
+	if (lsn) {
 
 		mutex_exit(&kernel_mutex);
 
@@ -887,11 +891,11 @@ trx_commit_off_kernel(
 		there are > 2 users in the database. Then at least 2 users can
 		gather behind one doing the physical log write to disk.
 
-		If we are calling trx_commit() under MySQL's binlog mutex, we
+		If we are calling trx_commit() under prepare_commit_mutex, we
 		will delay possible log write and flush to a separate function
 		trx_commit_complete_for_mysql(), which is only called when the
-		thread has released the binlog mutex. This is to make the
-		group commit algorithm to work. Otherwise, the MySQL binlog
+		thread has released the mutex. This is to make the
+		group commit algorithm to work. Otherwise, the prepare_commit
 		mutex would serialize all commits and prevent a group of
 		transactions from gathering. */
 
@@ -943,15 +947,15 @@ trx_commit_off_kernel(
 	UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
 }
 
-/********************************************************************
+/****************************************************************//**
 Cleans up a transaction at database startup. The cleanup is needed if
 the transaction already got to the middle of a commit when the database
 crashed, andf we cannot roll it back. */
-
+UNIV_INTERN
 void
 trx_cleanup_at_db_startup(
 /*======================*/
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in: transaction */
 {
 	if (trx->insert_undo != NULL) {
 
@@ -966,16 +970,16 @@ trx_cleanup_at_db_startup(
 	UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
 }
 
-/************************************************************************
+/********************************************************************//**
 Assigns a read view for a consistent read query. All the consistent reads
 within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction. */
-
+when this function is first called for a new started transaction.
+@return	consistent read view */
+UNIV_INTERN
 read_view_t*
 trx_assign_read_view(
 /*=================*/
-			/* out: consistent read view */
-	trx_t*	trx)	/* in: active transaction */
+	trx_t*	trx)	/*!< in: active transaction */
 {
 	ut_ad(trx->conc_state == TRX_ACTIVE);
 
@@ -996,14 +1000,14 @@ trx_assign_read_view(
 	return(trx->read_view);
 }
 
-/********************************************************************
+/****************************************************************//**
 Commits a transaction. NOTE that the kernel mutex is temporarily released. */
 static
 void
 trx_handle_commit_sig_off_kernel(
 /*=============================*/
-	trx_t*		trx,		/* in: transaction */
-	que_thr_t**	next_thr)	/* in/out: next query thread to run;
+	trx_t*		trx,		/*!< in: transaction */
+	que_thr_t**	next_thr)	/*!< in/out: next query thread to run;
 					if the value which is passed in is
 					a pointer to a NULL pointer, then the
 					calling function can start running
@@ -1040,15 +1044,15 @@ trx_handle_commit_sig_off_kernel(
 	trx->que_state = TRX_QUE_RUNNING;
 }
 
-/***************************************************************
+/***********************************************************//**
 The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
 the TRX_QUE_RUNNING state and releases query threads which were
 waiting for a lock in the wait_thrs list. */
-
+UNIV_INTERN
 void
 trx_end_lock_wait(
 /*==============*/
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in: transaction */
 {
 	que_thr_t*	thr;
 
@@ -1068,14 +1072,14 @@ trx_end_lock_wait(
 	trx->que_state = TRX_QUE_RUNNING;
 }
 
-/***************************************************************
+/***********************************************************//**
 Moves the query threads in the lock wait list to the SUSPENDED state and puts
 the transaction to the TRX_QUE_RUNNING state. */
 static
 void
 trx_lock_wait_to_suspended(
 /*=======================*/
-	trx_t*	trx)	/* in: transaction in the TRX_QUE_LOCK_WAIT state */
+	trx_t*	trx)	/*!< in: transaction in the TRX_QUE_LOCK_WAIT state */
 {
 	que_thr_t*	thr;
 
@@ -1095,14 +1099,14 @@ trx_lock_wait_to_suspended(
 	trx->que_state = TRX_QUE_RUNNING;
 }
 
-/***************************************************************
+/***********************************************************//**
 Moves the query threads in the sig reply wait list of trx to the SUSPENDED
 state. */
 static
 void
 trx_sig_reply_wait_to_suspended(
 /*============================*/
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in: transaction */
 {
 	trx_sig_t*	sig;
 	que_thr_t*	thr;
@@ -1126,17 +1130,17 @@ trx_sig_reply_wait_to_suspended(
 	}
 }
 
-/*********************************************************************
+/*****************************************************************//**
 Checks the compatibility of a new signal with the other signals in the
-queue. */
+queue.
+@return	TRUE if the signal can be queued */
 static
 ibool
 trx_sig_is_compatible(
 /*==================*/
-			/* out: TRUE if the signal can be queued */
-	trx_t*	trx,	/* in: trx handle */
-	ulint	type,	/* in: signal type */
-	ulint	sender)	/* in: TRX_SIG_SELF or TRX_SIG_OTHER_SESS */
+	trx_t*	trx,	/*!< in: trx handle */
+	ulint	type,	/*!< in: signal type */
+	ulint	sender)	/*!< in: TRX_SIG_SELF or TRX_SIG_OTHER_SESS */
 {
 	trx_sig_t*	sig;
 
@@ -1200,22 +1204,22 @@ trx_sig_is_compatible(
 	}
 }
 
-/********************************************************************
+/****************************************************************//**
 Sends a signal to a trx object. */
-
+UNIV_INTERN
 void
 trx_sig_send(
 /*=========*/
-	trx_t*		trx,		/* in: trx handle */
-	ulint		type,		/* in: signal type */
-	ulint		sender,		/* in: TRX_SIG_SELF or
+	trx_t*		trx,		/*!< in: trx handle */
+	ulint		type,		/*!< in: signal type */
+	ulint		sender,		/*!< in: TRX_SIG_SELF or
 					TRX_SIG_OTHER_SESS */
-	que_thr_t*	receiver_thr,	/* in: query thread which wants the
+	que_thr_t*	receiver_thr,	/*!< in: query thread which wants the
 					reply, or NULL; if type is
 					TRX_SIG_END_WAIT, this must be NULL */
-	trx_savept_t*	savept,		/* in: possible rollback savepoint, or
+	trx_savept_t*	savept,		/*!< in: possible rollback savepoint, or
 					NULL */
-	que_thr_t**	next_thr)	/* in/out: next query thread to run;
+	que_thr_t**	next_thr)	/*!< in/out: next query thread to run;
 					if the value which is passed in is
 					a pointer to a NULL pointer, then the
 					calling function can start running
@@ -1284,16 +1288,16 @@ trx_sig_send(
 	}
 }
 
-/********************************************************************
+/****************************************************************//**
 Ends signal handling. If the session is in the error state, and
 trx->graph_before_signal_handling != NULL, then returns control to the error
 handling routine of the graph (currently just returns the control to the
 graph root which then will send an error message to the client). */
-
+UNIV_INTERN
 void
 trx_end_signal_handling(
 /*====================*/
-	trx_t*	trx)	/* in: trx */
+	trx_t*	trx)	/*!< in: trx */
 {
 	ut_ad(mutex_own(&kernel_mutex));
 	ut_ad(trx->handling_signals == TRUE);
@@ -1308,14 +1312,14 @@ trx_end_signal_handling(
 	}
 }
 
-/********************************************************************
+/****************************************************************//**
 Starts handling of a trx signal. */
-
+UNIV_INTERN
 void
 trx_sig_start_handle(
 /*=================*/
-	trx_t*		trx,		/* in: trx handle */
-	que_thr_t**	next_thr)	/* in/out: next query thread to run;
+	trx_t*		trx,		/*!< in: trx handle */
+	que_thr_t**	next_thr)	/*!< in/out: next query thread to run;
 					if the value which is passed in is
 					a pointer to a NULL pointer, then the
 					calling function can start running
@@ -1413,15 +1417,15 @@ loop:
 	goto loop;
 }
 
-/********************************************************************
+/****************************************************************//**
 Send the reply message when a signal in the queue of the trx has been
 handled. */
-
+UNIV_INTERN
 void
 trx_sig_reply(
 /*==========*/
-	trx_sig_t*	sig,		/* in: signal */
-	que_thr_t**	next_thr)	/* in/out: next query thread to run;
+	trx_sig_t*	sig,		/*!< in: signal */
+	que_thr_t**	next_thr)	/*!< in/out: next query thread to run;
 					if the value which is passed in is
 					a pointer to a NULL pointer, then the
 					calling function can start running
@@ -1448,14 +1452,14 @@ trx_sig_reply(
 	}
 }
 
-/********************************************************************
+/****************************************************************//**
 Removes a signal object from the trx signal queue. */
-
+UNIV_INTERN
 void
 trx_sig_remove(
 /*===========*/
-	trx_t*		trx,	/* in: trx handle */
-	trx_sig_t*	sig)	/* in, own: signal */
+	trx_t*		trx,	/*!< in: trx handle */
+	trx_sig_t*	sig)	/*!< in, own: signal */
 {
 	ut_ad(trx && sig);
 	ut_ad(mutex_own(&kernel_mutex));
@@ -1470,14 +1474,14 @@ trx_sig_remove(
 	}
 }
 
-/*************************************************************************
-Creates a commit command node struct. */
-
+/*********************************************************************//**
+Creates a commit command node struct.
+@return	own: commit node struct */
+UNIV_INTERN
 commit_node_t*
 commit_node_create(
 /*===============*/
-				/* out, own: commit node struct */
-	mem_heap_t*	heap)	/* in: mem heap where created */
+	mem_heap_t*	heap)	/*!< in: mem heap where created */
 {
 	commit_node_t*	node;
 
@@ -1488,14 +1492,14 @@ commit_node_create(
 	return(node);
 }
 
-/***************************************************************
-Performs an execution step for a commit type node in a query graph. */
-
+/***********************************************************//**
+Performs an execution step for a commit type node in a query graph.
+@return	query thread to run next, or NULL */
+UNIV_INTERN
 que_thr_t*
 trx_commit_step(
 /*============*/
-				/* out: query thread to run next, or NULL */
-	que_thr_t*	thr)	/* in: query thread */
+	que_thr_t*	thr)	/*!< in: query thread */
 {
 	commit_node_t*	node;
 	que_thr_t*	next_thr;
@@ -1536,14 +1540,14 @@ trx_commit_step(
 	return(thr);
 }
 
-/**************************************************************************
-Does the transaction commit for MySQL. */
-
+/**********************************************************************//**
+Does the transaction commit for MySQL.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
 ulint
 trx_commit_for_mysql(
 /*=================*/
-			/* out: 0 or error number */
-	trx_t*	trx)	/* in: trx handle */
+	trx_t*	trx)	/*!< in: trx handle */
 {
 	/* Because we do not do the commit by sending an Innobase
 	sig to the transaction, we must here make sure that trx has been
@@ -1551,30 +1555,10 @@ trx_commit_for_mysql(
 
 	ut_a(trx);
 
-	trx->op_info = "committing";
-
-	/* If we are doing the XA recovery of prepared transactions, then
-	the transaction object does not have an InnoDB session object, and we
-	set the dummy session that we use for all MySQL transactions. */
-
-	if (trx->sess == NULL) {
-		/* Open a dummy session */
-
-		if (!trx_dummy_sess) {
-			mutex_enter(&kernel_mutex);
-
-			if (!trx_dummy_sess) {
-				trx_dummy_sess = sess_open();
-			}
-
-			mutex_exit(&kernel_mutex);
-		}
-
-		trx->sess = trx_dummy_sess;
-	}
-
 	trx_start_if_not_started(trx);
 
+	trx->op_info = "committing";
+
 	mutex_enter(&kernel_mutex);
 
 	trx_commit_off_kernel(trx);
@@ -1583,20 +1567,20 @@ trx_commit_for_mysql(
 
 	trx->op_info = "";
 
-	return(0);
+	return(DB_SUCCESS);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE. */
-
+with trx->flush_log_later == TRUE.
+@return	0 or error number */
+UNIV_INTERN
 ulint
 trx_commit_complete_for_mysql(
 /*==========================*/
-			/* out: 0 or error number */
-	trx_t*	trx)	/* in: trx handle */
+	trx_t*	trx)	/*!< in: trx handle */
 {
-	dulint	lsn	= trx->commit_lsn;
+	ib_uint64_t	lsn	= trx->commit_lsn;
 
 	ut_a(trx);
 
@@ -1633,13 +1617,13 @@ trx_commit_complete_for_mysql(
 	return(0);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Marks the latest SQL statement ended. */
-
+UNIV_INTERN
 void
 trx_mark_sql_stat_end(
 /*==================*/
-	trx_t*	trx)	/* in: trx handle */
+	trx_t*	trx)	/*!< in: trx handle */
 {
 	ut_a(trx);
 
@@ -1650,25 +1634,23 @@ trx_mark_sql_stat_end(
 	trx->last_sql_stat_start.least_undo_no = trx->undo_no;
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Prints info about a transaction to the given file. The caller must own the
 kernel mutex and must have called
 innobase_mysql_prepare_print_arbitrary_thd(), unless he knows that MySQL
 or InnoDB cannot meanwhile change the info printed here. */
-
+UNIV_INTERN
 void
 trx_print(
 /*======*/
-	FILE*	f,		/* in: output stream */
-	trx_t*	trx,		/* in: transaction */
-	ulint	max_query_len)	/* in: max query length to print, or 0 to
+	FILE*	f,		/*!< in: output stream */
+	trx_t*	trx,		/*!< in: transaction */
+	ulint	max_query_len)	/*!< in: max query length to print, or 0 to
 				   use the default max length */
 {
 	ibool	newline;
 
-	fprintf(f, "TRANSACTION %lu %lu",
-		(ulong) ut_dulint_get_high(trx->id),
-		(ulong) ut_dulint_get_low(trx->id));
+	fprintf(f, "TRANSACTION " TRX_ID_FMT, TRX_ID_PREP_PRINTF(trx->id));
 
 	switch (trx->conc_state) {
 	case TRX_NOT_STARTED:
@@ -1700,6 +1682,10 @@ trx_print(
 		fputs(trx->op_info, f);
 	}
 
+	if (trx->is_recovered) {
+		fputs(" recovered trx", f);
+	}
+
 	if (trx->is_purge) {
 		fputs(" purge trx", f);
 	}
@@ -1748,7 +1734,7 @@ trx_print(
 		fputs(", holds adaptive hash latch", f);
 	}
 
-	if (ut_dulint_cmp(trx->undo_no, ut_dulint_zero) != 0) {
+	if (!ut_dulint_is_zero(trx->undo_no)) {
 		newline = TRUE;
 		fprintf(f, ", undo log entries %lu",
 			(ulong) ut_dulint_get_low(trx->undo_no));
@@ -1763,18 +1749,17 @@ trx_print(
 	}
 }
 
-/***********************************************************************
-Compares the "weight" (or size) of two transactions. The weight of one
-transaction is estimated as the number of altered rows + the number of
-locked rows. Transactions that have edited non-transactional tables are
-considered heavier than ones that have not. */
-
+/*******************************************************************//**
+Compares the "weight" (or size) of two transactions. Transactions that
+have edited non-transactional tables are considered heavier than ones
+that have not.
+@return	<0, 0 or >0; similar to strcmp(3) */
+UNIV_INTERN
 int
 trx_weight_cmp(
 /*===========*/
-			/* out: <0, 0 or >0; similar to strcmp(3) */
-	trx_t*	a,	/* in: the first transaction to be compared */
-	trx_t*	b)	/* in: the second transaction to be compared */
+	const trx_t*	a,	/*!< in: the first transaction to be compared */
+	const trx_t*	b)	/*!< in: the second transaction to be compared */
 {
 	ibool	a_notrans_edit;
 	ibool	b_notrans_edit;
@@ -1812,24 +1797,20 @@ trx_weight_cmp(
 		UT_LIST_GET_LEN(b->trx_locks));
 #endif
 
-#define TRX_WEIGHT(t)	\
-	ut_dulint_add((t)->undo_no, UT_LIST_GET_LEN((t)->trx_locks))
-
 	return(ut_dulint_cmp(TRX_WEIGHT(a), TRX_WEIGHT(b)));
 }
 
-/********************************************************************
+/****************************************************************//**
 Prepares a transaction. */
-
+UNIV_INTERN
 void
 trx_prepare_off_kernel(
 /*===================*/
-	trx_t*	trx)	/* in: transaction */
+	trx_t*	trx)	/*!< in: transaction */
 {
 	page_t*		update_hdr_page;
 	trx_rseg_t*	rseg;
-	ibool		must_flush_log	= FALSE;
-	dulint		lsn;
+	ib_uint64_t	lsn		= 0;
 	mtr_t		mtr;
 
 	ut_ad(mutex_own(&kernel_mutex));
@@ -1842,8 +1823,6 @@ trx_prepare_off_kernel(
 
 		mtr_start(&mtr);
 
-		must_flush_log = TRUE;
-
 		/* Change the undo log segment states from TRX_UNDO_ACTIVE
 		to TRX_UNDO_PREPARED: these modifications to the file data
 		structure define the transaction as prepared in the
@@ -1884,7 +1863,7 @@ trx_prepare_off_kernel(
 	trx->conc_state = TRX_PREPARED;
 	/*--------------------------------------*/
 
-	if (must_flush_log) {
+	if (lsn) {
 		/* Depending on the my.cnf options, we may now write the log
 		buffer to the log files, making the prepared state of the
 		transaction durable if the OS does not crash. We may also
@@ -1931,14 +1910,14 @@ trx_prepare_off_kernel(
 	}
 }
 
-/**************************************************************************
-Does the transaction prepare for MySQL. */
-
+/**********************************************************************//**
+Does the transaction prepare for MySQL.
+@return	0 or error number */
+UNIV_INTERN
 ulint
 trx_prepare_for_mysql(
 /*==================*/
-			/* out: 0 or error number */
-	trx_t*	trx)	/* in: trx handle */
+	trx_t*	trx)	/*!< in: trx handle */
 {
 	/* Because we do not do the prepare by sending an Innobase
 	sig to the transaction, we must here make sure that trx has been
@@ -1961,17 +1940,16 @@ trx_prepare_for_mysql(
 	return(0);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 This function is used to find number of prepared transactions and
-their transaction objects for a recovery. */
-
+their transaction objects for a recovery.
+@return	number of prepared transactions stored in xid_list */
+UNIV_INTERN
 int
 trx_recover_for_mysql(
 /*==================*/
-				/* out: number of prepared transactions
-				stored in xid_list */
-	XID*	xid_list,	/* in/out: prepared transactions */
-	ulint	len)		/* in: number of slots in xid_list */
+	XID*	xid_list,	/*!< in/out: prepared transactions */
+	ulint	len)		/*!< in: number of slots in xid_list */
 {
 	trx_t*	trx;
 	ulint	count = 0;
@@ -1999,10 +1977,9 @@ trx_recover_for_mysql(
 
 			ut_print_timestamp(stderr);
 			fprintf(stderr,
-				"  InnoDB: Transaction %lu %lu in"
+				"  InnoDB: Transaction " TRX_ID_FMT " in"
 				" prepared state after recovery\n",
-				(ulong) ut_dulint_get_high(trx->id),
-				(ulong) ut_dulint_get_low(trx->id));
+				TRX_ID_PREP_PRINTF(trx->id));
 
 			ut_print_timestamp(stderr);
 			fprintf(stderr,
@@ -2034,15 +2011,15 @@ trx_recover_for_mysql(
 	return ((int) count);
 }
 
-/***********************************************************************
+/*******************************************************************//**
 This function is used to find one X/Open XA distributed transaction
-which is in the prepared state */
-
+which is in the prepared state
+@return	trx or NULL */
+UNIV_INTERN
 trx_t*
 trx_get_trx_by_xid(
 /*===============*/
-			/* out: trx or NULL */
-	XID*	xid)	/* in: X/Open XA transaction identification */
+	XID*	xid)	/*!< in: X/Open XA transaction identification */
 {
 	trx_t*	trx;
 
diff --git a/storage/innobase/trx/trx0undo.c b/storage/innodb_plugin/trx/trx0undo.c
similarity index 69%
rename from storage/innobase/trx/trx0undo.c
rename to storage/innodb_plugin/trx/trx0undo.c
index b31580d0ce0..9af96f14526 100644
--- a/storage/innobase/trx/trx0undo.c
+++ b/storage/innodb_plugin/trx/trx0undo.c
@@ -1,7 +1,24 @@
-/******************************************************
-Transaction undo log
+/*****************************************************************************
 
-(c) 1996 Innobase Oy
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0undo.c
+Transaction undo log
 
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
@@ -13,13 +30,14 @@ Created 3/26/1996 Heikki Tuuri
 #endif
 
 #include "fsp0fsp.h"
+#ifndef UNIV_HOTBACKUP
 #include "mach0data.h"
+#include "mtr0log.h"
 #include "trx0rseg.h"
 #include "trx0trx.h"
 #include "srv0srv.h"
 #include "trx0rec.h"
 #include "trx0purge.h"
-#include "trx0xa.h"
 
 /* How should the old versions in the history list be managed?
    ----------------------------------------------------------
@@ -75,74 +93,79 @@ it until a truncate operation occurs, which can remove undo logs from the end
 of the list and release undo log segments. In stepping through the list,
 s-latches on the undo log pages are enough, but in a truncate, x-latches must
 be obtained on the rollback segment and individual pages. */
+#endif /* !UNIV_HOTBACKUP */
 
-/************************************************************************
+/********************************************************************//**
 Initializes the fields in an undo log segment page. */
 static
 void
 trx_undo_page_init(
 /*===============*/
-	page_t* undo_page,	/* in: undo log segment page */
-	ulint	type,		/* in: undo log segment type */
-	mtr_t*	mtr);		/* in: mtr */
-/************************************************************************
-Creates and initializes an undo log memory object. */
+	page_t* undo_page,	/*!< in: undo log segment page */
+	ulint	type,		/*!< in: undo log segment type */
+	mtr_t*	mtr);		/*!< in: mtr */
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Creates and initializes an undo log memory object.
+@return	own: the undo log memory object */
 static
 trx_undo_t*
 trx_undo_mem_create(
 /*================*/
-				/* out, own: the undo log memory object */
-	trx_rseg_t*	rseg,	/* in: rollback segment memory object */
-	ulint		id,	/* in: slot index within rseg */
-	ulint		type,	/* in: type of the log: TRX_UNDO_INSERT or
+	trx_rseg_t*	rseg,	/*!< in: rollback segment memory object */
+	ulint		id,	/*!< in: slot index within rseg */
+	ulint		type,	/*!< in: type of the log: TRX_UNDO_INSERT or
 				TRX_UNDO_UPDATE */
-	dulint		trx_id,	/* in: id of the trx for which the undo log
+	trx_id_t	trx_id,	/*!< in: id of the trx for which the undo log
 				is created */
-	XID*		xid,	/* in: X/Open XA transaction identification*/
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset);/* in: undo log header byte offset on page */
-/*******************************************************************
+	const XID*	xid,	/*!< in: X/Open XA transaction identification*/
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset);/*!< in: undo log header byte offset on page */
+#endif /* !UNIV_HOTBACKUP */
+/***************************************************************//**
 Initializes a cached insert undo log header page for new use. NOTE that this
 function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function! */
+the operation of this function!
+@return	undo log header byte offset on page */
 static
 ulint
 trx_undo_insert_header_reuse(
 /*=========================*/
-				/* out: undo log header byte offset on page */
-	page_t*	undo_page,	/* in: insert undo log segment header page,
-				x-latched */
-	dulint	trx_id,		/* in: transaction id */
-	mtr_t*	mtr);		/* in: mtr */
-/**************************************************************************
+	page_t*		undo_page,	/*!< in/out: insert undo log segment
+					header page, x-latched */
+	trx_id_t	trx_id,		/*!< in: transaction id */
+	mtr_t*		mtr);		/*!< in: mtr */
+/**********************************************************************//**
 If an update undo log can be discarded immediately, this function frees the
 space, resetting the page to the proper state for caching. */
 static
 void
 trx_undo_discard_latest_update_undo(
 /*================================*/
-	page_t*	undo_page,	/* in: header page of an undo log of size 1 */
-	mtr_t*	mtr);		/* in: mtr */
+	page_t*	undo_page,	/*!< in: header page of an undo log of size 1 */
+	mtr_t*	mtr);		/*!< in: mtr */
 
-
-/***************************************************************************
-Gets the previous record in an undo log from the previous page. */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Gets the previous record in an undo log from the previous page.
+@return	undo log record, the page s-latched, NULL if none */
 static
 trx_undo_rec_t*
 trx_undo_get_prev_rec_from_prev_page(
 /*=================================*/
-				/* out: undo log record, the page s-latched,
-				NULL if none */
-	trx_undo_rec_t*	rec,	/* in: undo record */
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset,	/* in: undo log header offset on page */
-	mtr_t*		mtr)	/* in: mtr */
+	trx_undo_rec_t*	rec,	/*!< in: undo record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset,	/*!< in: undo log header offset on page */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
+	ulint	space;
+	ulint	zip_size;
 	ulint	prev_page_no;
 	page_t* prev_page;
 	page_t*	undo_page;
 
-	undo_page = buf_frame_align(rec);
+	undo_page = page_align(rec);
 
 	prev_page_no = flst_get_prev_addr(undo_page + TRX_UNDO_PAGE_HDR
 					  + TRX_UNDO_PAGE_NODE, mtr)
@@ -153,24 +176,26 @@ trx_undo_get_prev_rec_from_prev_page(
 		return(NULL);
 	}
 
-	prev_page = trx_undo_page_get_s_latched(
-		buf_frame_get_space_id(undo_page), prev_page_no, mtr);
+	space = page_get_space_id(undo_page);
+	zip_size = fil_space_get_zip_size(space);
+
+	prev_page = trx_undo_page_get_s_latched(space, zip_size,
+						prev_page_no, mtr);
 
 	return(trx_undo_page_get_last_rec(prev_page, page_no, offset));
 }
 
-/***************************************************************************
-Gets the previous record in an undo log. */
-
+/***********************************************************************//**
+Gets the previous record in an undo log.
+@return	undo log record, the page s-latched, NULL if none */
+UNIV_INTERN
 trx_undo_rec_t*
 trx_undo_get_prev_rec(
 /*==================*/
-				/* out: undo log record, the page s-latched,
-				NULL if none */
-	trx_undo_rec_t*	rec,	/* in: undo record */
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset,	/* in: undo log header offset on page */
-	mtr_t*		mtr)	/* in: mtr */
+	trx_undo_rec_t*	rec,	/*!< in: undo record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset,	/*!< in: undo log header offset on page */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	trx_undo_rec_t*	prev_rec;
 
@@ -188,27 +213,28 @@ trx_undo_get_prev_rec(
 						    mtr));
 }
 
-/***************************************************************************
-Gets the next record in an undo log from the next page. */
+/***********************************************************************//**
+Gets the next record in an undo log from the next page.
+@return	undo log record, the page latched, NULL if none */
 static
 trx_undo_rec_t*
 trx_undo_get_next_rec_from_next_page(
 /*=================================*/
-			/* out: undo log record, the page latched, NULL if
-			none */
-	page_t*	undo_page, /* in: undo log page */
-	ulint	page_no,/* in: undo log header page number */
-	ulint	offset,	/* in: undo log header offset on page */
-	ulint	mode,	/* in: latch mode: RW_S_LATCH or RW_X_LATCH */
-	mtr_t*	mtr)	/* in: mtr */
+	ulint	space,	/*!< in: undo log header space */
+	ulint	zip_size,/*!< in: compressed page size in bytes
+			or 0 for uncompressed pages */
+	page_t*	undo_page, /*!< in: undo log page */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset,	/*!< in: undo log header offset on page */
+	ulint	mode,	/*!< in: latch mode: RW_S_LATCH or RW_X_LATCH */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	trx_ulogf_t*	log_hdr;
 	ulint		next_page_no;
 	page_t*		next_page;
-	ulint		space;
 	ulint		next;
 
-	if (page_no == buf_frame_get_page_no(undo_page)) {
+	if (page_no == page_get_page_no(undo_page)) {
 
 		log_hdr = undo_page + offset;
 		next = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
@@ -219,8 +245,6 @@ trx_undo_get_next_rec_from_next_page(
 		}
 	}
 
-	space = buf_frame_get_space_id(undo_page);
-
 	next_page_no = flst_get_next_addr(undo_page + TRX_UNDO_PAGE_HDR
 					  + TRX_UNDO_PAGE_NODE, mtr)
 		.page;
@@ -230,29 +254,31 @@ trx_undo_get_next_rec_from_next_page(
 	}
 
 	if (mode == RW_S_LATCH) {
-		next_page = trx_undo_page_get_s_latched(space, next_page_no,
-							mtr);
+		next_page = trx_undo_page_get_s_latched(space, zip_size,
+							next_page_no, mtr);
 	} else {
 		ut_ad(mode == RW_X_LATCH);
-		next_page = trx_undo_page_get(space, next_page_no, mtr);
+		next_page = trx_undo_page_get(space, zip_size,
+					      next_page_no, mtr);
 	}
 
 	return(trx_undo_page_get_first_rec(next_page, page_no, offset));
 }
 
-/***************************************************************************
-Gets the next record in an undo log. */
-
+/***********************************************************************//**
+Gets the next record in an undo log.
+@return	undo log record, the page s-latched, NULL if none */
+UNIV_INTERN
 trx_undo_rec_t*
 trx_undo_get_next_rec(
 /*==================*/
-				/* out: undo log record, the page s-latched,
-				NULL if none */
-	trx_undo_rec_t*	rec,	/* in: undo record */
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset,	/* in: undo log header offset on page */
-	mtr_t*		mtr)	/* in: mtr */
+	trx_undo_rec_t*	rec,	/*!< in: undo record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset,	/*!< in: undo log header offset on page */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
+	ulint		space;
+	ulint		zip_size;
 	trx_undo_rec_t*	next_rec;
 
 	next_rec = trx_undo_page_get_next_rec(rec, page_no, offset);
@@ -261,32 +287,38 @@ trx_undo_get_next_rec(
 		return(next_rec);
 	}
 
-	return(trx_undo_get_next_rec_from_next_page(buf_frame_align(rec),
+	space = page_get_space_id(page_align(rec));
+	zip_size = fil_space_get_zip_size(space);
+
+	return(trx_undo_get_next_rec_from_next_page(space, zip_size,
+						    page_align(rec),
 						    page_no, offset,
 						    RW_S_LATCH, mtr));
 }
 
-/***************************************************************************
-Gets the first record in an undo log. */
-
+/***********************************************************************//**
+Gets the first record in an undo log.
+@return	undo log record, the page latched, NULL if none */
+UNIV_INTERN
 trx_undo_rec_t*
 trx_undo_get_first_rec(
 /*===================*/
-			/* out: undo log record, the page latched, NULL if
-			none */
-	ulint	space,	/* in: undo log header space */
-	ulint	page_no,/* in: undo log header page number */
-	ulint	offset,	/* in: undo log header offset on page */
-	ulint	mode,	/* in: latching mode: RW_S_LATCH or RW_X_LATCH */
-	mtr_t*	mtr)	/* in: mtr */
+	ulint	space,	/*!< in: undo log header space */
+	ulint	zip_size,/*!< in: compressed page size in bytes
+			or 0 for uncompressed pages */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset,	/*!< in: undo log header offset on page */
+	ulint	mode,	/*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */
+	mtr_t*	mtr)	/*!< in: mtr */
 {
 	page_t*		undo_page;
 	trx_undo_rec_t*	rec;
 
 	if (mode == RW_S_LATCH) {
-		undo_page = trx_undo_page_get_s_latched(space, page_no, mtr);
+		undo_page = trx_undo_page_get_s_latched(space, zip_size,
+							page_no, mtr);
 	} else {
-		undo_page = trx_undo_page_get(space, page_no, mtr);
+		undo_page = trx_undo_page_get(space, zip_size, page_no, mtr);
 	}
 
 	rec = trx_undo_page_get_first_rec(undo_page, page_no, offset);
@@ -295,38 +327,42 @@ trx_undo_get_first_rec(
 		return(rec);
 	}
 
-	return(trx_undo_get_next_rec_from_next_page(undo_page, page_no, offset,
+	return(trx_undo_get_next_rec_from_next_page(space, zip_size,
+						    undo_page, page_no, offset,
 						    mode, mtr));
 }
 
 /*============== UNDO LOG FILE COPY CREATION AND FREEING ==================*/
 
-/**************************************************************************
+/**********************************************************************//**
 Writes the mtr log entry of an undo log page initialization. */
 UNIV_INLINE
 void
 trx_undo_page_init_log(
 /*===================*/
-	page_t* undo_page,	/* in: undo log page */
-	ulint	type,		/* in: undo log type */
-	mtr_t*	mtr)		/* in: mtr */
+	page_t* undo_page,	/*!< in: undo log page */
+	ulint	type,		/*!< in: undo log type */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
 	mlog_write_initial_log_record(undo_page, MLOG_UNDO_INIT, mtr);
 
 	mlog_catenate_ulint_compressed(mtr, type);
 }
+#else /* !UNIV_HOTBACKUP */
+# define trx_undo_page_init_log(undo_page,type,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
 
-/***************************************************************
-Parses the redo log entry of an undo log page initialization. */
-
+/***********************************************************//**
+Parses the redo log entry of an undo log page initialization.
+@return	end of log record or NULL */
+UNIV_INTERN
 byte*
 trx_undo_parse_page_init(
 /*=====================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr)	/* in: mtr or NULL */
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr)	/*!< in: mtr or NULL */
 {
 	ulint	type;
 
@@ -344,15 +380,15 @@ trx_undo_parse_page_init(
 	return(ptr);
 }
 
-/************************************************************************
+/********************************************************************//**
 Initializes the fields in an undo log segment page. */
 static
 void
 trx_undo_page_init(
 /*===============*/
-	page_t* undo_page,	/* in: undo log segment page */
-	ulint	type,		/* in: undo log segment type */
-	mtr_t*	mtr)		/* in: mtr */
+	page_t* undo_page,	/*!< in: undo log segment page */
+	ulint	type,		/*!< in: undo log segment type */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
 	trx_upagef_t*	page_hdr;
 
@@ -370,29 +406,29 @@ trx_undo_page_init(
 	trx_undo_page_init_log(undo_page, type, mtr);
 }
 
-/*******************************************************************
-Creates a new undo log segment in file. */
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Creates a new undo log segment in file.
+@return DB_SUCCESS if page creation OK possible error codes are:
+DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE */
 static
 ulint
 trx_undo_seg_create(
 /*================*/
-				/* out: DB_SUCCESS if page creation OK
-				possible error codes are:
-				DB_TOO_MANY_CONCURRENT_TRXS
-				DB_OUT_OF_FILE_SPACE */
-	trx_rseg_t*	rseg __attribute__((unused)),/* in: rollback segment */
-	trx_rsegf_t*	rseg_hdr,/* in: rollback segment header, page
+	trx_rseg_t*	rseg __attribute__((unused)),/*!< in: rollback segment */
+	trx_rsegf_t*	rseg_hdr,/*!< in: rollback segment header, page
 				x-latched */
-	ulint		type,	/* in: type of the segment: TRX_UNDO_INSERT or
+	ulint		type,	/*!< in: type of the segment: TRX_UNDO_INSERT or
 				TRX_UNDO_UPDATE */
-	ulint*		id,	/* out: slot index within rseg header */
+	ulint*		id,	/*!< out: slot index within rseg header */
 	page_t**	undo_page,
-				/* out: segment header page x-latched, NULL
+				/*!< out: segment header page x-latched, NULL
 				if there was an error */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ulint		slot_no;
 	ulint		space;
+	buf_block_t*	block;
 	trx_upagef_t*	page_hdr;
 	trx_usegf_t*	seg_hdr;
 	ulint		n_reserved;
@@ -418,7 +454,7 @@ trx_undo_seg_create(
 		return(DB_TOO_MANY_CONCURRENT_TRXS);
 	}
 
-	space = buf_frame_get_space_id(rseg_hdr);
+	space = page_get_space_id(page_align(rseg_hdr));
 
 	success = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO,
 					   mtr);
@@ -428,21 +464,21 @@ trx_undo_seg_create(
 	}
 
 	/* Allocate a new file segment for the undo log */
-	*undo_page = fseg_create_general(space, 0,
-					TRX_UNDO_SEG_HDR
-					+ TRX_UNDO_FSEG_HEADER, TRUE, mtr);
+	block = fseg_create_general(space, 0,
+				    TRX_UNDO_SEG_HDR
+				    + TRX_UNDO_FSEG_HEADER, TRUE, mtr);
 
 	fil_space_release_free_extents(space, n_reserved);
 
-	if (*undo_page == NULL) {
+	if (block == NULL) {
 		/* No space left */
 
 		return(DB_OUT_OF_FILE_SPACE);
 	}
 
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(*undo_page, SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+	buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
+
+	*undo_page = buf_block_get_frame(block);
 
 	page_hdr = *undo_page + TRX_UNDO_PAGE_HDR;
 	seg_hdr = *undo_page + TRX_UNDO_SEG_HDR;
@@ -461,43 +497,46 @@ trx_undo_seg_create(
 		      page_hdr + TRX_UNDO_PAGE_NODE, mtr);
 
 	trx_rsegf_set_nth_undo(rseg_hdr, slot_no,
-				buf_frame_get_page_no(*undo_page), mtr);
-
+			       page_get_page_no(*undo_page), mtr);
 	*id = slot_no;
 
 	return(err);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Writes the mtr log entry of an undo log header initialization. */
 UNIV_INLINE
 void
 trx_undo_header_create_log(
 /*=======================*/
-	page_t* undo_page,	/* in: undo log header page */
-	dulint	trx_id,		/* in: transaction id */
-	mtr_t*	mtr)		/* in: mtr */
+	const page_t*	undo_page,	/*!< in: undo log header page */
+	trx_id_t	trx_id,		/*!< in: transaction id */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_CREATE, mtr);
 
 	mlog_catenate_dulint_compressed(mtr, trx_id);
 }
+#else /* !UNIV_HOTBACKUP */
+# define trx_undo_header_create_log(undo_page,trx_id,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
 
-/*******************************************************************
+/***************************************************************//**
 Creates a new undo log header in file. NOTE that this function has its own
 log record type MLOG_UNDO_HDR_CREATE. You must NOT change the operation of
-this function! */
+this function!
+@return	header byte offset on page */
 static
 ulint
 trx_undo_header_create(
 /*===================*/
-				/* out: header byte offset on page */
-	page_t*	undo_page,	/* in: undo log segment header page,
-				x-latched; it is assumed that there are
-				TRX_UNDO_LOG_XA_HDR_SIZE bytes free space
-				on it */
-	dulint	trx_id,		/* in: transaction id */
-	mtr_t*	mtr)		/* in: mtr */
+	page_t*		undo_page,	/*!< in/out: undo log segment
+					header page, x-latched; it is
+					assumed that there is
+					TRX_UNDO_LOG_XA_HDR_SIZE bytes
+					free space on it */
+	trx_id_t	trx_id,		/*!< in: transaction id */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	trx_upagef_t*	page_hdr;
 	trx_usegf_t*	seg_hdr;
@@ -555,15 +594,16 @@ trx_undo_header_create(
 	return(free);
 }
 
-/************************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
 Write X/Open XA Transaction Identification (XID) to undo log header */
 static
 void
 trx_undo_write_xid(
 /*===============*/
-	trx_ulogf_t*	log_hdr,/* in: undo log header */
-	const XID*	xid,	/* in: X/Open XA Transaction Identification */
-	mtr_t*		mtr)	/* in: mtr */
+	trx_ulogf_t*	log_hdr,/*!< in: undo log header */
+	const XID*	xid,	/*!< in: X/Open XA Transaction Identification */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT,
 			 (ulint)xid->formatID, MLOG_4BYTES, mtr);
@@ -578,14 +618,14 @@ trx_undo_write_xid(
 			  XIDDATASIZE, mtr);
 }
 
-/************************************************************************
+/********************************************************************//**
 Read X/Open XA Transaction Identification (XID) from undo log header */
 static
 void
 trx_undo_read_xid(
 /*==============*/
-	trx_ulogf_t*	log_hdr,/* in: undo log header */
-	XID*		xid)	/* out: X/Open XA Transaction Identification */
+	trx_ulogf_t*	log_hdr,/*!< in: undo log header */
+	XID*		xid)	/*!< out: X/Open XA Transaction Identification */
 {
 	xid->formatID = (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT);
 
@@ -597,15 +637,15 @@ trx_undo_read_xid(
 	memcpy(xid->data, log_hdr + TRX_UNDO_XA_XID, XIDDATASIZE);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Adds space for the XA XID after an undo log old-style header. */
 static
 void
 trx_undo_header_add_space_for_xid(
 /*==============================*/
-	page_t*		undo_page,/* in: undo log segment header page */
-	trx_ulogf_t*	log_hdr,/* in: undo log header */
-	mtr_t*		mtr)	/* in: mtr */
+	page_t*		undo_page,/*!< in: undo log segment header page */
+	trx_ulogf_t*	log_hdr,/*!< in: undo log header */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	trx_upagef_t*	page_hdr;
 	ulint		free;
@@ -635,35 +675,38 @@ trx_undo_header_add_space_for_xid(
 			 MLOG_2BYTES, mtr);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Writes the mtr log entry of an undo log header reuse. */
 UNIV_INLINE
 void
 trx_undo_insert_header_reuse_log(
 /*=============================*/
-	page_t* undo_page,	/* in: undo log header page */
-	dulint	trx_id,		/* in: transaction id */
-	mtr_t*	mtr)		/* in: mtr */
+	const page_t*	undo_page,	/*!< in: undo log header page */
+	trx_id_t	trx_id,		/*!< in: transaction id */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr);
 
 	mlog_catenate_dulint_compressed(mtr, trx_id);
 }
+#else /* !UNIV_HOTBACKUP */
+# define trx_undo_insert_header_reuse_log(undo_page,trx_id,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
 
-/***************************************************************
-Parses the redo log entry of an undo log page header create or reuse. */
-
+/***********************************************************//**
+Parses the redo log entry of an undo log page header create or reuse.
+@return	end of log record or NULL */
+UNIV_INTERN
 byte*
 trx_undo_parse_page_header(
 /*=======================*/
-			/* out: end of log record or NULL */
-	ulint	type,	/* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr)	/* in: mtr or NULL */
+	ulint	type,	/*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr)	/*!< in: mtr or NULL */
 {
-	dulint	trx_id;
+	trx_id_t	trx_id;
 
 	ptr = mach_dulint_parse_compressed(ptr, end_ptr, &trx_id);
 
@@ -684,19 +727,19 @@ trx_undo_parse_page_header(
 	return(ptr);
 }
 
-/*******************************************************************
+/***************************************************************//**
 Initializes a cached insert undo log header page for new use. NOTE that this
 function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function! */
+the operation of this function!
+@return	undo log header byte offset on page */
 static
 ulint
 trx_undo_insert_header_reuse(
 /*=========================*/
-				/* out: undo log header byte offset on page */
-	page_t*	undo_page,	/* in: insert undo log segment header page,
-				x-latched */
-	dulint	trx_id,		/* in: transaction id */
-	mtr_t*	mtr)		/* in: mtr */
+	page_t*		undo_page,	/*!< in/out: insert undo log segment
+					header page, x-latched */
+	trx_id_t	trx_id,		/*!< in: transaction id */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	trx_upagef_t*	page_hdr;
 	trx_usegf_t*	seg_hdr;
@@ -744,29 +787,33 @@ trx_undo_insert_header_reuse(
 	return(free);
 }
 
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
 Writes the redo log entry of an update undo log header discard. */
 UNIV_INLINE
 void
 trx_undo_discard_latest_log(
 /*========================*/
-	page_t* undo_page,	/* in: undo log header page */
-	mtr_t*	mtr)		/* in: mtr */
+	page_t* undo_page,	/*!< in: undo log header page */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
 	mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_DISCARD, mtr);
 }
+#else /* !UNIV_HOTBACKUP */
+# define trx_undo_discard_latest_log(undo_page, mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
 
-/***************************************************************
-Parses the redo log entry of an undo log page header discard. */
-
+/***********************************************************//**
+Parses the redo log entry of an undo log page header discard.
+@return	end of log record or NULL */
+UNIV_INTERN
 byte*
 trx_undo_parse_discard_latest(
 /*==========================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr __attribute__((unused)), /* in: buffer end */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr)	/* in: mtr or NULL */
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr __attribute__((unused)), /*!< in: buffer end */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr)	/*!< in: mtr or NULL */
 {
 	ut_ad(end_ptr);
 
@@ -777,15 +824,15 @@ trx_undo_parse_discard_latest(
 	return(ptr);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 If an update undo log can be discarded immediately, this function frees the
 space, resetting the page to the proper state for caching. */
 static
 void
 trx_undo_discard_latest_update_undo(
 /*================================*/
-	page_t*	undo_page,	/* in: header page of an undo log of size 1 */
-	mtr_t*	mtr)		/* in: mtr */
+	page_t*	undo_page,	/*!< in: header page of an undo log of size 1 */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
 	trx_usegf_t*	seg_hdr;
 	trx_upagef_t*	page_hdr;
@@ -819,17 +866,17 @@ trx_undo_discard_latest_update_undo(
 	trx_undo_discard_latest_log(undo_page, mtr);
 }
 
-/************************************************************************
-Tries to add a page to the undo log segment where the undo log is placed. */
-
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Tries to add a page to the undo log segment where the undo log is placed.
+@return	page number if success, else FIL_NULL */
+UNIV_INTERN
 ulint
 trx_undo_add_page(
 /*==============*/
-				/* out: page number if success, else
-				FIL_NULL */
-	trx_t*		trx,	/* in: transaction */
-	trx_undo_t*	undo,	/* in: undo log memory object */
-	mtr_t*		mtr)	/* in: mtr which does not have a latch to any
+	trx_t*		trx,	/*!< in: transaction */
+	trx_undo_t*	undo,	/*!< in: undo log memory object */
+	mtr_t*		mtr)	/*!< in: mtr which does not have a latch to any
 				undo log page; the caller must have reserved
 				the rollback segment mutex */
 {
@@ -851,7 +898,8 @@ trx_undo_add_page(
 		return(FIL_NULL);
 	}
 
-	header_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+	header_page = trx_undo_page_get(undo->space, undo->zip_size,
+					undo->hdr_page_no, mtr);
 
 	success = fsp_reserve_free_extents(&n_reserved, undo->space, 1,
 					   FSP_UNDO, mtr);
@@ -876,7 +924,8 @@ trx_undo_add_page(
 
 	undo->last_page_no = page_no;
 
-	new_page = trx_undo_page_get(undo->space, page_no, mtr);
+	new_page = trx_undo_page_get(undo->space, undo->zip_size,
+				     page_no, mtr);
 
 	trx_undo_page_init(new_page, undo->type, mtr);
 
@@ -888,21 +937,21 @@ trx_undo_add_page(
 	return(page_no);
 }
 
-/************************************************************************
-Frees an undo log page that is not the header page. */
+/********************************************************************//**
+Frees an undo log page that is not the header page.
+@return	last page number in remaining log */
 static
 ulint
 trx_undo_free_page(
 /*===============*/
-				/* out: last page number in remaining log */
-	trx_rseg_t* rseg,	/* in: rollback segment */
-	ibool	in_history,	/* in: TRUE if the undo log is in the history
+	trx_rseg_t* rseg,	/*!< in: rollback segment */
+	ibool	in_history,	/*!< in: TRUE if the undo log is in the history
 				list */
-	ulint	space,		/* in: space */
-	ulint	hdr_page_no,	/* in: header page number */
-	ulint	page_no,	/* in: page number to free: must not be the
+	ulint	space,		/*!< in: space */
+	ulint	hdr_page_no,	/*!< in: header page number */
+	ulint	page_no,	/*!< in: page number to free: must not be the
 				header page */
-	mtr_t*	mtr)		/* in: mtr which does not have a latch to any
+	mtr_t*	mtr)		/*!< in: mtr which does not have a latch to any
 				undo log page; the caller must have reserved
 				the rollback segment mutex */
 {
@@ -911,14 +960,17 @@ trx_undo_free_page(
 	fil_addr_t	last_addr;
 	trx_rsegf_t*	rseg_header;
 	ulint		hist_size;
+	ulint		zip_size;
 
 	ut_a(hdr_page_no != page_no);
 	ut_ad(!mutex_own(&kernel_mutex));
 	ut_ad(mutex_own(&(rseg->mutex)));
 
-	undo_page = trx_undo_page_get(space, page_no, mtr);
+	zip_size = rseg->zip_size;
 
-	header_page = trx_undo_page_get(space, hdr_page_no, mtr);
+	undo_page = trx_undo_page_get(space, zip_size, page_no, mtr);
+
+	header_page = trx_undo_page_get(space, zip_size, hdr_page_no, mtr);
 
 	flst_remove(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
 		    undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
@@ -931,7 +983,8 @@ trx_undo_free_page(
 	rseg->curr_size--;
 
 	if (in_history) {
-		rseg_header = trx_rsegf_get(space, rseg->page_no, mtr);
+		rseg_header = trx_rsegf_get(space, zip_size,
+					    rseg->page_no, mtr);
 
 		hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
 					   MLOG_4BYTES, mtr);
@@ -943,18 +996,18 @@ trx_undo_free_page(
 	return(last_addr.page);
 }
 
-/************************************************************************
+/********************************************************************//**
 Frees an undo log page when there is also the memory object for the undo
 log. */
 static
 void
 trx_undo_free_page_in_rollback(
 /*===========================*/
-	trx_t*		trx __attribute__((unused)), /* in: transaction */
-	trx_undo_t*	undo,	/* in: undo log memory copy */
-	ulint		page_no,/* in: page number to free: must not be the
+	trx_t*		trx __attribute__((unused)), /*!< in: transaction */
+	trx_undo_t*	undo,	/*!< in: undo log memory copy */
+	ulint		page_no,/*!< in: page number to free: must not be the
 				header page */
-	mtr_t*		mtr)	/* in: mtr which does not have a latch to any
+	mtr_t*		mtr)	/*!< in: mtr which does not have a latch to any
 				undo log page; the caller must have reserved
 				the rollback segment mutex */
 {
@@ -970,23 +1023,25 @@ trx_undo_free_page_in_rollback(
 	undo->size--;
 }
 
-/************************************************************************
+/********************************************************************//**
 Empties an undo log header page of undo records for that undo log. Other
 undo logs may still have records on that page, if it is an update undo log. */
 static
 void
 trx_undo_empty_header_page(
 /*=======================*/
-	ulint	space,		/* in: space */
-	ulint	hdr_page_no,	/* in: header page number */
-	ulint	hdr_offset,	/* in: header offset */
-	mtr_t*	mtr)		/* in: mtr */
+	ulint	space,		/*!< in: space */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	hdr_page_no,	/*!< in: header page number */
+	ulint	hdr_offset,	/*!< in: header offset */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
 	page_t*		header_page;
 	trx_ulogf_t*	log_hdr;
 	ulint		end;
 
-	header_page = trx_undo_page_get(space, hdr_page_no, mtr);
+	header_page = trx_undo_page_get(space, zip_size, hdr_page_no, mtr);
 
 	log_hdr = header_page + hdr_offset;
 
@@ -995,16 +1050,16 @@ trx_undo_empty_header_page(
 	mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, end, MLOG_2BYTES, mtr);
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Truncates an undo log from the end. This function is used during a rollback
 to free space from an undo log. */
-
+UNIV_INTERN
 void
 trx_undo_truncate_end(
 /*==================*/
-	trx_t*		trx,	/* in: transaction whose undo log it is */
-	trx_undo_t*	undo,	/* in: undo log */
-	dulint		limit)	/* in: all undo records with undo number
+	trx_t*		trx,	/*!< in: transaction whose undo log it is */
+	trx_undo_t*	undo,	/*!< in: undo log */
+	undo_no_t	limit)	/*!< in: all undo records with undo number
 				>= this value should be truncated */
 {
 	page_t*		undo_page;
@@ -1026,7 +1081,8 @@ trx_undo_truncate_end(
 
 		last_page_no = undo->last_page_no;
 
-		undo_page = trx_undo_page_get(undo->space, last_page_no, &mtr);
+		undo_page = trx_undo_page_get(undo->space, undo->zip_size,
+					      last_page_no, &mtr);
 
 		rec = trx_undo_page_get_last_rec(undo_page, undo->hdr_page_no,
 						 undo->hdr_offset);
@@ -1069,22 +1125,24 @@ function_exit:
 	mtr_commit(&mtr);
 }
 
-/***************************************************************************
+/***********************************************************************//**
 Truncates an undo log from the start. This function is used during a purge
 operation. */
-
+UNIV_INTERN
 void
 trx_undo_truncate_start(
 /*====================*/
-	trx_rseg_t* rseg,	/* in: rollback segment */
-	ulint	space,		/* in: space id of the log */
-	ulint	hdr_page_no,	/* in: header page number */
-	ulint	hdr_offset,	/* in: header offset on the page */
-	dulint	limit)		/* in: all undo pages with undo numbers <
-				this value should be truncated; NOTE that
-				the function only frees whole pages; the
-				header page is not freed, but emptied, if
-				all the records there are < limit */
+	trx_rseg_t*	rseg,		/*!< in: rollback segment */
+	ulint		space,		/*!< in: space id of the log */
+	ulint		hdr_page_no,	/*!< in: header page number */
+	ulint		hdr_offset,	/*!< in: header offset on the page */
+	undo_no_t	limit)		/*!< in: all undo pages with
+					undo numbers < this value
+					should be truncated; NOTE that
+					the function only frees whole
+					pages; the header page is not
+					freed, but emptied, if all the
+					records there are < limit */
 {
 	page_t*		undo_page;
 	trx_undo_rec_t* rec;
@@ -1094,14 +1152,15 @@ trx_undo_truncate_start(
 
 	ut_ad(mutex_own(&(rseg->mutex)));
 
-	if (0 == ut_dulint_cmp(limit, ut_dulint_zero)) {
+	if (ut_dulint_is_zero(limit)) {
 
 		return;
 	}
 loop:
 	mtr_start(&mtr);
 
-	rec = trx_undo_get_first_rec(space, hdr_page_no, hdr_offset,
+	rec = trx_undo_get_first_rec(space, rseg->zip_size,
+				     hdr_page_no, hdr_offset,
 				     RW_X_LATCH, &mtr);
 	if (rec == NULL) {
 		/* Already empty */
@@ -1111,7 +1170,7 @@ loop:
 		return;
 	}
 
-	undo_page = buf_frame_align(rec);
+	undo_page = page_align(rec);
 
 	last_rec = trx_undo_page_get_last_rec(undo_page, hdr_page_no,
 					      hdr_offset);
@@ -1122,10 +1181,11 @@ loop:
 		return;
 	}
 
-	page_no = buf_frame_get_page_no(undo_page);
+	page_no = page_get_page_no(undo_page);
 
 	if (page_no == hdr_page_no) {
-		trx_undo_empty_header_page(space, hdr_page_no, hdr_offset,
+		trx_undo_empty_header_page(space, rseg->zip_size,
+					   hdr_page_no, hdr_offset,
 					   &mtr);
 	} else {
 		trx_undo_free_page(rseg, TRUE, space, hdr_page_no,
@@ -1137,13 +1197,13 @@ loop:
 	goto loop;
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Frees an undo log segment which is not in the history list. */
 static
 void
 trx_undo_seg_free(
 /*==============*/
-	trx_undo_t*	undo)	/* in: undo log */
+	trx_undo_t*	undo)	/*!< in: undo log */
 {
 	trx_rseg_t*	rseg;
 	fseg_header_t*	file_seg;
@@ -1152,10 +1212,9 @@ trx_undo_seg_free(
 	ibool		finished;
 	mtr_t		mtr;
 
-	finished = FALSE;
 	rseg = undo->rseg;
 
-	while (!finished) {
+	do {
 
 		mtr_start(&mtr);
 
@@ -1163,7 +1222,8 @@ trx_undo_seg_free(
 
 		mutex_enter(&(rseg->mutex));
 
-		seg_header = trx_undo_page_get(undo->space, undo->hdr_page_no,
+		seg_header = trx_undo_page_get(undo->space, undo->zip_size,
+					       undo->hdr_page_no,
 					       &mtr) + TRX_UNDO_SEG_HDR;
 
 		file_seg = seg_header + TRX_UNDO_FSEG_HEADER;
@@ -1172,32 +1232,33 @@ trx_undo_seg_free(
 
 		if (finished) {
 			/* Update the rseg header */
-			rseg_header = trx_rsegf_get(rseg->space, rseg->page_no,
-						    &mtr);
+			rseg_header = trx_rsegf_get(
+				rseg->space, rseg->zip_size, rseg->page_no,
+				&mtr);
 			trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL,
 					       &mtr);
 		}
 
 		mutex_exit(&(rseg->mutex));
 		mtr_commit(&mtr);
-	}
+	} while (!finished);
 }
 
 /*========== UNDO LOG MEMORY COPY INITIALIZATION =====================*/
 
-/************************************************************************
+/********************************************************************//**
 Creates and initializes an undo log memory object according to the values
 in the header in file, when the database is started. The memory object is
-inserted in the appropriate list of rseg. */
+inserted in the appropriate list of rseg.
+@return	own: the undo log memory object */
 static
 trx_undo_t*
 trx_undo_mem_create_at_db_start(
 /*============================*/
-				/* out, own: the undo log memory object */
-	trx_rseg_t*	rseg,	/* in: rollback segment memory object */
-	ulint		id,	/* in: slot index within rseg */
-	ulint		page_no,/* in: undo log segment page number */
-	mtr_t*		mtr)	/* in: mtr */
+	trx_rseg_t*	rseg,	/*!< in: rollback segment memory object */
+	ulint		id,	/*!< in: slot index within rseg */
+	ulint		page_no,/*!< in: undo log segment page number */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	page_t*		undo_page;
 	trx_upagef_t*	page_header;
@@ -1206,7 +1267,7 @@ trx_undo_mem_create_at_db_start(
 	trx_undo_t*	undo;
 	ulint		type;
 	ulint		state;
-	dulint		trx_id;
+	trx_id_t	trx_id;
 	ulint		offset;
 	fil_addr_t	last_addr;
 	page_t*		last_page;
@@ -1220,7 +1281,8 @@ trx_undo_mem_create_at_db_start(
 		ut_error;
 	}
 
-	undo_page = trx_undo_page_get(rseg->space, page_no, mtr);
+	undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
+				      page_no, mtr);
 
 	page_header = undo_page + TRX_UNDO_PAGE_HDR;
 
@@ -1273,7 +1335,8 @@ trx_undo_mem_create_at_db_start(
 	undo->last_page_no = last_addr.page;
 	undo->top_page_no = last_addr.page;
 
-	last_page = trx_undo_page_get(rseg->space, undo->last_page_no, mtr);
+	last_page = trx_undo_page_get(rseg->space, rseg->zip_size,
+				      undo->last_page_no, mtr);
 
 	rec = trx_undo_page_get_last_rec(last_page, page_no, offset);
 
@@ -1307,17 +1370,16 @@ add_to_list:
 	return(undo);
 }
 
-/************************************************************************
+/********************************************************************//**
 Initializes the undo log lists for a rollback segment memory copy. This
 function is only called when the database is started or a new rollback
-segment is created. */
-
+segment is created.
+@return	the combined size of undo log segments in pages */
+UNIV_INTERN
 ulint
 trx_undo_lists_init(
 /*================*/
-				/* out: the combined size of undo log segments
-				in pages */
-	trx_rseg_t*	rseg)	/* in: rollback segment memory object */
+	trx_rseg_t*	rseg)	/*!< in: rollback segment memory object */
 {
 	ulint		page_no;
 	trx_undo_t*	undo;
@@ -1333,7 +1395,8 @@ trx_undo_lists_init(
 
 	mtr_start(&mtr);
 
-	rseg_header = trx_rsegf_get_new(rseg->space, rseg->page_no, &mtr);
+	rseg_header = trx_rsegf_get_new(rseg->space, rseg->zip_size,
+					rseg->page_no, &mtr);
 
 	for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
 		page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr);
@@ -1354,8 +1417,9 @@ trx_undo_lists_init(
 
 			mtr_start(&mtr);
 
-			rseg_header = trx_rsegf_get(rseg->space,
-						    rseg->page_no, &mtr);
+			rseg_header = trx_rsegf_get(
+				rseg->space, rseg->zip_size, rseg->page_no,
+				&mtr);
 		}
 	}
 
@@ -1364,22 +1428,22 @@ trx_undo_lists_init(
 	return(size);
 }
 
-/************************************************************************
-Creates and initializes an undo log memory object. */
+/********************************************************************//**
+Creates and initializes an undo log memory object.
+@return	own: the undo log memory object */
 static
 trx_undo_t*
 trx_undo_mem_create(
 /*================*/
-				/* out, own: the undo log memory object */
-	trx_rseg_t*	rseg,	/* in: rollback segment memory object */
-	ulint		id,	/* in: slot index within rseg */
-	ulint		type,	/* in: type of the log: TRX_UNDO_INSERT or
+	trx_rseg_t*	rseg,	/*!< in: rollback segment memory object */
+	ulint		id,	/*!< in: slot index within rseg */
+	ulint		type,	/*!< in: type of the log: TRX_UNDO_INSERT or
 				TRX_UNDO_UPDATE */
-	dulint		trx_id,	/* in: id of the trx for which the undo log
+	trx_id_t	trx_id,	/*!< in: id of the trx for which the undo log
 				is created */
-	XID*		xid,	/* in: X/Open transaction identification */
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset)	/* in: undo log header byte offset on page */
+	const XID*	xid,	/*!< in: X/Open transaction identification */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset)	/*!< in: undo log header byte offset on page */
 {
 	trx_undo_t*	undo;
 
@@ -1410,6 +1474,7 @@ trx_undo_mem_create(
 	undo->rseg = rseg;
 
 	undo->space = rseg->space;
+	undo->zip_size = rseg->zip_size;
 	undo->hdr_page_no = page_no;
 	undo->hdr_offset = offset;
 	undo->last_page_no = page_no;
@@ -1417,22 +1482,22 @@ trx_undo_mem_create(
 
 	undo->empty = TRUE;
 	undo->top_page_no = page_no;
-	undo->guess_page = NULL;
+	undo->guess_block = NULL;
 
 	return(undo);
 }
 
-/************************************************************************
+/********************************************************************//**
 Initializes a cached undo log object for new use. */
 static
 void
 trx_undo_mem_init_for_reuse(
 /*========================*/
-	trx_undo_t*	undo,	/* in: undo log to init */
-	dulint		trx_id,	/* in: id of the trx for which the undo log
+	trx_undo_t*	undo,	/*!< in: undo log to init */
+	trx_id_t	trx_id,	/*!< in: id of the trx for which the undo log
 				is created */
-	XID*		xid,	/* in: X/Open XA transaction identification*/
-	ulint		offset)	/* in: undo log header byte offset on page */
+	const XID*	xid,	/*!< in: X/Open XA transaction identification*/
+	ulint		offset)	/*!< in: undo log header byte offset on page */
 {
 	ut_ad(mutex_own(&((undo->rseg)->mutex)));
 
@@ -1455,13 +1520,13 @@ trx_undo_mem_init_for_reuse(
 	undo->empty = TRUE;
 }
 
-/************************************************************************
+/********************************************************************//**
 Frees an undo log memory copy. */
 static
 void
 trx_undo_mem_free(
 /*==============*/
-	trx_undo_t*	undo)	/* in: the undo object to be freed */
+	trx_undo_t*	undo)	/*!< in: the undo object to be freed */
 {
 	if (undo->id >= TRX_RSEG_N_SLOTS) {
 		fprintf(stderr,
@@ -1472,28 +1537,25 @@ trx_undo_mem_free(
 	mem_free(undo);
 }
 
-/**************************************************************************
-Creates a new undo log. */
+/**********************************************************************//**
+Creates a new undo log.
+@return DB_SUCCESS if successful in creating the new undo lob object,
+possible error codes are: DB_TOO_MANY_CONCURRENT_TRXS
+DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY */
 static
 ulint
 trx_undo_create(
 /*============*/
-				/* out: DB_SUCCESS if successful in creating
-				the new undo lob object, possible error
-				codes are: 
-				DB_TOO_MANY_CONCURRENT_TRXS
-				DB_OUT_OF_FILE_SPACE 
-				DB_OUT_OF_MEMORY*/
-	trx_t*		trx,	/* in: transaction */
-	trx_rseg_t*	rseg,	/* in: rollback segment memory copy */
-	ulint		type,	/* in: type of the log: TRX_UNDO_INSERT or
+	trx_t*		trx,	/*!< in: transaction */
+	trx_rseg_t*	rseg,	/*!< in: rollback segment memory copy */
+	ulint		type,	/*!< in: type of the log: TRX_UNDO_INSERT or
 				TRX_UNDO_UPDATE */
-	dulint		trx_id,	/* in: id of the trx for which the undo log
+	trx_id_t	trx_id,	/*!< in: id of the trx for which the undo log
 				is created */
-	XID*		xid,	/* in: X/Open transaction identification*/
-	trx_undo_t**	undo,	/* out: the new undo log object, undefined
+	const XID*	xid,	/*!< in: X/Open transaction identification*/
+	trx_undo_t**	undo,	/*!< out: the new undo log object, undefined
 				 * if did not succeed */
-	mtr_t*		mtr)	/* in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	trx_rsegf_t*	rseg_header;
 	ulint		page_no;
@@ -1511,10 +1573,11 @@ trx_undo_create(
 
 	rseg->curr_size++;
 
-	rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
+	rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size, rseg->page_no,
+				    mtr);
 
 	err = trx_undo_seg_create(rseg, rseg_header, type, &id,
-							&undo_page, mtr);
+				  &undo_page, mtr);
 
 	if (err != DB_SUCCESS) {
 		/* Did not succeed */
@@ -1524,7 +1587,7 @@ trx_undo_create(
 		return(err);
 	}
 
-	page_no = buf_frame_get_page_no(undo_page);
+	page_no = page_get_page_no(undo_page);
 
 	offset = trx_undo_header_create(undo_page, trx_id, mtr);
 
@@ -1545,22 +1608,21 @@ trx_undo_create(
 
 /*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/
 
-/************************************************************************
-Reuses a cached undo log. */
+/********************************************************************//**
+Reuses a cached undo log.
+@return	the undo log memory object, NULL if none cached */
 static
 trx_undo_t*
 trx_undo_reuse_cached(
 /*==================*/
-				/* out: the undo log memory object, NULL if
-				none cached */
-	trx_t*		trx,	/* in: transaction */
-	trx_rseg_t*	rseg,	/* in: rollback segment memory object */
-	ulint		type,	/* in: type of the log: TRX_UNDO_INSERT or
+	trx_t*		trx,	/*!< in: transaction */
+	trx_rseg_t*	rseg,	/*!< in: rollback segment memory object */
+	ulint		type,	/*!< in: type of the log: TRX_UNDO_INSERT or
 				TRX_UNDO_UPDATE */
-	dulint		trx_id,	/* in: id of the trx for which the undo log
+	trx_id_t	trx_id,	/*!< in: id of the trx for which the undo log
 				is used */
-	XID*		xid,	/* in: X/Open XA transaction identification */
-	mtr_t*		mtr)	/* in: mtr */
+	const XID*	xid,	/*!< in: X/Open XA transaction identification */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	trx_undo_t*	undo;
 	page_t*		undo_page;
@@ -1598,7 +1660,8 @@ trx_undo_reuse_cached(
 		ut_error;
 	}
 
-	undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+	undo_page = trx_undo_page_get(undo->space, undo->zip_size,
+				      undo->hdr_page_no, mtr);
 
 	if (type == TRX_UNDO_INSERT) {
 		offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr);
@@ -1625,47 +1688,56 @@ trx_undo_reuse_cached(
 	return(undo);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Marks an undo log header as a header of a data dictionary operation
 transaction. */
 static
 void
 trx_undo_mark_as_dict_operation(
 /*============================*/
-	trx_t*		trx,	/* in: dict op transaction */
-	trx_undo_t*	undo,	/* in: assigned undo log */
-	mtr_t*		mtr)	/* in: mtr */
+	trx_t*		trx,	/*!< in: dict op transaction */
+	trx_undo_t*	undo,	/*!< in: assigned undo log */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	page_t*	hdr_page;
 
-	ut_a(trx->dict_operation);
+	hdr_page = trx_undo_page_get(undo->space, undo->zip_size,
+				     undo->hdr_page_no, mtr);
 
-	hdr_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+	switch (trx_get_dict_operation(trx)) {
+	case TRX_DICT_OP_NONE:
+		ut_error;
+	case TRX_DICT_OP_INDEX:
+		/* Do not discard the table on recovery. */
+		undo->table_id = ut_dulint_zero;
+		break;
+	case TRX_DICT_OP_TABLE:
+		undo->table_id = trx->table_id;
+		break;
+	}
 
 	mlog_write_ulint(hdr_page + undo->hdr_offset
 			 + TRX_UNDO_DICT_TRANS,
-			 trx->dict_operation, MLOG_1BYTE, mtr);
+			 TRUE, MLOG_1BYTE, mtr);
 
 	mlog_write_dulint(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID,
-			  trx->table_id, mtr);
+			  undo->table_id, mtr);
 
-	undo->dict_operation = trx->dict_operation;
-	undo->table_id = trx->table_id;
+	undo->dict_operation = TRUE;
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Assigns an undo log for a transaction. A new undo log is created or a cached
-undo log reused. */
-
+undo log reused.
+@return DB_SUCCESS if undo log assign successful, possible error codes
+are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE
+DB_OUT_OF_MEMORY */
+UNIV_INTERN
 ulint
 trx_undo_assign_undo(
 /*=================*/
-				/* out: DB_SUCCESS if undo log assign
-				successful, possible error codes are:
-				DD_TOO_MANY_CONCURRENT_TRXS
-				DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY*/
-	trx_t*		trx,	/* in: transaction */
-	ulint		type)	/* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+	trx_t*		trx,	/*!< in: transaction */
+	ulint		type)	/*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
 {
 	trx_rseg_t*	rseg;
 	trx_undo_t*	undo;
@@ -1706,7 +1778,7 @@ trx_undo_assign_undo(
 		trx->update_undo = undo;
 	}
 
-	if (trx->dict_operation) {
+	if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
 		trx_undo_mark_as_dict_operation(trx, undo, &mtr);
 	}
 
@@ -1717,18 +1789,17 @@ func_exit:
 	return err;
 }
 
-/**********************************************************************
-Sets the state of the undo log segment at a transaction finish. */
-
+/******************************************************************//**
+Sets the state of the undo log segment at a transaction finish.
+@return	undo log segment header page, x-latched */
+UNIV_INTERN
 page_t*
 trx_undo_set_state_at_finish(
 /*=========================*/
-				/* out: undo log segment header page,
-				x-latched */
-	trx_rseg_t*	rseg,	/* in: rollback segment memory object */
-	trx_t*		trx __attribute__((unused)), /* in: transaction */
-	trx_undo_t*	undo,	/* in: undo log memory copy */
-	mtr_t*		mtr)	/* in: mtr */
+	trx_rseg_t*	rseg,	/*!< in: rollback segment memory object */
+	trx_t*		trx __attribute__((unused)), /*!< in: transaction */
+	trx_undo_t*	undo,	/*!< in: undo log memory copy */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	trx_usegf_t*	seg_hdr;
 	trx_upagef_t*	page_hdr;
@@ -1747,7 +1818,8 @@ trx_undo_set_state_at_finish(
 		ut_error;
 	}
 
-	undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+	undo_page = trx_undo_page_get(undo->space, undo->zip_size,
+				      undo->hdr_page_no, mtr);
 
 	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
 	page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
@@ -1784,17 +1856,16 @@ trx_undo_set_state_at_finish(
 	return(undo_page);
 }
 
-/**********************************************************************
-Sets the state of the undo log segment at a transaction prepare. */
-
+/******************************************************************//**
+Sets the state of the undo log segment at a transaction prepare.
+@return	undo log segment header page, x-latched */
+UNIV_INTERN
 page_t*
 trx_undo_set_state_at_prepare(
 /*==========================*/
-				/* out: undo log segment header page,
-				x-latched */
-	trx_t*		trx,	/* in: transaction */
-	trx_undo_t*	undo,	/* in: undo log memory copy */
-	mtr_t*		mtr)	/* in: mtr */
+	trx_t*		trx,	/*!< in: transaction */
+	trx_undo_t*	undo,	/*!< in: undo log memory copy */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	trx_usegf_t*	seg_hdr;
 	trx_upagef_t*	page_hdr;
@@ -1811,7 +1882,8 @@ trx_undo_set_state_at_prepare(
 		ut_error;
 	}
 
-	undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+	undo_page = trx_undo_page_get(undo->space, undo->zip_size,
+				      undo->hdr_page_no, mtr);
 
 	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
 	page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
@@ -1835,18 +1907,18 @@ trx_undo_set_state_at_prepare(
 	return(undo_page);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Adds the update undo log header as the first in the history list, and
 frees the memory object, or puts it to the list of cached update undo log
 segments. */
-
+UNIV_INTERN
 void
 trx_undo_update_cleanup(
 /*====================*/
-	trx_t*	trx,		/* in: trx owning the update undo log */
-	page_t*	undo_page,	/* in: update undo log header page,
+	trx_t*	trx,		/*!< in: trx owning the update undo log */
+	page_t*	undo_page,	/*!< in: update undo log header page,
 				x-latched */
-	mtr_t*	mtr)		/* in: mtr */
+	mtr_t*	mtr)		/*!< in: mtr */
 {
 	trx_rseg_t*	rseg;
 	trx_undo_t*	undo;
@@ -1872,15 +1944,15 @@ trx_undo_update_cleanup(
 	}
 }
 
-/**********************************************************************
+/******************************************************************//**
 Frees or caches an insert undo log after a transaction commit or rollback.
 Knowledge of inserts is not needed after a commit or rollback, therefore
 the data can be discarded. */
-
+UNIV_INTERN
 void
 trx_undo_insert_cleanup(
 /*====================*/
-	trx_t*	trx)	/* in: transaction handle */
+	trx_t*	trx)	/*!< in: transaction handle */
 {
 	trx_undo_t*	undo;
 	trx_rseg_t*	rseg;
@@ -1918,3 +1990,4 @@ trx_undo_insert_cleanup(
 
 	mutex_exit(&(rseg->mutex));
 }
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/usr/usr0sess.c b/storage/innodb_plugin/usr/usr0sess.c
new file mode 100644
index 00000000000..990991a2c06
--- /dev/null
+++ b/storage/innodb_plugin/usr/usr0sess.c
@@ -0,0 +1,98 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file usr/usr0sess.c
+Sessions
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
+
+#include "usr0sess.h"
+
+#ifdef UNIV_NONINL
+#include "usr0sess.ic"
+#endif
+
+#include "trx0trx.h"
+
+/*********************************************************************//**
+Closes a session, freeing the memory occupied by it. */
+static
+void
+sess_close(
+/*=======*/
+	sess_t*		sess);	/*!< in, own: session object */
+
+/*********************************************************************//**
+Opens a session.
+@return	own: session object */
+UNIV_INTERN
+sess_t*
+sess_open(void)
+/*===========*/
+{
+	sess_t*	sess;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	sess = mem_alloc(sizeof(sess_t));
+
+	sess->state = SESS_ACTIVE;
+
+	sess->trx = trx_create(sess);
+
+	UT_LIST_INIT(sess->graphs);
+
+	return(sess);
+}
+
+/*********************************************************************//**
+Closes a session, freeing the memory occupied by it. */
+static
+void
+sess_close(
+/*=======*/
+	sess_t*	sess)	/*!< in, own: session object */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(sess->trx == NULL);
+
+	mem_free(sess);
+}
+
+/*********************************************************************//**
+Closes a session, freeing the memory occupied by it, if it is in a state
+where it should be closed.
+@return	TRUE if closed */
+UNIV_INTERN
+ibool
+sess_try_close(
+/*===========*/
+	sess_t*	sess)	/*!< in, own: session object */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+
+	if (UT_LIST_GET_LEN(sess->graphs) == 0) {
+		sess_close(sess);
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
diff --git a/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_gcc.c b/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_gcc.c
new file mode 100644
index 00000000000..30de5aa6f17
--- /dev/null
+++ b/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_gcc.c
@@ -0,0 +1,43 @@
+/*****************************************************************************
+
+Copyright (c) 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*****************************************************************************
+If this program compiles, then pthread_t objects can be used as arguments
+to GCC atomic builtin functions.
+
+Created March 5, 2009 Vasil Dimov
+*****************************************************************************/
+
+#include <pthread.h>
+#include <string.h>
+
+int
+main(int argc, char** argv)
+{
+	pthread_t	x1;
+	pthread_t	x2;
+	pthread_t	x3;
+
+	memset(&x1, 0x0, sizeof(x1));
+	memset(&x2, 0x0, sizeof(x2));
+	memset(&x3, 0x0, sizeof(x3));
+
+	__sync_bool_compare_and_swap(&x1, x2, x3);
+
+	return(0);
+}
diff --git a/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_solaris.c b/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_solaris.c
new file mode 100644
index 00000000000..a18a537d1d4
--- /dev/null
+++ b/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_solaris.c
@@ -0,0 +1,34 @@
+/*****************************************************************************
+
+Copyright (c) 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*****************************************************************************
+If this program compiles, then pthread_t objects can be used as arguments
+to Solaris libc atomic functions.
+
+Created April 18, 2009 Vasil Dimov
+*****************************************************************************/
+
+#include <pthread.h>
+
+int
+main(int argc, char** argv)
+{
+	pthread_t	x = 0;
+
+	return(0);
+}
diff --git a/storage/innodb_plugin/ut/ut0auxconf_have_solaris_atomics.c b/storage/innodb_plugin/ut/ut0auxconf_have_solaris_atomics.c
new file mode 100644
index 00000000000..7eb704edd4b
--- /dev/null
+++ b/storage/innodb_plugin/ut/ut0auxconf_have_solaris_atomics.c
@@ -0,0 +1,39 @@
+/*****************************************************************************
+
+Copyright (c) 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*****************************************************************************
+If this program compiles, then Solaris libc atomic funcions are available.
+
+Created April 18, 2009 Vasil Dimov
+*****************************************************************************/
+#include <atomic.h>
+
+int
+main(int argc, char** argv)
+{
+	ulong_t		ulong	= 0;
+	uint32_t	uint32	= 0;
+	uint64_t	uint64	= 0;
+
+	atomic_cas_ulong(&ulong, 0, 1);
+	atomic_cas_32(&uint32, 0, 1);
+	atomic_cas_64(&uint64, 0, 1);
+	atomic_add_long(&ulong, 0);
+
+	return(0);
+}
diff --git a/storage/innodb_plugin/ut/ut0auxconf_pause.c b/storage/innodb_plugin/ut/ut0auxconf_pause.c
new file mode 100644
index 00000000000..54d63bdd9bc
--- /dev/null
+++ b/storage/innodb_plugin/ut/ut0auxconf_pause.c
@@ -0,0 +1,32 @@
+/*****************************************************************************
+
+Copyright (c) 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*****************************************************************************
+If this program compiles and can be run and returns 0, then the pause
+instruction is available.
+
+Created Jul 21, 2009 Vasil Dimov
+*****************************************************************************/
+
+int
+main(int argc, char** argv)
+{
+	__asm__ __volatile__ ("pause");
+
+	return(0);
+}
diff --git a/storage/innodb_plugin/ut/ut0auxconf_sizeof_pthread_t.c b/storage/innodb_plugin/ut/ut0auxconf_sizeof_pthread_t.c
new file mode 100644
index 00000000000..96add4526ef
--- /dev/null
+++ b/storage/innodb_plugin/ut/ut0auxconf_sizeof_pthread_t.c
@@ -0,0 +1,35 @@
+/*****************************************************************************
+
+Copyright (c) 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*****************************************************************************
+This program should compile and when run, print a single line like:
+#define SIZEOF_PTHREAD_T %d
+
+Created April 18, 2009 Vasil Dimov
+*****************************************************************************/
+
+#include <stdio.h>
+#include <pthread.h>
+
+int
+main(int argc, char** argv)
+{
+	printf("#define SIZEOF_PTHREAD_T %d\n", (int) sizeof(pthread_t));
+
+	return(0);
+}
diff --git a/storage/innodb_plugin/ut/ut0byte.c b/storage/innodb_plugin/ut/ut0byte.c
new file mode 100644
index 00000000000..4e093f72ce2
--- /dev/null
+++ b/storage/innodb_plugin/ut/ut0byte.c
@@ -0,0 +1,55 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/***************************************************************//**
+@file ut/ut0byte.c
+Byte utilities
+
+Created 5/11/1994 Heikki Tuuri
+********************************************************************/
+
+#include "ut0byte.h"
+
+#ifdef UNIV_NONINL
+#include "ut0byte.ic"
+#endif
+
+/** Zero value for a dulint */
+UNIV_INTERN const dulint	ut_dulint_zero	= {0, 0};
+
+/** Maximum value for a dulint */
+UNIV_INTERN const dulint	ut_dulint_max	= {0xFFFFFFFFUL, 0xFFFFFFFFUL};
+
+#ifdef notdefined /* unused code */
+#include "ut0sort.h"
+
+/************************************************************//**
+Sort function for dulint arrays. */
+UNIV_INTERN
+void
+ut_dulint_sort(
+/*===========*/
+	dulint*	arr,	/*!< in/out: array to be sorted */
+	dulint*	aux_arr,/*!< in/out: auxiliary array (same size as arr) */
+	ulint	low,	/*!< in: low bound of sort interval, inclusive */
+	ulint	high)	/*!< in: high bound of sort interval, noninclusive */
+{
+	UT_SORT_FUNCTION_BODY(ut_dulint_sort, arr, aux_arr, low, high,
+			      ut_dulint_cmp);
+}
+#endif /* notdefined */
diff --git a/storage/innodb_plugin/ut/ut0dbg.c b/storage/innodb_plugin/ut/ut0dbg.c
new file mode 100644
index 00000000000..4484e6c36de
--- /dev/null
+++ b/storage/innodb_plugin/ut/ut0dbg.c
@@ -0,0 +1,187 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*****************************************************************//**
+@file ut/ut0dbg.c
+Debug utilities for Innobase.
+
+Created 1/30/1994 Heikki Tuuri
+**********************************************************************/
+
+#include "univ.i"
+#include "ut0dbg.h"
+
+#if defined(__GNUC__) && (__GNUC__ > 2)
+#else
+/** This is used to eliminate compiler warnings */
+UNIV_INTERN ulint	ut_dbg_zero	= 0;
+#endif
+
+#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
+/** If this is set to TRUE by ut_dbg_assertion_failed(), all threads
+will stop at the next ut_a() or ut_ad(). */
+UNIV_INTERN ibool	ut_dbg_stop_threads	= FALSE;
+#endif
+#ifdef __NETWARE__
+/** Flag for ignoring further assertion failures.  This is set to TRUE
+when on NetWare there happens an InnoDB assertion failure or other
+fatal error condition that requires an immediate shutdown. */
+UNIV_INTERN ibool panic_shutdown = FALSE;
+#elif !defined(UT_DBG_USE_ABORT)
+/** A null pointer that will be dereferenced to trigger a memory trap */
+UNIV_INTERN ulint*	ut_dbg_null_ptr		= NULL;
+#endif
+
+/*************************************************************//**
+Report a failed assertion. */
+UNIV_INTERN
+void
+ut_dbg_assertion_failed(
+/*====================*/
+	const char* expr,	/*!< in: the failed assertion (optional) */
+	const char* file,	/*!< in: source file containing the assertion */
+	ulint line)		/*!< in: line number of the assertion */
+{
+	ut_print_timestamp(stderr);
+#ifdef UNIV_HOTBACKUP
+	fprintf(stderr, "  InnoDB: Assertion failure in file %s line %lu\n",
+		file, line);
+#else /* UNIV_HOTBACKUP */
+	fprintf(stderr,
+		"  InnoDB: Assertion failure in thread %lu"
+		" in file %s line %lu\n",
+		os_thread_pf(os_thread_get_curr_id()), file, line);
+#endif /* UNIV_HOTBACKUP */
+	if (expr) {
+		fprintf(stderr,
+			"InnoDB: Failing assertion: %s\n", expr);
+	}
+
+	fputs("InnoDB: We intentionally generate a memory trap.\n"
+	      "InnoDB: Submit a detailed bug report"
+	      " to http://bugs.mysql.com.\n"
+	      "InnoDB: If you get repeated assertion failures"
+	      " or crashes, even\n"
+	      "InnoDB: immediately after the mysqld startup, there may be\n"
+	      "InnoDB: corruption in the InnoDB tablespace. Please refer to\n"
+	      "InnoDB: " REFMAN "forcing-recovery.html\n"
+	      "InnoDB: about forcing recovery.\n", stderr);
+#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
+	ut_dbg_stop_threads = TRUE;
+#endif
+}
+
+#ifdef __NETWARE__
+/*************************************************************//**
+Shut down MySQL/InnoDB after assertion failure. */
+UNIV_INTERN
+void
+ut_dbg_panic(void)
+/*==============*/
+{
+	if (!panic_shutdown) {
+		panic_shutdown = TRUE;
+		innobase_shutdown_for_mysql();
+	}
+	exit(1);
+}
+#else /* __NETWARE__ */
+# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
+/*************************************************************//**
+Stop a thread after assertion failure. */
+UNIV_INTERN
+void
+ut_dbg_stop_thread(
+/*===============*/
+	const char*	file,
+	ulint		line)
+{
+#ifndef UNIV_HOTBACKUP
+	fprintf(stderr, "InnoDB: Thread %lu stopped in file %s line %lu\n",
+		os_thread_pf(os_thread_get_curr_id()), file, line);
+	os_thread_sleep(1000000000);
+#endif /* !UNIV_HOTBACKUP */
+}
+# endif
+#endif /* __NETWARE__ */
+
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include <unistd.h>
+
+#ifndef timersub
+#define timersub(a, b, r)						\
+	do {								\
+		(r)->tv_sec = (a)->tv_sec - (b)->tv_sec;		\
+		(r)->tv_usec = (a)->tv_usec - (b)->tv_usec;		\
+		if ((r)->tv_usec < 0) {					\
+			(r)->tv_sec--;					\
+			(r)->tv_usec += 1000000;			\
+		}							\
+	} while (0)
+#endif /* timersub */
+
+/*******************************************************************//**
+Resets a speedo (records the current time in it). */
+UNIV_INTERN
+void
+speedo_reset(
+/*=========*/
+	speedo_t*	speedo)	/*!< out: speedo */
+{
+	gettimeofday(&speedo->tv, NULL);
+
+	getrusage(RUSAGE_SELF, &speedo->ru);
+}
+
+/*******************************************************************//**
+Shows the time elapsed and usage statistics since the last reset of a
+speedo. */
+UNIV_INTERN
+void
+speedo_show(
+/*========*/
+	const speedo_t*	speedo)	/*!< in: speedo */
+{
+	struct rusage	ru_now;
+	struct timeval	tv_now;
+	struct timeval	tv_diff;
+
+	getrusage(RUSAGE_SELF, &ru_now);
+
+	gettimeofday(&tv_now, NULL);
+
+#define PRINT_TIMEVAL(prefix, tvp)		\
+	fprintf(stderr, "%s% 5ld.%06ld sec\n",	\
+		prefix, (tvp)->tv_sec, (tvp)->tv_usec)
+
+	timersub(&tv_now, &speedo->tv, &tv_diff);
+	PRINT_TIMEVAL("real", &tv_diff);
+
+	timersub(&ru_now.ru_utime, &speedo->ru.ru_utime, &tv_diff);
+	PRINT_TIMEVAL("user", &tv_diff);
+
+	timersub(&ru_now.ru_stime, &speedo->ru.ru_stime, &tv_diff);
+	PRINT_TIMEVAL("sys ", &tv_diff);
+}
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innobase/ut/ut0list.c b/storage/innodb_plugin/ut/ut0list.c
similarity index 56%
rename from storage/innobase/ut/ut0list.c
rename to storage/innodb_plugin/ut/ut0list.c
index a0db7ff7b55..895a575c535 100644
--- a/storage/innobase/ut/ut0list.c
+++ b/storage/innodb_plugin/ut/ut0list.c
@@ -1,15 +1,40 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file ut/ut0list.c
+A double-linked list
+
+Created 4/26/2006 Osku Salerma
+************************************************************************/
+
 #include "ut0list.h"
 #ifdef UNIV_NONINL
 #include "ut0list.ic"
 #endif
 
-/********************************************************************
-Create a new list. */
-
+/****************************************************************//**
+Create a new list.
+@return	list */
+UNIV_INTERN
 ib_list_t*
 ib_list_create(void)
 /*=================*/
-			/* out: list */
 {
 	ib_list_t*	list = mem_alloc(sizeof(ib_list_t));
 
@@ -20,15 +45,15 @@ ib_list_create(void)
 	return(list);
 }
 
-/********************************************************************
+/****************************************************************//**
 Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
-lists created with this function. */
-
+lists created with this function.
+@return	list */
+UNIV_INTERN
 ib_list_t*
 ib_list_create_heap(
 /*================*/
-				/* out: list */
-	mem_heap_t*	heap)	/* in: memory heap to use */
+	mem_heap_t*	heap)	/*!< in: memory heap to use */
 {
 	ib_list_t*	list = mem_heap_alloc(heap, sizeof(ib_list_t));
 
@@ -39,13 +64,13 @@ ib_list_create_heap(
 	return(list);
 }
 
-/********************************************************************
+/****************************************************************//**
 Free a list. */
-
+UNIV_INTERN
 void
 ib_list_free(
 /*=========*/
-	ib_list_t*	list)	/* in: list */
+	ib_list_t*	list)	/*!< in: list */
 {
 	ut_a(!list->is_heap_list);
 
@@ -56,46 +81,46 @@ ib_list_free(
 	mem_free(list);
 }
 
-/********************************************************************
-Add the data to the start of the list. */
-
+/****************************************************************//**
+Add the data to the start of the list.
+@return	new list node */
+UNIV_INTERN
 ib_list_node_t*
 ib_list_add_first(
 /*==============*/
-				/* out: new list node*/
-	ib_list_t*	list,	/* in: list */
-	void*		data,	/* in: data */
-	mem_heap_t*	heap)	/* in: memory heap to use */
+	ib_list_t*	list,	/*!< in: list */
+	void*		data,	/*!< in: data */
+	mem_heap_t*	heap)	/*!< in: memory heap to use */
 {
 	return(ib_list_add_after(list, ib_list_get_first(list), data, heap));
 }
 
-/********************************************************************
-Add the data to the end of the list. */
-
+/****************************************************************//**
+Add the data to the end of the list.
+@return	new list node */
+UNIV_INTERN
 ib_list_node_t*
 ib_list_add_last(
 /*=============*/
-				/* out: new list node*/
-	ib_list_t*	list,	/* in: list */
-	void*		data,	/* in: data */
-	mem_heap_t*	heap)	/* in: memory heap to use */
+	ib_list_t*	list,	/*!< in: list */
+	void*		data,	/*!< in: data */
+	mem_heap_t*	heap)	/*!< in: memory heap to use */
 {
 	return(ib_list_add_after(list, ib_list_get_last(list), data, heap));
 }
 
-/********************************************************************
-Add the data after the indicated node. */
-
+/****************************************************************//**
+Add the data after the indicated node.
+@return	new list node */
+UNIV_INTERN
 ib_list_node_t*
 ib_list_add_after(
 /*==============*/
-					/* out: new list node*/
-	ib_list_t*	list,		/* in: list */
-	ib_list_node_t*	prev_node,	/* in: node preceding new node (can
+	ib_list_t*	list,		/*!< in: list */
+	ib_list_node_t*	prev_node,	/*!< in: node preceding new node (can
 					be NULL) */
-	void*		data,		/* in: data */
-	mem_heap_t*	heap)		/* in: memory heap to use */
+	void*		data,		/*!< in: data */
+	mem_heap_t*	heap)		/*!< in: memory heap to use */
 {
 	ib_list_node_t*	node = mem_heap_alloc(heap, sizeof(ib_list_node_t));
 
@@ -138,14 +163,14 @@ ib_list_add_after(
 	return(node);
 }
 
-/********************************************************************
+/****************************************************************//**
 Remove the node from the list. */
-
+UNIV_INTERN
 void
 ib_list_remove(
 /*===========*/
-	ib_list_t*	list,	/* in: list */
-	ib_list_node_t*	node)	/* in: node to remove */
+	ib_list_t*	list,	/*!< in: list */
+	ib_list_node_t*	node)	/*!< in: node to remove */
 {
 	if (node->prev) {
 		node->prev->next = node->next;
diff --git a/storage/innobase/ut/ut0mem.c b/storage/innodb_plugin/ut/ut0mem.c
similarity index 56%
rename from storage/innobase/ut/ut0mem.c
rename to storage/innodb_plugin/ut/ut0mem.c
index b466a5f6872..edb63c95700 100644
--- a/storage/innobase/ut/ut0mem.c
+++ b/storage/innodb_plugin/ut/ut0mem.c
@@ -1,7 +1,24 @@
-/************************************************************************
-Memory primitives
+/*****************************************************************************
 
-(c) 1994, 1995 Innobase Oy
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file ut/ut0mem.c
+Memory primitives
 
 Created 5/11/1994 Heikki Tuuri
 *************************************************************************/
@@ -12,70 +29,96 @@ Created 5/11/1994 Heikki Tuuri
 #include "ut0mem.ic"
 #endif
 
-#include "mem0mem.h"
-#include "os0sync.h"
-#include "os0thread.h"
+#ifndef UNIV_HOTBACKUP
+# include "os0thread.h"
+# include "srv0srv.h"
 
-/* This struct is placed first in every allocated memory block */
+#include <stdlib.h>
+
+/** This struct is placed first in every allocated memory block */
 typedef struct ut_mem_block_struct ut_mem_block_t;
 
-/* The total amount of memory currently allocated from the OS with malloc */
-ulint	ut_total_allocated_memory	= 0;
+/** The total amount of memory currently allocated from the operating
+system with os_mem_alloc_large() or malloc().  Does not count malloc()
+if srv_use_sys_malloc is set.  Protected by ut_list_mutex. */
+UNIV_INTERN ulint		ut_total_allocated_memory	= 0;
 
+/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */
+UNIV_INTERN os_fast_mutex_t	ut_list_mutex;
+
+/** Dynamically allocated memory block */
 struct ut_mem_block_struct{
 	UT_LIST_NODE_T(ut_mem_block_t) mem_block_list;
-			/* mem block list node */
-	ulint	size;	/* size of allocated memory */
-	ulint	magic_n;
+			/*!< mem block list node */
+	ulint	size;	/*!< size of allocated memory */
+	ulint	magic_n;/*!< magic number (UT_MEM_MAGIC_N) */
 };
 
+/** The value of ut_mem_block_struct::magic_n.  Used in detecting
+memory corruption. */
 #define UT_MEM_MAGIC_N	1601650166
 
-/* List of all memory blocks allocated from the operating system
-with malloc */
-UT_LIST_BASE_NODE_T(ut_mem_block_t)   ut_mem_block_list;
+/** List of all memory blocks allocated from the operating system
+with malloc.  Protected by ut_list_mutex. */
+static UT_LIST_BASE_NODE_T(ut_mem_block_t)   ut_mem_block_list;
 
-os_fast_mutex_t ut_list_mutex;	/* this protects the list */
+/** Flag: has ut_mem_block_list been initialized? */
+static ibool  ut_mem_block_list_inited = FALSE;
 
-ibool  ut_mem_block_list_inited = FALSE;
+/** A dummy pointer for generating a null pointer exception in
+ut_malloc_low() */
+static ulint*	ut_mem_null_ptr	= NULL;
 
-ulint*	ut_mem_null_ptr	= NULL;
-
-/**************************************************************************
+/**********************************************************************//**
 Initializes the mem block list at database startup. */
-static
+UNIV_INTERN
 void
-ut_mem_block_list_init(void)
-/*========================*/
+ut_mem_init(void)
+/*=============*/
 {
+	ut_a(!ut_mem_block_list_inited);
 	os_fast_mutex_init(&ut_list_mutex);
 	UT_LIST_INIT(ut_mem_block_list);
 	ut_mem_block_list_inited = TRUE;
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/**************************************************************************
+/**********************************************************************//**
 Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined and set_to_zero is TRUE. */
-
+defined and set_to_zero is TRUE.
+@return	own: allocated memory */
+UNIV_INTERN
 void*
 ut_malloc_low(
 /*==========*/
-				/* out, own: allocated memory */
-	ulint	n,		/* in: number of bytes to allocate */
-	ibool	set_to_zero,	/* in: TRUE if allocated memory should be
+	ulint	n,		/*!< in: number of bytes to allocate */
+	ibool	set_to_zero,	/*!< in: TRUE if allocated memory should be
 				set to zero if UNIV_SET_MEM_TO_ZERO is
 				defined */
-	ibool	assert_on_error)/* in: if TRUE, we crash mysqld if the
+	ibool	assert_on_error)/*!< in: if TRUE, we crash mysqld if the
 				memory cannot be allocated */
 {
-	ulint	retry_count	= 0;
+#ifndef UNIV_HOTBACKUP
+	ulint	retry_count;
 	void*	ret;
 
-	ut_ad((sizeof(ut_mem_block_t) % 8) == 0); /* check alignment ok */
+	if (UNIV_LIKELY(srv_use_sys_malloc)) {
+		ret = malloc(n);
+		ut_a(ret || !assert_on_error);
 
-	if (!ut_mem_block_list_inited) {
-		ut_mem_block_list_init();
+#ifdef UNIV_SET_MEM_TO_ZERO
+		if (set_to_zero) {
+			memset(ret, '\0', n);
+			UNIV_MEM_ALLOC(ret, n);
+		}
+#endif
+		return(ret);
 	}
+
+	ut_ad((sizeof(ut_mem_block_t) % 8) == 0); /* check alignment ok */
+	ut_a(ut_mem_block_list_inited);
+
+	retry_count = 0;
 retry:
 	os_fast_mutex_lock(&ut_list_mutex);
 
@@ -174,31 +217,47 @@ retry:
 	os_fast_mutex_unlock(&ut_list_mutex);
 
 	return((void*)((byte*)ret + sizeof(ut_mem_block_t)));
+#else /* !UNIV_HOTBACKUP */
+	void*	ret = malloc(n);
+	ut_a(ret || !assert_on_error);
+
+# ifdef UNIV_SET_MEM_TO_ZERO
+	if (set_to_zero) {
+		memset(ret, '\0', n);
+	}
+# endif
+	return(ret);
+#endif /* !UNIV_HOTBACKUP */
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined. */
-
+defined.
+@return	own: allocated memory */
+UNIV_INTERN
 void*
 ut_malloc(
 /*======*/
-			/* out, own: allocated memory */
-	ulint	n)	/* in: number of bytes to allocate */
+	ulint	n)	/*!< in: number of bytes to allocate */
 {
+#ifndef UNIV_HOTBACKUP
 	return(ut_malloc_low(n, TRUE, TRUE));
+#else /* !UNIV_HOTBACKUP */
+	return(malloc(n));
+#endif /* !UNIV_HOTBACKUP */
 }
 
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
 Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs
 out. It cannot be used if we want to return an error message. Prints to
-stderr a message if fails. */
-
+stderr a message if fails.
+@return	TRUE if succeeded */
+UNIV_INTERN
 ibool
 ut_test_malloc(
 /*===========*/
-			/* out: TRUE if succeeded */
-	ulint	n)	/* in: try to allocate this many bytes */
+	ulint	n)	/*!< in: try to allocate this many bytes */
 {
 	void*	ret;
 
@@ -228,17 +287,24 @@ ut_test_malloc(
 
 	return(TRUE);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/**************************************************************************
+/**********************************************************************//**
 Frees a memory block allocated with ut_malloc. */
-
+UNIV_INTERN
 void
 ut_free(
 /*====*/
-	void* ptr)  /* in, own: memory block */
+	void* ptr)  /*!< in, own: memory block */
 {
+#ifndef UNIV_HOTBACKUP
 	ut_mem_block_t* block;
 
+	if (UNIV_LIKELY(srv_use_sys_malloc)) {
+		free(ptr);
+		return;
+	}
+
 	block = (ut_mem_block_t*)((byte*)ptr - sizeof(ut_mem_block_t));
 
 	os_fast_mutex_lock(&ut_list_mutex);
@@ -252,9 +318,13 @@ ut_free(
 	free(block);
 
 	os_fast_mutex_unlock(&ut_list_mutex);
+#else /* !UNIV_HOTBACKUP */
+	free(ptr);
+#endif /* !UNIV_HOTBACKUP */
 }
 
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
 Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not
 use this function because the allocation functions in mem0mem.h are the
 recommended ones in InnoDB.
@@ -263,7 +333,7 @@ man realloc in Linux, 2004:
 
        realloc()  changes the size of the memory block pointed to
        by ptr to size bytes.  The contents will be  unchanged  to
-       the minimum of the old and new sizes; newly allocated mem�
+       the minimum of the old and new sizes; newly allocated mem-
        ory will be uninitialized.  If ptr is NULL,  the	 call  is
        equivalent  to malloc(size); if size is equal to zero, the
        call is equivalent to free(ptr).	 Unless ptr is	NULL,  it
@@ -277,20 +347,24 @@ RETURN VALUE
        size  was equal to 0, either NULL or a pointer suitable to
        be passed to free() is returned.	 If realloc()  fails  the
        original	 block	is  left  untouched  - it is not freed or
-       moved. */
-
+       moved.
+@return	own: pointer to new mem block or NULL */
+UNIV_INTERN
 void*
 ut_realloc(
 /*=======*/
-			/* out, own: pointer to new mem block or NULL */
-	void*	ptr,	/* in: pointer to old block or NULL */
-	ulint	size)	/* in: desired size */
+	void*	ptr,	/*!< in: pointer to old block or NULL */
+	ulint	size)	/*!< in: desired size */
 {
 	ut_mem_block_t* block;
 	ulint		old_size;
 	ulint		min_size;
 	void*		new_ptr;
 
+	if (UNIV_LIKELY(srv_use_sys_malloc)) {
+		return(realloc(ptr, size));
+	}
+
 	if (ptr == NULL) {
 
 		return(ut_malloc(size));
@@ -329,15 +403,17 @@ ut_realloc(
 	return(new_ptr);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Frees in shutdown all allocated memory not freed yet. */
-
+UNIV_INTERN
 void
 ut_free_all_mem(void)
 /*=================*/
 {
 	ut_mem_block_t* block;
 
+	ut_a(ut_mem_block_list_inited);
+	ut_mem_block_list_inited = FALSE;
 	os_fast_mutex_free(&ut_list_mutex);
 
 	while ((block = UT_LIST_GET_FIRST(ut_mem_block_list))) {
@@ -358,19 +434,20 @@ ut_free_all_mem(void)
 			(ulong) ut_total_allocated_memory);
 	}
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/**************************************************************************
+/**********************************************************************//**
 Copies up to size - 1 characters from the NUL-terminated string src to
 dst, NUL-terminating the result. Returns strlen(src), so truncation
-occurred if the return value >= size. */
-
+occurred if the return value >= size.
+@return	strlen(src) */
+UNIV_INTERN
 ulint
 ut_strlcpy(
 /*=======*/
-				/* out: strlen(src) */
-	char*		dst,	/* in: destination buffer */
-	const char*	src,	/* in: source buffer */
-	ulint		size)	/* in: size of destination buffer */
+	char*		dst,	/*!< in: destination buffer */
+	const char*	src,	/*!< in: source buffer */
+	ulint		size)	/*!< in: size of destination buffer */
 {
 	ulint	src_size = strlen(src);
 
@@ -384,17 +461,17 @@ ut_strlcpy(
 	return(src_size);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
-(size - 1) bytes of src, not the first. */
-
+(size - 1) bytes of src, not the first.
+@return	strlen(src) */
+UNIV_INTERN
 ulint
 ut_strlcpy_rev(
 /*===========*/
-				/* out: strlen(src) */
-	char*		dst,	/* in: destination buffer */
-	const char*	src,	/* in: source buffer */
-	ulint		size)	/* in: size of destination buffer */
+	char*		dst,	/*!< in: destination buffer */
+	const char*	src,	/*!< in: source buffer */
+	ulint		size)	/*!< in: size of destination buffer */
 {
 	ulint	src_size = strlen(src);
 
@@ -407,18 +484,18 @@ ut_strlcpy_rev(
 	return(src_size);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Make a quoted copy of a NUL-terminated string.	Leading and trailing
 quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_memcpyq(). */
-
+See also ut_strlenq() and ut_memcpyq().
+@return	pointer to end of dest */
+UNIV_INTERN
 char*
 ut_strcpyq(
 /*=======*/
-				/* out: pointer to end of dest */
-	char*		dest,	/* in: output buffer */
-	char		q,	/* in: the quote character */
-	const char*	src)	/* in: null-terminated string */
+	char*		dest,	/*!< in: output buffer */
+	char		q,	/*!< in: the quote character */
+	const char*	src)	/*!< in: null-terminated string */
 {
 	while (*src) {
 		if ((*dest++ = *src++) == q) {
@@ -429,19 +506,19 @@ ut_strcpyq(
 	return(dest);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Make a quoted copy of a fixed-length string.  Leading and trailing
 quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_strcpyq(). */
-
+See also ut_strlenq() and ut_strcpyq().
+@return	pointer to end of dest */
+UNIV_INTERN
 char*
 ut_memcpyq(
 /*=======*/
-				/* out: pointer to end of dest */
-	char*		dest,	/* in: output buffer */
-	char		q,	/* in: the quote character */
-	const char*	src,	/* in: string to be quoted */
-	ulint		len)	/* in: length of src */
+	char*		dest,	/*!< in: output buffer */
+	char		q,	/*!< in: the quote character */
+	const char*	src,	/*!< in: string to be quoted */
+	ulint		len)	/*!< in: length of src */
 {
 	const char*	srcend = src + len;
 
@@ -454,16 +531,17 @@ ut_memcpyq(
 	return(dest);
 }
 
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
 Return the number of times s2 occurs in s1. Overlapping instances of s2
-are only counted once. */
-
+are only counted once.
+@return	the number of times s2 occurs in s1 */
+UNIV_INTERN
 ulint
 ut_strcount(
 /*========*/
-				/* out: the number of times s2 occurs in s1 */
-	const char*	s1,	/* in: string to search in */
-	const char*	s2)	/* in: string to search for */
+	const char*	s1,	/*!< in: string to search in */
+	const char*	s2)	/*!< in: string to search for */
 {
 	ulint	count = 0;
 	ulint	len = strlen(s2);
@@ -488,18 +566,17 @@ ut_strcount(
 	return(count);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Replace every occurrence of s1 in str with s2. Overlapping instances of s1
-are only replaced once. */
-
-char *
+are only replaced once.
+@return	own: modified string, must be freed with mem_free() */
+UNIV_INTERN
+char*
 ut_strreplace(
 /*==========*/
-				/* out, own: modified string, must be
-				freed with mem_free() */
-	const char*	str,	/* in: string to operate on */
-	const char*	s1,	/* in: string to replace */
-	const char*	s2)	/* in: string to replace s1 with */
+	const char*	str,	/*!< in: string to operate on */
+	const char*	s1,	/*!< in: string to replace */
+	const char*	s2)	/*!< in: string to replace s1 with */
 {
 	char*		new_str;
 	char*		ptr;
@@ -546,3 +623,84 @@ ut_strreplace(
 
 	return(new_str);
 }
+
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+void
+test_ut_str_sql_format()
+{
+	char	buf[128];
+	ulint	ret;
+
+#define CALL_AND_TEST(str, str_len, buf, buf_size, ret_expected, buf_expected)\
+	do {\
+		ibool	ok = TRUE;\
+		memset(buf, 'x', 10);\
+		buf[10] = '\0';\
+		fprintf(stderr, "TESTING \"%s\", %lu, %lu\n",\
+			str, (ulint) str_len, (ulint) buf_size);\
+		ret = ut_str_sql_format(str, str_len, buf, buf_size);\
+		if (ret != ret_expected) {\
+			fprintf(stderr, "expected ret %lu, got %lu\n",\
+				(ulint) ret_expected, ret);\
+			ok = FALSE;\
+		}\
+		if (strcmp((char*) buf, buf_expected) != 0) {\
+			fprintf(stderr, "expected buf \"%s\", got \"%s\"\n",\
+				buf_expected, buf);\
+			ok = FALSE;\
+		}\
+		if (ok) {\
+			fprintf(stderr, "OK: %lu, \"%s\"\n\n",\
+				(ulint) ret, buf);\
+		} else {\
+			return;\
+		}\
+	} while (0)
+
+	CALL_AND_TEST("abcd", 4, buf, 0, 0, "xxxxxxxxxx");
+
+	CALL_AND_TEST("abcd", 4, buf, 1, 1, "");
+
+	CALL_AND_TEST("abcd", 4, buf, 2, 1, "");
+
+	CALL_AND_TEST("abcd", 0, buf, 3, 3, "''");
+	CALL_AND_TEST("abcd", 1, buf, 3, 1, "");
+	CALL_AND_TEST("abcd", 2, buf, 3, 1, "");
+	CALL_AND_TEST("abcd", 3, buf, 3, 1, "");
+	CALL_AND_TEST("abcd", 4, buf, 3, 1, "");
+
+	CALL_AND_TEST("abcd", 0, buf, 4, 3, "''");
+	CALL_AND_TEST("abcd", 1, buf, 4, 4, "'a'");
+	CALL_AND_TEST("abcd", 2, buf, 4, 4, "'a'");
+	CALL_AND_TEST("abcd", 3, buf, 4, 4, "'a'");
+	CALL_AND_TEST("abcd", 4, buf, 4, 4, "'a'");
+	CALL_AND_TEST("abcde", 5, buf, 4, 4, "'a'");
+	CALL_AND_TEST("'", 1, buf, 4, 3, "''");
+	CALL_AND_TEST("''", 2, buf, 4, 3, "''");
+	CALL_AND_TEST("a'", 2, buf, 4, 4, "'a'");
+	CALL_AND_TEST("'a", 2, buf, 4, 3, "''");
+	CALL_AND_TEST("ab", 2, buf, 4, 4, "'a'");
+
+	CALL_AND_TEST("abcdef", 0, buf, 5, 3, "''");
+	CALL_AND_TEST("abcdef", 1, buf, 5, 4, "'a'");
+	CALL_AND_TEST("abcdef", 2, buf, 5, 5, "'ab'");
+	CALL_AND_TEST("abcdef", 3, buf, 5, 5, "'ab'");
+	CALL_AND_TEST("abcdef", 4, buf, 5, 5, "'ab'");
+	CALL_AND_TEST("abcdef", 5, buf, 5, 5, "'ab'");
+	CALL_AND_TEST("abcdef", 6, buf, 5, 5, "'ab'");
+	CALL_AND_TEST("'", 1, buf, 5, 5, "''''");
+	CALL_AND_TEST("''", 2, buf, 5, 5, "''''");
+	CALL_AND_TEST("a'", 2, buf, 5, 4, "'a'");
+	CALL_AND_TEST("'a", 2, buf, 5, 5, "''''");
+	CALL_AND_TEST("ab", 2, buf, 5, 5, "'ab'");
+	CALL_AND_TEST("abc", 3, buf, 5, 5, "'ab'");
+
+	CALL_AND_TEST("ab", 2, buf, 6, 5, "'ab'");
+
+	CALL_AND_TEST("a'b'c", 5, buf, 32, 10, "'a''b''c'");
+	CALL_AND_TEST("a'b'c'", 6, buf, 32, 12, "'a''b''c'''");
+}
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/ut/ut0rnd.c b/storage/innodb_plugin/ut/ut0rnd.c
similarity index 51%
rename from storage/innobase/ut/ut0rnd.c
rename to storage/innodb_plugin/ut/ut0rnd.c
index 016809e0474..cefd0990ecc 100644
--- a/storage/innobase/ut/ut0rnd.c
+++ b/storage/innodb_plugin/ut/ut0rnd.c
@@ -1,7 +1,24 @@
-/*******************************************************************
-Random numbers and hashing
+/*****************************************************************************
 
-(c) 1994, 1995 Innobase Oy
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/***************************************************************//**
+@file ut/ut0rnd.c
+Random numbers and hashing
 
 Created 5/11/1994 Heikki Tuuri
 ********************************************************************/
@@ -12,23 +29,25 @@ Created 5/11/1994 Heikki Tuuri
 #include "ut0rnd.ic"
 #endif
 
-/* These random numbers are used in ut_find_prime */
+/** These random numbers are used in ut_find_prime */
+/*@{*/
 #define	UT_RANDOM_1	1.0412321
 #define	UT_RANDOM_2	1.1131347
 #define UT_RANDOM_3	1.0132677
+/*@}*/
 
+/** Seed value of ut_rnd_gen_ulint(). */
+UNIV_INTERN ulint	ut_rnd_ulint_counter = 65654363;
 
-ulint	ut_rnd_ulint_counter = 65654363;
-
-/***************************************************************
+/***********************************************************//**
 Looks for a prime number slightly greater than the given argument.
-The prime is chosen so that it is not near any power of 2. */
-
+The prime is chosen so that it is not near any power of 2.
+@return	prime */
+UNIV_INTERN
 ulint
 ut_find_prime(
 /*==========*/
-			/* out: prime */
-	ulint	n)	/* in: positive number > 100 */
+	ulint	n)	/*!< in: positive number > 100 */
 {
 	ulint	pow2;
 	ulint	i;
diff --git a/storage/innobase/ut/ut0ut.c b/storage/innodb_plugin/ut/ut0ut.c
similarity index 58%
rename from storage/innobase/ut/ut0ut.c
rename to storage/innodb_plugin/ut/ut0ut.c
index 1ae43172894..e4cc226fbad 100644
--- a/storage/innobase/ut/ut0ut.c
+++ b/storage/innodb_plugin/ut/ut0ut.c
@@ -1,7 +1,31 @@
-/*******************************************************************
-Various utilities for Innobase.
+/*****************************************************************************
 
-(c) 1994, 1995 Innobase Oy
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Sun Microsystems, Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
+are described briefly in the InnoDB documentation. The contributions by
+Sun Microsystems are incorporated with their permission, and subject to the
+conditions contained in the file COPYING.Sun_Microsystems.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/***************************************************************//**
+@file ut/ut0ut.c
+Various utilities for Innobase.
 
 Created 5/11/1994 Heikki Tuuri
 ********************************************************************/
@@ -16,32 +40,35 @@ Created 5/11/1994 Heikki Tuuri
 #include <string.h>
 #include <ctype.h>
 
-#include "ut0sort.h"
-#include "trx0trx.h"
-#include "ha_prototypes.h"
+#ifndef UNIV_HOTBACKUP
+# include "trx0trx.h"
+# include "ha_prototypes.h"
+# include "mysql_com.h" /* NAME_LEN */
+#endif /* UNIV_HOTBACKUP */
 
-ibool	ut_always_false	= FALSE;
+/** A constant to prevent the compiler from optimizing ut_delay() away. */
+UNIV_INTERN ibool	ut_always_false	= FALSE;
 
 #ifdef __WIN__
-/*********************************************************************
+/*****************************************************************//**
 NOTE: The Windows epoch starts from 1601/01/01 whereas the Unix
 epoch starts from 1970/1/1. For selection of constant see:
 http://support.microsoft.com/kb/167296/ */
-#define WIN_TO_UNIX_DELTA_USEC  ((ib_longlong) 11644473600000000ULL)
+#define WIN_TO_UNIX_DELTA_USEC  ((ib_int64_t) 11644473600000000ULL)
 
 
-/*********************************************************************
-This is the Windows version of gettimeofday(2).*/
+/*****************************************************************//**
+This is the Windows version of gettimeofday(2).
+@return	0 if all OK else -1 */
 static
 int
 ut_gettimeofday(
 /*============*/
-			/* out: 0 if all OK else -1 */
-	struct timeval*	tv,	/* out: Values are relative to Unix epoch */
-	void*		tz)	/* in: not used */
+	struct timeval*	tv,	/*!< out: Values are relative to Unix epoch */
+	void*		tz)	/*!< in: not used */
 {
 	FILETIME	ft;
-	ib_longlong	tm;
+	ib_int64_t	tm;
 
 	if (!tv) {
 		errno = EINVAL;
@@ -50,7 +77,7 @@ ut_gettimeofday(
 
 	GetSystemTimeAsFileTime(&ft);
 
-	tm = (ib_longlong) ft.dwHighDateTime << 32;
+	tm = (ib_int64_t) ft.dwHighDateTime << 32;
 	tm |= ft.dwLowDateTime;
 
 	ut_a(tm >= 0);	/* If tm wraps over to negative, the quotient / 10
@@ -68,42 +95,36 @@ ut_gettimeofday(
 	return(0);
 }
 #else
+/** An alias for gettimeofday(2).  On Microsoft Windows, we have to
+reimplement this function. */
 #define	ut_gettimeofday		gettimeofday
 #endif
 
-/************************************************************
+/********************************************************//**
 Gets the high 32 bits in a ulint. That is makes a shift >> 32,
 but since there seem to be compiler bugs in both gcc and Visual C++,
-we do this by a special conversion. */
-
+we do this by a special conversion.
+@return	a >> 32 */
+UNIV_INTERN
 ulint
 ut_get_high32(
 /*==========*/
-			/* out: a >> 32 */
-	ulint	a)	/* in: ulint */
+	ulint	a)	/*!< in: ulint */
 {
-	ib_longlong	i;
+	ib_int64_t	i;
 
-	i = (ib_longlong)a;
+	i = (ib_int64_t)a;
 
 	i = i >> 32;
 
 	return((ulint)i);
 }
 
-/************************************************************
-The following function returns elapsed CPU time in milliseconds. */
-
-ulint
-ut_clock(void)
-{
-	return((clock() * 1000) / CLOCKS_PER_SEC);
-}
-
-/**************************************************************
+/**********************************************************//**
 Returns system time. We do not specify the format of the time returned:
-the only way to manipulate it is to use the function ut_difftime. */
-
+the only way to manipulate it is to use the function ut_difftime.
+@return	system time */
+UNIV_INTERN
 ib_time_t
 ut_time(void)
 /*=========*/
@@ -111,18 +132,18 @@ ut_time(void)
 	return(time(NULL));
 }
 
-/**************************************************************
+/**********************************************************//**
 Returns system time.
 Upon successful completion, the value 0 is returned; otherwise the
 value -1 is returned and the global variable errno is set to indicate the
-error. */
-
+error.
+@return	0 on success, -1 otherwise */
+UNIV_INTERN
 int
 ut_usectime(
 /*========*/
-			/* out: 0 on success, -1 otherwise */
-	ulint*	sec,	/* out: seconds since the Epoch */
-	ulint*	ms)	/* out: microseconds since the Epoch+*sec */
+	ulint*	sec,	/*!< out: seconds since the Epoch */
+	ulint*	ms)	/*!< out: microseconds since the Epoch+*sec */
 {
 	struct timeval	tv;
 	int		ret;
@@ -153,43 +174,51 @@ ut_usectime(
 	return(ret);
 }
 
-/**************************************************************
-Returns diff in microseconds (end_sec,end_ms) - (start_sec,start_ms) */
-
-ib_longlong
-ut_usecdiff(
-/*========*/
-	ulint	end_sec,	/* in: seconds since the Epoch */
-	ulint	end_ms,	/* in: microseconds since the Epoch+*sec1 */
-	ulint	start_sec,	/* in: seconds since the Epoch */
-	ulint	start_ms)	/* in: microseconds since the Epoch+*sec2 */
+/**********************************************************//**
+Returns the number of microseconds since epoch. Similar to
+time(3), the return value is also stored in *tloc, provided
+that tloc is non-NULL.
+@return	us since epoch */
+UNIV_INTERN
+ullint
+ut_time_us(
+/*=======*/
+	ullint*	tloc)	/*!< out: us since epoch, if non-NULL */
 {
-  ib_longlong end_mics = end_sec * 1000000LL + end_ms;
-  ib_longlong start_mics = start_sec * 1000000LL + start_ms;
+	struct timeval	tv;
+	ullint		us;
 
-  return end_mics - start_mics;
+	ut_gettimeofday(&tv, NULL);
+
+	us = (ullint) tv.tv_sec * 1000000 + tv.tv_usec;
+
+	if (tloc != NULL) {
+		*tloc = us;
+	}
+
+	return(us);
 }
 
-/**************************************************************
-Returns the difference of two times in seconds. */
-
+/**********************************************************//**
+Returns the difference of two times in seconds.
+@return	time2 - time1 expressed in seconds */
+UNIV_INTERN
 double
 ut_difftime(
 /*========*/
-				/* out: time2 - time1 expressed in seconds */
-	ib_time_t	time2,	/* in: time */
-	ib_time_t	time1)	/* in: time */
+	ib_time_t	time2,	/*!< in: time */
+	ib_time_t	time1)	/*!< in: time */
 {
 	return(difftime(time2, time1));
 }
 
-/**************************************************************
+/**********************************************************//**
 Prints a timestamp to a file. */
-
+UNIV_INTERN
 void
 ut_print_timestamp(
 /*===============*/
-	FILE*  file) /* in: file where to print */
+	FILE*  file) /*!< in: file where to print */
 {
 #ifdef __WIN__
 	SYSTEMTIME cal_tm;
@@ -226,13 +255,13 @@ ut_print_timestamp(
 #endif
 }
 
-/**************************************************************
+/**********************************************************//**
 Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
-
+UNIV_INTERN
 void
 ut_sprintf_timestamp(
 /*=================*/
-	char*	buf) /* in: buffer where to sprintf */
+	char*	buf) /*!< in: buffer where to sprintf */
 {
 #ifdef __WIN__
 	SYSTEMTIME cal_tm;
@@ -269,14 +298,15 @@ ut_sprintf_timestamp(
 #endif
 }
 
-/**************************************************************
+#ifdef UNIV_HOTBACKUP
+/**********************************************************//**
 Sprintfs a timestamp to a buffer with no spaces and with ':' characters
 replaced by '_'. */
-
+UNIV_INTERN
 void
 ut_sprintf_timestamp_without_extra_chars(
 /*=====================================*/
-	char*	buf) /* in: buffer where to sprintf */
+	char*	buf) /*!< in: buffer where to sprintf */
 {
 #ifdef __WIN__
 	SYSTEMTIME cal_tm;
@@ -313,15 +343,15 @@ ut_sprintf_timestamp_without_extra_chars(
 #endif
 }
 
-/**************************************************************
+/**********************************************************//**
 Returns current year, month, day. */
-
+UNIV_INTERN
 void
 ut_get_year_month_day(
 /*==================*/
-	ulint*	year,	/* out: current year */
-	ulint*	month,	/* out: month */
-	ulint*	day)	/* out: day */
+	ulint*	year,	/*!< out: current year */
+	ulint*	month,	/*!< out: month */
+	ulint*	day)	/*!< out: day */
 {
 #ifdef __WIN__
 	SYSTEMTIME cal_tm;
@@ -349,24 +379,26 @@ ut_get_year_month_day(
 	*day = (ulint)cal_tm_ptr->tm_mday;
 #endif
 }
+#endif /* UNIV_HOTBACKUP */
 
-/*****************************************************************
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
 Runs an idle loop on CPU. The argument gives the desired delay
-in microseconds on 100 MHz Pentium + Visual C++. */
-
+in microseconds on 100 MHz Pentium + Visual C++.
+@return	dummy value */
+UNIV_INTERN
 ulint
 ut_delay(
 /*=====*/
-			/* out: dummy value */
-	ulint	delay)	/* in: delay in microseconds on 100 MHz Pentium */
+	ulint	delay)	/*!< in: delay in microseconds on 100 MHz Pentium */
 {
 	ulint	i, j;
 
 	j = 0;
 
 	for (i = 0; i < delay * 50; i++) {
-                PAUSE_INSTRUCTION();
 		j += i;
+		UT_RELAX_CPU();
 	}
 
 	if (ut_always_false) {
@@ -375,16 +407,17 @@ ut_delay(
 
 	return(j);
 }
+#endif /* !UNIV_HOTBACKUP */
 
-/*****************************************************************
+/*************************************************************//**
 Prints the contents of a memory buffer in hex and ascii. */
-
+UNIV_INTERN
 void
 ut_print_buf(
 /*=========*/
-	FILE*		file,	/* in: file where to print */
-	const void*	buf,	/* in: memory buffer */
-	ulint		len)	/* in: length of the buffer */
+	FILE*		file,	/*!< in: file where to print */
+	const void*	buf,	/*!< in: memory buffer */
+	ulint		len)	/*!< in: length of the buffer */
 {
 	const byte*	data;
 	ulint		i;
@@ -409,25 +442,14 @@ ut_print_buf(
 	putc(';', file);
 }
 
-/****************************************************************
-Sort function for ulint arrays. */
-
-void
-ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high)
-/*============================================================*/
-{
-	UT_SORT_FUNCTION_BODY(ut_ulint_sort, arr, aux_arr, low, high,
-			      ut_ulint_cmp);
-}
-
-/*****************************************************************
-Calculates fast the number rounded up to the nearest power of 2. */
-
+/*************************************************************//**
+Calculates fast the number rounded up to the nearest power of 2.
+@return	first power of 2 which is >= n */
+UNIV_INTERN
 ulint
 ut_2_power_up(
 /*==========*/
-			/* out: first power of 2 which is >= n */
-	ulint	n)	/* in: number != 0 */
+	ulint	n)	/*!< in: number != 0 */
 {
 	ulint	res;
 
@@ -442,14 +464,14 @@ ut_2_power_up(
 	return(res);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Outputs a NUL-terminated file name, quoted with apostrophes. */
-
+UNIV_INTERN
 void
 ut_print_filename(
 /*==============*/
-	FILE*		f,	/* in: output stream */
-	const char*	name)	/* in: name to print */
+	FILE*		f,	/*!< in: output stream */
+	const char*	name)	/*!< in: name to print */
 {
 	putc('\'', f);
 	for (;;) {
@@ -467,71 +489,62 @@ ut_print_filename(
 done:
 	putc('\'', f);
 }
-
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
 Outputs a fixed-length string, quoted as an SQL identifier.
 If the string contains a slash '/', the string will be
 output as two identifiers separated by a period (.),
 as in SQL database_name.identifier. */
-
+UNIV_INTERN
 void
 ut_print_name(
 /*==========*/
-	FILE*		f,	/* in: output stream */
-	trx_t*		trx,	/* in: transaction */
-	ibool		table_id,/* in: TRUE=print a table name,
+	FILE*		f,	/*!< in: output stream */
+	trx_t*		trx,	/*!< in: transaction */
+	ibool		table_id,/*!< in: TRUE=print a table name,
 				FALSE=print other identifier */
-	const char*	name)	/* in: name to print */
+	const char*	name)	/*!< in: name to print */
 {
 	ut_print_namel(f, trx, table_id, name, strlen(name));
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Outputs a fixed-length string, quoted as an SQL identifier.
 If the string contains a slash '/', the string will be
 output as two identifiers separated by a period (.),
 as in SQL database_name.identifier. */
-
+UNIV_INTERN
 void
 ut_print_namel(
 /*===========*/
-	FILE*		f,	/* in: output stream */
-	trx_t*		trx,	/* in: transaction (NULL=no quotes) */
-	ibool		table_id,/* in: TRUE=print a table name,
+	FILE*		f,	/*!< in: output stream */
+	trx_t*		trx,	/*!< in: transaction (NULL=no quotes) */
+	ibool		table_id,/*!< in: TRUE=print a table name,
 				FALSE=print other identifier */
-	const char*	name,	/* in: name to print */
-	ulint		namelen)/* in: length of name */
+	const char*	name,	/*!< in: name to print */
+	ulint		namelen)/*!< in: length of name */
 {
-#ifdef UNIV_HOTBACKUP
-	fwrite(name, 1, namelen, f);
-#else
-	if (table_id) {
-		char*	slash = memchr(name, '/', namelen);
-		if (!slash) {
+	/* 2 * NAME_LEN for database and table name,
+	and some slack for the #mysql50# prefix and quotes */
+	char		buf[3 * NAME_LEN];
+	const char*	bufend;
 
-			goto no_db_name;
-		}
+	bufend = innobase_convert_name(buf, sizeof buf,
+				       name, namelen,
+				       trx ? trx->mysql_thd : NULL,
+				       table_id);
 
-		/* Print the database name and table name separately. */
-		innobase_print_identifier(f, trx, TRUE, name, slash - name);
-		putc('.', f);
-		innobase_print_identifier(f, trx, TRUE, slash + 1,
-					  namelen - (slash - name) - 1);
-	} else {
-no_db_name:
-		innobase_print_identifier(f, trx, table_id, name, namelen);
-	}
-#endif
+	fwrite(buf, 1, bufend - buf, f);
 }
 
-/**************************************************************************
+/**********************************************************************//**
 Catenate files. */
-
+UNIV_INTERN
 void
 ut_copy_file(
 /*=========*/
-	FILE*	dest,	/* in: output file */
-	FILE*	src)	/* in: input file to be appended to output */
+	FILE*	dest,	/*!< in: output file */
+	FILE*	src)	/*!< in: input file to be appended to output */
 {
 	long	len = ftell(src);
 	char	buf[4096];
@@ -549,22 +562,23 @@ ut_copy_file(
 		}
 	} while (len > 0);
 }
-
-/**************************************************************************
-snprintf(). */
+#endif /* !UNIV_HOTBACKUP */
 
 #ifdef __WIN__
-#include <stdarg.h>
+# include <stdarg.h>
+/**********************************************************************//**
+A substitute for snprintf(3), formatted output conversion into
+a limited buffer.
+@return number of characters that would have been printed if the size
+were unlimited, not including the terminating '\0'. */
+UNIV_INTERN
 int
 ut_snprintf(
-				/* out: number of characters that would
-				have been printed if the size were
-				unlimited, not including the terminating
-				'\0'. */
-	char*		str,	/* out: string */
-	size_t		size,	/* in: str size */
-	const char*	fmt,	/* in: format */
-	...)			/* in: format values */
+/*========*/
+	char*		str,	/*!< out: string */
+	size_t		size,	/*!< in: str size */
+	const char*	fmt,	/*!< in: format */
+	...)			/*!< in: format values */
 {
 	int	res;
 	va_list	ap1;
diff --git a/storage/innodb_plugin/ut/ut0vec.c b/storage/innodb_plugin/ut/ut0vec.c
new file mode 100644
index 00000000000..45f2bc9771f
--- /dev/null
+++ b/storage/innodb_plugin/ut/ut0vec.c
@@ -0,0 +1,79 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file ut/ut0vec.c
+A vector of pointers to data items
+
+Created 4/6/2006 Osku Salerma
+************************************************************************/
+
+#include "ut0vec.h"
+#ifdef UNIV_NONINL
+#include "ut0vec.ic"
+#endif
+#include <string.h>
+
+/****************************************************************//**
+Create a new vector with the given initial size.
+@return	vector */
+UNIV_INTERN
+ib_vector_t*
+ib_vector_create(
+/*=============*/
+	mem_heap_t*	heap,	/*!< in: heap */
+	ulint		size)	/*!< in: initial size */
+{
+	ib_vector_t*	vec;
+
+	ut_a(size > 0);
+
+	vec = mem_heap_alloc(heap, sizeof(*vec));
+
+	vec->heap = heap;
+	vec->data = mem_heap_alloc(heap, sizeof(void*) * size);
+	vec->used = 0;
+	vec->total = size;
+
+	return(vec);
+}
+
+/****************************************************************//**
+Push a new element to the vector, increasing its size if necessary. */
+UNIV_INTERN
+void
+ib_vector_push(
+/*===========*/
+	ib_vector_t*	vec,	/*!< in: vector */
+	void*		elem)	/*!< in: data element */
+{
+	if (vec->used >= vec->total) {
+		void**	new_data;
+		ulint	new_total = vec->total * 2;
+
+		new_data = mem_heap_alloc(vec->heap,
+					  sizeof(void*) * new_total);
+		memcpy(new_data, vec->data, sizeof(void*) * vec->total);
+
+		vec->data = new_data;
+		vec->total = new_total;
+	}
+
+	vec->data[vec->used] = elem;
+	vec->used++;
+}
diff --git a/storage/innodb_plugin/ut/ut0wqueue.c b/storage/innodb_plugin/ut/ut0wqueue.c
new file mode 100644
index 00000000000..5220d1e17f4
--- /dev/null
+++ b/storage/innodb_plugin/ut/ut0wqueue.c
@@ -0,0 +1,118 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+#include "ut0wqueue.h"
+
+/*******************************************************************//**
+@file ut/ut0wqueue.c
+A work queue
+
+Created 4/26/2006 Osku Salerma
+************************************************************************/
+
+/****************************************************************//**
+Create a new work queue.
+@return	work queue */
+UNIV_INTERN
+ib_wqueue_t*
+ib_wqueue_create(void)
+/*===================*/
+{
+	ib_wqueue_t*	wq = mem_alloc(sizeof(ib_wqueue_t));
+
+	mutex_create(&wq->mutex, SYNC_WORK_QUEUE);
+
+	wq->items = ib_list_create();
+	wq->event = os_event_create(NULL);
+
+	return(wq);
+}
+
+/****************************************************************//**
+Free a work queue. */
+UNIV_INTERN
+void
+ib_wqueue_free(
+/*===========*/
+	ib_wqueue_t*	wq)	/*!< in: work queue */
+{
+	ut_a(!ib_list_get_first(wq->items));
+
+	mutex_free(&wq->mutex);
+	ib_list_free(wq->items);
+	os_event_free(wq->event);
+
+	mem_free(wq);
+}
+
+/****************************************************************//**
+Add a work item to the queue. */
+UNIV_INTERN
+void
+ib_wqueue_add(
+/*==========*/
+	ib_wqueue_t*	wq,	/*!< in: work queue */
+	void*		item,	/*!< in: work item */
+	mem_heap_t*	heap)	/*!< in: memory heap to use for allocating the
+				list node */
+{
+	mutex_enter(&wq->mutex);
+
+	ib_list_add_last(wq->items, item, heap);
+	os_event_set(wq->event);
+
+	mutex_exit(&wq->mutex);
+}
+
+/****************************************************************//**
+Wait for a work item to appear in the queue.
+@return	work item */
+UNIV_INTERN
+void*
+ib_wqueue_wait(
+/*===========*/
+	ib_wqueue_t*	wq)	/*!< in: work queue */
+{
+	ib_list_node_t*	node;
+
+	for (;;) {
+		os_event_wait(wq->event);
+
+		mutex_enter(&wq->mutex);
+
+		node = ib_list_get_first(wq->items);
+
+		if (node) {
+			ib_list_remove(wq->items, node);
+
+			if (!ib_list_get_first(wq->items)) {
+				/* We must reset the event when the list
+				gets emptied. */
+				os_event_reset(wq->event);
+			}
+
+			break;
+		}
+
+		mutex_exit(&wq->mutex);
+	}
+
+	mutex_exit(&wq->mutex);
+
+	return(node->data);
+}
diff --git a/storage/innodb_plugin/win-plugin/README b/storage/innodb_plugin/win-plugin/README
new file mode 100644
index 00000000000..00f4e996a3f
--- /dev/null
+++ b/storage/innodb_plugin/win-plugin/README
@@ -0,0 +1,22 @@
+This directory contains patches that need to be applied to the MySQL
+source tree in order to build the dynamic plugin on Windows --
+HA_INNODB.DLL. Please note the followings when adding the patches:
+
+* The patch must be applied from the mysql top-level source directory.
+  patch -p0 < win-plugin.diff
+* The patch filenames end in ".diff".
+* All patches here are expected to apply cleanly to the latest MySQL 5.1
+  tree when storage/innobase is replaced with this InnoDB branch.
+
+When applying the patch, the following files will be modified:
+
+  * CMakeLists.txt
+  * sql/CMakeLists.txt
+  * win/configure.js
+
+Also, two new files will be added:
+
+  * sql/mysqld.def
+  * sql/mysqld_x64.def
+
+You can get "patch" utility for Windows from http://unxutils.sourceforge.net/
diff --git a/storage/innodb_plugin/win-plugin/win-plugin.diff b/storage/innodb_plugin/win-plugin/win-plugin.diff
new file mode 100644
index 00000000000..4b3354ac4de
--- /dev/null
+++ b/storage/innodb_plugin/win-plugin/win-plugin.diff
@@ -0,0 +1,279 @@
+diff -Nur CMakeLists.txt.orig CMakeLists.txt
+--- CMakeLists.txt.orig	2008-10-03 12:25:41 -05:00
++++ CMakeLists.txt	2008-09-26 17:32:51 -05:00
+@@ -254,9 +254,9 @@
+ IF(WITH_FEDERATED_STORAGE_ENGINE)
+   ADD_SUBDIRECTORY(storage/federated)
+ ENDIF(WITH_FEDERATED_STORAGE_ENGINE)
+-IF(WITH_INNOBASE_STORAGE_ENGINE)
++IF(WITH_INNOBASE_STORAGE_ENGINE OR INNODB_DYNAMIC_PLUGIN)
+   ADD_SUBDIRECTORY(storage/innobase)
+-ENDIF(WITH_INNOBASE_STORAGE_ENGINE)
++ENDIF(WITH_INNOBASE_STORAGE_ENGINE OR INNODB_DYNAMIC_PLUGIN)
+ ADD_SUBDIRECTORY(sql)
+ ADD_SUBDIRECTORY(server-tools/instance-manager)
+ ADD_SUBDIRECTORY(libmysql)
+ 
+diff -Nur sql/CMakeLists.txt.orig sql/CMakeLists.txt
+--- sql/CMakeLists.txt.orig	2008-10-03 12:25:41 -05:00
++++ sql/CMakeLists.txt	2008-09-24 03:58:19 -05:00
+@@ -98,6 +98,15 @@
+                       LINK_FLAGS  "/PDB:${CMAKE_CFG_INTDIR}/mysqld${MYSQLD_EXE_SUFFIX}.pdb")
+ ENDIF(cmake_version EQUAL 20406)
+ 
++# Checks for 64-bit version
++IF(CMAKE_SIZEOF_VOID_P MATCHES 8)
++SET_TARGET_PROPERTIES(mysqld PROPERTIES
++                      LINK_FLAGS "/def:\"${PROJECT_SOURCE_DIR}/sql/mysqld_x64.def\"")
++ELSE(CMAKE_SIZEOF_VOID_P MATCHES 8)
++SET_TARGET_PROPERTIES(mysqld PROPERTIES
++                      LINK_FLAGS "/def:\"${PROJECT_SOURCE_DIR}/sql/mysqld.def\"")
++ENDIF(CMAKE_SIZEOF_VOID_P MATCHES 8)
++
+ IF(EMBED_MANIFESTS)
+   MYSQL_EMBED_MANIFEST("mysqld" "asInvoker")
+ ENDIF(EMBED_MANIFESTS)
+
+diff -Nur sql/mysqld.def.orig sql/mysqld.def
+--- sql/mysqld.def.orig	1969-12-31 18:00:00 -06:00
++++ sql/mysqld.def	2009-04-09 02:20:32 -05:00
+@@ -0,0 +1,111 @@
++EXPORTS
++	?use_hidden_primary_key@handler@@UAEXXZ
++	?get_dynamic_partition_info@handler@@UAEXPAUPARTITION_INFO@@I@Z
++	?read_first_row@handler@@UAEHPAEI@Z
++	?read_range_next@handler@@UAEHXZ
++	?read_range_first@handler@@UAEHPBUst_key_range@@0_N1@Z
++	?read_multi_range_first@handler@@UAEHPAPAUst_key_multi_range@@PAU2@I_NPAUst_handler_buffer@@@Z
++	?read_multi_range_next@handler@@UAEHPAPAUst_key_multi_range@@@Z
++	?index_read_idx_map@handler@@UAEHPAEIPBEKW4ha_rkey_function@@@Z
++	?print_error@handler@@UAEXHH@Z
++	?clone@handler@@UAEPAV1@PAUst_mem_root@@@Z
++	?get_auto_increment@handler@@UAEX_K00PA_K1@Z
++	?index_next_same@handler@@UAEHPAEPBEI@Z
++	?get_error_message@handler@@UAE_NHPAVString@@@Z
++	?ha_thd@handler@@IBEPAVTHD@@XZ
++	?update_auto_increment@handler@@QAEHXZ
++	?ha_statistic_increment@handler@@IBEXPQsystem_status_var@@K@Z
++	?trans_register_ha@@YAXPAVTHD@@_NPAUhandlerton@@@Z
++	?cmp@Field_blob@@QAEHPBEI0I@Z
++	?set_time@Field_timestamp@@QAEXXZ
++	?sql_print_error@@YAXPBDZZ
++	?sql_print_warning@@YAXPBDZZ
++	?check_global_access@@YA_NPAVTHD@@K@Z
++	?schema_table_store_record@@YA_NPAVTHD@@PAUst_table@@@Z
++	?get_quote_char_for_identifier@@YAHPAVTHD@@PBDI@Z
++	?copy@String@@QAE_NXZ
++	?copy@String@@QAE_NABV1@@Z
++	?copy@String@@QAE_NPBDIPAUcharset_info_st@@@Z
++	?copy_and_convert@@YAIPADIPAUcharset_info_st@@PBDI1PAI@Z
++	?filename_to_tablename@@YAIPBDPADI@Z
++	?strconvert@@YAIPAUcharset_info_st@@PBD0PADIPAI@Z
++	?calculate_key_len@@YAIPAUst_table@@IPBEK@Z
++	?sql_alloc@@YAPAXI@Z
++	?localtime_to_TIME@@YAXPAUst_mysql_time@@PAUtm@@@Z
++	?push_warning@@YAPAVMYSQL_ERROR@@PAVTHD@@W4enum_warning_level@1@IPBD@Z
++	?push_warning_printf@@YAXPAVTHD@@W4enum_warning_level@MYSQL_ERROR@@IPBDZZ
++	?drop_table@handler@@EAEXPBD@Z
++	?column_bitmaps_signal@handler@@UAEXXZ
++	?delete_table@handler@@MAEHPBD@Z
++	?rename_table@handler@@MAEHPBD0@Z
++	?key_map_empty@@3V?$Bitmap@$0EA@@@B
++	?THR_THD@@3PAVTHD@@A
++	?end_of_list@@3Ulist_node@@A
++	?mysql_tmpdir_list@@3Ust_my_tmpdir@@A
++	mysql_query_cache_invalidate4
++	thd_query
++	thd_sql_command
++	thd_get_thread_id
++	thd_get_xid
++	thd_slave_thread
++	thd_non_transactional_update
++	thd_mark_transaction_to_rollback
++	thd_security_context
++	thd_charset
++	thd_test_options
++	thd_ha_data
++	thd_killed
++	thd_tx_isolation
++	thd_tablespace_op
++	thd_sql_command
++	thd_memdup
++	thd_make_lex_string
++	thd_in_lock_tables
++	thd_binlog_format
++	_my_hash_init
++	my_hash_free
++	my_tmpdir
++	check_if_legal_filename
++	my_filename
++	my_sync_dir_by_file
++	alloc_root
++	thr_lock_data_init
++	thr_lock_init
++	thr_lock_delete
++	my_multi_malloc
++	get_charset
++	unpack_filename
++	my_hash_insert
++	my_hash_search
++	my_hash_delete
++	mysql_bin_log_file_pos
++	mysql_bin_log_file_name
++	mysqld_embedded
++	my_thread_name
++	my_malloc
++	my_no_flags_free
++	_sanity
++	_mymalloc
++	_myfree
++	_my_strdup
++	_my_thread_var
++	my_error
++	pthread_cond_init
++	pthread_cond_signal
++	pthread_cond_wait
++	pthread_cond_destroy
++	localtime_r
++	my_strdup
++	deflate
++	deflateEnd
++	deflateReset
++	deflateInit2_
++	inflateEnd
++	inflateInit_
++	inflate
++	compressBound
++	inflateInit2_
++	adler32
++	longlong2str
++	strend
++	my_snprintf
+
+diff -Nur sql/mysqld_x64.def.orig sql/mysqld_x64.def
+--- sql/mysqld_x64.def.orig	1969-12-31 18:00:00 -06:00
++++ sql/mysqld_x64.def		2009-04-09 02:22:04 -05:00
+@@ -0,0 +1,111 @@
++EXPORTS
++	?use_hidden_primary_key@handler@@UEAAXXZ
++	?get_dynamic_partition_info@handler@@UEAAXPEAUPARTITION_INFO@@I@Z
++	?read_first_row@handler@@UEAAHPEAEI@Z
++	?read_range_next@handler@@UEAAHXZ
++	?read_range_first@handler@@UEAAHPEBUst_key_range@@0_N1@Z
++	?read_multi_range_first@handler@@UEAAHPEAPEAUst_key_multi_range@@PEAU2@I_NPEAUst_handler_buffer@@@Z
++	?read_multi_range_next@handler@@UEAAHPEAPEAUst_key_multi_range@@@Z
++	?index_read_idx_map@handler@@UEAAHPEAEIPEBEKW4ha_rkey_function@@@Z
++	?print_error@handler@@UEAAXHH@Z
++	?clone@handler@@UEAAPEAV1@PEAUst_mem_root@@@Z
++	?get_auto_increment@handler@@UEAAX_K00PEA_K1@Z
++	?index_next_same@handler@@UEAAHPEAEPEBEI@Z
++	?get_error_message@handler@@UEAA_NHPEAVString@@@Z
++	?ha_thd@handler@@IEBAPEAVTHD@@XZ
++	?update_auto_increment@handler@@QEAAHXZ
++	?ha_statistic_increment@handler@@IEBAXPEQsystem_status_var@@K@Z
++	?trans_register_ha@@YAXPEAVTHD@@_NPEAUhandlerton@@@Z
++	?cmp@Field_blob@@QEAAHPEBEI0I@Z
++	?set_time@Field_timestamp@@QEAAXXZ
++	?sql_print_error@@YAXPEBDZZ
++	?sql_print_warning@@YAXPEBDZZ
++	?check_global_access@@YA_NPEAVTHD@@K@Z
++	?schema_table_store_record@@YA_NPEAVTHD@@PEAUst_table@@@Z
++	?get_quote_char_for_identifier@@YAHPEAVTHD@@PEBDI@Z
++	?copy@String@@QEAA_NXZ
++	?copy@String@@QEAA_NAEBV1@@Z
++	?copy@String@@QEAA_NPEBDIPEAUcharset_info_st@@@Z
++	?copy_and_convert@@YAIPEADIPEAUcharset_info_st@@PEBDI1PEAI@Z
++	?filename_to_tablename@@YAIPEBDPEADI@Z
++	?strconvert@@YAIPEAUcharset_info_st@@PEBD0PEADIPEAI@Z
++	?calculate_key_len@@YAIPEAUst_table@@IPEBEK@Z
++	?sql_alloc@@YAPEAX_K@Z
++	?localtime_to_TIME@@YAXPEAUst_mysql_time@@PEAUtm@@@Z
++	?push_warning@@YAPEAVMYSQL_ERROR@@PEAVTHD@@W4enum_warning_level@1@IPEBD@Z
++	?push_warning_printf@@YAXPEAVTHD@@W4enum_warning_level@MYSQL_ERROR@@IPEBDZZ
++	?drop_table@handler@@EEAAXPEBD@Z
++	?column_bitmaps_signal@handler@@UEAAXXZ
++	?delete_table@handler@@MEAAHPEBD@Z
++	?rename_table@handler@@MEAAHPEBD0@Z
++	?key_map_empty@@3V?$Bitmap@$0EA@@@B
++	?THR_THD@@3PEAVTHD@@EA
++	?end_of_list@@3Ulist_node@@A
++	?mysql_tmpdir_list@@3Ust_my_tmpdir@@A
++	mysql_query_cache_invalidate4
++	thd_query
++	thd_sql_command
++	thd_get_thread_id
++	thd_get_xid
++	thd_slave_thread
++	thd_non_transactional_update
++	thd_mark_transaction_to_rollback
++	thd_security_context
++	thd_charset
++	thd_test_options
++	thd_ha_data
++	thd_killed
++	thd_tx_isolation
++	thd_tablespace_op
++	thd_sql_command
++	thd_memdup
++	thd_make_lex_string
++	thd_in_lock_tables
++	thd_binlog_format
++	_my_hash_init
++	my_hash_free
++	my_tmpdir
++	check_if_legal_filename
++	my_filename
++	my_sync_dir_by_file
++	alloc_root
++	thr_lock_data_init
++	thr_lock_init
++	thr_lock_delete
++	my_multi_malloc
++	get_charset
++	unpack_filename
++	my_hash_insert
++	my_hash_search
++	my_hash_delete
++	mysql_bin_log_file_pos
++	mysql_bin_log_file_name
++	mysqld_embedded
++	my_thread_name
++	my_malloc
++	my_no_flags_free
++	_sanity
++	_mymalloc
++	_myfree
++	_my_strdup
++	_my_thread_var
++	my_error
++	pthread_cond_init
++	pthread_cond_signal
++	pthread_cond_wait
++	pthread_cond_destroy
++	localtime_r
++	my_strdup
++	deflate
++	deflateEnd
++	deflateReset
++	deflateInit2_
++	inflateEnd
++	inflateInit_
++	inflate
++	compressBound
++	inflateInit2_
++	adler32
++	longlong2str
++	strend
++	my_snprintf
+
+diff -Nur win/configure.js.orig win/configure.js
+--- win/configure.js.orig	2008-09-26 21:18:37 -05:00
++++ win/configure.js	2008-10-01 11:21:27 -05:00
+@@ -50,6 +50,7 @@
+             case "EMBED_MANIFESTS":
+             case "EXTRA_DEBUG":
+             case "WITH_EMBEDDED_SERVER":
++            case "INNODB_DYNAMIC_PLUGIN":
+                     configfile.WriteLine("SET (" + args.Item(i) + " TRUE)");
+                     break;
+             case "MYSQL_SERVER_SUFFIX":
diff --git a/storage/myisam/CMakeLists.txt b/storage/myisam/CMakeLists.txt
index fdc0b64f86a..c05e0046e64 100755
--- a/storage/myisam/CMakeLists.txt
+++ b/storage/myisam/CMakeLists.txt
@@ -12,16 +12,12 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
 INCLUDE("${PROJECT_SOURCE_DIR}/win/mysql_manifest.cmake")
 
 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
 SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
 
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
-                    ${CMAKE_SOURCE_DIR}/sql
-                    ${CMAKE_SOURCE_DIR}/regex
-                    ${CMAKE_SOURCE_DIR}/extra/yassl/include)
-
 SET(MYISAM_SOURCES  ft_boolean_search.c ft_nlq_search.c ft_parser.c ft_static.c ft_stem.c
                                 ha_myisam.cc
 				ft_stopwords.c ft_update.c mi_cache.c mi_changed.c mi_check.c
@@ -34,11 +30,9 @@ SET(MYISAM_SOURCES  ft_boolean_search.c ft_nlq_search.c ft_parser.c ft_static.c
 				mi_unique.c mi_update.c mi_write.c rt_index.c rt_key.c rt_mbr.c
 				rt_split.c sort.c sp_key.c ft_eval.h myisamdef.h rt_index.h mi_rkey.c)
 
+MYSQL_STORAGE_ENGINE(MYISAM)
+
 IF(NOT SOURCE_SUBLIBS)
-
-  ADD_LIBRARY(myisam ${MYISAM_SOURCES})
-  ADD_DEPENDENCIES(myisam GenError)
-
   ADD_EXECUTABLE(myisam_ftdump myisam_ftdump.c)
   TARGET_LINK_LIBRARIES(myisam_ftdump myisam mysys debug dbug strings zlib wsock32)
 
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index c14aa2254a3..6f1c712b7f7 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -96,7 +96,7 @@ static void mi_check_print_msg(MI_CHECK *param,	const char* msg_type,
 
   if (!thd->vio_ok())
   {
-    sql_print_error(msgbuf);
+    sql_print_error("%s", msgbuf);
     return;
   }
 
@@ -1810,7 +1810,7 @@ int ha_myisam::info(uint flag)
     stats.data_file_length=  misam_info.data_file_length;
     stats.index_file_length= misam_info.index_file_length;
     stats.delete_length=     misam_info.delete_length;
-    stats.check_time=        misam_info.check_time;
+    stats.check_time=        (ulong) misam_info.check_time;
     stats.mean_rec_length=   misam_info.mean_reclength;
   }
   if (flag & HA_STATUS_CONST)
@@ -1818,7 +1818,7 @@ int ha_myisam::info(uint flag)
     TABLE_SHARE *share= table->s;
     stats.max_data_file_length=  misam_info.max_data_file_length;
     stats.max_index_file_length= misam_info.max_index_file_length;
-    stats.create_time= misam_info.create_time;
+    stats.create_time= (ulong) misam_info.create_time;
     ref_length= misam_info.reflength;
     share->db_options_in_use= misam_info.options;
     stats.block_size= myisam_block_size;        /* record block size */
@@ -1833,7 +1833,7 @@ int ha_myisam::info(uint flag)
     if (share->key_parts)
       memcpy((char*) table->key_info[0].rec_per_key,
 	     (char*) misam_info.rec_per_key,
-	     sizeof(table->key_info[0].rec_per_key[0])*share->key_parts);
+             sizeof(table->key_info[0].rec_per_key[0])*share->key_parts);
     if (share->tmp_table == NO_TMP_TABLE)
       pthread_mutex_unlock(&share->mutex);
 
@@ -1857,7 +1857,7 @@ int ha_myisam::info(uint flag)
     my_store_ptr(dup_ref, ref_length, misam_info.dupp_key_pos);
   }
   if (flag & HA_STATUS_TIME)
-    stats.update_time = misam_info.update_time;
+    stats.update_time = (ulong) misam_info.update_time;
   if (flag & HA_STATUS_AUTO)
     stats.auto_increment_value= misam_info.auto_increment;
 
@@ -1893,6 +1893,12 @@ int ha_myisam::delete_all_rows()
   return mi_delete_all_rows(file);
 }
 
+int ha_myisam::reset_auto_increment(ulonglong value)
+{
+  file->s->state.auto_increment= value;
+  return 0;
+}
+
 int ha_myisam::delete_table(const char *name)
 {
   return mi_delete_table(name);
diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h
index ca44ae9ad87..55a5eac92de 100644
--- a/storage/myisam/ha_myisam.h
+++ b/storage/myisam/ha_myisam.h
@@ -101,6 +101,7 @@ class ha_myisam: public handler
   int reset(void);
   int external_lock(THD *thd, int lock_type);
   int delete_all_rows(void);
+  int reset_auto_increment(ulonglong value);
   int disable_indexes(uint mode);
   int enable_indexes(uint mode);
   int indexes_are_disabled(void);
diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c
index ac0be2f01cc..75678375ce7 100644
--- a/storage/myisam/myisamchk.c
+++ b/storage/myisam/myisamchk.c
@@ -287,8 +287,8 @@ static struct my_option my_long_options[] =
    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
   { "key_buffer_size", OPT_KEY_BUFFER_SIZE, "",
     (uchar**) &check_param.use_buffers, (uchar**) &check_param.use_buffers, 0,
-    GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT, (long) MALLOC_OVERHEAD,
-    (long) ~0L, (long) MALLOC_OVERHEAD, (long) IO_SIZE, 0},
+    GET_ULL, REQUIRED_ARG, USE_BUFFER_INIT, MALLOC_OVERHEAD,
+    SIZE_T_MAX, MALLOC_OVERHEAD,  IO_SIZE, 0},
   { "key_cache_block_size", OPT_KEY_CACHE_BLOCK_SIZE,  "",
     (uchar**) &opt_key_cache_block_size,
     (uchar**) &opt_key_cache_block_size, 0,
@@ -1102,7 +1102,7 @@ static int myisamchk(MI_CHECK *param, char * filename)
       {
 	if (param->testflag & (T_EXTEND | T_MEDIUM))
 	  VOID(init_key_cache(dflt_key_cache,opt_key_cache_block_size,
-                              param->use_buffers, 0, 0));
+                              (size_t) param->use_buffers, 0, 0));
 	VOID(init_io_cache(&param->read_cache,datafile,
 			   (uint) param->read_buffer_length,
 			   READ_CACHE,
@@ -1525,8 +1525,8 @@ static int mi_sort_records(MI_CHECK *param,
   if (share->state.key_root[sort_key] == HA_OFFSET_ERROR)
     DBUG_RETURN(0);				/* Nothing to do */
 
-  init_key_cache(dflt_key_cache, opt_key_cache_block_size, param->use_buffers,
-                 0, 0);
+  init_key_cache(dflt_key_cache, opt_key_cache_block_size,
+                 (size_t) param->use_buffers, 0, 0);
   if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length,
 		   WRITE_CACHE,share->pack.header_length,1,
 		   MYF(MY_WME | MY_WAIT_IF_FULL)))
diff --git a/storage/myisammrg/CMakeLists.txt b/storage/myisammrg/CMakeLists.txt
index 1c94e2bd50c..60cfffc67ff 100755
--- a/storage/myisammrg/CMakeLists.txt
+++ b/storage/myisammrg/CMakeLists.txt
@@ -12,14 +12,10 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
 SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
 
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
-                    ${CMAKE_SOURCE_DIR}/sql
-                    ${CMAKE_SOURCE_DIR}/regex
-                    ${CMAKE_SOURCE_DIR}/extra/yassl/include)
+INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
 
 SET(MYISAMMRG_SOURCES  myrg_close.c myrg_create.c myrg_delete.c myrg_extra.c myrg_info.c
 				ha_myisammrg.cc
@@ -28,7 +24,4 @@ SET(MYISAMMRG_SOURCES  myrg_close.c myrg_create.c myrg_delete.c myrg_extra.c myr
 				myrg_rprev.c myrg_rrnd.c myrg_rsame.c myrg_static.c myrg_update.c
 				myrg_write.c myrg_records.c)
 
-IF(NOT SOURCE_SUBLIBS)
-  ADD_LIBRARY(myisammrg ${MYISAMMRG_SOURCES})
-  ADD_DEPENDENCIES(myisammrg GenError)
-ENDIF(NOT SOURCE_SUBLIBS)
+MYSQL_STORAGE_ENGINE(MYISAMMRG)
diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc
index 492bc15f749..40784e81760 100644
--- a/storage/myisammrg/ha_myisammrg.cc
+++ b/storage/myisammrg/ha_myisammrg.cc
@@ -546,7 +546,8 @@ int ha_myisammrg::attach_children(void)
 
   if (myrg_attach_children(this->file, this->test_if_locked |
                            current_thd->open_options,
-                           myisammrg_attach_children_callback, this))
+                           myisammrg_attach_children_callback, this,
+                           (my_bool *) &need_compat_check))
   {
     DBUG_PRINT("error", ("my_errno %d", my_errno));
     DBUG_RETURN(my_errno ? my_errno : -1);
@@ -948,11 +949,11 @@ int ha_myisammrg::info(uint flag)
         with such a number, it'll be an error later anyway.
       */
       bzero((char*) table->key_info[0].rec_per_key,
-            sizeof(table->key_info[0].rec_per_key) * table->s->key_parts);
+            sizeof(table->key_info[0].rec_per_key[0]) * table->s->key_parts);
 #endif
       memcpy((char*) table->key_info[0].rec_per_key,
 	     (char*) mrg_info.rec_per_key,
-             sizeof(table->key_info[0].rec_per_key) *
+             sizeof(table->key_info[0].rec_per_key[0]) *
              min(file->keys, table->s->key_parts));
     }
   }
diff --git a/storage/myisammrg/myrg_create.c b/storage/myisammrg/myrg_create.c
index df81b730bfd..eaed470daec 100644
--- a/storage/myisammrg/myrg_create.c
+++ b/storage/myisammrg/myrg_create.c
@@ -46,7 +46,7 @@ int myrg_create(const char *name, const char **table_names,
 	fn_same(buff,name,4);
       *(end=strend(buff))='\n';
       end[1]=0;
-      if (my_write(file,(char*) buff,(uint) (end-buff+1),
+      if (my_write(file,(uchar*) buff,(uint) (end-buff+1),
 		   MYF(MY_WME | MY_NABP)))
 	goto err;
     }
diff --git a/storage/myisammrg/myrg_open.c b/storage/myisammrg/myrg_open.c
index 14ba2853b22..01420f47a0c 100644
--- a/storage/myisammrg/myrg_open.c
+++ b/storage/myisammrg/myrg_open.c
@@ -365,11 +365,14 @@ MYRG_INFO *myrg_parent_open(const char *parent_name,
     The callback returns the MyISAM table handle of the child table.
     Check table definition match.
 
-  @param[in]    m_info          MERGE parent table structure
-  @param[in]    handle_locking  if contains HA_OPEN_FOR_REPAIR, warn about
-                                incompatible child tables, but continue
-  @param[in]    callback        function to call for each child table
-  @param[in]    callback_param  data pointer to give to the callback
+  @param[in]    m_info            MERGE parent table structure
+  @param[in]    handle_locking    if contains HA_OPEN_FOR_REPAIR, warn about
+                                  incompatible child tables, but continue
+  @param[in]    callback          function to call for each child table
+  @param[in]    callback_param    data pointer to give to the callback
+  @param[in]    need_compat_check pointer to ha_myisammrg::need_compat_check
+                                  (we need this one to decide if previously
+                                  allocated buffers can be reused).
 
   @return status
     @retval     0               OK
@@ -382,7 +385,7 @@ MYRG_INFO *myrg_parent_open(const char *parent_name,
 
 int myrg_attach_children(MYRG_INFO *m_info, int handle_locking,
                          MI_INFO *(*callback)(void*),
-                         void *callback_param)
+                         void *callback_param, my_bool *need_compat_check)
 {
   ulonglong  file_offset;
   MI_INFO    *myisam;
@@ -423,6 +426,11 @@ int myrg_attach_children(MYRG_INFO *m_info, int handle_locking,
       m_info->reclength= myisam->s->base.reclength;
       min_keys=  myisam->s->base.keys;
       key_parts= myisam->s->base.key_parts;
+      if (*need_compat_check && m_info->rec_per_key_part)
+      {
+        my_free((char *) m_info->rec_per_key_part, MYF(0));
+        m_info->rec_per_key_part= NULL;
+      }
       if (!m_info->rec_per_key_part)
       {
         if(!(m_info->rec_per_key_part= (ulong*)
diff --git a/storage/mysql_storage_engine.cmake b/storage/mysql_storage_engine.cmake
new file mode 100644
index 00000000000..bb368494898
--- /dev/null
+++ b/storage/mysql_storage_engine.cmake
@@ -0,0 +1,36 @@
+# MYSQL_STORAGE_ENGINE Macro creates a project to build storage engine
+# library. 
+#
+# Parameters:
+# engine - storage engine name.
+# variable ENGINE_BUILD_TYPE should be set to "STATIC" or "DYNAMIC"
+# Remarks:
+# ${engine}_SOURCES  variable containing source files to produce the library must set before
+# calling this macro
+
+MACRO(MYSQL_STORAGE_ENGINE engine)
+IF(NOT SOURCE_SUBLIBS)
+  # Add common include directories
+  INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
+                    ${CMAKE_SOURCE_DIR}/sql
+                    ${CMAKE_SOURCE_DIR}/regex
+                    ${CMAKE_SOURCE_DIR}/extra/yassl/include)
+  STRING(TOUPPER ${engine} engine)
+  STRING(TOLOWER ${engine} libname)
+  IF(${ENGINE_BUILD_TYPE} STREQUAL "STATIC")
+    ADD_DEFINITIONS(-DWITH_${engine}_STORAGE_ENGINE -DMYSQL_SERVER)
+    #Create static library. The name of the library is <storage_engine>.lib
+    ADD_LIBRARY(${libname} ${${engine}_SOURCES})
+    ADD_DEPENDENCIES(${libname} GenError)
+    MESSAGE("build ${engine} as static library")
+  ELSEIF(${ENGINE_BUILD_TYPE} STREQUAL "DYNAMIC")
+    ADD_DEFINITIONS(-DMYSQL_DYNAMIC_PLUGIN)
+    #Create a DLL.The name of the dll is ha_<storage_engine>.dll
+    #The dll is linked to the mysqld executable
+    SET(dyn_libname ha_${libname})
+    ADD_LIBRARY(${dyn_libname} SHARED ${${engine}_SOURCES})
+    TARGET_LINK_LIBRARIES (${dyn_libname}  mysqld)
+    MESSAGE("build ${engine} as DLL")
+  ENDIF(${ENGINE_BUILD_TYPE} STREQUAL "STATIC")
+ENDIF(NOT SOURCE_SUBLIBS)
+ENDMACRO(MYSQL_STORAGE_ENGINE)
diff --git a/storage/ndb/src/kernel/blocks/backup/read.cpp b/storage/ndb/src/kernel/blocks/backup/read.cpp
index 8f4d0485f0e..78f6f2f1b50 100644
--- a/storage/ndb/src/kernel/blocks/backup/read.cpp
+++ b/storage/ndb/src/kernel/blocks/backup/read.cpp
@@ -50,7 +50,7 @@ main(int argc, const char * argv[]){
 
   ndb_init();
   if(argc <= 1){
-    printf("Usage: %s <filename>", argv[0]);
+    printf("Usage: %s <filename>\n", argv[0]);
     exit(1);
   }
   FILE * f = fopen(argv[1], "rb");
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index c1aba0b35c6..07191c436b7 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -5489,10 +5489,10 @@ static MY_CHARSET_HANDLER my_charset_handler=
   my_mb_wc_cp932,	/* mb_wc */
   my_wc_mb_cp932,	/* wc_mb */
   my_mb_ctype_mb,
-  my_caseup_str_8bit,
-  my_casedn_str_8bit,
-  my_caseup_8bit,
-  my_casedn_8bit,
+  my_caseup_str_mb,
+  my_casedn_str_mb,
+  my_caseup_mb,
+  my_casedn_mb,
   my_snprintf_8bit,
   my_long10_to_str_8bit,
   my_longlong10_to_str_8bit,
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 3925b76869c..ac426e0d7b5 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4650,10 +4650,10 @@ static MY_CHARSET_HANDLER my_charset_handler=
   my_mb_wc_sjis,	/* mb_wc */
   my_wc_mb_sjis,	/* wc_mb */
   my_mb_ctype_mb,
-  my_caseup_str_8bit,
-  my_casedn_str_8bit,
-  my_caseup_8bit,
-  my_casedn_8bit,
+  my_caseup_str_mb,
+  my_casedn_str_mb,
+  my_caseup_mb,
+  my_casedn_mb,
   my_snprintf_8bit,
   my_long10_to_str_8bit,
   my_longlong10_to_str_8bit,
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 1ed758bc105..2ea48ddab2f 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -7992,6 +7992,7 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t))
 static my_bool my_coll_init_uca(CHARSET_INFO *cs, void *(*alloc)(size_t))
 {
   cs->pad_char= ' ';
+  cs->ctype= my_charset_utf8_unicode_ci.ctype;
   return create_tailoring(cs, alloc);
 }
 
diff --git a/strings/ctype.c b/strings/ctype.c
index 446eb168804..17ad1256e74 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -328,7 +328,9 @@ my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
   {
     my_wc_t wc;
     int chlen;
-    for (; (chlen= cs->cset->mb_wc(cs, &wc, str, strend)) > 0; str+= chlen)
+    for (;
+         (chlen= cs->cset->mb_wc(cs, &wc, (uchar*) str, (uchar*) strend)) > 0;
+         str+= chlen)
     {
       if (wc > 0x7F)
         return MY_REPERTOIRE_UNICODE30;
diff --git a/strings/decimal.c b/strings/decimal.c
index a7770fbb2e1..616d5291482 100644
--- a/strings/decimal.c
+++ b/strings/decimal.c
@@ -306,7 +306,7 @@ int decimal_actual_fraction(decimal_t *from)
   {
     for (i= DIG_PER_DEC1 - ((frac - 1) % DIG_PER_DEC1);
          *buf0 % powers10[i++] == 0;
-         frac--);
+         frac--) ;
   }
   return frac;
 }
@@ -500,7 +500,7 @@ static void digits_bounds(decimal_t *from, int *start_result, int *end_result)
     stop= (int) ((buf_end - from->buf + 1) * DIG_PER_DEC1);
     i= 1;
   }
-  for (; *buf_end % powers10[i++] == 0; stop--);
+  for (; *buf_end % powers10[i++] == 0; stop--) ;
   *end_result= stop; /* index of position after last decimal digit (from 0) */
 }
 
@@ -1011,7 +1011,7 @@ static int ull2dec(ulonglong from, decimal_t *to)
 
   sanity(to);
 
-  for (intg1=1; from >= DIG_BASE; intg1++, from/=DIG_BASE);
+  for (intg1=1; from >= DIG_BASE; intg1++, from/=DIG_BASE) ;
   if (unlikely(intg1 > to->len))
   {
     intg1=to->len;
diff --git a/support-files/build-tags b/support-files/build-tags
index 6c80d2638e9..b5386dc79c3 100755
--- a/support-files/build-tags
+++ b/support-files/build-tags
@@ -4,7 +4,7 @@ rm -f TAGS
 filter='\.cc$\|\.c$\|\.h$\|\.yy$'
 
 list="find . -type f"
-bzr root >/dev/null 2>/dev/null && list="bzr ls --kind=file --versioned"
+bzr root >/dev/null 2>/dev/null && list="bzr ls --from-root -R --kind=file --versioned"
 
 $list |grep $filter |while read f; 
 do
diff --git a/tests/Makefile.am b/tests/Makefile.am
index fbd58b88b9c..ddc6da86e1c 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -56,7 +56,7 @@ bug25714_SOURCES=          bug25714.c
 bug25714_DEPENDENCIES=     $(LIBRARIES) $(pkglib_LTLIBRARIES)
 
 # Fix for mit-threads
-DEFS =			-DUNDEF_THREADS_HACK
+DEFS =			-DMYSQL_CLIENT_NO_THREADS
 
 thread_test.o:		thread_test.c
 			$(COMPILE) -c $(INCLUDES) $<
diff --git a/tests/mysql_client_test.c b/tests/mysql_client_test.c
index 6025063846a..b836293442a 100644
--- a/tests/mysql_client_test.c
+++ b/tests/mysql_client_test.c
@@ -103,7 +103,7 @@ if (!opt_silent) \
 
 static void print_error(const char *msg);
 static void print_st_error(MYSQL_STMT *stmt, const char *msg);
-static void client_disconnect(void);
+static void client_disconnect(MYSQL* mysql, my_bool drop_db);
 
 
 /*
@@ -271,10 +271,20 @@ mysql_simple_prepare(MYSQL *mysql_arg, const char *query)
 }
 
 
-/* Connect to the server */
+/**
+   Connect to the server with options given by arguments to this application,
+   stored in global variables opt_host, opt_user, opt_password, opt_db, 
+   opt_port and opt_unix_socket.
 
-static void client_connect(ulong flag)
+   @param flag[in]           client_flag passed on to mysql_real_connect
+   @param protocol[in]       MYSQL_PROTOCOL_* to use for this connection
+   @param auto_reconnect[in] set to 1 for auto reconnect
+   
+   @return pointer to initialized and connected MYSQL object
+*/
+static MYSQL* client_connect(ulong flag, uint protocol, my_bool auto_reconnect)
 {
+  MYSQL* mysql;
   int  rc;
   static char query[MAX_TEST_QUERY_LENGTH];
   myheader_r("client_connect");
@@ -291,6 +301,7 @@ static void client_connect(ulong flag)
   }
   /* enable local infile, in non-binary builds often disabled by default */
   mysql_options(mysql, MYSQL_OPT_LOCAL_INFILE, 0);
+  mysql_options(mysql, MYSQL_OPT_PROTOCOL, &protocol);
 
   if (!(mysql_real_connect(mysql, opt_host, opt_user,
                            opt_password, opt_db ? opt_db:"test", opt_port,
@@ -302,7 +313,7 @@ static void client_connect(ulong flag)
     fprintf(stdout, "\n Check the connection options using --help or -?\n");
     exit(1);
   }
-  mysql->reconnect= 1;
+  mysql->reconnect= auto_reconnect;
 
   if (!opt_silent)
     fprintf(stdout, "OK");
@@ -329,12 +340,14 @@ static void client_connect(ulong flag)
 
   if (!opt_silent)
     fprintf(stdout, "OK");
+
+  return mysql;
 }
 
 
 /* Close the connection */
 
-static void client_disconnect()
+static void client_disconnect(MYSQL* mysql, my_bool drop_db)
 {
   static char query[MAX_TEST_QUERY_LENGTH];
 
@@ -342,13 +355,16 @@ static void client_disconnect()
 
   if (mysql)
   {
-    if (!opt_silent)
-      fprintf(stdout, "\n dropping the test database '%s' ...", current_db);
-    strxmov(query, "DROP DATABASE IF EXISTS ", current_db, NullS);
+    if (drop_db)
+    {
+      if (!opt_silent)
+        fprintf(stdout, "\n dropping the test database '%s' ...", current_db);
+      strxmov(query, "DROP DATABASE IF EXISTS ", current_db, NullS);
 
-    mysql_query(mysql, query);
-    if (!opt_silent)
-      fprintf(stdout, "OK");
+      mysql_query(mysql, query);
+      if (!opt_silent)
+        fprintf(stdout, "OK");
+    }
 
     if (!opt_silent)
       fprintf(stdout, "\n closing the connection ...");
@@ -2464,6 +2480,9 @@ static void test_ps_query_cache()
 
   myheader("test_ps_query_cache");
 
+  rc= mysql_query(mysql, "SET SQL_MODE=''");
+  myquery(rc);
+
   /* prepare the table */
 
   rc= mysql_query(mysql, "drop table if exists t1");
@@ -2506,6 +2525,9 @@ static void test_ps_query_cache()
         mysql_close(lmysql);
         DIE_UNLESS(0);
       }
+      rc= mysql_query(lmysql, "SET SQL_MODE=''");
+      myquery(rc);
+
       if (!opt_silent)
         fprintf(stdout, "OK");
     }
@@ -4240,6 +4262,10 @@ static void test_fetch_date()
 
   myheader("test_fetch_date");
 
+  /* Will not work if sql_mode is NO_ZERO_DATE (implicit if TRADITIONAL) /*/
+  rc= mysql_query(mysql, "SET SQL_MODE=''");
+  myquery(rc);
+
   rc= mysql_query(mysql, "DROP TABLE IF EXISTS test_bind_result");
   myquery(rc);
 
@@ -4954,6 +4980,9 @@ static void test_stmt_close()
   /* set AUTOCOMMIT to ON*/
   mysql_autocommit(lmysql, TRUE);
 
+  rc= mysql_query(lmysql, "SET SQL_MODE = ''");
+  myquery(rc);
+
   rc= mysql_query(lmysql, "DROP TABLE IF EXISTS test_stmt_close");
   myquery(rc);
 
@@ -12088,6 +12117,9 @@ static void test_bug6058()
 
   myheader("test_bug6058");
 
+  rc= mysql_query(mysql, "SET SQL_MODE=''");
+  myquery(rc);
+
   stmt_text= "SELECT CAST('0000-00-00' AS DATE)";
 
   rc= mysql_real_query(mysql, stmt_text, strlen(stmt_text));
@@ -13303,6 +13335,9 @@ static void test_bug8378()
   if (!opt_silent)
     fprintf(stdout, "OK");
 
+  rc= mysql_query(lmysql, "SET SQL_MODE=''");
+  myquery(rc);
+
   len= mysql_real_escape_string(lmysql, out, TEST_BUG8378_IN, 4);
 
   /* No escaping should have actually happened. */
@@ -16389,6 +16424,14 @@ static void test_change_user()
   rc= mysql_query(mysql, buff);
   myquery(rc);
 
+  sprintf(buff,
+          "grant select on %s.* to %s@'localhost' identified by '%s'",
+          db,
+          user_pw,
+          pw);
+  rc= mysql_query(mysql, buff);
+  myquery(rc);
+
   sprintf(buff,
           "grant select on %s.* to %s@'%%'",
           db,
@@ -16396,6 +16439,13 @@ static void test_change_user()
   rc= mysql_query(mysql, buff);
   myquery(rc);
 
+  sprintf(buff,
+          "grant select on %s.* to %s@'localhost'",
+          db,
+          user_no_pw);
+  rc= mysql_query(mysql, buff);
+  myquery(rc);
+
 
   /* Try some combinations */
   rc= mysql_change_user(mysql, NULL, NULL, NULL);
@@ -16552,6 +16602,14 @@ static void test_change_user()
   rc= mysql_query(mysql, buff);
   myquery(rc);
 
+  sprintf(buff, "drop user %s@'localhost'", user_pw);
+  rc= mysql_query(mysql, buff);
+  myquery(rc);
+
+  sprintf(buff, "drop user %s@'localhost'", user_no_pw);
+  rc= mysql_query(mysql, buff);
+  myquery(rc);
+
   DBUG_VOID_RETURN;
 }
 
@@ -17220,6 +17278,11 @@ static void test_bug31669()
   rc= mysql_query(mysql, query);
   myquery(rc);
 
+  strxmov(query, "GRANT ALL PRIVILEGES ON *.* TO '", user, "'@'localhost' IDENTIFIED BY "
+                 "'", buff, "' WITH GRANT OPTION", NullS);
+  rc= mysql_query(mysql, query);
+  myquery(rc);
+
   rc= mysql_query(mysql, "FLUSH PRIVILEGES");
   myquery(rc);
 
@@ -17257,7 +17320,7 @@ static void test_bug31669()
   strxmov(query, "DELETE FROM mysql.user WHERE User='", user, "'", NullS);
   rc= mysql_query(mysql, query);
   myquery(rc);
-  DIE_UNLESS(mysql_affected_rows(mysql) == 1);
+  DIE_UNLESS(mysql_affected_rows(mysql) == 2);
 #endif
 
   DBUG_VOID_RETURN;
@@ -17469,6 +17532,9 @@ static void test_wl4166_2()
 
   myheader("test_wl4166_2");
 
+  rc= mysql_query(mysql, "SET SQL_MODE=''");
+  myquery(rc);
+
   rc= mysql_query(mysql, "drop table if exists t1");
   myquery(rc);
   rc= mysql_query(mysql, "create table t1 (c_int int, d_date date)");
@@ -17662,6 +17728,100 @@ static void test_bug40365(void)
 }
 
 
+/**
+  Subtest for Bug#43560. Verifies that a loss of connection on the server side
+  is handled well by the mysql_stmt_execute() call, i.e., no SIGSEGV due to
+  a vio socket that is cleared upon closed connection.
+
+  Assumes the presence of the close_conn_after_stmt_execute debug feature in
+  the server. Verifies that it is connected to a debug server before proceeding
+  with the test.
+ */
+static void test_bug43560(void)
+{
+  MYSQL*       conn;
+  uint         rc;
+  MYSQL_STMT   *stmt= 0;
+  MYSQL_BIND   bind;
+  my_bool      is_null= 0;
+  char         buffer[256];
+  const uint   BUFSIZE= sizeof(buffer);
+  const char*  values[] = {"eins", "zwei", "drei", "viele", NULL};
+  const char   insert_str[] = "INSERT INTO t1 (c2) VALUES (?)";
+  unsigned long length;
+  
+  DBUG_ENTER("test_bug43560");
+  myheader("test_bug43560");
+
+  /* Make sure we only run against a debug server. */
+  if (!strstr(mysql->server_version, "debug"))
+  {
+    fprintf(stdout, "Skipping test_bug43560: server not DEBUG version\n");
+    DBUG_VOID_RETURN;
+  }
+
+  /*
+    Set up a separate connection for this test to avoid messing up the
+    general MYSQL object used in other subtests. Use TCP protocol to avoid
+    problems with the buffer semantics of AF_UNIX, and turn off auto reconnect.
+  */
+  conn= client_connect(0, MYSQL_PROTOCOL_TCP, 0);
+
+  rc= mysql_query(conn, "DROP TABLE IF EXISTS t1");
+  myquery(rc);
+  rc= mysql_query(conn,
+    "CREATE TABLE t1 (c1 INT PRIMARY KEY AUTO_INCREMENT, c2 CHAR(10))");
+  myquery(rc);
+
+  stmt= mysql_stmt_init(conn);
+  check_stmt(stmt);
+  rc= mysql_stmt_prepare(stmt, insert_str, strlen(insert_str));
+  check_execute(stmt, rc);
+
+  bind.buffer_type= MYSQL_TYPE_STRING;
+  bind.buffer_length= BUFSIZE;
+  bind.buffer= buffer;
+  bind.is_null= &is_null;
+  bind.length= &length;
+  rc= mysql_stmt_bind_param(stmt, &bind);
+  check_execute(stmt, rc);
+
+  /* First execute; should succeed. */
+  strncpy(buffer, values[0], BUFSIZE);
+  length= strlen(buffer);
+  rc= mysql_stmt_execute(stmt);
+  check_execute(stmt, rc);
+
+  /* 
+    Set up the server to close this session's server-side socket after
+    next execution of prep statement.
+  */
+  rc= mysql_query(conn,"SET SESSION debug='+d,close_conn_after_stmt_execute'");
+  myquery(rc);
+
+  /* Second execute; should fail due to socket closed during execution. */
+  strncpy(buffer, values[1], BUFSIZE);
+  length= strlen(buffer);
+  rc= mysql_stmt_execute(stmt);
+  DIE_UNLESS(rc && mysql_stmt_errno(stmt) == CR_SERVER_LOST);
+
+  /* 
+    Third execute; should fail (connection already closed), or SIGSEGV in
+    case of a Bug#43560 type regression in which case the whole test fails.
+  */
+  strncpy(buffer, values[2], BUFSIZE);
+  length= strlen(buffer);
+  rc= mysql_stmt_execute(stmt);
+  DIE_UNLESS(rc && mysql_stmt_errno(stmt) == CR_SERVER_LOST);
+
+  client_disconnect(conn, 0);
+  rc= mysql_query(mysql, "DROP TABLE t1");
+  myquery(rc);
+
+  DBUG_VOID_RETURN;
+}
+
+
 /**
   Bug#36326: nested transaction and select
 */
@@ -18090,6 +18250,7 @@ static struct my_tests_st my_tests[]= {
   { "test_wl4166_2", test_wl4166_2 },
   { "test_bug38486", test_bug38486 },
   { "test_bug40365", test_bug40365 },
+  { "test_bug43560", test_bug43560 },
 #ifdef HAVE_QUERY_CACHE
   { "test_bug36326", test_bug36326 },
 #endif
@@ -18218,7 +18379,8 @@ int main(int argc, char **argv)
                         (char**) embedded_server_groups))
     DIE("Can't initialize MySQL server");
 
-  client_connect(0);       /* connect to server */
+  /* connect to server with no flags, default protocol, auto reconnect true */
+  mysql= client_connect(0, MYSQL_PROTOCOL_DEFAULT, 1);
 
   total_time= 0;
   for (iter_count= 1; iter_count <= opt_count; iter_count++)
@@ -18248,7 +18410,7 @@ int main(int argc, char **argv)
 	  fprintf(stderr, "\n\nGiven test not found: '%s'\n", *argv);
 	  fprintf(stderr, "See legal test names with %s -T\n\nAborting!\n",
 		  my_progname);
-	  client_disconnect();
+	  client_disconnect(mysql, 1);
 	  free_defaults(defaults_argv);
 	  exit(1);
 	}
@@ -18261,7 +18423,7 @@ int main(int argc, char **argv)
     /* End of tests */
   }
 
-  client_disconnect();    /* disconnect from server */
+  client_disconnect(mysql, 1);    /* disconnect from server */
 
   free_defaults(defaults_argv);
   print_test_output();
diff --git a/win/Makefile.am b/win/Makefile.am
index 5dc637ae8a3..e5813010740 100644
--- a/win/Makefile.am
+++ b/win/Makefile.am
@@ -16,7 +16,7 @@
 ## Process this file with automake to create Makefile.in
 EXTRA_DIST = build-vs71.bat build-vs8.bat build-vs8_x64.bat build-vs9.bat \
              build-vs9_x64.bat configure.js README mysql_manifest.cmake \
-             create_manifest.js
+             create_manifest.js create_def_file.js
 
 # Don't update the files from bitkeeper
 %::SCCS/s.%
diff --git a/win/configure.js b/win/configure.js
index c80645a73b2..07dccb66f83 100644
--- a/win/configure.js
+++ b/win/configure.js
@@ -39,13 +39,6 @@ try
         var parts = args.Item(i).split('=');
         switch (parts[0])
         {
-            case "WITH_ARCHIVE_STORAGE_ENGINE":
-            case "WITH_BLACKHOLE_STORAGE_ENGINE":
-            case "WITH_EXAMPLE_STORAGE_ENGINE":
-            case "WITH_FEDERATED_STORAGE_ENGINE":
-            case "WITH_INNOBASE_STORAGE_ENGINE":
-            case "WITH_PARTITION_STORAGE_ENGINE":
-            case "__NT__":
             case "CYBOZU":
             case "EMBED_MANIFESTS":
             case "EXTRA_DEBUG":
@@ -66,6 +59,7 @@ try
                     break;
         }
     }
+
     if (actual_port == 0)
 	{
        // if we actually defaulted (as opposed to the pathological case of
@@ -115,7 +109,11 @@ try
                          GetBaseVersion(version) + "\")");
     configfile.WriteLine("SET (MYSQL_VERSION_ID \"" +
                          GetVersionId(version) + "\")");
-
+    var engineOptions = ParsePlugins();
+    for (option in engineOptions)
+    {
+       configfile.WriteLine("SET(" + engineOptions[option] + " TRUE)");
+    }
     configfile.Close();
     
     fso = null;
@@ -185,3 +183,139 @@ function GetVersionId(version)
     id += build;
     return id;
 }
+
+function PluginConfig(isGroup, include)
+{
+    this.isGroup = isGroup;
+    this.include = include;
+}
+
+
+// Parse command line arguments specific to plugins (aka storage engines).
+//
+// --with-plugin-PLUGIN, --with-plugins=group,  --with-plugins=PLUGIN[,PLUGIN...]
+// --without-plugin-PLUGIN is supported.
+//
+// Legacy option WITH_<PLUGIN>_STORAGE_ENGINE is supported as well.
+// The function returns string array with elements like WITH_SOME_STORAGE_ENGINE 
+// or WITHOUT_SOME_STORAGE_ENGINE.
+//
+// This function handles groups, for example effect of specifying --with-plugins=max 
+// is the same as --with-plugins==archive,federated,falcon,innobase...
+
+function ParsePlugins()
+{
+
+    var config = new Array();  
+
+    config["DEFAULT"] = new PluginConfig(true,true);
+    
+    // Parse command line parameters
+    for (i=0; i< WScript.Arguments.length;i++)
+    {
+        var option = WScript.Arguments.Item(i);
+        var match = /WITH_(\w+)_STORAGE_ENGINE/.exec(option);
+        if (match == null)
+            match = /--with-plugin-(\w+)/.exec(option);
+        if (match != null)
+        {
+            config[match[1].toUpperCase()] =  new PluginConfig(false,true);
+            continue;
+        }
+
+        match = /WITHOUT_(\w+)_STORAGE_ENGINE/.exec(option);
+        if (match == null)
+            match = /--without-plugin-(\w+)/.exec(option);
+    
+        if (match != null)
+        {
+            config[match[1].toUpperCase()] =  
+                new PluginConfig(false,false);
+            continue;
+        }
+        
+        match = /--with-plugins=([\w,\-_]+)/.exec(option);
+        if(match != null)
+        {
+        
+            var plugins  = match[1].split(",");
+            for(var key in plugins)
+            {
+                config[plugins[key].toUpperCase()] = 
+                    new PluginConfig(null,true);
+            }
+            continue;
+        }
+        match = /--without-plugins=([\w,\-_]+)/.exec(option);
+        if(match != null)
+        {
+            var plugins = match[1].split(",");
+            for(var key in plugins)
+                config[plugins[key].toUpperCase()] =
+                    new PluginConfig(null, false);
+            continue;
+        }
+    }
+    
+    // Read plugin definitions, find out groups plugins belong to.
+    var fc = new Enumerator(fso.GetFolder("storage").SubFolders);
+    for (;!fc.atEnd(); fc.moveNext())
+    {
+        var subfolder = fc.item();
+        var name =  subfolder.name.toUpperCase();
+        
+        // Handle case where storage engine  was already specified by name in 
+        // --with-plugins or --without-plugins.
+        if (config[name] != undefined)
+        {
+            config[name].isGroup = false;
+            continue;
+        }
+        config[name] = new PluginConfig(false,null);
+        
+        // Handle groups. For each plugin, find out which group it belongs to
+        // If this group was specified on command line for inclusion/exclusion,
+        // then include/exclude the plugin.
+        filename  = subfolder +"\\plug.in";
+        if (fso.FileExists(filename))
+        {
+            var content = fso.OpenTextFile(filename, ForReading).ReadAll();
+            var match = 
+              /MYSQL_STORAGE_ENGINE([ ]*)[\(]([^\)]+)[\)]/.exec(content);
+            if (match== null)
+                continue;
+            match = /\[[\w,\-_]+\][\s]?\)/.exec(match[0]);
+            if (match == null)
+                continue;
+            groups = match[0].split(/[\,\(\)\[\] ]/);
+            for (var key in groups)
+            {
+                var group = groups[key].toUpperCase();
+                if (config[group] != undefined)
+                {
+                    config[group].isGroup = true;
+                    if (config[group].include != null)
+                    {
+                        config[name].include = config[group].include;
+                        break;
+                    }
+                }
+            }
+        }
+    }
+    
+    var arr = new Array();
+    for(key in config)
+    {
+        var eng = config[key];
+        if(eng.isGroup != undefined && !eng.isGroup	&& eng.include != undefined)
+        {
+            if (fso.FolderExists("storage\\"+key) || key=="PARTITION")
+            {
+                arr[arr.length] = eng.include? 
+                    "WITH_"+key+"_STORAGE_ENGINE":"WITHOUT_"+key+"_STORAGE_ENGINE";
+            }
+        }
+    }
+    return arr;
+}
diff --git a/win/create_def_file.js b/win/create_def_file.js
new file mode 100644
index 00000000000..aaaf4659736
--- /dev/null
+++ b/win/create_def_file.js
@@ -0,0 +1,220 @@
+// create_def_file.js
+//
+// Copyright (C) 2009 Sun Microsystems
+// 
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; version 2 of the License.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+/*
+  This script extracts names and types of globally defined symbols from
+  COFF object files, and writes this information to stdout using .DEF 
+  file format (module definition file used by Microsoft linker)
+  
+  In MySQL this script is used to export symbols from mysqld.exe for use by
+  storage engine DLLs.
+  
+  Usage:
+  cscript create_def_file.js [x86|x64] [object|static_lib|directory ...]
+  
+  If directory is passed as a parameter, script will process all object files
+  and static libraries in this directory and recursively in all subdrectories.
+
+  Note :The script does not work properly if  /GL (global optimization) 
+  compiler option was used to produce object files or libraries. This is a
+  limitation of the dumpbin tool that is used by the script.
+*/
+
+ForReading = 1;
+ForWriting = 2;
+ForAppending = 8;
+
+
+var args = WScript.Arguments;
+  
+// check that we got proper arguments
+if (args.length < 2)
+{
+    echo("Usage: create_def_file <X86|X64> [static_library|objectfile|objectdirectory ...] ");
+    WScript.Quit(1);
+}
+
+
+var is64 = args.Item(0).toLowerCase() == "x64";
+var shell = new ActiveXObject("WScript.Shell");
+var fso = new ActiveXObject("Scripting.FileSystemObject");
+
+OutputSymbols(CollectSymbols());
+
+
+// takes the array that has been built up and writes out mysqld.def
+function OutputSymbols(symbols)
+{
+    var out = WScript.StdOut;
+    out.WriteLine("EXPORTS");
+    for (var sym in symbols)
+       out.WriteLine(sym);
+}
+
+function echo(message)
+{
+    WScript.StdErr.WriteLine(message);
+}
+
+// Extract global symbol names and type from objects
+function CollectSymbols()
+{
+    var uniqueSymbols = new Array();
+
+    try
+    {
+        /*
+         Compiler tools use VS_UNICODE_OUTPUT env. variable as indicator 
+         that they run within IDE, so they can communicate with IDE via 
+         pipes instead of usual stdout/stderr. Refer to 
+         http://blogs.msdn.com/freik/archive/2006/04/05/569025.aspx 
+         for more info.
+         Unset this environment variable.
+        */
+        shell.Environment("PROCESS").Remove("VS_UNICODE_OUTPUT");
+    }
+    catch(e){}
+ 
+    var rspfilename = "dumpsymbols.rsp";
+    CreateResponseFile(rspfilename);
+    var commandline="dumpbin @"+rspfilename;
+    
+    echo("Executing "+commandline);
+    var oExec = shell.Exec(commandline);
+
+    while(!oExec.StdOut.AtEndOfStream)
+    {
+        var line = oExec.StdOut.ReadLine();
+        if (line.indexOf("External") == -1) continue;
+        var columns = line.split(" ");
+        var index = 0;
+        if (columns.length < 3)
+          continue;
+
+        /*
+          If the third column of dumpbin output contains SECTx, 
+          the symbol is defined in that section of the object file. 
+          If UNDEF appears, it is not defined in that object and must
+          be resolved elsewhere. BSS symbols (like uninitialized arrays)
+          appear to have non-zero second column.
+        */
+        if (columns[2].substring(0,4)!="SECT")
+        {
+          if (columns[2] == "UNDEF"  && parseInt(columns[1])==0 )
+            continue;
+        }
+
+        /*
+          Extract undecorated symbol names 
+          between "|" and next whitespace after it.
+        */
+        for (; index < columns.length; index++)
+          if (columns[index] == "|")
+              break;
+
+        var symbol = columns[index + 1];
+        var firstSpace = symbol.indexOf(" ");
+        if (firstSpace != -1)
+            symbol = symbol.substring(0, firstSpace-1);
+
+        // Don't export compiler defined stuff
+        if (IsCompilerDefinedSymbol(symbol))
+            continue;
+        
+        // Correct symbol name for cdecl calling convention on x86
+        symbol = ScrubSymbol(symbol);
+
+        // Check if we have function or data
+        if (line.indexOf("notype () ") == -1)
+            symbol = symbol + " DATA";
+
+        uniqueSymbols[symbol] = 1;
+    }
+    fso.DeleteFile(rspfilename);
+    return uniqueSymbols;
+}
+
+// performs necessary cleanup on the symbol name
+function ScrubSymbol(symbol)
+{
+    if (is64) return symbol;
+    if (symbol.charAt(0) != "_") 
+        return symbol;
+
+    var atSign = symbol.indexOf("@");
+    if (atSign != -1)
+    {
+        var paramSize = symbol.substring(atSign+1, symbol.Length);
+        if (paramSize.match("[0-9]+$")) return symbol;
+    }
+    return symbol.substring(1, symbol.length);
+}
+
+// returns true if the symbol is compiler defined
+function IsCompilerDefinedSymbol(symbol)
+{
+    return ((symbol.indexOf("__real@") != -1) ||
+    (symbol.indexOf("_RTC_") != -1) || 
+    (symbol.indexOf("??_C@_") != -1) ||
+    (symbol.indexOf("??_R") != -1) ||
+    (symbol.indexOf("??_7") != -1)  ||
+    (symbol.indexOf("?_G") != -1) ||           // scalar deleting destructor
+    (symbol.indexOf("?_E") != -1));            // vector deleting destructor
+}
+
+// Creates response file for dumpbin
+function CreateResponseFile(filename)
+{
+  var responseFile = fso.CreateTextFile(filename,true);
+  responseFile.WriteLine("/SYMBOLS");
+  
+  var index = 1;
+  for (; index < args.length; index++)
+  {
+    addToResponseFile(args.Item(index),responseFile);
+  }
+  responseFile.Close();
+}
+
+// Add object file/library to the dumpbin response file.
+// If filename parameter is directory, all objects and libs under 
+// this directory or subdirectories are added.
+function addToResponseFile(filename, responseFile)
+{
+   if (fso.FolderExists(filename))
+   {
+        var folder = fso.getFolder(filename);
+        var enumerator = new Enumerator(folder.files);
+        for (; !enumerator.atEnd(); enumerator.moveNext())
+        {
+            addToResponseFile(enumerator.item().Path, responseFile);
+        }
+        enumerator = new Enumerator(folder.subFolders);
+        for (; !enumerator.atEnd(); enumerator.moveNext())
+        {
+            addToResponseFile(enumerator.item().Path, responseFile);
+        }
+    }
+    else if (fso.FileExists(filename))
+    {
+       var extension = filename.substr(filename.length -3).toLowerCase();
+       if(extension == "lib" || extension == "obj")
+       {
+           responseFile.WriteLine("\""+fso.GetFile(filename).Path+"\"");
+       }
+    }
+}